mpri-webdam/profiles/models.py

263 lines
7.9 KiB
Python
Raw Normal View History

2018-02-21 11:35:53 +01:00
"""
A django module that defines a profile, and all the information that can be
stored in a profile.
It stores interests, technical information such as the browser fingerprint,
the preferred search engine, and whether the user is likely to directly use urls
or to type in the search engine.
"""
2018-02-25 16:10:38 +01:00
import os
2018-02-21 11:35:53 +01:00
import random
2018-01-23 18:12:47 +01:00
from django.db import models
2018-02-25 16:10:38 +01:00
# Parent of the directory that contains this file; the data files read below
# live in its `data/` sub-directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def _read_lines(path):
    """ Return the content of the text file `path` as a list of lines,
    without their line endings.

    The file is opened in a `with` block so that the handle is closed as soon
    as the content has been read, instead of being left to the garbage
    collector.
    """
    with open(path) as handle:
        return handle.read().splitlines()


# Word list used to build nicknames. Starts as None and is filled in by
# `require_nicknames`.
NICKNAMES = None

# Pools of names and e-mail domains, read once when the module is imported.
LASTNAMES = _read_lines(BASE_DIR + "/data/lastnames.txt")
FIRSTNAMES = _read_lines(BASE_DIR + "/data/firstnames.txt")
EMAIL_DOMAINS = _read_lines(BASE_DIR + "/data/email_domains.txt")
2018-02-25 13:17:44 +01:00
2018-01-24 13:36:55 +01:00
def require_nicknames(fct):
    """ Decorator making sure the global `NICKNAMES` word list is loaded.

    The loading happens when the decorator is applied, not when the decorated
    function is called. The candidate files are tried in order and the first
    one that exists is read into `NICKNAMES`. Nothing is read if `NICKNAMES`
    is already set.

    Returns `fct` itself, unwrapped.

    Raises FileNotFoundError if none of the candidate files exists.
    """
    global NICKNAMES

    if NICKNAMES is not None:
        return fct

    nicknames_files = [
        os.path.join(BASE_DIR, 'data/nicknames_dict'),
        "/usr/share/dict/american-english",
    ]
    for nick_file in nicknames_files:
        print("Trying {}".format(nick_file))
        try:
            with open(nick_file, 'r') as handle:
                NICKNAMES = handle.read().splitlines()
        except FileNotFoundError:
            # Missing candidate: fall through to the next one.
            continue
        break

    if NICKNAMES is None:
        # Name the files that were tried, so the failure can be diagnosed
        # from the traceback alone.
        raise FileNotFoundError(
            "No nicknames file found, tried: {}".format(
                ", ".join(nicknames_files)))
    return fct
2018-01-24 13:50:16 +01:00
class InvalidData(Exception):
    ''' Thrown when the DB contains invalid data, and cannot perform
    something.

    `what` describes the problem; it is kept on the instance and is also what
    `str()` of the exception yields. '''

    def __init__(self, what):
        self.what = what
        # Hand the message to Exception as well, so that `args` and `repr()`
        # carry it and the exception can be rebuilt from `args` (which is
        # what copying/pickling relies on).
        super().__init__(what)

    def __str__(self):
        # str() guards against a non-string `what`, which would otherwise
        # make __str__ itself raise a TypeError.
        return str(self.what)
2018-01-24 14:09:33 +01:00
class Keyword(models.Model):
    ''' A search term, stored as free text '''

    text = models.CharField(max_length=256)

    def __str__(self):
        return self.text

    def generate_url(self, user):
        """ Build the url searching this keyword with the search engine of
        `user`. The keyword object itself is handed to `search_url`.
        """
        engine = user.search_engine
        return engine.search_url(self)
2018-01-24 14:09:33 +01:00
class Webpage(models.Model):
    ''' A single web page, identified by its url '''

    url = models.URLField()

    def __str__(self):
        # A page is displayed as its url.
        return self.url
class Website(models.Model):
    ''' A website usually visited '''

    name = models.CharField(max_length=256)
    url = models.URLField()
    keywords = models.ManyToManyField(Keyword)
    notable_pages = models.ManyToManyField(Webpage)

    def __str__(self):
        return self.name

    def generate_url(self, user):
        """ Generates the url in case the interest chosen is a website.

        `user` is expected to expose `uses_urls` and `search_engine`, as
        `Profile` does.

        If the user goes to websites by url, the website url is returned.
        Otherwise a random draw picks between the url of one of the notable
        pages (draw <= 0.1), a search for the website name followed by one of
        its keywords (draw <= 0.8), or a search for the website name alone.
        When the relation needed by the drawn option is empty, the search for
        the website name alone is used instead.
        """
        rand = random.random()
        if user.uses_urls:
            return self.url

        if rand <= 0.1:
            # A related manager is not a sequence: fetch its rows before
            # choosing among them.
            pages = list(self.notable_pages.all())
            if pages:
                return random.choice(pages).url
        elif rand <= 0.8:
            keywords = list(self.keywords.all())
            if keywords:
                # Use the keyword text: a Keyword object cannot be
                # concatenated to a string.
                search_term_text = self.name + " " + \
                    random.choice(keywords).text
                return user.search_engine.search_url(search_term_text)

        return user.search_engine.search_url(self.name)
2018-01-24 14:09:33 +01:00
2018-01-24 22:39:20 +01:00
class Place(models.Model):
    ''' A physical place, with a name, an address and coordinates '''

    name = models.CharField(max_length=256)
    address = models.CharField(max_length=512)
    lat = models.FloatField('Latitude', blank=True)
    lon = models.FloatField('Longitude', blank=True)

    def __str__(self):
        return self.name

    def generate_url(self, user):
        """ Build a search url for this place: the search term is either the
        name or the address of the place, each with the same probability.
        """
        search_term = self.name if random.random() < 1/2 else self.address
        return user.search_engine.search_url(search_term)
2018-01-24 22:39:20 +01:00
class Event(models.Model):
    ''' A real-life event (protests, meeting, ...) '''

    name = models.CharField(max_length=256)
    date = models.DateTimeField()
    place = models.ForeignKey(Place, on_delete=models.CASCADE)

    def __str__(self):
        return self.name

    def generate_url(self, user):
        """ Generate the url for an event object.

        The search term is made of the name, the date and the place of the
        event, in a random order, separated by spaces.
        """
        # `date` and `place` are not strings: convert them, since str.join
        # only accepts strings.
        possibilities = random.sample(
            [self.name, str(self.date), str(self.place)],
            3
        )
        return user.search_engine.search_url(" ".join(possibilities))
2018-01-24 13:36:55 +01:00
class BrowserFingerprint(models.Model):
    ''' The set of values identifying a browser, such as its user agent '''

    description = models.CharField(max_length=256)
    useragent = models.CharField(max_length=256)
    appname = models.CharField(max_length=256)
    appversion = models.CharField(max_length=256)
    platform = models.CharField(max_length=256)
    vendor = models.CharField(max_length=256)
    vendorsub = models.CharField(max_length=256)
    buildID = models.CharField(max_length=256)
    oscpu = models.CharField(max_length=256)
    accept_encoding = models.CharField(max_length=256)
    accept_default = models.CharField(max_length=256)
    accept_lang = models.CharField(max_length=256)
    pixeldepth = models.IntegerField()
    colordepth = models.IntegerField()
    screens = models.CharField(max_length=256)

    def __str__(self):
        return self.description

    def serialize_headers(self):
        """ Return a dict mapping header names to the matching values of this
        fingerprint, each converted to a string.
        """
        header_values = (
            ("Description", self.description),
            ("User-Agent", self.useragent),
            ("Accept-Encoding", self.accept_encoding),
            ("Accept", self.accept_default),
            ("Accept-Language", self.accept_lang),
        )
        return {header: str(value) for header, value in header_values}
2018-01-24 13:50:16 +01:00
class SearchEngine(models.Model):
    ''' A search engine, and all the data needed to use it '''

    name = models.CharField(max_length=256)
    url = models.URLField()
    # The query pattern. It should contain a `{}`, which, when substituted
    # with a search term (using `.format()`), must yield a URL that can be
    # resolved to perform the search
    query_pattern = models.CharField(max_length=256)

    def __str__(self):
        return self.name

    def search_url(self, search_term):
        ''' Obtain a url to search `search_term` with this search engine.

        Raises InvalidData if the query pattern has no `{}` placeholder, or
        cannot be formatted with a single search term. '''
        pattern = str(self.query_pattern)
        if '{}' not in pattern:
            raise InvalidData("Search engine {}: bad pattern".format(self))
        # NOTE(review): the search term is substituted as is, without any
        # url-encoding -- confirm that the callers expect this.
        try:
            return pattern.format(search_term)
        except (IndexError, KeyError, ValueError) as err:
            # The pattern holds extra or malformed replacement fields: this
            # is bad data, and is reported the same way as a missing `{}`.
            raise InvalidData(
                "Search engine {}: bad pattern".format(self)) from err
2018-01-24 22:44:58 +01:00
class Interest(models.Model):
    ''' A named interest, grouping the keywords, places, websites and events
    related to it '''

    name = models.CharField(max_length=256)

    # Items related to this interest.
    keywords = models.ManyToManyField(Keyword)
    places = models.ManyToManyField(Place)
    websites = models.ManyToManyField(Website)
    events = models.ManyToManyField(Event)

    def __str__(self):
        return self.name
class Profile(models.Model):
    ''' A user profile: the identity, interests and technical settings kept
    together so that the requests of the user are consistent '''

    # Identity
    nick = models.CharField(
        max_length=64,
        help_text="The user's online identity",
    )
    first_name = models.CharField(max_length=64)
    last_name = models.CharField(max_length=64)
    email = models.EmailField()

    # Browsing habits
    uses_urls = models.BooleanField(
        help_text=('Does the user usually go to a given website using its url '
                   'or searching it in a search engine?'),
    )
    interests = models.ManyToManyField(Interest)

    # Technical settings
    search_engine = models.ForeignKey(
        SearchEngine,
        on_delete=models.CASCADE,
    )
    browser_fingerprint = models.ForeignKey(
        BrowserFingerprint,
        on_delete=models.CASCADE,
    )
2018-02-25 13:18:12 +01:00
def generate_email(nick, first_name, last_name):
    """ Build an e-mail address on a domain picked at random in
    `EMAIL_DOMAINS`.

    The part before the `@` is `first_name.last_name` when a random draw is
    below 0.3, and `nick` otherwise.
    """
    domain = random.choice(EMAIL_DOMAINS)
    use_full_name = random.random() < 0.3
    local_part = (first_name + "." + last_name) if use_full_name else nick
    return local_part + "@" + domain
@require_nicknames
def create_profile(nick=None):
    """ Create, validate and save a random profile, then return it.

    `nick` is the nickname to give to the profile. When it is None, a
    nickname is made of 2 to 4 entries of `NICKNAMES` picked at random and
    joined together.

    The first name, last name, e-mail address, search engine and browser
    fingerprint are picked at random; the latter two among the rows stored in
    the database, so `random.choice` raises if one of these tables is empty.
    `full_clean()` is called before saving, so invalid field values raise
    instead of being stored.
    """
    # Only generate a nickname when the caller did not provide one.
    if nick is None:
        nick = "".join(random.sample(NICKNAMES, random.randrange(2, 5)))
    first_name = random.choice(FIRSTNAMES)
    last_name = random.choice(LASTNAMES)
    email = generate_email(nick, first_name, last_name)
    profile = Profile(
        nick=nick,
        first_name=first_name,
        last_name=last_name,
        email=email,
        uses_urls=(random.random() < 0.5),
    )
    profile.search_engine = random.choice(SearchEngine.objects.all())
    profile.browser_fingerprint = random.choice(
        BrowserFingerprint.objects.all())

    profile.full_clean()
    profile.save()
    return profile