mpri-webdam/profiles/models.py

265 lines
8.1 KiB
Python

"""
A Django module that defines a profile, and all the information that can be
stored in a profile.
It stores interests, technical information such as the browser fingerprint,
the preferred search engine, and whether the user is likely to directly use
URLs or to type in the search engine.
"""
import os
import random
from urllib.parse import quote_plus

from django.db import models
# Parent of the directory that contains this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def _read_lines(path):
    """Return the lines of the text file at `path`, without line endings.

    The file is opened in a `with` block so the handle is closed as soon as
    the content has been read.
    """
    with open(path) as handle:
        return handle.read().splitlines()


# Loaded on demand by `require_nicknames`; stays None until then.
NICKNAMES = None
# Word lists read once, at import time, from the `data` directory.
LASTNAMES = _read_lines(BASE_DIR + "/data/lastnames.txt")
FIRSTNAMES = _read_lines(BASE_DIR + "/data/firstnames.txt")
EMAIL_DOMAINS = _read_lines(BASE_DIR + "/data/email_domains.txt")
def require_nicknames(fct):
    """Decorator making sure the global `NICKNAMES` list is loaded.

    The loading is done once, when the decorator is applied (not on every
    call of `fct`): if `NICKNAMES` is still None, the candidate files are
    tried in order and the first one that exists fills `NICKNAMES` with its
    lines. `fct` itself is returned unchanged.

    Raises:
        FileNotFoundError: if `NICKNAMES` is not loaded yet and none of the
            candidate files exists.
    """
    global NICKNAMES

    nicknames_files = [
        os.path.join(BASE_DIR, 'data/nicknames_dict'),
        "/usr/share/dict/american-english",
    ]

    if NICKNAMES is None:
        for nick_file in nicknames_files:
            print("Trying {}".format(nick_file))
            try:
                with open(nick_file, 'r') as handle:
                    NICKNAMES = handle.read().splitlines()
                break
            except FileNotFoundError:
                # Missing candidate: fall through to the next one.
                continue

        if NICKNAMES is None:
            # Tell the operator which files were looked for, instead of
            # raising an exception without any message.
            raise FileNotFoundError(
                "No nickname dictionary found; tried: {}".format(
                    ", ".join(nicknames_files)))

    return fct
class InvalidData(Exception):
    ''' Thrown when the DB contains invalid data, and cannot perform
    something.

    `what` is the human-readable description of the problem; it is available
    as the `what` attribute and is what `str()` returns. '''

    def __init__(self, what):
        # Forward the message to Exception so that it ends up in `args`:
        # copying/unpickling an exception re-calls the class with `args`, which
        # would fail with an empty tuple since `what` is mandatory.
        super().__init__(what)
        self.what = what

    def __str__(self):
        # __str__ must return a str even if `what` is some other object.
        return str(self.what)
class Keyword(models.Model):
    ''' A search term, stored as plain text '''

    text = models.CharField(max_length=256)

    def __str__(self):
        return self.text

    def generate_url(self, user):
        """ Returns the url obtained by handing this keyword to the search
        engine of `user`.
        """
        engine = user.search_engine
        return engine.search_url(self)
class Webpage(models.Model):
    ''' A single web page, identified by its url '''

    url = models.URLField()

    def __str__(self):
        # A page is displayed as its url.
        return self.url
class Website(models.Model):
    ''' A website usually visited '''
    name = models.CharField(max_length=256)
    url = models.URLField()
    keywords = models.ManyToManyField(Keyword)
    notable_pages = models.ManyToManyField(Webpage)

    def __str__(self):
        return self.name

    def generate_url(self, user):
        """ Generates the url in case the interest chosen is a website.

        If `user.uses_urls` is set, the site url is returned directly.
        Otherwise a random draw picks between the url of one of the notable
        pages (draw <= 0.1), a search for the site name followed by one of
        its keywords (draw <= 0.8), or a search for the site name alone.
        When the drawn option has nothing to pick from (no notable page, no
        keyword), a search for the site name alone is used instead.
        """
        # Drawn before the `uses_urls` test so that the random generator is
        # consumed the same way whichever branch is taken.
        rand = random.random()
        if user.uses_urls:
            return self.url

        if rand <= 0.1:
            pages = list(self.notable_pages.all())
            if pages:
                return random.choice(pages).url
        elif rand <= 0.8:
            keywords = list(self.keywords.all())
            if keywords:
                # format() goes through Keyword.__str__; concatenating the
                # Keyword instance to a str directly raises TypeError.
                search_term_text = "{} {}".format(
                    self.name, random.choice(keywords))
                return user.search_engine.search_url(search_term_text)

        return user.search_engine.search_url(self.name)
class Place(models.Model):
    ''' A physical location, with its name, address and coordinates '''

    name = models.CharField(max_length=256)
    address = models.CharField(max_length=512)
    lat = models.FloatField('Latitude', blank=True)
    lon = models.FloatField('Longitude', blank=True)

    def __str__(self):
        return self.name

    def generate_url(self, user):
        """ Returns a search url for this place: the search term is either
        the place name or its address, each picked half of the time.
        """
        query = self.name if random.random() < 1/2 else self.address
        return user.search_engine.search_url(query)
class Event(models.Model):
    ''' A real-life event (protests, meeting, ...) '''
    name = models.CharField(max_length=256)
    date = models.DateTimeField()
    place = models.ForeignKey(Place, on_delete=models.CASCADE)

    def __str__(self):
        return self.name

    def generate_url(self, user):
        """ Generates the url for an event object.

        The search term is made of the event name, date and place, in a
        random order, separated by spaces.
        """
        parts = [self.name, self.date, self.place]
        # Sampling as many items as there are is a random permutation.
        possibilities = random.sample(parts, len(parts))
        # `date` and `place` are not strings: str.join() raises TypeError on
        # them, so every part is converted explicitly first.
        search_term = " ".join(str(part) for part in possibilities)
        return user.search_engine.search_url(search_term)
class BrowserFingerprint(models.Model):
    ''' The set of values describing a browser (user agent, platform,
    accepted encodings, screen characteristics, ...) '''

    description = models.CharField(max_length=256)
    useragent = models.CharField(max_length=256)
    appname = models.CharField(max_length=256)
    appversion = models.CharField(max_length=256)
    platform = models.CharField(max_length=256)
    vendor = models.CharField(max_length=256)
    vendorsub = models.CharField(max_length=256)
    buildID = models.CharField(max_length=256)
    oscpu = models.CharField(max_length=256)
    accept_encoding = models.CharField(max_length=256)
    accept_default = models.CharField(max_length=256)
    accept_lang = models.CharField(max_length=256)
    pixeldepth = models.IntegerField()
    colordepth = models.IntegerField()
    screens = models.CharField(max_length=256)

    def __str__(self):
        return self.description

    def serialize_headers(self):
        """ Returns a dict mapping header names to the string value of the
        matching field of this fingerprint.
        """
        header_values = (
            ("Description", self.description),
            ("User-Agent", self.useragent),
            ("Accept-Encoding", self.accept_encoding),
            ("Accept", self.accept_default),
            ("Accept-Language", self.accept_lang),
        )
        return {header: str(value) for header, value in header_values}
class SearchEngine(models.Model):
    ''' A search engine, and all the data needed to use it '''
    name = models.CharField(max_length=256)
    url = models.URLField()
    # Query pattern. It should contain a `{}`, which, when substituted with a
    # search term (using `.format()`), must yield a URL tail that can be
    # concatenated with `url` to perform a search (eg. `?q={}` for ddg).
    query_pattern = models.CharField(max_length=256)

    def __str__(self):
        return self.name

    def search_url(self, search_term):
        ''' Obtain a url to search `search_term` with this search engine.

        `search_term` is converted with `str()` and URL-encoded (spaces
        become `+`) before being substituted in `query_pattern`; the result
        is appended to `url`.

        Raises InvalidData if `query_pattern` contains no `{}` placeholder.
        '''
        pattern = str(self.query_pattern)
        if '{}' not in pattern:
            raise InvalidData("Search engine {}: bad pattern".format(self))
        # quote_plus also escapes characters such as `&`, `#` or `+`, which
        # would otherwise change the meaning of the query string.
        encoded_term = quote_plus(str(search_term))
        return self.url + pattern.format(encoded_term)
class Interest(models.Model):
    ''' A named topic of interest, grouping the keywords, places, websites
    and events attached to it '''

    name = models.CharField(max_length=256)
    keywords = models.ManyToManyField(Keyword)
    places = models.ManyToManyField(Place)
    websites = models.ManyToManyField(Website)
    events = models.ManyToManyField(Event)

    def __str__(self):
        # An interest is displayed as its name.
        return self.name
class Profile(models.Model):
    ''' A user profile: identity, interests, search engine and browser
    fingerprint of one user '''

    nick = models.CharField(
        max_length=64,
        help_text="The user's online identity",
    )
    first_name = models.CharField(max_length=64)
    last_name = models.CharField(max_length=64)
    email = models.EmailField()
    uses_urls = models.BooleanField(
        help_text=(
            "Does the user usually go to a given website "
            "using its url or searching it in a search engine?"
        ),
    )
    interests = models.ManyToManyField(Interest)
    search_engine = models.ForeignKey(
        SearchEngine,
        on_delete=models.CASCADE,
    )
    browser_fingerprint = models.ForeignKey(
        BrowserFingerprint,
        on_delete=models.CASCADE,
    )
def generate_email(nick, first_name, last_name):
    """ Builds an email address on a domain picked at random in
    `EMAIL_DOMAINS`. The local part is `first_name.last_name` about 30% of
    the time, and `nick` otherwise.
    """
    domain = random.choice(EMAIL_DOMAINS)
    use_real_name = random.random() < 0.3
    if use_real_name:
        local_part = ".".join((first_name, last_name))
    else:
        local_part = nick
    return "@".join((local_part, domain))
@require_nicknames
def create_profile(nick=None):
    """ Creates, saves and returns a new random Profile.

    `nick` is the nickname to use; when it is None, a nickname is made by
    joining 2 to 4 entries sampled from `NICKNAMES`. The first name, last
    name, email, search engine, browser fingerprint and the single initial
    interest are picked at random.

    `full_clean()` is called before saving, so a generated value that does
    not satisfy the field constraints (eg. a nickname longer than the `nick`
    field allows) makes this function raise instead of saving the profile.
    """
    # Only generate a nickname when the caller did not supply one.
    if nick is None:
        nick = "".join(random.sample(NICKNAMES, random.randrange(2, 5)))
    first_name = random.choice(FIRSTNAMES)
    last_name = random.choice(LASTNAMES)
    email = generate_email(nick, first_name, last_name)
    profile = Profile(
        nick=nick,
        first_name=first_name,
        last_name=last_name,
        email=email,
        uses_urls=(random.random() < 0.5),
    )
    profile.search_engine = random.choice(SearchEngine.objects.all())
    profile.browser_fingerprint = random.choice(
        BrowserFingerprint.objects.all())

    profile.full_clean()
    # The profile must be saved before a many-to-many relation can be added.
    profile.save()
    profile.interests.add(random.choice(Interest.objects.all()))
    # NOTE(review): add() presumably already persists the relation, which
    # would make this second save redundant; confirm before removing it.
    profile.save()
    return profile