""" A django module that defines a profile, and all the information that can be stored in a profile. It stores interests, technical information such as the browser fingerprint, the preferred search engin, and if the user is likely to directly use urls or to type in the search engine. """ import os import random from django.db import models BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) NICKNAMES = None LASTNAMES = open(BASE_DIR + "/data/lastnames.txt").read().splitlines() FIRSTNAMES = open(BASE_DIR + "/data/firstnames.txt").read().splitlines() EMAIL_DOMAINS = open(BASE_DIR + "/data/email_domains.txt").read().splitlines() def require_nicknames(fct): def read_file(path): global NICKNAMES print("Trying {}".format(path)) with open(path, 'r') as handle: NICKNAMES = handle.read().splitlines() nicknames_files = [ os.path.join(BASE_DIR, 'data/nicknames_dict'), "/usr/share/dict/american-english", ] if NICKNAMES is None: for nick_file in nicknames_files: try: read_file(nick_file) break except FileNotFoundError: pass if NICKNAMES is None: raise FileNotFoundError return fct class InvalidData(Exception): ''' Thrown when the DB contains invalid data, and cannot perform something ''' def __init__(self, what): self.what = what super(InvalidData, self).__init__() def __str__(self): return self.what class Keyword(models.Model): ''' A keyword/search term ''' text = models.CharField(max_length=256) def __str__(self): return self.text def generate_url(self, user): """ Generates the url for a keyword, based on the user search engine. """ return user.search_engine.search_url(self) class Webpage(models.Model): ''' A webpage url ''' url = models.URLField() def __str__(self): return self.url class Website(models.Model): ''' A website usually visited ''' name = models.CharField(max_length=256) url = models.URLField() keywords = models.ManyToManyField(Keyword) notable_pages = models.ManyToManyField(Webpage) def __str__(self): return self.name def generate_url(self, user): """ Generates the url in case the interest chosen is a website. """ rand = random.random() if user.uses_urls: url = self.url elif rand <= 0.1: url = random.choice(self.notable_pages.all()).url elif rand <= 0.8: search_term_text = self.name + " " + \ str(random.choice(self.keywords.all())) url = user.search_engine.search_url(search_term_text) else: url = user.search_engine.search_url(self.name) return url class Place(models.Model): ''' A real-life place ''' name = models.CharField(max_length=256) address = models.CharField(max_length=512) lat = models.FloatField('Latitude', blank=True) lon = models.FloatField('Longitude', blank=True) def __str__(self): return self.name def generate_url(self, user): """ Generates the url for a place. """ rand = random.random() if rand < 1/2: url = user.search_engine.search_url(self.name) else: url = user.search_engine.search_url(self.address) return url class Event(models.Model): ''' A real-life event (protests, meeting, ...) ''' name = models.CharField(max_length=256) date = models.DateTimeField() place = models.ForeignKey(Place, on_delete=models.CASCADE) def __str__(self): return self.name def generate_url(self, user): """ generate the url for an event object. """ possibilities = random.sample( [self.name, self.date, self.place], 3 ) return user.search_engine.search_url(" ".join(possibilities)) class BrowserFingerprint(models.Model): ''' A browser fingerprint, containing things like a user agent ''' description = models.CharField(max_length=256) useragent = models.CharField(max_length=256) appname = models.CharField(max_length=256) appversion = models.CharField(max_length=256) platform = models.CharField(max_length=256) vendor = models.CharField(max_length=256) vendorsub = models.CharField(max_length=256) buildID = models.CharField(max_length=256) oscpu = models.CharField(max_length=256) accept_encoding = models.CharField(max_length=256) accept_default = models.CharField(max_length=256) accept_lang = models.CharField(max_length=256) pixeldepth = models.IntegerField() colordepth = models.IntegerField() screens = models.CharField(max_length=256) def __str__(self): return self.description def serialize_headers(self): return { "Description": str(self.description), "User-Agent": str(self.useragent), "Accept-Encoding": str(self.accept_encoding), "Accept": str(self.accept_default), "Accept-Language": str(self.accept_lang), } class SearchEngine(models.Model): ''' A search engine, and all the data needed to use it ''' name = models.CharField(max_length=256) url = models.URLField() query_pattern = models.CharField(max_length=256) # This field is the # query pattern. It should contain a `{}`, which, when substituted with a # search term (using `.format()`), must yield a URL tail that can be # concatenated with `url` to perform a search (eg. `?q={}` for ddg). def __str__(self): return self.name def search_url(self, search_term): ''' Obtain a url to search `search_term` with this search engine ''' pattern = str(self.query_pattern) search_term = str(search_term).replace(' ', '+') if '{}' not in pattern: raise InvalidData("Search engine {}: bad pattern".format(self)) return self.url + (str(self.query_pattern).format(search_term)) class Interest(models.Model): ''' A class of interests ''' name = models.CharField(max_length=256) keywords = models.ManyToManyField(Keyword) places = models.ManyToManyField(Place) websites = models.ManyToManyField(Website) events = models.ManyToManyField(Event) def __str__(self): return self.name class Profile(models.Model): ''' Represents a user profile, containing a few data to make their requests consistent ''' nick = models.CharField(max_length=64, help_text="The user's online identity") first_name = models.CharField(max_length=64) last_name = models.CharField(max_length=64) email = models.EmailField() uses_urls = models.BooleanField( help_text=('Does the user usually go to a given website using its url ' 'or searching it in a search engine?')) interests = models.ManyToManyField(Interest) search_engine = models.ForeignKey(SearchEngine, on_delete=models.CASCADE) browser_fingerprint = models.ForeignKey(BrowserFingerprint, on_delete=models.CASCADE) def generate_email(nick, first_name, last_name): domain = random.choice(EMAIL_DOMAINS) if random.random() < 0.3: email = first_name + "." + last_name + "@" + domain else: email = nick + "@" + domain return email @require_nicknames def create_profile(nick=None): nick = "".join(random.sample(NICKNAMES, random.randrange(2, 5))) first_name = random.choice(FIRSTNAMES) last_name = random.choice(LASTNAMES) email = generate_email(nick, first_name, last_name) profile = Profile( nick=nick, first_name=first_name, last_name=last_name, email=email, uses_urls=(random.random() < 0.5), ) profile.search_engine = random.choice(SearchEngine.objects.all()) profile.browser_fingerprint = random.choice(BrowserFingerprint.objects.all()) profile.full_clean() profile.save() profile.interests.add(random.choice(Interest.objects.all())) profile.save() return profile