""" Models for the history. This history should be able to generate history
entries that look like human browsing, according to a given user's interests,
keywords...
"""

import random
from datetime import datetime, timedelta
from math import floor
from queue import Queue

from django.db import models

import profiles.models as profiles
from crawl import crawl
from pinocchio.settings import HISTORY_MIN


class HistoryEntry(models.Model):
    """ A history entry, aka a url, and a timestamp. """
    search = models.URLField(help_text="The url to be searched")
    timestamp = models.DateTimeField()
    history = models.ForeignKey(
        'History',
        on_delete=models.CASCADE
    )

    def __str__(self):
        """ Returns the string representation of a history entry. """
        return "{} : {}".format(self.timestamp, self.search)


class History(models.Model):
    """ A history for a user, containing some web connections (http, https).
    Each history is timed, in a human-behaviour manner. """

    start_ts = models.DateTimeField(
        help_text='The starting timestamp of the history. Useful for cron-like '
        'structure.'
    )
    played = models.BooleanField(default=False)
    user = models.ForeignKey(
        profiles.Profile,
        on_delete=models.CASCADE
    )

    def return_history(self):
        """ Returns the entries of this history, sorted by increasing
        timestamps.
        """
        # `HistoryEntry.history` declares no `related_name`, so the reverse
        # accessor is Django's default `<model name>_set`.
        return self.historyentry_set.order_by('timestamp')

    def __str__(self):
        """ Returns the string representation of a history: a header line
        followed by one line per entry, in increasing timestamp order.
        """
        header = "[History]:\n"
        # `str.join` only accepts strings, so entries are converted first.
        return header + "\n".join(
            str(entry) for entry in self.return_history()
        )

    def play_history(self):
        """ Actually plays the history: marks it as played and saves it. """
        self.played = True
        self.save()


def _advance(moment, seconds):
    """ Returns `moment` shifted forward by `seconds`.

    `datetime` values are shifted with a `timedelta`; any other value (e.g. a
    numeric timestamp) is shifted by plain addition.
    """
    if isinstance(moment, datetime):
        return moment + timedelta(seconds=seconds)
    return moment + seconds


def generate_partial_history(user, t_start):
    """ Generate the part of the history resulting from one crawl.

    The crawl starts at a url produced by `generate_first_url(user)`.

    `t_start` is the timestamp of the first entry; it may be a `datetime` or
    a number. Returns a list of `(url, timestamp)` tuples with increasing
    timestamps, the first one being the starting url at `t_start`.
    """
    basis = generate_first_url(user)
    result = [(basis, t_start)]
    t_start = _advance(t_start, 5 * random.weibullvariate(1, 1.5))

    queue = Queue()
    crawler = crawl.CrawlingThread(user, basis, queue)
    crawler.start()
    crawler.join()
    # NOTE(review): this blocks forever if the crawler finished without
    # putting anything in the queue -- confirm CrawlingThread always does.
    urls = queue.get()

    for url in urls:
        t_start = _advance(t_start, 5 * random.weibullvariate(1, 1.5))
        result.append((url, t_start))
    return result


def generate_first_url(user):
    """ Generate the first url of a partial history, based on the user
    information.

    One interest category (keywords, places, websites, events) is picked at
    random among the non-empty ones, then one of its items is picked at
    random and asked to generate a url.

    Raises `IndexError` if the user has no interest in any category.
    """
    interests = user.interests
    categories = [
        interests.keywords.all(),
        interests.places.all(),
        interests.websites.all(),
        interests.events.all(),
    ]
    # Trying the categories in random order and keeping the first non-empty
    # one is a uniform choice among the non-empty categories; an empty
    # category no longer fails while others still have items.
    random.shuffle(categories)
    for category in categories:
        candidates = list(category)
        if candidates:
            return random.choice(candidates).generate_url(user)
    raise IndexError("Cannot choose a first url: the user has no interest.")


def generate_history(user, ts_start):
    """ Generate a new history for the user `user`, starting from timestamp
    `ts_start`.
    A few heuristics are used in order to give the impression that the history
    is actually played by a user.

    `ts_start` is stored in `History.start_ts`, a `DateTimeField`, so it
    should be a `datetime`. Returns the saved `History`; its entries are
    saved as well. At least `HISTORY_MIN` entries are generated.
    """

    # let's define a new history object. It has to be saved before any entry
    # referencing it through a foreign key can be saved.
    history = History(start_ts=ts_start, user=user)
    history.save()

    length = HISTORY_MIN + floor(10 * random.weibullvariate(1, 1.5))

    history_line = 0

    while history_line < length:
        ts_start = _advance(ts_start, 5 * random.weibullvariate(1, 2.8))
        history_list = generate_partial_history(user, ts_start)
        # Entries are (url, timestamp) tuples: resume after the last one.
        ts_start = _advance(
            history_list[-1][1],
            5 * random.weibullvariate(1, 5)
        )
        for (url, timestamp) in history_list:
            new_line = HistoryEntry(
                search=url,
                timestamp=timestamp,
                history=history
            )
            new_line.save()
            # Count saved entries, otherwise the loop never terminates.
            history_line += 1

    return history