""" Models for the history. This history should be able to generate history
entries which look like human-based browsing, according to a dedicated user's
interests, keywords...
"""

import random
from datetime import datetime
from math import floor
from queue import Queue

from django.db import models

import profiles.models as profiles
from tor_runner import TorInstance
from crawl import crawl
from pinocchio.settings import HISTORY_MIN


class HistoryEntry(models.Model):
    """ A history entry, aka a url, and a timestamp.
    """
    search = models.URLField(help_text="The url to be searched")
    timestamp = models.DateTimeField()
    history = models.ForeignKey(
        'History',
        on_delete=models.CASCADE
    )

    def __str__(self):
        """ Returns the string representation of a history entry, formatted
        as "<timestamp> : <url>".
        """
        return "{} : {}".format(self.timestamp, self.search)


class History(models.Model):
    """ A history for a user, containing some web connections (http, https).
    Each history is timed, in a human-behaviour manner.
    """

    start_ts = models.DateTimeField(
        help_text='The starting timestamp of the history. Useful for cron-like '
        'structure.'
    )
    played = models.BooleanField(default=False)
    user = models.ForeignKey(
        profiles.Profile,
        on_delete=models.CASCADE
    )

    def _ordered_entries(self):
        """ Returns the entries of this history, by increasing timestamps.
        """
        # HistoryEntry.history declares no related_name, so the reverse
        # accessor is Django's default `<lowercased model name>_set`.
        return self.historyentry_set.order_by('timestamp')

    def return_history(self):
        """ Returns the history, sorted by increasing timestamps, as a list
        of `(url, date)` tuples.
        """
        return [(entry.search, entry.timestamp.date())
                for entry in self._ordered_entries()]

    def __str__(self):
        """ Returns the string representation of a history: a header line
        followed by one line per entry, by increasing timestamps.
        """
        header = "[History]:\n"
        # str.join only accepts strings, so each entry is rendered first.
        return header + "\n".join(
            str(entry) for entry in self._ordered_entries())

    def play_histories(self):
        """ Actually plays the history: marks it as played, hands its entries
        to a `TorInstance`, and saves the updated `played` flag.
        """
        self.played = True
        # NOTE(review): `History` has no `history` attribute, so the entries
        # are passed instead. Presumably TorInstance expects the list built
        # by `return_history` and may need to be started explicitly --
        # confirm against tor_runner.
        TorInstance(self.return_history())
        self.save()


def generate_partial_history(user, t_start):
    """ Generate the part of the history resulting from one crawl, whose
    starting url is derived from the interests of `user`.

    `t_start` is the timestamp given to the first url; it must support the
    addition of a float (`generate_history` passes POSIX seconds).

    Returns a non-empty list of `(url, timestamp)` tuples with increasing
    timestamps, the first one being the starting url.
    """
    timestamp = t_start
    basis = generate_first_url(user)
    result = [(basis, timestamp)]
    timestamp += 5 * random.weibullvariate(1, 1.5)

    queue = Queue()
    search_engine_list = [
        engine.url for engine in profiles.SearchEngine.objects.all()]

    # The crawl is run to completion before its result is read back from
    # the queue.
    crawler = crawl.CrawlingThread(user, basis, search_engine_list, queue)
    crawler.start()
    crawler.join()
    urls = queue.get()

    # Space out the crawled urls with random, strictly positive delays.
    for url in urls:
        timestamp += 5 * random.weibullvariate(1, 1.5)
        result.append((url, timestamp))
    return result


def generate_first_url(user):
    """ Generate the first url of a partial history, based on the user
    information.

    One interest category (keywords, places, websites or events) is drawn
    among the non-empty ones, then one item of that category, whose
    `generate_url(user)` result is returned.

    Raises IndexError when the user has no interest at all.
    """
    categories = [
        list(user.interests.keywords.all()),
        list(user.interests.places.all()),
        list(user.interests.websites.all()),
        list(user.interests.events.all()),
    ]
    # Drawing an empty category would make the second choice fail even
    # though other categories have entries, so empty ones are left out.
    interest = random.choice([items for items in categories if items])
    search_term = random.choice(interest)
    return search_term.generate_url(user)


def generate_history(user, ts_start):
    """ Generate a new history for the user `user`, starting from timestamp
    `ts_start`.
    A few heuristics are used in order to give the impression that the history
    is actually played by a user.

    `ts_start` is either a `datetime` or a number of POSIX seconds.

    At least `HISTORY_MIN` entries are generated; the last crawl may push the
    total above the drawn target. The saved `History` is returned.
    """
    if isinstance(ts_start, datetime):
        start = ts_start
    else:
        start = datetime.fromtimestamp(ts_start)

    # The history must exist in the database before any entry can point to
    # it through its foreign key.
    history = History(start_ts=start, user=user)
    history.save()

    length = HISTORY_MIN + floor(10 * random.weibullvariate(1, 1.5))

    # Random delays are floats, so the clock is kept in POSIX seconds and
    # only turned back into a datetime when an entry is stored.
    current_ts = start.timestamp()
    history_line = 0

    while history_line < length:
        current_ts += 5 * random.weibullvariate(1, 2.8)
        history_list = generate_partial_history(user, current_ts)
        # Resume after the last (url, timestamp) tuple of this crawl.
        current_ts = history_list[-1][1] + 5 * random.weibullvariate(1, 5)
        for (url, timestamp) in history_list:
            new_line = HistoryEntry(
                search=url,
                timestamp=datetime.fromtimestamp(timestamp, tz=start.tzinfo),
                history=history
            )
            new_line.save()
            history_line += 1

    return history