diff --git a/histories/models.py b/histories/models.py
index df542f3..bd696dc 100644
--- a/histories/models.py
+++ b/histories/models.py
@@ -3,10 +3,11 @@
 entries, which looks like human-based browsing, according to a dedicated user
 interests, keywords...
 """
-from random import weibullvariate
+import random
 from math import floor
 from django.db import models
-from profiles.models import Profile
+import profiles.models as profiles
+from crawl import crawl
 from pinocchio.settings import HISTORY_MIN
 
 class HistoryEntry(models.Model):
@@ -36,7 +37,7 @@ class History(models.Model):
     )
     played = models.BooleanField(default=False)
     user = models.ForeignKey(
-        Profile,
+        profiles.Profile,
         on_delete=models.CASCADE
     )
 
@@ -61,6 +62,45 @@ class History(models.Model):
         self.save()
 
 
+def generate_partial_history(user, t_start, url, history):
+    """ Generate the part of the history resulting from the crawl starting at
+    the given url.
+
+    The crawl is not implemented yet: an empty list is always returned.
+    """
+    # TODO: run the crawl, e.g. with crawl.CrawlingThread()
+    return []
+
+
+def generate_first_url(user):
+    """ Generate the first url of a partial history, based on the user
+    information.
+
+    Returns the chosen url, or None when none could be built: only website
+    interests are handled so far, and some random draws select no branch.
+    Raises IndexError if the randomly picked interest category is empty.
+    """
+    interest = random.choice(
+        [user.interests.keywords.all(), user.interests.places.all(),
+         user.interests.websites.all(), user.interests.events.all()
+        ]
+    )
+    search_term = random.choice(interest)
+    url = None
+    if isinstance(search_term, profiles.Website):
+        if user.uses_url:
+            url = search_term.url
+        elif random.random() <= 0.1:
+            url = random.choice(search_term.notable_pages).url
+        elif random.random() >= 0.3:
+            search_term_text = (search_term.name + " "
+                                + random.choice(search_term.keywords))
+            url = user.search_engine.search_url(search_term_text)
+    # TODO: other kinds of interest, e.g.
+    #     url = user.search_engine.search_url(search_term)
+    return url
+
+
 def generate_history(user, ts_start):
     """
     Generate a new history for the user `user`, starting from timestamp
@@ -69,14 +109,29 @@ def generate_history(user, ts_start):
     is actually played by a user.
     """
 
-    # let's defin a new history object.
+    # let's define a new history object.
     history = History(start_ts=ts_start, user=user)
-    length = HISTORY_MIN + floor(10 * weibullvariate(1, 1.5))
+    length = HISTORY_MIN + floor(10 * random.weibullvariate(1, 1.5))
     history_line = 0
     while history_line < length:
         ts_start += random.uniform(1, 10)
-        new_line = HistoryEntry(
-            search="https://google.com:",
-            timestamp=ts_start,
-            history=history
-        )
+        url = generate_first_url(user)
+        # Keep the crawl result under its own name: `history` must stay the
+        # History object the new entries are attached to.
+        partial_history = generate_partial_history(
+            user, ts_start, url, history
+        )
+        if not partial_history:
+            # Nothing to resume from or to record: stop instead of failing
+            # on `partial_history[-1]`.
+            break
+        # NOTE(review): entries must both expose `.timestamp` and unpack as
+        # (url, timestamp), e.g. a namedtuple -- confirm once implemented.
+        ts_start = (partial_history[-1].timestamp
+                    + 5 * random.weibullvariate(1, 5))
+        for (url, ts) in partial_history:
+            new_line = HistoryEntry(
+                search=url,
+                timestamp=ts,
+                history=history
+            )