Continue work on history generation

This commit is contained in:
Rémi Oudin 2018-02-20 23:42:21 +01:00
parent 7c13ee17d4
commit 4920de5838

View file

@ -3,10 +3,11 @@ entries, which looks like human-based browsing, according to a dedicated user
interests, keywords... interests, keywords...
""" """
from random import weibullvariate import random
from math import floor from math import floor
from django.db import models from django.db import models
from profiles.models import Profile import profiles.models as profiles
from crawl import crawl
from pinocchio.settings import HISTORY_MIN from pinocchio.settings import HISTORY_MIN
class HistoryEntry(models.Model): class HistoryEntry(models.Model):
@ -36,7 +37,7 @@ class History(models.Model):
) )
played = models.BooleanField(default=False) played = models.BooleanField(default=False)
user = models.ForeignKey( user = models.ForeignKey(
Profile, profiles.Profile,
on_delete=models.CASCADE on_delete=models.CASCADE
) )
@ -61,6 +62,35 @@ class History(models.Model):
self.save() self.save()
def generate_partial_history(user, t_start, url, history=None):
    """ Generate the part of the history resulting from the crawl starting at
    the given url.

    This is still a stub: no crawl is performed and every argument is
    currently ignored.

    :param user: presumably the profile the history is generated for
        (unused for now).
    :param t_start: presumably the timestamp at which this part of the
        history starts (unused for now).
    :param url: the url the crawl is meant to start from (unused for now).
    :param history: optional and unused for now; it defaults to None so that
        callers passing only `(user, t_start, url)` do not raise TypeError.
    :return: an empty list.
    """
    # TODO: perform the actual crawl, e.g. crawler = crawl.CrawlingThread()
    return []
def generate_first_url(user):
    """ Generate the first url of a partial history, based on the user
    information.

    One interest category (keywords, places, websites or events) is picked
    at random, then one entry of that category. Only entries that are
    `profiles.Website` instances are handled so far.

    :param user: object exposing `interests` (with `keywords`, `places`,
        `websites` and `events`), `uses_url` and `search_engine`.
    :return: the chosen url, or None when no branch below produced one
        (non-website entry, or none of the random draws matched).
    :raises IndexError: from `random.choice` when the picked category is
        empty.
    """
    interest = random.choice(
        [user.interests.keywords.all(), user.interests.places.all(),
         user.interests.websites.all(), user.interests.events.all()]
    )
    search_term = random.choice(interest)

    # Stays None when nothing below applies, so there is always a value to
    # return.
    url = None
    if isinstance(search_term, profiles.Website):
        if user.uses_url:
            # The user is flagged as using the website's url directly.
            url = search_term.url
        elif random.random() <= 0.1:
            # NOTE(review): assumes `notable_pages` is a sequence whose items
            # have a `url` attribute -- confirm against profiles.Website.
            url = random.choice(search_term.notable_pages).url
        elif random.random() >= 0.3:
            # Independent second draw: otherwise search for the website name
            # together with one of its keywords.
            # NOTE(review): assumes `keywords` is a sequence of strings --
            # confirm against profiles.Website.
            search_term_text = (
                search_term.name + " " + random.choice(search_term.keywords)
            )
            url = user.search_engine.search_url(search_term_text)
    # TODO: handle the other kinds of search terms, e.g. with
    # url = user.search_engine.search_url(search_term)
    return url
def generate_history(user, ts_start): def generate_history(user, ts_start):
""" Generate a new history for the user `user`, starting from timestamp """ Generate a new history for the user `user`, starting from timestamp
@ -69,16 +99,19 @@ def generate_history(user, ts_start):
is actually played by a user. is actually played by a user.
""" """
# let's defin a new history object. # let's define a new history object.
history = History(start_ts=ts_start, user=user) history = History(start_ts=ts_start, user=user)
length = HISTORY_MIN + floor(10 * weibullvariate(1, 1.5)) length = HISTORY_MIN + floor(10 * random.weibullvariate(1, 1.5))
history_line = 0 history_line = 0
while history_line < length: while history_line < length:
ts_start += random.uniform(1, 10) ts_start += random.uniform(1, 10)
history = generate_partial_history(user, ts_start, url)
ts_start = history[-1].timestamp + 5 * weilbullvariate(1, 5)
for (url, ts) in history:
new_line = HistoryEntry( new_line = HistoryEntry(
search="https://google.com:", search=url,
timestamp=ts_start, timestamp=ts,
history=history history=history
) )