Continue work on history generation

This commit is contained in:
Rémi Oudin 2018-02-20 23:42:21 +01:00
parent 7c13ee17d4
commit 4920de5838
1 changed file with 43 additions and 10 deletions

View File

@ -3,10 +3,11 @@ entries, which looks like human-based browsing, according to a dedicated user
interests, keywords...
"""
from random import weibullvariate
import random
from math import floor
from django.db import models
from profiles.models import Profile
import profiles.models as profiles
from crawl import crawl
from pinocchio.settings import HISTORY_MIN
class HistoryEntry(models.Model):
@ -36,7 +37,7 @@ class History(models.Model):
)
played = models.BooleanField(default=False)
user = models.ForeignKey(
Profile,
profiles.Profile,
on_delete=models.CASCADE
)
@ -61,6 +62,35 @@ class History(models.Model):
self.save()
def generate_partial_history(user, t_start, url, history=None):
    """ Generate the part of the history resulting from the crawl starting at
    the given url.

    `user` is the profile the history is generated for, `t_start` the
    starting timestamp of this part and `url` the page the crawl starts from.
    `history` is optional, because `generate_history` calls this function
    without it; it is not used yet.

    The crawl itself is not implemented yet, so an empty list is always
    returned.
    """
    # TODO: run the crawl (crawl.CrawlingThread) from `url` and build the
    # resulting entries instead of returning a placeholder.
    return []
def generate_first_url(user):
    """ Generate the first url of a partial history, based on the user
    information.

    One of the user's interest categories (keywords, places, websites or
    events) is picked at random among the non-empty ones, then one entry is
    picked at random inside that category. Only entries that are
    `profiles.Website` instances are turned into a url for now.

    Returns the selected url, or None when the user has no interest at all
    or when no url could be derived from the picked entry.
    """
    categories = [
        user.interests.keywords.all(),
        user.interests.places.all(),
        user.interests.websites.all(),
        user.interests.events.all(),
    ]
    # random.choice raises IndexError on an empty sequence, so empty
    # categories are dropped before drawing.
    available = [category for category in categories if category]
    if not available:
        return None

    interest = random.choice(available)
    search_term = random.choice(interest)

    url = None
    if isinstance(search_term, profiles.Website):
        # NOTE(review): each `elif` draws a fresh random number, so some
        # website picks end up with no url at all -- confirm this is wanted.
        if user.uses_url:
            url = search_term.url
        elif random.random() <= 0.1:
            url = random.choice(search_term.notable_pages).url
        elif random.random() >= 0.3:
            search_term_text = (
                search_term.name + " " + random.choice(search_term.keywords)
            )
            url = user.search_engine.search_url(search_term_text)
    # TODO: derive a url (presumably through user.search_engine.search_url)
    # for the entries that are not websites.
    return url
def generate_history(user, ts_start):
""" Generate a new history for the user `user`, starting from timestamp
@ -69,16 +99,19 @@ def generate_history(user, ts_start):
is actually played by a user.
"""
# let's defin a new history object.
# let's define a new history object.
history = History(start_ts=ts_start, user=user)
length = HISTORY_MIN + floor(10 * weibullvariate(1, 1.5))
length = HISTORY_MIN + floor(10 * random.weibullvariate(1, 1.5))
history_line = 0
while history_line < length:
ts_start += random.uniform(1, 10)
new_line = HistoryEntry(
search="https://google.com:",
timestamp=ts_start,
history=history
)
history = generate_partial_history(user, ts_start, url)
ts_start = history[-1].timestamp + 5 * weilbullvariate(1, 5)
for (url, ts) in history:
new_line = HistoryEntry(
search=url,
timestamp=ts,
history=history
)