Going on in the generation of history
This commit is contained in:
parent
7c13ee17d4
commit
4920de5838
1 changed files with 43 additions and 10 deletions
|
@ -3,10 +3,11 @@ entries, which looks like human-based browsing, according to a dedicated user
|
|||
interests, keywords...
|
||||
"""
|
||||
|
||||
from random import weibullvariate
|
||||
import random
|
||||
from math import floor
|
||||
from django.db import models
|
||||
from profiles.models import Profile
|
||||
import profiles.models as profiles
|
||||
from crawl import crawl
|
||||
from pinocchio.settings import HISTORY_MIN
|
||||
|
||||
class HistoryEntry(models.Model):
|
||||
|
@ -36,7 +37,7 @@ class History(models.Model):
|
|||
)
|
||||
played = models.BooleanField(default=False)
|
||||
user = models.ForeignKey(
|
||||
Profile,
|
||||
profiles.Profile,
|
||||
on_delete=models.CASCADE
|
||||
)
|
||||
|
||||
|
@ -61,6 +62,35 @@ class History(models.Model):
|
|||
self.save()
|
||||
|
||||
|
||||
def generate_partial_history(user, t_start, url, history=None):
    """ Generate the part of the history resulting from the crawl starting at
    the given url.

    This is currently a placeholder: no crawl is run, none of the arguments
    is read, and an empty list is always returned.

    :param user: user the partial history is generated for (unused for now).
    :param t_start: start of the partial history, presumably a timestamp --
        to be confirmed once the crawl is implemented (unused for now).
    :param url: url the crawl is meant to start from (unused for now).
    :param history: history object the generated entries are meant to belong
        to (unused for now). Optional, so that callers passing only the
        first three arguments keep working.
    :return: an empty list.
    """
    # TODO: run the actual crawl from `url` (e.g. crawl.CrawlingThread()).
    return []
|
||||
|
||||
def generate_first_url(user):
    """ Generate the first url of a partial history, based on the user
    information.

    One of the user's interest collections (keywords, places, websites or
    events) is drawn at random, then one item of that collection. Only
    `profiles.Website` items are turned into a url for now.

    :param user: object exposing `interests`, `uses_url` and `search_engine`
        (presumably a `profiles.Profile` -- to be confirmed).
    :return: the generated url, or None when no url was built (the drawn
        item is not a website, or none of the website branches was taken).

    NOTE(review): `random.choice` fails on an empty sequence, so a user with
    an empty interest collection can make this function raise.
    """
    interest = random.choice([
        user.interests.keywords.all(),
        user.interests.places.all(),
        user.interests.websites.all(),
        user.interests.events.all(),
    ])
    search_term = random.choice(interest)

    # Stays None unless one of the branches below builds a url.
    url = None
    if isinstance(search_term, profiles.Website):
        if user.uses_url:
            url = search_term.url
        elif random.random() <= 0.1:
            url = random.choice(search_term.notable_pages).url
        elif random.random() >= 0.3:
            # NOTE(review): this is a second, independent random draw, so
            # some website picks fall through every branch and yield None.
            search_term_text = (
                search_term.name + " " + random.choice(search_term.keywords)
            )
            url = user.search_engine.search_url(search_term_text)
    # TODO: build a url for the other kinds of interests (keywords, places,
    # events), e.g. through user.search_engine.search_url(...).
    return url
|
||||
|
||||
|
||||
|
||||
def generate_history(user, ts_start):
|
||||
""" Generate a new history for the user `user`, starting from timestamp
|
||||
|
@ -69,16 +99,19 @@ def generate_history(user, ts_start):
|
|||
is actually played by a user.
|
||||
"""
|
||||
|
||||
# let's defin a new history object.
|
||||
# let's define a new history object.
|
||||
history = History(start_ts=ts_start, user=user)
|
||||
length = HISTORY_MIN + floor(10 * weibullvariate(1, 1.5))
|
||||
length = HISTORY_MIN + floor(10 * random.weibullvariate(1, 1.5))
|
||||
|
||||
history_line = 0
|
||||
|
||||
while history_line < length:
|
||||
ts_start += random.uniform(1, 10)
|
||||
new_line = HistoryEntry(
|
||||
search="https://google.com:",
|
||||
timestamp=ts_start,
|
||||
history=history
|
||||
)
|
||||
history = generate_partial_history(user, ts_start, url)
|
||||
ts_start = history[-1].timestamp + 5 * weilbullvariate(1, 5)
|
||||
for (url, ts) in history:
|
||||
new_line = HistoryEntry(
|
||||
search=url,
|
||||
timestamp=ts,
|
||||
history=history
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue