Compare commits

..

15 commits

6 changed files with 54 additions and 15 deletions

View file

@ -1,3 +1,6 @@
# mpri-webdam # mpri-webdam
Générer tout plein de faux historiques. Parce qu'il faut bien valider ce cours. Generate realistic fake browsing histories for borderline and/or activist
users, to hide real traffic from global surveillance.
Lacks proper documentation at the moment `:(`

View file

@ -13,6 +13,13 @@
"query_pattern":"?q={}" "query_pattern":"?q={}"
} }
}, },
{
"searchengine": {
"name":"Duckduckgo Lite",
"url":"https://duckduckgo.com/lite/",
"query_pattern":"?q={}"
}
},
{ {
"searchengine": { "searchengine": {
"name":"Qwant", "name":"Qwant",

View file

@ -0,0 +1,16 @@
from django.core.management.base import BaseCommand
from profiles import models as profiles
from histories.models import generate_history
from datetime import datetime
class Command(BaseCommand):
    ''' Generates a history and prints the related XML '''

    # Shown by `manage.py help <command>`.
    help = 'Generates a history for the first profile and prints its XML'

    def add_arguments(self, parser):
        ''' This command takes no command-line arguments. '''

    def handle(self, *args, **kwargs):
        ''' Generate a history starting now for the first profile returned
        by the database, then print its XML export on standard output.

        Raises CommandError when no profile exists, so that the user gets a
        readable message instead of an IndexError traceback.
        '''
        # Local import: keeps this block self-contained with respect to the
        # module-level import list.
        from django.core.management.base import CommandError

        try:
            prof = profiles.Profile.objects.all()[0]
        except IndexError as err:
            raise CommandError(
                'No profile found: create at least one profile first.'
            ) from err

        history = generate_history(prof, datetime.now())
        print(history.to_xml_string())

View file

@ -5,6 +5,7 @@ interests, keywords...
from collections import namedtuple from collections import namedtuple
import random import random
import asyncio
from math import floor from math import floor
from xml.etree import ElementTree as ET from xml.etree import ElementTree as ET
from datetime import datetime from datetime import datetime
@ -43,9 +44,9 @@ class HistoryEntry(models.Model):
def to_xml(self, xml_root): def to_xml(self, xml_root):
entry = ET.Element('history') entry = ET.Element('history')
entry_url = ET.Element('url') entry_url = ET.Element('url')
entry_url.text = self.search entry_url.text = str(self.search)
entry_ts = ET.Element('timestamp') entry_ts = ET.Element('timestamp')
entry_ts.text = self.timestamp.timestamp() entry_ts.text = str(self.timestamp.timestamp())
entry.append(entry_url) entry.append(entry_url)
entry.append(entry_ts) entry.append(entry_ts)
xml_root.append(entry) xml_root.append(entry)
@ -107,13 +108,18 @@ class History(models.Model):
output += str(entry) + '\n' output += str(entry) + '\n'
return output return output
async def play_histories(self): async def _handler(self):
runner = await TorInstance.create(self.return_history(), self.user.browser_fingerprint.serialize_headers())
await runner.run()
self.played = True
self.save()
def play_histories(self):
""" Actually plays the history. """ Actually plays the history.
""" """
self.played = True loop = asyncio.new_event_loop()
runner = await TorInstance.create(self.return_history(), self.user.browser_fingerprint.serialize_headers()) asyncio.set_event_loop(loop)
runner.run() loop.run_until_complete(asyncio.wait([self._handler()]))
self.save()
def to_xml(self, xml_root=None): def to_xml(self, xml_root=None):
''' Exports the current history to xml ''' ''' Exports the current history to xml '''
@ -123,9 +129,9 @@ class History(models.Model):
xml_root = ET.Element('root') xml_root = ET.Element('root')
hist_node = ET.Element("history", attrib={ hist_node = ET.Element("history", attrib={
'start-ts': self.start_ts, 'start-ts': str(self.start_ts),
'played': 1 if self.played else 0, 'played': '1' if self.played else '0',
'user': self.user.pk, 'user': str(self.user.pk),
}) })
xml_root.append(hist_node) xml_root.append(hist_node)
for entry in self.historyentry_set.all(): for entry in self.historyentry_set.all():
@ -134,6 +140,10 @@ class History(models.Model):
if standalone: if standalone:
return xml_root return xml_root
def to_xml_string(self):
    ''' Serializes the XML export of this history.

    Builds the tree through `self.to_xml()` and hands it to
    `ElementTree.tostring` with its default arguments, so the result is the
    encoded (bytes) form of the document.
    '''
    return ET.tostring(self.to_xml())
@staticmethod @staticmethod
def from_xml(xml_root): def from_xml(xml_root):
''' Loads an history from an XML file ''' ''' Loads an history from an XML file '''
@ -177,7 +187,6 @@ def generate_partial_history(user, t_start):
timestamp = t_start timestamp = t_start
result = [] result = []
basis = generate_first_url(user) basis = generate_first_url(user)
result.append(PartialHistoryEntry(basis, timestamp))
t_start += 5 * random.weibullvariate(1, 1.5) t_start += 5 * random.weibullvariate(1, 1.5)
crawler = crawl.CrawlingThread(basis) crawler = crawl.CrawlingThread(basis)
crawler.start() crawler.start()

View file

@ -58,7 +58,9 @@ class TorInstance():
async def run(self): async def run(self):
""" Runs the Tor Instance on the history. """ Runs the Tor Instance on the history.
""" """
while (self.history[0][1] - dt.datetime.now()).total_seconds >= 10: while (self.history) and (dt.datetime.combine(self.history[0][1],
dt.datetime.min.time()) -
dt.datetime.now()).total_seconds() >= 10:
print("Sleeping") print("Sleeping")
sleep(10) sleep(10)
while self.history: while self.history:
@ -66,8 +68,9 @@ class TorInstance():
async with async_timeout.timeout(30): async with async_timeout.timeout(30):
await(self.query(item[0])) await(self.query(item[0]))
now = dt.datetime.now() now = dt.datetime.now()
if now <= self.history[0][1]: print(self.history[0])
sleep((self.history[0][1] - now).total_seconds()) if now <= dt.datetime.combine(self.history[0][1], dt.datetime.min.time()):
sleep((dt.datetime.combine(self.history[0][1], dt.datetime.min.time()) - now).total_seconds())
def create_session(self): def create_session(self):

View file

@ -16,3 +16,4 @@ stem==1.6.0
pycurl==7.43.0.1 pycurl==7.43.0.1
rdflib==4.2.2 rdflib==4.2.2
git+https://github.com/tobast/RDFSerializer.git git+https://github.com/tobast/RDFSerializer.git
aiosocks==0.2.6