Compare commits
No commits in common. "master" and "histories_models" have entirely different histories.
master
...
histories_
6 changed files with 15 additions and 54 deletions
|
@ -1,6 +1,3 @@
|
||||||
# mpri-webdam
|
# mpri-webdam
|
||||||
|
|
||||||
Generate realistic fake browsing histories for borderline and/or activists
|
Générer tout plein de faux historiques. Parce qu'il faut bien valider ce cours.
|
||||||
users, to hide real traffic from global surveillance.
|
|
||||||
|
|
||||||
Lacks proper documentation at the moment `:(`
|
|
||||||
|
|
|
@ -13,13 +13,6 @@
|
||||||
"query_pattern":"?q={}"
|
"query_pattern":"?q={}"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"searchengine": {
|
|
||||||
"name":"Duckduckgo Lite",
|
|
||||||
"url":"https://duckduckgo.com/lite/",
|
|
||||||
"query_pattern":"?q={}"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"searchengine": {
|
"searchengine": {
|
||||||
"name":"Qwant",
|
"name":"Qwant",
|
||||||
|
|
|
@ -1,16 +0,0 @@
|
||||||
from django.core.management.base import BaseCommand
|
|
||||||
from profiles import models as profiles
|
|
||||||
from histories.models import generate_history
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
|
||||||
''' Generates an history and prints the related XML '''
|
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def handle(self, *args, **kwargs):
|
|
||||||
prof = profiles.Profile.objects.all()[0]
|
|
||||||
history = generate_history(prof, datetime.now())
|
|
||||||
print(history.to_xml_string())
|
|
|
@ -5,7 +5,6 @@ interests, keywords...
|
||||||
|
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import random
|
import random
|
||||||
import asyncio
|
|
||||||
from math import floor
|
from math import floor
|
||||||
from xml.etree import ElementTree as ET
|
from xml.etree import ElementTree as ET
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
@ -44,9 +43,9 @@ class HistoryEntry(models.Model):
|
||||||
def to_xml(self, xml_root):
|
def to_xml(self, xml_root):
|
||||||
entry = ET.Element('history')
|
entry = ET.Element('history')
|
||||||
entry_url = ET.Element('url')
|
entry_url = ET.Element('url')
|
||||||
entry_url.text = str(self.search)
|
entry_url.text = self.search
|
||||||
entry_ts = ET.Element('timestamp')
|
entry_ts = ET.Element('timestamp')
|
||||||
entry_ts.text = str(self.timestamp.timestamp())
|
entry_ts.text = self.timestamp.timestamp()
|
||||||
entry.append(entry_url)
|
entry.append(entry_url)
|
||||||
entry.append(entry_ts)
|
entry.append(entry_ts)
|
||||||
xml_root.append(entry)
|
xml_root.append(entry)
|
||||||
|
@ -108,18 +107,13 @@ class History(models.Model):
|
||||||
output += str(entry) + '\n'
|
output += str(entry) + '\n'
|
||||||
return output
|
return output
|
||||||
|
|
||||||
async def _handler(self):
|
async def play_histories(self):
|
||||||
runner = await TorInstance.create(self.return_history(), self.user.browser_fingerprint.serialize_headers())
|
|
||||||
await runner.run()
|
|
||||||
self.played = True
|
|
||||||
self.save()
|
|
||||||
|
|
||||||
def play_histories(self):
|
|
||||||
""" Actually plays the history.
|
""" Actually plays the history.
|
||||||
"""
|
"""
|
||||||
loop = asyncio.new_event_loop()
|
self.played = True
|
||||||
asyncio.set_event_loop(loop)
|
runner = await TorInstance.create(self.return_history(), self.user.browser_fingerprint.serialize_headers())
|
||||||
loop.run_until_complete(asyncio.wait([self._handler()]))
|
runner.run()
|
||||||
|
self.save()
|
||||||
|
|
||||||
def to_xml(self, xml_root=None):
|
def to_xml(self, xml_root=None):
|
||||||
''' Exports the current history to xml '''
|
''' Exports the current history to xml '''
|
||||||
|
@ -129,9 +123,9 @@ class History(models.Model):
|
||||||
xml_root = ET.Element('root')
|
xml_root = ET.Element('root')
|
||||||
|
|
||||||
hist_node = ET.Element("history", attrib={
|
hist_node = ET.Element("history", attrib={
|
||||||
'start-ts': str(self.start_ts),
|
'start-ts': self.start_ts,
|
||||||
'played': '1' if self.played else '0',
|
'played': 1 if self.played else 0,
|
||||||
'user': str(self.user.pk),
|
'user': self.user.pk,
|
||||||
})
|
})
|
||||||
xml_root.append(hist_node)
|
xml_root.append(hist_node)
|
||||||
for entry in self.historyentry_set.all():
|
for entry in self.historyentry_set.all():
|
||||||
|
@ -140,10 +134,6 @@ class History(models.Model):
|
||||||
if standalone:
|
if standalone:
|
||||||
return xml_root
|
return xml_root
|
||||||
|
|
||||||
def to_xml_string(self):
|
|
||||||
xml = self.to_xml()
|
|
||||||
return ET.tostring(xml)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_xml(xml_root):
|
def from_xml(xml_root):
|
||||||
''' Loads an history from an XML file '''
|
''' Loads an history from an XML file '''
|
||||||
|
@ -187,6 +177,7 @@ def generate_partial_history(user, t_start):
|
||||||
timestamp = t_start
|
timestamp = t_start
|
||||||
result = []
|
result = []
|
||||||
basis = generate_first_url(user)
|
basis = generate_first_url(user)
|
||||||
|
result.append(PartialHistoryEntry(basis, timestamp))
|
||||||
t_start += 5 * random.weibullvariate(1, 1.5)
|
t_start += 5 * random.weibullvariate(1, 1.5)
|
||||||
crawler = crawl.CrawlingThread(basis)
|
crawler = crawl.CrawlingThread(basis)
|
||||||
crawler.start()
|
crawler.start()
|
||||||
|
|
|
@ -58,9 +58,7 @@ class TorInstance():
|
||||||
async def run(self):
|
async def run(self):
|
||||||
""" Runs the Tor Instance on the history.
|
""" Runs the Tor Instance on the history.
|
||||||
"""
|
"""
|
||||||
while (self.history) and (dt.datetime.combine(self.history[0][1],
|
while (self.history[0][1] - dt.datetime.now()).total_seconds >= 10:
|
||||||
dt.datetime.min.time()) -
|
|
||||||
dt.datetime.now()).total_seconds() >= 10:
|
|
||||||
print("Sleeping")
|
print("Sleeping")
|
||||||
sleep(10)
|
sleep(10)
|
||||||
while self.history:
|
while self.history:
|
||||||
|
@ -68,9 +66,8 @@ class TorInstance():
|
||||||
async with async_timeout.timeout(30):
|
async with async_timeout.timeout(30):
|
||||||
await(self.query(item[0]))
|
await(self.query(item[0]))
|
||||||
now = dt.datetime.now()
|
now = dt.datetime.now()
|
||||||
print(self.history[0])
|
if now <= self.history[0][1]:
|
||||||
if now <= dt.datetime.combine(self.history[0][1], dt.datetime.min.time()):
|
sleep((self.history[0][1] - now).total_seconds())
|
||||||
sleep((dt.datetime.combine(self.history[0][1], dt.datetime.min.time()) - now).total_seconds())
|
|
||||||
|
|
||||||
|
|
||||||
def create_session(self):
|
def create_session(self):
|
||||||
|
|
|
@ -16,4 +16,3 @@ stem==1.6.0
|
||||||
pycurl==7.43.0.1
|
pycurl==7.43.0.1
|
||||||
rdflib==4.2.2
|
rdflib==4.2.2
|
||||||
git+https://github.com/tobast/RDFSerializer.git
|
git+https://github.com/tobast/RDFSerializer.git
|
||||||
aiosocks==0.2.6
|
|
||||||
|
|
Loading…
Reference in a new issue