mpri-webdam/histories/tor_runner.py

121 lines
3.6 KiB
Python
Raw Normal View History

2018-02-25 19:42:58 +01:00
"""
Modules that handles tor instaces creations in order to safely run histories
"""
import shutil
2018-02-26 09:49:24 +01:00
import datetime as dt
from time import sleep
2018-02-25 19:42:58 +01:00
import asyncio
2018-02-25 21:27:15 +01:00
import aiohttp
from aiosocks.connector import ProxyConnector, ProxyClientRequest
import async_timeout
2018-02-25 21:34:20 +01:00
import stem.process as tor
2018-02-25 19:42:58 +01:00
class TorInstance():
    """
    A tor instance object, with some useful information.
    It is designed to be used as a worker in order to replay an history.

    Instances are meant to be built with the ``create`` factory, which picks
    the ports and data directory, opens the HTTP session and launches the tor
    process. A bare ``TorInstance()`` only holds placeholder values.
    """
    BASE_SOCKS_PORT = 40000
    BASE_CONTROL_PORT = 20000
    BASE_DATA_DIR = "/tmp/tor{}/"
    # Number of instances created so far. It is used as an offset so that
    # every instance gets its own ports and its own data directory.
    TOR_RUNNER = 0

    @classmethod
    async def create(cls, history, headers):
        """ Factory creation of tor processes.

        :param history: sequence of items replayed by ``run``; ``run`` reads
            ``item[0]`` as the URL and ``item[1]`` as the ``datetime`` at
            which it is due. May be ``None`` if ``run`` is never called.
        :param headers: headers given to the aiohttp session.
        :return: a ``TorInstance`` whose tor process has been launched.
        """
        socks_port = cls.BASE_SOCKS_PORT + cls.TOR_RUNNER
        control_port = cls.BASE_CONTROL_PORT + cls.TOR_RUNNER
        data_dir = cls.BASE_DATA_DIR.format(cls.TOR_RUNNER)
        # The counter lives on TorInstance itself so that it is shared by
        # every (sub)class and ports never collide.
        TorInstance.TOR_RUNNER += 1

        self = cls()
        self.socks_port = socks_port
        self.control_port = control_port
        self.data_dir = data_dir
        self.history = history
        self.headers = headers
        self.proxy = "socks5://127.0.0.1:{}".format(self.socks_port)
        self.create_session()
        # NOTE(review): this call is synchronous and blocks the event loop
        # while tor starts. It is kept on the loop thread on purpose: stem's
        # launch timeout may require the main thread -- confirm before
        # moving it to an executor.
        # NOTE(review): tor documents this option as 'DataDirectory'; the
        # 'DataDir' spelling is kept unchanged -- confirm tor accepts it.
        self.process = tor.launch_tor_with_config(
            config={
                'ControlPort' : str(control_port),
                'SocksPort' : str(socks_port),
                'DataDir' : data_dir
            }
        )
        return self

    def __init__(self):
        self.socks_port = 0
        self.control_port = 0
        self.data_dir = ""
        self.history = None
        self.proxy = ""
        self.headers = {}
        self.session = None
        self.process = None

    async def run(self):
        """ Runs the Tor Instance on the history.

        Waits until the first item is due in less than ten seconds, then
        pops the items one by one, queries each URL and waits for the date of
        the next item. Does nothing when the history is empty or ``None``.
        Exceptions raised by ``query`` (timeouts included) are propagated.
        """
        # asyncio.sleep is used instead of time.sleep so that other workers
        # sharing the event loop keep running while this one waits.
        while (self.history
               and (self.history[0][1]
                    - dt.datetime.now()).total_seconds() >= 10):
            print("Sleeping")
            await asyncio.sleep(10)

        while self.history:
            item = self.history.pop(0)
            # query() already enforces its own 30 seconds timeout.
            await self.query(item[0])
            if not self.history:
                # That was the last item: there is no next date to wait for.
                break
            delay = (self.history[0][1] - dt.datetime.now()).total_seconds()
            if delay >= 0:
                await asyncio.sleep(delay)

    def create_session(self):
        """ Creates the aiohttp session used by ``query``.

        The session carries ``self.headers`` and is stored in
        ``self.session``.
        """
        # remote_resolve=True: presumably host names are resolved on the
        # proxy side rather than locally -- see the aiosocks documentation.
        conn = ProxyConnector(remote_resolve=True)
        self.session = aiohttp.ClientSession(
            connector=conn,
            headers=self.headers,
            request_class=ProxyClientRequest
        )

    async def query(self, url):
        """ Performs a query.

        Fetches ``url`` through ``self.proxy`` with a 30 seconds timeout.

        :param url: the URL to fetch.
        :return: the body of the response as text, or ``None`` if it cannot
            be decoded.
        """
        async with async_timeout.timeout(30):
            async with self.session.get(
                    url,
                    proxy=self.proxy,
                    proxy_auth=None) as resp:
                try:
                    return await resp.text()
                except UnicodeDecodeError:
                    return None

    def __str__(self):
        """ Utility function """
        return ('[TOR] SOCKSPort: {0.socks_port}, ControlPort: '
                '{0.control_port}, DataDir: {0.data_dir}'.format(self))

    async def kill(self):
        """ Kills the process, closes the session and removes the data dir"""
        self.process.kill()
        # Reap the child so that tor is really gone (and no zombie is left)
        # before its files are deleted.
        self.process.wait()
        # Depending on the aiohttp version, close() is either synchronous or
        # a coroutine that has to be awaited.
        closing = self.session.close()
        if asyncio.iscoroutine(closing):
            await closing
        shutil.rmtree(self.data_dir)
2018-02-25 21:27:15 +01:00
2018-02-25 19:42:58 +01:00
async def main():
    """ Test function.

    Creates three tor instances one after the other; each one fetches
    https://python.org/ through its proxy and is then killed.
    """
    for _ in range(3):
        instance = await TorInstance.create(None, {"user-agent" : "Blah"})
        try:
            await instance.query("https://python.org/")
            print("One page received")
        finally:
            # Always stop tor and remove its data dir, even when the query
            # fails, so that no tor process is left behind.
            await instance.kill()
2018-02-25 19:42:58 +01:00
if __name__ == "__main__":
    # An explicit loop is created instead of relying on get_event_loop()
    # implicitly making one (deprecated on recent Python versions), and it
    # is closed once the test function is done.
    LOOP = asyncio.new_event_loop()
    asyncio.set_event_loop(LOOP)
    try:
        LOOP.run_until_complete(main())
    finally:
        LOOP.close()