mpri-webdam/histories/tor_runner.py

116 lines
3.3 KiB
Python
Raw Normal View History

2018-02-25 19:42:58 +01:00
"""
Module that handles the creation of Tor instances in order to safely run histories.
"""
import shutil
import asyncio
2018-02-25 21:27:15 +01:00
import aiohttp
from aiosocks.connector import ProxyConnector, ProxyClientRequest
from time import sleep
2018-02-25 21:27:15 +01:00
import async_timeout
2018-02-25 21:34:20 +01:00
import stem.process as tor
2018-02-25 19:42:58 +01:00
class TorInstance():
    """
    A Tor instance object, with some useful information.

    It is designed to be used as a worker in order to replay a history.
    Instances are meant to be built through the `create` factory, which
    picks the ports and data directory, launches the tor process and opens
    the HTTP session that goes through it.
    """
    BASE_SOCKS_PORT = 40000
    BASE_CONTROL_PORT = 20000
    BASE_DATA_DIR = "/tmp/tor{}/"
    # Number of instances created so far. It is used as the offset added to
    # the base ports and as the suffix of the data directory, so that every
    # instance gets its own ports and directory.
    TOR_RUNNER = 0

    @classmethod
    async def create(cls, history):
        """ Factory creation of tor processes.

        Allocates the next SOCKS/control ports and data directory, launches
        a tor process configured with them and opens the proxied session.

        :param history: the history to replay; stored as-is on the instance.
        :return: the ready-to-use instance.
        """
        # The counter lives on TorInstance itself (not on a subclass) so that
        # every instance, whatever its class, gets a distinct index.
        runner_id = TorInstance.TOR_RUNNER
        TorInstance.TOR_RUNNER += 1

        self = cls()
        self.socks_port = cls.BASE_SOCKS_PORT + runner_id
        self.control_port = cls.BASE_CONTROL_PORT + runner_id
        self.data_dir = cls.BASE_DATA_DIR.format(runner_id)
        self.history = history
        self.proxy = "socks5://127.0.0.1:{}".format(self.socks_port)

        # Launch tor before opening the session: if the launch raises, no
        # unclosed client session is left behind.
        # NOTE(review): this call blocks the event loop until tor is up.
        # NOTE(review): tor's documented option name is 'DataDirectory';
        # 'DataDir' is kept unchanged here -- confirm tor accepts it.
        self.process = tor.launch_tor_with_config(
            config={
                'ControlPort': str(self.control_port),
                'SocksPort': str(self.socks_port),
                'DataDir': self.data_dir,
            }
        )
        self.create_session()
        return self

    def __init__(self):
        # Placeholder values only; `create` fills in the real ones.
        self.socks_port = 0
        self.control_port = 0
        self.data_dir = ""
        self.history = None
        self.proxy = ""
        self.session = None
        self.process = None

    async def run(self):
        """ Runs the Tor Instance on the history.

        Items are consumed (popped) from the front of `self.history`. For
        each item, `item[0]` is queried; then the coroutine waits so that the
        gap to the next item matches the gap between their recorded times,
        minus the time already spent on the query.

        Assumes each item is indexable with the URL at index 0 and a numeric
        timestamp in seconds at index 1 -- TODO confirm against the caller.
        """
        loop = asyncio.get_event_loop()
        while self.history:
            item = self.history.pop(0)
            # Monotonic clock of the loop: immune to wall-clock adjustments.
            t_start = loop.time()
            async with async_timeout.timeout(30):
                await self.query(item[0])

            if not self.history:
                # Last item replayed: there is no next timestamp to wait for.
                break

            elapsed = loop.time() - t_start
            diff = (self.history[0][1] - item[1]) - elapsed
            if diff > 0:
                # Non-blocking sleep, so other coroutines keep running.
                await asyncio.sleep(diff)

    def create_session(self):
        """ Create a aiohttp session.

        The session uses the aiosocks connector and request class so that
        requests can be sent through the SOCKS proxy given at query time.
        """
        conn = ProxyConnector(remote_resolve=True)
        self.session = aiohttp.ClientSession(
            connector=conn,
            request_class=ProxyClientRequest
        )

    async def query(self, url):
        """ Performs a query.

        Fetches `url` through this instance's proxy, with a 30 second
        timeout.

        :param url: the URL to fetch.
        :return: the response body as text, or None if it cannot be decoded.
        """
        async with async_timeout.timeout(30):
            async with self.session.get(
                    url,
                    proxy=self.proxy,
                    proxy_auth=None) as resp:
                try:
                    return await resp.text()
                except UnicodeDecodeError:
                    return None

    def __str__(self):
        """ Utility function """
        return ('[TOR] SOCKSPort: {0.socks_port}, ControlPort: '
                '{0.control_port}, DataDir: {0.data_dir}'.format(self))

    async def kill(self):
        """ Kills the process and remove the data dir"""
        self.process.kill()
        # Reap the process so that it is really gone (and no longer writing
        # to its data directory) before the directory is removed.
        self.process.wait()
        # Closing the session has to be awaited to actually take effect.
        await self.session.close()
        shutil.rmtree(self.data_dir)
2018-02-25 21:27:15 +01:00
2018-02-25 19:42:58 +01:00
async def main():
    """ Test function

    Creates three Tor instances one after the other, fetches a page through
    each of them and kills it afterwards.
    """
    for _ in range(3):
        instance = await TorInstance.create(None)
        try:
            await instance.query("https://python.org/")
            print("One page received")
        finally:
            # Always tear the instance down, even when the query fails, so
            # that no tor process or data directory is left behind.
            await instance.kill()
2018-02-25 19:42:58 +01:00
if __name__ == "__main__":
    # Script entry point: run the test function to completion on the
    # default asyncio event loop.
    LOOP = asyncio.get_event_loop()
    LOOP.run_until_complete(main())