2018-02-25 19:42:58 +01:00
|
|
|
"""
|
|
|
|
Module that handles the creation of Tor instances in order to safely replay
browsing histories.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import shutil
|
2018-02-26 09:49:24 +01:00
|
|
|
import datetime as dt
|
|
|
|
from time import sleep
|
2018-02-25 19:42:58 +01:00
|
|
|
import asyncio
|
2018-02-25 21:27:15 +01:00
|
|
|
import aiohttp
|
|
|
|
from aiosocks.connector import ProxyConnector, ProxyClientRequest
|
|
|
|
import async_timeout
|
2018-02-25 21:34:20 +01:00
|
|
|
import stem.process as tor
|
2018-02-25 19:42:58 +01:00
|
|
|
|
|
|
|
class TorInstance:
    """
    A tor instance object, with some useful information.

    It is designed to be used as a worker in order to replay a history:
    each instance owns one tor process, one aiohttp session routed through
    that process' SOCKS port, and one history to replay.

    Instances are meant to be built through the :meth:`create` factory,
    which allocates the ports / data directory and launches tor.
    """
    # First SOCKS port handed out; instance number i uses BASE_SOCKS_PORT + i.
    BASE_SOCKS_PORT = 40000
    # First control port handed out; instance number i uses BASE_CONTROL_PORT + i.
    BASE_CONTROL_PORT = 20000
    # Template of the per-instance data directory, formatted with the index.
    BASE_DATA_DIR = "/tmp/tor{}/"
    # Number of instances created so far; used as the index of the next one.
    TOR_RUNNER = 0

    @classmethod
    async def create(cls, history, headers):
        """ Factory creation of tor processes.

        Reserves the next instance index, builds the instance, opens its
        aiohttp session and launches the tor process.

        :param history: list of items replayed by :meth:`run`; ``item[0]`` is
            the URL to fetch and ``item[1]`` the datetime at which the item
            is due. May be ``None`` when only :meth:`query` is used.
        :param headers: default HTTP headers given to the aiohttp session.
        :return: the ready-to-use instance.
        :raises: whatever ``launch_tor_with_config`` raises; the session
            opened for the instance is closed before re-raising.
        """
        index = cls.TOR_RUNNER
        # The counter always lives on TorInstance so that subclasses share a
        # single numbering and never collide on ports or data directories.
        TorInstance.TOR_RUNNER += 1

        self = cls()
        self.socks_port = cls.BASE_SOCKS_PORT + index
        self.control_port = cls.BASE_CONTROL_PORT + index
        self.data_dir = cls.BASE_DATA_DIR.format(index)
        self.history = history
        self.headers = headers
        self.proxy = "socks5://127.0.0.1:{}".format(self.socks_port)
        self.create_session()
        try:
            # NOTE: this call is not awaited, so it runs synchronously inside
            # the coroutine until it returns.
            self.process = tor.launch_tor_with_config(
                config={
                    'ControlPort': str(self.control_port),
                    'SocksPort': str(self.socks_port),
                    'DataDir': self.data_dir
                }
            )
        except Exception:
            # Do not leak the session when tor could not be started.
            await self.session.close()
            self.session = None
            raise
        return self

    def __init__(self):
        self.socks_port = 0
        self.control_port = 0
        self.data_dir = ""
        self.history = None
        self.proxy = ""
        self.headers = {}
        self.session = None
        self.process = None

    async def run(self):
        """ Runs the Tor Instance on the history.

        Waits (in 10 second steps) until the first item is less than 10
        seconds away, then fetches every item in order, sleeping between two
        items until the next one is due. Items are popped from
        ``self.history`` as they are replayed, so the list is empty once the
        coroutine returns. Nothing is done when the history is ``None`` or
        empty.

        The per-request timeout is enforced by :meth:`query`; its timeout
        error propagates to the caller.
        """
        if not self.history:
            return

        # Coarse wait for the start of the history. asyncio.sleep is used
        # instead of time.sleep so the other workers keep running meanwhile.
        while (self.history[0][1] - dt.datetime.now()).total_seconds() >= 10:
            print("Sleeping")
            await asyncio.sleep(10)

        while self.history:
            url = self.history.pop(0)[0]
            await self.query(url)
            if not self.history:
                # That was the last item: there is no next date to wait for.
                break
            delay = (self.history[0][1] - dt.datetime.now()).total_seconds()
            if delay > 0:
                await asyncio.sleep(delay)

    def create_session(self):
        """ Create a aiohttp session.

        The session is stored in ``self.session`` and sends ``self.headers``
        with every request. It is built on the aiosocks connector / request
        class so that requests can be given a SOCKS proxy.
        """
        conn = ProxyConnector(remote_resolve=True)
        self.session = aiohttp.ClientSession(
            connector=conn,
            headers=self.headers,
            request_class=ProxyClientRequest
        )

    async def query(self, url):
        """ Performs a query.

        Fetches ``url`` with a GET request sent through ``self.proxy``, with
        an overall timeout of 30 seconds.

        :param url: the URL to fetch.
        :return: the response body as text, or ``None`` when the body cannot
            be decoded.
        """
        async with async_timeout.timeout(30):
            async with self.session.get(
                    url,
                    proxy=self.proxy,
                    proxy_auth=None) as resp:
                try:
                    return await resp.text()
                except UnicodeDecodeError:
                    return None

    def __str__(self):
        """ Utility function """
        return ('[TOR] SOCKSPort: {0.socks_port}, ControlPort: '
                '{0.control_port}, DataDir: {0.data_dir}'.format(self))

    async def kill(self):
        """ Kills the process and remove the data dir.

        Also closes the aiohttp session. Each resource is skipped when it was
        never set up, so the method is safe on a partially built instance and
        can be called more than once.
        """
        if self.process is not None:
            self.process.kill()
            # Reap the process so tor is really gone before its data
            # directory is removed.
            self.process.wait()
            self.process = None
        if self.session is not None:
            await self.session.close()
            self.session = None
        if self.data_dir:
            # Best-effort cleanup: the directory may not exist if tor never
            # got to the point of creating it.
            shutil.rmtree(self.data_dir, ignore_errors=True)
|
|
|
|
|
2018-02-25 21:27:15 +01:00
|
|
|
|
2018-02-25 19:42:58 +01:00
|
|
|
async def main():
    """ Test function.

    Creates three tor instances one after the other; each one fetches a
    single page and is then torn down.
    """
    for _ in range(3):
        instance = await TorInstance.create(None, {"user-agent" : "Blah"})
        try:
            await instance.query("https://python.org/")
            print("One page received")
        finally:
            # Always stop tor and remove its data dir, even when the query
            # times out or fails, so no process is left behind.
            await instance.kill()
|
2018-02-25 19:42:58 +01:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: drive the smoke test to completion on the
    # default event loop.
    LOOP = asyncio.get_event_loop()
    LOOP.run_until_complete(main())
|