2018-02-25 19:42:58 +01:00
|
|
|
"""
|
|
|
|
Module that handles the creation of Tor instances in order to safely run histories
|
|
|
|
"""
|
|
|
|
|
|
|
|
import stem.process as tor
|
|
|
|
import shutil
|
|
|
|
import asyncio
|
2018-02-25 21:27:15 +01:00
|
|
|
import aiohttp
|
|
|
|
import aiosocks
|
|
|
|
from aiosocks.connector import ProxyConnector, ProxyClientRequest
|
|
|
|
import async_timeout
|
|
|
|
import io
|
2018-02-25 19:42:58 +01:00
|
|
|
|
|
|
|
class TorInstance():
    """
    A tor instance object, with some useful information.
    It is designed to be used as a worker in order to replay an history.

    Instances are meant to be built through the :meth:`create` factory,
    which sets the following attributes:

    * ``socks_port`` / ``control_port`` -- ports handed to the tor process
    * ``data_dir`` -- data directory handed to the tor process
    * ``history`` -- the object passed to :meth:`create`, stored untouched
    * ``proxy`` -- SOCKS5 URL pointing at ``socks_port`` on 127.0.0.1
    * ``session`` -- aiohttp session used by :meth:`query`
    * ``process`` -- value returned by ``stem.process.launch_tor_with_config``
    """
    BASE_SOCKS_PORT = 40000
    BASE_CONTROL_PORT = 20000
    BASE_DATA_DIR = "/tmp/tor{}/"
    # Number of instances created so far; used as an offset so every
    # instance gets its own ports and data directory.
    TOR_RUNNER = 0

    @classmethod
    async def create(cls, history):
        """ Factory creation of tor processes

        :param history: object stored as ``self.history``; not inspected here.
        :returns: a new instance with a tor process launched for it.
        :raises OSError: re-raised if launching tor fails (the data directory
            is removed first).
        """
        index = cls.TOR_RUNNER
        socks_port = cls.BASE_SOCKS_PORT + index
        control_port = cls.BASE_CONTROL_PORT + index
        data_dir = cls.BASE_DATA_DIR.format(index)
        # The counter is bumped on the base class so that it is shared.
        TorInstance.TOR_RUNNER += 1

        # Use cls() so that subclasses get instances of their own type.
        self = cls()
        self.socks_port = socks_port
        self.control_port = control_port
        self.data_dir = data_dir
        self.history = history
        self.proxy = "socks5://127.0.0.1:{}".format(self.socks_port)

        # Launch tor *before* opening the HTTP session: if the launch fails
        # there is then no unclosed session left behind.
        try:
            self.process = tor.launch_tor_with_config(
                config={
                    'ControlPort': str(control_port),
                    'SocksPort': str(socks_port),
                    # Full option name (the abbreviated 'DataDir' was used
                    # previously).
                    'DataDirectory': data_dir,
                }
            )
        except OSError:
            # Do not leave a half-initialised data directory behind.
            shutil.rmtree(data_dir, ignore_errors=True)
            raise

        self.session = self.create_session()
        return self

    def create_session(self):
        """ Build an aiohttp session whose requests can go through a proxy

        :returns: an ``aiohttp.ClientSession`` using the aiosocks connector
            and request class.
        """
        conn = ProxyConnector(remote_resolve=True)
        return aiohttp.ClientSession(
            connector=conn,
            request_class=ProxyClientRequest
        )

    async def query(self, url, timeout=30):
        """ Fetch ``url`` through this instance's SOCKS proxy

        :param url: address to GET.
        :param timeout: overall time budget in seconds (default 30).
        :returns: the response body as text, or ``None`` when the body
            cannot be decoded.
        """
        async with async_timeout.timeout(timeout):
            async with self.session.get(
                    url,
                    proxy=self.proxy,
                    proxy_auth=None) as resp:
                try:
                    return await resp.text()
                except UnicodeDecodeError:
                    return None

    def __str__(self):
        """ Utility function """
        return ('[TOR] SOCKSPort: {0.socks_port}, ControlPort: '
                '{0.control_port}, DataDir: {0.data_dir}'.format(self))

    async def kill(self):
        """ Kills the process and remove the data dir

        Every cleanup step is attempted even if a previous one fails, and a
        missing data directory is not treated as an error.
        """
        try:
            self.process.kill()
            # Reap the child so it does not linger as a zombie and is
            # really gone before its data directory is deleted.
            self.process.wait()
        finally:
            try:
                closing = self.session.close()
                # close() is a coroutine on recent aiohttp releases and a
                # plain call on older ones; await it only when needed.
                if hasattr(closing, "__await__"):
                    await closing
            finally:
                shutil.rmtree(self.data_dir, ignore_errors=True)
|
|
|
|
|
2018-02-25 21:27:15 +01:00
|
|
|
|
2018-02-25 19:42:58 +01:00
|
|
|
async def main():
    """ Test function

    Creates three tor instances one after the other, fetches one page
    through each of them and tears the instance down afterwards.
    """
    for _ in range(3):
        instance = await TorInstance.create(None)
        try:
            await instance.query("https://python.org/")
            print("One page received")
        finally:
            # Always stop tor and clean up, even when the query fails.
            await instance.kill()
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the test function on an explicitly created event loop and close
    # the loop afterwards, instead of relying on an implicitly created one
    # that is never closed.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.close()
|