Fix function calls

This commit is contained in:
Théophile Bastian 2018-02-26 11:56:02 +01:00
parent 3e5fc2f9b3
commit 02e91bb2b7
1 changed file with 8 additions and 5 deletions

View File

@ -234,7 +234,8 @@ class PageGetter:
async def async_print(url): async def async_print(url):
""" Debug function to follow what's actually happening """ """ Debug function to follow what's actually happening """
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
html = await PageGetter(session, url).get(ssl=False) html = await PageGetter(session, url,
settings.USER_AGENT).get(ssl=False)
print('GOT {}HTML for {}'.format( print('GOT {}HTML for {}'.format(
'None ' if html is None else '', 'None ' if html is None else '',
@ -244,9 +245,11 @@ async def async_print(url):
async def async_crawler(url, queue, headers=None): async def async_crawler(url, queue, headers=None):
if headers is None: if headers is None:
headers = { headers = {}
'User-Agent': settings.USER_AGENT, if 'User-Agent' not in headers:
} headers['User-Agent'] = settings.USER_AGENT
user_agent = headers['User-Agent']
queued = [url] queued = [url]
crawled = [] crawled = []
@ -259,7 +262,7 @@ async def async_crawler(url, queue, headers=None):
return crawled return crawled
parsed_url = urlparse(url) parsed_url = urlparse(url)
print("Crawling {}".format(url)) print("Crawling {}".format(url))
html = await PageGetter(session, url).get(ssl=False) html = await PageGetter(session, url, user_agent).get(ssl=False)
if html: if html:
new_urls = url_getter( new_urls = url_getter(
html, html,