Fix function calls
This commit is contained in:
parent
3e5fc2f9b3
commit
02e91bb2b7
1 changed file with 8 additions and 5 deletions
|
@@ -234,7 +234,8 @@ class PageGetter:
|
||||||
async def async_print(url):
|
async def async_print(url):
|
||||||
""" Debug function to follow what's actually happening """
|
""" Debug function to follow what's actually happening """
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
html = await PageGetter(session, url).get(ssl=False)
|
html = await PageGetter(session, url,
|
||||||
|
settings.USER_AGENT).get(ssl=False)
|
||||||
|
|
||||||
print('GOT {}HTML for {}'.format(
|
print('GOT {}HTML for {}'.format(
|
||||||
'None ' if html is None else '',
|
'None ' if html is None else '',
|
||||||
|
@@ -244,9 +245,11 @@ async def async_print(url):
|
||||||
|
|
||||||
async def async_crawler(url, queue, headers=None):
|
async def async_crawler(url, queue, headers=None):
|
||||||
if headers is None:
|
if headers is None:
|
||||||
headers = {
|
headers = {}
|
||||||
'User-Agent': settings.USER_AGENT,
|
if 'User-Agent' not in headers:
|
||||||
}
|
headers['User-Agent'] = settings.USER_AGENT
|
||||||
|
|
||||||
|
user_agent = headers['User-Agent']
|
||||||
|
|
||||||
queued = [url]
|
queued = [url]
|
||||||
crawled = []
|
crawled = []
|
||||||
|
@@ -259,7 +262,7 @@ async def async_crawler(url, queue, headers=None):
|
||||||
return crawled
|
return crawled
|
||||||
parsed_url = urlparse(url)
|
parsed_url = urlparse(url)
|
||||||
print("Crawling {}".format(url))
|
print("Crawling {}".format(url))
|
||||||
html = await PageGetter(session, url).get(ssl=False)
|
html = await PageGetter(session, url, user_agent).get(ssl=False)
|
||||||
if html:
|
if html:
|
||||||
new_urls = url_getter(
|
new_urls = url_getter(
|
||||||
html,
|
html,
|
||||||
|
|
Loading…
Reference in a new issue