Add a timeout to a single page retrieval
parent e140d4a8a7
commit 67ad232533
1 changed file with 5 additions and 1 deletion
@@ -289,7 +289,11 @@ async def async_crawler(url, out_tree, crawled, user_agent, session, parent):
     crawled.add(simplify_url(url))
     parsed_url = urlparse(url)
     print("Crawling {}".format(url))
-    html = await PageGetter(session, url, user_agent).get(ssl=False)
+    try:
+        with async_timeout.timeout(3):
+            html = await PageGetter(session, url, user_agent).get(ssl=False)
+    except asyncio.TimeoutError:
+        return
 
     new_tasks = []
 
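For context, the change bounds each single-page fetch so one slow server cannot stall the crawler: the await on the page retrieval is wrapped in a 3-second timeout, and on asyncio.TimeoutError the coroutine simply gives up on that page. Below is a minimal, self-contained sketch of the same pattern using only the standard library (asyncio.wait_for instead of the async_timeout package); fetch_page is a hypothetical stand-in for PageGetter(session, url, user_agent).get(ssl=False) from this project.

import asyncio

async def fetch_page(url):
    # Hypothetical stand-in for the real network fetch;
    # sleeps to simulate a slow server.
    await asyncio.sleep(5)
    return "<html>...</html>"

async def crawl_one(url):
    try:
        # Give the single page retrieval at most 3 seconds,
        # mirroring the commit's async_timeout.timeout(3).
        html = await asyncio.wait_for(fetch_page(url), timeout=3)
    except asyncio.TimeoutError:
        # A slow page is skipped instead of blocking the whole crawl.
        return None
    return html

if __name__ == "__main__":
    print(asyncio.run(crawl_one("https://example.com")))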