Add a timeout to a single page retrieval
This commit is contained in:
parent
e140d4a8a7
commit
67ad232533
1 changed file with 5 additions and 1 deletion
@@ -289,7 +289,11 @@ async def async_crawler(url, out_tree, crawled, user_agent, session, parent):
     crawled.add(simplify_url(url))
     parsed_url = urlparse(url)
     print("Crawling {}".format(url))
-    html = await PageGetter(session, url, user_agent).get(ssl=False)
+    try:
+        with async_timeout.timeout(3):
+            html = await PageGetter(session, url, user_agent).get(ssl=False)
+    except asyncio.TimeoutError:
+        return
 
     new_tasks = []
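For reference, a minimal, self-contained sketch of the same timeout pattern outside the crawler. It assumes aiohttp and async_timeout are installed and substitutes a plain aiohttp GET for the project's PageGetter helper, which is not shown in this commit; the helper name fetch_with_timeout is hypothetical. Note that recent async_timeout releases require "async with", whereas the diff uses the older "with" form.

import asyncio

import aiohttp
import async_timeout


async def fetch_with_timeout(url, seconds=3):
    # Hypothetical stand-in for PageGetter: fetch one page, give up after `seconds`.
    async with aiohttp.ClientSession() as session:
        try:
            # async_timeout cancels the enclosed await once the deadline passes.
            async with async_timeout.timeout(seconds):
                async with session.get(url, ssl=False) as response:
                    return await response.text()
        except asyncio.TimeoutError:
            # Same behaviour as the crawler: skip the page and move on.
            return None


if __name__ == "__main__":
    print(asyncio.run(fetch_with_timeout("https://example.com")))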