More robust crawling
parent 5d4bd30e20
commit 968ff6d24c

2 changed files with 3 additions and 1 deletion
@@ -291,6 +291,8 @@ async def async_crawler(url, output_tree, headers=None):
                 if sample_url not in queued
                 and sample_url not in crawled
             ]
+        else:
+            print("No html received")
     print(crawled)
     output_tree += crawl_tree
 
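The first hunk attaches an else branch to what is presumably an `if html:` guard, so a fetch that yields no HTML is reported instead of being skipped silently. Below is a minimal, self-contained sketch of how the surrounding loop plausibly reads after this commit; the `fetch` and `extract_links` helpers, the `while queued:` loop, and everything outside the diff lines are assumptions, not part of the commit.

import asyncio
import re


async def fetch(url, headers=None):
    # Hypothetical stand-in for the project's real HTTP fetch.
    # Returns the page HTML as a string, or None when nothing came back.
    return None


def extract_links(html):
    # Hypothetical stand-in for the project's real link extraction.
    return re.findall(r'href="([^"]+)"', html)


async def async_crawler(url, output_tree, headers=None):
    queued = [url]   # URLs waiting to be fetched
    crawled = []     # URLs already visited
    crawl_tree = []

    while queued:
        current = queued.pop(0)
        crawled.append(current)
        html = await fetch(current, headers=headers)
        if html:
            crawl_tree.append(current)
            # Enqueue only URLs not already queued or visited
            # (the filter lines shown in the diff).
            queued += [
                sample_url
                for sample_url in extract_links(html)
                if sample_url not in queued
                and sample_url not in crawled
            ]
        else:
            # Added in this commit: surface empty responses
            # instead of dropping them silently.
            print("No html received")
    print(crawled)
    output_tree += crawl_tree


asyncio.run(async_crawler("https://example.com", []))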
@@ -244,6 +244,6 @@ def generate_history(user, start_time):
             new_line.full_clean()
             new_line.save()
         except ValidationError:
-            pass
+            continue
 
     return history
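The second hunk swaps `pass` for `continue` inside the except handler. Inside a loop body, `pass` lets execution fall through to whatever follows the try/except, so a line that failed validation could still be processed by the rest of the iteration; `continue` abandons the iteration outright. The toy below illustrates that difference; the `Line` class, its validation rule, and the loop shape are invented for the example and are not the project's actual model code (the real `ValidationError` is presumably Django's).

class ValidationError(Exception):
    # Minimal stand-in for django.core.exceptions.ValidationError.
    pass


class Line:
    def __init__(self, text):
        self.text = text

    def full_clean(self):
        # Hypothetical validation rule: reject empty lines.
        if not self.text:
            raise ValidationError("empty line")

    def save(self):
        pass  # persistence elided for the example


def generate_history(texts):
    history = []
    for text in texts:
        new_line = Line(text)
        try:
            new_line.full_clean()
            new_line.save()
        except ValidationError:
            # `continue` (new) skips the rest of the iteration, so the
            # invalid line never reaches the append below. The old `pass`
            # would have fallen through and appended it anyway.
            continue
        history.append(new_line)

    return history


print(len(generate_history(["a", "", "b"])))  # 2: the empty line is skipped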