From f6da17982048b319508987d7d13ac3e99bf29bd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Oudin?= Date: Fri, 23 Feb 2018 10:36:14 +0100 Subject: [PATCH] If robots.txt file is invalid, abort mission. --- crawl/crawl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crawl/crawl.py b/crawl/crawl.py index 46c7707..ab3b8e1 100644 --- a/crawl/crawl.py +++ b/crawl/crawl.py @@ -117,6 +117,8 @@ class WebsiteScheduler(metaclass=WebsiteSchedulerMeta): except Exception as e: print(e) raise e + if not self.robot_parser.default_entry: + self.dead = True if not self.dead: delay = self.robot_parser.crawl_delay(settings.USER_AGENT) if delay is None: