From bdfa285e6bc9a7f06366a46b960a3318068a8aeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Oudin?= Date: Mon, 26 Feb 2018 15:14:53 +0100 Subject: [PATCH] We do not want to use settings --- crawl/crawl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawl/crawl.py b/crawl/crawl.py index 162b26b..eadd653 100644 --- a/crawl/crawl.py +++ b/crawl/crawl.py @@ -73,7 +73,7 @@ def url_getter(html, current_page, root_url): # Works only with python >= 3.6 links_list = list(dict.fromkeys(links_list)) - forbidden_words = ['login', 'agreement', 'mailto'] + forbidden_words = ['login', 'agreement', 'mailto', 'settings'] links_list = [link for link in links_list if not any(word in link.lower() for word in forbidden_words)]