From d19c2e821623b715e0d13a7dcc8a91cccc03c960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Oudin?= Date: Sat, 24 Feb 2018 15:41:46 +0100 Subject: [PATCH] Add mailto addresses to forbidden list --- crawl/crawl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawl/crawl.py b/crawl/crawl.py index cc86f18..e8467f1 100644 --- a/crawl/crawl.py +++ b/crawl/crawl.py @@ -71,7 +71,7 @@ def url_getter(html, current_page, root_url): # Works only with python >= 3.6 links_list = list(dict.fromkeys(links_list)) - forbidden_words = ['login', 'agreement'] + forbidden_words = ['login', 'agreement', 'mailto'] links_list = [link for link in links_list if not any(word in link.lower() for word in forbidden_words)]