diff --git a/crawl/crawl.py b/crawl/crawl.py index cc86f18..e8467f1 100644 --- a/crawl/crawl.py +++ b/crawl/crawl.py @@ -71,7 +71,7 @@ def url_getter(html, current_page, root_url): # Works only with python >= 3.6 links_list = list(dict.fromkeys(links_list)) - forbidden_words = ['login', 'agreement'] + forbidden_words = ['login', 'agreement', 'mailto'] links_list = [link for link in links_list if not any(word in link.lower() for word in forbidden_words)]