From e2fb6546e1dfa36379d565fb69f5bed696312273 Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Thu, 23 Aug 2018 22:44:20 -0500 Subject: [PATCH 1/3] Add more checks to CI --- verify.py | 100 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 86 insertions(+), 14 deletions(-) diff --git a/verify.py b/verify.py index de16b75..116f43d 100644 --- a/verify.py +++ b/verify.py @@ -5,33 +5,105 @@ import io import sys +from collections import Counter from publicsuffixlist import PublicSuffixList from requests import get -def main(arguments): + +blacklist = "disposable_email_blacklist.conf" +whitelist = "whitelist.conf" + +files = { + filename: open(filename).read().splitlines() for filename in [whitelist, blacklist] +} + + +def download_suffixes(): + with open("public_suffix_list.dat", "wb") as file: + response = get("https://publicsuffix.org/list/public_suffix_list.dat") + file.write(response.content) + + +def check_for_public_suffixes(filename): + lines = files[filename] suffix_detected = False psl = None download_suffixes() with open("public_suffix_list.dat", "r") as latest: psl = PublicSuffixList(latest) - with io.open('disposable_email_blacklist.conf', 'r') as deb: - for i, line in enumerate(deb): - current_line = line.strip() - public_suffix = psl.publicsuffix(current_line) - if public_suffix == current_line: - print(f'The line number {i+1} contains just a public suffix: {current_line}') - suffix_detected = True + for i, line in enumerate(lines): + current_line = line.strip() + public_suffix = psl.publicsuffix(current_line) + if public_suffix == current_line: + print( + f"The line number {i+1} contains just a public suffix: {current_line}" + ) + suffix_detected = True if suffix_detected: - print ('At least one valid public suffix found in the blacklist, please remove it. See https://publicsuffix.org for details on why this shouldn\'t be blacklisted.') + print( + "At least one valid public suffix found in {!r}, please " + "remove it. See https://publicsuffix.org for details on why this " + "shouldn't be blacklisted.".format(filename) + ) sys.exit(1) -def download_suffixes(): - with open('public_suffix_list.dat', "wb") as file: - response = get('https://publicsuffix.org/list/public_suffix_list.dat') - file.write(response.content) +def check_for_non_lowercase(filename): + lines = files[filename] + invalid = set(lines) - set(line.lower() for line in lines) + if invalid: + print("The following domains should be lowercased in {!r}:".format(filename)) + for line in sorted(invalid): + print("* {}".format(line)) + sys.exit(1) + + +def check_for_duplicates(filename): + lines = files[filename] + count = Counter(lines) - Counter(set(lines)) + if count: + print("The following domains appear twice in {!r}:".format(filename)) + for line in sorted(count): + print("* {}".format(line)) + sys.exit(1) + + +def check_sort_order(filename): + lines = files[filename] + for a, b in zip(lines, sorted(lines)): + if a != b: + print("The list is not sorted in {!r}:".format(filename)) + print("* {!r} should come before {!r}".format(b, a)) + sys.exit(1) + + +def check_for_intersection(filename_a, filename_b): + a = files[filename_a] + b = files[filename_b] + intersection = set(a) & set(b) + if intersection: + print("The following domains appear in both lists:") + for line in sorted(intersection): + print("* {}".format(line)) if __name__ == "__main__": - main(sys.argv) + + # Check if any domains have a public suffix + check_for_public_suffixes(blacklist) + + # Check if any domains are not lowercase + check_for_non_lowercase(whitelist) + check_for_non_lowercase(blacklist) + + # Check if any domains are duplicated in the same list + check_for_duplicates(whitelist) + check_for_duplicates(blacklist) + + # Check if any lists are not sorted + check_sort_order(whitelist) + check_sort_order(blacklist) + + # Check if any domains are in both the whitelist and blacklist + check_for_intersection(whitelist, blacklist) From a1b7162ccae89a57ef3aa177d76c65f80d33b7f4 Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Thu, 23 Aug 2018 22:44:44 -0500 Subject: [PATCH 2/3] Sort blacklist alphabetically --- disposable_email_blacklist.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/disposable_email_blacklist.conf b/disposable_email_blacklist.conf index b8b7f7e..c6b00f9 100644 --- a/disposable_email_blacklist.conf +++ b/disposable_email_blacklist.conf @@ -1753,6 +1753,7 @@ mohmal.com mohmal.im mohmal.in mohmal.tech +molms.com momentics.ru monachat.tk monadi.ml @@ -2957,4 +2958,3 @@ zxcvbnm.com zymuying.com zzi.us zzz.com -molms.com From eb7dc8e89defb544baacb1ca9f3864521b964ae4 Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Thu, 23 Aug 2018 22:47:30 -0500 Subject: [PATCH 3/3] Remove domains that were in whitelist --- disposable_email_blacklist.conf | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/disposable_email_blacklist.conf b/disposable_email_blacklist.conf index c6b00f9..5fcd817 100644 --- a/disposable_email_blacklist.conf +++ b/disposable_email_blacklist.conf @@ -304,8 +304,6 @@ anonymousness.com anotherdomaincyka.tk ansibleemail.com anthony-junkmail.com -antichef.com -antichef.net antireg.com antireg.ru antispam.de @@ -472,7 +470,6 @@ buzzcluby.com byebyemail.com byespm.com byom.de -c2.hu c51vsgq.com cachedot.net californiafitnessdeals.com @@ -640,7 +637,6 @@ dev-null.gq dev-null.ml devnullmail.com deyom.com -dfgh.net dharmatel.net dhm.ro dhy.cc @@ -1848,7 +1844,6 @@ netricity.nl netris.net netviewer-france.com netzidiot.de -neverbox.com nevermail.de newbpotato.tk newideasfornewpeople.info @@ -2105,7 +2100,6 @@ receiveee.com recipeforfailure.com recode.me reconmail.com -recursor.net recyclemail.dk reddit.usa.cc redfeathercrow.com @@ -2155,7 +2149,6 @@ s33db0x.com sabrestlouis.com sackboii.com safaat.cf -safe-mail.net safermail.info safersignup.de safetymail.info @@ -2229,7 +2222,6 @@ showslow.de shrib.com shut.name shut.ws -sibmail.com sify.com sikux.com siliwangi.ga @@ -2272,9 +2264,7 @@ smtp99.com smwg.info snakemail.com snapwet.com -sneakemail.com sneakmail.de -snkmail.com social-mailer.tk socialfurry.org sofimail.com @@ -2316,8 +2306,6 @@ spambox.info spambox.irishspringrealty.com spambox.org spambox.us -spamcannon.com -spamcannon.net spamcero.com spamcon.org spamcorptastic.com @@ -2340,9 +2328,6 @@ spamfree24.info spamfree24.net spamfree24.org spamgoes.in -spamgourmet.com -spamgourmet.net -spamgourmet.org spamherelots.com spamhereplease.com spamhole.com @@ -2859,7 +2844,6 @@ xn--9kq967o.com xn--d-bga.net xost.us xoxox.cc -xoxy.net xperiae5.com xrho.com xvx.us @@ -2886,7 +2870,6 @@ yaqp.com ycare.de ycn.ro ye.vc -yeah.net yedi.org yep.it yert.ye.vc