Merge pull request #175 from di/add-more-ci-checks

Add more CI checks
This commit is contained in:
Martin Cech 2018-08-24 10:44:14 -04:00 committed by GitHub
commit cb6632eae4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 87 additions and 32 deletions

View File

@ -304,8 +304,6 @@ anonymousness.com
anotherdomaincyka.tk anotherdomaincyka.tk
ansibleemail.com ansibleemail.com
anthony-junkmail.com anthony-junkmail.com
antichef.com
antichef.net
antireg.com antireg.com
antireg.ru antireg.ru
antispam.de antispam.de
@ -472,7 +470,6 @@ buzzcluby.com
byebyemail.com byebyemail.com
byespm.com byespm.com
byom.de byom.de
c2.hu
c51vsgq.com c51vsgq.com
cachedot.net cachedot.net
californiafitnessdeals.com californiafitnessdeals.com
@ -640,7 +637,6 @@ dev-null.gq
dev-null.ml dev-null.ml
devnullmail.com devnullmail.com
deyom.com deyom.com
dfgh.net
dharmatel.net dharmatel.net
dhm.ro dhm.ro
dhy.cc dhy.cc
@ -1753,6 +1749,7 @@ mohmal.com
mohmal.im mohmal.im
mohmal.in mohmal.in
mohmal.tech mohmal.tech
molms.com
momentics.ru momentics.ru
monachat.tk monachat.tk
monadi.ml monadi.ml
@ -1847,7 +1844,6 @@ netricity.nl
netris.net netris.net
netviewer-france.com netviewer-france.com
netzidiot.de netzidiot.de
neverbox.com
nevermail.de nevermail.de
newbpotato.tk newbpotato.tk
newideasfornewpeople.info newideasfornewpeople.info
@ -2104,7 +2100,6 @@ receiveee.com
recipeforfailure.com recipeforfailure.com
recode.me recode.me
reconmail.com reconmail.com
recursor.net
recyclemail.dk recyclemail.dk
reddit.usa.cc reddit.usa.cc
redfeathercrow.com redfeathercrow.com
@ -2154,7 +2149,6 @@ s33db0x.com
sabrestlouis.com sabrestlouis.com
sackboii.com sackboii.com
safaat.cf safaat.cf
safe-mail.net
safermail.info safermail.info
safersignup.de safersignup.de
safetymail.info safetymail.info
@ -2228,7 +2222,6 @@ showslow.de
shrib.com shrib.com
shut.name shut.name
shut.ws shut.ws
sibmail.com
sify.com sify.com
sikux.com sikux.com
siliwangi.ga siliwangi.ga
@ -2271,9 +2264,7 @@ smtp99.com
smwg.info smwg.info
snakemail.com snakemail.com
snapwet.com snapwet.com
sneakemail.com
sneakmail.de sneakmail.de
snkmail.com
social-mailer.tk social-mailer.tk
socialfurry.org socialfurry.org
sofimail.com sofimail.com
@ -2315,8 +2306,6 @@ spambox.info
spambox.irishspringrealty.com spambox.irishspringrealty.com
spambox.org spambox.org
spambox.us spambox.us
spamcannon.com
spamcannon.net
spamcero.com spamcero.com
spamcon.org spamcon.org
spamcorptastic.com spamcorptastic.com
@ -2339,9 +2328,6 @@ spamfree24.info
spamfree24.net spamfree24.net
spamfree24.org spamfree24.org
spamgoes.in spamgoes.in
spamgourmet.com
spamgourmet.net
spamgourmet.org
spamherelots.com spamherelots.com
spamhereplease.com spamhereplease.com
spamhole.com spamhole.com
@ -2858,7 +2844,6 @@ xn--9kq967o.com
xn--d-bga.net xn--d-bga.net
xost.us xost.us
xoxox.cc xoxox.cc
xoxy.net
xperiae5.com xperiae5.com
xrho.com xrho.com
xvx.us xvx.us
@ -2885,7 +2870,6 @@ yaqp.com
ycare.de ycare.de
ycn.ro ycn.ro
ye.vc ye.vc
yeah.net
yedi.org yedi.org
yep.it yep.it
yert.ye.vc yert.ye.vc
@ -2957,4 +2941,3 @@ zxcvbnm.com
zymuying.com zymuying.com
zzi.us zzi.us
zzz.com zzz.com
molms.com

View File

@ -5,33 +5,105 @@
import io import io
import sys import sys
from collections import Counter
from publicsuffixlist import PublicSuffixList from publicsuffixlist import PublicSuffixList
from requests import get from requests import get
def main(arguments):
blacklist = "disposable_email_blacklist.conf"
whitelist = "whitelist.conf"
files = {
filename: open(filename).read().splitlines() for filename in [whitelist, blacklist]
}
def download_suffixes():
    """Fetch the Public Suffix List and store it as ``public_suffix_list.dat``.

    The download happens before the local file is opened, so a failed
    request no longer leaves a truncated/empty file behind.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
            Without this, an error page would be written to disk and later
            parsed as if it were the suffix list.
    """
    # A timeout keeps a CI job from hanging indefinitely on a stalled connection.
    response = get("https://publicsuffix.org/list/public_suffix_list.dat", timeout=60)
    response.raise_for_status()
    with open("public_suffix_list.dat", "wb") as out:
        out.write(response.content)
def check_for_public_suffixes(filename):
    """Fail if any entry of *filename* is nothing but a public suffix.

    Downloads the current Public Suffix List via ``download_suffixes()``,
    loads it into a ``PublicSuffixList`` and compares every stripped line of
    the already-loaded list ``files[filename]`` against the public suffix the
    library reports for it.

    Args:
        filename: key into ``files`` naming the list to check.

    Exits the process with status 1 (after printing every offending line
    number) when at least one entry equals its own public suffix.
    """
    download_suffixes()
    with open("public_suffix_list.dat", "r") as latest:
        psl = PublicSuffixList(latest)

    suffix_detected = False
    # Line numbers are reported 1-based to match what an editor shows.
    for line_number, line in enumerate(files[filename], start=1):
        current_line = line.strip()
        if psl.publicsuffix(current_line) == current_line:
            print(
                f"The line number {line_number} contains just a public suffix: {current_line}"
            )
            suffix_detected = True

    if suffix_detected:
        print(
            "At least one valid public suffix found in {!r}, please "
            "remove it. See https://publicsuffix.org for details on why this "
            "shouldn't be blacklisted.".format(filename)
        )
        sys.exit(1)
def download_suffixes():
    """Fetch the Public Suffix List and store it as ``public_suffix_list.dat``.

    The download happens before the local file is opened, so a failed
    request no longer leaves a truncated/empty file behind.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
            Without this, an error page would be written to disk and later
            parsed as if it were the suffix list.
    """
    # A timeout keeps a CI job from hanging indefinitely on a stalled connection.
    response = get('https://publicsuffix.org/list/public_suffix_list.dat', timeout=60)
    response.raise_for_status()
    with open('public_suffix_list.dat', "wb") as out:
        out.write(response.content)
def check_for_non_lowercase(filename):
    """Fail if any entry of ``files[filename]`` is not entirely lowercase.

    Args:
        filename: key into ``files`` naming the list to check.

    Prints each distinct offending entry in sorted order and exits the
    process with status 1; returns normally when every entry is lowercase.
    """
    # An entry is an offender exactly when lowercasing it changes it.
    offenders = sorted({entry for entry in files[filename] if entry != entry.lower()})
    if not offenders:
        return

    print("The following domains should be lowercased in {!r}:".format(filename))
    for entry in offenders:
        print("* {}".format(entry))
    sys.exit(1)
def check_for_duplicates(filename):
    """Fail if any entry occurs more than once in ``files[filename]``.

    Args:
        filename: key into ``files`` naming the list to check.

    Prints each repeated entry once, in sorted order, and exits the process
    with status 1; returns normally when all entries are unique.
    """
    occurrences = Counter(files[filename])
    repeated = sorted(entry for entry, seen in occurrences.items() if seen > 1)
    if not repeated:
        return

    print("The following domains appear twice in {!r}:".format(filename))
    for entry in repeated:
        print("* {}".format(entry))
    sys.exit(1)
def check_sort_order(filename):
    """Fail if ``files[filename]`` is not in sorted order.

    Args:
        filename: key into ``files`` naming the list to check.

    Compares the list position by position with its sorted copy. At the
    first position that differs, reports which entry was expected there and
    exits the process with status 1; returns normally when no position differs.
    """
    entries = files[filename]
    first_mismatch = next(
        (
            (actual, expected)
            for actual, expected in zip(entries, sorted(entries))
            if actual != expected
        ),
        None,
    )
    if first_mismatch is None:
        return

    actual, expected = first_mismatch
    print("The list is not sorted in {!r}:".format(filename))
    print("* {!r} should come before {!r}".format(expected, actual))
    sys.exit(1)
def check_for_intersection(filename_a, filename_b):
    """Fail if any entry is present in both ``files[filename_a]`` and ``files[filename_b]``.

    Args:
        filename_a: key into ``files`` naming the first list.
        filename_b: key into ``files`` naming the second list.

    Prints each shared entry in sorted order and exits the process with
    status 1; returns normally when the two lists are disjoint.
    """
    intersection = set(files[filename_a]) & set(files[filename_b])
    if not intersection:
        return

    print("The following domains appear in both lists:")
    for line in sorted(intersection):
        print("* {}".format(line))
    # Exit non-zero like the other checks; previously the overlap was only
    # printed, so the run still succeeded with a domain on both lists.
    sys.exit(1)
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv)
# Check that no blacklist entry is itself just a public suffix
check_for_public_suffixes(blacklist)
# Check if any domains are not lowercase
check_for_non_lowercase(whitelist)
check_for_non_lowercase(blacklist)
# Check if any domains are duplicated in the same list
check_for_duplicates(whitelist)
check_for_duplicates(blacklist)
# Check if any lists are not sorted
check_sort_order(whitelist)
check_sort_order(blacklist)
# Check if any domains are in both the whitelist and blacklist
check_for_intersection(whitelist, blacklist)