Add more checks to CI

This commit is contained in:
Dustin Ingram 2018-08-23 22:44:20 -05:00
parent 9e0868427c
commit e2fb6546e1
No known key found for this signature in database
GPG Key ID: 93D2B8D4930A5E39

View File

@ -5,33 +5,105 @@
import io import io
import sys import sys
from collections import Counter
from publicsuffixlist import PublicSuffixList from publicsuffixlist import PublicSuffixList
from requests import get from requests import get
def main(arguments):
blacklist = "disposable_email_blacklist.conf"
whitelist = "whitelist.conf"
files = {
filename: open(filename).read().splitlines() for filename in [whitelist, blacklist]
}
def download_suffixes():
with open("public_suffix_list.dat", "wb") as file:
response = get("https://publicsuffix.org/list/public_suffix_list.dat")
file.write(response.content)
def check_for_public_suffixes(filename):
lines = files[filename]
suffix_detected = False suffix_detected = False
psl = None psl = None
download_suffixes() download_suffixes()
with open("public_suffix_list.dat", "r") as latest: with open("public_suffix_list.dat", "r") as latest:
psl = PublicSuffixList(latest) psl = PublicSuffixList(latest)
with io.open('disposable_email_blacklist.conf', 'r') as deb: for i, line in enumerate(lines):
for i, line in enumerate(deb):
current_line = line.strip() current_line = line.strip()
public_suffix = psl.publicsuffix(current_line) public_suffix = psl.publicsuffix(current_line)
if public_suffix == current_line: if public_suffix == current_line:
print(f'The line number {i+1} contains just a public suffix: {current_line}') print(
f"The line number {i+1} contains just a public suffix: {current_line}"
)
suffix_detected = True suffix_detected = True
if suffix_detected: if suffix_detected:
print ('At least one valid public suffix found in the blacklist, please remove it. See https://publicsuffix.org for details on why this shouldn\'t be blacklisted.') print(
"At least one valid public suffix found in {!r}, please "
"remove it. See https://publicsuffix.org for details on why this "
"shouldn't be blacklisted.".format(filename)
)
sys.exit(1) sys.exit(1)
def download_suffixes():
with open('public_suffix_list.dat', "wb") as file:
response = get('https://publicsuffix.org/list/public_suffix_list.dat')
file.write(response.content)
def check_for_non_lowercase(filename):
lines = files[filename]
invalid = set(lines) - set(line.lower() for line in lines)
if invalid:
print("The following domains should be lowercased in {!r}:".format(filename))
for line in sorted(invalid):
print("* {}".format(line))
sys.exit(1)
def check_for_duplicates(filename):
lines = files[filename]
count = Counter(lines) - Counter(set(lines))
if count:
print("The following domains appear twice in {!r}:".format(filename))
for line in sorted(count):
print("* {}".format(line))
sys.exit(1)
def check_sort_order(filename):
lines = files[filename]
for a, b in zip(lines, sorted(lines)):
if a != b:
print("The list is not sorted in {!r}:".format(filename))
print("* {!r} should come before {!r}".format(b, a))
sys.exit(1)
def check_for_intersection(filename_a, filename_b):
a = files[filename_a]
b = files[filename_b]
intersection = set(a) & set(b)
if intersection:
print("The following domains appear in both lists:")
for line in sorted(intersection):
print("* {}".format(line))
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv)
# Check if any domains have a public suffix
check_for_public_suffixes(blacklist)
# Check if any domains are not lowercase
check_for_non_lowercase(whitelist)
check_for_non_lowercase(blacklist)
# Check if any domains are duplicated in the same list
check_for_duplicates(whitelist)
check_for_duplicates(blacklist)
# Check if any lists are not sorted
check_sort_order(whitelist)
check_sort_order(blacklist)
# Check if any domains are in both the whitelist and blacklist
check_for_intersection(whitelist, blacklist)