Check for lines with third or lower level domains

This commit is contained in:
Dustin Ingram 2020-02-12 14:43:02 -06:00
parent f39c3d8ea5
commit 93debdb7e1
No known key found for this signature in database
GPG Key ID: 93D2B8D4930A5E39

View File

@ -29,7 +29,6 @@ def check_for_public_suffixes(filename):
lines = files[filename]
suffix_detected = False
psl = None
download_suffixes()
with open("public_suffix_list.dat", "r") as latest:
psl = PublicSuffixList(latest)
for i, line in enumerate(lines):
@ -49,6 +48,22 @@ def check_for_public_suffixes(filename):
sys.exit(1)
def check_for_third_level_domains(filename):
    """Fail the run if ``filename`` lists a third or lower level domain.

    Each line of ``files[filename]`` (the module-level mapping of file name
    to lines) is stripped and split with ``PublicSuffixList.privateparts``.
    A line is reported when that split yields more than one part, i.e. when
    there is at least one label in front of the registrable domain.

    Lines for which ``privateparts`` returns ``None`` (no private suffix
    could be determined, e.g. a blank line or a bare public suffix) are
    skipped here rather than crashing on ``len(None)``; they are not third
    level domains.

    Expects ``public_suffix_list.dat`` to already exist in the current
    working directory.

    Args:
        filename: Key into ``files`` naming the list to check; also used in
            the printed report.

    Raises:
        SystemExit: With status 1 after printing the offending lines, if
            any were found.
    """
    # The Public Suffix List is UTF-8 encoded; do not rely on the locale
    # default encoding when reading it.
    with open("public_suffix_list.dat", "r", encoding="utf-8") as latest:
        psl = PublicSuffixList(latest)

    invalid = set()
    for line in files[filename]:
        parts = psl.privateparts(line.strip())
        # ``parts`` is None when the line has no private suffix at all.
        if parts is not None and len(parts) > 1:
            invalid.add(line)

    if invalid:
        print("The following domains contain a third or lower level domain in {!r}:".format(filename))
        for line in sorted(invalid):
            print("* {}".format(line))
        sys.exit(1)
def check_for_non_lowercase(filename):
lines = files[filename]
invalid = set(lines) - set(line.lower() for line in lines)
@ -90,10 +105,15 @@ def check_for_intersection(filename_a, filename_b):
if __name__ == "__main__":
# Download the list of public suffixes
download_suffixes()
# Check if any domains have a public suffix
check_for_public_suffixes(blocklist)
# Check if any domains are a third or lower level domain
check_for_third_level_domains(blocklist)
# Check if any domains are not lowercase
check_for_non_lowercase(allowlist)
check_for_non_lowercase(blocklist)