From d7ac0a4d28613a7e4935114bb89379420385d360 Mon Sep 17 00:00:00 2001 From: Mikael Kullberg Date: Tue, 19 Nov 2024 20:08:08 +0000 Subject: [PATCH 1/5] Order is important for longest-match... Also, typing.Self not in 3.9. --- dnstapir/dns/mozpsl.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/dnstapir/dns/mozpsl.py b/dnstapir/dns/mozpsl.py index 9edd97b..7ed72c9 100644 --- a/dnstapir/dns/mozpsl.py +++ b/dnstapir/dns/mozpsl.py @@ -1,6 +1,4 @@ import io -from typing import Self - import httpx @@ -10,7 +8,7 @@ class TrieNode: def __init__(self) -> None: self.count = 0 self.icann: bool | None = None - self.children: dict[str, Self] = {} + self.children = {} class Trie: @@ -47,17 +45,21 @@ def search(self, key: list[str]) -> tuple[int, int]: pcore = 0 current = self.root for label in key: - if current.icann is True: - core = current.count - elif current.icann is False: - pcore = current.count - # # If current.icann is None, do not update core or pcore if label not in current.children: + #if '*' in current.children: + # current = current.children['*'] if current.count != 0: break else: raise KeyError - current = current.children[label] + else: + current = current.children[label] + + # If current.icann is None, do not update core or pcore + if current.icann is True: + core = current.count + elif current.icann is False: + pcore = current.count if pcore == core: pcore = 0 return (core, pcore) @@ -133,7 +135,7 @@ def coredomain(self, domain: str) -> tuple[str, str]: core.reverse() pcore = lbls[0:p] pcore.reverse() - return (".".join(core), ".".join(pcore)) + return (".".join(core)+".", ".".join(pcore)+".") def rdomain(self, rdomain: str) -> tuple[str, str]: """Find ICANN and private name cut-off for domain, reverse order process""" From 34f70a2a0f196d29d23bd5c9e1fc0d05e2de6c7b Mon Sep 17 00:00:00 2001 From: Mikael Kullberg Date: Tue, 19 Nov 2024 20:31:16 +0000 Subject: [PATCH 2/5] Fixed tests --- tests/test_dns_mozpsl.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_dns_mozpsl.py b/tests/test_dns_mozpsl.py index f79cd0f..3e21793 100644 --- a/tests/test_dns_mozpsl.py +++ b/tests/test_dns_mozpsl.py @@ -9,24 +9,24 @@ def test_mozpsl(): psl = PublicSuffixList() psl.load_psl_url(url=MOZ_PSL) - assert psl.coredomain("www.ck.") == ("www.ck", "") - assert psl.coredomain("www.something.gov.ck.") == ("something.gov.ck", "") - assert psl.coredomain("www.something.or.other.microsoft.com.") == ("microsoft.com", "") - assert psl.coredomain("www.something.or.other.microsoft.com.br.") == ("microsoft.com.br", "") + assert psl.coredomain("www.ck.") == ("ck.", "") + assert psl.coredomain("www.something.gov.ck.") == ("something.gov.ck.", "") + assert psl.coredomain("www.something.or.other.microsoft.com.") == ("microsoft.com.", "") + assert psl.coredomain("www.something.or.other.microsoft.com.br.") == ("microsoft.com.br.", "") assert psl.coredomain("www.something.emrstudio-prod.us-gov-east-1.amazonaws.com.") == ( - "amazonaws.com", - "something.emrstudio-prod.us-gov-east-1.amazonaws.com", + "amazonaws.com.", + "something.emrstudio-prod.us-gov-east-1.amazonaws.com.", ) assert psl.rdomain("com.amazonaws.us-gov-east-1.emrstudio-prod.www.something.emrstudio-prod") == ( - "com.amazonaws", - "com.amazonaws.us-gov-east-1.emrstudio-prod.www", + "com.amazonaws.", + "com.amazonaws.us-gov-east-1.emrstudio-prod.www.", ) with pytest.raises(KeyError): psl.coredomain("local.") # IDN test - assert psl.coredomain("www.xn--mnchen-3ya.de.") == ("xn--mnchen-3ya.de", "") + assert psl.coredomain("www.xn--mnchen-3ya.de.") == ("xn--mnchen-3ya.de.", "") # Edge cases with pytest.raises(ValueError): From 08e0e5ce2fc05f2c53413b3b08cd7e6db4c4415c Mon Sep 17 00:00:00 2001 From: Mikael Kullberg Date: Tue, 19 Nov 2024 20:40:21 +0000 Subject: [PATCH 3/5] Add dots to FQDNs --- dnstapir/dns/mozpsl.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/dnstapir/dns/mozpsl.py b/dnstapir/dns/mozpsl.py index 7ed72c9..05534a2 100644 --- a/dnstapir/dns/mozpsl.py +++ b/dnstapir/dns/mozpsl.py @@ -130,12 +130,23 @@ def coredomain(self, domain: str) -> tuple[str, str]: raise ValueError from exc lbls = domain.split(".") lbls.reverse() + c, p = self.trie.search(lbls) - core = lbls[0:c] - core.reverse() - pcore = lbls[0:p] - pcore.reverse() - return (".".join(core)+".", ".".join(pcore)+".") + if c != 0: + core = lbls[0:c] + core.reverse() + core_txt = ".".join(core) + "." + else: + core_txt = "" + + if p != 0: + pcore = lbls[0:p] + pcore.reverse() + pcore_txt = ".".join(pcore) + "." + else: + pcore_txt = "" + + return (core_txt, pcore_txt) def rdomain(self, rdomain: str) -> tuple[str, str]: """Find ICANN and private name cut-off for domain, reverse order process""" From e10497f809249f323628ef5ba87547ac2b22d305 Mon Sep 17 00:00:00 2001 From: Mikael Kullberg Date: Tue, 19 Nov 2024 20:47:28 +0000 Subject: [PATCH 4/5] Tests, tests... now properly fixed, I hope. --- tests/test_dns_mozpsl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_dns_mozpsl.py b/tests/test_dns_mozpsl.py index 3e21793..1900807 100644 --- a/tests/test_dns_mozpsl.py +++ b/tests/test_dns_mozpsl.py @@ -18,8 +18,8 @@ def test_mozpsl(): "something.emrstudio-prod.us-gov-east-1.amazonaws.com.", ) assert psl.rdomain("com.amazonaws.us-gov-east-1.emrstudio-prod.www.something.emrstudio-prod") == ( - "com.amazonaws.", - "com.amazonaws.us-gov-east-1.emrstudio-prod.www.", + "com.amazonaws", + "com.amazonaws.us-gov-east-1.emrstudio-prod.www", ) with pytest.raises(KeyError): From 962d8bf8663553f3cca6d72cea246ddcad593aac Mon Sep 17 00:00:00 2001 From: Jakob Schlyter Date: Wed, 20 Nov 2024 09:21:31 +0100 Subject: [PATCH 5/5] we want typing --- dnstapir/dns/mozpsl.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dnstapir/dns/mozpsl.py b/dnstapir/dns/mozpsl.py index 05534a2..461879a 100644 --- a/dnstapir/dns/mozpsl.py +++ b/dnstapir/dns/mozpsl.py @@ -1,4 +1,5 @@ import io + import httpx @@ -8,7 +9,7 @@ class TrieNode: def __init__(self) -> None: self.count = 0 self.icann: bool | None = None - self.children = {} + self.children: dict[str, TrieNode] = {} class Trie: @@ -46,7 +47,7 @@ def search(self, key: list[str]) -> tuple[int, int]: current = self.root for label in key: if label not in current.children: - #if '*' in current.children: + # if '*' in current.children: # current = current.children['*'] if current.count != 0: break @@ -142,7 +143,7 @@ def coredomain(self, domain: str) -> tuple[str, str]: if p != 0: pcore = lbls[0:p] pcore.reverse() - pcore_txt = ".".join(pcore) + "." + pcore_txt = ".".join(pcore) + "." else: pcore_txt = ""