Skip to content

Commit

Permalink
pull out extra file dataframe creation into function, formatting & small fixups
Browse files: browse the repository at this point in the history
  • Loading branch information
jurraca committed Nov 20, 2024
1 parent 918079b commit 6f7a396
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 36 deletions.
74 changes: 45 additions & 29 deletions kartograf/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@

class BaseNetworkIndex:
'''
A class whose _dict represents a mapping of the network number and IP networks within that network for a given AS file.
A class whose _dict represents a mapping of the network number and
IP networks within that network for a given AS file.
To check inclusion of a given IP network in the base AS file, we can compare (see check_inclusion) the networks under the root network number instead of all the networks in the base file.
To check inclusion of a given IP network in the base AS file,
we can compare (see check_inclusion) the networks under the root network number
instead of all the networks in the base file.
'''


Expand All @@ -21,7 +24,12 @@ def get(self):
return self._dict

def update(self, pfx):
ipn = ipaddress.ip_network(pfx)
try:
ipn = ipaddress.ip_network(pfx)
except ValueError:
print(f"Invalid prefix provided: {pfx}")
return

netw = int(ipn.network_address)
mask = int(ipn.netmask)
v = ipn.version
Expand Down Expand Up @@ -51,7 +59,7 @@ def contains_row(self, row):
version = ipaddress.ip_network(row.PFXS).version
if version == 4 and (root_net in self._v4_keys):
return self.check_inclusion(row, root_net, version)
elif version == 6 and (root_net in self._v6_keys):
if version == 6 and (root_net in self._v6_keys):
return self.check_inclusion(row, root_net, version)
return 0

Expand Down Expand Up @@ -95,28 +103,19 @@ def merge_pfx2as(context):
shutil.copy2(out_file, context.final_result_file)


def general_merge(
base_file, extra_file, extra_filtered_file, out_file
):
"""
Merge lists of IP networks into a base file.
"""
print("Parse base file to dictionary")
base = BaseNetworkIndex()
with open(base_file, "r") as file:
for line in file:
pfx, asn = line.split(" ")
base.update(pfx)

print("Parse extra file to Pandas DataFrame")
def extra_file_to_df(extra_file_path):
extra_nets_int = []
extra_asns = []
extra_pfxs = []
extra_pfxs_leading = []
with open(extra_file, "r") as file:
with open(extra_file_path, "r") as file:
for line in file:
pfx, asn = line.split(" ")
ipn = ipaddress.ip_network(pfx)
try:
ipn = ipaddress.ip_network(pfx)
except ValueError:
print(f"Invalid IP network: {pfx}, skipping")
continue
netw_int = int(ipn.network_address)
extra_nets_int.append(netw_int)
extra_asns.append(asn.strip())
Expand All @@ -127,14 +126,33 @@ def general_merge(
root_net = str(pfx).split(":", maxsplit=1)[0]
extra_pfxs_leading.append(root_net)

df_extra = pd.DataFrame()
df_extra["INETS"] = extra_nets_int
df_extra["ASNS"] = extra_asns
df_extra["PFXS"] = extra_pfxs
df_extra["PFXS_LEADING"] = extra_pfxs_leading
df_extra = pd.DataFrame({
"INETS": extra_nets_int,
"ASNS": extra_asns,
"PFXS": extra_pfxs,
"PFXS_LEADING": extra_pfxs_leading
})

print("Merging extra prefixes that were not included in the base file:\n")
return df_extra


def general_merge(
base_file, extra_file, extra_filtered_file, out_file
):
"""
Merge lists of IP networks into a base file.
"""
print("Parse base file to dictionary")
base = BaseNetworkIndex()
with open(base_file, "r") as file:
for line in file:
pfx, _ = line.split(" ")
base.update(pfx)

print("Parse extra file to Pandas DataFrame")
df_extra = extra_file_to_df(extra_file)

print("Merging extra prefixes that were not included in the base file:\n")
extra_included = []
for row in df_extra.itertuples(index=False):
result = base.contains_row(row)
Expand Down Expand Up @@ -173,7 +191,5 @@ def general_merge(
with open(base_file, "r") as base:
base_contents = base.read()

merged_contents = base_contents + extra_contents

with open(out_file, "w") as merge_file:
merge_file.write(merged_contents)
merge_file.write(base_contents + extra_contents)
12 changes: 5 additions & 7 deletions tests/merge_base_class_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
import pandas as pd
from kartograf.merge import BaseNetworkIndex


def test_base_dict_create():
base = BaseNetworkIndex()
state = base.get()
assert state == {4: {}, 6: {}}


def test_base_dict_update():
base = BaseNetworkIndex()
state = base.get()
Expand All @@ -23,12 +25,8 @@ def test_check_inclusion():
subnet = "10.10.0.0/21"
network_int = int(ipaddress.ip_network(subnet).network_address)
df_extra = pd.DataFrame(
data={
"INETS": network_int,
"ASNS": 345,
"PFXS": subnet,
"PFXS_LEADING": 10},
index=[0])
data={"INETS": network_int, "ASNS": 345, "PFXS": subnet, "PFXS_LEADING": 10},
index=[0],
)
for row in df_extra.itertuples(index=False):
assert base.contains_row(row)

0 comments on commit 6f7a396

Please sign in to comment.