Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merger gzip #47

Merged
merged 7 commits into from
Jan 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions data/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class Compare(object):
"""
An object for comparing a file with its reference version
A class for comparing a file with its reference version
that is expected to be found in <filename>.ref
"""

Expand All @@ -17,15 +17,8 @@ class Compare(object):
re.compile("<color>[0-9a-f]+</color>"),
]

def __init__(self, filename, ref_name=None):
"""
create object from its filename
compute reference filename if not specified
"""
self.filename = filename
self.ref_name = ref_name or f"{self.filename}.ref"

def _bool_compare(self):
@staticmethod
def _bool_compare(fn_fm, fnref_fm):
"""
returns True if both files match - modulo ignored portions
- and False otherwise
Expand All @@ -37,28 +30,37 @@ def _bool_compare(self):
# contents[1] for the reference version
contents = [None, None]

for i, name in enumerate((self.filename, self.ref_name)):
for i, (name, fm) in enumerate((fn_fm, fnref_fm)):
try:
with open(name, "r", newline="\n") as input:
with fm.open(name, mode=fm.READ) as input:
full = input.read()
# remove ignored portions
for ignore in self.ignore_regexps:
full = re.sub(ignore, '', full)
# remove ignored portions
for ignore in Compare.ignore_regexps:
full = ignore.sub('', full)
contents[i] = full
# if anything goes wrong we just return False
except Exception as e:
print(f"Could not read output {name}")
return False
# result is True iff both contents match
# result is True if both contents match
return contents[0] == contents[1]

def compare_and_print(self):
@staticmethod
def compare_and_print(fn_fm, fnref_fm):
"""
Checks for equality and prints a one-liner
returns a boolean that says if both files indeed are equal

Args:
fn_fm: tuple of filename to test and filemanager
fnref_fm: tuple of filename of reference and filemanager
"""
bool_result = self._bool_compare()
filename, _ = fn_fm
ref_name, fm = fnref_fm
# compute reference filename if not specified
ref_name = ref_name or f"{filename}.ref"
bool_result = Compare._bool_compare(fn_fm, (ref_name, fm))
status = "OK" if bool_result else "KO"
message = f"Comparing {self.filename} and {self.ref_name} -> {status}"
message = f"Comparing {filename} and {ref_name} -> {status}"
print(message)
return bool_result
48 changes: 48 additions & 0 deletions data/file_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import abc
import enum
import gzip


class AbstractFileManager(abc.ABC):
    """
    Base class for objects that know how to open a file in text mode.

    The public entry point is ``open``; the actual opening is delegated
    to ``_open``, which every concrete subclass must implement.
    ``READ`` and ``WRITE`` look up a ``Mode`` attribute that this class
    does not define itself, so subclasses are expected to supply it.
    """

    @classmethod
    def open(cls, filename, mode):
        """
        Open ``filename`` through the subclass-specific ``_open``.

        The encoding is always UTF-8 and the newline setting is always
        "\\n"; only the mode is chosen by the caller.

        Args:
          filename: name of the file to open
          mode: mode string, typically ``READ`` or ``WRITE``

        Returns:
          whatever ``_open`` returns
        """
        return cls._open(filename, encoding="UTF-8", mode=mode, newline="\n")

    @staticmethod
    @abc.abstractmethod
    def _open(filename, **kwargs):
        """
        Do the actual opening; to be implemented by subclasses.

        Args:
          filename: name of the file to open
          kwargs: the ``encoding``, ``mode`` and ``newline`` settings
            prepared by ``open``
        """

    @property
    def READ(self):
        """
        the mode string to pass to ``open`` for reading
        """
        read_member = self.Mode.READ
        return read_member.value

    @property
    def WRITE(self):
        """
        the mode string to pass to ``open`` for writing
        """
        write_member = self.Mode.WRITE
        return write_member.value


class GzipFileManager(AbstractFileManager):
    """
    File manager that opens its files through ``gzip.open``.
    """

    @enum.unique
    class Mode(enum.Enum):
        """
        mode strings handed to ``gzip.open``
        """
        READ = "rt"
        WRITE = "wt"

    @staticmethod
    def _open(filename, **kwargs):
        """
        Open ``filename`` with ``gzip.open``, forwarding all keyword
        arguments untouched.
        """
        return gzip.open(filename, **kwargs)


class TextFileManager(AbstractFileManager):
    """
    File manager that opens its files with the builtin ``open``.
    """

    @enum.unique
    class Mode(enum.Enum):
        """
        mode strings handed to the builtin ``open``
        """
        READ = "r"
        WRITE = "w"

    @staticmethod
    def _open(filename, **kwargs):
        """
        Open ``filename`` with the builtin ``open``, forwarding all
        keyword arguments untouched.
        """
        return open(filename, **kwargs)
29 changes: 18 additions & 11 deletions data/merger.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Version : 3.0

# standard library imports
import gzip
import json
import glob
from argparse import ArgumentParser
#

# application imports
import file_manager
from shipdict import ShipDict
from kml import KML
from compare import Compare
Expand Down Expand Up @@ -50,6 +48,12 @@ def __init__(self):

self.ship_dict = ShipDict()

# file manager
if self.args.gzip:
self.fm = file_manager.GzipFileManager()
else:
self.fm = file_manager.TextFileManager()

def merge(self, json_filenames):
"""
given a list of json filenames, decode the JSON content
Expand Down Expand Up @@ -95,7 +99,8 @@ def write_ships_summary(self, ships, out_name):

print(f"Opening {filename} for listing ships")

with open(filename, 'w', newline="\n") as summary:
tfm = file_manager.TextFileManager()
with tfm.open(filename, tfm.WRITE) as summary:
# one line to say how many ships we have seen
summary.write(f"Found {len(ships)} ships\n")
# ships are expected to be sorted already
Expand Down Expand Up @@ -131,8 +136,7 @@ def write_kml_output(self, ships, out_name):
# message
print(f"Opening {kml_filename} for ship {out_name}")
# open a plain file or compressed file as requested
with gzip.open(kml_filename, 'w', newline="\n") if self.args.gzip \
else open(kml_filename, 'w', newline="\n") as out:
with self.fm.open(kml_filename, mode=self.fm.WRITE) as out:
out.write(contents)
# return filename
return kml_filename
Expand Down Expand Up @@ -186,9 +190,7 @@ def main(self):
output_name = self.args.ship_name

# sort ships once and for good
# we can't sort these objects inline as they are
# typically dict_values objects
ships = sorted(ships, key=lambda ship: ship.name)
ships.sort(key=lambda ship: ship.name)

# create summary file
summary_filename = self.write_ships_summary(ships, output_name)
Expand All @@ -201,8 +203,13 @@ def main(self):
else:
# for each of the 2 files, compare contents with the reference
# that is expected to be in this directory with a .ref extension
ok_summary = Compare(summary_filename).compare_and_print()
ok_kml = Compare(kml_filename).compare_and_print()
tfm = file_manager.TextFileManager()
ok_summary = Compare.compare_and_print(
(summary_filename, tfm),
(None, tfm))
ok_kml = Compare.compare_and_print(
(kml_filename, self.fm),
("ALL_SHIPS.kml.ref", tfm))
# is everything fine ?
ok = ok_summary and ok_kml
# if so return 0 otherwise 1
Expand Down
2 changes: 1 addition & 1 deletion data/shipdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,6 @@ def all_ships(self):
# we need to create an actual list because it
# may need to be sorted later on, and so
# a raw dict_values object won't be good enough
return self.values()
return list(self.values())

# @END@
Loading