Skip to content

Commit

Permalink
Merge pull request #47 from aollier/merger-gzip
Browse files Browse the repository at this point in the history
Merger gzip
  • Loading branch information
parmentelat authored Jan 15, 2021
2 parents f07b615 + 104a73c commit 1b4303d
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 85 deletions.
40 changes: 21 additions & 19 deletions data/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class Compare(object):
"""
An object for comparing a file with its reference version
A class for comparing a file with its reference version
that is expected to be found in <filename>.ref
"""

Expand All @@ -17,15 +17,8 @@ class Compare(object):
re.compile("<color>[0-9a-f]+</color>"),
]

def __init__(self, filename, ref_name=None):
"""
create object from its filename
compute reference filename if not specified
"""
self.filename = filename
self.ref_name = ref_name or f"{self.filename}.ref"

def _bool_compare(self):
@staticmethod
def _bool_compare(fn_fm, fnref_fm):
"""
returns True if both files match - modulo ignored portions
- and False otherwise
Expand All @@ -37,28 +30,37 @@ def _bool_compare(self):
# contents[1] for the reference version
contents = [None, None]

for i, name in enumerate((self.filename, self.ref_name)):
for i, (name, fm) in enumerate((fn_fm, fnref_fm)):
try:
with open(name, "r", newline="\n") as input:
with fm.open(name, mode=fm.READ) as input:
full = input.read()
# remove ignored portions
for ignore in self.ignore_regexps:
full = re.sub(ignore, '', full)
# remove ignored portions
for ignore in Compare.ignore_regexps:
full = ignore.sub('', full)
contents[i] = full
# if anything goes wrong we just return False
except Exception as e:
print(f"Could not read output {name}")
return False
# result is True iff both contents match
# result is True if both contents match
return contents[0] == contents[1]

def compare_and_print(self):
@staticmethod
def compare_and_print(fn_fm, fnref_fm):
"""
Checks for equality and prints a one-liner
returns a boolean that says if both files indeed are equal
Args:
fn_fm: tuple of filename to test and filemanager
fnref_fm: tuple of filename of reference and filemanager
"""
bool_result = self._bool_compare()
filename, _ = fn_fm
ref_name, fm = fnref_fm
# compute reference filename if not specified
ref_name = ref_name or f"{filename}.ref"
bool_result = Compare._bool_compare(fn_fm, (ref_name, fm))
status = "OK" if bool_result else "KO"
message = f"Comparing {self.filename} and {self.ref_name} -> {status}"
message = f"Comparing {filename} and {ref_name} -> {status}"
print(message)
return bool_result
48 changes: 48 additions & 0 deletions data/file_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import abc
import enum
import gzip


class AbstractFileManager(abc.ABC):
    """
    Common interface for opening text files through a
    subclass-specific opener

    Concrete subclasses are expected to provide
    - a nested ``Mode`` enumeration with ``READ`` and ``WRITE`` members,
      whose values are the mode strings understood by their opener
    - a ``_open`` static method that does the actual opening
    """

    @classmethod
    def open(cls, filename, mode):
        """
        open filename with the opener of this class

        the encoding is always UTF-8 and newline is always "\\n",
        whatever the subclass

        Args:
            filename: name of the file to open
            mode: mode string, typically the READ or WRITE attribute
                of a file manager instance
        Returns:
            whatever the subclass's _open returns
        """
        # plain keyword arguments rather than unpacking a dict literal
        return cls._open(filename, mode=mode, encoding="UTF-8", newline="\n")

    @staticmethod
    @abc.abstractmethod
    def _open(filename, **kwargs):
        """
        do the actual opening - to be implemented by subclasses

        Args:
            filename: name of the file to open
            **kwargs: the mode, encoding and newline settings chosen by open()
        """

    # NOTE: READ and WRITE are properties, so they resolve to a mode
    # string on an *instance* only; on the class itself they evaluate
    # to the property object, even though open() is a classmethod

    @property
    def READ(self):
        """mode string for reading, taken from the subclass's Mode enum"""
        return self.Mode.READ.value

    @property
    def WRITE(self):
        """mode string for writing, taken from the subclass's Mode enum"""
        return self.Mode.WRITE.value


class GzipFileManager(AbstractFileManager):
    """
    File manager whose opener is gzip.open, used in text mode
    """

    @enum.unique
    class Mode(enum.Enum):
        # text-mode strings, as passed to gzip.open
        READ = "rt"
        WRITE = "wt"

    @staticmethod
    def _open(filename, **kwargs):
        """
        open filename with gzip.open, forwarding all keyword
        arguments untouched, and return the resulting file object
        """
        stream = gzip.open(filename=filename, **kwargs)
        return stream


class TextFileManager(AbstractFileManager):
    """
    File manager whose opener is the builtin open
    """

    @enum.unique
    class Mode(enum.Enum):
        # mode strings, as passed to the builtin open
        READ = "r"
        WRITE = "w"

    @staticmethod
    def _open(filename, **kwargs):
        """
        open filename with the builtin open, forwarding all keyword
        arguments untouched, and return the resulting file object
        """
        stream = open(file=filename, **kwargs)
        return stream
29 changes: 18 additions & 11 deletions data/merger.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Version : 3.0

# standard library imports
import gzip
import json
import glob
from argparse import ArgumentParser
#

# application imports
import file_manager
from shipdict import ShipDict
from kml import KML
from compare import Compare
Expand Down Expand Up @@ -50,6 +48,12 @@ def __init__(self):

self.ship_dict = ShipDict()

# file manager
if self.args.gzip:
self.fm = file_manager.GzipFileManager()
else:
self.fm = file_manager.TextFileManager()

def merge(self, json_filenames):
"""
given a list of json filenames, decode the JSON content
Expand Down Expand Up @@ -95,7 +99,8 @@ def write_ships_summary(self, ships, out_name):

print(f"Opening {filename} for listing ships")

with open(filename, 'w', newline="\n") as summary:
tfm = file_manager.TextFileManager()
with tfm.open(filename, tfm.WRITE) as summary:
# one line to say how many ships we have seen
summary.write(f"Found {len(ships)} ships\n")
# ships are expected to be sorted already
Expand Down Expand Up @@ -131,8 +136,7 @@ def write_kml_output(self, ships, out_name):
# message
print(f"Opening {kml_filename} for ship {out_name}")
# open a plain file or compressed file as requested
with gzip.open(kml_filename, 'w', newline="\n") if self.args.gzip \
else open(kml_filename, 'w', newline="\n") as out:
with self.fm.open(kml_filename, mode=self.fm.WRITE) as out:
out.write(contents)
# return filename
return kml_filename
Expand Down Expand Up @@ -186,9 +190,7 @@ def main(self):
output_name = self.args.ship_name

# sort ships once and for good
# we can't sort these objects inline as they are
# typically dict_values objects
ships = sorted(ships, key=lambda ship: ship.name)
ships.sort(key=lambda ship: ship.name)

# create summary file
summary_filename = self.write_ships_summary(ships, output_name)
Expand All @@ -201,8 +203,13 @@ def main(self):
else:
# for each of the 2 files, compare contents with the reference
# that is expected to be in this directory with a .ref extension
ok_summary = Compare(summary_filename).compare_and_print()
ok_kml = Compare(kml_filename).compare_and_print()
tfm = file_manager.TextFileManager()
ok_summary = Compare.compare_and_print(
(summary_filename, tfm),
(None, tfm))
ok_kml = Compare.compare_and_print(
(kml_filename, self.fm),
("ALL_SHIPS.kml.ref", tfm))
# is everything fine ?
ok = ok_summary and ok_kml
# if so return 0 otherwise 1
Expand Down
2 changes: 1 addition & 1 deletion data/shipdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,6 @@ def all_ships(self):
# we need to create an actual list because it
# may need to be sorted later on, and so
# a raw dict_values object won't be good enough
return self.values()
return list(self.values())

# @END@
Loading

0 comments on commit 1b4303d

Please sign in to comment.