diff --git a/README.md b/README.md index f115910..246271e 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,11 @@ pipenv install ``` **Running MQM** - Locate your [Atlas Checks](https://github.com/osmlab/atlas-checks) data to use as the input for MQM, and execute the following command: ``` -python3 -m mqm ---folderPath [a absolute folder path] ---maxDepth [maximum tree depth (default = 10)] +python3 -m mqm --input [input directory containing atlas check geometries and boundary files] +--output [output directory to store results] --maxDepth [maximum tree depth (default = 10)] --countNum [a count number (default = 10)] --gridPercent [a grid percentage (default = 0.9)] --maxCount [maximum count to the second k-d tree] diff --git a/src/mqm/geo_process.py b/src/mqm/geo_process.py index bd7f04d..1a55e05 100644 --- a/src/mqm/geo_process.py +++ b/src/mqm/geo_process.py @@ -4,6 +4,7 @@ import os from area import area from .utility import Utility +import gzip import argparse @@ -208,17 +209,22 @@ def bounding_box_process(self): start_point = 0 end_point = 0 - # loop through all geojson files for f in os.listdir(self.folder_path): # load the Geo-json file and ignore other files - if os.path.splitext(os.path.join(self.folder_path, f))[1] == '.geojson': + if ('geojson' in (os.path.join(self.folder_path, f)).split('.')) and not f.startswith('.'): if len(os.path.splitext(f)[0].split('-')) == 3: # pull out this function - name_num_list.append([os.path.splitext(f)[0].split('-')[0], int(os.path.splitext(f)[0].split('-')[2])]) + if os.path.splitext(os.path.join(self.folder_path, f))[1] == '.gz': + name_num_list.append([os.path.splitext(f)[0].split('-')[0], int((os.path.splitext(f)[0].split('-')[2]).split('.')[0])]) + else: + name_num_list.append([os.path.splitext(f)[0].split('-')[0], (int(os.path.splitext(f)[0].split('-')[2]))]) # open geojson files - with open(os.path.join(self.folder_path, f), encoding='utf-8') as new_f: - data = json.load(new_f) + if os.path.splitext(os.path.join(self.folder_path, f))[1] == '.gz': + new_f = gzip.open(os.path.join(self.folder_path, f)) + else: + new_f = open(os.path.join(self.folder_path, f), encoding='utf-8') + data = json.load(new_f) # randomly generate unique integers (flag ids) end_point = start_point + len(data['features']) @@ -238,7 +244,7 @@ def bounding_box_process(self): # ============================== geometry_bounding_box_list.append(geometry_bounding_box) self.output_data += tmp_geometry_collec - + else: # discard a feature without feature properties if len(data['features'][geometry_index]['properties']['feature_properties']) != 0: @@ -251,11 +257,11 @@ def bounding_box_process(self): data['features'][geometry_index]['properties']['feature_properties'][0]['identifier'], f]) geometry_bounding_box_list.append(geometry_bounding_box) - + # get a file bounding box for given multiple geometry bounding boxes, and add it into folder bounding box list folder_bounding_box_set.append(self.final_bounding_box_generation(geometry_bounding_box_list, 4)) del geometry_bounding_box_list - + # update start point start_point = len(data['features']) diff --git a/src/mqm/mqm_tool.py b/src/mqm/mqm_tool.py index fc67b97..277b4dc 100644 --- a/src/mqm/mqm_tool.py +++ b/src/mqm/mqm_tool.py @@ -7,6 +7,9 @@ from .utility import Utility from .geo_process import GeoProcessor import argparse +import gzip +from pathlib import Path +from zipfile import ZipFile import ast @@ -95,7 +98,7 @@ def get_argument(): This function grabs all of the arguments that the program needs. Returns: - args.folderPath: an input folder path. + folder_path: an input folder path. args.maxDepth: a maximum tree depth. output_folder: a result folder. int(args.countNum): a count number for a stop condition in the first k-d tree. @@ -107,17 +110,18 @@ def get_argument(): """ # declare arguments and variables parser = argparse.ArgumentParser() - parser.add_argument('--folderPath', type=str, default='', help='path to an input folder') + parser.add_argument('--input', type=str, default='', help='path to an input folder') parser.add_argument('--maxDepth', type=str, default='10', help='max depth of a k-d tree') parser.add_argument('--countNum', type=str, default='10', help='a count value for a stop condition') parser.add_argument('--gridPercent', type=str, default='0.9', help='a grid percentage') parser.add_argument('--maxCount', type=str, default='', help='maximum count to the second k-d tree') + parser.add_argument('--output', type=str, required=True, help='path to an output folder') args = parser.parse_args() max_count = -1 path = 'histogram' geojson_path = 'geojson' - folder_path = os.path.normpath(args.folderPath) - output_folder = os.path.join(os.path.split(folder_path)[0], 'result') + folder_path = os.path.normpath(args.input) + output_folder = os.path.normpath(args.output) if args.maxCount: max_count = int(args.maxCount) @@ -261,10 +265,6 @@ def process_single_folder(input_folder, folder_path, maximum_level, count_num, g initial_area = geo_processor.get_initial_extend_area(out_BB) del geo_processor - util = Utility() - util.csv_writer(name_num, os.path.join(folder_path, os.path.basename(input_folder) + '.csv')) - del util - # perform the 1st k-d tree for depth_count in range(1, int(maximum_level) + 1): bb_collec, hist, _ = extend_partition(depth_count, out_BB, entire_data, 1) @@ -354,7 +354,7 @@ def main(): folder_list.append(input_folder) # iterate through all sub-directories - for sub_folder in folder_list: + for sub_folder in folder_list: directory_creation(folder_path, os.path.join(folder_path, os.path.split(sub_folder)[1]), path, geojson_path) # process single sub-folder