initial commit: create perfromance_check helper.

GEOS-DEV · Sep 13, 2023 · 8659112 · 8659112
1 parent f40b9bc
commit 8659112
Showing 1 changed file with 217 additions and 0 deletions.
diff --git a/scripts/geos_ats_package/geos_ats/helpers/performance_check.py b/scripts/geos_ats_package/geos_ats/helpers/performance_check.py
@@ -0,0 +1,217 @@
+
+import os
+import importlib.util
+import sys
+import re
+import argparse
+import numpy as np
+from scipy.interpolate import interp1d
+import matplotlib.pyplot as plt
+import hdf5_wrapper
+import h5py
+
+def parse_log_file( fname ):
+    """
+    Parses the log file and creates an hdf5 with number of linear and nonlinear iterations per time-step
+    
+    Args: fname (str): name of the log file to parse
+
+    Returns: output_fileName (str):
+             errors:
+    """
+    # Define regular expressions
+    cycle_pattern = r"\d+\s*:\s*Time: [\d.e+-]+ s, dt: [\d.e+-]+ s, Cycle: (\d+)"
+    config_and_nnlinear_iter_pattern = r"\d+\s*:\s*Attempt:\s*(\d+),\s*ConfigurationIter:\s*(\d+),\s*NewtonIter:\s*(\d+)"
+    linear_iter_pattern = r"\d+\s*:\s*Last LinSolve\(iter,res\) = \(\s*(\d+),\s*([\d.e+-]+)\s*\) ;"
+
+    # Initialize variables to store the extracted data
+    data = {}
+
+    with open(fname, 'r') as file:
+        for line in file:
+            # Match Cycle number
+            cycle_match = re.match(cycle_pattern, line)
+            if cycle_match:
+                cycle_number = cycle_match.group(1)
+                data[cycle_number] = {
+                    'Attempts': {}
+                }
+
+            # Match ConfigurationIter data
+            config_iter_match = re.match(config_and_nnlinear_iter_pattern, line)
+            if config_iter_match and cycle_number:
+                attempt, config_iter, newton_iter = config_iter_match.groups()
+                if int(newton_iter) > 0:
+                    attempt_data = data[cycle_number]['Attempts'].get(attempt, {})
+                    config_data = attempt_data.get('ConfigurationIters', [])
+                    config_data.append({
+                        'ConfigurationIter': config_iter,
+                        'NewtonIters': {}
+                    })
+                    attempt_data['ConfigurationIters'] = config_data
+                    data[cycle_number]['Attempts'][attempt] = attempt_data
+
+            # Match Iteration data
+            iteration_match = re.match(linear_iter_pattern, line)
+            if iteration_match and cycle_number and attempt and config_iter:
+                num_iterations = int(iteration_match.group(1))
+                attempt_data = data[cycle_number]['Attempts'][attempt]
+                config_data = attempt_data['ConfigurationIters']
+                config_iter_data = config_data[-1]
+                config_iter_data['NewtonIters'][newton_iter] = num_iterations
+
+    # Create an HDF5 file for storing the data
+    output_fileName = 'extracted_performance_data.h5'
+    with h5py.File(output_fileName, 'w') as hdf5_file:
+        for cycle, cycle_data in data.items():
+            cycle_group = hdf5_file.create_group(f'Cycle_{cycle}')
+            for attempt, attempt_data in cycle_data['Attempts'].items():
+                attempt_group = cycle_group.create_group(f'Attempt_{attempt}')
+                for config_iter_data in attempt_data['ConfigurationIters']:
+                    config_iter_group = attempt_group.create_group(f'ConfigIter_{config_iter_data["ConfigurationIter"]}')
+                    newton_iter_list = []
+                    linear_iter_list = []
+                    for newton_iter, num_iterations in config_iter_data['NewtonIters'].items():
+                        newton_iter_list.append(int(newton_iter))
+                        linear_iter_list.append(num_iterations)
+
+                    matrix_data = np.column_stack((newton_iter_list, linear_iter_list))
+                    config_iter_group.create_dataset('NewtonAndLinearIterations', data=matrix_data)
+
+    print(f'Data has been saved to {output_fileName}')                    
+
+    errors = []                
+
+    return output_fileName, errors        
+
+def load_data(fname, errors):
+    """
+    Args: 
+        fname (str):
+        errors (list): 
+
+    Returns:
+        tuple: data, errors          
+    """
+    data = {}
+    if os.path.isfile(fname):
+        data = hdf5_wrapper.hdf5_wrapper(fname).get_copy()
+    else:
+        errors.append(f'File {fname} not found.')
+
+    return data, errors
+
+# def plot_performance_curves():
+#     """
+#     """
+
+
+def compare_performance_curves( fname, baseline, tolerances, output ):
+    """
+    Compute time history curves
+
+    Args:
+        fname (str): Target curve file name
+        baseline (str): Baseline curve file name
+        tolerances (list): Tolerance for nonlinear and linear iterations
+        output (str): Path to place output figures
+    Returns:
+        tuple: warnings, errors
+    """
+    # Setup
+    warnings = []
+    errors = []
+
+    newton_iterations_tolerance, linear_iterations_tolerance = tolerances
+
+    # Load data
+    target_data, errors  = load_data(fname, errors)
+    baseline_data, errors = load_data(baseline, errors)
+
+    # Check if the number of cycles is the same
+    target_cycles   = set(target_data.keys())
+    baseline_cycles = set(baseline_data.keys())
+    if target_cycles != baseline_cycles:
+        errors.append(f'Number of cycles is different.')
+
+    # Loop over each cycle
+    for cycle in target_cycles:
+        target_num_attempts = set(target_data[cycle].keys())
+        baseline_num_attempts = set(baseline_data[cycle].keys())
+
+        # Check if the number of attempts is the same for this cycle
+        if target_num_attempts != baseline_num_attempts:
+            errors.append(f'Number of attempts for Cycle {cycle} is different.')
+
+        # Loop over each attempt
+        for attempt in target_num_attempts:
+            target_config_iters = set(target_data[cycle][attempt].keys())
+            baeline_config_iters = set(baseline_data[cycle][attempt].keys())
+
+            # Check if the number of ConfigurationIters is the same for this Attempt
+            if target_config_iters != baeline_config_iters:
+                errors.append(f'Number of ConfigurationIters for Cycle {cycle}, Attempt {attempt} is different.')
+
+            # Loop over each ConfigurationIter
+            for config_iter in target_config_iters:
+                # Check if the NewtonAndLinearIterations are within tolerance
+                target_iterations = np.array(target_data[cycle][attempt][config_iter]['NewtonAndLinearIterations'])
+                baseline_iterations = np.array(baseline_data[cycle][attempt][config_iter]['NewtonAndLinearIterations'])
+
+                newton_diff = np.abs(target_iterations[:, 0] - baseline_iterations[:, 0])
+                linear_diff = np.abs(target_iterations[:, 1] - baseline_iterations[:, 1])
+
+                if (np.any(newton_diff > newton_iterations_tolerance * target_iterations[:, 0]) or
+                    np.any(linear_diff > linear_iterations_tolerance * target_iterations[:, 1])):
+                    errors.append(f'Differences found in NewtonAndLinearIterations for Cycle {cycle}, Attempt {attempt}, ConfigurationIter {config_iter}.')
+
+    return warnings, errors
+
+def performance_check_parser():
+    """
+    Build the curve check parser
+
+    Returns:
+        argparse.parser: The performance check parser
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("filename", help="Path to the log file")
+    parser.add_argument("baseline", help="Path to the baseline file")
+
+    parser.add_argument("-t",
+                        "--tolerance",
+                        help="The tolerances for nonlinear and linear iterations",
+                        default=[0.1, 0.2])
+    parser.add_argument("-o",
+                        "--output",
+                        help="Output figures to this directory",
+                        default='./performance_check_figures')
+    return parser
+
+
+def main():
+    """
+    Entry point for the performance check script
+    """
+    parser = performance_check_parser()
+    args = parser.parse_args()
+    fname, parsingErrors = parse_log_file( args.filename )
+
+    # We raise immediately if there is any issue while parsing
+    if len(parsingErrors):
+        print('\n'.join(parsingErrors))
+        raise Exception(f'Performance check error while parsing log file.')
+
+    warnings, errors = compare_performance_curves( fname, args.baseline, args.tolerance, args.output )
+
+    if len(warnings):
+        print('Performance check warnings:')
+        print('\n'.join(warnings))
+
+    if len(errors):
+        print('Performance check errors:')
+        print('\n'.join(errors))
+        raise Exception(f'Performance check produced {len(errors)} errors!')
+
+if __name__ == '__main__':
+    main()