diff --git a/.github/workflows/ unit_testing_umfile_utils.yml b/.github/workflows/ unit_testing_umfile_utils.yml new file mode 100644 index 00000000..77405286 --- /dev/null +++ b/.github/workflows/ unit_testing_umfile_utils.yml @@ -0,0 +1,45 @@ +name: CI + +on: + push: + branches: main + pull_request: + branches: main + workflow_dispatch: + +jobs: + + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Setup conda environment + uses: conda-incubator/setup-miniconda@v3 + with: + miniconda-version: "latest" + python-version: ${{ matrix.python-version }} + environment-file: environment-dev.yml + auto-activate-base: false + activate-environment: umfile-utils-dev + auto-update-conda: false + show-channel-urls: true + + - name: Install source + shell: bash -l {0} + run: python -m pip install --no-deps --no-build-isolation -e . + + - name: List installed packages + shell: bash -l {0} + run: conda list + + - name: Run tests + shell: bash -l {0} + run: python -m pytest --cov=src --cov-report=html -s tests + diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/environment-dev.yml b/environment-dev.yml new file mode 100644 index 00000000..e4540d4d --- /dev/null +++ b/environment-dev.yml @@ -0,0 +1,17 @@ +name: umfile-utils-dev +channels: + - accessnri + - conda-forge + - coecms + - nodefaults +dependencies: + - python >=3.10 + - pytest + - xarray + - mule + - numpy < 2 + - versioneer + - ipykernel + - pytest-cov + - pip + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..01ccc632 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,39 @@ +[project] +name = "umfile-utils" +dynamic = ["version"] +authors = [ + {name = "Martin Dix", email="martin.dix@anu.edu.au"}, + {name = "Lindsey Oberhelman", email="lindsey.oberhelman@anu.edu.au"}, + {name = "Davide Marchegiani", email="davide.marchegiani@anu.edu.au"}, +] +description = "Collection of tools for UM fieldsfiles." 
+license = { file = "LICENSE" } +keywords = ["UM", "UM utilities", "UM fields files", "umfile_utils"] +requires-python = ">=3.10" +dependencies = [ + "mule", + "numpy <2", + "versioneer", +] + +[project.urls] +Repository = "https://github.com/ACCESS-NRI/umfile_utils" + +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools>64", + "versioneer[toml]" +] + +[tool.setuptools.packages.find] +where = ["src"] +namespaces = false + +[tool.versioneer] +VCS = "git" +style = "pep440" +versionfile_source = "src/umfile_utils/_version.py" +versionfile_build = "umfile_utils/_version.py" +tag_prefix = "" +parentdir_prefix = "umfile-utils-" diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..d14fa5b4 --- /dev/null +++ b/setup.py @@ -0,0 +1,7 @@ +import versioneer +from setuptools import setup + +setup( + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), +) diff --git a/src/perturbIC.py b/src/perturbIC.py index 22ff4a38..932a4d70 100644 --- a/src/perturbIC.py +++ b/src/perturbIC.py @@ -1,52 +1,229 @@ #!/usr/bin/env python # Apply a perturbation to initial condition. -# Note that this works in place. -# For ENDGAME perturb thetavd as well if it's present - # Martin Dix martin.dix@csiro.au +import os import argparse -import umfile -from um_fileheaders import * -from numpy.random import MT19937, RandomState, SeedSequence - -parser = argparse.ArgumentParser(description="Perturb UM initial dump") -parser.add_argument('-a', dest='amplitude', type=float, default=0.01, - help = 'Amplitude of perturbation') -parser.add_argument('-s', dest='seed', type=int, required=True, - help = 'Random number seed (must be non-negative integer)') -parser.add_argument('ifile', help='Input file (modified in place)') - -args = parser.parse_args() - -if args.seed >= 0: - rs = RandomState(MT19937(SeedSequence(args.seed))) -else: - raise Exception('Seed must be positive') - -f = umfile.UMFile(args.ifile, 'r+') - -# Set up theta perturbation. 
#!/usr/bin/env python
# Apply a random perturbation to the potential temperature ('theta') field of a
# UM initial-condition dump, writing the result to a new fields file.
# Martin Dix martin.dix@csiro.au

import os
import argparse

from numpy.random import PCG64, Generator
import mule

# lbcode values identifying timeseries fields, which mule cannot process.
TIMESERIES_LBCODES = (31320, 31323)


def parse_args():
    """
    Parse the command line arguments.

    Returns
    ----------
    args_parsed : argparse.Namespace
        Argparse namespace containing the parsed command line arguments.
    """
    parser = argparse.ArgumentParser(description="Perturb UM initial dump")
    # Positional arguments
    parser.add_argument('ifile', metavar="INPUT_PATH", help='Path to the input file.')
    # Optional arguments
    parser.add_argument('-a', dest='amplitude', type=float, default=0.01,
                        help='Amplitude of the perturbation.')
    parser.add_argument('-s', '--seed', dest='seed', type=int,
                        help='The seed value used to generate the random perturbation '
                             '(must be a non-negative integer).')
    parser.add_argument('--validate', action='store_true',
                        help='Validate the output fields file using mule validation.')
    parser.add_argument('-o', '--output', dest='output_path', metavar="OUTPUT_PATH",
                        help='Path to the output file. If omitted, the default output file is '
                             'created by appending "_perturbed" to the input path.')
    args_parsed = parser.parse_args()
    return args_parsed


def create_random_generator(value=None):
    """
    Create the random generator object using the provided value as a seed.

    Parameters
    ----------
    value : int, optional
        The seed value used to create the random generator. If None, the
        generator is seeded from the OS entropy pool.

    Returns
    ----------
    numpy.random.Generator
        The numpy random generator object.

    Raises
    ----------
    ValueError
        If the provided seed value is negative.
    """
    if value is not None and value < 0:
        raise ValueError('Seed value must be non-negative.')
    return Generator(PCG64(value))


def remove_timeseries(ff):
    """
    Remove any timeseries fields from a fields file.

    Parameters
    ----------
    ff : mule.dump.DumpFile
        The mule DumpFile to remove the timeseries from.

    Returns
    ----------
    ff_out : mule.dump.DumpFile
        A copy of the input DumpFile with no timeseries fields.
    """
    ff_out = ff.copy()
    ff_out.fields = [field for field in ff.fields if field.lbcode not in TIMESERIES_LBCODES]
    return ff_out


def create_default_outname(filename, suffix="_perturbed"):
    """
    Create a default output filename by appending a suffix to the input filename.
    If an output filename already exists, a number will be appended to produce a
    unique output filename.

    Parameters
    ----------
    filename: str
        The input filename.
    suffix: str, optional
        The suffix to append to the filename.

    Returns
    ----------
    output_filename: str
        The default output filename.
    """
    # BUG FIX: the original formatted a literal placeholder instead of the
    # 'filename' argument, so every run produced the same output name and the
    # parameter was silently ignored.
    output_filename = f"{filename}{suffix}"
    num = ""
    if os.path.exists(output_filename):
        num = 1
        while os.path.exists(f"{output_filename}{num}"):
            num += 1
    return f"{output_filename}{num}"


def create_perturbation(amplitude, random_generator, shape, nullify_poles=True):
    """
    Create a uniformly-distributed random perturbation of given amplitude and
    shape, using the given random_generator.

    Parameters
    ----------
    amplitude: float
        The amplitude of the random perturbation.
    random_generator: numpy.random.Generator
        The random generator used to generate the random perturbation.
    shape: tuple or list
        Shape of the generated perturbation.
    nullify_poles: bool, optional
        If set to True, nullifies the perturbation amplitude at the poles
        (first and last latitude rows).

    Returns
    ----------
    perturbation: numpy.ndarray
        The generated random perturbation.
    """
    perturbation = random_generator.uniform(low=-amplitude, high=amplitude, size=shape)
    # Set poles to zero (only necessary for ND grids, but doesn't hurt EG)
    if nullify_poles:
        perturbation[[0, -1], :] = 0
    return perturbation


def is_field_to_perturb(field, stash_to_perturb):
    """
    Check if the field STASH itemcode corresponds to the one to perturb.

    Parameters
    ----------
    field : mule.Field
        Field to check.
    stash_to_perturb: int
        STASH itemcode to perturb.

    Returns
    ----------
    bool
        True if the field STASH itemcode corresponds to the one to perturb.
    """
    return field.lbuser4 == stash_to_perturb


class AdditionOperator(mule.DataOperator):
    """
    A mule operator that adds an array to a field, provided that the two have
    the same shape.

    Attributes
    ----------
    array : numpy.ndarray
        The array to add to the field.
    """

    def __init__(self, array):
        self.array = array

    def new_field(self, source_field):
        """
        Create the new field object by copying the source field.
        """
        return source_field.copy()

    def transform(self, source_field, new_field):
        """
        Perform the field data manipulation: check that the array and source
        field data have the same shape and then add them together.

        Raises
        ----------
        ValueError
            If the array and field data shapes differ.
        """
        data = source_field.get_data()
        if (field_shape := data.shape) != (array_shape := self.array.shape):
            raise ValueError(
                f"Array and field could not be broadcast together with shapes "
                f"{array_shape} and {field_shape}."
            )
        return data + self.array


def void_validation(*args, **kwargs):
    """
    Don't perform the validation, but print a message to inform that validation
    has been skipped.
    """
    print('Skipping mule validation. To enable the validation, run using the "--validate" option.')


def main():
    """
    Add a bi-dimensional random perturbation to the potential temperature field
    'Theta' (STASH itemcode = 4) of a UM fields file.
    """
    # STASH itemcode of the field to perturb.
    STASH_THETA = 4

    # Parse the command line arguments
    args = parse_args()

    # Create the output filename
    output_file = create_default_outname(args.ifile) if args.output_path is None else args.output_path

    # Create the random generator.
    random_generator = create_random_generator(args.seed)

    # BUG FIX: the original disabled validation when "--validate" WAS passed,
    # inverting the documented behaviour. Validation is skipped unless the
    # "--validate" option is provided.
    if not args.validate:
        mule.DumpFile.validate = void_validation

    ff_raw = mule.DumpFile.from_file(args.ifile)

    # Remove the time series from the data to ensure mule will work
    ff = remove_timeseries(ff_raw)

    # Create the perturbation operator lazily, on the first theta field found,
    # so the perturbation shape matches the field's grid. (Replaces the
    # original try/except NameError flow, which could mask unrelated NameErrors
    # raised inside the handler.)
    perturb_operator = None
    for ifield, field in enumerate(ff.fields):
        if is_field_to_perturb(field, STASH_THETA):
            if perturb_operator is None:
                shape = field.get_data().shape
                perturbation = create_perturbation(args.amplitude, random_generator, shape)
                perturb_operator = AdditionOperator(perturbation)
            ff.fields[ifield] = perturb_operator(field)

    ff.to_file(output_file)


if __name__ == "__main__":
    main()
rename to src/umfile_utils/GLOBE30_patch_aus.py diff --git a/src/umfile_utils/README.md b/src/umfile_utils/README.md new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/umfile_utils/README.md @@ -0,0 +1 @@ + diff --git a/src/umfile_utils/__init__.py b/src/umfile_utils/__init__.py new file mode 100644 index 00000000..96e13be6 --- /dev/null +++ b/src/umfile_utils/__init__.py @@ -0,0 +1,2 @@ +from umfile_utils import _version +__version__ = _version.get_versions()["version"] diff --git a/src/umfile_utils/_version.py b/src/umfile_utils/_version.py new file mode 100644 index 00000000..6ae7bead --- /dev/null +++ b/src/umfile_utils/_version.py @@ -0,0 +1,682 @@ +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools + + +def get_keywords() -> Dict[str, str]: + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + + +def get_config() -> VersioneerConfig: + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "umpost-" + cfg.versionfile_source = "umpost/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so 
use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) + break + except OSError as e: + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. 
This function is not used from + # _version.py. + keywords: Dict[str, str] = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
+ TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces: Dict[str, Any] = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces: Dict[str, Any]) -> str: + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces: Dict[str, Any]) -> str: + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces: Dict[str, Any]) -> str: + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces: Dict[str, Any]) -> str: + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions() -> Dict[str, Any]: + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. 
+ + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/src/access_landmask.py b/src/umfile_utils/access_landmask.py similarity index 100% rename from src/access_landmask.py rename to src/umfile_utils/access_landmask.py diff --git a/src/add_fields_to_dump.py b/src/umfile_utils/add_fields_to_dump.py similarity index 100% rename from src/add_fields_to_dump.py rename to src/umfile_utils/add_fields_to_dump.py diff --git a/src/ancil2netcdf.py b/src/umfile_utils/ancil2netcdf.py similarity index 100% rename from src/ancil2netcdf.py rename to src/umfile_utils/ancil2netcdf.py diff --git a/src/change_calendar.py b/src/umfile_utils/change_calendar.py similarity index 100% rename from src/change_calendar.py rename to src/umfile_utils/change_calendar.py diff --git a/src/change_calendar365.py b/src/umfile_utils/change_calendar365.py similarity index 100% rename from src/change_calendar365.py rename to src/umfile_utils/change_calendar365.py diff --git a/src/change_calendar_header.py 
b/src/umfile_utils/change_calendar_header.py similarity index 100% rename from src/change_calendar_header.py rename to src/umfile_utils/change_calendar_header.py diff --git a/src/change_dump_date.py b/src/umfile_utils/change_dump_date.py similarity index 100% rename from src/change_dump_date.py rename to src/umfile_utils/change_dump_date.py diff --git a/src/change_dump_date2.py b/src/umfile_utils/change_dump_date2.py similarity index 100% rename from src/change_dump_date2.py rename to src/umfile_utils/change_dump_date2.py diff --git a/src/change_endianness.py b/src/umfile_utils/change_endianness.py similarity index 100% rename from src/change_endianness.py rename to src/umfile_utils/change_endianness.py diff --git a/src/change_stashcode.py b/src/umfile_utils/change_stashcode.py similarity index 100% rename from src/change_stashcode.py rename to src/umfile_utils/change_stashcode.py diff --git a/src/check_land_overlap.py b/src/umfile_utils/check_land_overlap.py similarity index 100% rename from src/check_land_overlap.py rename to src/umfile_utils/check_land_overlap.py diff --git a/src/check_land_overlap_idx.py b/src/umfile_utils/check_land_overlap_idx.py similarity index 100% rename from src/check_land_overlap_idx.py rename to src/umfile_utils/check_land_overlap_idx.py diff --git a/src/count_land.py b/src/umfile_utils/count_land.py similarity index 100% rename from src/count_land.py rename to src/umfile_utils/count_land.py diff --git a/src/count_tiles.py b/src/umfile_utils/count_tiles.py similarity index 100% rename from src/count_tiles.py rename to src/umfile_utils/count_tiles.py diff --git a/src/eqtoll.py b/src/umfile_utils/eqtoll.py similarity index 100% rename from src/eqtoll.py rename to src/umfile_utils/eqtoll.py diff --git a/src/fix_polar_anom.py b/src/umfile_utils/fix_polar_anom.py similarity index 100% rename from src/fix_polar_anom.py rename to src/umfile_utils/fix_polar_anom.py diff --git a/src/get_calendar.py b/src/umfile_utils/get_calendar.py similarity 
index 100% rename from src/get_calendar.py rename to src/umfile_utils/get_calendar.py diff --git a/src/limit_soilmoisture.py b/src/umfile_utils/limit_soilmoisture.py similarity index 100% rename from src/limit_soilmoisture.py rename to src/umfile_utils/limit_soilmoisture.py diff --git a/src/interpolate_ancillary.py b/src/umfile_utils/interpolate_ancillary.py similarity index 100% rename from src/interpolate_ancillary.py rename to src/umfile_utils/interpolate_ancillary.py diff --git a/src/iris_stashname.py b/src/umfile_utils/iris_stashname.py similarity index 100% rename from src/iris_stashname.py rename to src/umfile_utils/iris_stashname.py diff --git a/src/lbcdump.py b/src/umfile_utils/lbcdump.py similarity index 100% rename from src/lbcdump.py rename to src/umfile_utils/lbcdump.py diff --git a/src/levelheights.py b/src/umfile_utils/levelheights.py similarity index 100% rename from src/levelheights.py rename to src/umfile_utils/levelheights.py diff --git a/src/mask_edit.py b/src/umfile_utils/mask_edit.py similarity index 100% rename from src/mask_edit.py rename to src/umfile_utils/mask_edit.py diff --git a/src/mergefiles.py b/src/umfile_utils/mergefiles.py similarity index 100% rename from src/mergefiles.py rename to src/umfile_utils/mergefiles.py diff --git a/src/mergefiles_region.py b/src/umfile_utils/mergefiles_region.py similarity index 100% rename from src/mergefiles_region.py rename to src/umfile_utils/mergefiles_region.py diff --git a/src/nccmp_um2netcdf.py b/src/umfile_utils/nccmp_um2netcdf.py similarity index 100% rename from src/nccmp_um2netcdf.py rename to src/umfile_utils/nccmp_um2netcdf.py diff --git a/src/polar_anom.py b/src/umfile_utils/polar_anom.py similarity index 100% rename from src/polar_anom.py rename to src/umfile_utils/polar_anom.py diff --git a/src/prog_fields_mismatch.py b/src/umfile_utils/prog_fields_mismatch.py similarity index 100% rename from src/prog_fields_mismatch.py rename to src/umfile_utils/prog_fields_mismatch.py diff --git
a/src/read_stashmaster.py b/src/umfile_utils/read_stashmaster.py similarity index 100% rename from src/read_stashmaster.py rename to src/umfile_utils/read_stashmaster.py diff --git a/src/remove_stash_duplicates.py b/src/umfile_utils/remove_stash_duplicates.py similarity index 100% rename from src/remove_stash_duplicates.py rename to src/umfile_utils/remove_stash_duplicates.py diff --git a/src/reset_neg_snow.py b/src/umfile_utils/reset_neg_snow.py similarity index 100% rename from src/reset_neg_snow.py rename to src/umfile_utils/reset_neg_snow.py diff --git a/src/show_land_overlap.py b/src/umfile_utils/show_land_overlap.py similarity index 100% rename from src/show_land_overlap.py rename to src/umfile_utils/show_land_overlap.py diff --git a/src/sortum.py b/src/umfile_utils/sortum.py similarity index 100% rename from src/sortum.py rename to src/umfile_utils/sortum.py diff --git a/src/split_times.py b/src/umfile_utils/split_times.py similarity index 100% rename from src/split_times.py rename to src/umfile_utils/split_times.py diff --git a/src/sstice_ancil.py b/src/umfile_utils/sstice_ancil.py similarity index 100% rename from src/sstice_ancil.py rename to src/umfile_utils/sstice_ancil.py diff --git a/src/stashvar.py b/src/umfile_utils/stashvar.py similarity index 100% rename from src/stashvar.py rename to src/umfile_utils/stashvar.py diff --git a/src/stashvar_cmip6.py b/src/umfile_utils/stashvar_cmip6.py similarity index 100% rename from src/stashvar_cmip6.py rename to src/umfile_utils/stashvar_cmip6.py diff --git a/src/stashvar_validate_stdnames.py b/src/umfile_utils/stashvar_validate_stdnames.py similarity index 99% rename from src/stashvar_validate_stdnames.py rename to src/umfile_utils/stashvar_validate_stdnames.py index c1315144..7b544478 100644 --- a/src/stashvar_validate_stdnames.py +++ b/src/umfile_utils/stashvar_validate_stdnames.py @@ -18,4 +18,4 @@ if STASH_TO_CF[key].standard_name and std_name and STASH_TO_CF[key].standard_name != std_name: print("Name 
mismatch", v, std_name, STASH_TO_CF[key].standard_name) if STASH_TO_CF[key].units and units and STASH_TO_CF[key].units != units: - print("Units mismatch", v, units, STASH_TO_CF[key].units) \ No newline at end of file + print("Units mismatch", v, units, STASH_TO_CF[key].units) diff --git a/src/subset_ancillary.py b/src/umfile_utils/subset_ancillary.py similarity index 100% rename from src/subset_ancillary.py rename to src/umfile_utils/subset_ancillary.py diff --git a/src/subset_dump.py b/src/umfile_utils/subset_dump.py similarity index 100% rename from src/subset_dump.py rename to src/umfile_utils/subset_dump.py diff --git a/src/um2netcdf.py b/src/umfile_utils/um2netcdf.py similarity index 100% rename from src/um2netcdf.py rename to src/umfile_utils/um2netcdf.py diff --git a/src/um2netcdf4.py b/src/umfile_utils/um2netcdf4.py similarity index 100% rename from src/um2netcdf4.py rename to src/umfile_utils/um2netcdf4.py diff --git a/src/um2netcdf4_cmip6.py b/src/umfile_utils/um2netcdf4_cmip6.py similarity index 100% rename from src/um2netcdf4_cmip6.py rename to src/umfile_utils/um2netcdf4_cmip6.py diff --git a/src/um2netcdf4_dev.py b/src/umfile_utils/um2netcdf4_dev.py similarity index 100% rename from src/um2netcdf4_dev.py rename to src/umfile_utils/um2netcdf4_dev.py diff --git a/src/um2netcdf_all.py b/src/umfile_utils/um2netcdf_all.py similarity index 100% rename from src/um2netcdf_all.py rename to src/umfile_utils/um2netcdf_all.py diff --git a/src/um2netcdf_iris.py b/src/umfile_utils/um2netcdf_iris.py similarity index 100% rename from src/um2netcdf_iris.py rename to src/umfile_utils/um2netcdf_iris.py diff --git a/src/um2netcdf_iris_mon.py b/src/umfile_utils/um2netcdf_iris_mon.py similarity index 100% rename from src/um2netcdf_iris_mon.py rename to src/umfile_utils/um2netcdf_iris_mon.py diff --git a/src/um_complexity.py b/src/umfile_utils/um_complexity.py similarity index 100% rename from src/um_complexity.py rename to src/umfile_utils/um_complexity.py diff --git 
a/src/um_copy_field.py b/src/umfile_utils/um_copy_field.py similarity index 100% rename from src/um_copy_field.py rename to src/umfile_utils/um_copy_field.py diff --git a/src/um_fields_subset.py b/src/umfile_utils/um_fields_subset.py similarity index 100% rename from src/um_fields_subset.py rename to src/umfile_utils/um_fields_subset.py diff --git a/src/um_fieldsfile_dump.py b/src/umfile_utils/um_fieldsfile_dump.py similarity index 100% rename from src/um_fieldsfile_dump.py rename to src/umfile_utils/um_fieldsfile_dump.py diff --git a/src/um_fileheaders.py b/src/umfile_utils/um_fileheaders.py similarity index 100% rename from src/um_fileheaders.py rename to src/umfile_utils/um_fileheaders.py diff --git a/src/um_grid_flip.py b/src/umfile_utils/um_grid_flip.py similarity index 100% rename from src/um_grid_flip.py rename to src/umfile_utils/um_grid_flip.py diff --git a/src/um_modify_field.py b/src/umfile_utils/um_modify_field.py similarity index 100% rename from src/um_modify_field.py rename to src/umfile_utils/um_modify_field.py diff --git a/src/um_replace_field.py b/src/umfile_utils/um_replace_field.py similarity index 100% rename from src/um_replace_field.py rename to src/umfile_utils/um_replace_field.py diff --git a/src/um_timeseries.py b/src/umfile_utils/um_timeseries.py similarity index 100% rename from src/um_timeseries.py rename to src/umfile_utils/um_timeseries.py diff --git a/src/um_zero_field.py b/src/umfile_utils/um_zero_field.py similarity index 100% rename from src/um_zero_field.py rename to src/umfile_utils/um_zero_field.py diff --git a/src/umfile.py b/src/umfile_utils/umfile.py similarity index 100% rename from src/umfile.py rename to src/umfile_utils/umfile.py diff --git a/src/umv2netcdf.py b/src/umfile_utils/umv2netcdf.py similarity index 100% rename from src/umv2netcdf.py rename to src/umfile_utils/umv2netcdf.py diff --git a/src/umv2reg.py b/src/umfile_utils/umv2reg.py similarity index 100% rename from src/umv2reg.py rename to 
src/umfile_utils/umv2reg.py diff --git a/src/valid_times.py b/src/umfile_utils/valid_times.py similarity index 100% rename from src/valid_times.py rename to src/umfile_utils/valid_times.py diff --git a/tests/perturbIC_test.py b/tests/perturbIC_test.py new file mode 100644 index 00000000..034d4830 --- /dev/null +++ b/tests/perturbIC_test.py @@ -0,0 +1,229 @@ +import pytest +import sys +from perturbIC import parse_args, create_random_generator, remove_timeseries, is_field_to_perturb, create_default_outname, create_perturbation, AdditionOperator +from unittest.mock import patch, Mock, MagicMock +import numpy as np +import numpy.random as rs + +#This section sets up the testing for the parse args +@pytest.fixture +def fake_args(monkeypatch): + """ + Fixture to set fake command-line arguments. + """ + def _fake_args(args): + monkeypatch.setattr('sys.argv', args) + return args + return _fake_args + + +@pytest.mark.parametrize( + "input_args, expected", + [ + # Case 1: Test only essential arguments. + (["script.py", "input_file"], {"ifile": "input_file", "amplitude": 0.01, "seed": None, "validate": False, "output_path": None}), + # Case 2: Test the amplitude + (["script.py", "input_file", "-a", "0.05"], {"ifile": "input_file", "amplitude": 0.05, "seed": None, "validate": False, "output_path": None}), + # Case 3: Test the validate + (["script.py", "input_file", "-s", "42", "--validate"], {"ifile": "input_file", "amplitude": 0.01, "seed": 42, "validate": True, "output_path": None}), + # Case 4: Inclusion of the output file + (["script.py", "input_file", "-o", "output_file"], {"ifile": "input_file", "amplitude": 0.01, "seed": None, "validate": False, "output_path": "output_file"}), + ], +) +def test_parse_args(fake_args, input_args, expected): + """ + Test the parse_args function with 4 cases, checking behaviour both with and without the optional arguments.
+ """ + fake_args(input_args) + args = parse_args() + for key, value in expected.items(): + assert getattr(args, key) == value + +#This section tests the output file creation. +@pytest.mark.parametrize( + # description of the arguments + "existing_files, filename, expected_output", + [ + # Case 1: Filename with suffix doesn't exist, return filename with suffix + ([], "testfilename", "testfilename_perturbed"), + # Case 2: Filename with suffix exists, returns filename with suffix appending 1 + (["testfilename_perturbed"], "testfilename", "testfilename_perturbed1"), + # Case 3: Filename with suffix and a few numbered versions exist, returns + # filename with suffix and the first numbered version that doesn't exist + ( + ["testfilename_perturbed", "testfilename_perturbed1", "testfilename_perturbed2"], + "testfilename", + "testfilename_perturbed3", + ), + ], +) +@patch("os.path.exists") +def test_create_default_outname_suffix_not_passed(mock_exists, existing_files, filename, expected_output): + """ + Test the function that creates the default output file name, without passing a suffix. + 3 cases tested with pytest.mark.parametrize. + """ + # Mock os.path.exists to simulate the presence of specific files + mock_exists.side_effect = lambda f: f in existing_files + result = create_default_outname(filename) + assert result == expected_output + +@patch("os.path.exists") +def test_create_default_outname_suffix_passed(mock_exists): + """ + Test the function that creates the default output file name, passing a custom suffix. + """ + # Mock os.path.exists to simulate the presence of specific files + mock_exists.return_value = False + filename = "testfilename" + suffix = "testsuffix" + result = create_default_outname(filename, suffix) + expected_output = "testfilenametestsuffix" + assert result == expected_output + +#This section of code tests the removal of the timeseries +class MockField: + """ + Mock class to simulate a field with an lbcode attribute. 
+ """ + def __init__(self, lbcode): + self.lbcode = lbcode + +class MockDumpFile: + """ + Mock class to simulate a mule DumpFile. + """ + def __init__(self, fields): + self.fields = fields + + def copy(self): + """ + Simulate the copy method of a mule DumpFile. + """ + return MockDumpFile(self.fields[:]) + +TIMESERIES_LBCODES = [31320] +@pytest.mark.parametrize( + "input_fields, expected_codes", + [ #Time series is the first field + ([MockField(31320), MockField(1001)], [1001]), + #If it is all timeseries + ([MockField(31320), MockField(31320)], []), + #If none are timeseries + ([MockField(1001), MockField(2002)], [1001, 2002]), + #If there are no files + ([], []), + ], +) +def test_remove_timeseries(input_fields, expected_codes): + """ + Test the remove_timeseries function with various input scenarios. + """ + mock_dumpfile = MockDumpFile(input_fields) + result = remove_timeseries(mock_dumpfile) + result_codes = [field.lbcode for field in result.fields] + + assert result_codes == expected_codes + +@pytest.fixture +def mock_metadata(): + """ + This fixture creates callable mock UM field metadata. + + Outputs + tuple - Two mocked fields (one matching, one not) and a stash code + """ + + # Mock fields with different lbuser4 values + field_theta = MagicMock() + field_not_theta = MagicMock() + + # Correctly set the lbuser4 attribute + field_theta.lbuser4 = 4 + field_not_theta.lbuser4 = 56 + stash_code = 4 + + return field_theta, field_not_theta, stash_code + +def test_is_field_to_perturb(mock_metadata): + + """ + Tests the item code conditional + + Inputs + fixture - A fake list of arrays and a fake index + Outputs + The results of assertion tests.
+ """ + + field_theta, field_not_theta, stash_code = mock_metadata + + # Assertions to verify the function's behavior + assert is_field_to_perturb(field_theta, stash_code) == True, "field_theta should match the stash_code" + assert is_field_to_perturb(field_not_theta, stash_code) == False, "field_not_theta should not match the stash_code" + + +#This section tests creating the perturbation +@pytest.mark.parametrize( + "amplitude, shape, nullify_poles, expected_shape", + [ + (0.5, (10, 20), True, (10, 20)), + (1.0, (5, 5), False, (5, 5)), + (0.3, (3, 7), True, (3, 7)), + ], +) + +def test_create_perturbation(amplitude, shape, nullify_poles, expected_shape): + """ + Test the create_perturbation function with different amplitudes, shapes, and nullify_poles settings. + """ + random_seed = np.random.default_rng(43) + # Create the perturbation + perturbation = create_perturbation(amplitude, random_seed, shape, nullify_poles) + + # Check the shape of the perturbation + assert perturbation.shape == expected_shape, "Perturbation shape does not match expected shape" + + # Check that values are within the range [-amplitude, amplitude] + assert np.all(perturbation >= -amplitude) and np.all(perturbation <= amplitude), \ + "Perturbation values exceed specified amplitude range" + + # Check nullification of poles + if nullify_poles: + assert np.all(perturbation[0, :] == 0) and np.all(perturbation[-1, :] == 0), \ + "Perturbation poles were not nullified as expected" + else: + assert not (np.all(perturbation[0, :] == 0) and np.all(perturbation[-1, :] == 0)), \ + "Perturbation poles should not have been nullified" + +def test_operator_initialization(): + """ + Test the addition operator.. 
+ + Outputs + The results of testing if the perturbation initialization worked + + """ + + # Mock the source field + source_field = MagicMock() + source_field.get_data.return_value = np.array([[1, 2], [3, 4]]) + + # Mock the new field + new_field = MagicMock() + + # Array to add + array_to_add = np.array([[10, 20], [30, 40]]) + + # Create the operator + operator = AdditionOperator(array_to_add) + + # Test transform method + result = operator.transform(source_field, new_field) + + # Expected output + expected = np.array([[11, 22], [33, 44]]) + + # Assertions + np.testing.assert_array_equal(result, expected) +