diff --git a/.github/workflows/ unit_testing_umfile_utils.yml b/.github/workflows/ unit_testing_umfile_utils.yml new file mode 100644 index 00000000..77405286 --- /dev/null +++ b/.github/workflows/ unit_testing_umfile_utils.yml @@ -0,0 +1,45 @@ +name: CI + +on: + push: + branches: main + pull_request: + branches: main + workflow_dispatch: + +jobs: + + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Setup conda environment + uses: conda-incubator/setup-miniconda@v3 + with: + miniconda-version: "latest" + python-version: ${{ matrix.python-version }} + environment-file: environment-dev.yml + auto-activate-base: false + activate-environment: umfile-utils-dev + auto-update-conda: false + show-channel-urls: true + + - name: Install source + shell: bash -l {0} + run: python -m pip install --no-deps --no-build-isolation -e . + + - name: List installed packages + shell: bash -l {0} + run: conda list + + - name: Run tests + shell: bash -l {0} + run: python -m pytest --cov=src --cov-report=html -s tests + diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/environment-dev.yml b/environment-dev.yml new file mode 100644 index 00000000..e4540d4d --- /dev/null +++ b/environment-dev.yml @@ -0,0 +1,17 @@ +name: umfile-utils-dev +channels: + - accessnri + - conda-forge + - coecms + - nodefaults +dependencies: + - python >=3.10 + - pytest + - xarray + - mule + - numpy < 2 + - versioneer + - ipykernel + - pytest-cov + - pip + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..01ccc632 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,39 @@ +[project] +name = "umfile-utils" +dynamic = ["version"] +authors = [ + {name = "Martin Dix", email="martin.dix@anu.edu.au"}, + {name = "Lindsey Oberhelman", email="lindsey.oberhelman@anu.edu.au"}, + {name = "Davide Marchegiani", email="davide.marchegiani@anu.edu.au"}, +] +description = "Collection of tools for UM fieldsfiles." 
+license = { file = "LICENSE" } +keywords = ["UM", "UM utilities", "UM fields files", "umfile_utils"] +requires-python = ">=3.10" +dependencies = [ + "mule", + "numpy <2", + "versioneer", +] + +[project.urls] +Repository = "https://github.com/ACCESS-NRI/umfile_utils" + +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools>64", + "versioneer[toml]" +] + +[tool.setuptools.packages.find] +where = ["src"] +namespaces = false + +[tool.versioneer] +VCS = "git" +style = "pep440" +versionfile_source = "src/umfile_utils/_version.py" +versionfile_build = "umfile_utils/_version.py" +tag_prefix = "" +parentdir_prefix = "umfile-utils-" diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..d14fa5b4 --- /dev/null +++ b/setup.py @@ -0,0 +1,7 @@ +import versioneer +from setuptools import setup + +setup( + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), +) diff --git a/src/perturbIC.py b/src/perturbIC.py index 22ff4a38..932a4d70 100644 --- a/src/perturbIC.py +++ b/src/perturbIC.py @@ -1,52 +1,229 @@ #!/usr/bin/env python # Apply a perturbation to initial condition. -# Note that this works in place. -# For ENDGAME perturb thetavd as well if it's present - # Martin Dix martin.dix@csiro.au +import os import argparse -import umfile -from um_fileheaders import * -from numpy.random import MT19937, RandomState, SeedSequence - -parser = argparse.ArgumentParser(description="Perturb UM initial dump") -parser.add_argument('-a', dest='amplitude', type=float, default=0.01, - help = 'Amplitude of perturbation') -parser.add_argument('-s', dest='seed', type=int, required=True, - help = 'Random number seed (must be non-negative integer)') -parser.add_argument('ifile', help='Input file (modified in place)') - -args = parser.parse_args() - -if args.seed >= 0: - rs = RandomState(MT19937(SeedSequence(args.seed))) -else: - raise Exception('Seed must be positive') - -f = umfile.UMFile(args.ifile, 'r+') - -# Set up theta perturbation. 
#!/usr/bin/env python
# Apply a random perturbation to the potential temperature ('theta') field of a
# UM initial-condition dump, writing the result to a new fields file.
# Martin Dix martin.dix@csiro.au

import os
import argparse

from numpy.random import PCG64, Generator
import mule

# lbcode values identifying timeseries fields, which mule cannot process.
TIMESERIES_LBCODES = (31320, 31323)


def parse_args():
    """
    Parse the command line arguments.

    Returns
    ----------
    args_parsed : argparse.Namespace
        Argparse namespace containing the parsed command line arguments.
    """
    parser = argparse.ArgumentParser(description="Perturb UM initial dump")
    # Positional arguments
    parser.add_argument('ifile', metavar="INPUT_PATH", help='Path to the input file.')
    # Optional arguments
    parser.add_argument('-a', dest='amplitude', type=float, default=0.01,
                        help='Amplitude of the perturbation.')
    parser.add_argument('-s', '--seed', dest='seed', type=int,
                        help='The seed value used to generate the random perturbation '
                             '(must be a non-negative integer).')
    parser.add_argument('--validate', action='store_true',
                        help='Validate the output fields file using mule validation.')
    parser.add_argument('-o', '--output', dest='output_path', metavar="OUTPUT_PATH",
                        help='Path to the output file. If omitted, the default output file is '
                             'created by appending "_perturbed" to the input path.')
    args_parsed = parser.parse_args()
    return args_parsed


def create_random_generator(value=None):
    """
    Create the random generator object using the provided value as a seed.

    Parameters
    ----------
    value : int, optional
        The seed value used to create the random generator. If None, the
        generator is seeded from the OS entropy pool.

    Returns
    ----------
    numpy.random.Generator
        The numpy random generator object.

    Raises
    ----------
    ValueError
        If the provided seed value is negative.
    """
    if value is not None and value < 0:
        raise ValueError('Seed value must be non-negative.')
    return Generator(PCG64(value))


def remove_timeseries(ff):
    """
    Remove any timeseries fields from a fields file.

    Parameters
    ----------
    ff : mule.dump.DumpFile
        The mule DumpFile to remove the timeseries from.

    Returns
    ----------
    ff_out : mule.dump.DumpFile
        A copy of the input DumpFile with no timeseries fields.
    """
    ff_out = ff.copy()
    ff_out.fields = [field for field in ff.fields if field.lbcode not in TIMESERIES_LBCODES]
    return ff_out


def create_default_outname(filename, suffix="_perturbed"):
    """
    Create a default output filename by appending a suffix to the input filename.
    If an output filename already exists, a number will be appended to produce a
    unique output filename.

    Parameters
    ----------
    filename: str
        The input filename.
    suffix: str, optional
        The suffix to append to the filename.

    Returns
    ----------
    output_filename: str
        The default output filename.
    """
    # BUG FIX: the original formatted a literal placeholder instead of the
    # 'filename' argument, so every run produced the same output name and the
    # parameter was silently ignored.
    output_filename = f"{filename}{suffix}"
    num = ""
    if os.path.exists(output_filename):
        num = 1
        while os.path.exists(f"{output_filename}{num}"):
            num += 1
    return f"{output_filename}{num}"


def create_perturbation(amplitude, random_generator, shape, nullify_poles=True):
    """
    Create a uniformly-distributed random perturbation of given amplitude and
    shape, using the given random_generator.

    Parameters
    ----------
    amplitude: float
        The amplitude of the random perturbation.
    random_generator: numpy.random.Generator
        The random generator used to generate the random perturbation.
    shape: tuple or list
        Shape of the generated perturbation.
    nullify_poles: bool, optional
        If set to True, nullifies the perturbation amplitude at the poles
        (first and last latitude rows).

    Returns
    ----------
    perturbation: numpy.ndarray
        The generated random perturbation.
    """
    perturbation = random_generator.uniform(low=-amplitude, high=amplitude, size=shape)
    # Set poles to zero (only necessary for ND grids, but doesn't hurt EG)
    if nullify_poles:
        perturbation[[0, -1], :] = 0
    return perturbation


def is_field_to_perturb(field, stash_to_perturb):
    """
    Check if the field STASH itemcode corresponds to the one to perturb.

    Parameters
    ----------
    field : mule.Field
        Field to check.
    stash_to_perturb: int
        STASH itemcode to perturb.

    Returns
    ----------
    bool
        True if the field STASH itemcode corresponds to the one to perturb.
    """
    return field.lbuser4 == stash_to_perturb


class AdditionOperator(mule.DataOperator):
    """
    A mule operator that adds an array to a field, provided that the two have
    the same shape.

    Attributes
    ----------
    array : numpy.ndarray
        The array to add to the field.
    """

    def __init__(self, array):
        self.array = array

    def new_field(self, source_field):
        """
        Create the new field object by copying the source field.
        """
        return source_field.copy()

    def transform(self, source_field, new_field):
        """
        Perform the field data manipulation: check that the array and source
        field data have the same shape and then add them together.

        Raises
        ----------
        ValueError
            If the array and field data shapes differ.
        """
        data = source_field.get_data()
        if (field_shape := data.shape) != (array_shape := self.array.shape):
            raise ValueError(
                f"Array and field could not be broadcast together with shapes "
                f"{array_shape} and {field_shape}."
            )
        return data + self.array


def void_validation(*args, **kwargs):
    """
    Don't perform the validation, but print a message to inform that validation
    has been skipped.
    """
    print('Skipping mule validation. To enable the validation, run using the "--validate" option.')


def main():
    """
    Add a bi-dimensional random perturbation to the potential temperature field
    'Theta' (STASH itemcode = 4) of a UM fields file.
    """
    # STASH itemcode of the field to perturb.
    STASH_THETA = 4

    # Parse the command line arguments
    args = parse_args()

    # Create the output filename
    output_file = create_default_outname(args.ifile) if args.output_path is None else args.output_path

    # Create the random generator.
    random_generator = create_random_generator(args.seed)

    # BUG FIX: the original disabled validation when "--validate" WAS passed,
    # inverting the documented behaviour. Validation is skipped unless the
    # "--validate" option is provided.
    if not args.validate:
        mule.DumpFile.validate = void_validation

    ff_raw = mule.DumpFile.from_file(args.ifile)

    # Remove the time series from the data to ensure mule will work
    ff = remove_timeseries(ff_raw)

    # Create the perturbation operator lazily, on the first theta field found,
    # so the perturbation shape matches the field's grid. (Replaces the
    # original try/except NameError flow, which could mask unrelated NameErrors
    # raised inside the handler.)
    perturb_operator = None
    for ifield, field in enumerate(ff.fields):
        if is_field_to_perturb(field, STASH_THETA):
            if perturb_operator is None:
                shape = field.get_data().shape
                perturbation = create_perturbation(args.amplitude, random_generator, shape)
                perturb_operator = AdditionOperator(perturbation)
            ff.fields[ifield] = perturb_operator(field)

    ff.to_file(output_file)


if __name__ == "__main__":
    main()
rename to src/umfile_utils/GLOBE30_patch_aus.py diff --git a/src/umfile_utils/README.md b/src/umfile_utils/README.md new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/umfile_utils/README.md @@ -0,0 +1 @@ + diff --git a/src/umfile_utils/__init__.py b/src/umfile_utils/__init__.py new file mode 100644 index 00000000..96e13be6 --- /dev/null +++ b/src/umfile_utils/__init__.py @@ -0,0 +1,2 @@ +from umfile_utils import _version +__version__ = _version.get_versions()["version"] diff --git a/src/umfile_utils/_version.py b/src/umfile_utils/_version.py new file mode 100644 index 00000000..6ae7bead --- /dev/null +++ b/src/umfile_utils/_version.py @@ -0,0 +1,682 @@ +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools + + +def get_keywords() -> Dict[str, str]: + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + + +def get_config() -> VersioneerConfig: + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "umpost-" + cfg.versionfile_source = "umpost/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so 
use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) + break + except OSError as e: + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. 
This function is not used from + # _version.py. + keywords: Dict[str, str] = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
+ TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces: Dict[str, Any] = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces: Dict[str, Any]) -> str: + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces: Dict[str, Any]) -> str: + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces: Dict[str, Any]) -> str: + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces: Dict[str, Any]) -> str: + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions() -> Dict[str, Any]: + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. 
+ + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/src/access_landmask.py b/src/umfile_utils/access_landmask.py similarity index 100% rename from src/access_landmask.py rename to src/umfile_utils/access_landmask.py diff --git a/src/add_fields_to_dump.py b/src/umfile_utils/add_fields_to_dump.py similarity index 100% rename from src/add_fields_to_dump.py rename to src/umfile_utils/add_fields_to_dump.py diff --git a/src/ancil2netcdf.py b/src/umfile_utils/ancil2netcdf.py similarity index 100% rename from src/ancil2netcdf.py rename to src/umfile_utils/ancil2netcdf.py diff --git a/src/change_calendar.py b/src/umfile_utils/change_calendar.py similarity index 100% rename from src/change_calendar.py rename to src/umfile_utils/change_calendar.py diff --git a/src/change_calendar365.py b/src/umfile_utils/change_calendar365.py similarity index 100% rename from src/change_calendar365.py rename to src/umfile_utils/change_calendar365.py diff --git a/src/change_calendar_header.py 
b/src/umfile_utils/change_calendar_header.py similarity index 100% rename from src/change_calendar_header.py rename to src/umfile_utils/change_calendar_header.py diff --git a/src/change_dump_date.py b/src/umfile_utils/change_dump_date.py similarity index 100% rename from src/change_dump_date.py rename to src/umfile_utils/change_dump_date.py diff --git a/src/change_dump_date2.py b/src/umfile_utils/change_dump_date2.py similarity index 100% rename from src/change_dump_date2.py rename to src/umfile_utils/change_dump_date2.py diff --git a/src/change_endianness.py b/src/umfile_utils/change_endianness.py similarity index 100% rename from src/change_endianness.py rename to src/umfile_utils/change_endianness.py diff --git a/src/change_stashcode.py b/src/umfile_utils/change_stashcode.py similarity index 100% rename from src/change_stashcode.py rename to src/umfile_utils/change_stashcode.py diff --git a/src/check_land_overlap.py b/src/umfile_utils/check_land_overlap.py similarity index 100% rename from src/check_land_overlap.py rename to src/umfile_utils/check_land_overlap.py diff --git a/src/check_land_overlap_idx.py b/src/umfile_utils/check_land_overlap_idx.py similarity index 100% rename from src/check_land_overlap_idx.py rename to src/umfile_utils/check_land_overlap_idx.py diff --git a/src/count_land.py b/src/umfile_utils/count_land.py similarity index 100% rename from src/count_land.py rename to src/umfile_utils/count_land.py diff --git a/src/count_tiles.py b/src/umfile_utils/count_tiles.py similarity index 100% rename from src/count_tiles.py rename to src/umfile_utils/count_tiles.py diff --git a/src/eqtoll.py b/src/umfile_utils/eqtoll.py similarity index 100% rename from src/eqtoll.py rename to src/umfile_utils/eqtoll.py diff --git a/src/fix_polar_anom.py b/src/umfile_utils/fix_polar_anom.py similarity index 100% rename from src/fix_polar_anom.py rename to src/umfile_utils/fix_polar_anom.py diff --git a/src/get_calendar.py b/src/umfile_utils/get_calendar.py similarity 
index 100% rename from src/get_calendar.py rename to src/umfile_utils/get_calendar.py diff --git a/src/limit_soilmoisture.py b/src/umfile_utils/limit_soilmoisture.py similarity index 100% rename from src/limit_soilmoisture.py rename to src/umfile_utils/limit_soilmoisture.py diff --git a/src/interpolate_ancillary.py b/src/umfile_utils/interpolate_ancillary.py similarity index 100% rename from src/interpolate_ancillary.py rename to src/umfile_utils/interpolate_ancillary.py diff --git a/src/iris_stashname.py b/src/umfile_utils/iris_stashname.py similarity index 100% rename from src/iris_stashname.py rename to src/umfile_utils/iris_stashname.py diff --git a/src/lbcdump.py b/src/umfile_utils/lbcdump.py similarity index 100% rename from src/lbcdump.py rename to src/umfile_utils/lbcdump.py diff --git a/src/levelheights.py b/src/umfile_utils/levelheights.py similarity index 100% rename from src/levelheights.py rename to src/umfile_utils/levelheights.py diff --git a/src/mask_edit.py b/src/umfile_utils/mask_edit.py similarity index 100% rename from src/mask_edit.py rename to src/umfile_utils/mask_edit.py diff --git a/src/mergefiles.py b/src/umfile_utils/mergefiles.py similarity index 100% rename from src/mergefiles.py rename to src/umfile_utils/mergefiles.py diff --git a/src/mergefiles_region.py b/src/umfile_utils/mergefiles_region.py similarity index 100% rename from src/mergefiles_region.py rename to src/umfile_utils/mergefiles_region.py diff --git a/src/nccmp_um2netcdf.py b/src/umfile_utils/nccmp_um2netcdf.py similarity index 100% rename from src/nccmp_um2netcdf.py rename to src/umfile_utils/nccmp_um2netcdf.py diff --git a/src/polar_anom.py b/src/umfile_utils/polar_anom.py similarity index 100% rename from src/polar_anom.py rename to src/umfile_utils/polar_anom.py diff --git a/src/prog_fields_mismatch.py b/src/umfile_utils/prog_fields_mismatch.py similarity index 100% rename from src/prog_fields_mismatch.py rename to src/umfile_utils/prog_fields_mismatch.py diff --git
a/src/read_stashmaster.py b/src/umfile_utils/read_stashmaster.py similarity index 100% rename from src/read_stashmaster.py rename to src/umfile_utils/read_stashmaster.py diff --git a/src/remove_stash_duplicates.py b/src/umfile_utils/remove_stash_duplicates.py similarity index 100% rename from src/remove_stash_duplicates.py rename to src/umfile_utils/remove_stash_duplicates.py diff --git a/src/reset_neg_snow.py b/src/umfile_utils/reset_neg_snow.py similarity index 100% rename from src/reset_neg_snow.py rename to src/umfile_utils/reset_neg_snow.py diff --git a/src/show_land_overlap.py b/src/umfile_utils/show_land_overlap.py similarity index 100% rename from src/show_land_overlap.py rename to src/umfile_utils/show_land_overlap.py diff --git a/src/sortum.py b/src/umfile_utils/sortum.py similarity index 100% rename from src/sortum.py rename to src/umfile_utils/sortum.py diff --git a/src/split_times.py b/src/umfile_utils/split_times.py similarity index 100% rename from src/split_times.py rename to src/umfile_utils/split_times.py diff --git a/src/sstice_ancil.py b/src/umfile_utils/sstice_ancil.py similarity index 100% rename from src/sstice_ancil.py rename to src/umfile_utils/sstice_ancil.py diff --git a/src/stashvar.py b/src/umfile_utils/stashvar.py similarity index 100% rename from src/stashvar.py rename to src/umfile_utils/stashvar.py diff --git a/src/stashvar_cmip6.py b/src/umfile_utils/stashvar_cmip6.py similarity index 100% rename from src/stashvar_cmip6.py rename to src/umfile_utils/stashvar_cmip6.py diff --git a/src/stashvar_validate_stdnames.py b/src/umfile_utils/stashvar_validate_stdnames.py similarity index 99% rename from src/stashvar_validate_stdnames.py rename to src/umfile_utils/stashvar_validate_stdnames.py index c1315144..7b544478 100644 --- a/src/stashvar_validate_stdnames.py +++ b/src/umfile_utils/stashvar_validate_stdnames.py @@ -18,4 +18,4 @@ if STASH_TO_CF[key].standard_name and std_name and STASH_TO_CF[key].standard_name != std_name: print("Name 
mismatch", v, std_name, STASH_TO_CF[key].standard_name) if STASH_TO_CF[key].units and units and STASH_TO_CF[key].units != units: - print("Units mismatch", v, units, STASH_TO_CF[key].units) \ No newline at end of file + print("Units mismatch", v, units, STASH_TO_CF[key].units) diff --git a/src/subset_ancillary.py b/src/umfile_utils/subset_ancillary.py similarity index 100% rename from src/subset_ancillary.py rename to src/umfile_utils/subset_ancillary.py diff --git a/src/subset_dump.py b/src/umfile_utils/subset_dump.py similarity index 100% rename from src/subset_dump.py rename to src/umfile_utils/subset_dump.py diff --git a/src/um2netcdf.py b/src/umfile_utils/um2netcdf.py similarity index 100% rename from src/um2netcdf.py rename to src/umfile_utils/um2netcdf.py diff --git a/src/um2netcdf4.py b/src/umfile_utils/um2netcdf4.py similarity index 100% rename from src/um2netcdf4.py rename to src/umfile_utils/um2netcdf4.py diff --git a/src/um2netcdf4_cmip6.py b/src/umfile_utils/um2netcdf4_cmip6.py similarity index 100% rename from src/um2netcdf4_cmip6.py rename to src/umfile_utils/um2netcdf4_cmip6.py diff --git a/src/um2netcdf4_dev.py b/src/umfile_utils/um2netcdf4_dev.py similarity index 100% rename from src/um2netcdf4_dev.py rename to src/umfile_utils/um2netcdf4_dev.py diff --git a/src/um2netcdf_all.py b/src/umfile_utils/um2netcdf_all.py similarity index 100% rename from src/um2netcdf_all.py rename to src/umfile_utils/um2netcdf_all.py diff --git a/src/um2netcdf_iris.py b/src/umfile_utils/um2netcdf_iris.py similarity index 100% rename from src/um2netcdf_iris.py rename to src/umfile_utils/um2netcdf_iris.py diff --git a/src/um2netcdf_iris_mon.py b/src/umfile_utils/um2netcdf_iris_mon.py similarity index 100% rename from src/um2netcdf_iris_mon.py rename to src/umfile_utils/um2netcdf_iris_mon.py diff --git a/src/um_complexity.py b/src/umfile_utils/um_complexity.py similarity index 100% rename from src/um_complexity.py rename to src/umfile_utils/um_complexity.py diff --git 
a/src/um_copy_field.py b/src/umfile_utils/um_copy_field.py similarity index 100% rename from src/um_copy_field.py rename to src/umfile_utils/um_copy_field.py diff --git a/src/um_fields_subset.py b/src/umfile_utils/um_fields_subset.py similarity index 100% rename from src/um_fields_subset.py rename to src/umfile_utils/um_fields_subset.py diff --git a/src/um_fieldsfile_dump.py b/src/umfile_utils/um_fieldsfile_dump.py similarity index 100% rename from src/um_fieldsfile_dump.py rename to src/umfile_utils/um_fieldsfile_dump.py diff --git a/src/um_fileheaders.py b/src/umfile_utils/um_fileheaders.py similarity index 100% rename from src/um_fileheaders.py rename to src/umfile_utils/um_fileheaders.py diff --git a/src/um_grid_flip.py b/src/umfile_utils/um_grid_flip.py similarity index 100% rename from src/um_grid_flip.py rename to src/umfile_utils/um_grid_flip.py diff --git a/src/um_modify_field.py b/src/umfile_utils/um_modify_field.py similarity index 100% rename from src/um_modify_field.py rename to src/umfile_utils/um_modify_field.py diff --git a/src/um_replace_field.py b/src/umfile_utils/um_replace_field.py similarity index 100% rename from src/um_replace_field.py rename to src/umfile_utils/um_replace_field.py diff --git a/src/um_timeseries.py b/src/umfile_utils/um_timeseries.py similarity index 100% rename from src/um_timeseries.py rename to src/umfile_utils/um_timeseries.py diff --git a/src/um_zero_field.py b/src/umfile_utils/um_zero_field.py similarity index 100% rename from src/um_zero_field.py rename to src/umfile_utils/um_zero_field.py diff --git a/src/umfile.py b/src/umfile_utils/umfile.py similarity index 100% rename from src/umfile.py rename to src/umfile_utils/umfile.py diff --git a/src/umv2netcdf.py b/src/umfile_utils/umv2netcdf.py similarity index 100% rename from src/umv2netcdf.py rename to src/umfile_utils/umv2netcdf.py diff --git a/src/umv2reg.py b/src/umfile_utils/umv2reg.py similarity index 100% rename from src/umv2reg.py rename to 
src/umfile_utils/umv2reg.py diff --git a/src/valid_times.py b/src/umfile_utils/valid_times.py similarity index 100% rename from src/valid_times.py rename to src/umfile_utils/valid_times.py diff --git a/tests/perturbIC_test.py b/tests/perturbIC_test.py new file mode 100644 index 00000000..034d4830 --- /dev/null +++ b/tests/perturbIC_test.py @@ -0,0 +1,229 @@ +import pytest +import sys +from perturbIC import parse_args, create_random_generator, remove_timeseries, is_field_to_perturb, create_default_outname, create_perturbation, AdditionOperator +from unittest.mock import patch, Mock, MagicMock +import numpy as np +import numpy.random as rs + +#This section sets up the testing for the parse args +@pytest.fixture +def fake_args(monkeypatch): + """ + Fixture to set fake command-line arguments. + """ + def _fake_args(args): + monkeypatch.setattr('sys.argv', args) + return args + return _fake_args + + +@pytest.mark.parametrize( + "input_args, expected", + [ + # Case 1: Test only essential arguments. + (["script.py", "input_file"], {"ifile": "input_file", "amplitude": 0.01, "seed": None, "validate": False, "output_path": None}), + # Case 2: Test the amplitude + (["script.py", "input_file", "-a", "0.05"], {"ifile": "input_file", "amplitude": 0.05, "seed": None, "validate": False, "output_path": None}), + # Case 3: Test the validate + (["script.py", "input_file", "-s", "42", "--validate"], {"ifile": "input_file", "amplitude": 0.01, "seed": 42, "validate": True, "output_path": None}), + # Case 4: Inclusion of the output file + (["script.py", "input_file", "-o", "output_file"], {"ifile": "input_file", "amplitude": 0.01, "seed": None, "validate": False, "output_path": "output_file"}), + ], +) +def test_parse_args(fake_args, input_args, expected): + """ + Test the parse_args function with 4 cases, checking behaviour both with and without the optional arguments.
+ """ + fake_args(input_args) + args = parse_args() + for key, value in expected.items(): + assert getattr(args, key) == value + +#This section tests the output file creation. +@pytest.mark.parametrize( + # description of the arguments + "existing_files, filename, expected_output", + [ + # Case 1: Filename with suffix doesn't exist, return filename with suffix + ([], "testfilename", "testfilename_perturbed"), + # Case 2: Filename with suffix exists, returns filename with suffix appending 1 + (["testfilename_perturbed"], "testfilename", "testfilename_perturbed1"), + # Case 3: Filename with suffix and a few numbered versions exist, returns + # filename with suffix and the first numbered version that doesn't exist + ( + ["testfilename_perturbed", "testfilename_perturbed1", "testfilename_perturbed2"], + "testfilename", + "testfilename_perturbed3", + ), + ], +) +@patch("os.path.exists") +def test_create_default_outname_suffix_not_passed(mock_exists, existing_files, filename, expected_output): + """ + Test the function that creates the default output file name, without passing a suffix. + 3 cases tested with pytest.mark.parametrize. + """ + # Mock os.path.exists to simulate the presence of specific files + mock_exists.side_effect = lambda f: f in existing_files + result = create_default_outname(filename) + assert result == expected_output + +@patch("os.path.exists") +def test_create_default_outname_suffix_passed(mock_exists): + """ + Test the function that creates the default output file name, passing a custom suffix. + """ + # Mock os.path.exists to simulate the presence of specific files + mock_exists.return_value = False + filename = "testfilename" + suffix = "testsuffix" + result = create_default_outname(filename, suffix) + expected_output = "testfilenametestsuffix" + assert result == expected_output + +#This section of code tests the removal of the timeseries +class MockField: + """ + Mock class to simulate a field with an lbcode attribute. 
+ """ + def __init__(self, lbcode): + self.lbcode = lbcode + +class MockDumpFile: + """ + Mock class to simulate a mule DumpFile. + """ + def __init__(self, fields): + self.fields = fields + + def copy(self): + """ + Simulate the copy method of a mule DumpFile. + """ + return MockDumpFile(self.fields[:]) + +TIMESERIES_LBCODES = [31320] +@pytest.mark.parametrize( + "input_fields, expected_codes", + [ #Time series is the first field + ([MockField(31320), MockField(1001)], [1001]), + #If it is all timeseries + ([MockField(31320), MockField(31320)], []), + #If none are timeseries + ([MockField(1001), MockField(2002)], [1001, 2002]), + #If there are no files + ([], []), + ], +) +def test_remove_timeseries(input_fields, expected_codes): + """ + Test the remove_timeseries function with various input scenarios. + """ + mock_dumpfile = MockDumpFile(input_fields) + result = remove_timeseries(mock_dumpfile) + result_codes = [field.lbcode for field in result.fields] + + assert result_codes == expected_codes + +@pytest.fixture +def mock_metadata(): + """ + This fixture creates callable mock UM field metadata. + + Outputs + tuple - Two mocked fields (one matching, one not) and a stash code + """ + + # Mock fields with different lbuser4 values + field_theta = MagicMock() + field_not_theta = MagicMock() + + # Correctly set the lbuser4 attribute + field_theta.lbuser4 = 4 + field_not_theta.lbuser4 = 56 + stash_code = 4 + + return field_theta, field_not_theta, stash_code + +def test_is_field_to_perturb(mock_metadata): + + """ + Tests the item code conditional + + Inputs + fixture - A fake list of arrays and a fake index + Outputs + The results of assertion tests.
+ """ + + field_theta, field_not_theta, stash_code = mock_metadata + + # Assertions to verify the function's behavior + assert is_field_to_perturb(field_theta, stash_code) == True, "field_theta should match the stash_code" + assert is_field_to_perturb(field_not_theta, stash_code) == False, "field_not_theta should not match the stash_code" + + +#This section tests creating the perturbation +@pytest.mark.parametrize( + "amplitude, shape, nullify_poles, expected_shape", + [ + (0.5, (10, 20), True, (10, 20)), + (1.0, (5, 5), False, (5, 5)), + (0.3, (3, 7), True, (3, 7)), + ], +) + +def test_create_perturbation(amplitude, shape, nullify_poles, expected_shape): + """ + Test the create_perturbation function with different amplitudes, shapes, and nullify_poles settings. + """ + random_seed = np.random.default_rng(43) + # Create the perturbation + perturbation = create_perturbation(amplitude, random_seed, shape, nullify_poles) + + # Check the shape of the perturbation + assert perturbation.shape == expected_shape, "Perturbation shape does not match expected shape" + + # Check that values are within the range [-amplitude, amplitude] + assert np.all(perturbation >= -amplitude) and np.all(perturbation <= amplitude), \ + "Perturbation values exceed specified amplitude range" + + # Check nullification of poles + if nullify_poles: + assert np.all(perturbation[0, :] == 0) and np.all(perturbation[-1, :] == 0), \ + "Perturbation poles were not nullified as expected" + else: + assert not (np.all(perturbation[0, :] == 0) and np.all(perturbation[-1, :] == 0)), \ + "Perturbation poles should not have been nullified" + +def test_operator_initialization(): + """ + Test the addition operator.. 
+ + Outputs + The results of testing if the perturbation initialization worked + + """ + + # Mock the source field + source_field = MagicMock() + source_field.get_data.return_value = np.array([[1, 2], [3, 4]]) + + # Mock the new field + new_field = MagicMock() + + # Array to add + array_to_add = np.array([[10, 20], [30, 40]]) + + # Create the operator + operator = AdditionOperator(array_to_add) + + # Test transform method + result = operator.transform(source_field, new_field) + + # Expected output + expected = np.array([[11, 22], [33, 44]]) + + # Assertions + np.testing.assert_array_equal(result, expected) +