From 2c1cfb9fe4b29abd2c6224b4e27613eb8737215a Mon Sep 17 00:00:00 2001 From: Jonathan Chico <37243453+JPchico@users.noreply.github.com> Date: Sun, 7 May 2023 20:40:23 +0200 Subject: [PATCH] Modification of the structure to lammps format (#70) * Removing ase dependency on how the structure string is written. Moving the parsing functions to the actual parser folder to make the structure cleaner. --------- Co-authored-by: Jonathan Chico --- aiida_lammps/calculations/base.py | 4 +- aiida_lammps/common/__init__.py | 0 aiida_lammps/common/raw_parsers.py | 360 ------------------ aiida_lammps/common/utils.py | 86 ----- aiida_lammps/data/trajectory.py | 2 +- aiida_lammps/parsers/base.py | 2 +- .../inputfile.py} | 2 +- aiida_lammps/parsers/parse_raw/__init__.py | 13 + aiida_lammps/parsers/parse_raw/final_data.py | 33 ++ aiida_lammps/parsers/parse_raw/lammps_log.py | 93 +++++ .../parse_raw/trajectory.py} | 4 +- .../utils.py} | 97 ++++- .../{common => parsers}/variables_types.json | 0 docs/source/nitpick-exceptions | 2 +- docs/source/users/get_started.md | 2 +- tests/test_generate_inputs.py | 14 +- tests/test_generate_structure.py | 2 +- tests/test_parsers.py | 2 +- tests/test_potential_data.py | 4 +- tests/test_trajectory.py | 2 +- 20 files changed, 238 insertions(+), 486 deletions(-) delete mode 100644 aiida_lammps/common/__init__.py delete mode 100644 aiida_lammps/common/raw_parsers.py delete mode 100644 aiida_lammps/common/utils.py rename aiida_lammps/{common/input_generator.py => parsers/inputfile.py} (99%) create mode 100644 aiida_lammps/parsers/parse_raw/__init__.py create mode 100644 aiida_lammps/parsers/parse_raw/final_data.py create mode 100644 aiida_lammps/parsers/parse_raw/lammps_log.py rename aiida_lammps/{common/parse_trajectory.py => parsers/parse_raw/trajectory.py} (98%) rename aiida_lammps/{common/generate_structure.py => parsers/utils.py} (63%) rename aiida_lammps/{common => parsers}/variables_types.json (100%) diff --git a/aiida_lammps/calculations/base.py 
b/aiida_lammps/calculations/base.py index 76792b7..e739aaf 100644 --- a/aiida_lammps/calculations/base.py +++ b/aiida_lammps/calculations/base.py @@ -12,10 +12,10 @@ from aiida.common import datastructures, exceptions from aiida.engine import CalcJob -from aiida_lammps.common.generate_structure import generate_lammps_structure -from aiida_lammps.common.input_generator import generate_input_file from aiida_lammps.data.potential import LammpsPotentialData from aiida_lammps.data.trajectory import LammpsTrajectory +from aiida_lammps.parsers.inputfile import generate_input_file +from aiida_lammps.parsers.utils import generate_lammps_structure class LammpsBaseCalculation(CalcJob): diff --git a/aiida_lammps/common/__init__.py b/aiida_lammps/common/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/aiida_lammps/common/raw_parsers.py b/aiida_lammps/common/raw_parsers.py deleted file mode 100644 index c295711..0000000 --- a/aiida_lammps/common/raw_parsers.py +++ /dev/null @@ -1,360 +0,0 @@ -"""Set of functions to parse the unformatted files generated by LAMMPS. -""" -# pylint: disable=fixme -import ast -import re -from typing import Union - -import numpy as np -import yaml - - -def parse_logfile(filename: str = None, file_contents: str = None) -> Union[dict, dict]: - """ - Parse the log.lammps file. - - This will gather the time dependent data stored in the log file and - stores it as a dictionary. It will also gather single quantities and stores - them into a different dictionary. 
- - :param filename: name of the lammps log file, defaults to None - :type filename: str, optional - :param file_contents: contents of the lammps log file, defaults to None - :type file_contents: str, optional - :return: dictionary with the time dependent data, dictionary with the global data - :rtype: Union[dict, dict] - """ - # pylint: disable=too-many-branches, too-many-locals - - if filename is None and file_contents is None: - return None - - if filename is not None: - - try: - with open(filename) as handler: - data = handler.read() - data = data.split("\n") - except OSError: - return None - - if file_contents is not None: - data = file_contents.split("\n") - - header_line_position = -1 - header_line = "" - _data = [] - end_found = False - parsed_data = {} - global_parsed_data = {} - - perf_regex = re.compile(r".*Performance\:.*\,\s+([0-9\.]*)\stimesteps\/s.*") - performance_match = perf_regex.search(file_contents or "") - if performance_match: - global_parsed_data["steps_per_second"] = float(performance_match.group(1)) - - for index, line in enumerate(data): - line = line.strip() - if "binsize" in line: - global_parsed_data["binsize"] = ast.literal_eval( - line.split()[2].replace(",", "") - ) - global_parsed_data["bins"] = [ - ast.literal_eval(entry) for entry in line.split()[5:] - ] - if "ghost atom cutoff" in line: - global_parsed_data["ghost_atom_cutoff"] = ast.literal_eval(line.split()[-1]) - if "master list distance cutoff" in line: - global_parsed_data["master_list_distance_cutoff"] = ast.literal_eval( - line.split()[-1] - ) - if "max neighbors/atom" in line: - global_parsed_data["max_neighbors_atom"] = ast.literal_eval( - line.split()[2].replace(",", "") - ) - if "units" in line: - global_parsed_data["units_style"] = line.split()[1] - if "Total wall time:" in line: - global_parsed_data["total_wall_time"] = line.split()[-1] - if "bin:" in line: - global_parsed_data["bin"] = line.split()[-1] - if line.startswith("Step"): - header_line_position = index - 
header_line = [ - re.sub("[^a-zA-Z0-9_]", "__", entry) for entry in line.split() - ] - if header_line_position > 0 and index != header_line_position and not end_found: - if not line.split()[0].replace(".", "", 1).isdigit(): - end_found = True - if header_line_position > 0 and index != header_line_position and not end_found: - _data.append([ast.literal_eval(entry) for entry in line.split()]) - _data = np.asarray(_data) - for index, entry in enumerate(header_line): - parsed_data[entry] = _data[:, index].tolist() - return {"time_dependent": parsed_data, "global": global_parsed_data} - - -def parse_final_data(filename: str = None, file_contents: str = None) -> dict: - """ - Read the yaml file with the global final data. - - The final iteration for each of computed variables is sotred into a yaml - file which is then read and stored as a dictionary. - - :param filename: name of the yaml file where the variables are stored, - defaults to None - :type filename: str, optional - :param file_contents: contents of the yaml file where the variables are stored, - defaults to None - :type file_contents: str, optional - :return: dictionary with the final compute variables - :rtype: dict - """ - - if filename is None and file_contents is None: - return None - if filename is not None: - try: - with open(filename) as handle: - data = yaml.load(handle, Loader=yaml.Loader) - except OSError: - data = None - if file_contents is not None: - data = yaml.load(file_contents, Loader=yaml.Loader) - return data - - -def read_log_file(logdata_txt, compute_stress=False): - """Read the log.lammps file.""" - # pylint: disable= too-many-locals, too-many-branches - data = logdata_txt.splitlines() - - if not data: - raise OSError("The logfile is empty") - - perf_regex = re.compile( - r"Performance\:\s(.+)\sns\/day,\s(.+)\shours\/ns\,\s(.+)\stimesteps\/s\s*" - ) - - data_dict = {} - cell_params = None - stress_params = None - found_end = False - for _, line in enumerate(data): - line = line.strip() - 
if "END_OF_COMP" in line: - found_end = True - elif "Total wall time:" in line: - data_dict["total_wall_time"] = line.split()[-1] - # These are handled in LammpsBaseParser.add_warnings_and_errors - # if line.strip().startswith("WARNING"): - # data_dict.setdefault("warnings", []).append(line.strip()) - # if line.strip().startswith("ERROR"): - # data_dict.setdefault("errors", []).append(line.strip()) - elif perf_regex.match(line): - _, _, step_sec = perf_regex.match(line).groups() - data_dict.setdefault("steps_per_second", []).append(float(step_sec)) - elif "units" in line: - data_dict["units_style"] = line.split()[1] - elif line.startswith("final_energy:"): - data_dict["energy"] = float(line.split()[1]) - elif line.startswith("final_variable:"): - if "final_variables" not in data_dict: - data_dict["final_variables"] = {} - data_dict["final_variables"][line.split()[1]] = float(line.split()[3]) - - elif line.startswith("final_cell:"): - cell_params = [float(v) for v in line.split()[1:10]] - elif line.startswith("final_stress:"): - stress_params = [float(v) for v in line.split()[1:7]] - - if not compute_stress: - return {"data": data_dict, "found_end": found_end} - - if cell_params is None: - raise OSError("'final_cell' could not be found") - if stress_params is None: - raise OSError("'final_stress' could not be found") - - xlo, xhi, box_xy, ylo, yhi, box_xz, zlo, zhi, box_yz = cell_params - super_cell = np.array( - [ - [xhi - xlo, box_xy, box_xz], - [0, yhi - ylo, box_yz], - [0, 0, zhi - zlo], - ] - ) - cell = super_cell.T - if np.linalg.det(cell) < 0: - cell = -1.0 * cell - volume = np.linalg.det(cell) - - box_xx, box_yy, box_zz, box_xy, box_xz, box_yz = stress_params - stress = np.array( - [ - [box_xx, box_xy, box_xz], - [box_xy, box_yy, box_yz], - [box_xz, box_yz, box_zz], - ], - dtype=float, - ) - stress = -stress / volume # to get stress in units of pressure - - return {"data": data_dict, "cell": cell, "stress": stress, "found_end": found_end} - - -def 
get_units_dict(style, quantities, suffix="_units"): - """Return a mapping of the unit name to the units, for a particular style. - - :param style: the unit style set in the lammps input - :type style: str - :param quantities: the quantities to get units for - :type quantities: list of str - :rtype: dict - - """ - units_dict = { - "real": { - "mass": "grams/mole", - "distance": "Angstroms", - "time": "femtoseconds", - "energy": "Kcal/mole", - "velocity": "Angstroms/femtosecond", - "force": "Kcal/mole-Angstrom", - "torque": "Kcal/mole", - "temperature": "Kelvin", - "pressure": "atmospheres", - "dynamic_viscosity": "Poise", - "charge": "e", # multiple of electron charge (1.0 is a proton) - "dipole": "charge*Angstroms", - "electric field": "volts/Angstrom", - "density": "gram/cm^dim", - }, - "metal": { - "mass": "grams/mole", - "distance": "Angstroms", - "time": "picoseconds", - "energy": "eV", - "velocity": "Angstroms/picosecond", - "force": "eV/Angstrom", - "torque": "eV", - "temperature": "Kelvin", - "pressure": "bars", - "dynamic_viscosity": "Poise", - "charge": "e", # multiple of electron charge (1.0 is a proton) - "dipole": "charge*Angstroms", - "electric field": "volts/Angstrom", - "density": "gram/cm^dim", - }, - "si": { - "mass": "kilograms", - "distance": "meters", - "time": "seconds", - "energy": "Joules", - "velocity": "meters/second", - "force": "Newtons", - "torque": "Newton-meters", - "temperature": "Kelvin", - "pressure": "Pascals", - "dynamic_viscosity": "Pascal*second", - "charge": "Coulombs", # (1.6021765e-19 is a proton) - "dipole": "Coulombs*meters", - "electric field": "volts/meter", - "density": "kilograms/meter^dim", - }, - "cgs": { - "mass": "grams", - "distance": "centimeters", - "time": "seconds", - "energy": "ergs", - "velocity": "centimeters/second", - "force": "dynes", - "torque": "dyne-centimeters", - "temperature": "Kelvin", - "pressure": "dyne/cm^2", # or barye': '1.0e-6 bars - "dynamic_viscosity": "Poise", - "charge": "statcoulombs", # 
or esu (4.8032044e-10 is a proton) - "dipole": "statcoul-cm", #: '10^18 debye - "electric_field": "statvolt/cm", # or dyne/esu - "density": "grams/cm^dim", - }, - "electron": { - "mass": "amu", - "distance": "Bohr", - "time": "femtoseconds", - "energy": "Hartrees", - "velocity": "Bohr/atu", # [1.03275e-15 seconds] - "force": "Hartrees/Bohr", - "temperature": "Kelvin", - "pressure": "Pascals", - "charge": "e", # multiple of electron charge (1.0 is a proton) - "dipole_moment": "Debye", - "electric_field": "volts/cm", - }, - "micro": { - "mass": "picograms", - "distance": "micrometers", - "time": "microseconds", - "energy": "picogram-micrometer^2/microsecond^2", - "velocity": "micrometers/microsecond", - "force": "picogram-micrometer/microsecond^2", - "torque": "picogram-micrometer^2/microsecond^2", - "temperature": "Kelvin", - "pressure": "picogram/(micrometer-microsecond^2)", - "dynamic_viscosity": "picogram/(micrometer-microsecond)", - "charge": "picocoulombs", # (1.6021765e-7 is a proton) - "dipole": "picocoulomb-micrometer", - "electric field": "volt/micrometer", - "density": "picograms/micrometer^dim", - }, - "nano": { - "mass": "attograms", - "distance": "nanometers", - "time": "nanoseconds", - "energy": "attogram-nanometer^2/nanosecond^2", - "velocity": "nanometers/nanosecond", - "force": "attogram-nanometer/nanosecond^2", - "torque": "attogram-nanometer^2/nanosecond^2", - "temperature": "Kelvin", - "pressure": "attogram/(nanometer-nanosecond^2)", - "dynamic_viscosity": "attogram/(nanometer-nanosecond)", - "charge": "e", # multiple of electron charge (1.0 is a proton) - "dipole": "charge-nanometer", - "electric_field": "volt/nanometer", - "density": "attograms/nanometer^dim", - }, - } - out_dict = {} - for quantity in quantities: - out_dict[quantity + suffix] = units_dict[style][quantity] - return out_dict - - -def convert_units(value, style, unit_type, out_units): - """Convert the units of the system from one set to another. 
- - :param value: value to be converted - :type value: float - :param style: LAMMPS unit style name - :type style: str - :param unit_type: what kind of unit is being converted - :type unit_type: str - :param out_units: what kind of units one wants to have after conversion - :type out_units: str - :raises NotImplementedError: if the type of unit to be converted is not "time" - :return: converted unit to the new unit type - :rtype: float - """ - conversion = { - "seconds": 1, - "milliseconds": 1e-3, - "microseconds": 1e-6, - "nanoseconds": 1e-9, - "picoseconds": 1e-12, - "femtoseconds": 1e-15, - } - if unit_type != "time" or out_units not in conversion: - # TODO use https://pint.readthedocs.io - raise NotImplementedError - in_units = get_units_dict(style, [unit_type], "")[unit_type] - return value * conversion[in_units] * (1.0 / conversion[out_units]) diff --git a/aiida_lammps/common/utils.py b/aiida_lammps/common/utils.py deleted file mode 100644 index d907c39..0000000 --- a/aiida_lammps/common/utils.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Utility functions for the handling of the input files""" -from collections.abc import Iterable -from datetime import datetime - -from dateutil.parser import parse as get_date - - -def generate_header(value: str) -> str: - """ - Generate the header for the blocks. - - :param value: string indicating the input block - :type value: str - :return: header/footer for the input block - :rtype: str - """ - return "#" + value.center(80, "-") + "#\n" - - -def flatten(full_list: list) -> list: - """Flattens a list of list into a flat list. - - :param full_list: list of lists to be flattened - :type full_list: list - :yield: flattened list - :rtype: list - """ - for element in full_list: - if isinstance(element, Iterable) and not isinstance(element, (str, bytes)): - yield from flatten(element) - else: - yield element - - -def convert_date_string(string): - """converts date string e.g. 
'10 Nov 2017' to datetime object - if None, return todays date - '""" - - if string is None: - date = datetime.today() - else: - date = get_date(string) - return date - - -def convert_to_str(value): - """convert True/False to yes/no and all values to strings""" - if isinstance(value, bool): - if value: - return "yes" - return "no" - return str(value) - - -def _convert_values(value): - if isinstance(value, (tuple, list)): - return " ".join([convert_to_str(v) for v in value]) - return convert_to_str(value) - - -def join_keywords(dct, ignore=None): - """join a dict of {keyword: value, ...} into a string 'keyword value ...' - - value can be a single value or a list/tuple of values - """ - ignore = [] if not ignore else ignore - return " ".join( - [ - f"{k} {_convert_values(dct[k])}" - for k in sorted(dct.keys()) - if k not in ignore - ] - ) - - -def get_path(dct, path, default=None, raise_error=True): - """return the value from a key path in a nested dictionary""" - subdct = dct - for i, key in enumerate(path): - if not isinstance(subdct, dict) or key not in subdct: - if raise_error: - raise KeyError(f"path does not exist in dct: {path[0:i + 1]}") - return default - subdct = subdct[key] - return subdct diff --git a/aiida_lammps/data/trajectory.py b/aiida_lammps/data/trajectory.py index 4c44800..6d0198e 100644 --- a/aiida_lammps/data/trajectory.py +++ b/aiida_lammps/data/trajectory.py @@ -12,7 +12,7 @@ from aiida import orm from aiida.common.exceptions import ValidationError -from aiida_lammps.common.parse_trajectory import ( +from aiida_lammps.parsers.parse_raw import ( create_structure, iter_trajectories, parse_step, diff --git a/aiida_lammps/parsers/base.py b/aiida_lammps/parsers/base.py index e351c0f..1f59a00 100644 --- a/aiida_lammps/parsers/base.py +++ b/aiida_lammps/parsers/base.py @@ -14,8 +14,8 @@ from aiida.parsers.parser import Parser import numpy as np -from aiida_lammps.common.raw_parsers import parse_final_data, parse_logfile from 
aiida_lammps.data.trajectory import LammpsTrajectory +from aiida_lammps.parsers.parse_raw import parse_final_data, parse_logfile class LammpsBaseParser(Parser): diff --git a/aiida_lammps/common/input_generator.py b/aiida_lammps/parsers/inputfile.py similarity index 99% rename from aiida_lammps/common/input_generator.py rename to aiida_lammps/parsers/inputfile.py index 35a4a7d..28651c8 100644 --- a/aiida_lammps/common/input_generator.py +++ b/aiida_lammps/parsers/inputfile.py @@ -20,8 +20,8 @@ import jsonschema import numpy as np -from aiida_lammps.common.utils import flatten, generate_header from aiida_lammps.data.potential import LammpsPotentialData +from aiida_lammps.parsers.utils import flatten, generate_header def generate_input_file( diff --git a/aiida_lammps/parsers/parse_raw/__init__.py b/aiida_lammps/parsers/parse_raw/__init__.py new file mode 100644 index 0000000..ad6a54b --- /dev/null +++ b/aiida_lammps/parsers/parse_raw/__init__.py @@ -0,0 +1,13 @@ +"""Set of functions to parse the unformatted raw files generated by lammps""" + +from .final_data import parse_final_data +from .lammps_log import parse_logfile +from .trajectory import create_structure, iter_trajectories, parse_step + +__all__ = ( + "parse_logfile", + "parse_final_data", + "parse_step", + "create_structure", + "iter_trajectories", +) diff --git a/aiida_lammps/parsers/parse_raw/final_data.py b/aiida_lammps/parsers/parse_raw/final_data.py new file mode 100644 index 0000000..73953ce --- /dev/null +++ b/aiida_lammps/parsers/parse_raw/final_data.py @@ -0,0 +1,33 @@ +"""Set of functions to parse the files containing the final variables printed by LAMMPS""" + +import yaml + + +def parse_final_data(filename: str = None, file_contents: str = None) -> dict: + """ + Read the yaml file with the global final data. + + The final iteration for each of computed variables is stored into a yaml + file which is then read and stored as a dictionary. 
+ + :param filename: name of the yaml file where the variables are stored, + defaults to None + :type filename: str, optional + :param file_contents: contents of the yaml file where the variables are stored, + defaults to None + :type file_contents: str, optional + :return: dictionary with the final compute variables + :rtype: dict + """ + + if filename is None and file_contents is None: + return None + if filename is not None: + try: + with open(filename) as handle: + data = yaml.load(handle, Loader=yaml.Loader) + except OSError: + data = None + if file_contents is not None: + data = yaml.load(file_contents, Loader=yaml.Loader) + return data diff --git a/aiida_lammps/parsers/parse_raw/lammps_log.py b/aiida_lammps/parsers/parse_raw/lammps_log.py new file mode 100644 index 0000000..ff9a774 --- /dev/null +++ b/aiida_lammps/parsers/parse_raw/lammps_log.py @@ -0,0 +1,93 @@ +"""Set of functions to parse the unformatted log files generated by LAMMPS. +""" +# pylint: disable=fixme +import ast +import re +from typing import Union + +import numpy as np + + +def parse_logfile(filename: str = None, file_contents: str = None) -> Union[dict, dict]: + """ + Parse the log.lammps file. + + This will gather the time dependent data stored in the log file and + stores it as a dictionary. It will also gather single quantities and stores + them into a different dictionary. 
+ + :param filename: name of the lammps log file, defaults to None + :type filename: str, optional + :param file_contents: contents of the lammps log file, defaults to None + :type file_contents: str, optional + :return: dictionary with the time dependent data, dictionary with the global data + :rtype: Union[dict, dict] + """ + # pylint: disable=too-many-branches, too-many-locals + + if filename is None and file_contents is None: + return None + + if filename is not None: + + try: + with open(filename) as handler: + data = handler.read() + data = data.split("\n") + except OSError: + return None + + if file_contents is not None: + data = file_contents.split("\n") + + header_line_position = -1 + header_line = "" + _data = [] + end_found = False + parsed_data = {} + global_parsed_data = {} + + perf_regex = re.compile(r".*Performance\:.*\,\s+([0-9\.]*)\stimesteps\/s.*") + performance_match = perf_regex.search(file_contents or "") + if performance_match: + global_parsed_data["steps_per_second"] = float(performance_match.group(1)) + + for index, line in enumerate(data): + line = line.strip() + if "binsize" in line: + global_parsed_data["binsize"] = ast.literal_eval( + line.split()[2].replace(",", "") + ) + global_parsed_data["bins"] = [ + ast.literal_eval(entry) for entry in line.split()[5:] + ] + if "ghost atom cutoff" in line: + global_parsed_data["ghost_atom_cutoff"] = ast.literal_eval(line.split()[-1]) + if "master list distance cutoff" in line: + global_parsed_data["master_list_distance_cutoff"] = ast.literal_eval( + line.split()[-1] + ) + if "max neighbors/atom" in line: + global_parsed_data["max_neighbors_atom"] = ast.literal_eval( + line.split()[2].replace(",", "") + ) + if "units" in line: + global_parsed_data["units_style"] = line.split()[1] + if "Total wall time:" in line: + global_parsed_data["total_wall_time"] = line.split()[-1] + if "bin:" in line: + global_parsed_data["bin"] = line.split()[-1] + if line.startswith("Step"): + header_line_position = index + 
header_line = [ + re.sub("[^a-zA-Z0-9_]", "__", entry) for entry in line.split() + ] + if header_line_position > 0 and index != header_line_position and not end_found: + if not line.split()[0].replace(".", "", 1).isdigit(): + end_found = True + if header_line_position > 0 and index != header_line_position and not end_found: + _data.append([ast.literal_eval(entry) for entry in line.split()]) + _data = np.asarray(_data) + for index, entry in enumerate(header_line): + parsed_data[entry] = _data[:, index].tolist() + return {"time_dependent": parsed_data, "global": global_parsed_data} diff --git a/aiida_lammps/common/parse_trajectory.py b/aiida_lammps/parsers/parse_raw/trajectory.py similarity index 98% rename from aiida_lammps/common/parse_trajectory.py rename to aiida_lammps/parsers/parse_raw/trajectory.py index 0b6e70e..8e0a089 100644 --- a/aiida_lammps/common/parse_trajectory.py +++ b/aiida_lammps/parsers/parse_raw/trajectory.py @@ -12,7 +12,7 @@ ) -def iter_step_lines(file_obj): +def _iter_step_lines(file_obj): """Parse the lines containing the time step information :param file_obj: file object that is being parsed @@ -109,7 +109,7 @@ def parse_step(lines, initial_line=0) -> namedtuple: def iter_trajectories(file_obj): """Parse a LAMMPS Trajectory file, yielding data for each time step.""" - for line_num, lines in iter_step_lines(file_obj): + for line_num, lines in _iter_step_lines(file_obj): yield parse_step(lines, line_num) diff --git a/aiida_lammps/common/generate_structure.py b/aiida_lammps/parsers/utils.py similarity index 63% rename from aiida_lammps/common/generate_structure.py rename to aiida_lammps/parsers/utils.py index 282f07c..335483a 100644 --- a/aiida_lammps/common/generate_structure.py +++ b/aiida_lammps/parsers/utils.py @@ -1,19 +1,11 @@ -""" -Creation of the structure file content. 
- -As allowing the users to create their lattices using LAMMPS' would be too -complex, one must ensure that the aiida StructureData is written to file in -a format that is compatible to LAMMPS. - -In the case of non-orthogonal structures, this will take care of generating -a triclinic cell compatible with what LAMMPS expects. -""" -import typing +"""Utility functions for the handling of the input files""" +from collections.abc import Iterable +from typing import Union import numpy as np -def transform_cell(cell) -> typing.Union[np.array, np.array]: +def _transform_cell(cell) -> Union[np.array, np.array]: """Transform the cell to an orientation, compatible with LAMMPS LAMMPS requires the simulation cell to be in the format of a @@ -23,7 +15,7 @@ def transform_cell(cell) -> typing.Union[np.array, np.array]: :param cell: crystal cell of the original structure :returns: LAMMPS compatible cell, transformation between original and final cell - :rtype: typing.Union[np.array, np.array] + :rtype: Union[np.array, np.array] """ cell = np.array(cell) transform, upper_tri = np.linalg.qr(cell.T, mode="complete") @@ -47,8 +39,15 @@ def generate_lammps_structure( charge_dict: dict = None, round_dp: float = None, docstring: str = "generated by aiida_lammps", -) -> typing.Union[str, np.array]: - """Create lammps input structure file content. +) -> Union[str, np.array]: + """Creation of the structure file content. + + As allowing the users to create their lattices using LAMMPS' would be too + complex, one must ensure that the aiida StructureData is written to file in + a format that is compatible to LAMMPS. + + In the case of non-orthogonal structures, this will take care of generating + a triclinic cell compatible with what LAMMPS expects. 
:param structure: the structure to use in the simulation :type structure: orm.StructureData @@ -63,7 +62,7 @@ def generate_lammps_structure( :raises ValueError: if the atom_style does not belong to either 'atomic' or 'charge' :return: the structure file content, the transformation matrix applied to the structure cell and coordinates - :rtype: typing.Union[str, np.array] + :rtype: Union[str, np.array] """ # pylint: disable=too-many-locals @@ -87,9 +86,12 @@ def generate_lammps_structure( filestring += f"{len(structure.sites)} atoms\n" filestring += f"{len(kind_name_id_map)} atom types\n\n" - atoms = structure.get_ase() - cell, coord_transform = transform_cell(atoms.cell) - positions = np.transpose(np.dot(coord_transform, np.transpose(atoms.positions))) + cell, coord_transform = _transform_cell(structure.cell) + positions = np.transpose( + np.dot( + coord_transform, np.transpose([_site.position for _site in structure.sites]) + ) + ) if round_dp: cell = np.round(cell, round_dp) + 0.0 @@ -124,3 +126,60 @@ def generate_lammps_structure( filestring += f" {pos[0]:20.10f} {pos[1]:20.10f} {pos[2]:20.10f}\n" return filestring, coord_transform + + +def generate_header(value: str) -> str: + """ + Generate the header for the blocks. + + :param value: string indicating the input block + :type value: str + :return: header/footer for the input block + :rtype: str + """ + return "#" + value.center(80, "-") + "#\n" + + +def flatten(full_list: list) -> list: + """Flattens a list of list into a flat list. 
+ + :param full_list: list of lists to be flattened + :type full_list: list + :yield: flattened list + :rtype: list + """ + for element in full_list: + if isinstance(element, Iterable) and not isinstance(element, (str, bytes)): + yield from flatten(element) + else: + yield element + + +def convert_to_str(value): + """convert True/False to yes/no and all values to strings""" + if isinstance(value, bool): + if value: + return "yes" + return "no" + return str(value) + + +def _convert_values(value): + if isinstance(value, (tuple, list)): + return " ".join([convert_to_str(v) for v in value]) + return convert_to_str(value) + + +def join_keywords(dct, ignore=None): + """join a dict of {keyword: value, ...} into a string 'keyword value ...' + + value can be a single value or a list/tuple of values + """ + ignore = [] if not ignore else ignore + return " ".join( + [ + f"{k} {_convert_values(dct[k])}" + for k in sorted(dct.keys()) + if k not in ignore + ] + ) diff --git a/aiida_lammps/common/variables_types.json b/aiida_lammps/parsers/variables_types.json similarity index 100% rename from aiida_lammps/common/variables_types.json rename to aiida_lammps/parsers/variables_types.json diff --git a/docs/source/nitpick-exceptions b/docs/source/nitpick-exceptions index c3e9ffb..9874bf2 100644 --- a/docs/source/nitpick-exceptions +++ b/docs/source/nitpick-exceptions @@ -14,7 +14,7 @@ py:class utils.AttributesFrozendict py:exc jsonschema.exceptions.ValidationError py:exc jsonschema.exceptions.SchemaError py:exc InputValidationError -py:class aiida_lammps.common.parse_trajectory.TRAJ_BLOCK +py:class aiida_lammps.parsers.parse_raw.trajectory.TRAJ_BLOCK py:exc aiida.common.StoringNotAllowed py:class Logger py:class AttributeDict diff --git a/docs/source/users/get_started.md b/docs/source/users/get_started.md index 47ed5a4..6a06b8b 100644 --- a/docs/source/users/get_started.md +++ b/docs/source/users/get_started.md @@ -13,7 +13,7 @@ The structure is the simulation box that will be used 
for the simulation. The da ``` ```{note} - LAMMPS requires the simulation cell to be in the format of a lower triangular matrix (right-handed basis). Therefore the cell and positions may require [rotation and inversion](https://lammps.sandia.gov/doc/Howto_triclinic.html). This is automatically done to **every structure** at the calculation level, so it might be that the cell that is provided is modified so that it follows this convention. + LAMMPS requires the simulation cell to be in the format of a lower triangular matrix (right-handed basis). Therefore the cell and positions may require [rotation and inversion](https://lammps.sandia.gov/doc/Howto_triclinic.html). This is **automatically** done to **every structure** at the calculation level, so it might be that the cell that is provided is modified so that it follows this convention. However, this is just a different representation of the cell; its symmetry group should remain unchanged in this process. ``` ## Potential diff --git a/tests/test_generate_inputs.py b/tests/test_generate_inputs.py index 1614b3f..3316e09 100644 --- a/tests/test_generate_inputs.py +++ b/tests/test_generate_inputs.py @@ -3,13 +3,13 @@ import pytest -from aiida_lammps.common import input_generator from aiida_lammps.data.potential import LammpsPotentialData from aiida_lammps.fixtures.inputs import ( parameters_md, parameters_minimize, restart_data, ) +from aiida_lammps.parsers import inputfile from .utils import TEST_DIR @@ -26,7 +26,7 @@ def test_input_generate_minimize( """Test the generation of the input file for minimize calculations""" # pylint: disable=too-many-locals - input_generator.validate_input_parameters(parameters_minimize) + inputfile.validate_input_parameters(parameters_minimize) # Generate the potential potential_information = get_lammps_potential_data(potential_type) potential = LammpsPotentialData.get_or_create( @@ -37,7 +37,7 @@ # Generating the structure structure = 
potential_information["structure"] # Generating the input file - input_file = input_generator.generate_input_file( + input_file = inputfile.generate_input_file( parameters=parameters_minimize, potential=potential, structure=structure, @@ -75,7 +75,7 @@ def test_input_generate_md( """Test the generation of the input file for MD calculations""" # pylint: disable=too-many-locals - input_generator.validate_input_parameters(parameters_md) + inputfile.validate_input_parameters(parameters_md) # Generate the potential potential_information = get_lammps_potential_data(potential_type) potential = LammpsPotentialData.get_or_create( @@ -86,7 +86,7 @@ def test_input_generate_md( # Generating the structure structure = potential_information["structure"] # Generating the input file - input_file = input_generator.generate_input_file( + input_file = inputfile.generate_input_file( parameters=parameters_md, potential=potential, structure=structure, @@ -139,10 +139,10 @@ def test_input_generate_restart( if num_steps: parameters_md["restart"]["num_steps"] = num_steps - input_generator.validate_input_parameters(parameters_md) + inputfile.validate_input_parameters(parameters_md) # Generating the input file - input_file = input_generator.write_restart_block( + input_file = inputfile.write_restart_block( parameters_restart=parameters_md["restart"], restart_filename="restart.aiida", max_number_steps=1000, diff --git a/tests/test_generate_structure.py b/tests/test_generate_structure.py index 8896b2d..db7708a 100644 --- a/tests/test_generate_structure.py +++ b/tests/test_generate_structure.py @@ -1,7 +1,7 @@ """Test the structure generation in aiida-lammps""" import pytest -from aiida_lammps.common.generate_structure import generate_lammps_structure +from aiida_lammps.parsers.utils import generate_lammps_structure @pytest.mark.parametrize( diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 8622bc0..d7a8da7 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -8,7 
+8,7 @@ from aiida.orm import FolderData, SinglefileData from aiida.plugins import ParserFactory -from aiida_lammps.common.raw_parsers import parse_final_data, parse_logfile +from aiida_lammps.parsers.parse_raw import parse_final_data, parse_logfile from .utils import TEST_DIR diff --git a/tests/test_potential_data.py b/tests/test_potential_data.py index bd41c6a..58d9fae 100644 --- a/tests/test_potential_data.py +++ b/tests/test_potential_data.py @@ -4,8 +4,8 @@ import pytest import yaml -from aiida_lammps.common import input_generator from aiida_lammps.data.potential import LammpsPotentialData +from aiida_lammps.parsers import inputfile from .utils import TEST_DIR @@ -87,7 +87,7 @@ def test_lammps_potentials_input_block( **potential_information["parameters"], ) - potential_block = input_generator.write_potential_block( + potential_block = inputfile.write_potential_block( parameters_potential={}, potential_file="potential.dat", potential=node, diff --git a/tests/test_trajectory.py b/tests/test_trajectory.py index 67baed2..100d2f7 100644 --- a/tests/test_trajectory.py +++ b/tests/test_trajectory.py @@ -1,8 +1,8 @@ """Tests for the aiida-lammps trajectory data type""" import os -from aiida_lammps.common.parse_trajectory import create_structure, iter_trajectories from aiida_lammps.data.trajectory import LammpsTrajectory +from aiida_lammps.parsers.parse_raw import create_structure, iter_trajectories from .utils import TEST_DIR, recursive_round