From 74a18bc59beafd62082b24f809bfba29a402489d Mon Sep 17 00:00:00 2001
From: Lorenzo <79980269+bastonero@users.noreply.github.com>
Date: Tue, 14 Jan 2025 20:55:52 +0100
Subject: [PATCH] `PhParser`: allow for pattern initialization (#1034)

To parallelize `ph.x`, an initialization run should first be performed
by setting `start_irr` and `last_irr` to 0 in the input parameters.
This allows the program to exit smoothly and also avoids waiting for
the wavefunctions to be rewritten, which can be a rather long and
IO-intensive operation that is not well suited to initialization runs.

The parser is adjusted to account for this option because, for some
reason, the `JOB DONE` line is not printed in such cases.
A simple specialized parser is also added that reads the number of
q-points and their coordinates from the stdout; the number of q-points
is stored in the output parameters after being checked against the
listed coordinates. It can later be used to parallelize over q-points
by specifying `start_q` and `last_q`.
---
 .../parsers/parse_raw/ph.py                   | 46 +++++++++++++
 src/aiida_quantumespresso/parsers/ph.py       | 25 ++++++-
 .../fixtures/ph/initialization/aiida.out      | 67 +++++++++++++++++++
 .../ph/initialization_failed/DYN_MAT/.gitkeep |  0
 .../ph/initialization_failed/aiida.out        | 66 ++++++++++++++++++
 tests/parsers/test_ph.py                      | 37 +++++++++-
 .../test_ph/test_ph_initialization.yml        |  1 +
 tests/workflows/ph/test_base.py               | 10 ++-
 8 files changed, 248 insertions(+), 4 deletions(-)
 create mode 100644 tests/parsers/fixtures/ph/initialization/aiida.out
 create mode 100644 tests/parsers/fixtures/ph/initialization_failed/DYN_MAT/.gitkeep
 create mode 100644 tests/parsers/fixtures/ph/initialization_failed/aiida.out
 create mode 100644 tests/parsers/test_ph/test_ph_initialization.yml

diff --git a/src/aiida_quantumespresso/parsers/parse_raw/ph.py b/src/aiida_quantumespresso/parsers/parse_raw/ph.py
index 91cb377bf..2dc2d5ddb 100644
--- a/src/aiida_quantumespresso/parsers/parse_raw/ph.py
+++ b/src/aiida_quantumespresso/parsers/parse_raw/ph.py
@@ -4,6 +4,8 @@
 The function that needs to be called from outside is parse_raw_ph_output(). Ideally, the functions should work even
 without aiida and will return a dictionary with parsed keys.
 """
+from __future__ import annotations
+
 import numpy
 from qe_tools import CONSTANTS
 
@@ -438,3 +440,47 @@ def parse_ph_dynmat(data, logs, lattice_parameter=None, also_eigenvectors=False,
         parsed_data['eigenvectors'] = eigenvectors
 
     return parsed_data
+
+
+def parse_initialization_qpoints(stdout: str) -> dict:
+    """Return the number of q-points from an initialization run.
+
+    Here, the initialization run refers to the one performed by specifying
+    `start_irr` and `last_irr` to 0 in the inputs.
+
+    :return: parsed dictionary
+
+    :raise: `RuntimeError` if the number of q-points cannot be parsed or it
+        differs from the number of q-points in the stdout list.
+    """
+    import re
+
+    parameters = {}
+
+    # Regular expression to match `N` in `(  N q-points)`
+    pattern = r'\(\s*(\d+)\s*q-points\)'
+    match = re.search(pattern, stdout)
+    if match:
+        parameters.update({'number_of_qpoints': int(match.group(1))})
+    else:
+        raise RuntimeError('the number of q-points cannot be parsed')
+
+    # Regular expression pattern to match the q-points section
+    pattern = r'\(\s*\d+\s*q-points\):\s*\n\s*N\s*xq\(1\)\s*xq\(2\)\s*xq\(3\)\s*\n((?:\s*\d+\s*[\d\.\-\s]+\n?)*)'
+    match = re.search(pattern, stdout)
+
+    if match:
+        q_points_block = match.group(1)
+
+        # Regular expression to match each line of coordinates
+        coord_pattern = r'\s*\d+\s*([\d\.\-]+)\s*([\d\.\-]+)\s*([\d\.\-]+)'
+
+        coords = re.findall(coord_pattern, q_points_block) # Find all coordinates in the block
+        q_points = [list(map(float, coord)) for coord in coords]
+    else:
+        raise RuntimeError('the list of q-points cannot be parsed')
+
+    if parameters['number_of_qpoints'] != len(q_points):
+        raise RuntimeError('the number of q-points do not coincde with the number of listed q-points')
+
+    return parameters
diff --git a/src/aiida_quantumespresso/parsers/ph.py b/src/aiida_quantumespresso/parsers/ph.py
index 636b3695b..5b8fc3d25 100644
--- a/src/aiida_quantumespresso/parsers/ph.py
+++ b/src/aiida_quantumespresso/parsers/ph.py
@@ -6,12 +6,25 @@
 from aiida import orm
 
 from aiida_quantumespresso.calculations.ph import PhCalculation
-from aiida_quantumespresso.parsers.parse_raw.ph import parse_raw_ph_output
+from aiida_quantumespresso.parsers.parse_raw.ph import parse_initialization_qpoints, parse_raw_ph_output
 from aiida_quantumespresso.utils.mapping import get_logging_container
 
 from .base import BaseParser
 
 
+def _is_initialization(parameters: dict) -> bool:
+    """Return whether the `ph.x` was run with (patterns) initialization options.
+
+    When `ph.x` is used with `start_irr` and `last_irr` set to 0, the binary doesn't
+    produce the usual `JOB DONE` statement, and immediately exits the job. This is
+    used to quickly generate the displacement patterns needed for a correct parallelization
+    of the code over both q-points and irreducible representations (irreps).
+    """
+    if 'start_irr' in parameters['INPUTPH'] and 'last_irr' in parameters['INPUTPH']:
+        return parameters['INPUTPH']['start_irr'] == parameters['INPUTPH']['last_irr'] == 0
+    return False
+
+
 class PhParser(BaseParser):
     """``Parser`` implementation for the ``PhCalculation`` calculation job class."""
 
@@ -28,6 +41,16 @@ def parse(self, **kwargs):
 
         stdout, parsed_data, logs = self.parse_stdout_from_retrieved(logs)
 
+        # When `start_irr` and `last_irr` are set to 0, `JOB DONE` is not in stdout (expected behaviour).
+        # Still, we expect `stdout` to be non-empty at the very least; otherwise something went wrong.
+        if stdout and _is_initialization(self.node.inputs.parameters.get_dict()):
+            try:
+                parameters = parse_initialization_qpoints(stdout)
+                self.out('output_parameters', orm.Dict(parameters))
+                return
+            except RuntimeError as exc:
+                logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')
+
         # If the scheduler detected OOW, simply keep that exit code by not returning anything more specific.
         if self.node.exit_status == PhCalculation.exit_codes.ERROR_SCHEDULER_OUT_OF_WALLTIME:
             return
diff --git a/tests/parsers/fixtures/ph/initialization/aiida.out b/tests/parsers/fixtures/ph/initialization/aiida.out
new file mode 100644
index 000000000..620b83ccb
--- /dev/null
+++ b/tests/parsers/fixtures/ph/initialization/aiida.out
@@ -0,0 +1,67 @@
+
+     Program PHONON v.7.2 starts on 18Jun2024 at 18:33:27 
+
+     This program is part of the open-source Quantum ESPRESSO suite
+     for quantum simulation of materials; please cite
+         "P. Giannozzi et al., J. Phys.:Condens. Matter 21 395502 (2009);
+         "P. Giannozzi et al., J. Phys.:Condens. Matter 29 465901 (2017);
+         "P. Giannozzi et al., J. Chem. Phys. 152 154105 (2020);
+          URL http://www.quantum-espresso.org", 
+     in publications or presentations arising from this work. More details at
+     http://www.quantum-espresso.org/quote
+
+     Parallel version (MPI), running on     8 processors
+
+     MPI processes distributed on     1 nodes
+     37704 MiB available memory on the printing compute node when the environment starts
+ 
+     Reading input from ph.init.in
+
+     Reading xml data from directory:
+
+     ./tmp/graphene.save/
+ 
+     R & G space division:  proc/nbgrp/npool/nimage =       8
+     Subspace diagonalization in iterative solution of the eigenvalue problem:
+     a serial algorithm will be used
+
+
+     IMPORTANT: XC functional enforced from input :
+     Exchange-correlation= PZ
+                           (   1   1   0   0   0   0   0)
+     Any further DFT definition will be discarded
+     Please, verify this is what you really want
+
+ 
+     Parallelization info
+     --------------------
+     sticks:   dense  smooth     PW     G-vecs:    dense   smooth      PW
+     Min          30      30     10                 3040     3040     660
+     Max          31      31     11                 3065     3065     691
+     Sum         241     241     85                24369    24369    5409
+ 
+     Using Slab Decomposition
+ 
+ ----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D
+  The code is running with the 2D cutoff
+  Please refer to:
+  Sohier, T., Calandra, M., & Mauri, F. (2017), 
+  Density functional perturbation theory for gated two-dimensional heterostructu
+ res:
+  Theoretical developments and application to flexural phonons in graphene.
+  Physical Review B, 96(7), 75448. https://doi.org/10.1103/PhysRevB.96.075448
+ ----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D
+     Reading collected, re-writing distributed wavefunctions in ./tmp/
+
+
+     Dynamical matrices for ( 2, 2, 2)  uniform grid of q-points
+     (   8 q-points):
+       N         xq(1)         xq(2)         xq(3) 
+       1   0.000000000   0.000000000   0.000000000
+       2   0.000000000   0.000000000  -0.061660223
+       3   0.000000000  -0.577350269   0.000000000
+       4   0.000000000  -0.577350269  -0.061660223
+       5  -0.500000000  -0.288675135   0.000000000
+       6  -0.500000000  -0.288675135  -0.061660223
+       7  -0.500000000  -0.866025404   0.000000000
+       8  -0.500000000  -0.866025404  -0.061660223
diff --git a/tests/parsers/fixtures/ph/initialization_failed/DYN_MAT/.gitkeep b/tests/parsers/fixtures/ph/initialization_failed/DYN_MAT/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/parsers/fixtures/ph/initialization_failed/aiida.out b/tests/parsers/fixtures/ph/initialization_failed/aiida.out
new file mode 100644
index 000000000..17cb066b0
--- /dev/null
+++ b/tests/parsers/fixtures/ph/initialization_failed/aiida.out
@@ -0,0 +1,66 @@
+
+     Program PHONON v.7.2 starts on 18Jun2024 at 18:33:27 
+
+     This program is part of the open-source Quantum ESPRESSO suite
+     for quantum simulation of materials; please cite
+         "P. Giannozzi et al., J. Phys.:Condens. Matter 21 395502 (2009);
+         "P. Giannozzi et al., J. Phys.:Condens. Matter 29 465901 (2017);
+         "P. Giannozzi et al., J. Chem. Phys. 152 154105 (2020);
+          URL http://www.quantum-espresso.org", 
+     in publications or presentations arising from this work. More details at
+     http://www.quantum-espresso.org/quote
+
+     Parallel version (MPI), running on     8 processors
+
+     MPI processes distributed on     1 nodes
+     37704 MiB available memory on the printing compute node when the environment starts
+ 
+     Reading input from ph.init.in
+
+     Reading xml data from directory:
+
+     ./tmp/graphene.save/
+ 
+     R & G space division:  proc/nbgrp/npool/nimage =       8
+     Subspace diagonalization in iterative solution of the eigenvalue problem:
+     a serial algorithm will be used
+
+
+     IMPORTANT: XC functional enforced from input :
+     Exchange-correlation= PZ
+                           (   1   1   0   0   0   0   0)
+     Any further DFT definition will be discarded
+     Please, verify this is what you really want
+
+ 
+     Parallelization info
+     --------------------
+     sticks:   dense  smooth     PW     G-vecs:    dense   smooth      PW
+     Min          30      30     10                 3040     3040     660
+     Max          31      31     11                 3065     3065     691
+     Sum         241     241     85                24369    24369    5409
+ 
+     Using Slab Decomposition
+ 
+ ----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D
+  The code is running with the 2D cutoff
+  Please refer to:
+  Sohier, T., Calandra, M., & Mauri, F. (2017), 
+  Density functional perturbation theory for gated two-dimensional heterostructu
+ res:
+  Theoretical developments and application to flexural phonons in graphene.
+  Physical Review B, 96(7), 75448. https://doi.org/10.1103/PhysRevB.96.075448
+ ----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D
+     Reading collected, re-writing distributed wavefunctions in ./tmp/
+
+
+     Dynamical matrices for ( 2, 2, 2)  uniform grid of q-points
+     (   8 q-points):
+       N         xq(1)         xq(2)         xq(3) 
+       1   0.000000000   0.000000000   0.000000000
+       2   0.000000000   0.000000000  -0.061660223
+       3   0.000000000  -0.577350269   0.000000000
+       4   0.000000000  -0.577350269  -0.061660223
+       5  -0.500000000  -0.288675135   0.000000000
+       6  -0.500000000  -0.288675135  -0.061660223
+       7  -0.500000000  -0.866025404   0.000000000
diff --git a/tests/parsers/test_ph.py b/tests/parsers/test_ph.py
index 49d4a9a40..70023bd39 100644
--- a/tests/parsers/test_ph.py
+++ b/tests/parsers/test_ph.py
@@ -6,7 +6,7 @@
 
 def generate_inputs():
     """Return only those inputs that the parser will expect to be there."""
-    return {}
+    return {'parameters': orm.Dict({'INPUTPH': {}})}
 
 
 @pytest.mark.parametrize('test_name', ['default', 'single_qpoint', 'no_modes_printed'])
@@ -61,6 +61,41 @@ def test_ph_out_of_walltime(fixture_localhost, generate_calc_job_node, generate_
     data_regression.check(results['output_parameters'].get_dict())
 
 
+def test_ph_initialization(fixture_localhost, generate_calc_job_node, generate_parser, data_regression):
+    """Test a `ph.x` calculation performed with `start_irr` and `last_irr` set to 0."""
+    name = 'initialization'
+    entry_point_calc_job = 'quantumespresso.ph'
+    entry_point_parser = 'quantumespresso.ph'
+
+    inputs = {'parameters': orm.Dict({'INPUTPH': {'start_irr': 0, 'last_irr': 0}})}
+
+    node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, name, inputs)
+    parser = generate_parser(entry_point_parser)
+    results, calcfunction = parser.parse_from_node(node, store_provenance=False)
+
+    assert calcfunction.is_finished, calcfunction.exception
+    assert calcfunction.is_finished_ok, calcfunction.exit_message
+    assert 'output_parameters' in results
+    data_regression.check(results['output_parameters'].get_dict())
+
+
+def test_ph_initialization_failed(fixture_localhost, generate_calc_job_node, generate_parser):
+    """Test a failed `ph.x` calculation performed with `start_irr` and `last_irr` set to 0."""
+    name = 'initialization_failed'
+    entry_point_calc_job = 'quantumespresso.ph'
+    entry_point_parser = 'quantumespresso.ph'
+
+    inputs = {'parameters': orm.Dict({'INPUTPH': {'start_irr': 0, 'last_irr': 0}})}
+
+    node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, name, inputs)
+    parser = generate_parser(entry_point_parser)
+    _, calcfunction = parser.parse_from_node(node, store_provenance=False)
+
+    assert calcfunction.is_finished, calcfunction.exception
+    assert calcfunction.is_failed, calcfunction.exit_status
+    assert calcfunction.exit_status == node.process_class.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE.status
+
+
 def test_ph_failed_computing_cholesky(fixture_localhost, generate_calc_job_node, generate_parser):
     """Test the parsing of a calculation that failed during cholesky factorization.
 
diff --git a/tests/parsers/test_ph/test_ph_initialization.yml b/tests/parsers/test_ph/test_ph_initialization.yml
new file mode 100644
index 000000000..1945c22a3
--- /dev/null
+++ b/tests/parsers/test_ph/test_ph_initialization.yml
@@ -0,0 +1 @@
+number_of_qpoints: 8
diff --git a/tests/workflows/ph/test_base.py b/tests/workflows/ph/test_base.py
index 8a23d04f0..8b6fcb101 100644
--- a/tests/workflows/ph/test_base.py
+++ b/tests/workflows/ph/test_base.py
@@ -10,6 +10,11 @@
 from aiida_quantumespresso.workflows.ph.base import PhBaseWorkChain
 
 
+def generate_inputs():
+    """Return only those inputs that the parser will expect to be there."""
+    return {'parameters': orm.Dict({'INPUTPH': {}})}
+
+
 @pytest.fixture
 def generate_ph_calc_job_node(generate_calc_job_node, fixture_localhost):
     """Generate a ``CalcJobNode`` that would have been created by a ``PhCalculation``."""
@@ -221,9 +226,10 @@ def test_merge_outputs(
 
     entry_point_calc_job = 'quantumespresso.ph'
     parser = generate_parser('quantumespresso.ph')
+    inputs = generate_inputs()
 
     node_1 = generate_calc_job_node(
-        entry_point_name=entry_point_calc_job, computer=fixture_localhost, test_name=f'{name}_1'
+        entry_point_name=entry_point_calc_job, computer=fixture_localhost, test_name=f'{name}_1', inputs=inputs
     )
     results_1, calcjob_1 = parser.parse_from_node(node_1, store_provenance=False)
 
@@ -236,7 +242,7 @@ def test_merge_outputs(
     assert calcjob_1.exit_status == PhCalculation.exit_codes.ERROR_OUT_OF_WALLTIME.status
 
     node_2 = generate_calc_job_node(
-        entry_point_name=entry_point_calc_job, computer=fixture_localhost, test_name=f'{name}_2'
+        entry_point_name=entry_point_calc_job, computer=fixture_localhost, test_name=f'{name}_2', inputs=inputs
     )
     results_2, calcjob_2 = parser.parse_from_node(node_2, store_provenance=False)