# Patch summary (preamble text ahead of the first `diff --git` header).
#
# NOTE: in this copy the patch's line breaks and leading whitespace have been
# collapsed, so hunk bodies no longer line up with their `@@` headers; it has
# to be re-exported from the repository before it can be applied.
#
# Purpose: shard the catalog-preparation test over a GitHub Actions matrix.
#
# .github/workflows/catalog_preparation.yml
#   * Adds `strategy.matrix.modulo` with the eight values 0..7.
#   * Replaces the single-line "Run Tests" command
#     (`python -m unittest discover -s tests/catalog -p "test_*.py"`) with a
#     script that:
#       - appends `modulo=<matrix value>` to $GITHUB_STEP_SUMMARY,
#       - writes two `sed -i` commands into sedit.sh and runs it with `sh`;
#         they rewrite `num_par = 1 ` to `num_par = 8 ` and `modulo = 0` to
#         `modulo = <matrix value>` inside tests/catalog/test_preparation.py,
#       - runs `python -m unittest tests.catalog.test_preparation`.
#
# tests/catalog/test_preparation.py
#   * Sorts `all_preparation_files` at module level (the sort and its comment
#     are removed from inside `test_preparations`).
#   * Adds module-level `num_par = 1` and `modulo = 0`, then keeps only the
#     files whose index `i` in the sorted list satisfies
#     `i % num_par == modulo`. The filter runs at import time, which is why
#     the workflow edits the file before starting unittest.
#   * Logs the file count and the path suffix after "prepare" for each file,
#     instead of logging the raw list.
#   * Adds `self.assertTrue(False)` directly before an existing `raise`.
#   * The times-table log line now includes `len(times)`.
#
# NOTE(review): the sed expressions match exact source text (`^num_par = 1 `
#   including the trailing space, and `^modulo = 0`). If either line is
#   reformatted, sed presumably matches nothing and still exits successfully,
#   so every matrix job would run the full file list - confirm, or pass the
#   shard through environment variables instead.
# NOTE(review): the literal 8 in the first sed command has to be kept equal to
#   the number of entries in `matrix.modulo`.
# NOTE(review): the new command runs one module instead of discovering
#   `test_*.py`; if tests/catalog holds other test modules they are no longer
#   run by this step - verify.
# NOTE(review): `self.assertTrue(False)` precedes `raise`; if both sit at the
#   same indentation the assertion fires first and `raise` is unreachable -
#   confirm the intent (indentation is not recoverable from this copy).
# NOTE(review): the in-patch comment listing "0,1,2,...,8,.. num_par-1" reads
#   oddly; the values `modulo` may take are 0 .. num_par-1.
diff --git a/.github/workflows/catalog_preparation.yml b/.github/workflows/catalog_preparation.yml index 3968d83099..67ef116b6f 100644 --- a/.github/workflows/catalog_preparation.yml +++ b/.github/workflows/catalog_preparation.yml @@ -18,6 +18,9 @@ jobs: HF_DATASETS_DISABLE_PROGRESS_BARS: "True" TQDM_DISABLE: "True" + strategy: + matrix: + modulo: [0,1,2,3,4,5,6,7] steps: - uses: actions/checkout@v4 @@ -31,4 +34,10 @@ jobs: - run: huggingface-cli login --token ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }} - name: Run Tests - run: python -m unittest discover -s tests/catalog -p "test_*.py" \ No newline at end of file + run: | + modulo="${{ matrix.modulo }}" + echo "modulo=${modulo}" >> $GITHUB_STEP_SUMMARY + echo "sed -i 's/^num_par = 1 /num_par = 8 /' tests/catalog/test_preparation.py" > sedit.sh + echo "sed -i 's/^modulo = 0/modulo = ${modulo}/' tests/catalog/test_preparation.py" >> sedit.sh + sh sedit.sh + python -m unittest tests.catalog.test_preparation diff --git a/tests/catalog/test_preparation.py b/tests/catalog/test_preparation.py index caec4d2d3c..d00e5d801e 100644 --- a/tests/catalog/test_preparation.py +++ b/tests/catalog/test_preparation.py @@ -20,16 +20,30 @@ ) glob_query = os.path.join(project_dir, "prepare", "**", "*.py") all_preparation_files = glob.glob(glob_query, recursive=True) +# Make sure the order in which the tests are run is deterministic +# Having a different order for local testing and github testing may cause diffs in results. +all_preparation_files.sort() +num_par = 1 # num of parallel executions +logger.critical( + f"Over all, {len(all_preparation_files)} files will now be tested over {num_par} parallel processes." +) +# the following should be any of modulo num_par: 0,1,2,3,4,5,6,7,8,.. 
num_par-1 +modulo = 0 +all_preparation_files = [ + file for i, file in enumerate(all_preparation_files) if i % num_par == modulo +] class TestCatalogPreparation(UnitxtCatalogPreparationTestCase): def test_preparations(self): logger.info(glob_query) - logger.critical(f"Testing preparation files: {all_preparation_files}") - # Make sure the order in which the tests are run is deterministic - # Having a different order for local testing and github testing may cause diffs in results. + all_preparation_files_as_string = "\n".join( + [file.split("prepare")[-1] for file in all_preparation_files] + ) + logger.critical( + f"Testing {len(all_preparation_files)} preparation files: \n{all_preparation_files_as_string}\n" + ) times = {} - all_preparation_files.sort() for file in all_preparation_files: logger.info( "\n_____________________________________________\n" @@ -50,6 +64,7 @@ def test_preparations(self): f"Skipping file {file} due to ignored error {e}" ) continue + self.assertTrue(False) raise logger.info(f"Testing preparation file: {file} passed") self.assertTrue(True) @@ -70,6 +85,6 @@ def test_preparations(self): logger.critical(f"Testing preparation file '{file}' failed:") raise e - logger.critical("Preparation times table:") + logger.critical(f"Preparation times table for {len(times)} files:") times = dict(sorted(times.items(), key=lambda item: item[1], reverse=True)) print_dict(times, log_level="critical")