Skip to content

Commit

Permalink
shamelessly use 8 parallel executions
Browse files Browse the repository at this point in the history
Signed-off-by: dafnapension <[email protected]>
  • Loading branch information
dafnapension committed Nov 12, 2024
1 parent 86d8533 commit 0994fbf
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/catalog_preparation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ jobs:
HF_DATASETS_DISABLE_PROGRESS_BARS: "True"
TQDM_DISABLE: "True"

strategy:
matrix:
modulo: [0,1,2,3,4, 5, 6, 7]

steps:
- uses: actions/checkout@v4
Expand All @@ -31,4 +34,9 @@ jobs:
- run: huggingface-cli login --token ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}

- name: Run Tests
run: python -m unittest discover -s tests/catalog -p "test_*.py"
run: |
modulo="${{ matrix.modulo }}"
echo "modulo=${modulo}" >> $GITHUB_STEP_SUMMARY
echo "sed -i 's/^modulo = ./modulo = ${modulo}/' tests/catalog/test_preparation.py" > sedit.sh
sh sedit.sh
python -m unittest tests.catalog.test_preparation
23 changes: 18 additions & 5 deletions tests/catalog/test_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,27 @@
)
glob_query = os.path.join(project_dir, "prepare", "**", "*.py")
all_preparation_files = glob.glob(glob_query, recursive=True)
# Sort so that the order in which the tests are run is deterministic.
# A different order in local testing and in GitHub testing may cause diffs in results.
all_preparation_files.sort()
num_par = 8 # num of parallel executions
logger.critical(
f"Over all, {len(all_preparation_files)} files will now be tested over {num_par} parallel processes."
)
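# The value below must be one of the residues modulo num_par: 0,1,2,3,4,5,6,7.
# The CI workflow overwrites it (via sed) with the matrix value of each parallel job.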
modulo = 1
all_preparation_files = [
file for i, file in enumerate(all_preparation_files) if i % num_par == modulo
]


class TestCatalogPreparation(UnitxtCatalogPreparationTestCase):
def test_preparations(self):
logger.info(glob_query)
logger.critical(f"Testing preparation files: {all_preparation_files}")
# Make sure the order in which the tests are run is deterministic
# Having a different order for local testing and github testing may cause diffs in results.
logger.critical(
f"Testing {len(all_preparation_files)} preparation files: {all_preparation_files}"
)
times = {}
all_preparation_files.sort()
for file in all_preparation_files:
logger.info(
"\n_____________________________________________\n"
Expand Down Expand Up @@ -70,6 +81,8 @@ def test_preparations(self):
logger.critical(f"Testing preparation file '{file}' failed:")
raise e

logger.critical("Preparation times table:")
logger.critical(
f"Preparation times table for the {len(times)} files that completed successfully:"
)
times = dict(sorted(times.items(), key=lambda item: item[1], reverse=True))
print_dict(times, log_level="critical")

0 comments on commit 0994fbf

Please sign in to comment.