From 6345d338a9e53f665a9d3e1e48d648997915e948 Mon Sep 17 00:00:00 2001 From: "Buyuan(Alex) Cui" <69030297+BuyuanCui@users.noreply.github.com> Date: Fri, 16 Aug 2024 09:59:06 -0700 Subject: [PATCH] Zh tn bug 240712 (#187) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * IT TN improvement on tests (#120) * add missing test cases Signed-off-by: Mariana Graterol Fuenmayor * fix bug with time tests Signed-off-by: Mariana Graterol Fuenmayor * update ci date Signed-off-by: Mariana Graterol Fuenmayor * add sentence test cases Signed-off-by: Mariana Graterol Fuenmayor * refine shortest path for irregular cardinals Signed-off-by: Mariana Graterol Fuenmayor * update ci date Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * add single letter exception for roman numerals (#121) * add single letter exception for roman numerals Signed-off-by: Mariana Graterol Fuenmayor * update ci dir Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * fix broken path for nondet whitelist (#124) Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * Increase weights for serial (en TN) (#128) * Increase weights for serial (en TN) Resolves https://github.com/NVIDIA/NeMo-text-processing/issues/126 Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * Add tests for fix Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * Update Jenkinsfile cache path Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * Update Jenkinsfile. 
Fix cache folder Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> --------- Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Alex Cui * add measures file for FR TN (#131) * add measures file Signed-off-by: Mariana Graterol Fuenmayor * update whitelist data Signed-off-by: Mariana Graterol Fuenmayor * add fr tn tests Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * Sh jenkins (#127) * Add SH tests to Jenkins Signed-off-by: Anand Joseph * Update cache paths Signed-off-by: Anand Joseph * Update Jenkins tests Signed-off-by: Anand Joseph * Add CI/CD tests for sparrowhawk Signed-off-by: Anand Joseph * docker build only if in test mode Signed-off-by: Anand Joseph * Fix missing variable Signed-off-by: Anand Joseph * Fix comments and remove arguments not required Signed-off-by: Anand Joseph * Fix commands not executing Signed-off-by: Anand Joseph * Missing arguments Signed-off-by: Anand Joseph * Missing quotes Signed-off-by: Anand Joseph * Fix incorrect path for tests Signed-off-by: Anand Joseph * Fix paths Signed-off-by: Anand Joseph * Incorrect paths of tests and shunit2 Signed-off-by: Anand Joseph * Fix issues with paths as arguments to shunit Signed-off-by: Anand Joseph * Undo path change Signed-off-by: Anand Joseph * Fix intentional fail test Signed-off-by: Anand Joseph * revert redundant check for cased option Signed-off-by: Anand Joseph * Fix default path in export_grammars.sh Signed-off-by: Anand Joseph * Update cache paths Signed-off-by: Anand Joseph * Add interactive option Signed-off-by: Anand Joseph * Add SH tests for cased EN ITN Signed-off-by: Anand Joseph --------- Signed-off-by: Anand Joseph Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Alex Cui * update isort - fix precommit (#138) * update isort version Signed-off-by: Evelina * update isort version Signed-off-by: Evelina * fix format 
Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove unused imports Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Armenian itn (#136) * Added Armenian ITN Signed-off-by: David Sargsyan * Added Armenian ITN Signed-off-by: David Sargsyan * Added Armenian ITN Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: David Sargsyan * Added context for tests and fixed CodeQL errors Signed-off-by: David Sargsyan * Revert "Added context for tests and fixed CodeQL errors" This reverts commit 2c804d941963c0be21d3aad07e6cd13568ab747b. Signed-off-by: David Sargsyan * Added context to some test files and fixed CodeQL errors Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: David Sargsyan * deleted unnecessary data Signed-off-by: David Sargsyan * translated a few measurements to Armenian Signed-off-by: David Sargsyan * adjusted some things for better readability and maintainer support Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed one test case and some issues Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: David Sargsyan Co-authored-by: David Sargsyan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Fix CI (#142) * fix whitelist deployment Signed-off-by: Evelina * clean up Signed-off-by: Evelina * [pre-commit.ci] auto 
fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * comment out tests to recreate grammars Signed-off-by: Evelina * shorten test Signed-off-by: Evelina * fix jenkins Signed-off-by: Evelina * cased for TN Signed-off-by: Evelina * revert debug changes Signed-off-by: Evelina * fix args default Signed-off-by: Evelina * try parallel Signed-off-by: Evelina * debug parallel Signed-off-by: Evelina * rerun Signed-off-by: Evelina * rerun Signed-off-by: Evelina * fix sh tests for local SH launcher Signed-off-by: Evelina * enable all ci tests Signed-off-by: Evelina * enable all ci tests Signed-off-by: Evelina --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Armenian TN (#137) * merged with main branch and fixed conflicts Signed-off-by: David Sargsyan * fixing conflicts Signed-off-by: David Sargsyan * fixing some more conflicts Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: David Sargsyan * fixed a minor issue Signed-off-by: David Sargsyan * deleted unused imports Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix: add "hy" language option for armenian Signed-off-by: Ara Yeroyan <60027241+Ara-Yeroyan@users.noreply.github.com> * added optional space for measurements after cardinals/decimals Signed-off-by: David Sargsyan * added Armenian dot Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: David Sargsyan Signed-off-by: Ara Yeroyan <60027241+Ara-Yeroyan@users.noreply.github.com> Signed-off-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Co-authored-by: David Sargsyan Co-authored-by: pre-commit-ci[bot] 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ara Yeroyan <60027241+Ara-Yeroyan@users.noreply.github.com> Co-authored-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Signed-off-by: Alex Cui * Marathi ITN (#134) * Added Marathi ITN Signed-off-by: Chinmay Patil * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adding jenkins test Signed-off-by: Travis Bartley --------- Signed-off-by: Chinmay Patil Signed-off-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Signed-off-by: Travis Bartley Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Co-authored-by: Travis Bartley Signed-off-by: Alex Cui * jenkins fix (#150) * jenkins fix Signed-off-by: Travis Bartley * removing armenian to troubleshoot jenkins Signed-off-by: Travis Bartley * removing armenian to troubleshoot jenkins Signed-off-by: Travis Bartley * missing _init_ for python Signed-off-by: Travis Bartley * mislabled cache Signed-off-by: Travis Bartley --------- Signed-off-by: Travis Bartley Signed-off-by: Alex Cui * r0.3.0 release (#151) Signed-off-by: Evelina Signed-off-by: Alex Cui * Fix text=line[text] to text=line[text_field] (#153) Signed-off-by: Sasha Meister Signed-off-by: Alex Cui * use real string on docstring (#157) Signed-off-by: Kevin Sanders Signed-off-by: Alex Cui * Sh postprocess (#147) * Add support for postprocessor far in sparrowhawk Signed-off-by: Anand Joseph * Cleanup Signed-off-by: Anand Joseph * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Choose between having a post processor or not Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> --------- Signed-off-by: Anand Joseph Signed-off-by: anand-nv 
<105917641+anand-nv@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * update run_evaluate script for cased itn (#164) * update run_evaluate script for cased itn Signed-off-by: Mariana Graterol Fuenmayor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Mariana Graterol Fuenmayor Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * remove unused function from ar tn decimals (#165) * remove unused function from ar tn decimals Signed-off-by: Mariana Graterol Fuenmayor * update ci date Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * ZH sentence-level TN (#112) * Swedish telephone fix (#60) * port fix for telephone from swedish-itn branch Signed-off-by: Jim O'Regan * extend cardinal in non-deterministic mode Signed-off-by: Jim O'Regan * whitespace fixes Signed-off-by: Jim O'Regan * also fix in the verbaliser Signed-off-by: Jim O'Regan * Update Jenkinsfile Signed-off-by: Jim O’Regan --------- Signed-off-by: Jim O'Regan Signed-off-by: Jim O’Regan Signed-off-by: Alex Cui * log instead of print in graph_utils.py (#68) Signed-off-by: Enno Hermann Signed-off-by: Alex Cui * CER estimation speedup for audio-based text normalization (#73) * Replaced jiwer with editdistance to speed up CER estimation Signed-off-by: Vitaly Lavrukhin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Vitaly Lavrukhin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * add measure coverage for TN and ITN (#62) * add measure coverage for TN and ITN Signed-off-by: ealbasiri * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci * Remove unused imports Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove unused imports Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * Remove unused imports Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update measure.py Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> --------- Signed-off-by: ealbasiri Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Alex Cui * upload es-ES, es-LA, fr-FR and it-IT g2p dicts (#63) * upload es-ES and fr-FR g2p dicts Signed-off-by: Mariana Graterol Fuenmayor * add inits Signed-off-by: Mariana Graterol Fuenmayor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add NALA Spanish dict Signed-off-by: Mariana Graterol Fuenmayor * rename Spanish and French dictionaries Signed-off-by: Mariana Graterol Fuenmayor * add Italian dictionary Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * add country codes from hu (#77) Signed-off-by: Jim O'Regan Signed-off-by: Alex Cui * fix electronic case for username (#75) * fix electronic username w/o . 
Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * disable sv tests Signed-off-by: Evelina * disable sv tests Signed-off-by: Evelina * fix ar test Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * disable sv tests Signed-off-by: Evelina * update ci dirs, enable sv tests Signed-off-by: Evelina --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * 0.1.8 release (#79) Signed-off-by: Evelina Signed-off-by: Alex Cui * Codeswitched ES/EN ITN (#78) * Initial commit for ES-EN codeswitched ITN Signed-off-by: Anand Joseph * Enable export for es_en codeswitched ITN Signed-off-by: Anand Joseph * Add whitelist, update weights Signed-off-by: Anand Joseph * Add tests for en_es, zone tagged separately in es Signed-off-by: Anand Joseph * Fix path to test data for sparrowhawk tests Signed-off-by: Anand Joseph * Update Jenkinsfile - enable ES/EN tests Signed-off-by: Anand Joseph * Add __init__.py files Signed-off-by: Anand Joseph * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix issues with failed docker build - due to archiving of debian and issues with re2 Signed-off-by: Anand Joseph * Remove unused imports and variables Signed-off-by: Anand Joseph * Update date Signed-off-by: Anand Joseph * Enable NBSP in sparrowhawk tests Signed-off-by: Anand Joseph * Update copyrights Signed-off-by: Anand Joseph * Update cache path in for ES/EN CI/CD Signed-off-by: Anand Joseph --------- Signed-off-by: Anand Joseph Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * electronic verbalizer fallback (#81) * 0.1.8 release Signed-off-by: Evelina * add elec fallback Signed-off-by: Evelina * update ci Signed-off-by: Evelina * 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * minor normalize.py edit for usability (#84) * electronic verbalizer fallback (#81) * 0.1.8 release Signed-off-by: Evelina * add elec fallback Signed-off-by: Evelina * update ci Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Linnea Pari Leaver * documentation edits for grammar/clarity Signed-off-by: Linnea Pari Leaver * added --output_field flag for command line interface Signed-off-by: Linnea Pari Leaver * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Signed-off-by: Linnea Pari Leaver Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Linnea Pari Leaver Signed-off-by: Alex Cui * Swedish ITN (#40) * force two digits for month Signed-off-by: Jim O'Regan * put it in a function, because I reject the garbage pre-commit.ci came up with Signed-off-by: Jim O'Regan * wrap some more pieces Signed-off-by: Jim O'Regan * add graph pieces Signed-off-by: Jim O'Regan * delete junk Signed-off-by: Jim O'Regan * my copyright Signed-off-by: Jim O'Regan * add date verbaliser (copy from es) Signed-off-by: Jim O'Regan * tweaks Signed-off-by: Jim O'Regan * add date verbaliser Signed-off-by: Jim O'Regan * add right tokens Signed-off-by: Jim O'Regan * some tweaks, more needed Signed-off-by: Jim O'Regan * basic test cases Signed-off-by: Jim O'Regan * tweaks to TN date tagger Signed-off-by: Jim O'Regan * tweaks to ITN date tagger 
Signed-off-by: Jim O'Regan * tweaks to TN date tagger Signed-off-by: Jim O'Regan * remove duplicate Signed-off-by: Jim O'Regan * moved to tagger Signed-off-by: Jim O'Regan * nothing actually fixed here Signed-off-by: Jim O'Regan * now most tests pass Signed-off-by: Jim O'Regan * electronic Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fractions Signed-off-by: Jim O'Regan * extend Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * bare fractions is a bit of an overreach Signed-off-by: Jim O'Regan * whitelist Signed-off-by: Jim O'Regan * just inverting the TN whitelist tagger will not work/be useful Signed-off-by: Jim O'Regan * copy from English Signed-off-by: Jim O'Regan * overwrite with version from en Signed-off-by: Jim O'Regan * add basic test case Signed-off-by: Jim O'Regan * fix call Signed-off-by: Jim O'Regan * swap tsv sides Signed-off-by: Jim O'Regan * remove unused imports Signed-off-by: Jim O'Regan * add optional_era variable Signed-off-by: Jim O'Regan * add test case Signed-off-by: Jim O'Regan * make deterministic default, like most of the others Signed-off-by: Jim O'Regan * also add lowercase versions Signed-off-by: Jim O'Regan * replacing NEMO_SPACE does not work either Signed-off-by: Jim O'Regan * increasing weight... did not work last time Signed-off-by: Jim O'Regan * tweaking test cases, in case it was a sentence splitting issue. 
It was not Signed-off-by: Jim O'Regan * put the full stops back Signed-off-by: Jim O'Regan * add filler words Signed-off-by: Jim O'Regan * try splitting this out to see if it makes a difference Signed-off-by: Jim O'Regan * aha, this part should be non-deterministic only Signed-off-by: Jim O'Regan * single line only Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Revert "increasing weight... did not work last time" This reverts commit 39b020b50db745dfd6b281c8cbca45a033926996. Signed-off-by: Jim O'Regan * disabling ITN here makes TN work again(?) Signed-off-by: Jim O'Regan * Revert "disabling ITN here makes TN work again(?)" This reverts commit be49d7d5c687876e51c2e9ce1cf1e01491df280f. Signed-off-by: Jim O'Regan * changing the variable name fixes norm tests Signed-off-by: Jim O'Regan * change the variable names Signed-off-by: Jim O'Regan * add missing test tooling Signed-off-by: Jim O'Regan * copy telephone fixes from hu Signed-off-by: Jim O'Regan * copy telephone fixes from hu Signed-off-by: Jim O'Regan * add a piece for area codes for ITN Signed-off-by: Jim O'Regan * add country codes from hu Signed-off-by: Jim O'Regan * extend any_read_digit for ITN Signed-off-by: Jim O'Regan * country/area codes for ITN Signed-off-by: Jim O'Regan * first attempt Signed-off-by: Jim O'Regan * add to t&c Signed-off-by: Jim O'Regan * add to t&c Signed-off-by: Jim O'Regan * remove country codes for the time being, makes things ambiguous Signed-off-by: Jim O'Regan * basic test cases Signed-off-by: Jim O'Regan * fix Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove trailing whitespace Signed-off-by: Jim O'Regan * Update __init__.py Signed-off-by: Jim O’Regan * fix comment Signed-off-by: Jim O'Regan * fix comment Signed-off-by: Jim O'Regan * basic transform of TN tests Signed-off-by: Jim O'Regan * basic transformation 
of TN decimal tests Signed-off-by: Jim O'Regan * slight changes to date Signed-off-by: Jim O'Regan * tweak Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * include space Signed-off-by: Jim O'Regan * problem with tusen Signed-off-by: Jim O'Regan * problem with tusen was not that Signed-off-by: Jim O'Regan * add functions from hu Signed-off-by: Jim O'Regan * respect my own copyright xD Signed-off-by: Jim O'Regan * move data loading to constructor; had weirdness in this file, probably due to module-level python-suckage Signed-off-by: Jim O'Regan * move data loading, this has been an oddity before Signed-off-by: Jim O'Regan * try changing this year declaration Signed-off-by: Jim O'Regan * add year + era Signed-off-by: Jim O'Regan * eliminate more module-level data loading Signed-off-by: Jim O'Regan * Revert "eliminate more module-level data loading" This reverts commit 6a26e5d927817e1308e818758196924441ff7b3a. Signed-off-by: Jim O'Regan * expose variables Signed-off-by: Jim O'Regan * extra param for itn mode Signed-off-by: Jim O'Regan * change call Signed-off-by: Jim O'Regan * change comment Signed-off-by: Jim O'Regan * change comment Signed-off-by: Jim O'Regan * move data loading Signed-off-by: Jim O'Regan * fix parens Signed-off-by: Jim O'Regan * move data loading Signed-off-by: Jim O'Regan * adapt comments Signed-off-by: Jim O'Regan * adapt comments Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adapt/extend tests Signed-off-by: Jim O'Regan * fix dict init/change keys to something useful Signed-off-by: Jim O'Regan * initial stab at prefixed numbers Signed-off-by: Jim O'Regan * some adapting Signed-off-by: Jim O'Regan * insert kl. 
if absent Signed-off-by: Jim O'Regan * fix comments Signed-off-by: Jim O'Regan * the relative prefixed times Signed-off-by: Jim O'Regan * + comments Signed-off-by: Jim O'Regan * enable time Signed-off-by: Jim O'Regan * space in both directions Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix comment Signed-off-by: Jim O'Regan * fix hours to Signed-off-by: Jim O'Regan * split by before/after Signed-off-by: Jim O'Regan * delete, not insert Signed-off-by: Jim O'Regan * fix if Signed-off-by: Jim O'Regan * kl. 9 Signed-off-by: Jim O'Regan * copy from en Signed-off-by: Jim O'Regan * keep only get_abs_path Signed-off-by: Jim O'Regan * imports Signed-off-by: Jim O'Regan * add trimmed file Signed-off-by: Jim O'Regan * fix imports Signed-off-by: Jim O'Regan * two abs_paths... could be fun Signed-off-by: Jim O'Regan * minutes/seconds Signed-off-by: Jim O'Regan * suffix Signed-off-by: Jim O'Regan * delete, not insert Signed-off-by: Jim O'Regan * one optional Signed-off-by: Jim O'Regan * export variable Signed-off-by: Jim O'Regan * kl. or one of suffix/zone Signed-off-by: Jim O'Regan * already disambiguated Signed-off-by: Jim O'Regan * closure Signed-off-by: Jim O'Regan * do not insert kl. 
Signed-off-by: Jim O'Regan * fix test case Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix spelling Signed-off-by: Jim O'Regan * Delete measure.py Signed-off-by: Jim O’Regan * Delete money.py Signed-off-by: Jim O’Regan * remove unused pieces Signed-off-by: Jim O'Regan * remove unused pieces Signed-off-by: Jim O'Regan * remove unused test pieces Signed-off-by: Jim O'Regan * copy from es Signed-off-by: Jim O'Regan * add SV ITN Signed-off-by: Jim O'Regan * add/update __init__ Signed-off-by: Jim O'Regan * blank line Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix comment Signed-off-by: Jim O'Regan * fix lang Signed-off-by: Jim O'Regan * fix decimal verbaliser Signed-off-by: Jim O'Regan * fix Signed-off-by: Jim O'Regan * remove year, conflicts with cardinal Signed-off-by: Jim O'Regan * space before, not after Signed-off-by: Jim O'Regan * fix cardinal tests Signed-off-by: Jim O'Regan * spurious deletion Signed-off-by: Jim O'Regan * fix comment Signed-off-by: Jim O'Regan * unused imports Signed-off-by: Jim O'Regan * re-enable SV TN; enable SV ITN Signed-off-by: Jim O'Regan * Revert "re-enable SV TN; enable SV ITN" This reverts commit 3ce4dfde1f70a89afc274284f6e4c737b3fac95b. 
Signed-off-by: Jim O'Regan * fix singulras Signed-off-by: Jim O'Regan * add an export Signed-off-by: Jim O'Regan * change integer graph Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * move spaces Signed-off-by: Jim O'Regan * use cdrewrite Signed-off-by: Jim O'Regan * just EOS/BOS Signed-off-by: Jim O'Regan * fix typo Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Jim O'Regan * omit en/ett, because they are also articles Signed-off-by: Jim O'Regan * uncomment Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * unused Signed-off-by: Jim O'Regan * strip spaces from decimal part Signed-off-by: Jim O'Regan * export Signed-off-by: Jim O'Regan * partial fix, not what I wanted Signed-off-by: Jim O'Regan * move comment Signed-off-by: Jim O'Regan * en/ett cannot work in itn case Signed-off-by: Jim O'Regan * be more deliberate in graph construction Signed-off-by: Jim O'Regan * accept both Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * +2 tests Signed-off-by: Jim O'Regan * (try to) accept singular quantities for plurals Signed-off-by: Jim O'Regan * retry Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * oops Signed-off-by: Jim O'Regan * replace Signed-off-by: Jim O'Regan * arcmap Signed-off-by: Jim O'Regan * version without ones Signed-off-by: Jim O'Regan * add another test Signed-off-by: Jim O'Regan * change graph Signed-off-by: Jim O'Regan * simplify Signed-off-by: Jim O'Regan * get rid of this, this is where it goes wrong Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * 
more tests Signed-off-by: Jim O'Regan * add a test Signed-off-by: Jim O'Regan * multiple states from both ones, try removing and readding Signed-off-by: Jim O'Regan * remove ones, see if that fixes at least the bare quantities Signed-off-by: Jim O'Regan * works in the repl, dunno why it still breaks Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove duplicate Signed-off-by: Jim O'Regan * move definition Signed-off-by: Jim O'Regan * simplify Signed-off-by: Jim O'Regan * tweak Signed-off-by: Jim O'Regan * another test Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * local declaration, seems to not be working Signed-off-by: Jim O'Regan * more tests Signed-off-by: Jim O'Regan * match verbaliser Signed-off-by: Jim O'Regan * fix last two failing tests Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add missing tests for telephone and word Signed-off-by: Jim O'Regan * remove unused variable Signed-off-by: Jim O'Regan * remove unused imports Signed-off-by: Jim O'Regan * fix comment Signed-off-by: Jim O'Regan * get rid of convert_space, tests fail Signed-off-by: Jim O'Regan * put convert_spaces back, change test file; pytest fails Signed-off-by: Jim O'Regan * Revert "put convert_spaces back, change test file; pytest fails" This reverts commit a7bb7489137b8026aab02aff64df39e874630043. 
Signed-off-by: Jim O'Regan * put convert_spaces back, change test file; pytest fails, take 2 Signed-off-by: Jim O'Regan * deliberately remove spaces rather than have a non-determinism that comes out differently in sparrowhawk Signed-off-by: Jim O'Regan * try converting the non-breaking spaces in the shell script Signed-off-by: Jim O'Regan * wrong place Signed-off-by: Jim O'Regan * fix typo Signed-off-by: Jim O'Regan * fix path Signed-off-by: Jim O'Regan * export Signed-off-by: Jim O'Regan * export Signed-off-by: Jim O'Regan * remove unused Signed-off-by: Jim O'Regan * Update date.py Signed-off-by: Jim O’Regan * Update time.py Signed-off-by: Jim O’Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix comment Signed-off-by: Jim O’Regan * trim comments Signed-off-by: Jim O’Regan * remove commented line Signed-off-by: Jim O’Regan * en halv Signed-off-by: Jim O’Regan * Update test_sparrowhawk_inverse_text_normalization.sh Signed-off-by: Jim O’Regan --------- Signed-off-by: Jim O'Regan Signed-off-by: Jim O’Regan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Italian_TN (#67) * add TN italian Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix init Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix LOCATION Signed-off-by: GiacomoLeoneMaria * modify graph_utils Signed-off-by: GiacomoLeoneMaria * correct decimals Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix electronic Signed-off-by: Giacomo Cavallini * fix electronic Signed-off-by: Giacomo Cavallini * fix measure Signed-off-by: Giacomo Cavallini --------- Signed-off-by: GiacomoLeoneMaria Signed-off-by: Giacomo Cavallini 
Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Mariana <47233618+mgrafu@users.noreply.github.com> Signed-off-by: Alex Cui * Zh itn (#74) * Add ZH ITN Signed-off-by: Anand Joseph * Fix copyrights and code cleanup Signed-off-by: Anand Joseph * Remove invalid tests Signed-off-by: Anand Joseph * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Resolve CodeQL issues Signed-off-by: Anand Joseph * Cleanup Signed-off-by: Anand Joseph * Fix missing 'zh' option for ITN and correct comment Signed-off-by: Anand Joseph * Update __init__.py Change to zh instead of en for the imports. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update for decimal test data Signed-off-by: BuyuanCui * update for langauge import Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update for Chinese punctuations Signed-off-by: BuyuanCui * a new class for whitelist Signed-off-by: BuyuanCui * PYNINI_AVAILABLE = False Signed-off-by: BuyuanCui * recreated due to file import format issue Signed-off-by: BuyuanCui * recreated due to format issue Signed-off-by: BuyuanCui * caught duplicates, removed Signed-off-by: BuyuanCui * removed duplicates, arranges for CHInese Yuan updates Signed-off-by: BuyuanCui * updates accordingly to the comments from last PR. Recreated some of the files due to format issues Signed-off-by: BuyuanCui * removed the hours_to and minute_to files used for back counting. ALso removed am and pm suffix files according to the last PR. 
Recreated some of them for format issue Signed-off-by: BuyuanCui * re-added this file to avoid data file import error Signed-off-by: BuyuanCui * updated grammar according to last PR. Removed the acceptance of 千 Signed-off-by: BuyuanCui * updates Signed-off-by: BuyuanCui * updated according to last PR. Removed comma after decimal points Signed-off-by: BuyuanCui * grammar for Fraction Signed-off-by: BuyuanCui * grammar for money and updated according to last PR. Plus process of 元 Signed-off-by: BuyuanCui * ordinal grammar. updates due to the updates in cardinal grammar Signed-off-by: BuyuanCui * updated accordingly to last PR comments. removing am and pm and allowing simple mandarin expression Signed-off-by: BuyuanCui * arrangements Signed-off-by: BuyuanCui * added whitelist grammar Signed-off-by: BuyuanCui * word grammar for non-classified items Signed-off-by: BuyuanCui * updated cardinal, decimal, time, itn data Signed-off-by: BuyuanCui * updates according to last PR Signed-off-by: BuyuanCui * updates according to the updates for cardinal grammar Signed-off-by: BuyuanCui * updates for more Mandarin punctuations Signed-off-by: BuyuanCui * updated accordingly to last PR. 
removing am pm Signed-off-by: BuyuanCui * adjustment on the weight Signed-off-by: BuyuanCui * updated accordingly to the tagger updates Signed-off-by: BuyuanCui * updated accordingly to the time tagger Signed-off-by: BuyuanCui * updates according to changes in tagger on am and pm Signed-off-by: BuyuanCui * verbalizer for fraction Signed-off-by: BuyuanCui * added for mandarin grammar Signed-off-by: BuyuanCui * kept this file because using English utils results in data naming error Signed-off-by: BuyuanCui * merge conflict Signed-off-by: BuyuanCui * removed unused imports Signed-off-by: BuyuanCui * deleted unused import os Signed-off-by: BuyuanCui * deleted unused variables Signed-off-by: BuyuanCui * removed unused imports Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates and edits based on pr checks Signed-off-by: BuyuanCui * updates and edits based on pr checks Signed-off-by: BuyuanCui * format issue, recreated Signed-off-by: BuyuanCui * format issue recreated Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed coding style/format Signed-off-by: BuyuanCui * fixed coding style and format Signed-off-by: BuyuanCui * removed duplicated graph for 毛 Signed-off-by: BuyuanCui * removed the comment Signed-off-by: BuyuanCui * removed the comment Signed-off-by: BuyuanCui * removing unnecessary comments Signed-off-by: BuyuanCui * unnecessary comment removed Signed-off-by: BuyuanCui * test file updated for more cases Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated with a comment explaining why this file is kept Signed-off-by: BuyuanCui * updated the file explaining why this file is kept Signed-off-by: BuyuanCui * added Mandarin as zh Signed-off-by: BuyuanCui * removing for duplication Signed-off-by: BuyuanCui * 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed unused NEMO objects Signed-off-by: BuyuanCui * removed duplicates Signed-off-by: BuyuanCui * removing unsed imports Signed-off-by: BuyuanCui * updates to fix test file failures Signed-off-by: BuyuanCui * updates to fix file failtures Signed-off-by: BuyuanCui * updates to resolve test case failture Signed-off-by: BuyuanCui * updates to resolve test case failure Signed-off-by: BuyuanCui * updates to resolve test case failure Signed-off-by: BuyuanCui * updates to resolve test case failure Signed-off-by: BuyuanCui * updates to adap to cardinal grammar changes Signed-off-by: BuyuanCui * updates to adapt to grammar changes Signed-off-by: BuyuanCui * updates to adopt to cardinal grammar changes Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix style Signed-off-by: BuyuanCui * fix style Signed-off-by: BuyuanCui * fix style Signed-off-by: BuyuanCui * fix style Signed-off-by: BuyuanCui * fixing pr checks Signed-off-by: BuyuanCui * removed // for zhtn/itn cache Signed-off-by: BuyuanCui * Update inverse_normalize.py Added zh as a selection to pass Jenkins checks. 
Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> --------- Signed-off-by: Anand Joseph Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> Signed-off-by: BuyuanCui Co-authored-by: Alex Cui Co-authored-by: Anand Joseph Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * updated pynini_export.py file to create far files (#88) Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * readd Swedish (#87) Signed-off-by: Jim O'Regan Signed-off-by: Alex Cui * Zh tn 0712 (#89) * updates Signed-off-by: BuyuanCui * updates and fixings according to document on natonal gideline Signed-off-by: BuyuanCui * Decimal grammar added Signed-off-by: BuyuanCui * fraction updated Signed-off-by: BuyuanCui * money updated Signed-off-by: BuyuanCui * ordinal grammar added Signed-off-by: BuyuanCui * punctuation grammar added Signed-off-by: BuyuanCui * time gramamr updated Signed-off-by: BuyuanCui * tokenizaer updated Signed-off-by: BuyuanCui * updates on certificate Signed-off-by: BuyuanCui * data updated and added due to updates and chanegs to the existing grammar Signed-off-by: BuyuanCui * cardinal updated Signed-off-by: BuyuanCui * date grammar changed Signed-off-by: BuyuanCui * decimal grammar added Signed-off-by: BuyuanCui * grammar updated Signed-off-by: BuyuanCui * grammar updated Signed-off-by: BuyuanCui * grammar added Signed-off-by: BuyuanCui * grammar updates Signed-off-by: BuyuanCui * test data added Signed-off-by: BuyuanCui * test python file edits Signed-off-by: BuyuanCui * updates for tn1.0 and previous tn grammar from contribution Signed-off-by: BuyuanCui * test cases updated Signed-off-by: BuyuanCui * coding style fixed Signed-off-by: BuyuanCui * dates updated for init files Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated the date for zh Signed-off-by: BuyuanCui * removed unsed 
imports Signed-off-by: BuyuanCui * removed comments Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added back the itn tests Signed-off-by: BuyuanCui * added back measure and math from previous TN Signed-off-by: BuyuanCui * updated for tests reruns Signed-off-by: BuyuanCui * updates Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated weights Signed-off-by: BuyuanCui --------- Signed-off-by: BuyuanCui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Zh tn char (#95) * file name change Signed-off-by: BuyuanCui * file name change Signed-off-by: BuyuanCui * file name change Signed-off-by: BuyuanCui * file name change Signed-off-by: BuyuanCui * file name change Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * code style Signed-off-by: BuyuanCui * fixed import error Signed-off-by: BuyuanCui --------- Signed-off-by: BuyuanCui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * audio-based TN fix for empty pred_text/text (#92) * fix for empty pred_text Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add unittests Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix path Signed-off-by: Evelina * fix path Signed-off-by: Evelina * fix pytest Signed-off-by: Evelina --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * pip 1.2.0 Signed-off-by: Evelina Signed-off-by: Alex Cui * French tn (#91) * add tests for fr tn Signed-off-by: Mariana Graterol Fuenmayor * add fr tn for cardinals, decimals, fractions and ordinals Signed-off-by: Mariana Graterol Fuenmayor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * delete it far files from tools Signed-off-by: Mariana Graterol Fuenmayor * add languages to run_evaluate Signed-off-by: Mariana Graterol Fuenmayor * remove ambiguous spacing Signed-off-by: Mariana Graterol Fuenmayor * enable sh testing for fr tn Signed-off-by: Mariana Graterol Fuenmayor * fix bug with ordinals Signed-off-by: Mariana Graterol Fuenmayor * update jenkinsfile cache date Signed-off-by: Mariana Graterol Fuenmayor * fix test for ordinals Signed-off-by: Mariana Graterol Fuenmayor * update tn cache for fr Signed-off-by: Mariana Graterol Fuenmayor * resolve codeql issues Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Add whitelist_tech.tsv (#96) Signed-off-by: Anand Joseph Signed-off-by: Alex Cui * Zhitn 0727 (#93) * updates on itn grammar to pass sparrowhawk tests Signed-off-by: BuyuanCui * updats for sparrowhawk tests Signed-off-by: BuyuanCui * updates fro sparrowhawk tests Signed-off-by: BuyuanCui * coding style fix Signed-off-by: BuyuanCui * updates for coding style and sparrowhawk test Signed-off-by: BuyuanCui * updated classes for tests on whitelist and word grammar Signed-off-by: BuyuanCui * added for tests on whitelist Signed-off-by: BuyuanCui * added for test on word Signed-off-by: BuyuanCui * added to run test on whitelist Signed-off-by: BuyuanCui * added to run test on word Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more 
information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update test_word.py Removed unused import. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_word.py Removed imports according to CodeQL Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_whitelist.py Removing imports according to CodeQL Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_whitelist.py Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update Jenkinsfile changed zh cache to 07-27-23 as it is the latest update. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> --------- Signed-off-by: BuyuanCui Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Es tn romans fix (#98) * fix es tn roman exceptions Signed-off-by: Mariana Graterol Fuenmayor * update jenkinsfile Signed-off-by: Mariana Graterol Fuenmayor * update eval script for ITN Signed-off-by: Mariana Graterol Fuenmayor * codeql fix Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * Change docker image (#102) Change docker image to one including sparrowhawk Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Alex Cui * Print warning instead exception (#97) * raise text Signed-off-by: Nikolay Karpov * text arg Signed-off-by: Nikolay Karpov * Failed text Signed-off-by: Nikolay Karpov * add logger Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rm raise Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci * logger Signed-off-by: Nikolay Karpov * NeMo-text-processing Signed-off-by: Nikolay Karpov * info level Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rm raise Signed-off-by: Nikolay Karpov * verbose Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Normalizer.select_verbalizer Signed-off-by: Nikolay Karpov * Exception Signed-off-by: Nikolay Karpov * verbose Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * restart ci Signed-off-by: Evelina --------- Signed-off-by: Nikolay Karpov Signed-off-by: Nikolay Karpov Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Nikolay Karpov Co-authored-by: Evelina Signed-off-by: Alex Cui * warning regardless of verbose flag (#107) * warning Signed-off-by: Nikolay Karpov * self.verbose Signed-off-by: Nikolay Karpov --------- Signed-off-by: Nikolay Karpov Signed-off-by: Alex Cui * Unpin setuptools (#106) Signed-off-by: Peter Plantinga Signed-off-by: Alex Cui * fixed warnings: File is not always closes. 
(#113) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Alex Cui * fix bug #111 (ar currencies) (#117) * fix bug #111 (ar currencies) Signed-off-by: Mariana Graterol Fuenmayor * update ci folder Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * Logging clean up + IT TN fix (#118) * fix utils and it TN Signed-off-by: Evelina * clean up Signed-off-by: Evelina * fix logging Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix format Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix format Signed-off-by: Evelina * fix format Signed-off-by: Evelina * add IT TN to CI Signed-off-by: Evelina * update patch Signed-off-by: Evelina --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Time_IT_TN (#105) * add time verbalizer Signed-off-by: GiacomoLeoneMaria * add time tagger and verba Signed-off-by: GiacomoLeoneMaria * add pytest time Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * codeQL Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix numbers with eight Signed-off-by: GiacomoLeoneMaria --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * IT TN improvement on tests (#120) * add missing test cases Signed-off-by: Mariana Graterol Fuenmayor * fix bug with time tests Signed-off-by: Mariana Graterol Fuenmayor * update ci date Signed-off-by: 
Mariana Graterol Fuenmayor * add sentence test cases Signed-off-by: Mariana Graterol Fuenmayor * refine shortest path for irregular cardinals Signed-off-by: Mariana Graterol Fuenmayor * update ci date Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * add single letter exception for roman numerals (#121) * add single letter exception for roman numerals Signed-off-by: Mariana Graterol Fuenmayor * update ci dir Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * rewrote tokenizer Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * removed the file and replaced it with char in 1.8 Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * jenkins file update Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * to fix tn bug@ xuesong Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * tn bug Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Alex Cui * fixeds and updates Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Alex Cui * adjustments Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * testing commit Signed-off-by: Alex Cui * removing unsed file Signed-off-by: Alex Cui * updated test cases Signed-off-by: Alex Cui * updating etst cases Signed-off-by: Alex Cui * updates adapting to graphs Signed-off-by: Alex Cui * updated cases for SH tests Signed-off-by: Alex Cui * updated cases Signed-off-by: Alex Cui * added some sentences Signed-off-by: Alex Cui * test cases update Signed-off-by: Alex Cui * solving rebase issue, repushing changes Signed-off-by: Alex Cui * resolving conflict Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixings 
according to ci Signed-off-by: Alex Cui * fixings according to the ci Signed-off-by: Alex Cui * removed not used Signed-off-by: Alex Cui * notused removing Signed-off-by: Alex Cui * format issue Signed-off-by: Alex Cui * formt issue Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removing unused files Signed-off-by: Alex Cui * removing unused files Signed-off-by: Alex Cui * remiving unsed files; Signed-off-by: Alex Cui * removing unsed files Signed-off-by: Alex Cui * removing unsed files Signed-off-by: Alex Cui * added sentences as test cases Signed-off-by: Alex Cui * added senetnces as test cases Signed-off-by: Alex Cui * removed commentyed out tests Signed-off-by: Alex Cui * updating dates Signed-off-by: Alex Cui * attemps to fix bug Signed-off-by: Alex Cui * inprocess of fixing the bug Signed-off-by: Alex Cui * fixing existing issue Signed-off-by: Alex Cui * updated graph_utils, tokenize and classify, and word graphs Signed-off-by: Alex Cui * added bacl the ppostprocessor far creation Signed-off-by: Alex Cui * updated NEMO_NOT_ALPHA as a new variable Signed-off-by: Alex Cui * far files Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * combiedn into measure Signed-off-by: Alex Cui * removing and combined to meaasure Signed-off-by: Alex Cui * removing, not used Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates to fix space issue Signed-off-by: Alex Cui * updates to fix space issue Signed-off-by: Alex Cui * updates to fix space issue Signed-off-by: Alex Cui * updates to solve the space issue Signed-off-by: Alex Cui * resolving sh issue Signed-off-by: Alex Cui * resolving sh test issue Signed-off-by: Alex Cui * adding anands updates Signed-off-by: Alex Cui * data updated for measure and whitelist Signed-off-by: Alex Cui * 
updates Signed-off-by: Alex Cui * updates Signed-off-by: Alex Cui * updates Signed-off-by: Alex Cui * removing fraction and math part Signed-off-by: Alex Cui * removing comments Signed-off-by: Alex Cui * removing preprocessor, updating measure, adding shitelist cases Signed-off-by: Alex Cui * removing processor, modification for sp test, shitelist and word Signed-off-by: Alex Cui * updating zh date Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * realized itn being cvommented out, adding back Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * trying to run zh tn separately because it takes long time to run Signed-off-by: Alex Cui * modification to ru zh tn separately Signed-off-by: Alex Cui * independent zh tnitn tests for more time Signed-off-by: Alex Cui * adding lines to save far file Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates for reducing testing time Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * for ounct graph Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removing used graphs Signed-off-by: Alex Cui * format and removing used comments Signed-off-by: Alex Cui * removing this one, not used Signed-off-by: Alex Cui * remove unused commentss Signed-off-by: Alex Cui * removing unsed comments Signed-off-by: Alex Cui * removing unsed comments Signed-off-by: Alex Cui * removing comments Signed-off-by: Alex Cui * Delete tools/text_processing_deployment/zh directory Removing far files. 
Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * updates according to the github comments Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removing comments Signed-off-by: Alex Cui * punct grammar Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update test_cases_cardinal.txt Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update Dockerfile Copied from main branch ( which included Anand's updates) Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update launch.sh Found differences in the file. Fixing it back. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_word.py Saw word ITN being commented out. Adding it back. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update money.py Found cardinal grammar not accepting suffix. Fixed it. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Jenkinsfile Removed duplicated zh test from line 230s Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update utils.py Addressing bug raised in bug in graph_utils.py of zh ITN and decimal tagger of ar TN #162. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update graph_utils.py Addressing bug in graph_utils.py of zh ITN and decimal tagger of ar TN #162. 
Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update measure.py Fixing code style, removing unused imports Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update word.py Fixing code style, removing unused imports Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update measure.py Removing unused import. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update post_processing.py Removing unused imports Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update post_processing.py Removing unused import Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update word.py Removing unused imports Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update cardinal.py Deleting unused graph Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update word.py Removing import pynini Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update word.py removing pynini import Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update verbalize.py removing pynutil import Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update post_processing.py removing punct graph imported Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_sparrowhawk_normalization.sh Update on test issue for Docker file locations Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_ordinal.py Fixing style. 
Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Delete nemo_text_processing/text_normalization/zh/taggers/math_symbol.py Removing because it's not one of the semiotic classes. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Delete nemo_text_processing/text_normalization/zh/verbalizers/math_symbol.py Removing because it's not one of the semiotic classes. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update Jenkinsfile Updating Jenkins date Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> --------- Signed-off-by: Jim O'Regan Signed-off-by: Jim O’Regan Signed-off-by: Alex Cui Signed-off-by: Enno Hermann Signed-off-by: Vitaly Lavrukhin Signed-off-by: ealbasiri Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Evelina Signed-off-by: Anand Joseph Signed-off-by: Linnea Pari Leaver Signed-off-by: GiacomoLeoneMaria Signed-off-by: Giacomo Cavallini Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com> Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> Signed-off-by: BuyuanCui Signed-off-by: Nikolay Karpov Signed-off-by: Nikolay Karpov Signed-off-by: Peter Plantinga Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Jim O’Regan Co-authored-by: Enno Hermann Co-authored-by: Vitaly Lavrukhin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Enas Albasiri <71229149+ealbasiri@users.noreply.github.com> Co-authored-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Co-authored-by: Mariana <47233618+mgrafu@users.noreply.github.com> Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Co-authored-by: lleaver <137942999+lleaver@users.noreply.github.com> Co-authored-by: Linnea Pari Leaver Co-authored-by: Jim O’Regan 
Co-authored-by: Giacomo Leone Maria Cavallini <72698188+GiacomoLeoneMaria@users.noreply.github.com> Co-authored-by: Alex Cui Co-authored-by: Anand Joseph Co-authored-by: Evelina Co-authored-by: Nikolay Karpov Co-authored-by: Nikolay Karpov Co-authored-by: Peter Plantinga Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Alex Cui * preparing release, updating change log (#168) * preparing release, updating change log Signed-off-by: Travis Bartley * adding changelog Signed-off-by: Travis Bartley * updating pre release Signed-off-by: Travis Bartley --------- Signed-off-by: Travis Bartley Signed-off-by: Alex Cui * hotfix (#169) Signed-off-by: Travis Bartley Co-authored-by: Travis Bartley Signed-off-by: Alex Cui * hotfix (#170) Signed-off-by: Travis Bartley Signed-off-by: Alex Cui * DE TN Fixes (#177) * Adds support for social media tags (e.g. @zoobereq) Signed-off-by: Simon Zuberek * Adds test cases for social media tags Signed-off-by: Simon Zuberek * Fixes pathing for Sparrowhawk Signed-off-by: Simon Zuberek * Fixes the issue of the DE normalizer not accepting comma-separated digit strings Signed-off-by: Simon Zuberek * Fixes the issue where the normalizer didn't accept time formatted as 00.00 Uhr or 0.00 Uhr Signed-off-by: Simon Zuberek * Fixes the issue where the the sentence-final period in sentences ending with a domain name would be tagged as part of that domain name Signed-off-by: Simon Zuberek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Removes unused imports Signed-off-by: Simon Zuberek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixes the formatting Signed-off-by: Simon Zuberek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixes 
https://github.com/NVIDIA/NeMo-text-processing/issues/166 for DE Signed-off-by: Simon Zuberek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updates grammar paths Signed-off-by: Simon Zuberek * Minor Fixes Signed-off-by: Simon Zuberek * Fixes test cases Signed-off-by: Simon Zuberek --------- Signed-off-by: Simon Zuberek Co-authored-by: Simon Zuberek Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Tts en tech terms (#167) * update tts whitelist Signed-off-by: Mariana Graterol Fuenmayor * enable normalization of emphasized input Signed-off-by: Mariana Graterol Fuenmayor * add whitelist terms Signed-off-by: Mariana Graterol Fuenmayor * add test for emphasis Signed-off-by: Mariana Graterol Fuenmayor * read card numbers as digits Signed-off-by: Mariana Graterol Fuenmayor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * make ccs deterministic Signed-off-by: Mariana Graterol Fuenmayor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update jenkins Signed-off-by: Mariana Graterol Fuenmayor * fix sh tests bug Signed-off-by: Mariana Graterol Fuenmayor * fix bug with time Signed-off-by: Mariana Graterol Fuenmayor * update jenkins Signed-off-by: Mariana Graterol Fuenmayor * fix sh time bug Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Normalizes the '%' sign (#180) Signed-off-by: Simon Zuberek Co-authored-by: Simon Zuberek Signed-off-by: Alex Cui * FR TN Fixes (#181) * Normalizes the '%' sign Signed-off-by: Simon Zuberek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci * Updates pathing in Jenkins Signed-off-by: Simon Zuberek * Fixes test cases Signed-off-by: Simon Zuberek * More test case fixes Signed-off-by: Simon Zuberek --------- Signed-off-by: Simon Zuberek Co-authored-by: Simon Zuberek Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * testing Signed-off-by: Alex Cui * removing test.txt Signed-off-by: Alex Cui * fixing zh tn money curreny on l Signed-off-by: Alex Cui * bug fix on money currency l Signed-off-by: Alex Cui * updates for zh tn Signed-off-by: Alex Cui * resolving failed ci tests for money grammar Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates for decimal maoney failure Signed-off-by: Alex Cui * removing comments Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates on money grammar for failure cases Signed-off-by: Alex Cui * adding test cases in the nvbug Signed-off-by: Alex Cui * updates for ci etst Signed-off-by: Alex Cui * updating date for rerun Signed-off-by: Alex Cui * renaming final graphs Signed-off-by: Alex Cui * conflicts Signed-off-by: Alex Cui * updating data Signed-off-by: Alex Cui * attempt to resolve jenkins issue Signed-off-by: Alex Cui * ci tests resolving Signed-off-by: Alex Cui * testing Signed-off-by: Alex Cui * removing test.txt Signed-off-by: Alex Cui * fixing zh tn money curreny on l Signed-off-by: Alex Cui * bug fix on money currency l Signed-off-by: Alex Cui * resolving failed ci tests for money grammar Signed-off-by: Alex Cui * updates for decimal maoney failure Signed-off-by: Alex Cui * removing comments Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates on money grammar for failure cases Signed-off-by: Alex Cui * adding test cases in the nvbug 
Signed-off-by: Alex Cui * renaming final graphs Signed-off-by: Alex Cui * conflicts Signed-off-by: Alex Cui * updating data Signed-off-by: Alex Cui * attempt to resolve jenkins issue Signed-off-by: Alex Cui * ci tests resolving Signed-off-by: Alex Cui * resolving conflict for ci tests update Signed-off-by: Alex Cui * Increase weights for serial (en TN) (#128) * Increase weights for serial (en TN) Resolves https://github.com/NVIDIA/NeMo-text-processing/issues/126 Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * Add tests for fix Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * Update Jenkinsfile cache path Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * Update Jenkinsfile. Fix cache folder Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> --------- Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Alex Cui * add measures file for FR TN (#131) * add measures file Signed-off-by: Mariana Graterol Fuenmayor * update whitelist data Signed-off-by: Mariana Graterol Fuenmayor * add fr tn tests Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * Sh jenkins (#127) * Add SH tests to Jenkins Signed-off-by: Anand Joseph * Update cache paths Signed-off-by: Anand Joseph * Update Jenkins tests Signed-off-by: Anand Joseph * Add CI/CD tests for sparrowhawk Signed-off-by: Anand Joseph * docker build only if in test mode Signed-off-by: Anand Joseph * Fix missing variable Signed-off-by: Anand Joseph * Fix comments and remove arguments not required Signed-off-by: Anand Joseph * Fix commands not executing Signed-off-by: Anand Joseph * Missing arguments Signed-off-by: Anand Joseph * Missing quotes Signed-off-by: Anand Joseph * Fix incorrect path for tests Signed-off-by: Anand Joseph * Fix paths Signed-off-by: Anand Joseph * Incorrect paths of tests and shunit2 Signed-off-by: Anand Joseph * Fix issues 
with paths as arguments to shunit Signed-off-by: Anand Joseph * Undo path change Signed-off-by: Anand Joseph * Fix intentional fail test Signed-off-by: Anand Joseph * revert redundant check for cased option Signed-off-by: Anand Joseph * Fix default path in export_grammars.sh Signed-off-by: Anand Joseph * Update cache paths Signed-off-by: Anand Joseph * Add interactive option Signed-off-by: Anand Joseph * Add SH tests for cased EN ITN Signed-off-by: Anand Joseph --------- Signed-off-by: Anand Joseph Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Alex Cui * update isort - fix precommit (#138) * update isort version Signed-off-by: Evelina * update isort version Signed-off-by: Evelina * fix format Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove unused imports Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Armenian itn (#136) * Added Armenian ITN Signed-off-by: David Sargsyan * Added Armenian ITN Signed-off-by: David Sargsyan * Added Armenian ITN Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: David Sargsyan * Added context for tests and fixed CodeQL errors Signed-off-by: David Sargsyan * Revert "Added context for tests and fixed CodeQL errors" This reverts commit 2c804d941963c0be21d3aad07e6cd13568ab747b. 
Signed-off-by: David Sargsyan * Added context to some test files and fixed CodeQL errors Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: David Sargsyan * deleted unnecessary data Signed-off-by: David Sargsyan * translated a few measurements to Armenian Signed-off-by: David Sargsyan * adjusted some things for better readability and maintainer support Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed one test case and some issues Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: David Sargsyan Co-authored-by: David Sargsyan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Fix CI (#142) * fix whitelist deployment Signed-off-by: Evelina * clean up Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * comment out tests to recreate grammars Signed-off-by: Evelina * shorten test Signed-off-by: Evelina * fix jenkins Signed-off-by: Evelina * cased for TN Signed-off-by: Evelina * revert debug changes Signed-off-by: Evelina * fix args default Signed-off-by: Evelina * try parallel Signed-off-by: Evelina * debug parallel Signed-off-by: Evelina * rerun Signed-off-by: Evelina * rerun Signed-off-by: Evelina * fix sh tests for local SH launcher Signed-off-by: Evelina * enable all ci tests Signed-off-by: Evelina * enable all ci tests Signed-off-by: Evelina --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Armenian TN (#137) * merged with main branch and fixed conflicts Signed-off-by: David Sargsyan * fixing conflicts Signed-off-by: David Sargsyan * 
fixing some more conflicts Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: David Sargsyan * fixed a minor issue Signed-off-by: David Sargsyan * deleted unused imports Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix: add "hy" language option for armenian Signed-off-by: Ara Yeroyan <60027241+Ara-Yeroyan@users.noreply.github.com> * added optional space for measurements after cardinals/decimals Signed-off-by: David Sargsyan * added Armenian dot Signed-off-by: David Sargsyan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: David Sargsyan Signed-off-by: Ara Yeroyan <60027241+Ara-Yeroyan@users.noreply.github.com> Signed-off-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Co-authored-by: David Sargsyan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ara Yeroyan <60027241+Ara-Yeroyan@users.noreply.github.com> Co-authored-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Signed-off-by: Alex Cui * Marathi ITN (#134) * Added Marathi ITN Signed-off-by: Chinmay Patil * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adding jenkins test Signed-off-by: Travis Bartley --------- Signed-off-by: Chinmay Patil Signed-off-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Signed-off-by: Travis Bartley Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Co-authored-by: Travis Bartley Signed-off-by: Alex Cui * jenkins fix (#150) * jenkins fix Signed-off-by: Travis 
Bartley * removing armenian to troubleshoot jenkins Signed-off-by: Travis Bartley * removing armenian to troubleshoot jenkins Signed-off-by: Travis Bartley * missing _init_ for python Signed-off-by: Travis Bartley * mislabled cache Signed-off-by: Travis Bartley --------- Signed-off-by: Travis Bartley Signed-off-by: Alex Cui * ZH sentence-level TN (#112) * Swedish telephone fix (#60) * port fix for telephone from swedish-itn branch Signed-off-by: Jim O'Regan * extend cardinal in non-deterministic mode Signed-off-by: Jim O'Regan * whitespace fixes Signed-off-by: Jim O'Regan * also fix in the verbaliser Signed-off-by: Jim O'Regan * Update Jenkinsfile Signed-off-by: Jim O’Regan --------- Signed-off-by: Jim O'Regan Signed-off-by: Jim O’Regan Signed-off-by: Alex Cui * log instead of print in graph_utils.py (#68) Signed-off-by: Enno Hermann Signed-off-by: Alex Cui * CER estimation speedup for audio-based text normalization (#73) * Replaced jiwer with editdistance to speed up CER estimation Signed-off-by: Vitaly Lavrukhin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Vitaly Lavrukhin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * add measure coverage for TN and ITN (#62) * add measure coverage for TN and ITN Signed-off-by: ealbasiri * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove unused imports Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove unused imports Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * Remove unused imports Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update 
measure.py Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> --------- Signed-off-by: ealbasiri Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Alex Cui * upload es-ES, es-LA, fr-FR and it-IT g2p dicts (#63) * upload es-ES and fr-FR g2p dicts Signed-off-by: Mariana Graterol Fuenmayor * add inits Signed-off-by: Mariana Graterol Fuenmayor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add NALA Spanish dict Signed-off-by: Mariana Graterol Fuenmayor * rename Spanish and French dictionaries Signed-off-by: Mariana Graterol Fuenmayor * add Italian dictionary Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * add country codes from hu (#77) Signed-off-by: Jim O'Regan Signed-off-by: Alex Cui * fix electronic case for username (#75) * fix electronic username w/o . 
Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * disable sv tests Signed-off-by: Evelina * disable sv tests Signed-off-by: Evelina * fix ar test Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * disable sv tests Signed-off-by: Evelina * update ci dirs, enable sv tests Signed-off-by: Evelina --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * 0.1.8 release (#79) Signed-off-by: Evelina Signed-off-by: Alex Cui * Codeswitched ES/EN ITN (#78) * Initial commit for ES-EN codeswitched ITN Signed-off-by: Anand Joseph * Enable export for es_en codeswitched ITN Signed-off-by: Anand Joseph * Add whitelist, update weights Signed-off-by: Anand Joseph * Add tests for en_es, zone tagged separately in es Signed-off-by: Anand Joseph * Fix path to test data for sparrowhawk tests Signed-off-by: Anand Joseph * Update Jenkinsfile - enable ES/EN tests Signed-off-by: Anand Joseph * Add __init__.py files Signed-off-by: Anand Joseph * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix issues with failed docker build - due to archiving of debian and issues with re2 Signed-off-by: Anand Joseph * Remove unused imports and variables Signed-off-by: Anand Joseph * Update date Signed-off-by: Anand Joseph * Enable NBSP in sparrowhawk tests Signed-off-by: Anand Joseph * Update copyrights Signed-off-by: Anand Joseph * Update cache path in for ES/EN CI/CD Signed-off-by: Anand Joseph --------- Signed-off-by: Anand Joseph Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * electronic verbalizer fallback (#81) * 0.1.8 release Signed-off-by: Evelina * add elec fallback Signed-off-by: Evelina * update ci Signed-off-by: Evelina * 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * minor normalize.py edit for usability (#84) * electronic verbalizer fallback (#81) * 0.1.8 release Signed-off-by: Evelina * add elec fallback Signed-off-by: Evelina * update ci Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Linnea Pari Leaver * documentation edits for grammar/clarity Signed-off-by: Linnea Pari Leaver * added --output_field flag for command line interface Signed-off-by: Linnea Pari Leaver * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Signed-off-by: Linnea Pari Leaver Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Linnea Pari Leaver Signed-off-by: Alex Cui * Swedish ITN (#40) * force two digits for month Signed-off-by: Jim O'Regan * put it in a function, because I reject the garbage pre-commit.ci came up with Signed-off-by: Jim O'Regan * wrap some more pieces Signed-off-by: Jim O'Regan * add graph pieces Signed-off-by: Jim O'Regan * delete junk Signed-off-by: Jim O'Regan * my copyright Signed-off-by: Jim O'Regan * add date verbaliser (copy from es) Signed-off-by: Jim O'Regan * tweaks Signed-off-by: Jim O'Regan * add date verbaliser Signed-off-by: Jim O'Regan * add right tokens Signed-off-by: Jim O'Regan * some tweaks, more needed Signed-off-by: Jim O'Regan * basic test cases Signed-off-by: Jim O'Regan * tweaks to TN date tagger Signed-off-by: Jim O'Regan * tweaks to ITN date tagger 
Signed-off-by: Jim O'Regan * tweaks to TN date tagger Signed-off-by: Jim O'Regan * remove duplicate Signed-off-by: Jim O'Regan * moved to tagger Signed-off-by: Jim O'Regan * nothing actually fixed here Signed-off-by: Jim O'Regan * now most tests pass Signed-off-by: Jim O'Regan * electronic Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fractions Signed-off-by: Jim O'Regan * extend Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * bare fractions is a bit of an overreach Signed-off-by: Jim O'Regan * whitelist Signed-off-by: Jim O'Regan * just inverting the TN whitelist tagger will not work/be useful Signed-off-by: Jim O'Regan * copy from English Signed-off-by: Jim O'Regan * overwrite with version from en Signed-off-by: Jim O'Regan * add basic test case Signed-off-by: Jim O'Regan * fix call Signed-off-by: Jim O'Regan * swap tsv sides Signed-off-by: Jim O'Regan * remove unused imports Signed-off-by: Jim O'Regan * add optional_era variable Signed-off-by: Jim O'Regan * add test case Signed-off-by: Jim O'Regan * make deterministic default, like most of the others Signed-off-by: Jim O'Regan * also add lowercase versions Signed-off-by: Jim O'Regan * replacing NEMO_SPACE does not work either Signed-off-by: Jim O'Regan * increasing weight... did not work last time Signed-off-by: Jim O'Regan * tweaking test cases, in case it was a sentence splitting issue. 
It was not Signed-off-by: Jim O'Regan * put the full stops back Signed-off-by: Jim O'Regan * add filler words Signed-off-by: Jim O'Regan * try splitting this out to see if it makes a difference Signed-off-by: Jim O'Regan * aha, this part should be non-deterministic only Signed-off-by: Jim O'Regan * single line only Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Revert "increasing weight... did not work last time" This reverts commit 39b020b50db745dfd6b281c8cbca45a033926996. Signed-off-by: Jim O'Regan * disabling ITN here makes TN work again(?) Signed-off-by: Jim O'Regan * Revert "disabling ITN here makes TN work again(?)" This reverts commit be49d7d5c687876e51c2e9ce1cf1e01491df280f. Signed-off-by: Jim O'Regan * changing the variable name fixes norm tests Signed-off-by: Jim O'Regan * change the variable names Signed-off-by: Jim O'Regan * add missing test tooling Signed-off-by: Jim O'Regan * copy telephone fixes from hu Signed-off-by: Jim O'Regan * copy telephone fixes from hu Signed-off-by: Jim O'Regan * add a piece for area codes for ITN Signed-off-by: Jim O'Regan * add country codes from hu Signed-off-by: Jim O'Regan * extend any_read_digit for ITN Signed-off-by: Jim O'Regan * country/area codes for ITN Signed-off-by: Jim O'Regan * first attempt Signed-off-by: Jim O'Regan * add to t&c Signed-off-by: Jim O'Regan * add to t&c Signed-off-by: Jim O'Regan * remove country codes for the time being, makes things ambiguous Signed-off-by: Jim O'Regan * basic test cases Signed-off-by: Jim O'Regan * fix Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove trailing whitespace Signed-off-by: Jim O'Regan * Update __init__.py Signed-off-by: Jim O’Regan * fix comment Signed-off-by: Jim O'Regan * fix comment Signed-off-by: Jim O'Regan * basic transform of TN tests Signed-off-by: Jim O'Regan * basic transformation 
of TN decimal tests Signed-off-by: Jim O'Regan * slight changes to date Signed-off-by: Jim O'Regan * tweak Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * include space Signed-off-by: Jim O'Regan * problem with tusen Signed-off-by: Jim O'Regan * problem with tusen was not that Signed-off-by: Jim O'Regan * add functions from hu Signed-off-by: Jim O'Regan * respect my own copyright xD Signed-off-by: Jim O'Regan * move data loading to constructor; had weirdness in this file, probably due to module-level python-suckage Signed-off-by: Jim O'Regan * move data loading, this has been an oddity before Signed-off-by: Jim O'Regan * try changing this year declaration Signed-off-by: Jim O'Regan * add year + era Signed-off-by: Jim O'Regan * eliminate more module-level data loading Signed-off-by: Jim O'Regan * Revert "eliminate more module-level data loading" This reverts commit 6a26e5d927817e1308e818758196924441ff7b3a. Signed-off-by: Jim O'Regan * expose variables Signed-off-by: Jim O'Regan * extra param for itn mode Signed-off-by: Jim O'Regan * change call Signed-off-by: Jim O'Regan * change comment Signed-off-by: Jim O'Regan * change comment Signed-off-by: Jim O'Regan * move data loading Signed-off-by: Jim O'Regan * fix parens Signed-off-by: Jim O'Regan * move data loading Signed-off-by: Jim O'Regan * adapt comments Signed-off-by: Jim O'Regan * adapt comments Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adapt/extend tests Signed-off-by: Jim O'Regan * fix dict init/change keys to something useful Signed-off-by: Jim O'Regan * initial stab at prefixed numbers Signed-off-by: Jim O'Regan * some adapting Signed-off-by: Jim O'Regan * insert kl. 
if absent Signed-off-by: Jim O'Regan * fix comments Signed-off-by: Jim O'Regan * the relative prefixed times Signed-off-by: Jim O'Regan * + comments Signed-off-by: Jim O'Regan * enable time Signed-off-by: Jim O'Regan * space in both directions Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix comment Signed-off-by: Jim O'Regan * fix hours to Signed-off-by: Jim O'Regan * split by before/after Signed-off-by: Jim O'Regan * delete, not insert Signed-off-by: Jim O'Regan * fix if Signed-off-by: Jim O'Regan * kl. 9 Signed-off-by: Jim O'Regan * copy from en Signed-off-by: Jim O'Regan * keep only get_abs_path Signed-off-by: Jim O'Regan * imports Signed-off-by: Jim O'Regan * add trimmed file Signed-off-by: Jim O'Regan * fix imports Signed-off-by: Jim O'Regan * two abs_paths... could be fun Signed-off-by: Jim O'Regan * minutes/seconds Signed-off-by: Jim O'Regan * suffix Signed-off-by: Jim O'Regan * delete, not insert Signed-off-by: Jim O'Regan * one optional Signed-off-by: Jim O'Regan * export variable Signed-off-by: Jim O'Regan * kl. or one of suffix/zone Signed-off-by: Jim O'Regan * already disambiguated Signed-off-by: Jim O'Regan * closure Signed-off-by: Jim O'Regan * do not insert kl. 
Signed-off-by: Jim O'Regan * fix test case Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix spelling Signed-off-by: Jim O'Regan * Delete measure.py Signed-off-by: Jim O’Regan * Delete money.py Signed-off-by: Jim O’Regan * remove unused pieces Signed-off-by: Jim O'Regan * remove unused pieces Signed-off-by: Jim O'Regan * remove unused test pieces Signed-off-by: Jim O'Regan * copy from es Signed-off-by: Jim O'Regan * add SV ITN Signed-off-by: Jim O'Regan * add/update __init__ Signed-off-by: Jim O'Regan * blank line Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix comment Signed-off-by: Jim O'Regan * fix lang Signed-off-by: Jim O'Regan * fix decimal verbaliser Signed-off-by: Jim O'Regan * fix Signed-off-by: Jim O'Regan * remove year, conflicts with cardinal Signed-off-by: Jim O'Regan * space before, not after Signed-off-by: Jim O'Regan * fix cardinal tests Signed-off-by: Jim O'Regan * spurious deletion Signed-off-by: Jim O'Regan * fix comment Signed-off-by: Jim O'Regan * unused imports Signed-off-by: Jim O'Regan * re-enable SV TN; enable SV ITN Signed-off-by: Jim O'Regan * Revert "re-enable SV TN; enable SV ITN" This reverts commit 3ce4dfde1f70a89afc274284f6e4c737b3fac95b. 
Signed-off-by: Jim O'Regan * fix singulras Signed-off-by: Jim O'Regan * add an export Signed-off-by: Jim O'Regan * change integer graph Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * move spaces Signed-off-by: Jim O'Regan * use cdrewrite Signed-off-by: Jim O'Regan * just EOS/BOS Signed-off-by: Jim O'Regan * fix typo Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Jim O'Regan * omit en/ett, because they are also articles Signed-off-by: Jim O'Regan * uncomment Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * unused Signed-off-by: Jim O'Regan * strip spaces from decimal part Signed-off-by: Jim O'Regan * export Signed-off-by: Jim O'Regan * partial fix, not what I wanted Signed-off-by: Jim O'Regan * move comment Signed-off-by: Jim O'Regan * en/ett cannot work in itn case Signed-off-by: Jim O'Regan * be more deliberate in graph construction Signed-off-by: Jim O'Regan * accept both Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * +2 tests Signed-off-by: Jim O'Regan * (try to) accept singular quantities for plurals Signed-off-by: Jim O'Regan * retry Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * oops Signed-off-by: Jim O'Regan * replace Signed-off-by: Jim O'Regan * arcmap Signed-off-by: Jim O'Regan * version without ones Signed-off-by: Jim O'Regan * add another test Signed-off-by: Jim O'Regan * change graph Signed-off-by: Jim O'Regan * simplify Signed-off-by: Jim O'Regan * get rid of this, this is where it goes wrong Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * 
more tests Signed-off-by: Jim O'Regan * add a test Signed-off-by: Jim O'Regan * multiple states from both ones, try removing and readding Signed-off-by: Jim O'Regan * remove ones, see if that fixes at least the bare quantities Signed-off-by: Jim O'Regan * works in the repl, dunno why it still breaks Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove duplicate Signed-off-by: Jim O'Regan * move definition Signed-off-by: Jim O'Regan * simplify Signed-off-by: Jim O'Regan * tweak Signed-off-by: Jim O'Regan * another test Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * local declaration, seems to not be working Signed-off-by: Jim O'Regan * more tests Signed-off-by: Jim O'Regan * match verbaliser Signed-off-by: Jim O'Regan * fix last two failing tests Signed-off-by: Jim O'Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add missing tests for telephone and word Signed-off-by: Jim O'Regan * remove unused variable Signed-off-by: Jim O'Regan * remove unused imports Signed-off-by: Jim O'Regan * fix comment Signed-off-by: Jim O'Regan * get rid of convert_space, tests fail Signed-off-by: Jim O'Regan * put convert_spaces back, change test file; pytest fails Signed-off-by: Jim O'Regan * Revert "put convert_spaces back, change test file; pytest fails" This reverts commit a7bb7489137b8026aab02aff64df39e874630043. 
Signed-off-by: Jim O'Regan * put convert_spaces back, change test file; pytest fails, take 2 Signed-off-by: Jim O'Regan * deliberately remove spaces rather than have a non-determinism that comes out differently in sparrowhawk Signed-off-by: Jim O'Regan * try converting the non-breaking spaces in the shell script Signed-off-by: Jim O'Regan * wrong place Signed-off-by: Jim O'Regan * fix typo Signed-off-by: Jim O'Regan * fix path Signed-off-by: Jim O'Regan * export Signed-off-by: Jim O'Regan * export Signed-off-by: Jim O'Regan * remove unused Signed-off-by: Jim O'Regan * Update date.py Signed-off-by: Jim O’Regan * Update time.py Signed-off-by: Jim O’Regan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix comment Signed-off-by: Jim O’Regan * trim comments Signed-off-by: Jim O’Regan * remove commented line Signed-off-by: Jim O’Regan * en halv Signed-off-by: Jim O’Regan * Update test_sparrowhawk_inverse_text_normalization.sh Signed-off-by: Jim O’Regan --------- Signed-off-by: Jim O'Regan Signed-off-by: Jim O’Regan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Italian_TN (#67) * add TN italian Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix init Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix LOCATION Signed-off-by: GiacomoLeoneMaria * modify graph_utils Signed-off-by: GiacomoLeoneMaria * correct decimals Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix electronic Signed-off-by: Giacomo Cavallini * fix electronic Signed-off-by: Giacomo Cavallini * fix measure Signed-off-by: Giacomo Cavallini --------- Signed-off-by: GiacomoLeoneMaria Signed-off-by: Giacomo Cavallini 
Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Mariana <47233618+mgrafu@users.noreply.github.com> Signed-off-by: Alex Cui * Zh itn (#74) * Add ZH ITN Signed-off-by: Anand Joseph * Fix copyrights and code cleanup Signed-off-by: Anand Joseph * Remove invalid tests Signed-off-by: Anand Joseph * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Resolve CodeQL issues Signed-off-by: Anand Joseph * Cleanup Signed-off-by: Anand Joseph * Fix missing 'zh' option for ITN and correct comment Signed-off-by: Anand Joseph * Update __init__.py Change to zh instead of en for the imports. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update for decimal test data Signed-off-by: BuyuanCui * update for langauge import Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update for Chinese punctuations Signed-off-by: BuyuanCui * a new class for whitelist Signed-off-by: BuyuanCui * PYNINI_AVAILABLE = False Signed-off-by: BuyuanCui * recreated due to file import format issue Signed-off-by: BuyuanCui * recreated due to format issue Signed-off-by: BuyuanCui * caught duplicates, removed Signed-off-by: BuyuanCui * removed duplicates, arranges for CHInese Yuan updates Signed-off-by: BuyuanCui * updates accordingly to the comments from last PR. Recreated some of the files due to format issues Signed-off-by: BuyuanCui * removed the hours_to and minute_to files used for back counting. ALso removed am and pm suffix files according to the last PR. 
Recreated some of them for format issue Signed-off-by: BuyuanCui * re-added this file to avoid data file import error Signed-off-by: BuyuanCui * updated grammar according to last PR. Removed the acceptance of 千 Signed-off-by: BuyuanCui * updates Signed-off-by: BuyuanCui * updated according to last PR. Removed comma after decimal points Signed-off-by: BuyuanCui * grammar for Fraction Signed-off-by: BuyuanCui * grammar for money and updated according to last PR. Plus process of 元 Signed-off-by: BuyuanCui * ordinal grammar. updates due to the updates in cardinal grammar Signed-off-by: BuyuanCui * updated accordingly to last PR comments. removing am and pm and allowing simple mandarin expression Signed-off-by: BuyuanCui * arrangements Signed-off-by: BuyuanCui * added whitelist grammar Signed-off-by: BuyuanCui * word grammar for non-classified items Signed-off-by: BuyuanCui * updated cardinal, decimal, time, itn data Signed-off-by: BuyuanCui * updates according to last PR Signed-off-by: BuyuanCui * updates according to the updates for cardinal grammar Signed-off-by: BuyuanCui * updates for more Mandarin punctuations Signed-off-by: BuyuanCui * updated accordingly to last PR. 
removing am pm Signed-off-by: BuyuanCui * adjustment on the weight Signed-off-by: BuyuanCui * updated accordingly to the tagger updates Signed-off-by: BuyuanCui * updated accordingly to the time tagger Signed-off-by: BuyuanCui * updates according to changes in tagger on am and pm Signed-off-by: BuyuanCui * verbalizer for fraction Signed-off-by: BuyuanCui * added for mandarin grammar Signed-off-by: BuyuanCui * kept this file because using English utils results in data naming error Signed-off-by: BuyuanCui * merge conflict Signed-off-by: BuyuanCui * removed unused imports Signed-off-by: BuyuanCui * deleted unused import os Signed-off-by: BuyuanCui * deleted unused variables Signed-off-by: BuyuanCui * removed unused imports Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates and edits based on pr checks Signed-off-by: BuyuanCui * updates and edits based on pr checks Signed-off-by: BuyuanCui * format issue, recreated Signed-off-by: BuyuanCui * format issue recreated Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed coding style/format Signed-off-by: BuyuanCui * fixed coding style and format Signed-off-by: BuyuanCui * removed duplicated graph for 毛 Signed-off-by: BuyuanCui * removed the comment Signed-off-by: BuyuanCui * removed the comment Signed-off-by: BuyuanCui * removing unnecessary comments Signed-off-by: BuyuanCui * unnecessary comment removed Signed-off-by: BuyuanCui * test file updated for more cases Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated with a comment explaining why this file is kept Signed-off-by: BuyuanCui * updated the file explaining why this file is kept Signed-off-by: BuyuanCui * added Mandarin as zh Signed-off-by: BuyuanCui * removing for duplication Signed-off-by: BuyuanCui * 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed unused NEMO objects Signed-off-by: BuyuanCui * removed duplicates Signed-off-by: BuyuanCui * removing unsed imports Signed-off-by: BuyuanCui * updates to fix test file failures Signed-off-by: BuyuanCui * updates to fix file failtures Signed-off-by: BuyuanCui * updates to resolve test case failture Signed-off-by: BuyuanCui * updates to resolve test case failure Signed-off-by: BuyuanCui * updates to resolve test case failure Signed-off-by: BuyuanCui * updates to resolve test case failure Signed-off-by: BuyuanCui * updates to adap to cardinal grammar changes Signed-off-by: BuyuanCui * updates to adapt to grammar changes Signed-off-by: BuyuanCui * updates to adopt to cardinal grammar changes Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix style Signed-off-by: BuyuanCui * fix style Signed-off-by: BuyuanCui * fix style Signed-off-by: BuyuanCui * fix style Signed-off-by: BuyuanCui * fixing pr checks Signed-off-by: BuyuanCui * removed // for zhtn/itn cache Signed-off-by: BuyuanCui * Update inverse_normalize.py Added zh as a selection to pass Jenkins checks. 
Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> --------- Signed-off-by: Anand Joseph Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> Signed-off-by: BuyuanCui Co-authored-by: Alex Cui Co-authored-by: Anand Joseph Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * updated pynini_export.py file to create far files (#88) Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * readd Swedish (#87) Signed-off-by: Jim O'Regan Signed-off-by: Alex Cui * Zh tn 0712 (#89) * updates Signed-off-by: BuyuanCui * updates and fixings according to document on natonal gideline Signed-off-by: BuyuanCui * Decimal grammar added Signed-off-by: BuyuanCui * fraction updated Signed-off-by: BuyuanCui * money updated Signed-off-by: BuyuanCui * ordinal grammar added Signed-off-by: BuyuanCui * punctuation grammar added Signed-off-by: BuyuanCui * time gramamr updated Signed-off-by: BuyuanCui * tokenizaer updated Signed-off-by: BuyuanCui * updates on certificate Signed-off-by: BuyuanCui * data updated and added due to updates and chanegs to the existing grammar Signed-off-by: BuyuanCui * cardinal updated Signed-off-by: BuyuanCui * date grammar changed Signed-off-by: BuyuanCui * decimal grammar added Signed-off-by: BuyuanCui * grammar updated Signed-off-by: BuyuanCui * grammar updated Signed-off-by: BuyuanCui * grammar added Signed-off-by: BuyuanCui * grammar updates Signed-off-by: BuyuanCui * test data added Signed-off-by: BuyuanCui * test python file edits Signed-off-by: BuyuanCui * updates for tn1.0 and previous tn grammar from contribution Signed-off-by: BuyuanCui * test cases updated Signed-off-by: BuyuanCui * coding style fixed Signed-off-by: BuyuanCui * dates updated for init files Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated the date for zh Signed-off-by: BuyuanCui * removed unsed 
imports Signed-off-by: BuyuanCui * removed comments Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added back the itn tests Signed-off-by: BuyuanCui * added back measure and math from previou TN Signed-off-by: BuyuanCui * updated for tests reruns Signed-off-by: BuyuanCui * updats Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated weights Signed-off-by: BuyuanCui --------- Signed-off-by: BuyuanCui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Zh tn char (#95) * file name change Signed-off-by: BuyuanCui * file name change Signed-off-by: BuyuanCui * file name change Signed-off-by: BuyuanCui * file name change Signed-off-by: BuyuanCui * file name change Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * file name Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * code stle Signed-off-by: BuyuanCui * fixed import error Signed-off-by: BuyuanCui --------- Signed-off-by: BuyuanCui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * audio-based TN fix for empty pred_text/text (#92) * fix for empty pred_text Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add unittests Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix path Signed-off-by: Evelina * fix path Signed-off-by: Evelina * fix pytest Signed-off-by: Evelina --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * pip 1.2.0 Signed-off-by: Evelina Signed-off-by: Alex Cui * French tn (#91) * add tests for fr tn Signed-off-by: Mariana Graterol Fuenmayor * add fr tn for cardinals, decimals, fractions and ordinals Signed-off-by: Mariana Graterol Fuenmayor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * delete it far files from tools Signed-off-by: Mariana Graterol Fuenmayor * add languages to run_evaluate Signed-off-by: Mariana Graterol Fuenmayor * remove ambiguous spacing Signed-off-by: Mariana Graterol Fuenmayor * enable sh testing for fr tn Signed-off-by: Mariana Graterol Fuenmayor * fix bug with ordinals Signed-off-by: Mariana Graterol Fuenmayor * update jenkinsfile cache date Signed-off-by: Mariana Graterol Fuenmayor * fix test for ordinals Signed-off-by: Mariana Graterol Fuenmayor * update tn cache for fr Signed-off-by: Mariana Graterol Fuenmayor * resolve codeql issues Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Add whitelist_tech.tsv (#96) Signed-off-by: Anand Joseph Signed-off-by: Alex Cui * Zhitn 0727 (#93) * updates on itn grammar to pass sparrowhawk tests Signed-off-by: BuyuanCui * updats for sparrowhawk tests Signed-off-by: BuyuanCui * updates fro sparrowhawk tests Signed-off-by: BuyuanCui * coding style fix Signed-off-by: BuyuanCui * updates for coding style and sparrowhawk test Signed-off-by: BuyuanCui * updated classes for tests on whitelist and word grammar Signed-off-by: BuyuanCui * added for tests on whitelist Signed-off-by: BuyuanCui * added for test on word Signed-off-by: BuyuanCui * added to run test on whitelist Signed-off-by: BuyuanCui * added to run test on word Signed-off-by: BuyuanCui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more 
information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update test_word.py Removed unused import. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_word.py Removed imports according to CodeQL Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_whitelist.py Removing imports according to CodeQL Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_whitelist.py Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update Jenkinsfile changed zh cache to 07-27-23 as it is the latest update. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> --------- Signed-off-by: BuyuanCui Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Es tn romans fix (#98) * fix es tn roman exceptions Signed-off-by: Mariana Graterol Fuenmayor * update jenkinsfile Signed-off-by: Mariana Graterol Fuenmayor * update eval script for ITN Signed-off-by: Mariana Graterol Fuenmayor * codeql fix Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * Change docker image (#102) Change docker image to one including sparrowhawk Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Alex Cui * Print warning instead exception (#97) * raise text Signed-off-by: Nikolay Karpov * text arg Signed-off-by: Nikolay Karpov * Failed text Signed-off-by: Nikolay Karpov * add logger Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rm raise Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci * logger Signed-off-by: Nikolay Karpov * NeMo-text-processing Signed-off-by: Nikolay Karpov * info level Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rm raise Signed-off-by: Nikolay Karpov * verbose Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Normalizer.select_verbalizer Signed-off-by: Nikolay Karpov * Exception Signed-off-by: Nikolay Karpov * verbose Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * restart ci Signed-off-by: Evelina --------- Signed-off-by: Nikolay Karpov Signed-off-by: Nikolay Karpov Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Nikolay Karpov Co-authored-by: Evelina Signed-off-by: Alex Cui * warning regardless of verbose flag (#107) * warning Signed-off-by: Nikolay Karpov * self.verbose Signed-off-by: Nikolay Karpov --------- Signed-off-by: Nikolay Karpov Signed-off-by: Alex Cui * Unpin setuptools (#106) Signed-off-by: Peter Plantinga Signed-off-by: Alex Cui * fixed warnings: File is not always closes. 
(#113) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Alex Cui * fix bug #111 (ar currencies) (#117) * fix bug #111 (ar currencies) Signed-off-by: Mariana Graterol Fuenmayor * update ci folder Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * Logging clean up + IT TN fix (#118) * fix utils and it TN Signed-off-by: Evelina * clean up Signed-off-by: Evelina * fix logging Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix format Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix format Signed-off-by: Evelina * fix format Signed-off-by: Evelina * add IT TN to CI Signed-off-by: Evelina * update patch Signed-off-by: Evelina --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * Time_IT_TN (#105) * add time verbalizer Signed-off-by: GiacomoLeoneMaria * add time tagger and verba Signed-off-by: GiacomoLeoneMaria * add pytest time Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * codeQL Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix numbers with eight Signed-off-by: GiacomoLeoneMaria --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * IT TN improvement on tests (#120) * add missing test cases Signed-off-by: Mariana Graterol Fuenmayor * fix bug with time tests Signed-off-by: Mariana Graterol Fuenmayor * update ci date Signed-off-by: 
Mariana Graterol Fuenmayor * add sentence test cases Signed-off-by: Mariana Graterol Fuenmayor * refine shortest path for irregular cardinals Signed-off-by: Mariana Graterol Fuenmayor * update ci date Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * add single letter exception for roman numerals (#121) * add single letter exception for roman numerals Signed-off-by: Mariana Graterol Fuenmayor * update ci dir Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui * rewrote tokenizer Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * removed the file and replaced it with char in 1.8 Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * jenkins file update Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * to fix tn bug@ xuesong Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * tn bug Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Alex Cui * fixeds and updates Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Alex Cui * adjustments Signed-off-by: BuyuanCui Signed-off-by: Alex Cui * testing commit Signed-off-by: Alex Cui * removing unsed file Signed-off-by: Alex Cui * updated test cases Signed-off-by: Alex Cui * updating etst cases Signed-off-by: Alex Cui * updates adapting to graphs Signed-off-by: Alex Cui * updated cases for SH tests Signed-off-by: Alex Cui * updated cases Signed-off-by: Alex Cui * added some sentences Signed-off-by: Alex Cui * test cases update Signed-off-by: Alex Cui * solving rebase issue, repushing changes Signed-off-by: Alex Cui * resolving conflict Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixings 
according to ci Signed-off-by: Alex Cui * fixings according to the ci Signed-off-by: Alex Cui * removed not used Signed-off-by: Alex Cui * notused removing Signed-off-by: Alex Cui * format issue Signed-off-by: Alex Cui * formt issue Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removing unused files Signed-off-by: Alex Cui * removing unused files Signed-off-by: Alex Cui * remiving unsed files; Signed-off-by: Alex Cui * removing unsed files Signed-off-by: Alex Cui * removing unsed files Signed-off-by: Alex Cui * added sentences as test cases Signed-off-by: Alex Cui * added senetnces as test cases Signed-off-by: Alex Cui * removed commentyed out tests Signed-off-by: Alex Cui * updating dates Signed-off-by: Alex Cui * attemps to fix bug Signed-off-by: Alex Cui * inprocess of fixing the bug Signed-off-by: Alex Cui * fixing existing issue Signed-off-by: Alex Cui * updated graph_utils, tokenize and classify, and word graphs Signed-off-by: Alex Cui * added bacl the ppostprocessor far creation Signed-off-by: Alex Cui * updated NEMO_NOT_ALPHA as a new variable Signed-off-by: Alex Cui * far files Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * combiedn into measure Signed-off-by: Alex Cui * removing and combined to meaasure Signed-off-by: Alex Cui * removing, not used Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates to fix space issue Signed-off-by: Alex Cui * updates to fix space issue Signed-off-by: Alex Cui * updates to fix space issue Signed-off-by: Alex Cui * updates to solve the space issue Signed-off-by: Alex Cui * resolving sh issue Signed-off-by: Alex Cui * resolving sh test issue Signed-off-by: Alex Cui * adding anands updates Signed-off-by: Alex Cui * data updated for measure and whitelist Signed-off-by: Alex Cui * 
updates Signed-off-by: Alex Cui * updates Signed-off-by: Alex Cui * updates Signed-off-by: Alex Cui * removing fraction and math part Signed-off-by: Alex Cui * removing comments Signed-off-by: Alex Cui * removing preprocessor, updating measure, adding shitelist cases Signed-off-by: Alex Cui * removing processor, modification for sp test, shitelist and word Signed-off-by: Alex Cui * updating zh date Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * realized itn being cvommented out, adding back Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * trying to run zh tn separately because it takes long time to run Signed-off-by: Alex Cui * modification to ru zh tn separately Signed-off-by: Alex Cui * independent zh tnitn tests for more time Signed-off-by: Alex Cui * adding lines to save far file Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates for reducing testing time Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * for ounct graph Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removing used graphs Signed-off-by: Alex Cui * format and removing used comments Signed-off-by: Alex Cui * removing this one, not used Signed-off-by: Alex Cui * remove unused commentss Signed-off-by: Alex Cui * removing unsed comments Signed-off-by: Alex Cui * removing unsed comments Signed-off-by: Alex Cui * removing comments Signed-off-by: Alex Cui * Delete tools/text_processing_deployment/zh directory Removing far files. 
Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * updates according to the github comments Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removing comments Signed-off-by: Alex Cui * punct grammar Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update test_cases_cardinal.txt Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update Dockerfile Copied from main branch ( which included Anand's updates) Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update launch.sh Found differences in the file. Fixing it back. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_word.py Saw word ITN being commented out. Adding it back. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update money.py Found cardinal grammar not accepting suffix. Fixed it. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Jenkinsfile Removed duplicated zh test from line 230s Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update utils.py Addressing bug raised in bug in graph_utils.py of zh ITN and decimal tagger of ar TN #162. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update graph_utils.py Addressing bug in graph_utils.py of zh ITN and decimal tagger of ar TN #162. 
Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update measure.py Fixing code style, removing unused imports Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update word.py Fixing code style, removing unused imports Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update measure.py Removing unused import. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update post_processing.py Removing unused imports Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update post_processing.py Removing unused import Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update word.py Removing unused imports Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update cardinal.py Deleting unused graph Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update word.py Removing import pynini Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update word.py removing pynini import Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update verbalize.py removing pynutil import Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update post_processing.py removing punct graph imported Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_sparrowhawk_normalization.sh Update on test issue for Docker file locations Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update test_ordinal.py Fixing style. 
Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Delete nemo_text_processing/text_normalization/zh/taggers/math_symbol.py Removing because it's not one of the semiotic classes. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Delete nemo_text_processing/text_normalization/zh/verbalizers/math_symbol.py Removing because it's not one of the semiotic classes. Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> * Update Jenkinsfile Updating Jenkins date Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> --------- Signed-off-by: Jim O'Regan Signed-off-by: Jim O’Regan Signed-off-by: Alex Cui Signed-off-by: Enno Hermann Signed-off-by: Vitaly Lavrukhin Signed-off-by: ealbasiri Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Evelina Signed-off-by: Anand Joseph Signed-off-by: Linnea Pari Leaver Signed-off-by: GiacomoLeoneMaria Signed-off-by: Giacomo Cavallini Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com> Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> Signed-off-by: BuyuanCui Signed-off-by: Nikolay Karpov Signed-off-by: Nikolay Karpov Signed-off-by: Peter Plantinga Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Jim O’Regan Co-authored-by: Enno Hermann Co-authored-by: Vitaly Lavrukhin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Enas Albasiri <71229149+ealbasiri@users.noreply.github.com> Co-authored-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Co-authored-by: Mariana <47233618+mgrafu@users.noreply.github.com> Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Co-authored-by: lleaver <137942999+lleaver@users.noreply.github.com> Co-authored-by: Linnea Pari Leaver Co-authored-by: Jim O’Regan 
Co-authored-by: Giacomo Leone Maria Cavallini <72698188+GiacomoLeoneMaria@users.noreply.github.com> Co-authored-by: Alex Cui Co-authored-by: Anand Joseph Co-authored-by: Evelina Co-authored-by: Nikolay Karpov Co-authored-by: Nikolay Karpov Co-authored-by: Peter Plantinga Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Alex Cui * Tts en tech terms (#167) * update tts whitelist Signed-off-by: Mariana Graterol Fuenmayor * enable normalization of emphasized input Signed-off-by: Mariana Graterol Fuenmayor * add whitelist terms Signed-off-by: Mariana Graterol Fuenmayor * add test for emphasis Signed-off-by: Mariana Graterol Fuenmayor * read card numbers as digits Signed-off-by: Mariana Graterol Fuenmayor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * make ccs deterministic Signed-off-by: Mariana Graterol Fuenmayor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update jenkins Signed-off-by: Mariana Graterol Fuenmayor * fix sh tests bug Signed-off-by: Mariana Graterol Fuenmayor * fix bug with time Signed-off-by: Mariana Graterol Fuenmayor * update jenkins Signed-off-by: Mariana Graterol Fuenmayor * fix sh time bug Signed-off-by: Mariana Graterol Fuenmayor --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Alex Cui * testing Signed-off-by: Alex Cui * removing test.txt Signed-off-by: Alex Cui * fixing zh tn money curreny on l Signed-off-by: Alex Cui * resolving failed ci tests for money grammar Signed-off-by: Alex Cui * updates for decimal maoney failure Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci * updates on money grammar for failure cases Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * renaming final graphs Signed-off-by: Alex Cui * conflicts Signed-off-by: Alex Cui * updating data Signed-off-by: Alex Cui * attempt to resolve jenkins issue Signed-off-by: Alex Cui * ci tests resolving Signed-off-by: Alex Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * resolving conflict Signed-off-by: Alex Cui * Jenkins test not starting, copied form main branch Signed-off-by: Alex Cui * copied from Nemo main, esolving Jenkins isue Signed-off-by: Alex Cui * copied from NeMo main, resolving Jenkins issue Signed-off-by: Alex Cui --------- Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Alex Cui Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Signed-off-by: Mariana Graterol Fuenmayor Signed-off-by: Anand Joseph Signed-off-by: Evelina Signed-off-by: David Sargsyan Signed-off-by: Ara Yeroyan <60027241+Ara-Yeroyan@users.noreply.github.com> Signed-off-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Signed-off-by: Chinmay Patil Signed-off-by: Travis Bartley Signed-off-by: Sasha Meister Signed-off-by: Kevin Sanders Signed-off-by: Jim O'Regan Signed-off-by: Jim O’Regan Signed-off-by: Enno Hermann Signed-off-by: Vitaly Lavrukhin Signed-off-by: ealbasiri Signed-off-by: Linnea Pari Leaver Signed-off-by: GiacomoLeoneMaria Signed-off-by: Giacomo Cavallini Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com> Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com> Signed-off-by: BuyuanCui Signed-off-by: Nikolay Karpov Signed-off-by: Nikolay Karpov Signed-off-by: Peter Plantinga Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Simon Zuberek Co-authored-by: Mariana 
<47233618+mgrafu@users.noreply.github.com> Co-authored-by: anand-nv <105917641+anand-nv@users.noreply.github.com> Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: David Sargsyan <66821320+davidks13@users.noreply.github.com> Co-authored-by: David Sargsyan Co-authored-by: Ara Yeroyan <60027241+Ara-Yeroyan@users.noreply.github.com> Co-authored-by: tbartley94 <90423858+tbartley94@users.noreply.github.com> Co-authored-by: Chinmay Patil <72211393+ChinmayPatil11@users.noreply.github.com> Co-authored-by: Travis Bartley Co-authored-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Co-authored-by: kevsan4 <65792419+kevsan4@users.noreply.github.com> Co-authored-by: Jim O’Regan Co-authored-by: Enno Hermann Co-authored-by: Vitaly Lavrukhin Co-authored-by: Enas Albasiri <71229149+ealbasiri@users.noreply.github.com> Co-authored-by: lleaver <137942999+lleaver@users.noreply.github.com> Co-authored-by: Linnea Pari Leaver Co-authored-by: Jim O’Regan Co-authored-by: Giacomo Leone Maria Cavallini <72698188+GiacomoLeoneMaria@users.noreply.github.com> Co-authored-by: Alex Cui Co-authored-by: Anand Joseph Co-authored-by: Evelina Co-authored-by: Nikolay Karpov Co-authored-by: Nikolay Karpov Co-authored-by: Peter Plantinga Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Simon Zuberek Co-authored-by: Simon Zuberek Signed-off-by: Alex Cui --- .../ja/verbalizers/word.py | 14 ++++++++++++++ .../text_normalization/en/taggers/electronic.py | 16 ++++++++++++++++ .../text_normalization/zh/taggers/money.py | 6 +++++- .../export_grammars.sh | 6 +++++- .../text_processing_deployment/pynini_export.py | 6 ++++++ 5 files changed, 46 insertions(+), 2 deletions(-) diff --git 
a/nemo_text_processing/inverse_text_normalization/ja/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ja/verbalizers/word.py index cfebf22a0..a4046328e 100644 --- a/nemo_text_processing/inverse_text_normalization/ja/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ja/verbalizers/word.py @@ -1,4 +1,7 @@ <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,15 +25,24 @@ class WordFst(GraphFst): ''' +<<<<<<< HEAD tokens { name: "一" } -> 一 ''' def __init__(self, deterministic: bool = True, lm: bool = False): super().__init__(name="word", kind="verbalize", deterministic=deterministic) +======= + tokens { char: "一" } -> 一 + ''' + + def __init__(self, deterministic: bool = True, lm: bool = False): + super().__init__(name="char", kind="verbalize", deterministic=deterministic) +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) graph = pynutil.delete("name: \"") + NEMO_NOT_QUOTE + pynutil.delete("\"") graph = pynini.closure(delete_space) + graph + pynini.closure(delete_space) self.fst = graph.optimize() +<<<<<<< HEAD ======= # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# @@ -65,3 +77,5 @@ def __init__(self, deterministic: bool = True, lm: bool = False): graph = pynini.closure(delete_space) + graph + pynini.closure(delete_space) self.fst = graph.optimize() >>>>>>> 0a4a21c (Jp itn 20240221 (#141)) +======= +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) diff --git a/nemo_text_processing/text_normalization/en/taggers/electronic.py b/nemo_text_processing/text_normalization/en/taggers/electronic.py index 2e4610adf..0205dc202 100644 --- a/nemo_text_processing/text_normalization/en/taggers/electronic.py +++ b/nemo_text_processing/text_normalization/en/taggers/electronic.py @@ -51,7 +51,11 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True): cc_cues = pynutil.add_weight(pynini.string_file(get_abs_path("data/electronic/cc_cues.tsv")), MIN_NEG_WEIGHT,) +<<<<<<< HEAD cc_cues = pynutil.add_weight(pynini.string_file(get_abs_path("data/electronic/cc_cues.tsv")), MIN_NEG_WEIGHT,) +======= + cc_cues = pynutil.add_weight(pynini.string_file(get_abs_path("data/electronic/cc_cues.tsv")), MIN_NEG_WEIGHT) +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) accepted_symbols = pynini.project(pynini.string_file(get_abs_path("data/electronic/symbol.tsv")), "input") @@ -163,6 +167,18 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True): ) graph |= cc_phrases + if deterministic: + # credit card cues + numbers = pynini.closure(NEMO_DIGIT, 4, 16) + cc_phrases = ( + pynutil.insert("protocol: \"") + + cc_cues + + pynutil.insert("\" domain: \"") + + numbers + + pynutil.insert("\"") + ) + graph |= cc_phrases + final_graph = self.add_tokens(graph) self.fst = final_graph.optimize() \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/zh/taggers/money.py b/nemo_text_processing/text_normalization/zh/taggers/money.py index 44fee1f75..0b21c8ac0 100644 --- a/nemo_text_processing/text_normalization/zh/taggers/money.py +++ b/nemo_text_processing/text_normalization/zh/taggers/money.py @@ -136,4 +136,8 @@ def __init__(self, 
cardinal: GraphFst, deterministic: bool = True, lm: bool = Fa ) final_graph = self.add_tokens(graph) - self.fst = final_graph.optimize() \ No newline at end of file +<<<<<<< HEAD + self.fst = final_graph.optimize() +======= + self.fst = final_graph.optimize() +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) diff --git a/tools/text_processing_deployment/export_grammars.sh b/tools/text_processing_deployment/export_grammars.sh index 017472ae9..c30dcb3d9 100644 --- a/tools/text_processing_deployment/export_grammars.sh +++ b/tools/text_processing_deployment/export_grammars.sh @@ -106,4 +106,8 @@ if [[ ${MODE} == "test" ]] || [[ ${MODE} == "interactive" ]]; then else echo "done mode: $MODE" exit 0 -fi \ No newline at end of file +<<<<<<< HEAD +fi +======= +fi +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py index da883db71..3bf8757d9 100644 --- a/tools/text_processing_deployment/pynini_export.py +++ b/tools/text_processing_deployment/pynini_export.py @@ -290,10 +290,14 @@ def parse_args(): VerbalizeFst as ITNVerbalizeFst, ) <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) from nemo_text_processing.text_normalization.hy.taggers.tokenize_and_classify import ( ClassifyFst as TNClassifyFst, ) from nemo_text_processing.text_normalization.hy.verbalizers.verbalize import VerbalizeFst as TNVerbalizeFst +<<<<<<< HEAD elif args.language == 'rw': from nemo_text_processing.text_normalization.rw.taggers.tokenize_and_classify import ( ClassifyFst as TNClassifyFst, @@ -301,6 +305,8 @@ def parse_args(): from nemo_text_processing.text_normalization.rw.verbalizers.verbalize import VerbalizeFst as TNVerbalizeFst ======= >>>>>>> 0a4a21c (Jp itn 20240221 (#141)) +======= +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) output_dir = os.path.join(args.output_dir, f"{args.language}_{args.grammars}_{args.input_case}") export_grammars( output_dir=output_dir,