diff --git a/nemo_text_processing/inverse_text_normalization/ja/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ja/verbalizers/word.py index cfebf22a0..a4046328e 100644 --- a/nemo_text_processing/inverse_text_normalization/ja/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ja/verbalizers/word.py @@ -1,4 +1,7 @@ <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,15 +25,24 @@ class WordFst(GraphFst): ''' +<<<<<<< HEAD tokens { name: "一" } -> 一 ''' def __init__(self, deterministic: bool = True, lm: bool = False): super().__init__(name="word", kind="verbalize", deterministic=deterministic) +======= + tokens { char: "一" } -> 一 + ''' + + def __init__(self, deterministic: bool = True, lm: bool = False): + super().__init__(name="char", kind="verbalize", deterministic=deterministic) +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) graph = pynutil.delete("name: \"") + NEMO_NOT_QUOTE + pynutil.delete("\"") graph = pynini.closure(delete_space) + graph + pynini.closure(delete_space) self.fst = graph.optimize() +<<<<<<< HEAD ======= # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # @@ -65,3 +77,5 @@ def __init__(self, deterministic: bool = True, lm: bool = False): graph = pynini.closure(delete_space) + graph + pynini.closure(delete_space) self.fst = graph.optimize() >>>>>>> 0a4a21c (Jp itn 20240221 (#141)) +======= +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) diff --git a/nemo_text_processing/text_normalization/en/taggers/electronic.py b/nemo_text_processing/text_normalization/en/taggers/electronic.py index 2e4610adf..0205dc202 100644 --- a/nemo_text_processing/text_normalization/en/taggers/electronic.py +++ b/nemo_text_processing/text_normalization/en/taggers/electronic.py @@ -51,7 +51,11 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True): cc_cues = pynutil.add_weight(pynini.string_file(get_abs_path("data/electronic/cc_cues.tsv")), MIN_NEG_WEIGHT,) +<<<<<<< HEAD cc_cues = pynutil.add_weight(pynini.string_file(get_abs_path("data/electronic/cc_cues.tsv")), MIN_NEG_WEIGHT,) +======= + cc_cues = pynutil.add_weight(pynini.string_file(get_abs_path("data/electronic/cc_cues.tsv")), MIN_NEG_WEIGHT) +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) accepted_symbols = pynini.project(pynini.string_file(get_abs_path("data/electronic/symbol.tsv")), "input") @@ -163,6 +167,18 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True): ) graph |= cc_phrases + if deterministic: + # credit card cues + numbers = pynini.closure(NEMO_DIGIT, 4, 16) + cc_phrases = ( + pynutil.insert("protocol: \"") + + cc_cues + + pynutil.insert("\" domain: \"") + + numbers + + pynutil.insert("\"") + ) + graph |= cc_phrases + final_graph = self.add_tokens(graph) self.fst = final_graph.optimize() \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/zh/taggers/money.py b/nemo_text_processing/text_normalization/zh/taggers/money.py index 44fee1f75..0b21c8ac0 100644 --- a/nemo_text_processing/text_normalization/zh/taggers/money.py +++ b/nemo_text_processing/text_normalization/zh/taggers/money.py @@ -136,4 +136,8 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True, lm: bool = Fa ) final_graph = self.add_tokens(graph) - self.fst = final_graph.optimize() \ No newline at end of file +<<<<<<< HEAD + self.fst = final_graph.optimize() +======= + self.fst = final_graph.optimize() +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) diff --git a/tools/text_processing_deployment/export_grammars.sh b/tools/text_processing_deployment/export_grammars.sh index 017472ae9..c30dcb3d9 100644 --- a/tools/text_processing_deployment/export_grammars.sh +++ b/tools/text_processing_deployment/export_grammars.sh @@ -106,4 +106,8 @@ if [[ ${MODE} == "test" ]] || [[ ${MODE} == "interactive" ]]; then else echo "done mode: $MODE" exit 0 -fi \ No newline at end of file +<<<<<<< HEAD +fi +======= +fi +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py index da883db71..3bf8757d9 100644 --- a/tools/text_processing_deployment/pynini_export.py +++ b/tools/text_processing_deployment/pynini_export.py @@ -290,10 +290,14 @@ def parse_args(): VerbalizeFst as ITNVerbalizeFst, ) <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) from nemo_text_processing.text_normalization.hy.taggers.tokenize_and_classify import ( ClassifyFst as TNClassifyFst, ) from nemo_text_processing.text_normalization.hy.verbalizers.verbalize import VerbalizeFst as TNVerbalizeFst +<<<<<<< HEAD elif args.language == 'rw': from nemo_text_processing.text_normalization.rw.taggers.tokenize_and_classify import ( ClassifyFst as TNClassifyFst, @@ -301,6 +305,8 @@ def parse_args(): from nemo_text_processing.text_normalization.rw.verbalizers.verbalize import VerbalizeFst as TNVerbalizeFst ======= >>>>>>> 0a4a21c (Jp itn 20240221 (#141)) +======= +>>>>>>> 8a7e28e (Zh tn bug 240712 (#187)) output_dir = os.path.join(args.output_dir, f"{args.language}_{args.grammars}_{args.input_case}") export_grammars( output_dir=output_dir,