Skip to content

Commit

Permalink
Added __init__.py files and removed unused commented lines
Browse files Browse the repository at this point in the history
Signed-off-by: Namrata Gachchi <[email protected]>
  • Loading branch information
ngachchi committed Nov 5, 2024
1 parent 65d3c12 commit 9c49578
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 10 deletions.
13 changes: 13 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/date/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13 changes: 13 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/money/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13 changes: 13 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/time/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class CardinalFst(GraphFst):
"""
Finite state transducer for classifying cardinals, e.g.
-२३ -> cardinal { negative: "true" integer: "तेइस" }
s
s
Args:
deterministic: if True will provide a single transduction option,
for False multiple transductions are generated (used for audio-based normalization)
Expand All @@ -35,8 +35,6 @@ def __init__(self, deterministic: bool = True, lm: bool = False):
digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
teens_ties = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv"))
# hundred = pynini.string_file(get_abs_path("data/numbers/hundred.tsv"))
# thousand = pynini.string_file(get_abs_path("data/numbers/thousands.tsv"))
teens_and_ties = pynutil.add_weight(teens_ties, -0.1)

def create_graph_suffix(digit_graph, suffix, zeros_counts):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
super().__init__(name="money", kind="classify")

cardinal_graph = cardinal.final_graph
# decimal_graph = decimal.final_graph_wo_negative

optional_graph_negative = pynini.closure(
pynutil.insert("negative: ") + pynini.cross("-", "\"true\"") + insert_space, 0, 1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from pynini.lib import pynutil

from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_SPACE, NEMO_SIGMA, GraphFst
from nemo_text_processing.text_normalization.hi.utils import get_abs_path, load_labels


class PunctuationFst(GraphFst):
Expand All @@ -45,9 +44,7 @@ def __init__(self, deterministic: bool = True):
if category(chr(i)).startswith("P") and chr(i) not in punct_symbols_to_exclude
]

whitelist_symbols = load_labels(get_abs_path("data/whitelist/symbol.tsv"))
whitelist_symbols = [x[0] for x in whitelist_symbols]
self.punct_marks = [p for p in punct_unicode + list(s) if p not in whitelist_symbols]
self.punct_marks = [p for p in punct_unicode + list(s)]

punct = pynini.union(*self.punct_marks)
punct = pynini.closure(punct, 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,10 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
)

graph_integer = integer_part + delete_space + currency
# graph_integer |= currency + delete_space + integer_part

graph_interger_fraction = (
integer_part + delete_space + currency + delete_space + fractional_part + delete_space + insert_paise
)
# graph_interger_fraction |= currency + delete_space + integer_part + delete_space + fractional_part + delete_space + insert_paise

graph = graph_integer | graph_interger_fraction

Expand Down

0 comments on commit 9c49578

Please sign in to comment.