Skip to content

Commit

Permalink
Telephone modified tagger and verbalizer
Browse files Browse the repository at this point in the history
Signed-off-by: Tarushi V <[email protected]>
  • Loading branch information
tarushi2k2 committed Jan 21, 2025
1 parent a5cf050 commit 1c506e2
Showing 1 changed file with 28 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,99 +36,47 @@ class TelephoneFst(GraphFst):

def __init__(self, cardinal: GraphFst):
    """Build the classify-stage grammar for telephone numbers.

    Two families of input are accepted:

    * a "plus" word, then a country code, then 1-10 digits; emitted as
      ``country_code: "..." number_part: "..."``
    * a "zero" word, then a city code, then 1-7 digits; emitted as
      ``extension: "..." number_part: "..."``

    Each family exists in a variant driven by the Hindi digit tables and a
    variant driven by the English-to-Hindi digit table.

    Args:
        cardinal: cardinal grammar. It is not referenced by this
            constructor; the parameter is kept so existing callers keep
            working.
    """
    super().__init__(name="telephone", kind="classify")

    def load_inverted(relative_path):
        # Load a TSV mapping and swap its input/output sides.
        return pynini.string_file(get_abs_path(relative_path)).invert()

    def tagged(field, body):
        # Wrap `body` as `<field>: "<body output>" ` (note the trailing space).
        return pynutil.insert(f'{field}: "') + body + pynutil.insert('" ')

    # Single-digit tables. NOTE(review): the TSV contents are not visible
    # here; presumably, after inversion, spoken digit words map to digits.
    hindi_digit_graph = load_inverted("data/numbers/digit.tsv") | load_inverted("data/numbers/zero.tsv")
    english_digit_graph = load_inverted("data/telephone/eng_to_hindi_digit.tsv")

    # Country/city codes accept either digit table. This is the same union
    # of digit.tsv, zero.tsv and eng_to_hindi_digit.tsv as before, built from
    # the graphs above instead of re-reading and re-inverting the files.
    country_code_graph_single_digits = hindi_digit_graph | english_digit_graph
    country_code_graph_double_digits = load_inverted("data/numbers/teens_and_ties.tsv") | load_inverted(
        "data/telephone/teens_and_ties_eng_to_hin.tsv"
    )

    # Mobile-style number: up to 9 leading digits plus one mandatory digit.
    self.hindi_digit = tagged(
        "number_part", pynini.closure(hindi_digit_graph + delete_space, 0, 9) + hindi_digit_graph
    )
    # The English variant additionally applies delete_space after the last
    # digit; this asymmetry with the Hindi variant is kept as-is.
    self.english_digit = tagged(
        "number_part",
        pynini.closure(english_digit_graph + delete_space, 0, 9) + english_digit_graph + delete_space,
    )

    # A code is either 0-2 single-digit entries or 0-1 double-digit entry,
    # each followed by delete_space.
    # NOTE(review): the lower bound of 0 means an empty code is accepted.
    single_digit_code = pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
    double_digit_code = pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)

    self.country_code_with_single_digits = tagged("country_code", single_digit_code)
    self.country_code_with_double_digits = tagged("country_code", double_digit_code)
    self.country_code = self.country_code_with_single_digits | self.country_code_with_double_digits

    # City codes share the country-code digit graphs but are emitted under
    # the "extension" field.
    self.city_code_with_single_digits = tagged("extension", single_digit_code)
    self.city_code_with_double_digits = tagged("extension", double_digit_code)
    self.city_code = self.city_code_with_single_digits | self.city_code_with_double_digits

    # Landline number: up to 6 leading digits plus one mandatory digit.
    self.landline_hindi_digit = tagged(
        "number_part", pynini.closure(hindi_digit_graph + delete_space, 0, 6) + hindi_digit_graph
    )
    self.landline_english_digit = tagged(
        "number_part", pynini.closure(english_digit_graph + delete_space, 0, 6) + english_digit_graph
    )

    # Leading marker words are consumed and produce no output.
    delete_plus = pynutil.delete("प्लस") | pynutil.delete("plus") | pynutil.delete("Plus") | pynutil.delete("PLUS")
    delete_zero = pynutil.delete("शून्य") | pynutil.delete("zero") | pynutil.delete("Zero") | pynutil.delete("ZERO")

    graph_number_with_hindi_digit = delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
    # No explicit delete_space between country code and digits here; every
    # country-code entry already ends with delete_space.
    graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit

    graph_landline_with_hindi_digit = (
        delete_zero + delete_space + self.city_code + delete_space + self.landline_hindi_digit
    )
    graph_landline_with_english_digit = (
        delete_zero + delete_space + self.city_code + delete_space + self.landline_english_digit
    )

    graph = (
        graph_number_with_hindi_digit
        | graph_number_with_english_digit
        | graph_landline_with_hindi_digit
        | graph_landline_with_english_digit
    )
    final_graph = self.add_tokens(graph)
    self.fst = final_graph

0 comments on commit 1c506e2

Please sign in to comment.