Skip to content

Commit

Permalink
add measures file for FR TN (#131)
Browse files Browse the repository at this point in the history
* add measures file

Signed-off-by: Mariana Graterol Fuenmayor <[email protected]>

* update whitelist data

Signed-off-by: Mariana Graterol Fuenmayor <[email protected]>

* add fr tn tests

Signed-off-by: Mariana Graterol Fuenmayor <[email protected]>

---------

Signed-off-by: Mariana Graterol Fuenmayor <[email protected]>
  • Loading branch information
mgrafu authored Dec 8, 2023
1 parent 53b2a02 commit 9d93839
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pipeline {
EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-18-23-0'
ES_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-27-23-0'
ES_EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-13-23-1'
FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-16-23-1'
FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/12-05-23-0'
HU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
PT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
RU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
m mètres
m² mètres carrés
m³ mètres cubes
s secondes
min minutes
h heures
° degrés
°C degrés celsius
g grammes
l litres
kg kilos
'' pouce
lb livres
% pour cent
‰ pour mille
km/h kilomètres heure
m/h mètres à l’heure
13 changes: 12 additions & 1 deletion nemo_text_processing/text_normalization/fr/data/whitelist.tsv
Original file line number Diff line number Diff line change
@@ -1 +1,12 @@
bonjour
Mᵐᵉ madame
Mᵐᵉˢ mesdames
Mˡˡᵉ mademoiselle
Mˡˡᵉˢ mademoiselles
Dʳ docteur
Dʳˢ docteurs
Dʳᵉ docteure
Dʳᵉˢ docteures
apr. J.-C. après Jésus-Christ
av. J.-C. avant Jésus-Christ
le hon. l’honorable
le très hon. le très honorable
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Dʳ~docteur
Dʳᵉˢ~docteures
Mᵐᵉ~madame
Mᵐᵉˢ~mesdames
Mˡˡᵉ~mademoiselle
Mˡˡᵉˢ~mademoiselles
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
~
yahoo!~yahoo!
20 !~vingt !
x~x
—~—
aaa~aaa
aabach~aabach
aabenraa~aabenraa
aabye~aabye
aaccessed~aaccessed
aach~aach
aachen's~aachen's
aadri~aadri
aafia~aafia
aagaard~aagaard
aagadu~aagadu
aagard~aagard
aagathadi~aagathadi
aaghart's~aaghart's
aagnes~aagnes
aagomoni~aagomoni
aagon~aagon
aagoo~aagoo
aagot~aagot
aahar~aahar
aahh~aahh
aahperd~aahperd
aaibinterstate~aaibinterstate
aajab~aajab
aakasa~aakasa
aakervik~aakervik
aakirkeby~aakirkeby
aalam~aalam
aalbaek~aalbaek
aaldiu~aaldiu
aalem~aalem
a'ali~a'ali
aalilaassamthey~aalilaassamthey
aalin~aalin
aaliyan~aaliyan
aaliyan's~aaliyan's
aamadu~aamadu
aamara~aamara
aambala~aambala
aamera~aamera
aamer's~aamer's
aamina~aamina
aaminah~aaminah
aamjiwnaang~aamjiwnaang
10 changes: 10 additions & 0 deletions tests/nemo_text_processing/fr/test_sparrowhawk_normalization.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,15 @@ testTNOrdinal() {
runtest $input
}

# Run the French text-normalization whitelist test cases via runtest
# (runtest is defined elsewhere in this script).
testTNWhitelist() {
  # Quote the expansions so a PROJECT_DIR containing whitespace or glob
  # characters reaches runtest as one unmodified argument.
  input="$PROJECT_DIR/fr/data_text_normalization/test_cases_whitelist.txt"
  runtest "$input"
}

# Run the French text-normalization word test cases via runtest
# (runtest is defined elsewhere in this script).
testTNWord() {
  # Quote the expansions so a PROJECT_DIR containing whitespace or glob
  # characters reaches runtest as one unmodified argument.
  input="$PROJECT_DIR/fr/data_text_normalization/test_cases_word.txt"
  runtest "$input"
}

# Load shUnit2
. $PROJECT_DIR/../shunit2/shunit2
10 changes: 10 additions & 0 deletions tests/nemo_text_processing/fr/test_whitelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import pytest
from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
from nemo_text_processing.text_normalization.normalize import Normalizer
from parameterized import parameterized

from ..utils import CACHE_DIR, parse_test_case_file
Expand All @@ -29,3 +30,12 @@ class TestWhitelist:
def test_denorm(self, test_input, expected):
pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
assert pred == expected

# French Normalizer read by test_norm as self.normalizer; built for cased input
# with cache_dir=CACHE_DIR and overwrite_cache=False.
normalizer = Normalizer(lang='fr', cache_dir=CACHE_DIR, overwrite_cache=False, input_case='cased')

@parameterized.expand(parse_test_case_file('fr/data_text_normalization/test_cases_whitelist.txt'))
@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
def test_norm(self, test_input, expected):
    """Check that normalizing ``test_input`` yields ``expected``.

    Parameterized over the cases parsed from the French whitelist
    text-normalization test-case file.
    """
    assert self.normalizer.normalize(test_input, verbose=False) == expected
10 changes: 10 additions & 0 deletions tests/nemo_text_processing/fr/test_word.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import pytest
from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
from nemo_text_processing.text_normalization.normalize import Normalizer
from parameterized import parameterized

from ..utils import CACHE_DIR, parse_test_case_file
Expand All @@ -29,3 +30,12 @@ class TestWord:
def test_denorm(self, test_input, expected):
pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
assert pred == expected

# French Normalizer read by test_norm as self.normalizer; built for cased input
# with cache_dir=CACHE_DIR and overwrite_cache=False.
normalizer = Normalizer(lang='fr', cache_dir=CACHE_DIR, overwrite_cache=False, input_case='cased')

@parameterized.expand(parse_test_case_file('fr/data_text_normalization/test_cases_word.txt'))
@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
def test_norm(self, test_input, expected):
    """Check that normalizing ``test_input`` yields ``expected``.

    Parameterized over the cases parsed from the French word
    text-normalization test-case file.
    """
    assert self.normalizer.normalize(test_input, verbose=False) == expected

0 comments on commit 9d93839

Please sign in to comment.