Skip to content

Commit

Permalink
Adds tests for 'próf.' and 'bandar.'
Browse files Browse the repository at this point in the history
  • Loading branch information
sultur committed Feb 1, 2024
1 parent 051041f commit 6bcf195
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/icespeak/transcribe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def _split_substring_types(t: str) -> Iterable[str]:
(TOK.WORD, TOK.PERSON, TOK.ENTITY, TOK.TIMESTAMP, TOK.UNKNOWN)
)
# These should not be interpreted as abbreviations
# unless they are written with a trailing period (e.g. "próf." or "bandar.")
_IGNORED_ABBREVS = frozenset(("mið", "fim", "bandar", "mao", "próf", "tom", "mar"))
_HYPHEN_SYMBOLS = frozenset(HYPHENS)

Expand Down
6 changes: 6 additions & 0 deletions tests/test_transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,12 @@ def test_dt_token_transcribe_basic() -> None:
t = "maðurinn tom fékk mar eftir strembið próf í síðustu viku"
n = DT.token_transcribe(t)
assert n == t
t = "Undirritað, próf. Jónína"
n = DT.token_transcribe(t)
assert "prófessor" in n
t = "Hann er bandar. ríkisborgari"
n = DT.token_transcribe(t)
assert "bandarískur" in n


def test_dt_token_transcribe_experimental():
Expand Down

0 comments on commit 6bcf195

Please sign in to comment.