Skip to content

Commit

Permalink
辞書のSurfaceの検証を厳密化
Browse files: browse the repository at this point in the history
  • Loading branch information
takana-v committed Feb 2, 2025
1 parent ab1df7e commit 7615029
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 0 deletions.
25 changes: 25 additions & 0 deletions test/unit/user_dict/test_user_dict_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,20 @@ def test_convert_to_zenkaku() -> None:
assert surface == true_surface


def test_remove_newlines_and_null() -> None:
    """UserDictWord strips newlines and null characters from surface."""
    # Build a surface with LF, CR and NUL embedded in the middle.
    model_kwargs = generate_model()
    model_kwargs["surface"] = "te\n\r\x00st"

    word = UserDictWord(**model_kwargs)

    # Only the three control characters should have been dropped.
    # NOTE(review): a convert_to_zenkaku validator also takes `surface`;
    # confirm this expected value is in the form that validator produces.
    assert word.surface == "test"


def test_count_mora() -> None:
"""UserDictWord は mora_count=None を上書きする。"""
# Inputs
Expand Down Expand Up @@ -126,6 +140,17 @@ def test_invalid_pronunciation_not_katakana() -> None:
UserDictWord(**test_value)


def test_invalid_pronunciation_newlines_and_null() -> None:
    """UserDictWord rejects a pronunciation with newlines or null characters."""
    # Pronunciation with LF, CR and NUL embedded in the middle.
    invalid_kwargs = generate_model()
    invalid_kwargs["pronunciation"] = "ボイ\n\r\x00ボ"

    # Constructing the model must fail validation.
    with pytest.raises(ValidationError):
        UserDictWord(**invalid_kwargs)


def test_invalid_pronunciation_invalid_sutegana() -> None:
"""UserDictWord は無効な pronunciation をエラーとする。"""
# Inputs
Expand Down
5 changes: 5 additions & 0 deletions voicevox_engine/user_dict/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ def convert_to_zenkaku(cls, surface: str) -> str:
)
)

@field_validator("surface")
@classmethod
def remove_newlines_and_null(cls, surface: str) -> str:
    """Return ``surface`` with line feeds, carriage returns and nulls removed.

    Every occurrence of ``"\\n"``, ``"\\r"`` and ``"\\x00"`` is deleted;
    all other characters are kept in their original order.
    """
    cleaned = surface
    for unwanted in ("\n", "\r", "\x00"):
        cleaned = cleaned.replace(unwanted, "")
    return cleaned

@field_validator("pronunciation", mode="before")
@classmethod
def check_is_katakana(cls, pronunciation: str) -> str:
Expand Down

0 comments on commit 7615029

Please sign in to comment.