Skip to content

Commit

Permalink
Minor changes and fix/test total command for QIDs
Browse files (browse the repository at this point in the history)
  • Loading branch information
andrewtavis committed Jan 6, 2025
1 parent 777d02c commit 248a56f
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 23 deletions.
2 changes: 1 addition & 1 deletion src/scribe_data/cli/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def prompt_user_download_all():
if user_choice:
print("Overwrite chosen. Removing existing files...")
for file in existing_files:
if file.exists(): # Check if the file exists before unlinking
if file.exists(): # check if the file exists before unlinking
file.unlink()
else:
print(f"Skipping update for {language.title()} {data_type}.")
Expand Down
17 changes: 12 additions & 5 deletions src/scribe_data/cli/total.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,15 +268,22 @@ def get_total_lexemes(language, data_type, do_print=True):
str
A formatted string indicating the language, data type and total number of lexemes, if found.
"""

if language is not None and language.startswith("Q") and language[1:].isdigit():
language_qid = language
if (
language is not None
and (language.startswith("Q") or language.startswith("q"))
and language[1:].isdigit()
):
language_qid = language.capitalize()

else:
language_qid = get_qid_by_input(language)

if data_type is not None and data_type.startswith("Q") and data_type[1:].isdigit():
data_type_qid = data_type
if (
data_type is not None
and (data_type.startswith("Q") or data_type.startswith("q"))
and data_type[1:].isdigit()
):
data_type_qid = data_type.capitalize()

else:
data_type_qid = get_qid_by_input(data_type)
Expand Down
10 changes: 5 additions & 5 deletions src/scribe_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,8 @@ def _load_json(package_path: str, file_name: str) -> Any:
-------
A python entity representing the JSON content.
"""
with (
resources.files(package_path)
.joinpath(file_name)
.open(encoding="utf-8") as in_stream
):
json_file = resources.files(package_path).joinpath(file_name)
with json_file.open(encoding="utf-8") as in_stream:
return json.load(in_stream)


Expand Down Expand Up @@ -547,6 +544,9 @@ def format_sublanguage_name(lang, language_metadata=_languages):
> format_sublanguage_name("english", language_metadata)
'English'
"""
if (lang.startswith("Q") or lang.startswith("q")) and lang[1:].isdigit():
return lang

for main_lang, lang_data in language_metadata.items():
# If it's not a sub-language, return the original name.
if main_lang == lang:
Expand Down
7 changes: 0 additions & 7 deletions src/scribe_data/wikidata/query_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ def query_data(

# MARK: Run Queries

# Run queries and format data.
for q in tqdm(
queries_to_run,
desc="Data updated",
Expand Down Expand Up @@ -216,7 +215,6 @@ def query_data(

else:
print(f"Skipping update for {lang.title()} {target_type}.")
# return {"success": False, "skipped": True}

print(f"Querying and formatting {lang.title()} {target_type}")

Expand Down Expand Up @@ -342,8 +340,3 @@ def query_data(
print(
f"Successfully queried and formatted data for {lang.title()} {target_type}."
)
# return {"success": True, "skipped": False}


# if __name__ == "__main__":
# query_data()
7 changes: 4 additions & 3 deletions tests/cli/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def test_get_data_with_overwrite_false(self, mock_query_data):
interactive=False,
)

# MARK : User Chooses to skip
# MARK: User Chooses Skip

@patch("scribe_data.cli.get.query_data")
@patch(
Expand All @@ -238,11 +238,11 @@ def test_user_skips_existing_file(
language="English", data_type="nouns", output_dir="./test_output"
)

# Validate the skip result
# Validate the skip result.
self.assertEqual(result, {"success": False, "skipped": True})
mock_query_data.assert_not_called()

# MARK : User Chooses to overwrite
# MARK: User Chooses Overwrite

@patch("scribe_data.cli.get.query_data")
@patch(
Expand Down Expand Up @@ -270,6 +270,7 @@ def test_user_overwrites_existing_file(
)

# MARK: Translations

@patch("scribe_data.cli.get.parse_wd_lexeme_dump")
def test_get_translations_no_language_specified(self, mock_parse):
"""
Expand Down
15 changes: 13 additions & 2 deletions tests/load/test_update_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,22 @@ def test_format_sublanguage_name_positive(lang, expected_output):
assert utils.format_sublanguage_name(lang) == expected_output


@pytest.mark.parametrize(
    "lang, expected_output",
    [
        ("Q42", "Q42"),  # any well-formed QID is returned unchanged
        ("Q1860", "Q1860"),
        ("q42", "q42"),  # a lowercase "q" prefix is accepted and not normalized
    ],
)
def test_format_sublanguage_name_qid_positive(lang, expected_output):
    """
    Check that QID-shaped input is handed back as-is by format_sublanguage_name.

    Parameters
    ----------
    lang : str
        A Wikidata-style identifier: "Q" or "q" followed by digits.

    expected_output : str
        The value the function should return, identical to the input.
    """
    assert utils.format_sublanguage_name(lang) == expected_output


def test_format_sublanguage_name_negative():
    """
    Check that an unknown language name raises a ValueError with the expected message.
    """
    # Only one call belongs inside the context manager: anything after the
    # raising call would never execute.
    with pytest.raises(ValueError) as excp:
        _ = utils.format_sublanguage_name("Newspeak")

    assert str(excp.value) == "Newspeak is not a valid language or sub-language."


def test_list_all_languages():
Expand Down

0 comments on commit 248a56f

Please sign in to comment.