Minor changes and fix/test total command for QIDs

scribe-org · Jan 6, 2025 · 248a56f
1 parent 777d02c
commit 248a56f
Show file tree

Hide file tree

Showing 6 changed files with 35 additions and 23 deletions.
diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py
@@ -222,7 +222,7 @@ def prompt_user_download_all():
             if user_choice:
                 print("Overwrite chosen. Removing existing files...")
                 for file in existing_files:
-                    if file.exists():  # Check if the file exists before unlinking
+                    if file.exists():  # check if the file exists before unlinking
                         file.unlink()
             else:
                 print(f"Skipping update for {language.title()} {data_type}.")

diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py
@@ -268,15 +268,22 @@ def get_total_lexemes(language, data_type, do_print=True):
     str
         A formatted string indicating the language, data type and total number of lexemes, if found.
     """
-
-    if language is not None and language.startswith("Q") and language[1:].isdigit():
-        language_qid = language
+    if (
+        language is not None
+        and (language.startswith("Q") or language.startswith("q"))
+        and language[1:].isdigit()
+    ):
+        language_qid = language.capitalize()
 
     else:
         language_qid = get_qid_by_input(language)
 
-    if data_type is not None and data_type.startswith("Q") and data_type[1:].isdigit():
-        data_type_qid = data_type
+    if (
+        data_type is not None
+        and (data_type.startswith("Q") or data_type.startswith("q"))
+        and data_type[1:].isdigit()
+    ):
+        data_type_qid = data_type.capitalize()
 
     else:
         data_type_qid = get_qid_by_input(data_type)

diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py
@@ -123,11 +123,8 @@ def _load_json(package_path: str, file_name: str) -> Any:
     -------
     A python entity representing the JSON content.
     """
-    with (
-        resources.files(package_path)
-        .joinpath(file_name)
-        .open(encoding="utf-8") as in_stream
-    ):
+    json_file = resources.files(package_path).joinpath(file_name)
+    with json_file.open(encoding="utf-8") as in_stream:
         return json.load(in_stream)
 
 
@@ -547,6 +544,9 @@ def format_sublanguage_name(lang, language_metadata=_languages):
     > format_sublanguage_name("english", language_metadata)
     'English'
     """
+    if (lang.startswith("Q") or lang.startswith("q")) and lang[1:].isdigit():
+        return lang
+
     for main_lang, lang_data in language_metadata.items():
         # If it's not a sub-language, return the original name.
         if main_lang == lang:

diff --git a/src/scribe_data/wikidata/query_data.py b/src/scribe_data/wikidata/query_data.py
@@ -166,7 +166,6 @@ def query_data(
 
     # MARK: Run Queries
 
-    # Run queries and format data.
     for q in tqdm(
         queries_to_run,
         desc="Data updated",
@@ -216,7 +215,6 @@ def query_data(
 
                     else:
                         print(f"Skipping update for {lang.title()} {target_type}.")
-                        # return {"success": False, "skipped": True}
 
         print(f"Querying and formatting {lang.title()} {target_type}")
 
@@ -342,8 +340,3 @@ def query_data(
             print(
                 f"Successfully queried and formatted data for {lang.title()} {target_type}."
             )
-            # return {"success": True, "skipped": False}
-
-
-# if __name__ == "__main__":
-#     query_data()
diff --git a/tests/cli/test_get.py b/tests/cli/test_get.py
@@ -217,7 +217,7 @@ def test_get_data_with_overwrite_false(self, mock_query_data):
             interactive=False,
         )
 
-    # MARK : User Chooses to skip
+    # MARK: User Chooses Skip
 
     @patch("scribe_data.cli.get.query_data")
     @patch(
@@ -238,11 +238,11 @@ def test_user_skips_existing_file(
             language="English", data_type="nouns", output_dir="./test_output"
         )
 
-        # Validate the skip result
+        # Validate the skip result.
         self.assertEqual(result, {"success": False, "skipped": True})
         mock_query_data.assert_not_called()
 
-    # MARK : User Chooses to overwrite
+    # MARK: User Chooses Overwrite
 
     @patch("scribe_data.cli.get.query_data")
     @patch(
@@ -270,6 +270,7 @@ def test_user_overwrites_existing_file(
         )
 
     # MARK: Translations
+
     @patch("scribe_data.cli.get.parse_wd_lexeme_dump")
     def test_get_translations_no_language_specified(self, mock_parse):
         """

diff --git a/tests/load/test_update_utils.py b/tests/load/test_update_utils.py
@@ -123,11 +123,22 @@ def test_format_sublanguage_name_positive(lang, expected_output):
     assert utils.format_sublanguage_name(lang) == expected_output
 
 
+@pytest.mark.parametrize(
+    "lang, expected_output",
+    [
+        ("Q42", "Q42"),  # test that any QID is returned
+        ("Q1860", "Q1860"),
+    ],
+)
+def test_format_sublanguage_name_qid_positive(lang, expected_output):
+    assert utils.format_sublanguage_name(lang) == expected_output
+
+
 def test_format_sublanguage_name_negative():
     with pytest.raises(ValueError) as excp:
-        _ = utils.format_sublanguage_name("Silence")
+        _ = utils.format_sublanguage_name("Newspeak")
 
-    assert str(excp.value) == "Silence is not a valid language or sub-language."
+    assert str(excp.value) == "Newspeak is not a valid language or sub-language."
 
 
 def test_list_all_languages():