From bfdb1263d4b7a69eb0e3b8035b798c828462e468 Mon Sep 17 00:00:00 2001 From: Elron Bandel Date: Wed, 8 Jan 2025 14:11:23 +0200 Subject: [PATCH] Simplify artifact link [Non Backward Compatible!] (#1494) * Simplify artifact link Signed-off-by: elronbandel * Refactor artifact linking to use 'to' instead of 'artifact_linked_to' for consistency and clarity in catalog documentation and implementation. Signed-off-by: elronbandel * Remove unused temporary recipe JSON file to clean up the catalog structure. Signed-off-by: elronbandel --------- Signed-off-by: elronbandel --- docs/docs/saving_and_loading_from_catalog.rst | 30 ++++----- src/unitxt/artifact.py | 65 ++++--------------- src/unitxt/catalog.py | 2 +- ...itespace_prefix_and_suffix_task_input.json | 2 +- .../augment_whitespace_task_input.json | 2 +- .../tasks/qa/with_context/abstractive.json | 2 +- .../tasks/qa/with_context/extractive.json | 2 +- tests/library/test_artifact.py | 4 +- 8 files changed, 33 insertions(+), 76 deletions(-) diff --git a/docs/docs/saving_and_loading_from_catalog.rst b/docs/docs/saving_and_loading_from_catalog.rst index 41e912f46..a38375312 100644 --- a/docs/docs/saving_and_loading_from_catalog.rst +++ b/docs/docs/saving_and_loading_from_catalog.rst @@ -42,7 +42,7 @@ It's also possible to add artifacts to the library's default catalog: Using Catalog Assets -------------------- -To use catalog objects, simply specify their name in the Unitxt object that will use them. +To use catalog objects, simply specify their name in the Unitxt object that will use them. .. code-block:: python @@ -56,8 +56,8 @@ To use catalog objects, simply specify their name in the Unitxt object that will Modifying Catalog Assets on the Fly ----------------------------------- -To modify a catalog asset's fields dynamically, upon fetching the asset from the catalog, use the syntax: ``artifact_name[key_to_modify=new_value]``. -To assign lists, use: ``artifact_name[key_to_modify=[new_value_0, new_value_1]]``. +To modify a catalog asset's fields dynamically, upon fetching the asset from the catalog, use the syntax: ``artifact_name[key_to_modify=new_value]``. +To assign lists, use: ``artifact_name[key_to_modify=[new_value_0, new_value_1]]``. To assign dictionaries, use: ``artifact_name[key_to_modify={new_key_0=new_value_0,new_key_1=new_value_1}]``. Note that the whole new value of the field has to be specified; not just one item of a list, or one key of the dictionary. For instance, to change the metric specification of a task: @@ -85,20 +85,20 @@ Use ``get_from_catalog`` to directly access catalog assets, and obtain an asset A Catalog Asset Linking to Another Catalog Asset ------------------------------------------------ -A catalog asset can be just a link to another asset. -This feature comes handy when for some reason, we want to change the catalog name -of an existing asset (e.g. ``asset1`` to ``asset2``), while there is already code +A catalog asset can be just a link to another asset. +This feature comes handy when for some reason, we want to change the catalog name +of an existing asset (e.g. ``asset1`` to ``asset2``), while there is already code that uses the old name of the asset and we want to avoid non-backward compatible changes. -In such a case, we can save the asset as ``asset2``, create an asset of type +In such a case, we can save the asset as ``asset2``, create an asset of type :class:`ArtifactLink ` that links to ``asset2``, and save that one as ``asset1``. -When ``asset1`` is accessed from an existing code, Unixt Catalog realizes that the asset fetched from position ``asset1`` -is an ``ArtifactLink``, so it continues and fetches ``asset2`` -- the Artifact linked to by ``asset1``. +When ``asset1`` is accessed from an existing code, Unixt Catalog realizes that the asset fetched from position ``asset1`` +is an ``ArtifactLink``, so it continues and fetches ``asset2`` -- the Artifact linked to by ``asset1``. .. code-block:: python - link_to_asset2 = ArtifactLink(artifact_linked_to="asset2") + link_to_asset2 = ArtifactLink(to="asset2") add_to_catalog( link_to_asset2, "asset1", @@ -109,8 +109,8 @@ Deprecated Asset ---------------- Every asset has a special field named ``__deprecated_msg__`` of type ``str``, whose default value is None. -When None, the asset is cocnsidered non-deprecated. When not None, the asset is considered deprecated, and -its ``__deprecated_msg__`` is logged at level WARN upon its instantiation. (Other than this logging, +When None, the asset is cocnsidered non-deprecated. When not None, the asset is considered deprecated, and +its ``__deprecated_msg__`` is logged at level WARN upon its instantiation. (Other than this logging, the artifact is instantiated normally.) Example of a deprecated catalog asset: @@ -123,12 +123,12 @@ Example of a deprecated catalog asset: "text": "You are an agent in charge of answering a boolean (yes/no) question. The system presents you with a passage and a question. Read the passage carefully, and then answer yes or no. Think about your answer, and make sure it makes sense. Do not explain the answer. Only say yes or no." } -Combining this feature with ``ArtifactLink`` in the above example, we can also log a warning to the accessing code that -the name ``asset1`` is to be replaced by ``asset2``. +Combining this feature with ``ArtifactLink`` in the above example, we can also log a warning to the accessing code that +the name ``asset1`` is to be replaced by ``asset2``. .. code-block:: python - link_to_asset2 = ArtifactLink(artifact_linked_to="asset2", + link_to_asset2 = ArtifactLink(to="asset2", __deprecated_msg__="'asset1' is going to be deprecated. In future uses, please access 'asset2' instead.") add_to_catalog( link_to_asset2, diff --git a/src/unitxt/artifact.py b/src/unitxt/artifact.py index ed9036225..df4dbdf40 100644 --- a/src/unitxt/artifact.py +++ b/src/unitxt/artifact.py @@ -282,10 +282,12 @@ def from_dict(cls, d, overwrite_args=None): @classmethod def load(cls, path, artifact_identifier=None, overwrite_args=None): d = artifacts_json_cache(path) - if "artifact_linked_to" in d and d["artifact_linked_to"] is not None: - # d stands for an ArtifactLink - artifact_link = ArtifactLink.from_dict(d) - return artifact_link.load(overwrite_args) + if "__type__" in d and d["__type__"] == "artifact_link": + cls.from_dict(d) # for verifications and warnings + catalog, artifact_rep, _ = get_catalog_name_and_args(name=d["to"]) + return catalog.get_with_overwrite( + artifact_rep, overwrite_args=overwrite_args + ) new_artifact = cls.from_dict(d, overwrite_args=overwrite_args) new_artifact.__id__ = artifact_identifier @@ -466,54 +468,11 @@ def __repr__(self): class ArtifactLink(Artifact): - # the artifact linked to, expressed by its catalog id - artifact_linked_to: str = Field(default=None, required=True) + to: Artifact - @classmethod - def from_dict(cls, d: dict): - assert isinstance(d, dict), f"argument must be a dictionary, got: d = {d}." - assert ( - "artifact_linked_to" in d and d["artifact_linked_to"] is not None - ), f"A non-none field named 'artifact_linked_to' is expected in input argument d, but got: {d}." - artifact_linked_to = d["artifact_linked_to"] - # artifact_linked_to is a name of catalog entry - assert isinstance( - artifact_linked_to, str - ), f"'artifact_linked_to' should be a string expressing a name of a catalog entry. Got{artifact_linked_to}." - msg = d["__deprecated_msg__"] if "__deprecated_msg__" in d else None - return ArtifactLink( - artifact_linked_to=artifact_linked_to, __deprecated_msg__=msg - ) - - def load(self, overwrite_args: dict) -> Artifact: - # identify the catalog for the artifact_linked_to - assert ( - self.artifact_linked_to is not None - ), "'artifact_linked_to' must be non-None in order to load it from the catalog. Currently, it is None." - assert isinstance( - self.artifact_linked_to, str - ), f"'artifact_linked_to' should be a string (expressing a name of a catalog entry). Currently, its type is: {type(self.artifact_linked_to)}." - needed_catalog = None - catalogs = list(Catalogs()) - for catalog in catalogs: - if self.artifact_linked_to in catalog: - needed_catalog = catalog - - if needed_catalog is None: - raise UnitxtArtifactNotFoundError(self.artifact_linked_to, catalogs) - - path = needed_catalog.path(self.artifact_linked_to) - d = artifacts_json_cache(path) - # if needed, follow, in a recursive manner, over multiple links, - # passing through instantiating of the ArtifactLink-s on the way, triggering - # deprecatioin warning as needed. - if "artifact_linked_to" in d and d["artifact_linked_to"] is not None: - # d stands for an ArtifactLink - artifact_link = ArtifactLink.from_dict(d) - return artifact_link.load(overwrite_args) - new_artifact = Artifact.from_dict(d, overwrite_args=overwrite_args) - new_artifact.__id__ = self.artifact_linked_to - return new_artifact + def verify(self): + if self.to.__id__ is None: + raise UnitxtError("ArtifactLink must link to existing catalog entry.") def get_raw(obj): @@ -577,14 +536,12 @@ def fetch_artifact(artifact_rep) -> Tuple[Artifact, Union[AbstractCatalog, None] """ if isinstance(artifact_rep, Artifact): if isinstance(artifact_rep, ArtifactLink): - return fetch_artifact(artifact_rep.artifact_linked_to) + return fetch_artifact(artifact_rep.to) return artifact_rep, None # If local file if isinstance(artifact_rep, str) and Artifact.is_artifact_file(artifact_rep): artifact_to_return = Artifact.load(artifact_rep) - if isinstance(artifact_rep, ArtifactLink): - artifact_to_return = fetch_artifact(artifact_to_return.artifact_linked_to) return artifact_to_return, None diff --git a/src/unitxt/catalog.py b/src/unitxt/catalog.py index ee4347cfc..3221c3ee0 100644 --- a/src/unitxt/catalog.py +++ b/src/unitxt/catalog.py @@ -153,7 +153,7 @@ def add_link_to_catalog( deprecated_msg = None artifact_link = ArtifactLink( - artifact_linked_to=artifact_linked_to, __deprecated_msg__=deprecated_msg + to=artifact_linked_to, __deprecated_msg__=deprecated_msg ) add_to_catalog( diff --git a/src/unitxt/catalog/augmentors/augment_whitespace_prefix_and_suffix_task_input.json b/src/unitxt/catalog/augmentors/augment_whitespace_prefix_and_suffix_task_input.json index 5d97983af..1857b4d2e 100644 --- a/src/unitxt/catalog/augmentors/augment_whitespace_prefix_and_suffix_task_input.json +++ b/src/unitxt/catalog/augmentors/augment_whitespace_prefix_and_suffix_task_input.json @@ -1,5 +1,5 @@ { "__type__": "artifact_link", - "artifact_linked_to": "augmentors.text.whitespace_prefix_suffix", + "to": "augmentors.text.whitespace_prefix_suffix", "__deprecated_msg__": "Artifact 'augmentors.augment_whitespace_prefix_and_suffix_task_input' is deprecated. Artifact 'augmentors.text.whitespace_prefix_suffix' will be instantiated instead. In future uses, please reference artifact 'augmentors.text.whitespace_prefix_suffix' directly." } diff --git a/src/unitxt/catalog/augmentors/augment_whitespace_task_input.json b/src/unitxt/catalog/augmentors/augment_whitespace_task_input.json index 117304ea7..c802e5202 100644 --- a/src/unitxt/catalog/augmentors/augment_whitespace_task_input.json +++ b/src/unitxt/catalog/augmentors/augment_whitespace_task_input.json @@ -1,5 +1,5 @@ { "__type__": "artifact_link", - "artifact_linked_to": "augmentors.text.whitespace_prefix_suffix", + "to": "augmentors.text.whitespace_prefix_suffix", "__deprecated_msg__": "Artifact 'augmentors.augment_whitespace_task_input' is deprecated. Artifact 'augmentors.text.whitespace_prefix_suffix' will be instantiated instead. In future uses, please reference artifact 'augmentors.text.whitespace_prefix_suffix' directly." } diff --git a/src/unitxt/catalog/tasks/qa/with_context/abstractive.json b/src/unitxt/catalog/tasks/qa/with_context/abstractive.json index 6a80f9cb9..7d7861a34 100644 --- a/src/unitxt/catalog/tasks/qa/with_context/abstractive.json +++ b/src/unitxt/catalog/tasks/qa/with_context/abstractive.json @@ -1,5 +1,5 @@ { "__type__": "artifact_link", - "artifact_linked_to": "tasks.qa.with_context", + "to": "tasks.qa.with_context", "__deprecated_msg__": null } diff --git a/src/unitxt/catalog/tasks/qa/with_context/extractive.json b/src/unitxt/catalog/tasks/qa/with_context/extractive.json index 802717b3e..6ba616fd7 100644 --- a/src/unitxt/catalog/tasks/qa/with_context/extractive.json +++ b/src/unitxt/catalog/tasks/qa/with_context/extractive.json @@ -1,5 +1,5 @@ { "__type__": "artifact_link", - "artifact_linked_to": "tasks.qa.extractive", + "to": "tasks.qa.extractive", "__deprecated_msg__": null } diff --git a/tests/library/test_artifact.py b/tests/library/test_artifact.py index 6f2ff280f..ea7fabec3 100644 --- a/tests/library/test_artifact.py +++ b/tests/library/test_artifact.py @@ -349,7 +349,7 @@ def test_artifact_link_with_deprecation_warning(self): with self.assertWarns(DeprecationWarning): rename_fields = ArtifactLink( - artifact_linked_to="rename.for.test.artifact.link", + to="rename.for.test.artifact.link", __deprecated_msg__="Artifact is deprecated. " "'rename.for.test.artifact.link' is now instantiated instead. " "\nIn the future, please use 'rename.for.test.artifact.link'.", @@ -536,7 +536,7 @@ def test_artifact_link_in_recursive_load(self): ) link_to_copy_operator = ArtifactLink( - artifact_linked_to="copy.operator", + to="copy.operator", ) add_to_catalog( link_to_copy_operator,