simplify class parameter listing on web page
Signed-off-by: dafnapension <[email protected]>
dafnapension committed Dec 24, 2024
1 parent bbab5ee commit aa39ba5
Showing 15 changed files with 322 additions and 255 deletions.
21 changes: 11 additions & 10 deletions src/unitxt/card.py
@@ -12,16 +12,17 @@
class TaskCard(Artifact):
"""TaskCard delineates the phases in transforming the source dataset into model input, and specifies the metrics for evaluation of model output.
Attributes:
loader: specifies the source address and the loading operator that can access that source and transform it into a unitxt multistream.
preprocess_steps: list of unitxt operators to process the data source into model input.
task: specifies the fields (of the already (pre)processed instance) making the inputs, the fields making the outputs, and the metrics to be used for evaluating the model output.
templates: format strings to be applied on the input fields (specified by the task) and the output fields. The template also carries the instructions and the list of postprocessing steps, to be applied to the model output.
default_template: a default template for tasks with very specific task dataset specific template
Args:
loader:
specifies the source address and the loading operator that can access that source and transform it into a unitxt multistream.
preprocess_steps:
list of unitxt operators to process the data source into model input.
task:
specifies the fields (of the already (pre)processed instance) making the inputs, the fields making the outputs, and the metrics to be used for evaluating the model output.
templates:
format strings to be applied on the input fields (specified by the task) and the output fields. The template also carries the instructions and the list of postprocessing steps, to be applied to the model output.
default_template:
a default template for tasks with a very specific, dataset-specific template
"""

loader: Loader
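
For orientation, a minimal sketch of how these fields fit together when defining a card; the dataset, catalog identifiers, and empty preprocessing list below are illustrative assumptions, not taken from this commit:

.. code-block:: python

    from unitxt.card import TaskCard
    from unitxt.loaders import LoadHF

    # Hypothetical card: load glue/mrpc and point at assumed catalog
    # entries for the task and templates.
    card = TaskCard(
        loader=LoadHF(path="glue", name="mrpc"),
        preprocess_steps=[],  # unitxt operators that shape instances for the task
        task="tasks.classification.multi_class",
        templates="templates.classification.multi_class.all",
    )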
23 changes: 14 additions & 9 deletions src/unitxt/deprecation_utils.py
@@ -18,19 +18,24 @@ def compare_versions(version1, version2):
"""Compare two semantic versioning strings and determine their relationship.
Parameters:
- version1 (str): The first version string to compare.
- version2 (str): The second version string to compare.
version1 (str):
The first version string to compare.
version2 (str):
The second version string to compare.
Returns:
- int: -1 if version1 < version2, 1 if version1 > version2, 0 if equal.
int: -1 if version1 < version2, 1 if version1 > version2, 0 if equal.
Example:
>>> compare_versions("1.2.0", "1.2.3")
-1
>>> compare_versions("1.3.0", "1.2.8")
1
>>> compare_versions("1.0.0", "1.0.0")
0
.. code-block:: text
>>> compare_versions("1.2.0", "1.2.3")
-1
>>> compare_versions("1.3.0", "1.2.8")
1
>>> compare_versions("1.0.0", "1.0.0")
0
"""
parts1 = [int(part) for part in version1.split(".")]
parts2 = [int(part) for part in version2.split(".")]
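
A plausible completion of the truncated body, reconstructed from the docstring's contract; padding unequal-length versions with zeros is an assumption about how versions such as "1.2" are handled:

.. code-block:: python

    def compare_versions(version1, version2):
        parts1 = [int(part) for part in version1.split(".")]
        parts2 = [int(part) for part in version2.split(".")]
        # Pad the shorter list with zeros so "1.2" compares like "1.2.0".
        length = max(len(parts1), len(parts2))
        parts1 += [0] * (length - len(parts1))
        parts2 += [0] * (length - len(parts2))
        # Python compares lists lexicographically, element by element.
        if parts1 < parts2:
            return -1
        if parts1 > parts2:
            return 1
        return 0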
34 changes: 22 additions & 12 deletions src/unitxt/dialog_operators.py
@@ -27,12 +27,17 @@ class SerializeDialog(InstanceFieldOperator):
of system responses and can operate on a per-turn basis or aggregate the entire
dialog.
Attributes:
field (str): The field in the input data that contains the dialog.
to_field (Optional[str]): The field in the output data where the serialized dialog will be stored.
last_user_turn_to_field (Optional[str]): Field to store the last user turn.
last_system_turn_to_field (Optional[str]): Field to store the last system turn.
context_field (Optional[str]): Field that contains additional context to be prepended to the dialog.
Args:
field (str):
The field in the input data that contains the dialog.
to_field (Optional[str]):
The field in the output data where the serialized dialog will be stored.
last_user_turn_to_field (Optional[str]):
Field to store the last user turn.
last_system_turn_to_field (Optional[str]):
Field to store the last system turn.
context_field (Optional[str]):
Field that contains additional context to be prepended to the dialog.
"""

format: SystemFormat = None
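
A hedged usage sketch of the fields documented above; the field names are illustrative placeholders:

.. code-block:: python

    from unitxt.dialog_operators import SerializeDialog

    serializer = SerializeDialog(
        field="dialog",                           # where the dialog lives in the instance
        to_field="serialized_dialog",             # where the flattened text is written
        last_user_turn_to_field="last_user_turn", # capture the final user turn separately
        context_field="context",                  # optional context prepended to the dialog
    )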
@@ -100,12 +105,17 @@ class SerializeOpenAiFormatDialog(SerializeDialog):
of system responses and can operate on a per-turn basis or aggregate the entire
dialog.
Attributes:
field (str): The field in the input data that contains the dialog.
to_field (Optional[str]): The field in the output data where the serialized dialog will be stored.
last_user_turn_to_field (Optional[str]): Field to store the last user turn.
last_system_turn_to_field (Optional[str]): Field to store the last system turn.
context_field (Optional[str]): Field that contains additional context to be prepended to the dialog.
Args:
field (str):
The field in the input data that contains the dialog.
to_field (Optional[str]):
The field in the output data where the serialized dialog will be stored.
last_user_turn_to_field (Optional[str]):
Field to store the last user turn.
last_system_turn_to_field (Optional[str]):
Field to store the last system turn.
context_field (Optional[str]):
Field that contains additional context to be prepended to the dialog.
"""

is_last_turn_user_only: bool = True
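
A sketch of the expected input, under the assumption implied by the class name that dialogs follow the OpenAI chat format of role/content messages; the instance content is illustrative:

.. code-block:: python

    from unitxt.dialog_operators import SerializeOpenAiFormatDialog

    # Hypothetical instance with an OpenAI-format dialog.
    instance = {
        "dialog": [
            {"role": "user", "content": "What is the capital of France?"},
            {"role": "assistant", "content": "Paris."},
            {"role": "user", "content": "And of Italy?"},
        ]
    }
    serializer = SerializeOpenAiFormatDialog(
        field="dialog",
        to_field="serialized_dialog",
    )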
20 changes: 12 additions & 8 deletions src/unitxt/error_utils.py
@@ -27,10 +27,12 @@ def additional_info(path: str) -> str:
class UnitxtError(Exception):
"""Exception raised for Unitxt errors.
Attributes:
message : str -- explanation of the error
additional_info_id : Optional[str] -- relative path to additional documentation on web
If set, should be one of the DOCUMENATION_* constants in the error_utils.py file.
Args:
message (str):
explanation of the error
additional_info_id (Optional[str]):
relative path to additional documentation on the web.
If set, should be one of the DOCUMENATION_* constants in the error_utils.py file.
"""

@@ -43,10 +45,12 @@ def __init__(self, message: str, additional_info_id: Optional[str] = None):
class UnitxtWarning:
"""Object to format warning message to log.
Attributes:
message -- explanation of the warning
additional_info_id : Optional[str] -- relative path to additional documentation on web
If set, should be one of the DOCUMENATION_* constants in the error_utils.py file.
Args:
message (str):
explanation of the warning
additional_info_id (Optional[str]):
relative path to additional documentation on the web.
If set, should be one of the DOCUMENATION_* constants in the error_utils.py file.
"""

def __init__(self, message: str, additional_info_id: Optional[str] = None):
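
A minimal sketch of using these signatures; the message texts and the documentation id are hypothetical placeholders:

.. code-block:: python

    from unitxt.error_utils import UnitxtError, UnitxtWarning

    def require_template(template):
        if template is None:
            # Hard error, with a pointer to further documentation (hypothetical id).
            raise UnitxtError(
                "No template was specified for the task.",
                additional_info_id="docs/adding_template",
            )
        return template

    # Formats a warning message to the log; does not raise.
    UnitxtWarning("Field 'instruction' is deprecated; use 'system_prompt'.")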
12 changes: 7 additions & 5 deletions src/unitxt/image_operators.py
@@ -216,13 +216,15 @@ def process_image(self, image):
class PixelNoise(ImageAugmentor):
"""A class that overlays a mask of randomly colored nxn squares across an image based on a specified noise rate.
Attributes:
square_size (int): Size of each square in pixels.
noise_rate (float): Proportion of the image that should be affected by noise (0 to 1).
Args:
square_size (int):
Size of each square in pixels.
noise_rate (float):
Proportion of the image that should be affected by noise (0 to 1).
Methods:
process_image(image): Adds the random square mask to the provided image and returns the modified image.
process_image(image):
Adds the random square mask to the provided image and returns the modified image.
"""

square_size: int = 1
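
A hedged usage sketch, assuming a PIL image as input; the file name is a placeholder:

.. code-block:: python

    from PIL import Image

    from unitxt.image_operators import PixelNoise

    augmentor = PixelNoise(square_size=4, noise_rate=0.1)  # mask ~10% of the image
    image = Image.open("example.jpg")  # placeholder input image
    noisy = augmentor.process_image(image)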
27 changes: 16 additions & 11 deletions src/unitxt/inference.py
@@ -1807,16 +1807,19 @@ class WMLInferenceEngineBase(
):
"""Base for classes running inference using ibm-watsonx-ai.
Attributes:
credentials (Dict[str, str], optional): By default, it is created by a class
Args:
credentials (Dict[str, str], optional):
By default, it is created by a class
instance which tries to retrieve proper environment variables
("WML_URL", "WML_PROJECT_ID", "WML_SPACE_ID", "WML_APIKEY", "WML_USERNAME", "WML_PASSWORD").
However, a dictionary with the keys "url", "apikey", "project_id", "space_id",
"username", and "password" can be provided directly instead.
model_name (str, optional): ID of a model to be used for inference. Mutually
model_name (str, optional):
ID of a model to be used for inference. Mutually
exclusive with 'deployment_id'.
deployment_id (str, optional): Deployment ID of a tuned model to be used for
deployment_id (str, optional):
Deployment ID of a tuned model to be used for
inference. Mutually exclusive with 'model_name'.
parameters (Union[WMLInferenceEngineParams, WMLGenerationParamsMixin, WMLChatParamsMixin], optional):
Defines inference parameters and their values. Deprecated attribute, please pass respective
@@ -2077,9 +2080,10 @@ class WMLInferenceEngineGeneration(WMLInferenceEngineBase, WMLGenerationParamsMixin):
If you want to include images in your input, please use 'WMLInferenceEngineChat' instead.
Attributes:
concurrency_limit (int): Number of concurrent requests sent to a model. Default is 10,
which is also the maximum value.
Args:
concurrency_limit (int):
Number of concurrent requests sent to a model. Default is 10,
which is also the maximum value.
Examples:
.. code-block:: python
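
    # A hedged sketch, not the original example (which is truncated here):
    # the model id is a hypothetical placeholder, and concurrency_limit is
    # shown at its default/maximum of 10.
    from unitxt.inference import WMLInferenceEngineGeneration

    engine = WMLInferenceEngineGeneration(
        model_name="google/flan-t5-xl",
        concurrency_limit=10,
    )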
@@ -2207,10 +2211,11 @@ class WMLInferenceEngineChat(WMLInferenceEngineBase, WMLChatParamsMixin):
concatenate images within an instance into a single image and adjust your query
accordingly (if necessary).
Attributes:
image_encoder (EncodeImageToString, optional): operator which encodes images in
given format to base64 strings required by service. You should specify it when
you are using images in your inputs.
Args:
image_encoder (EncodeImageToString, optional):
operator which encodes images in a
given format to base64 strings required by the service. You should specify it when
you are using images in your inputs.
Example:
.. code-block:: python
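    # A hedged sketch, not the original example (which is truncated here):
    # the credentials keys follow the docstring above, while the values and
    # model id are hypothetical placeholders. 'model_name' and 'deployment_id'
    # are mutually exclusive.
    from unitxt.inference import WMLInferenceEngineChat

    engine = WMLInferenceEngineChat(
        model_name="meta-llama/llama-3-8b-instruct",
        credentials={
            "url": "https://us-south.ml.cloud.ibm.com",
            "apikey": "<api-key>",
            "project_id": "<project-id>",
        },
    )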
35 changes: 18 additions & 17 deletions src/unitxt/loaders.py
Expand Up @@ -162,23 +162,24 @@ class LoadHF(Loader):
and it can filter datasets upon loading.
Args:
path: The path or identifier of the dataset on the HuggingFace Hub.
name: An optional dataset name.
data_dir: Optional directory to store downloaded data.
split: Optional specification of which split to load.
data_files: Optional specification of particular data files to load.
revision: Optional. The revision of the dataset. Often the commit id. Use in case you want to set the dataset version.
streaming (bool): indicating if streaming should be used.
filtering_lambda: A lambda function for filtering the data after loading.
num_proc (int): Optional integer to specify the number of processes to use for parallel dataset loading.
path:
The path or identifier of the dataset on the HuggingFace Hub.
name:
An optional dataset name.
data_dir (optional):
directory to store downloaded data.
split (optional):
specification of which split to load.
data_files (optional):
specification of particular data files to load.
revision (optional):
The revision of the dataset. Often the commit id. Use in case you want to set the dataset version.
streaming (bool):
indicates whether streaming should be used.
filtering_lambda (optional):
A lambda function for filtering the data after loading.
num_proc (int, optional):
number of processes to use for parallel dataset loading.
Example:
Loading glue's mrpc dataset
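
A minimal sketch of such a load; the streaming flag and the filtering lambda are illustrative assumptions:

.. code-block:: python

    from unitxt.loaders import LoadHF

    loader = LoadHF(
        path="glue",
        name="mrpc",
        split="train",
        streaming=True,
        filtering_lambda=lambda instance: instance["label"] == 1,  # keep positives only
    )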
15 changes: 6 additions & 9 deletions src/unitxt/metric_utils.py
@@ -353,13 +353,11 @@ def prepare(self):
class GlobalScores(dict):
"""GlobalScores is a dictionary-based class designed to handle and transform metric results into a structured format.
Attributes:
score (float): The main score value.
score_name (str): The name of the main score.
Methods:
to_df():
Transforms the dictionary of results into a pandas DataFrame with score_name as the index,
Args:
score (float):
The main score value.
score_name (str):
The name of the main score.
"""

@property
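
A hedged sketch of such a result dictionary, assuming GlobalScores can be populated like a plain dict; the metric names and values are illustrative:

.. code-block:: python

    from unitxt.metric_utils import GlobalScores

    scores = GlobalScores(
        {
            "score": 0.85,
            "score_name": "f1_micro",
            "f1_micro": 0.85,
            "accuracy": 0.80,
        }
    )
    df = scores.to_df()  # DataFrame with score_name as the index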
@@ -550,12 +548,11 @@ class GroupsScores(dict):
This class provides a property to summarize the scores and a custom
string representation for pretty-printing.
Attributes:
summary (property): A property to get a summary of the group scores.
"""

@property
def summary(self):
"""A property to get a summary of the group scores."""
data = self
# Desired metric columns
metric_cols = [
20 changes: 12 additions & 8 deletions src/unitxt/metrics.py
@@ -4929,14 +4929,18 @@ def _prepare_instances_for_model(self, texts: List[str]):
class MetricsEnsemble(InstanceMetric, ArtifactFetcherMixin):
"""Metrics Ensemble class for creating ensemble of given metrics.
Attributes:
main_score (str): The main score label used for evaluation.
metrics (List[Union[Metric, str]]): List of metrics that will be ensemble.
weights (List[float]): Weight of each the metrics
InstanceMetric currently allows two reductions:
reduction_map (Dict[str, List[str]]. Parameter for specifying the redaction method of the global score.
(see it definition at InstanceMetric class). This class define its default
value to reduce by the mean of the main score.
Args:
main_score (str):
The main score label used for evaluation.
metrics (List[Union[Metric, str]]):
List of metrics to be ensembled.
weights (List[float]):
Weight of each of the metrics.
reduction_map (Dict[str, List[str]]):
Specifies the reduction method for the global score.
InstanceMetric currently allows two reductions
(see their definition in the InstanceMetric class).
This class defines its default value to reduce by the mean of the main score.
"""

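
A hedged sketch of building an ensemble; the main score label, catalog ids, and weights are illustrative assumptions:

.. code-block:: python

    from unitxt.metrics import MetricsEnsemble

    ensemble = MetricsEnsemble(
        main_score="ensemble_score",                # hypothetical main score label
        metrics=["metrics.rouge", "metrics.bleu"],  # resolved via ArtifactFetcherMixin
        weights=[0.7, 0.3],
    )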