Skip to content

Commit

Permalink
[V2][RFC][wip] make room for batch eval
Browse files Browse the repository at this point in the history
This moves the existing eval library to "test_suite_eval" and starts the equivalent
for batch runs. Also makes the interface a little clearer.

Essentially, the differences are:
- each metric runs on a _list_ of inputs, not just one
- each input can be paired with a reference. This is possible in the "test suite"
setup, but it is clunkier.
  • Loading branch information
jonathanlastmileai committed Jan 23, 2024
1 parent c9fc17a commit 402d73d
Show file tree
Hide file tree
Showing 15 changed files with 1,055 additions and 430 deletions.
13 changes: 9 additions & 4 deletions python/src/aiconfig/eval/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,23 @@
TestSuiteWithInputsSettings,
)
"""
from .. import common, metrics
from .. import test_suite_common, test_suite_metrics

# pyright: reportWildcardImportFromLibrary=false
from ..lib import (
from ..test_suite_lib import (
TestSuiteWithInputsSettings,
run_test_suite_outputs_only,
run_test_suite_with_inputs,
)
from ..test_suite_metrics import TestSuiteMetric, brevity, substring_match

__all__ = [
"common",
"metrics",
"TestSuiteMetric",
"test_suite_common",
"test_suite_metrics",
"brevity",
"substring_match",
"run_test_suite_with_inputs",
"run_test_suite_outputs_only",
"run_test_suite_with_inputs",
"TestSuiteWithInputsSettings",
Expand Down
31 changes: 31 additions & 0 deletions python/src/aiconfig/eval/batch_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from abc import abstractmethod
from typing import Protocol, Sequence, TypeVar

from aiconfig.eval import batch_common, common

# Type of a reference value that an evaluated input may be compared against.
# Declared without a variance argument, so it is invariant.
T_Ref = TypeVar("T_Ref")
# Contravariant variant of the reference type; used below where the reference
# only appears as a call argument (BatchEvaluationFunctionWithReference).
T_Ref_contra = TypeVar("T_Ref_contra", contravariant=True)


class BatchEvaluationFunctionWithReference(
    Protocol[common.T_Evaluable, T_Ref_contra, common.T_MetricValue_inv]
):
    """Structural type for an async batch metric that uses references.

    An implementation is awaited with a whole batch of evaluables plus a
    batch of references, and yields a list of metric values.

    The module-level ``T_Ref_contra`` is referenced directly rather than as
    ``batch_common.T_Ref_contra``, so this class does not depend on the
    module importing itself while it is still being initialised.
    """

    @abstractmethod
    async def __call__(
        self,
        data: Sequence[common.T_Evaluable],
        ref: Sequence[T_Ref_contra],
    ) -> list[common.T_MetricValue_inv]:
        """Evaluate a batch of inputs against a batch of references.

        Args:
            data: The evaluables to score.
            ref: The references to score against.
                NOTE(review): presumably ``ref[i]`` is paired with
                ``data[i]`` and both have the same length -- confirm with
                callers; nothing here enforces it.

        Returns:
            A list of metric values.
        """
        ...


class BatchEvaluationFunctionWithoutReference(
    Protocol[common.T_Evaluable, common.T_MetricValue_inv],
):
    """Structural type for an async batch metric that needs no references.

    An implementation is awaited with a whole batch of evaluables and
    yields a list of metric values.
    """

    @abstractmethod
    async def __call__(
        self,
        data: Sequence[common.T_Evaluable],
    ) -> list[common.T_MetricValue_inv]:
        """Evaluate a batch of inputs.

        Args:
            data: The evaluables to score.

        Returns:
            A list of metric values.
        """
        ...
Loading

0 comments on commit 402d73d

Please sign in to comment.