chore: move cost estimation logic to the backend #249

Merged · 1 commit · Jul 5, 2024
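This PR strips all client-side cost math out of the SDK: the hard-coded price tables in src/openlayer/lib/constants.py, the estimate_cost and _get_cost_estimate helpers, and the cost field threaded through every tracer. Each trace still carries model, prompt_tokens, and completion_tokens, which is everything a backend needs to price a request after the fact. A minimal sketch of the server-side idea, assuming a hypothetical price table (none of the names or numbers below come from this PR):

from typing import Optional

# Hypothetical backend-side price table, keyed by model name (USD per token).
PRICE_PER_TOKEN = {
    "some-model": {"input": 0.5e-6, "output": 1.5e-6},  # illustrative prices only
}


def derive_cost(model: str, prompt_tokens: int, completion_tokens: int) -> Optional[float]:
    """Derives cost from the token counts the SDK still reports per trace."""
    prices = PRICE_PER_TOKEN.get(model)
    if prices is None:
        return None  # unknown model: leave cost unset, as the old client code did
    return prices["input"] * prompt_tokens + prices["output"] * completion_tokens

One practical upshot of moving this server-side: price updates no longer require shipping a new SDK release.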
93 changes: 0 additions & 93 deletions src/openlayer/lib/constants.py

This file was deleted.
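Judging by how the tracers index these tables below (cost_per_token["input"], cost_per_token["output"]), the deleted constants presumably looked something like the following sketch; the model names and prices are placeholders, not the deleted values:

OPENAI_COST_PER_TOKEN = {
    "gpt-4": {"input": 30e-6, "output": 60e-6},  # placeholder prices, USD per token
}

AZURE_OPENAI_COST_PER_TOKEN = {
    "gpt-4": {"input": 30e-6, "output": 60e-6},  # placeholder prices, USD per token
}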

8 changes: 0 additions & 8 deletions src/openlayer/lib/integrations/anthropic_tracer.py
@@ -23,7 +23,6 @@ def trace_anthropic(
     - end_time: The time when the completion was received.
     - latency: The time it took to generate the completion.
     - tokens: The total number of tokens used to generate the completion.
-    - cost: The estimated cost of the completion.
     - prompt_tokens: The number of tokens in the prompt.
     - completion_tokens: The number of tokens in the completion.
     - model: The model used to generate the completion.
@@ -152,15 +151,12 @@ def stream_chunks(
                 collected_function_call["inputs"] = json.loads(collected_function_call["inputs"])
                 output_data = collected_function_call
 
-            cost = 0
-
             trace_args = create_trace_args(
                 end_time=end_time,
                 inputs={"prompt": kwargs["messages"]},
                 output=output_data,
                 latency=latency,
                 tokens=num_of_completion_tokens,
-                cost=cost,
                 prompt_tokens=num_of_prompt_tokens,
                 completion_tokens=num_of_completion_tokens,
                 model=kwargs.get("model"),
@@ -206,14 +202,12 @@ def handle_non_streaming_create(
     # Try to add step to the trace
     try:
         output_data = parse_non_streaming_output_data(response)
-        cost = 0
         trace_args = create_trace_args(
             end_time=end_time,
             inputs={"prompt": kwargs["messages"]},
             output=output_data,
             latency=(end_time - start_time) * 1000,
             tokens=response.usage.input_tokens + response.usage.output_tokens,
-            cost=cost,
             prompt_tokens=response.usage.input_tokens,
             completion_tokens=response.usage.output_tokens,
             model=response.model,
@@ -275,7 +269,6 @@ def create_trace_args(
     output: str,
     latency: float,
     tokens: int,
-    cost: float,
     prompt_tokens: int,
     completion_tokens: int,
     model: str,
@@ -291,7 +284,6 @@
         "output": output,
         "latency": latency,
         "tokens": tokens,
-        "cost": cost,
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
         "model": model,
14 changes: 0 additions & 14 deletions src/openlayer/lib/integrations/langchain_callback.py
@@ -7,7 +7,6 @@
 from langchain import schema as langchain_schema
 from langchain.callbacks.base import BaseCallbackHandler
 
-from .. import constants
 from ..tracing import tracer
 
 LANGCHAIN_TO_OPENLAYER_PROVIDER_MAP = {"openai-chat": "OpenAI"}
@@ -27,7 +26,6 @@ def __init__(self, **kwargs: Any) -> None:
         self.provider: str = None
         self.model: Optional[str] = None
         self.model_parameters: Dict[str, Any] = None
-        self.cost: Optional[float] = None
         self.prompt_tokens: int = None
         self.completion_tokens: int = None
         self.total_tokens: int = None
@@ -87,10 +85,6 @@ def on_llm_end(self, response: langchain_schema.LLMResult, **kwargs: Any) -> Any
         if response.llm_output and "token_usage" in response.llm_output:
             self.prompt_tokens = response.llm_output["token_usage"].get("prompt_tokens", 0)
             self.completion_tokens = response.llm_output["token_usage"].get("completion_tokens", 0)
-            self.cost = self._get_cost_estimate(
-                num_input_tokens=self.prompt_tokens,
-                num_output_tokens=self.completion_tokens,
-            )
             self.total_tokens = response.llm_output["token_usage"].get("total_tokens", 0)
 
         for generations in response.generations:
@@ -99,13 +93,6 @@ def on_llm_end(self, response: langchain_schema.LLMResult, **kwargs: Any) -> Any
 
         self._add_to_trace()
 
-    def _get_cost_estimate(self, num_input_tokens: int, num_output_tokens: int) -> float:
-        """Returns the cost estimate for a given model and number of tokens."""
-        if self.model not in constants.OPENAI_COST_PER_TOKEN:
-            return None
-        cost_per_token = constants.OPENAI_COST_PER_TOKEN[self.model]
-        return cost_per_token["input"] * num_input_tokens + cost_per_token["output"] * num_output_tokens
-
     def _add_to_trace(self) -> None:
         """Adds to the trace."""
         name = PROVIDER_TO_STEP_NAME.get(self.provider, "Chat Completion Model")
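For context, on_llm_end reads its token counts from LangChain's llm_output; based on the keys the handler accesses above, that payload looks roughly like this sketch (not an official LangChain schema):

llm_output = {
    "token_usage": {
        "prompt_tokens": 12,
        "completion_tokens": 34,
        "total_tokens": 46,
    },
}

With the estimator gone, these counts flow to the trace as-is and the backend owns the price lookup.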
@@ -114,7 +101,6 @@ def _add_to_trace(self) -> None:
             provider=self.provider,
             inputs={"prompt": self.prompt},
             output=self.output,
-            cost=self.cost,
             tokens=self.total_tokens,
             latency=self.latency,
             start_time=self.start_time,
40 changes: 1 addition & 39 deletions src/openlayer/lib/integrations/openai_tracer.py
@@ -8,7 +8,6 @@
 
 import openai
 
-from .. import constants
 from ..tracing import tracer
 
 logger = logging.getLogger(__name__)
@@ -24,7 +23,6 @@ def trace_openai(
     - end_time: The time when the completion was received.
     - latency: The time it took to generate the completion.
     - tokens: The total number of tokens used to generate the completion.
-    - cost: The estimated cost of the completion.
     - prompt_tokens: The number of tokens in the prompt.
     - completion_tokens: The number of tokens in the completion.
     - model: The model used to generate the completion.
@@ -161,20 +159,13 @@ def stream_chunks(
             else:
                 collected_function_call["arguments"] = json.loads(collected_function_call["arguments"])
             output_data = collected_function_call
-            completion_cost = estimate_cost(
-                model=kwargs.get("model"),
-                prompt_tokens=0,
-                completion_tokens=(num_of_completion_tokens if num_of_completion_tokens else 0),
-                is_azure_openai=is_azure_openai,
-            )
 
             trace_args = create_trace_args(
                 end_time=end_time,
                 inputs={"prompt": kwargs["messages"]},
                 output=output_data,
                 latency=latency,
                 tokens=num_of_completion_tokens,
-                cost=completion_cost,
                 prompt_tokens=0,
                 completion_tokens=num_of_completion_tokens,
                 model=kwargs.get("model"),
@@ -196,21 +187,6 @@
     )
 
 
-def estimate_cost(
-    prompt_tokens: int,
-    completion_tokens: int,
-    model: str,
-    is_azure_openai: bool = False,
-) -> float:
-    """Returns the cost estimate for a given OpenAI model and number of tokens."""
-    if is_azure_openai and model in constants.AZURE_OPENAI_COST_PER_TOKEN:
-        cost_per_token = constants.AZURE_OPENAI_COST_PER_TOKEN[model]
-    elif model in constants.OPENAI_COST_PER_TOKEN:
-        cost_per_token = constants.OPENAI_COST_PER_TOKEN[model]
-        return cost_per_token["input"] * prompt_tokens + cost_per_token["output"] * completion_tokens
-    return None
-
-
 def get_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]:
     """Gets the model parameters from the kwargs."""
     return {
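The removed estimate_cost was a straight linear combination over per-token prices. A worked example with made-up numbers ($0.00001 per input token, $0.00003 per output token):

cost_per_token = {"input": 0.00001, "output": 0.00003}  # made-up prices
prompt_tokens, completion_tokens = 100, 50
cost = cost_per_token["input"] * prompt_tokens + cost_per_token["output"] * completion_tokens
# 100 * 0.00001 + 50 * 0.00003 = 0.001 + 0.0015 = 0.0025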
@@ -234,7 +210,6 @@ def create_trace_args(
     output: str,
     latency: float,
     tokens: int,
-    cost: float,
     prompt_tokens: int,
     completion_tokens: int,
     model: str,
@@ -250,7 +225,6 @@
         "output": output,
         "latency": latency,
         "tokens": tokens,
-        "cost": cost,
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
         "model": model,
@@ -300,19 +274,12 @@ def handle_non_streaming_create(
     # Try to add step to the trace
     try:
         output_data = parse_non_streaming_output_data(response)
-        cost = estimate_cost(
-            model=response.model,
-            prompt_tokens=response.usage.prompt_tokens,
-            completion_tokens=response.usage.completion_tokens,
-            is_azure_openai=is_azure_openai,
-        )
         trace_args = create_trace_args(
             end_time=end_time,
             inputs={"prompt": kwargs["messages"]},
             output=output_data,
             latency=(end_time - start_time) * 1000,
             tokens=response.usage.total_tokens,
-            cost=cost,
             prompt_tokens=response.usage.prompt_tokens,
             completion_tokens=response.usage.completion_tokens,
             model=response.model,
@@ -373,7 +340,7 @@ def trace_openai_assistant_thread_run(client: openai.OpenAI, run: "openai.types.
     """Trace a run from an OpenAI assistant.
 
     Once the run is completed, the thread data is published to Openlayer,
-    along with the latency, cost, and number of tokens used."""
+    along with the latency, and number of tokens used."""
     _type_check_run(run)
 
     # Do nothing if the run is not completed
@@ -420,11 +387,6 @@ def _extract_run_vars(run: "openai.types.beta.threads.run.Run") -> Dict[str, any
         "completion_tokens": run.usage.completion_tokens,
         "tokens": run.usage.total_tokens,
         "model": run.model,
-        "cost": estimate_cost(
-            model=run.model,
-            prompt_tokens=run.usage.prompt_tokens,
-            completion_tokens=run.usage.completion_tokens,
-        ),
     }


33 changes: 1 addition & 32 deletions src/openlayer/lib/tracing/tracer.py
@@ -306,7 +306,7 @@ def post_process_trace(
     else:
         input_variable_names = []
 
-    processed_steps = bubble_up_costs_and_tokens(trace_obj.to_dict())
+    processed_steps = trace_obj.to_dict()
 
     trace_data = {
         "inferenceTimestamp": root_step.start_time,
@@ -322,34 +322,3 @@ def post_process_trace(
     trace_data.update(input_variables)
 
     return trace_data, input_variable_names
-
-
-def bubble_up_costs_and_tokens(trace_dict: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Adds the cost and number of tokens of nested steps to their parent steps."""
-
-    def add_step_costs_and_tokens(step: Dict[str, Any]) -> Tuple[float, int]:
-        step_cost = step_tokens = 0
-
-        if "cost" in step and step["cost"] is not None:
-            step_cost += step["cost"]
-        if "tokens" in step and step["tokens"] is not None:
-            step_tokens += step["tokens"]
-
-        # Recursively add costs and tokens from nested steps
-        for nested_step in step.get("steps", []):
-            nested_cost, nested_tokens = add_step_costs_and_tokens(nested_step)
-            step_cost += nested_cost
-            step_tokens += nested_tokens
-
-        if "steps" in step:
-            if step_cost > 0 and "cost" not in step:
-                step["cost"] = step_cost
-            if step_tokens > 0 and "tokens" not in step:
-                step["tokens"] = step_tokens
-
-        return step_cost, step_tokens
-
-    for root_step_dict in trace_dict:
-        add_step_costs_and_tokens(root_step_dict)
-
-    return trace_dict
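To see what the deleted helper did (and what the backend now has to replicate), here is a small worked example: a parent step with no totals of its own picks up the sums of its children.

trace = [
    {
        "name": "root",
        "steps": [
            {"name": "llm_1", "cost": 0.002, "tokens": 120, "steps": []},
            {"name": "llm_2", "cost": 0.001, "tokens": 80, "steps": []},
        ],
    }
]
bubble_up_costs_and_tokens(trace)
# The root step now carries the aggregated values:
# trace[0]["tokens"] == 200, trace[0]["cost"] is about 0.003 (floating point)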