Merge pull request #43 from genai-impact/docs/reference
Add API References to Documentation
samuelrince authored Apr 30, 2024
2 parents 57de458 + 174bedc commit dafaa13
Showing 10 changed files with 506 additions and 7 deletions.
4 changes: 0 additions & 4 deletions docs/api_reference.md

This file was deleted.

30 changes: 30 additions & 0 deletions docs/scripts/gen_references.py
@@ -0,0 +1,30 @@
"""Generate the code reference pages."""

from pathlib import Path

import mkdocs_gen_files

PACKAGE = "ecologits"

nav = mkdocs_gen_files.Nav()

for path in sorted(Path(PACKAGE).rglob("*.py")):
module_path = path.relative_to(PACKAGE).with_suffix("")
doc_path = path.relative_to(PACKAGE).with_suffix(".md")
full_doc_path = Path("reference", doc_path)

parts = list(module_path.parts)

if parts[-1] == "__init__" or parts[-1] == "__main__":
continue

nav[parts] = doc_path.as_posix()

with mkdocs_gen_files.open(full_doc_path, "w") as fd:
identifier = ".".join(parts)
fd.write("::: " + identifier)

mkdocs_gen_files.set_edit_path(full_doc_path, Path("../") / path)

with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file: #
nav_file.writelines(nav.build_literate_nav())
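For clarity, here is a worked trace of one loop iteration, using ecologits/impacts/llm.py (changed in this PR) as the input file. The script itself only runs under the mkdocs-gen-files plugin during a mkdocs build; this sketch just replays the path arithmetic:

```python
from pathlib import Path

# One iteration of the loop above, replayed for ecologits/impacts/llm.py.
path = Path("ecologits/impacts/llm.py")
module_path = path.relative_to("ecologits").with_suffix("")   # impacts/llm
doc_path = path.relative_to("ecologits").with_suffix(".md")   # impacts/llm.md
full_doc_path = Path("reference", doc_path)                   # reference/impacts/llm.md
parts = list(module_path.parts)                               # ["impacts", "llm"]
identifier = ".".join(parts)                                  # "impacts.llm"
# The generated page reference/impacts/llm.md holds a single mkdocstrings
# directive, "::: impacts.llm", and SUMMARY.md gains the matching nav entry.
```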
1 change: 1 addition & 0 deletions ecologits/__init__.py
@@ -1,3 +1,4 @@
from .ecologits import EcoLogits

__all__ = ["EcoLogits"]
__version__ = "0.1.3"
30 changes: 30 additions & 0 deletions ecologits/ecologits.py
@@ -4,10 +4,40 @@


class EcoLogits:
    """
    EcoLogits instrumentor to initialize function patching for each provider.

    By default, the initialization will be done on all available and compatible
    providers that are supported by the library.

    Examples:
        EcoLogits initialization example with OpenAI.

        ```python
        from ecologits import EcoLogits
        from openai import OpenAI

        EcoLogits.init()

        client = OpenAI(api_key="<OPENAI_API_KEY>")
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": "Tell me a funny joke!"}
            ]
        )

        # Get estimated environmental impacts of the inference
        print(f"Energy consumption: {response.impacts.energy.value} kWh")
        print(f"GHG emissions: {response.impacts.gwp.value} kgCO2eq")
        ```
    """

    initialized = False

    @staticmethod
    def init() -> None:
        """Initialization static method."""
        if not EcoLogits.initialized:
            init_instruments()
            EcoLogits.initialized = True
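Note that init() guards on the class-level initialized flag, so repeated calls are safe; a minimal sketch:

```python
from ecologits import EcoLogits

EcoLogits.init()  # patches all available, compatible providers
EcoLogits.init()  # no-op: EcoLogits.initialized is already True
```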
199 changes: 199 additions & 0 deletions ecologits/impacts/llm.py
@@ -41,6 +41,18 @@ def gpu_energy(
    gpu_energy_alpha: float,
    gpu_energy_beta: float
) -> float:
    """
    Compute energy consumption of a single GPU.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        output_token_count: Number of generated tokens.
        gpu_energy_alpha: Alpha parameter of the GPU linear power consumption profile.
        gpu_energy_beta: Beta parameter of the GPU linear power consumption profile.

    Returns:
        The energy consumption of a single GPU.
    """
    return output_token_count * (gpu_energy_alpha * model_active_parameter_count + gpu_energy_beta)
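To make the linear profile concrete, a worked call with placeholder numbers. The coefficients below are illustrative, not the calibrated constants the library ships, and the snippet assumes the helper can be called directly even though the library composes these functions through the dag object seen in compute_llm_impacts further down:

```python
from ecologits.impacts.llm import gpu_energy

# Placeholder coefficients (not the library's calibrated values): each token
# costs alpha * params + beta = 1e-4 * 70 + 2e-4 = 7.2e-3 units of energy.
energy = gpu_energy(
    model_active_parameter_count=70,
    output_token_count=200,
    gpu_energy_alpha=1e-4,
    gpu_energy_beta=2e-4,
)
assert abs(energy - 1.44) < 1e-9  # 200 tokens * 7.2e-3
```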


@@ -52,6 +64,19 @@ def generation_latency(
    gpu_latency_beta: float,
    request_latency: float,
) -> float:
    """
    Compute the token generation latency.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        output_token_count: Number of generated tokens.
        gpu_latency_alpha: Alpha parameter of the GPU linear latency profile.
        gpu_latency_beta: Beta parameter of the GPU linear latency profile.
        request_latency: Measured request latency (upper bound).

    Returns:
        The token generation latency.
    """
    gpu_latency = output_token_count * (gpu_latency_alpha * model_active_parameter_count + gpu_latency_beta)
    return min(gpu_latency, request_latency)
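The min() clamps the modelled latency to the measured one; with the same direct-call assumption and placeholder coefficients:

```python
from ecologits.impacts.llm import generation_latency

latency = generation_latency(
    model_active_parameter_count=70,
    output_token_count=200,
    gpu_latency_alpha=1e-3,     # placeholder
    gpu_latency_beta=1e-2,      # placeholder
    request_latency=10.0,       # measured upper bound
)
# Modelled latency is 200 * (1e-3 * 70 + 1e-2) = 16.0 > 10.0, so the
# measured request latency wins.
assert latency == 10.0
```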

@@ -61,6 +86,16 @@ def model_required_memory(
    model_total_parameter_count: float,
    model_quantization_bits: int,
) -> float:
    """
    Compute the required memory to load the model on GPU.

    Args:
        model_total_parameter_count: Number of parameters of the model.
        model_quantization_bits: Number of bits used to represent the model weights.

    Returns:
        The amount of required GPU memory to load the model.
    """
    return 1.2 * model_total_parameter_count * model_quantization_bits / 8
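A worked example, assuming parameter counts are expressed in billions so the result reads as gigabytes (the 1.2 factor is the loading overhead baked into the formula):

```python
from ecologits.impacts.llm import model_required_memory

# 70B parameters at 16-bit precision: 1.2 * 70 * 16 / 8 = 168 (GB, under the
# billions-of-parameters unit assumption).
memory = model_required_memory(
    model_total_parameter_count=70,
    model_quantization_bits=16,
)
assert abs(memory - 168.0) < 1e-9
```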


@@ -69,6 +104,16 @@ def gpu_required_count(
    model_required_memory: float,
    gpu_memory: float
) -> int:
    """
    Compute the number of GPUs required to store the model.

    Args:
        model_required_memory: Required memory to load the model on GPU.
        gpu_memory: Amount of memory available on a single GPU.

    Returns:
        The number of required GPUs to load the model.
    """
    return ceil(model_required_memory / gpu_memory)
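Continuing the memory example, with a hypothetical 80 GB GPU (an assumed capacity, not a library constant):

```python
from ecologits.impacts.llm import gpu_required_count

# ceil(168 / 80) = ceil(2.1) = 3 GPUs for the 168 GB example above.
assert gpu_required_count(model_required_memory=168.0, gpu_memory=80.0) == 3
```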


@@ -79,6 +124,18 @@ def server_energy(
    server_gpu_count: int,
    gpu_required_count: int
) -> float:
    """
    Compute the energy consumption of the server.

    Args:
        generation_latency: Token generation latency.
        server_power: Power consumption of the server.
        server_gpu_count: Number of available GPUs in the server.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The energy consumption of the server (GPUs are not included).
    """
    return generation_latency * server_power * (gpu_required_count / server_gpu_count)


@@ -89,6 +146,18 @@ def request_energy(
    gpu_required_count: int,
    gpu_energy: float
) -> float:
    """
    Compute the energy consumption of the request.

    Args:
        datacenter_pue: PUE of the datacenter.
        server_energy: Energy consumption of the server.
        gpu_required_count: Number of required GPUs to load the model.
        gpu_energy: Energy consumption of a single GPU.

    Returns:
        The energy consumption of the request.
    """
    return datacenter_pue * (server_energy + gpu_required_count * gpu_energy)
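Chaining the illustrative numbers from the previous examples through both energy helpers (all values, including the PUE, are placeholders; the same direct-call assumption applies):

```python
from ecologits.impacts.llm import request_energy, server_energy

e_server = server_energy(
    generation_latency=10.0,    # from the latency example above
    server_power=1.0,           # placeholder server power draw
    server_gpu_count=8,
    gpu_required_count=3,
)
assert abs(e_server - 3.75) < 1e-9      # 10.0 * 1.0 * 3/8

e_request = request_energy(
    datacenter_pue=1.2,         # placeholder PUE
    server_energy=e_server,
    gpu_required_count=3,
    gpu_energy=1.44,            # from the gpu_energy example above
)
assert abs(e_request - 9.684) < 1e-9    # 1.2 * (3.75 + 3 * 1.44)
```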


@@ -97,6 +166,16 @@ def request_usage_gwp(
    request_energy: float,
    if_electricity_mix_gwp: float
) -> float:
    """
    Compute the Global Warming Potential (GWP) usage impact of the request.

    Args:
        request_energy: Energy consumption of the request.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption.

    Returns:
        The GWP usage impact of the request.
    """
    return request_energy * if_electricity_mix_gwp


@@ -105,6 +184,16 @@ def request_usage_adpe(
    request_energy: float,
    if_electricity_mix_adpe: float
) -> float:
    """
    Compute the Abiotic Depletion Potential for Elements (ADPe) usage impact of the request.

    Args:
        request_energy: Energy consumption of the request.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption.

    Returns:
        The ADPe usage impact of the request.
    """
    return request_energy * if_electricity_mix_adpe


@@ -113,6 +202,16 @@ def request_usage_pe(
    request_energy: float,
    if_electricity_mix_pe: float
) -> float:
    """
    Compute the Primary Energy (PE) usage impact of the request.

    Args:
        request_energy: Energy consumption of the request.
        if_electricity_mix_pe: PE impact factor of electricity consumption.

    Returns:
        The PE usage impact of the request.
    """
    return request_energy * if_electricity_mix_pe
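All three usage impacts share the same shape, request energy times an electricity-mix impact factor; one illustration with a placeholder GWP factor stands in for ADPe and PE as well:

```python
from ecologits.impacts.llm import request_usage_gwp

# Placeholder impact factor; real factors depend on the electricity mix.
gwp = request_usage_gwp(request_energy=9.684, if_electricity_mix_gwp=0.5)
assert abs(gwp - 4.842) < 1e-9
```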


@@ -123,6 +222,18 @@ def server_gpu_embodied_gwp(
    gpu_embodied_gwp: float,
    gpu_required_count: int
) -> float:
    """
    Compute the Global Warming Potential (GWP) embodied impact of the server.

    Args:
        server_embodied_gwp: GWP embodied impact of the server.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_gwp: GWP embodied impact of a single GPU.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The GWP embodied impact of the server and the GPUs.
    """
    return (gpu_required_count / server_gpu_count) * server_embodied_gwp + gpu_required_count * gpu_embodied_gwp


@@ -133,6 +244,18 @@ def server_gpu_embodied_adpe(
    gpu_embodied_adpe: float,
    gpu_required_count: int
) -> float:
    """
    Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the server.

    Args:
        server_embodied_adpe: ADPe embodied impact of the server.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The ADPe embodied impact of the server and the GPUs.
    """
    return (gpu_required_count / server_gpu_count) * server_embodied_adpe + gpu_required_count * gpu_embodied_adpe


@@ -143,6 +266,18 @@ def server_gpu_embodied_pe(
    gpu_embodied_pe: float,
    gpu_required_count: int
) -> float:
    """
    Compute the Primary Energy (PE) embodied impact of the server.

    Args:
        server_embodied_pe: PE embodied impact of the server.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_pe: PE embodied impact of a single GPU.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The PE embodied impact of the server and the GPUs.
    """
    return (gpu_required_count / server_gpu_count) * server_embodied_pe + gpu_required_count * gpu_embodied_pe


@@ -152,6 +287,17 @@ def request_embodied_gwp(
    server_lifetime: float,
    generation_latency: float
) -> float:
    """
    Compute the Global Warming Potential (GWP) embodied impact of the request.

    Args:
        server_gpu_embodied_gwp: GWP embodied impact of the server and the GPUs.
        server_lifetime: Lifetime duration of the server.
        generation_latency: Token generation latency.

    Returns:
        The GWP embodied impact of the request.
    """
    return (generation_latency / server_lifetime) * server_gpu_embodied_gwp


@@ -161,6 +307,17 @@ def request_embodied_adpe(
    server_lifetime: float,
    generation_latency: float
) -> float:
    """
    Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the request.

    Args:
        server_gpu_embodied_adpe: ADPe embodied impact of the server and the GPUs.
        server_lifetime: Lifetime duration of the server.
        generation_latency: Token generation latency.

    Returns:
        The ADPe embodied impact of the request.
    """
    return (generation_latency / server_lifetime) * server_gpu_embodied_adpe


@@ -170,6 +327,17 @@ def request_embodied_pe(
    server_lifetime: float,
    generation_latency: float
) -> float:
    """
    Compute the Primary Energy (PE) embodied impact of the request.

    Args:
        server_gpu_embodied_pe: PE embodied impact of the server and the GPUs.
        server_lifetime: Lifetime duration of the server.
        generation_latency: Token generation latency.

    Returns:
        The PE embodied impact of the request.
    """
    return (generation_latency / server_lifetime) * server_gpu_embodied_pe
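The embodied side chains the same way: the hardware's embodied impact is split pro rata by GPUs used, then the request is charged for the slice of the server's lifetime it occupied. A sketch with placeholder impact values, assuming the lifetime is expressed in seconds:

```python
from ecologits.impacts.llm import request_embodied_gwp, server_gpu_embodied_gwp

hw_gwp = server_gpu_embodied_gwp(
    server_embodied_gwp=9000.0,   # placeholder, server without its GPUs
    server_gpu_count=8,
    gpu_embodied_gwp=150.0,       # placeholder, per GPU
    gpu_required_count=3,
)
assert abs(hw_gwp - 3825.0) < 1e-9     # (3/8) * 9000 + 3 * 150

lifetime = 5 * 365 * 24 * 3600         # 5 years in seconds (assumed unit)
req_gwp = request_embodied_gwp(
    server_gpu_embodied_gwp=hw_gwp,
    server_lifetime=lifetime,
    generation_latency=10.0,
)
# The request bears 10 s out of the server's whole lifetime.
assert abs(req_gwp - 3825.0 * 10.0 / lifetime) < 1e-12
```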


@@ -198,6 +366,37 @@ def compute_llm_impacts(
    if_electricity_mix_adpe: Optional[float] = IF_ELECTRICITY_MIX_ADPE,
    if_electricity_mix_pe: Optional[float] = IF_ELECTRICITY_MIX_PE,
) -> Impacts:
    """
    Compute the impacts of an LLM generation request.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        model_total_parameter_count: Number of parameters of the model.
        output_token_count: Number of generated tokens.
        request_latency: Measured request latency.
        model_quantization_bits: Number of bits used to represent the model weights.
        gpu_energy_alpha: Alpha parameter of the GPU linear power consumption profile.
        gpu_energy_beta: Beta parameter of the GPU linear power consumption profile.
        gpu_latency_alpha: Alpha parameter of the GPU linear latency profile.
        gpu_latency_beta: Beta parameter of the GPU linear latency profile.
        gpu_memory: Amount of memory available on a single GPU.
        gpu_embodied_gwp: GWP embodied impact of a single GPU.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU.
        gpu_embodied_pe: PE embodied impact of a single GPU.
        server_gpu_count: Number of available GPUs in the server.
        server_power: Power consumption of the server.
        server_embodied_gwp: GWP embodied impact of the server.
        server_embodied_adpe: ADPe embodied impact of the server.
        server_embodied_pe: PE embodied impact of the server.
        server_lifetime: Lifetime duration of the server.
        datacenter_pue: PUE of the datacenter.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption.
        if_electricity_mix_pe: PE impact factor of electricity consumption.

    Returns:
        The impacts of an LLM generation request.
    """
    results = dag.execute(
        model_active_parameter_count=model_active_parameter_count,
        model_total_parameter_count=model_total_parameter_count,
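Finally, a minimal sketch of calling compute_llm_impacts directly; the parameter values are illustrative, the remaining arguments keep the defaults shown in the signature, and the printed attributes mirror the ones used in the EcoLogits example earlier in this diff:

```python
from ecologits.impacts.llm import compute_llm_impacts

impacts = compute_llm_impacts(
    model_active_parameter_count=45,   # illustrative counts (same unit the
    model_total_parameter_count=45,    # library uses for model parameters)
    output_token_count=250,
    request_latency=4.2,               # measured latency, illustrative
)
print(f"Energy consumption: {impacts.energy.value}")
print(f"GHG emissions: {impacts.gwp.value}")
```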
