Skip to content

Commit

Permalink
feat: nested cli sub-commands for plots
Browse files Browse the repository at this point in the history
  • Loading branch information
nishaq503 committed Nov 10, 2023
1 parent 235ed63 commit 86132c5
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 80 deletions.
2 changes: 2 additions & 0 deletions cakes-results/python/cakes_results/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
"""Plots and analysis of the results of the Cakes project."""

from . import scaling # noqa: F401
51 changes: 2 additions & 49 deletions cakes-results/python/cakes_results/__main__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
"""Provides the CLI for the plots and analysis of the Cakes results."""

import concurrent.futures
import logging
import pathlib

import tqdm
import typer

from cakes_results import scaling_plots
from cakes_results import scaling

# Initialize the logger
logging.basicConfig(
Expand All @@ -18,51 +15,7 @@
logger.setLevel("INFO")

app = typer.Typer()


# CLI command: submits one plotting job per `*.json` file found directly in
# `input_dir` to a process pool and waits for all of them behind a progress bar.
# The docstring below is kept short on purpose: Typer uses it as the `--help` text.
@app.command()
def main(
    input_dir: pathlib.Path = typer.Option(
        ...,
        "--input-dir",
        "-i",
        help="The directory containing the reports from the scaling experiments.",
        exists=True,
        readable=True,
        file_okay=False,
        resolve_path=True,
    ),
    output_dir: pathlib.Path = typer.Option(
        ...,
        "--output-dir",
        "-o",
        help="The directory to save the plots.",
        exists=True,
        writable=True,
        file_okay=False,
        resolve_path=True,
    ),
) -> None:
    """Create the plots for the scaling results of the Cakes search."""
    logger.info(f"input_dir = {input_dir}")
    logger.info(f"output_dir = {output_dir}")

    # Only JSON files at the top level of `input_dir` are picked up (no recursion).
    files = list(input_dir.glob("*.json"))
    logger.info(f"Found {len(files)} json files.")

    with concurrent.futures.ProcessPoolExecutor() as executor:
        # One job per report; `False` is passed positionally as the second
        # argument of `plot_throughput`.
        futures: list[concurrent.futures.Future[bool]] = [
            executor.submit(scaling_plots.plot_throughput, json_path, False, output_dir)
            for json_path in files
        ]

        # Distinct loop names for paths and futures keep the types unambiguous,
        # so no `type: ignore` is needed on the `result()` call.
        for future in tqdm.tqdm(
            concurrent.futures.as_completed(futures),
            total=len(futures),
            desc="Processing files",
        ):
            # The boolean result is discarded; calling `result()` surfaces any
            # exception raised inside the worker process.
            future.result()
app.add_typer(scaling.app, name="scaling")


if __name__ == "__main__":
Expand Down
5 changes: 5 additions & 0 deletions cakes-results/python/cakes_results/scaling/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Plots and analysis of the search scaling results for Cakes."""

from . import reports # noqa: F401
from .plots import create_plots # noqa: F401
from .app import app # noqa: F401
61 changes: 61 additions & 0 deletions cakes-results/python/cakes_results/scaling/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""CLI command to create the plots for the scaling results of the Cakes search."""

import concurrent.futures
import logging
import pathlib

import tqdm
import typer

from . import create_plots as _create_plots

# Initialize the logger
# Module-level logger registered under the name "scaling", with its level set to INFO.
logger = logging.getLogger("scaling")
logger.setLevel("INFO")

# Typer application that the commands in this module are registered on via
# `@app.command()`.
app = typer.Typer()


# CLI command: submits one plotting job per `*.json` file found directly in
# `input_dir` to a process pool and waits for all of them behind a progress bar.
# The docstring below is kept short on purpose: Typer uses it as the `--help` text.
@app.command()
def create_plots(
    input_dir: pathlib.Path = typer.Option(
        ...,
        "--input-dir",
        "-i",
        help="The directory containing the reports from the scaling experiments.",
        exists=True,
        readable=True,
        file_okay=False,
        resolve_path=True,
    ),
    output_dir: pathlib.Path = typer.Option(
        ...,
        "--output-dir",
        "-o",
        help="The directory to save the plots.",
        exists=True,
        writable=True,
        file_okay=False,
        resolve_path=True,
    ),
) -> None:
    """Create the plots for the scaling results of the Cakes search."""
    logger.info(f"input_dir = {input_dir}")
    logger.info(f"output_dir = {output_dir}")

    # Only JSON files at the top level of `input_dir` are picked up (no recursion).
    files = list(input_dir.glob("*.json"))
    logger.info(f"Found {len(files)} json files.")

    with concurrent.futures.ProcessPoolExecutor() as executor:
        # One job per report. `_create_plots` is the plotting function imported
        # from the package (this command shadows its public name); `False` is
        # passed positionally as its second argument.
        futures: list[concurrent.futures.Future[bool]] = [
            executor.submit(_create_plots, json_path, False, output_dir)
            for json_path in files
        ]

        # Distinct loop names for paths and futures keep the types unambiguous,
        # so no `type: ignore` is needed on the `result()` call.
        for future in tqdm.tqdm(
            concurrent.futures.as_completed(futures),
            total=len(futures),
            desc="Processing files",
        ):
            # The boolean result is discarded; calling `result()` surfaces any
            # exception raised inside the worker process.
            future.result()
Original file line number Diff line number Diff line change
@@ -1,38 +1,9 @@
"""Plots for the scaling results of the Cakes scaling benchmarks."""

import enum
import json
import pathlib
import typing

import matplotlib.pyplot as plt
import pandas
import pydantic


class Report(pydantic.BaseModel):
    """Report of the scaling results of the Cakes search."""

    # Fields populated from the keys of the report's JSON file.
    dataset: str
    metric: str
    base_cardinality: int
    dimensionality: int
    num_queries: int
    error_rate: float
    ks: list[int]
    # Path of the CSV file read by `to_pandas`. `from_json` overwrites this with
    # a path next to the JSON file; the default is the working directory as
    # resolved when the class is defined.
    csv_path: pathlib.Path = pathlib.Path(".").resolve()

    @classmethod
    def from_json(cls, json_path: pathlib.Path) -> "Report":
        """Load the report from a JSON file.

        The JSON object must contain a ``csv_name`` entry; it is removed from
        the parsed contents and replaced by ``csv_path``, which points at that
        file name inside the directory holding ``json_path``.

        Args:
            json_path: Path to the JSON report file.

        Returns:
            The report built from the remaining JSON entries.

        Raises:
            KeyError: If the JSON object has no ``csv_name`` entry.
        """
        # JSON text is read as UTF-8 explicitly so that parsing does not depend
        # on the platform's locale encoding.
        with json_path.open("r", encoding="utf-8") as json_file:
            contents: dict[str, typing.Any] = json.load(json_file)
        contents["csv_path"] = json_path.parent.joinpath(contents.pop("csv_name"))
        # `cls` rather than a hard-coded class name, so subclasses get their own type.
        return cls(**contents)

    def to_pandas(self) -> pandas.DataFrame:
        """Read the CSV file at ``csv_path`` into a pandas DataFrame."""
        return pandas.read_csv(self.csv_path)
from .reports import Report


class Markers(str, enum.Enum):
Expand Down Expand Up @@ -71,7 +42,7 @@ def marker(self) -> str:
return m


def plot_throughput(
def create_plots(
json_path: pathlib.Path, make_title: bool, output_dir: pathlib.Path
) -> bool:
"""Plot the throughput of the Cakes search."""
Expand Down
33 changes: 33 additions & 0 deletions cakes-results/python/cakes_results/scaling/reports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Parser for the scaling results of the Cakes search."""

import json
import pathlib
import typing

import pandas
import pydantic


class Report(pydantic.BaseModel):
    """Report of the scaling results of the Cakes search."""

    # Fields populated from the keys of the report's JSON file.
    dataset: str
    metric: str
    base_cardinality: int
    dimensionality: int
    num_queries: int
    error_rate: float
    ks: list[int]
    # Path of the CSV file read by `to_pandas`. `from_json` overwrites this with
    # a path next to the JSON file; the default is the working directory as
    # resolved when the class is defined.
    csv_path: pathlib.Path = pathlib.Path(".").resolve()

    @classmethod
    def from_json(cls, json_path: pathlib.Path) -> "Report":
        """Load the report from a JSON file.

        The JSON object must contain a ``csv_name`` entry; it is removed from
        the parsed contents and replaced by ``csv_path``, which points at that
        file name inside the directory holding ``json_path``.

        Args:
            json_path: Path to the JSON report file.

        Returns:
            The report built from the remaining JSON entries.

        Raises:
            KeyError: If the JSON object has no ``csv_name`` entry.
        """
        # JSON text is read as UTF-8 explicitly so that parsing does not depend
        # on the platform's locale encoding.
        with json_path.open("r", encoding="utf-8") as json_file:
            contents: dict[str, typing.Any] = json.load(json_file)
        contents["csv_path"] = json_path.parent.joinpath(contents.pop("csv_name"))
        # `cls` rather than a hard-coded class name, so subclasses get their own type.
        return cls(**contents)

    def to_pandas(self) -> pandas.DataFrame:
        """Read the CSV file at ``csv_path`` into a pandas DataFrame."""
        return pandas.read_csv(self.csv_path)

0 comments on commit 86132c5

Please sign in to comment.