From a35909580b97a4cfcbd1b802c5425f1af36edfad Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Tue, 24 Sep 2024 13:44:13 +0000 Subject: [PATCH 01/12] Rebase against pinned dependencies --- .devcontainer/devcontainer.json | 4 + .github/pages/make_switcher.py | 2 +- Dockerfile | 10 ++ helm/blueapi/values.yaml | 40 +++++-- pyproject.toml | 1 + src/blueapi/cli/cli.py | 3 + src/blueapi/client/client.py | 26 +++++ src/blueapi/client/rest.py | 11 +- src/blueapi/config.py | 18 +++ src/blueapi/service/interface.py | 20 ++++ src/blueapi/service/main.py | 47 ++++++++ src/blueapi/service/runner.py | 38 ++++++- src/blueapi/worker/task_worker.py | 118 +++++++++++++++++--- tests/unit_tests/worker/test_task_worker.py | 28 +++-- 14 files changed, 327 insertions(+), 39 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 79b85ff41..266ebe128 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -39,6 +39,10 @@ // Make sure SELinux does not disable with access to host filesystems like tmp "--security-opt=label=disable" ], + // Remove this before pusing, only to allow local testing with editable library + "mounts": [ + "source=/scratch/athena/observability-utils,target=/scratch/athena/observability-utils,type=bind,consistency=cached" + ], // Mount the parent as /workspaces so we can pip install peers as editable "workspaceMount": "source=${localWorkspaceFolder}/..,target=/workspaces,type=bind", // After the container is created, install the python project in editable form diff --git a/.github/pages/make_switcher.py b/.github/pages/make_switcher.py index 6d90f4905..14577cce6 100755 --- a/.github/pages/make_switcher.py +++ b/.github/pages/make_switcher.py @@ -52,7 +52,7 @@ def get_versions(ref: str, add: str | None) -> list[str]: return versions -def write_json(path: Path, repository: str, versions: str): +def write_json(path: Path, repository: str, versions: list[str]): org, repo_name = repository.split("/") struct = [ {"version": version, "url": f"https://{org}.github.io/{repo_name}/{version}/"} diff --git a/Dockerfile b/Dockerfile index dd1a2398f..7ec1b29f5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ RUN python -m venv /venv ENV PATH=/venv/bin:$PATH +# enable opentelemetry support +ENV OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http/protobuf +# Chnage this to point to Jaeger server before merging +ENV OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:4318 +# Change this to enable proper secrity before merging +ENV OTEL_EXPORTER_OTLP_INSECURE=true +# Ensure that all Http headers are captured +ENV OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST=".*" +ENV OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE=".*" + # The build stage installs the context into the venv FROM developer as build COPY . /context diff --git a/helm/blueapi/values.yaml b/helm/blueapi/values.yaml index d32e9f1eb..feb7adccf 100644 --- a/helm/blueapi/values.yaml +++ b/helm/blueapi/values.yaml @@ -76,13 +76,35 @@ listener: resources: {} # Additional envVars to mount to the pod as a String -extraEnvVars: [] +extraEnvVars: | + - name: OTEL_EXPORTER_OTLP_TRACES_PROTOCOL + value: {{ .Values.jaeger.otlp.protocol }} + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "{{ .Values.jaeger.otlp.host }}:{{ .Values.jaeger.otlp.port }}" + - name: OTEL_EXPORTER_OTLP_INSECURE + value: "{{ .Values.jaeger.otlp.insecure }}" + - name: OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST + value: {{ .Values.jaeger.otlp.request.headers }} + - name: OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE + value: {{ .Values.jaeger.otlp.response.headers }} # - name: RABBITMQ_PASSWORD # valueFrom: # secretKeyRef: # name: rabbitmq-password # key: rabbitmq-password +jaeger: + otlp: + protocol: http/protobuf + insecure: true + # + host: http://localhost + port: 4318 + request: + headers: ".*" + response: + headers: ".*" + # Config for the worker goes here, will be mounted into a config file worker: api: @@ -90,14 +112,14 @@ worker: port: 8000 env: sources: - - kind: deviceFunctions - module: blueapi.startup.example_devices - - kind: planFunctions - module: blueapi.startup.example_plans - - kind: planFunctions - module: dls_bluesky_core.plans - - kind: planFunctions - module: dls_bluesky_core.stubs + - kind: deviceFunctions + module: blueapi.startup.example_devices + - kind: planFunctions + module: blueapi.startup.example_plans + - kind: planFunctions + module: dls_bluesky_core.plans + - kind: planFunctions + module: dls_bluesky_core.stubs stomp: auth: username: guest diff --git a/pyproject.toml b/pyproject.toml index c55616ba4..aa6bbd4fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "GitPython", "bluesky-stomp>=0.1.2", "event-model==1.21", # https://github.com/DiamondLightSource/blueapi/issues/684 + "observability-utils>=0.1.2", ] dynamic = ["version"] license.file = "LICENSE" diff --git a/src/blueapi/cli/cli.py b/src/blueapi/cli/cli.py index e898c5b81..0fb105684 100644 --- a/src/blueapi/cli/cli.py +++ b/src/blueapi/cli/cli.py @@ -9,6 +9,7 @@ from bluesky.callbacks.best_effort import BestEffortCallback from bluesky_stomp.messaging import MessageContext, StompClient from bluesky_stomp.models import Broker +from observability_utils.tracing import setup_tracing from pydantic import ValidationError from requests.exceptions import ConnectionError @@ -41,6 +42,7 @@ def main(ctx: click.Context, config: Path | None | tuple[Path, ...]) -> None: # if no command is supplied, run with the options passed + setup_tracing("BlueAPI") # initialise TracerProvider for server app config_loader = ConfigLoader(ApplicationConfig) if config is not None: configs = (config,) if isinstance(config, Path) else config @@ -103,6 +105,7 @@ def start_application(obj: dict): def controller(ctx: click.Context, output: str) -> None: """Client utility for controlling and introspecting the worker""" + setup_tracing("BlueAPICLI") # initialise TracerProvider for controller app if ctx.invoked_subcommand is None: print("Please invoke subcommand!") return diff --git a/src/blueapi/client/client.py b/src/blueapi/client/client.py index 4468bf10e..486f3b86d 100644 --- a/src/blueapi/client/client.py +++ b/src/blueapi/client/client.py @@ -3,6 +3,10 @@ from bluesky_stomp.messaging import MessageContext, StompClient from bluesky_stomp.models import Broker +from observability_utils.tracing import ( + get_tracer, + start_as_current_span, +) from blueapi.config import ApplicationConfig from blueapi.core.bluesky_types import DataEvent @@ -22,6 +26,8 @@ from .event_bus import AnyEvent, BlueskyStreamingError, EventBusClient, OnAnyEvent from .rest import BlueapiRestClient, BlueskyRemoteControlError +TRACER = get_tracer("client") + class BlueapiClient: """Unified client for controlling blueapi""" @@ -53,6 +59,7 @@ def from_config(cls, config: ApplicationConfig) -> "BlueapiClient": events = None return cls(rest, events) + @start_as_current_span(TRACER) def get_plans(self) -> PlanResponse: """ List plans available @@ -62,6 +69,7 @@ def get_plans(self) -> PlanResponse: """ return self._rest.get_plans() + @start_as_current_span(TRACER, "name") def get_plan(self, name: str) -> PlanModel: """ Get details of a single plan @@ -74,6 +82,7 @@ def get_plan(self, name: str) -> PlanModel: """ return self._rest.get_plan(name) + @start_as_current_span(TRACER) def get_devices(self) -> DeviceResponse: """ List devices available @@ -84,6 +93,7 @@ def get_devices(self) -> DeviceResponse: return self._rest.get_devices() + @start_as_current_span(TRACER, "name") def get_device(self, name: str) -> DeviceModel: """ Get details of a single device @@ -97,6 +107,7 @@ def get_device(self, name: str) -> DeviceModel: return self._rest.get_device(name) + @start_as_current_span(TRACER) def get_state(self) -> WorkerState: """ Get current state of the blueapi worker @@ -107,6 +118,7 @@ def get_state(self) -> WorkerState: return self._rest.get_state() + @start_as_current_span(TRACER, "defer") def pause(self, defer: bool = False) -> WorkerState: """ Pause execution of the current task, if any @@ -122,6 +134,7 @@ def pause(self, defer: bool = False) -> WorkerState: return self._rest.set_state(WorkerState.PAUSED, defer=defer) + @start_as_current_span(TRACER) def resume(self) -> WorkerState: """ Resume plan execution if previously paused @@ -133,6 +146,7 @@ def resume(self) -> WorkerState: return self._rest.set_state(WorkerState.RUNNING, defer=False) + @start_as_current_span(TRACER, "task_id") def get_task(self, task_id: str) -> TrackableTask[Task]: """ Get a task stored by the worker @@ -146,6 +160,7 @@ def get_task(self, task_id: str) -> TrackableTask[Task]: assert task_id, "Task ID not provided!" return self._rest.get_task(task_id) + @start_as_current_span(TRACER) def get_all_tasks(self) -> TasksListResponse: """ Get a list of all task stored by the worker @@ -156,6 +171,7 @@ def get_all_tasks(self) -> TasksListResponse: return self._rest.get_all_tasks() + @start_as_current_span(TRACER) def get_active_task(self) -> WorkerTask: """ Get the currently active task, if any @@ -167,6 +183,7 @@ def get_active_task(self) -> WorkerTask: return self._rest.get_active_task() + @start_as_current_span(TRACER, "task", "timeout") def run_task( self, task: Task, @@ -229,6 +246,7 @@ def inner_on_event(event: AnyEvent, ctx: MessageContext) -> None: self.start_task(WorkerTask(task_id=task_id)) return complete.result(timeout=timeout) + @start_as_current_span(TRACER, "task") def create_and_start_task(self, task: Task) -> TaskResponse: """ Create a new task and instruct the worker to start it @@ -251,6 +269,7 @@ def create_and_start_task(self, task: Task) -> TaskResponse: f"but {worker_response.task_id} was started instead" ) + @start_as_current_span(TRACER, "task") def create_task(self, task: Task) -> TaskResponse: """ Create a new task, does not start execution @@ -264,6 +283,7 @@ def create_task(self, task: Task) -> TaskResponse: return self._rest.create_task(task) + @start_as_current_span(TRACER) def clear_task(self, task_id: str) -> TaskResponse: """ Delete a stored task on the worker @@ -277,6 +297,7 @@ def clear_task(self, task_id: str) -> TaskResponse: return self._rest.clear_task(task_id) + @start_as_current_span(TRACER, "task") def start_task(self, task: WorkerTask) -> WorkerTask: """ Instruct the worker to start a stored task immediately @@ -290,6 +311,7 @@ def start_task(self, task: WorkerTask) -> WorkerTask: return self._rest.update_worker_task(task) + @start_as_current_span(TRACER, "reason") def abort(self, reason: str | None = None) -> WorkerState: """ Abort the plan currently being executed, if any. @@ -310,6 +332,7 @@ def abort(self, reason: str | None = None) -> WorkerState: reason=reason, ) + @start_as_current_span(TRACER) def stop(self) -> WorkerState: """ Stop execution of the current plan early. @@ -323,6 +346,7 @@ def stop(self) -> WorkerState: return self._rest.cancel_current_task(WorkerState.STOPPING) + @start_as_current_span(TRACER) def get_environment(self) -> EnvironmentResponse: """ Get details of the worker environment @@ -334,6 +358,7 @@ def get_environment(self) -> EnvironmentResponse: return self._rest.get_environment() + @start_as_current_span(TRACER, "timeout", "polling_interval") def reload_environment( self, timeout: float | None = None, @@ -366,6 +391,7 @@ def reload_environment( polling_interval, ) + @start_as_current_span(TRACER, "timeout", "polling_interval") def _wait_for_reload( self, status: EnvironmentResponse, diff --git a/src/blueapi/client/rest.py b/src/blueapi/client/rest.py index 2ec60a1c7..274bc72cb 100644 --- a/src/blueapi/client/rest.py +++ b/src/blueapi/client/rest.py @@ -2,6 +2,11 @@ from typing import Any, Literal, TypeVar import requests +from observability_utils.tracing import ( + get_context_propagator, + get_tracer, + start_as_current_span, +) from pydantic import TypeAdapter from blueapi.config import RestConfig @@ -19,6 +24,8 @@ T = TypeVar("T") +TRACER = get_tracer("rest") + class BlueskyRemoteControlError(Exception): def __init__(self, message: str) -> None: @@ -118,6 +125,7 @@ def delete_environment(self) -> EnvironmentResponse: "/environment", EnvironmentResponse, method="DELETE" ) + @start_as_current_span(TRACER, "method", "data", "suffix") def _request_and_deserialize( self, suffix: str, @@ -127,8 +135,9 @@ def _request_and_deserialize( get_exception: Callable[[requests.Response], Exception | None] = _exception, ) -> T: url = self._url(suffix) + carr = get_context_propagator() if data: - response = requests.request(method, url, json=data) + response = requests.request(method, url, json=data, headers=carr) else: response = requests.request(method, url) exception = get_exception(response) diff --git a/src/blueapi/config.py b/src/blueapi/config.py index 16e470c0d..baea502ec 100644 --- a/src/blueapi/config.py +++ b/src/blueapi/config.py @@ -23,6 +23,24 @@ class Source(BaseModel): module: Path | str +# class BasicAuthentication(BaseModel): +# """ +# Log in details for when a server uses authentication. +# If username or passcode match exactly the regex ^\\${(.*)}$ +# they attempt to replace with an environment variable of the same. +# i.e. ${foo} or ${FOO} are replaced with the value of FOO +# """ + +# username: str = "test" # "guest" +# passcode: str = "test" # "guest" + +# @validator("username", "passcode") +# def get_from_env(cls, v: str): +# if v.startswith("${") and v.endswith("}"): +# return os.environ[v.removeprefix("${").removesuffix("}").upper()] +# return v + + class StompConfig(BaseModel): """ Config for connecting to stomp broker diff --git a/src/blueapi/service/interface.py b/src/blueapi/service/interface.py index 2fe820abb..739384fcb 100644 --- a/src/blueapi/service/interface.py +++ b/src/blueapi/service/interface.py @@ -5,6 +5,7 @@ from bluesky_stomp.messaging import StompClient from bluesky_stomp.models import Broker, DestinationBase, MessageTopic +from observability_utils.tracing import get_tracer, use_propagated_context from blueapi.config import ApplicationConfig from blueapi.core.context import BlueskyContext @@ -17,6 +18,8 @@ """This module provides interface between web application and underlying Bluesky context and worker""" +TRACER = get_tracer("interface") +PROP_KEY = "carrier" _CONFIG: ApplicationConfig = ApplicationConfig() @@ -74,6 +77,7 @@ def stomp_client() -> StompClient | None: return None +@use_propagated_context def setup(config: ApplicationConfig) -> None: """Creates and starts a worker with supplied config""" @@ -86,6 +90,7 @@ def setup(config: ApplicationConfig) -> None: stomp_client() +@use_propagated_context def teardown() -> None: worker().stop() if (template := stomp_client()) is not None: @@ -110,36 +115,43 @@ def forward_message(event: Any, correlation_id: str | None) -> None: stream.subscribe(forward_message) +@use_propagated_context def get_plans() -> list[PlanModel]: """Get all available plans in the BlueskyContext""" return [PlanModel.from_plan(plan) for plan in context().plans.values()] +@use_propagated_context def get_plan(name: str) -> PlanModel: """Get plan by name from the BlueskyContext""" return PlanModel.from_plan(context().plans[name]) +@use_propagated_context def get_devices() -> list[DeviceModel]: """Get all available devices in the BlueskyContext""" return [DeviceModel.from_device(device) for device in context().devices.values()] +@use_propagated_context def get_device(name: str) -> DeviceModel: """Retrieve device by name from the BlueskyContext""" return DeviceModel.from_device(context().devices[name]) +@use_propagated_context def submit_task(task: Task) -> str: """Submit a task to be run on begin_task""" return worker().submit_task(task) +@use_propagated_context def clear_task(task_id: str) -> str: """Remove a task from the worker""" return worker().clear_task(task_id) +@use_propagated_context def begin_task(task: WorkerTask) -> WorkerTask: """Trigger a task. Will fail if the worker is busy""" if task.task_id is not None: @@ -147,43 +159,51 @@ def begin_task(task: WorkerTask) -> WorkerTask: return task +@use_propagated_context def get_tasks_by_status(status: TaskStatusEnum) -> list[TrackableTask]: """Retrieve a list of tasks based on their status.""" return worker().get_tasks_by_status(status) +@use_propagated_context def get_active_task() -> TrackableTask | None: """Task the worker is currently running""" return worker().get_active_task() +@use_propagated_context def get_worker_state() -> WorkerState: """State of the worker""" return worker().state +@use_propagated_context def pause_worker(defer: bool | None) -> None: """Command the worker to pause""" worker().pause(defer or False) +@use_propagated_context def resume_worker() -> None: """Command the worker to resume""" worker().resume() +@use_propagated_context def cancel_active_task(failure: bool, reason: str | None) -> str: """Remove the currently active task from the worker if there is one Returns the task_id of the active task""" return worker().cancel_active_task(failure, reason) +@use_propagated_context def get_tasks() -> list[TrackableTask]: """Return a list of all tasks on the worker, any one of which can be triggered with begin_task""" return worker().get_tasks() +@use_propagated_context def get_task_by_id(task_id: str) -> TrackableTask | None: """Returns a task matching the task ID supplied, if the worker knows of it""" diff --git a/src/blueapi/service/main.py b/src/blueapi/service/main.py index 2f4651f6a..ce9c16fe1 100644 --- a/src/blueapi/service/main.py +++ b/src/blueapi/service/main.py @@ -10,6 +10,14 @@ Response, status, ) +from observability_utils.tracing import ( + add_span_attributes, + get_tracer, + start_as_current_span, +) +from opentelemetry.context import attach +from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor +from opentelemetry.propagate import get_global_textmap from pydantic import ValidationError from starlette.responses import JSONResponse from super_state_machine.errors import TransitionError @@ -36,6 +44,8 @@ RUNNER: WorkerDispatcher | None = None +CONTEXT_HDR = "traceparent" + def _runner() -> WorkerDispatcher: """Intended to be used only with FastAPI Depends""" @@ -75,6 +85,13 @@ async def lifespan(app: FastAPI): version=REST_API_VERSION, ) +FastAPIInstrumentor().instrument_app(app) +TRACER = get_tracer("API") +""" +Set up basic automated instrumentation for the FastAPI app, creating the +observability context. +""" + @app.exception_handler(KeyError) async def on_key_error_404(_: Request, __: KeyError): @@ -85,6 +102,7 @@ async def on_key_error_404(_: Request, __: KeyError): @app.get("/environment", response_model=EnvironmentResponse) +@start_as_current_span(TRACER, "runner") def get_environment( runner: WorkerDispatcher = Depends(_runner), ) -> EnvironmentResponse: @@ -93,6 +111,7 @@ def get_environment( @app.delete("/environment", response_model=EnvironmentResponse) +@start_as_current_span(TRACER, "background_tasks", "runner") async def delete_environment( background_tasks: BackgroundTasks, runner: WorkerDispatcher = Depends(_runner), @@ -105,6 +124,7 @@ async def delete_environment( @app.get("/plans", response_model=PlanResponse) +@start_as_current_span(TRACER) def get_plans(runner: WorkerDispatcher = Depends(_runner)): """Retrieve information about all available plans.""" return PlanResponse(plans=runner.run(interface.get_plans)) @@ -114,12 +134,14 @@ def get_plans(runner: WorkerDispatcher = Depends(_runner)): "/plans/{name}", response_model=PlanModel, ) +@start_as_current_span(TRACER, "name") def get_plan_by_name(name: str, runner: WorkerDispatcher = Depends(_runner)): """Retrieve information about a plan by its (unique) name.""" return runner.run(interface.get_plan, name) @app.get("/devices", response_model=DeviceResponse) +@start_as_current_span(TRACER) def get_devices(runner: WorkerDispatcher = Depends(_runner)): """Retrieve information about all available devices.""" return DeviceResponse(devices=runner.run(interface.get_devices)) @@ -129,6 +151,7 @@ def get_devices(runner: WorkerDispatcher = Depends(_runner)): "/devices/{name}", response_model=DeviceModel, ) +@start_as_current_span(TRACER, "name") def get_device_by_name(name: str, runner: WorkerDispatcher = Depends(_runner)): """Retrieve information about a devices by its (unique) name.""" return runner.run(interface.get_device, name) @@ -142,6 +165,7 @@ def get_device_by_name(name: str, runner: WorkerDispatcher = Depends(_runner)): response_model=TaskResponse, status_code=status.HTTP_201_CREATED, ) +@start_as_current_span(TRACER, "request", "task.name", "task.params") def submit_task( request: Request, response: Response, @@ -171,6 +195,7 @@ def submit_task( @app.delete("/tasks/{task_id}", status_code=status.HTTP_200_OK) +@start_as_current_span(TRACER, "task_id") def delete_submitted_task( task_id: str, runner: WorkerDispatcher = Depends(_runner), @@ -178,6 +203,7 @@ def delete_submitted_task( return TaskResponse(task_id=runner.run(interface.clear_task, task_id)) +@start_as_current_span(TRACER, "v") def validate_task_status(v: str) -> TaskStatusEnum: v_upper = v.upper() if v_upper not in TaskStatusEnum.__members__: @@ -186,6 +212,7 @@ def validate_task_status(v: str) -> TaskStatusEnum: @app.get("/tasks", response_model=TasksListResponse, status_code=status.HTTP_200_OK) +@start_as_current_span(TRACER) def get_tasks( task_status: str | None = None, runner: WorkerDispatcher = Depends(_runner), @@ -196,6 +223,7 @@ def get_tasks( """ tasks = [] if task_status: + add_span_attributes({"status": task_status}) try: desired_status = validate_task_status(task_status) except ValueError as e: @@ -215,6 +243,7 @@ def get_tasks( response_model=WorkerTask, responses={status.HTTP_409_CONFLICT: {"worker": "already active"}}, ) +@start_as_current_span(TRACER, "task.task_id") def set_active_task( task: WorkerTask, runner: WorkerDispatcher = Depends(_runner), @@ -234,6 +263,7 @@ def set_active_task( "/tasks/{task_id}", response_model=TrackableTask, ) +@start_as_current_span(TRACER, "task_id") def get_task( task_id: str, runner: WorkerDispatcher = Depends(_runner), @@ -246,6 +276,7 @@ def get_task( @app.get("/worker/task") +@start_as_current_span(TRACER) def get_active_task(runner: WorkerDispatcher = Depends(_runner)) -> WorkerTask: active = runner.run(interface.get_active_task) if active is not None: @@ -255,6 +286,7 @@ def get_active_task(runner: WorkerDispatcher = Depends(_runner)) -> WorkerTask: @app.get("/worker/state") +@start_as_current_span(TRACER) def get_state(runner: WorkerDispatcher = Depends(_runner)) -> WorkerState: """Get the State of the Worker""" return runner.run(interface.get_worker_state) @@ -283,6 +315,7 @@ def get_state(runner: WorkerDispatcher = Depends(_runner)) -> WorkerState: status.HTTP_202_ACCEPTED: {"detail": "Transition requested"}, }, ) +@start_as_current_span(TRACER, "state_change_request.new_state") def set_state( state_change_request: StateChangeRequest, response: Response, @@ -307,6 +340,7 @@ def set_state( """ current_state = runner.run(interface.get_worker_state) new_state = state_change_request.new_state + add_span_attributes({"current_state": current_state}) if ( current_state in _ALLOWED_TRANSITIONS and new_state in _ALLOWED_TRANSITIONS[current_state] @@ -330,6 +364,7 @@ def set_state( return runner.run(interface.get_worker_state) +@start_as_current_span(TRACER, "config") def start(config: ApplicationConfig): import uvicorn from uvicorn.config import LOGGING_CONFIG @@ -350,3 +385,15 @@ async def add_api_version_header(request: Request, call_next): response = await call_next(request) response.headers["X-API-Version"] = REST_API_VERSION return response + + +@app.middleware("http") +async def inject_propagated_observability_context(request: Request, call_next): + """Middleware to extract the any prorpagated observability context from the + HTTP headers and attatch it to the local one. + """ + if CONTEXT_HDR in request.headers: + ctx = get_global_textmap().extract({CONTEXT_HDR: request.headers[CONTEXT_HDR]}) + attach(ctx) + response = await call_next(request) + return response diff --git a/src/blueapi/service/runner.py b/src/blueapi/service/runner.py index e122bf7d7..b2efb9218 100644 --- a/src/blueapi/service/runner.py +++ b/src/blueapi/service/runner.py @@ -5,8 +5,14 @@ from importlib import import_module from multiprocessing import Pool, set_start_method from multiprocessing.pool import Pool as PoolClass -from typing import Any, ParamSpec, TypeVar - +from typing import Any, Concatenate, ParamSpec, TypeVar + +from observability_utils.tracing import ( + add_span_attributes, + get_context_propagator, + get_tracer, + start_as_current_span, +) from pydantic import TypeAdapter from blueapi.config import ApplicationConfig @@ -17,6 +23,7 @@ set_start_method("spawn", force=True) LOGGER = logging.getLogger(__name__) +TRACER = get_tracer("interface") P = ParamSpec("P") T = TypeVar("T") @@ -50,13 +57,18 @@ def __init__( initialized=False, ) + @start_as_current_span(TRACER) def reload(self): """Reload the subprocess to account for any changes in python modules""" self.stop() self.start() LOGGER.info("Runner reloaded") + @start_as_current_span(TRACER) def start(self): + add_span_attributes( + {"_use_subprocess": self._use_subprocess, "_config": self._config} + ) try: if self._use_subprocess: self._subprocess = Pool(initializer=_init_worker, processes=1) @@ -69,6 +81,7 @@ def start(self): ) LOGGER.exception(e) + @start_as_current_span(TRACER) def stop(self): try: self.run(teardown) @@ -86,18 +99,33 @@ def stop(self): ) LOGGER.exception(e) - def run(self, function: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T: + @start_as_current_span(TRACER, "function", "args", "kwargs") + def run( + self, + function: Callable[Concatenate[dict[str, Any], P], T], + *args: P.args, + **kwargs: P.kwargs, + ) -> T: + """Calls the supplied function, which is modified to accept a dict as it's new + first param, before being passed to the subprocess runner, or just run in place. + """ + add_span_attributes({"use_subprocess": self._use_subprocess}) if self._use_subprocess: return self._run_in_subprocess(function, *args, **kwargs) else: - return function(*args, **kwargs) + return function(get_context_propagator(), *args, **kwargs) + @start_as_current_span(TRACER, "function", "args", "kwargs") def _run_in_subprocess( self, function: Callable[P, T], *args: P.args, **kwargs: P.kwargs, ) -> T: + """Call the supplied function, prepending its parameter list with the current + Span ID from the observability context, if one exists. this will allow functions + decorated with @use_propagated_context to use the corresponding span as their + parent span.""" if self._subprocess is None: raise InvalidRunnerStateError("Subprocess runner has not been started") if not (hasattr(function, "__name__") and hasattr(function, "__module__")): @@ -115,7 +143,7 @@ def _run_in_subprocess( function.__module__, function.__name__, return_type, - *args, + (get_context_propagator(), *args), ), kwargs, ) diff --git a/src/blueapi/worker/task_worker.py b/src/blueapi/worker/task_worker.py index 3f41bd22f..9ae907901 100644 --- a/src/blueapi/worker/task_worker.py +++ b/src/blueapi/worker/task_worker.py @@ -9,6 +9,16 @@ from typing import Any, Generic, TypeVar from bluesky.protocols import Status +from observability_utils.tracing import ( + add_span_attributes, + get_trace_context, + get_tracer, + setup_tracing, + start_as_current_span, +) +from opentelemetry.baggage import get_baggage +from opentelemetry.context import Context +from opentelemetry.trace import SpanKind from pydantic import Field from super_state_machine.errors import TransitionError @@ -36,6 +46,8 @@ from .worker_errors import WorkerAlreadyStartedError, WorkerBusyError LOGGER = logging.getLogger(__name__) +TRACER = get_tracer("reworker") +""" Initialise a Tracer for this module provided by the app's global TracerProvider. """ DEFAULT_START_STOP_TIMEOUT: float = 30.0 @@ -48,6 +60,7 @@ class TrackableTask(BlueapiBaseModel, Generic[T]): """ task_id: str + request_id: str = "" task: T is_complete: bool = False is_pending: bool = True @@ -83,6 +96,7 @@ class TaskWorker: _started: Event _stopping: Event _stopped: Event + _context_register: dict[str, Context] def __init__( self, @@ -111,15 +125,20 @@ def __init__( self._stopped = Event() self._stopped.set() self._broadcast_statuses = broadcast_statuses + self._context_register = {} + setup_tracing("BlueAPIWorker") - def clear_task(self, task_id: str) -> str: + @start_as_current_span(TRACER, "task_id") + def clear_task(self, task_id: str, carr: dict[str, Any] = None) -> str: task = self._tasks.pop(task_id) return task.task_id + @start_as_current_span(TRACER) def cancel_active_task( self, failure: bool = False, reason: str | None = None, + carr: dict[str, Any] = None, ) -> str: if self._current is None: # Persuades mypy that self._current is not None @@ -127,17 +146,26 @@ def cancel_active_task( raise TransitionError("Attempted to cancel while no active Task") if failure: self._ctx.run_engine.abort(reason) + add_span_attributes({"Task aborted": reason}) else: self._ctx.run_engine.stop() + add_span_attributes({"Task stopped": reason}) return self._current.task_id - def get_tasks(self) -> list[TrackableTask]: + @start_as_current_span(TRACER) + def get_tasks(self, carr: dict[str, Any] = None) -> list[TrackableTask]: return list(self._tasks.values()) - def get_task_by_id(self, task_id: str) -> TrackableTask | None: + @start_as_current_span(TRACER, "task_id") + def get_task_by_id( + self, task_id: str, carr: dict[str, Any] = None + ) -> TrackableTask | None: return self._tasks.get(task_id) - def get_tasks_by_status(self, status: TaskStatusEnum) -> list[TrackableTask]: + @start_as_current_span(TRACER, "status") + def get_tasks_by_status( + self, status: TaskStatusEnum, carr: dict[str, Any] = None + ) -> list[TrackableTask]: if status == TaskStatusEnum.RUNNING: return [ task @@ -150,23 +178,40 @@ def get_tasks_by_status(self, status: TaskStatusEnum) -> list[TrackableTask]: return [task for task in self._tasks.values() if task.is_complete] return [] - def get_active_task(self) -> TrackableTask[Task] | None: - return self._current - - def begin_task(self, task_id: str) -> None: + @start_as_current_span(TRACER) + def get_active_task( + self, carr: dict[str, Any] = None + ) -> TrackableTask[Task] | None: + current = self._current + if current is not None: + add_span_attributes({"Active Task": current.task_id}) + return current + + @start_as_current_span(TRACER, "task_id") + def begin_task(self, task_id: str, carr: dict[str, Any] = None) -> None: task = self._tasks.get(task_id) if task is not None: self._submit_trackable_task(task) else: raise KeyError(f"No pending task with ID {task_id}") - def submit_task(self, task: Task) -> str: + @start_as_current_span(TRACER, "task.name", "task.params") + def submit_task(self, task: Task, carr: dict[str, Any] = None) -> str: task.prepare_params(self._ctx) # Will raise if parameters are invalid task_id: str = str(uuid.uuid4()) - trackable_task = TrackableTask(task_id=task_id, task=task) + add_span_attributes({"TaskId": task_id}) + trackable_task = TrackableTask( + task_id=task_id, request_id=str(get_baggage("correlation_id")), task=task + ) self._tasks[task_id] = trackable_task return task_id + @start_as_current_span( + TRACER, + "trackable_task.task_id", + "trackable_task.task.name", + "trackable_task.task.params", + ) def _submit_trackable_task(self, trackable_task: TrackableTask) -> None: if self.state is not WorkerState.IDLE: raise WorkerBusyError(f"Worker is in state {self.state}") @@ -177,12 +222,15 @@ def mark_task_as_started(event: WorkerEvent, _: str | None) -> None: if ( event.task_status is not None and event.task_status.task_id == trackable_task.task_id + and trackable_task.task_id in self._context_register ): task_started.set() LOGGER.info(f"Submitting: {trackable_task}") try: sub = self.worker_events.subscribe(mark_task_as_started) + self._context_register[trackable_task.task_id] = get_trace_context() + """ Cache the current trace context as the one for this task id """ self._task_channel.put_nowait(trackable_task) task_started.wait(timeout=5.0) if not task_started.is_set(): @@ -193,13 +241,16 @@ def mark_task_as_started(event: WorkerEvent, _: str | None) -> None: finally: self.worker_events.unsubscribe(sub) + @start_as_current_span(TRACER) def start(self) -> None: if self._started.is_set(): raise WorkerAlreadyStartedError("Worker is already running") self._wait_until_stopped() - run_worker_in_own_thread(self) + fut = run_worker_in_own_thread(self) self._wait_until_started() + add_span_attributes({"worker thread state": fut._state}) + @start_as_current_span(TRACER) def stop(self) -> None: LOGGER.info("Attempting to stop worker") @@ -208,15 +259,18 @@ def stop(self) -> None: self._task_channel.put(KillSignal()) else: LOGGER.info("Stopping worker: nothing to do") - LOGGER.info("Stopped") self._wait_until_stopped() + add_span_attributes({"worker thread state": "STOPPED"}) + LOGGER.info("Stopped") + @start_as_current_span(TRACER) def _wait_until_started(self) -> None: if not self._started.wait(timeout=self._start_stop_timeout): raise TimeoutError( f"Worker did not start within {self._start_stop_timeout} seconds" ) + @start_as_current_span(TRACER) def _wait_until_stopped(self) -> None: if not self._stopped.wait(timeout=self._start_stop_timeout): raise TimeoutError( @@ -227,6 +281,7 @@ def _wait_until_stopped(self) -> None: def state(self) -> WorkerState: return self._state + @start_as_current_span(TRACER) def run(self) -> None: LOGGER.info("Worker starting") self._ctx.run_engine.state_hook = self._on_state_change @@ -242,25 +297,30 @@ def run(self) -> None: self._stopping.clear() self._stopped.set() - def pause(self, defer=False): + @start_as_current_span(TRACER, "defer") + def pause(self, defer=False, carr: dict[str, Any] = None): LOGGER.info("Requesting to pause the worker") self._ctx.run_engine.request_pause(defer) - def resume(self): + @start_as_current_span(TRACER) + def resume(self, carr: dict[str, Any] = None): LOGGER.info("Requesting to resume the worker") self._ctx.run_engine.resume() + @start_as_current_span(TRACER) def _cycle_with_error_handling(self) -> None: try: self._cycle() except Exception as ex: self._report_error(ex) + @start_as_current_span(TRACER) def _cycle(self) -> None: try: LOGGER.info("Awaiting task") next_task: TrackableTask | KillSignal = self._task_channel.get() if isinstance(next_task, TrackableTask): + add_span_attributes({"next_task.task_id": next_task.task_id}) LOGGER.info(f"Got new task: {next_task}") self._current = next_task # Informing mypy that the task is not None self._current.is_pending = False @@ -270,6 +330,7 @@ def _cycle(self) -> None: # Note that the kill signal is explicitly not a type of task as we don't # want it to be part of the worker's public API self._stopping.set() + add_span_attributes({"server shutting down": "true"}) else: raise KeyError(f"Unknown command: {next_task}") except Exception as err: @@ -294,6 +355,7 @@ def progress_events(self) -> EventStream[ProgressEvent, int]: def data_events(self) -> EventStream[DataEvent, int]: return self._data_events + @start_as_current_span(TRACER, "raw_new_state", "raw_old_state") def _on_state_change( self, raw_new_state: RawRunEngineState, @@ -314,6 +376,7 @@ def _report_error(self, err: Exception) -> None: self._current.errors.append(str(err)) self._errors.append(str(err)) + @start_as_current_span(TRACER) def _report_status( self, ) -> None: @@ -327,6 +390,13 @@ def _report_status( task_failed=bool(self._current.errors), ) correlation_id = self._current.task_id + add_span_attributes( + { + "task_id": self._current.task_id, + "task_complete": self._current.is_complete, + "task_failed": self._current.errors, + } + ) else: task_status = None correlation_id = None @@ -345,6 +415,26 @@ def _on_document(self, name: str, document: Mapping[str, Any]) -> None: self._data_events.publish( DataEvent(name=name, doc=document), correlation_id ) + with TRACER.start_as_current_span( + "_on_document", + context=self._context_register[self._current.task_id], + kind=SpanKind.PRODUCER, + ): + """Start a new span but inject the context cached when the current task + was created. This will make the documents received part of the same + trace.""" + add_span_attributes( + { + "task_id": self._current.task_id, + "name": name, + "document": str(document), + } + ) + + correlation_id = self._current.request_id + self._data_events.publish( + DataEvent(name=name, doc=document), correlation_id + ) else: raise KeyError( "Trying to emit a document despite the fact that the RunEngine is idle" diff --git a/tests/unit_tests/worker/test_task_worker.py b/tests/unit_tests/worker/test_task_worker.py index 96777db9b..cfb606424 100644 --- a/tests/unit_tests/worker/test_task_worker.py +++ b/tests/unit_tests/worker/test_task_worker.py @@ -114,17 +114,21 @@ def test_multi_start(inert_worker: TaskWorker) -> None: def test_submit_task(worker: TaskWorker) -> None: assert worker.get_tasks() == [] task_id = worker.submit_task(_SIMPLE_TASK) - assert worker.get_tasks() == [TrackableTask(task_id=task_id, task=_SIMPLE_TASK)] + assert worker.get_tasks() == [ + TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + ] def test_submit_multiple_tasks(worker: TaskWorker) -> None: assert worker.get_tasks() == [] task_id_1 = worker.submit_task(_SIMPLE_TASK) - assert worker.get_tasks() == [TrackableTask(task_id=task_id_1, task=_SIMPLE_TASK)] + assert worker.get_tasks() == [ + TrackableTask(task_id=task_id_1, request_id="None", task=_SIMPLE_TASK) + ] task_id_2 = worker.submit_task(_LONG_TASK) assert worker.get_tasks() == [ - TrackableTask(task_id=task_id_1, task=_SIMPLE_TASK), - TrackableTask(task_id=task_id_2, task=_LONG_TASK), + TrackableTask(task_id=task_id_1, request_id="None", task=_SIMPLE_TASK), + TrackableTask(task_id=task_id_2, request_id="None", task=_LONG_TASK), ] @@ -136,27 +140,33 @@ def test_stop_with_task_pending(inert_worker: TaskWorker) -> None: def test_restart_leaves_task_pending(worker: TaskWorker) -> None: task_id = worker.submit_task(_SIMPLE_TASK) - assert worker.get_tasks() == [TrackableTask(task_id=task_id, task=_SIMPLE_TASK)] + assert worker.get_tasks() == [ + TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + ] worker.stop() worker.start() - assert worker.get_tasks() == [TrackableTask(task_id=task_id, task=_SIMPLE_TASK)] + assert worker.get_tasks() == [ + TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + ] def test_submit_before_start_pending(inert_worker: TaskWorker) -> None: task_id = inert_worker.submit_task(_SIMPLE_TASK) inert_worker.start() assert inert_worker.get_tasks() == [ - TrackableTask(task_id=task_id, task=_SIMPLE_TASK) + TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) ] inert_worker.stop() assert inert_worker.get_tasks() == [ - TrackableTask(task_id=task_id, task=_SIMPLE_TASK) + TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) ] def test_clear_task(worker: TaskWorker) -> None: task_id = worker.submit_task(_SIMPLE_TASK) - assert worker.get_tasks() == [TrackableTask(task_id=task_id, task=_SIMPLE_TASK)] + assert worker.get_tasks() == [ + TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + ] assert worker.clear_task(task_id) assert worker.get_tasks() == [] From 0ee1425234e1dd9f647eb8945c6bbe8f347e515e Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Wed, 25 Sep 2024 15:06:32 +0000 Subject: [PATCH 02/12] change sig of _rpc and add dependencies --- .devcontainer/devcontainer.json | 2 +- pyproject.toml | 4 +++- src/blueapi/service/runner.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 266ebe128..31547f83e 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -39,7 +39,7 @@ // Make sure SELinux does not disable with access to host filesystems like tmp "--security-opt=label=disable" ], - // Remove this before pusing, only to allow local testing with editable library + // Remove this before pushing, only to allow local testing with editable library "mounts": [ "source=/scratch/athena/observability-utils,target=/scratch/athena/observability-utils,type=bind,consistency=cached" ], diff --git a/pyproject.toml b/pyproject.toml index aa6bbd4fc..eda8753f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,12 +27,14 @@ dependencies = [ "fastapi>=0.112.0", "uvicorn", "requests", - "dls-bluesky-core", #requires ophyd-async + "dls-bluesky-core", #requires ophyd-async "dls-dodal>=1.31.0", "super-state-machine", # https://github.com/DiamondLightSource/blueapi/issues/553 "GitPython", "bluesky-stomp>=0.1.2", "event-model==1.21", # https://github.com/DiamondLightSource/blueapi/issues/684 + "opentelemetry-distro>=0.48b0", + "opentelemetry-instrumentation-fastapi>=0.48b0", "observability-utils>=0.1.2", ] dynamic = ["version"] diff --git a/src/blueapi/service/runner.py b/src/blueapi/service/runner.py index b2efb9218..a01bb4ce3 100644 --- a/src/blueapi/service/runner.py +++ b/src/blueapi/service/runner.py @@ -165,7 +165,7 @@ def _rpc( module_name: str, function_name: str, expected_type: type[T] | None, - *args: Any, + args: Any, **kwargs: Any, ) -> T: mod = import_module(module_name) From 51c60cc8d22aa01a2460f631f4907b4429c9c48d Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Mon, 14 Oct 2024 13:51:22 +0000 Subject: [PATCH 03/12] Add tracing adjusting startup to prevent multiple FastAPI apps --- .devcontainer/devcontainer.json | 4 +- .vscode/launch.json | 20 ++- Dockerfile | 5 +- dev-requirements.txt | 17 +++ docs/reference/openapi.yaml | 4 + helm/blueapi/values.yaml | 22 ++-- pyproject.toml | 4 +- src/blueapi/cli/cli.py | 35 ++++-- src/blueapi/client/rest.py | 3 +- src/blueapi/config.py | 18 --- src/blueapi/core/__init__.py | 4 + src/blueapi/service/main.py | 9 +- src/blueapi/service/runner.py | 2 +- src/blueapi/worker/task_worker.py | 29 ++--- tests/system_tests/plans.json | 132 ++++---------------- tests/unit_tests/core/fake_device_module.py | 2 +- tests/unit_tests/service/test_interface.py | 59 +++++---- tests/unit_tests/service/test_rest_api.py | 33 ++++- tests/unit_tests/service/test_runner.py | 18 +-- tests/unit_tests/worker/test_task_worker.py | 38 ++++-- 20 files changed, 224 insertions(+), 234 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 31547f83e..4be3c2073 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -40,9 +40,7 @@ "--security-opt=label=disable" ], // Remove this before pushing, only to allow local testing with editable library - "mounts": [ - "source=/scratch/athena/observability-utils,target=/scratch/athena/observability-utils,type=bind,consistency=cached" - ], + "mounts": [], // Mount the parent as /workspaces so we can pip install peers as editable "workspaceMount": "source=${localWorkspaceFolder}/..,target=/workspaces,type=bind", // After the container is created, install the python project in editable form diff --git a/.vscode/launch.json b/.vscode/launch.json index 12992fef3..00d42fb9b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -25,9 +25,10 @@ "request": "launch", "justMyCode": false, "module": "blueapi", - "args": [ - "serve" - ] + "env": { + "OTLP_EXPORT_ENABLED": "false" + }, + "args": "-c ${input:config_path} serve" }, { "name": "Blueapi Server (Custom Config)", @@ -43,12 +44,21 @@ "request": "launch", "justMyCode": false, "module": "blueapi", - "args": "controller ${input:controller_args}" + "env": { + "OTLP_EXPORT_ENABLED": "false" + }, + "args": "-c ${input:config_path} controller ${input:args}" }, ], "inputs": [ { - "id": "controller_args", + "id": "config_path", + "type": "promptString", + "description": "Path to configuration YAML file", + "default": "" + }, + { + "id": "args", "type": "promptString", "description": "Arguments to pass to controller", "default": "" diff --git a/Dockerfile b/Dockerfile index 7ec1b29f5..601323921 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,12 +12,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ RUN python -m venv /venv ENV PATH=/venv/bin:$PATH +ENV OTLP_EXPORT_ENABLED=false # enable opentelemetry support ENV OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http/protobuf -# Chnage this to point to Jaeger server before merging +# Change this to point to Jaeger server before merging e.g. https://daq-services-jaeger ENV OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:4318 -# Change this to enable proper secrity before merging -ENV OTEL_EXPORTER_OTLP_INSECURE=true # Ensure that all Http headers are captured ENV OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST=".*" ENV OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE=".*" diff --git a/dev-requirements.txt b/dev-requirements.txt index 24ff1d64c..2d16b8b48 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -9,6 +9,7 @@ annotated-types==0.7.0 anyio==4.6.2.post1 appdirs==1.4.4 asciitree==0.3.3 +asgiref==3.8.1 asttokens==2.4.1 attrs==24.2.0 babel==2.16.0 @@ -68,7 +69,9 @@ fsspec==2024.10.0 funcy==2.0 gitdb==4.0.11 GitPython==3.1.43 +googleapis-common-protos==1.65.0 graypy==2.1.0 +grpcio==1.66.2 h11==0.14.0 h5py==3.12.1 HeapDict==1.0.1 @@ -118,8 +121,21 @@ nose2==0.15.1 nslsii==0.10.5 numcodecs==0.13.1 numpy==1.26.4 +observability-utils==0.1.2 opencv-python-headless==4.10.0.84 opentelemetry-api==1.27.0 +opentelemetry-distro==0.48b0 +opentelemetry-exporter-otlp==1.27.0 +opentelemetry-exporter-otlp-proto-common==1.27.0 +opentelemetry-exporter-otlp-proto-grpc==1.27.0 +opentelemetry-exporter-otlp-proto-http==1.27.0 +opentelemetry-instrumentation==0.48b0 +opentelemetry-instrumentation-asgi==0.48b0 +opentelemetry-instrumentation-fastapi==0.48b0 +opentelemetry-proto==1.27.0 +opentelemetry-sdk==1.27.0 +opentelemetry-semantic-conventions==0.48b0 +opentelemetry-util-http==0.48b0 ophyd==1.9.0 ophyd-async==0.6.0 orderly-set==5.2.2 @@ -146,6 +162,7 @@ pre_commit==4.0.1 prettytable==3.11.0 prompt-toolkit==3.0.36 propcache==0.2.0 +protobuf==4.25.5 psutil==6.1.0 ptyprocess==0.7.0 pure_eval==0.2.3 diff --git a/docs/reference/openapi.yaml b/docs/reference/openapi.yaml index a9fdf63cc..57a4d9352 100644 --- a/docs/reference/openapi.yaml +++ b/docs/reference/openapi.yaml @@ -179,6 +179,10 @@ components: default: true title: Is Pending type: boolean + request_id: + default: '' + title: Request Id + type: string task: title: Task task_id: diff --git a/helm/blueapi/values.yaml b/helm/blueapi/values.yaml index feb7adccf..ee7d5b2b0 100644 --- a/helm/blueapi/values.yaml +++ b/helm/blueapi/values.yaml @@ -77,29 +77,29 @@ listener: # Additional envVars to mount to the pod as a String extraEnvVars: | + - name: OTLP_EXPORT_ENABLED + value: {{ .Values.tracing.otlp.export_enabled] }} - name: OTEL_EXPORTER_OTLP_TRACES_PROTOCOL - value: {{ .Values.jaeger.otlp.protocol }} + value: {{ .Values.tracing.otlp.protocol }} - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: "{{ .Values.jaeger.otlp.host }}:{{ .Values.jaeger.otlp.port }}" - - name: OTEL_EXPORTER_OTLP_INSECURE - value: "{{ .Values.jaeger.otlp.insecure }}" + value: "{{ .Values.tracing.otlp.host }}:{{ .Values.tracing.otlp.port }}" - name: OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST - value: {{ .Values.jaeger.otlp.request.headers }} + value: {{ .Values.tracing.http.request.headers }} - name: OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE - value: {{ .Values.jaeger.otlp.response.headers }} + value: {{ .Values.tracing.http.response.headers }} # - name: RABBITMQ_PASSWORD # valueFrom: # secretKeyRef: # name: rabbitmq-password # key: rabbitmq-password -jaeger: +tracing: otlp: + export_enabled: false protocol: http/protobuf - insecure: true - # - host: http://localhost + host: https://daq-services-jaeger # replace with central instance port: 4318 + http: request: headers: ".*" response: @@ -126,6 +126,8 @@ worker: password: guest host: rabbitmq port: 61613 + tracing_exporter: + host: "" initContainer: scratch: diff --git a/pyproject.toml b/pyproject.toml index eda8753f5..bdf71adf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "fastapi>=0.112.0", "uvicorn", "requests", - "dls-bluesky-core", #requires ophyd-async + "dls-bluesky-core", #requires ophyd-async "dls-dodal>=1.31.0", "super-state-machine", # https://github.com/DiamondLightSource/blueapi/issues/553 "GitPython", @@ -83,7 +83,7 @@ write_to = "src/blueapi/_version.py" [tool.mypy] ignore_missing_imports = true # Ignore missing stubs in imported modules -namespace_packages = false # rely only on __init__ files to determine fully qualified module names. +namespace_packages = true # necessary for tracing sdk to work with mypy [tool.pytest.ini_options] # Run pytest with all our checkers, and don't spam us with massive tracebacks on error diff --git a/src/blueapi/cli/cli.py b/src/blueapi/cli/cli.py index 0fb105684..c40d26beb 100644 --- a/src/blueapi/cli/cli.py +++ b/src/blueapi/cli/cli.py @@ -10,6 +10,8 @@ from bluesky_stomp.messaging import MessageContext, StompClient from bluesky_stomp.models import Broker from observability_utils.tracing import setup_tracing +from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor +from opentelemetry.trace import get_tracer_provider from pydantic import ValidationError from requests.exceptions import ConnectionError @@ -19,14 +21,7 @@ from blueapi.client.event_bus import AnyEvent, BlueskyStreamingError, EventBusClient from blueapi.client.rest import BlueskyRemoteControlError from blueapi.config import ApplicationConfig, ConfigLoader -from blueapi.core import DataEvent -from blueapi.service.main import start -from blueapi.service.openapi import ( - DOCS_SCHEMA_LOCATION, - generate_schema, - print_schema_as_yaml, - write_schema_as_yaml, -) +from blueapi.core import OTLP_EXPORT_ENABLED, DataEvent from blueapi.worker import ProgressEvent, Task, WorkerEvent from .scratch import setup_scratch @@ -42,7 +37,6 @@ def main(ctx: click.Context, config: Path | None | tuple[Path, ...]) -> None: # if no command is supplied, run with the options passed - setup_tracing("BlueAPI") # initialise TracerProvider for server app config_loader = ConfigLoader(ApplicationConfig) if config is not None: configs = (config,) if isinstance(config, Path) else config @@ -74,6 +68,16 @@ def main(ctx: click.Context, config: Path | None | tuple[Path, ...]) -> None: help="[Development only] update the schema in the documentation", ) def schema(output: Path | None = None, update: bool = False) -> None: + """Only import the service functions when starting the service or generating + the schema, not the controller as a new FastAPI app will be started each time. + """ + from blueapi.service.openapi import ( + DOCS_SCHEMA_LOCATION, + generate_schema, + print_schema_as_yaml, + write_schema_as_yaml, + ) + """Generate the schema for the REST API""" schema = generate_schema() @@ -91,6 +95,17 @@ def start_application(obj: dict): """Run a worker that accepts plans to run""" config: ApplicationConfig = obj["config"] + """Only import the service functions when starting the service or generating + the schema, not the controller as a new FastAPI app will be started each time. + """ + from blueapi.service.main import app, start + + """ + Set up basic automated instrumentation for the FastAPI app, creating the + observability context. + """ + setup_tracing("BlueAPI", OTLP_EXPORT_ENABLED) + FastAPIInstrumentor().instrument_app(app, tracer_provider=get_tracer_provider()) start(config) @@ -105,7 +120,7 @@ def start_application(obj: dict): def controller(ctx: click.Context, output: str) -> None: """Client utility for controlling and introspecting the worker""" - setup_tracing("BlueAPICLI") # initialise TracerProvider for controller app + setup_tracing("BlueAPICLI", OTLP_EXPORT_ENABLED) if ctx.invoked_subcommand is None: print("Please invoke subcommand!") return diff --git a/src/blueapi/client/rest.py b/src/blueapi/client/rest.py index 274bc72cb..e6837fec6 100644 --- a/src/blueapi/client/rest.py +++ b/src/blueapi/client/rest.py @@ -135,11 +135,12 @@ def _request_and_deserialize( get_exception: Callable[[requests.Response], Exception | None] = _exception, ) -> T: url = self._url(suffix) + # Get the trace context to propagate to the REST API carr = get_context_propagator() if data: response = requests.request(method, url, json=data, headers=carr) else: - response = requests.request(method, url) + response = requests.request(method, url, headers=carr) exception = get_exception(response) if exception is not None: raise exception diff --git a/src/blueapi/config.py b/src/blueapi/config.py index baea502ec..16e470c0d 100644 --- a/src/blueapi/config.py +++ b/src/blueapi/config.py @@ -23,24 +23,6 @@ class Source(BaseModel): module: Path | str -# class BasicAuthentication(BaseModel): -# """ -# Log in details for when a server uses authentication. -# If username or passcode match exactly the regex ^\\${(.*)}$ -# they attempt to replace with an environment variable of the same. -# i.e. ${foo} or ${FOO} are replaced with the value of FOO -# """ - -# username: str = "test" # "guest" -# passcode: str = "test" # "guest" - -# @validator("username", "passcode") -# def get_from_env(cls, v: str): -# if v.startswith("${") and v.endswith("}"): -# return os.environ[v.removeprefix("${").removesuffix("}").upper()] -# return v - - class StompConfig(BaseModel): """ Config for connecting to stomp broker diff --git a/src/blueapi/core/__init__.py b/src/blueapi/core/__init__.py index 7b2306b0f..15e3b2602 100644 --- a/src/blueapi/core/__init__.py +++ b/src/blueapi/core/__init__.py @@ -1,3 +1,5 @@ +from os import environ + from .bluesky_event_loop import configure_bluesky_event_loop from .bluesky_types import ( BLUESKY_PROTOCOLS, @@ -14,6 +16,8 @@ from .context import BlueskyContext from .event import EventPublisher, EventStream +OTLP_EXPORT_ENABLED = environ.get("OTLP_EXPORT_ENABLED") == "true" + __all__ = [ "Plan", "PlanGenerator", diff --git a/src/blueapi/service/main.py b/src/blueapi/service/main.py index ce9c16fe1..99e120f96 100644 --- a/src/blueapi/service/main.py +++ b/src/blueapi/service/main.py @@ -16,7 +16,6 @@ start_as_current_span, ) from opentelemetry.context import attach -from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor from opentelemetry.propagate import get_global_textmap from pydantic import ValidationError from starlette.responses import JSONResponse @@ -85,12 +84,7 @@ async def lifespan(app: FastAPI): version=REST_API_VERSION, ) -FastAPIInstrumentor().instrument_app(app) -TRACER = get_tracer("API") -""" -Set up basic automated instrumentation for the FastAPI app, creating the -observability context. -""" +TRACER = get_tracer("interface") @app.exception_handler(KeyError) @@ -111,7 +105,6 @@ def get_environment( @app.delete("/environment", response_model=EnvironmentResponse) -@start_as_current_span(TRACER, "background_tasks", "runner") async def delete_environment( background_tasks: BackgroundTasks, runner: WorkerDispatcher = Depends(_runner), diff --git a/src/blueapi/service/runner.py b/src/blueapi/service/runner.py index a01bb4ce3..293cae2ee 100644 --- a/src/blueapi/service/runner.py +++ b/src/blueapi/service/runner.py @@ -23,7 +23,7 @@ set_start_method("spawn", force=True) LOGGER = logging.getLogger(__name__) -TRACER = get_tracer("interface") +TRACER = get_tracer("runner") P = ParamSpec("P") T = TypeVar("T") diff --git a/src/blueapi/worker/task_worker.py b/src/blueapi/worker/task_worker.py index 9ae907901..97e63e034 100644 --- a/src/blueapi/worker/task_worker.py +++ b/src/blueapi/worker/task_worker.py @@ -23,6 +23,7 @@ from super_state_machine.errors import TransitionError from blueapi.core import ( + OTLP_EXPORT_ENABLED, BlueskyContext, DataEvent, EventPublisher, @@ -46,7 +47,7 @@ from .worker_errors import WorkerAlreadyStartedError, WorkerBusyError LOGGER = logging.getLogger(__name__) -TRACER = get_tracer("reworker") +TRACER = get_tracer("task_worker") """ Initialise a Tracer for this module provided by the app's global TracerProvider. """ DEFAULT_START_STOP_TIMEOUT: float = 30.0 @@ -126,10 +127,10 @@ def __init__( self._stopped.set() self._broadcast_statuses = broadcast_statuses self._context_register = {} - setup_tracing("BlueAPIWorker") + setup_tracing("BlueAPIWorker", OTLP_EXPORT_ENABLED) @start_as_current_span(TRACER, "task_id") - def clear_task(self, task_id: str, carr: dict[str, Any] = None) -> str: + def clear_task(self, task_id: str, carr: dict[str, Any] | None = None) -> str: task = self._tasks.pop(task_id) return task.task_id @@ -138,7 +139,7 @@ def cancel_active_task( self, failure: bool = False, reason: str | None = None, - carr: dict[str, Any] = None, + carr: dict[str, Any] | None = None, ) -> str: if self._current is None: # Persuades mypy that self._current is not None @@ -153,18 +154,18 @@ def cancel_active_task( return self._current.task_id @start_as_current_span(TRACER) - def get_tasks(self, carr: dict[str, Any] = None) -> list[TrackableTask]: + def get_tasks(self, carr: dict[str, Any] | None = None) -> list[TrackableTask]: return list(self._tasks.values()) @start_as_current_span(TRACER, "task_id") def get_task_by_id( - self, task_id: str, carr: dict[str, Any] = None + self, task_id: str, carr: dict[str, Any] | None = None ) -> TrackableTask | None: return self._tasks.get(task_id) @start_as_current_span(TRACER, "status") def get_tasks_by_status( - self, status: TaskStatusEnum, carr: dict[str, Any] = None + self, status: TaskStatusEnum, carr: dict[str, Any] | None = None ) -> list[TrackableTask]: if status == TaskStatusEnum.RUNNING: return [ @@ -180,7 +181,7 @@ def get_tasks_by_status( @start_as_current_span(TRACER) def get_active_task( - self, carr: dict[str, Any] = None + self, carr: dict[str, Any] | None = None ) -> TrackableTask[Task] | None: current = self._current if current is not None: @@ -188,7 +189,7 @@ def get_active_task( return current @start_as_current_span(TRACER, "task_id") - def begin_task(self, task_id: str, carr: dict[str, Any] = None) -> None: + def begin_task(self, task_id: str, carr: dict[str, Any] | None = None) -> None: task = self._tasks.get(task_id) if task is not None: self._submit_trackable_task(task) @@ -196,7 +197,7 @@ def begin_task(self, task_id: str, carr: dict[str, Any] = None) -> None: raise KeyError(f"No pending task with ID {task_id}") @start_as_current_span(TRACER, "task.name", "task.params") - def submit_task(self, task: Task, carr: dict[str, Any] = None) -> str: + def submit_task(self, task: Task, carr: dict[str, Any] | None = None) -> str: task.prepare_params(self._ctx) # Will raise if parameters are invalid task_id: str = str(uuid.uuid4()) add_span_attributes({"TaskId": task_id}) @@ -298,12 +299,12 @@ def run(self) -> None: self._stopped.set() @start_as_current_span(TRACER, "defer") - def pause(self, defer=False, carr: dict[str, Any] = None): + def pause(self, defer=False, carr: dict[str, Any] | None = None): LOGGER.info("Requesting to pause the worker") self._ctx.run_engine.request_pause(defer) @start_as_current_span(TRACER) - def resume(self, carr: dict[str, Any] = None): + def resume(self, carr: dict[str, Any] | None = None): LOGGER.info("Requesting to resume the worker") self._ctx.run_engine.resume() @@ -411,10 +412,6 @@ def _report_status( def _on_document(self, name: str, document: Mapping[str, Any]) -> None: if self._current is not None: - correlation_id = self._current.task_id - self._data_events.publish( - DataEvent(name=name, doc=document), correlation_id - ) with TRACER.start_as_current_span( "_on_document", context=self._context_register[self._current.task_id], diff --git a/tests/system_tests/plans.json b/tests/system_tests/plans.json index b5d0f76e2..d0bba9319 100644 --- a/tests/system_tests/plans.json +++ b/tests/system_tests/plans.json @@ -141,7 +141,7 @@ }, "radius": { "description": "Radius of the circle", - "exclusiveMinimum": 0, + "exclusiveMinimum": 0.0, "title": "Radius", "type": "number" }, @@ -170,19 +170,11 @@ "description": "Abstract baseclass for a combination of two regions, left and right.", "properties": { "left": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The left-hand Region to combine" }, "right": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The right-hand Region to combine" }, "type": { @@ -207,19 +199,11 @@ "description": "Concatenate two Specs together, running one after the other.\n\nEach Dimension of left and right must contain the same axes. Typically\nformed using `Spec.concat`.\n\n.. example_spec::\n\n from scanspec.specs import Line\n\n spec = Line(\"x\", 1, 3, 3).concat(Line(\"x\", 4, 5, 5))", "properties": { "left": { - "allOf": [ - { - "$ref": "#/$defs/Spec" - } - ], + "$ref": "#/$defs/Spec", "description": "The left-hand Spec to Concat, midpoints will appear earlier" }, "right": { - "allOf": [ - { - "$ref": "#/$defs/Spec" - } - ], + "$ref": "#/$defs/Spec", "description": "The right-hand Spec to Concat, midpoints will appear later" }, "gap": { @@ -256,19 +240,11 @@ "description": "A point is in DifferenceOf(a, b) if in a and not in b.\n\nTypically created with the ``-`` operator.\n\n>>> r = Range(\"x\", 0.5, 2.5) - Range(\"x\", 1.5, 3.5)\n>>> r.mask({\"x\": np.array([0, 1, 2, 3, 4])})\narray([False, True, False, False, False])", "properties": { "left": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The left-hand Region to combine" }, "right": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The right-hand Region to combine" }, "type": { @@ -312,18 +288,18 @@ }, "x_radius": { "description": "The radius along the x axis of the ellipse", - "exclusiveMinimum": 0, + "exclusiveMinimum": 0.0, "title": "X Radius", "type": "number" }, "y_radius": { "description": "The radius along the y axis of the ellipse", - "exclusiveMinimum": 0, + "exclusiveMinimum": 0.0, "title": "Y Radius", "type": "number" }, "angle": { - "default": 0, + "default": 0.0, "description": "The angle of the ellipse (degrees)", "title": "Angle", "type": "number" @@ -354,19 +330,11 @@ "description": "A point is in IntersectionOf(a, b) if in both a and b.\n\nTypically created with the ``&`` operator.\n\n>>> r = Range(\"x\", 0.5, 2.5) & Range(\"x\", 1.5, 3.5)\n>>> r.mask({\"x\": np.array([0, 1, 2, 3, 4])})\narray([False, False, True, False, False])", "properties": { "left": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The left-hand Region to combine" }, "right": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The right-hand Region to combine" }, "type": { @@ -434,19 +402,11 @@ "description": "Restrict Spec to only midpoints that fall inside the given Region.\n\nTypically created with the ``&`` operator. It also pushes down the\n``& | ^ -`` operators to its `Region` to avoid the need for brackets on\ncombinations of Regions.\n\nIf a Region spans multiple Frames objects, they will be squashed together.\n\n.. example_spec::\n\n from scanspec.regions import Circle\n from scanspec.specs import Line\n\n spec = Line(\"y\", 1, 3, 3) * Line(\"x\", 3, 5, 5) & Circle(\"x\", \"y\", 4, 2, 1.2)\n\nSee Also: `why-squash-can-change-path`", "properties": { "spec": { - "allOf": [ - { - "$ref": "#/$defs/Spec" - } - ], + "$ref": "#/$defs/Spec", "description": "The Spec containing the source midpoints" }, "region": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The Region that midpoints will be inside" }, "check_path_changes": { @@ -526,19 +486,11 @@ "description": "Outer product of two Specs, nesting inner within outer.\n\nThis means that inner will run in its entirety at each point in outer.\n\n.. example_spec::\n\n from scanspec.specs import Line\n\n spec = Line(\"y\", 1, 2, 3) * Line(\"x\", 3, 4, 12)", "properties": { "outer": { - "allOf": [ - { - "$ref": "#/$defs/Spec" - } - ], + "$ref": "#/$defs/Spec", "description": "Will be executed once" }, "inner": { - "allOf": [ - { - "$ref": "#/$defs/Spec" - } - ], + "$ref": "#/$defs/Spec", "description": "Will be executed len(outer) times" }, "type": { @@ -627,7 +579,7 @@ "type": "number" }, "angle": { - "default": 0, + "default": 0.0, "description": "Clockwise rotation angle of the rectangle", "title": "Angle", "type": "number" @@ -739,11 +691,7 @@ "description": "Run the Spec in reverse on every other iteration when nested.\n\nTypically created with the ``~`` operator.\n\n.. example_spec::\n\n from scanspec.specs import Line\n\n spec = Line(\"y\", 1, 3, 3) * ~Line(\"x\", 3, 5, 5)", "properties": { "spec": { - "allOf": [ - { - "$ref": "#/$defs/Spec" - } - ], + "$ref": "#/$defs/Spec", "description": "The Spec to run in reverse every other iteration" }, "type": { @@ -850,7 +798,7 @@ "type": "integer" }, "rotate": { - "default": 0, + "default": 0.0, "description": "How much to rotate the angle of the spiral", "title": "Rotate", "type": "number" @@ -882,11 +830,7 @@ "description": "Squash a stack of Frames together into a single expanded Frames object.\n\nSee Also:\n `why-squash-can-change-path`\n\n.. example_spec::\n\n from scanspec.specs import Line, Squash\n\n spec = Squash(Line(\"y\", 1, 2, 3) * Line(\"x\", 0, 1, 4))", "properties": { "spec": { - "allOf": [ - { - "$ref": "#/$defs/Spec" - } - ], + "$ref": "#/$defs/Spec", "description": "The Spec to squash the dimensions of" }, "check_path_changes": { @@ -953,19 +897,11 @@ "description": "A point is in SymmetricDifferenceOf(a, b) if in either a or b, but not both.\n\nTypically created with the ``^`` operator.\n\n>>> r = Range(\"x\", 0.5, 2.5) ^ Range(\"x\", 1.5, 3.5)\n>>> r.mask({\"x\": np.array([0, 1, 2, 3, 4])})\narray([False, True, False, True, False])", "properties": { "left": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The left-hand Region to combine" }, "right": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The right-hand Region to combine" }, "type": { @@ -990,19 +926,11 @@ "description": "A point is in UnionOf(a, b) if in either a or b.\n\nTypically created with the ``|`` operator\n\n>>> r = Range(\"x\", 0.5, 2.5) | Range(\"x\", 1.5, 3.5)\n>>> r.mask({\"x\": np.array([0, 1, 2, 3, 4])})\narray([False, True, True, True, False])", "properties": { "left": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The left-hand Region to combine" }, "right": { - "allOf": [ - { - "$ref": "#/$defs/Region" - } - ], + "$ref": "#/$defs/Region", "description": "The right-hand Region to combine" }, "type": { @@ -1027,19 +955,11 @@ "description": "Run two Specs in parallel, merging their midpoints together.\n\nTypically formed using `Spec.zip`.\n\nStacks of Frames are merged by:\n\n- If right creates a stack of a single Frames object of size 1, expand it to\n the size of the fastest Frames object created by left\n- Merge individual Frames objects together from fastest to slowest\n\nThis means that Zipping a Spec producing stack [l2, l1] with a Spec\nproducing stack [r1] will assert len(l1)==len(r1), and produce\nstack [l2, l1.zip(r1)].\n\n.. example_spec::\n\n from scanspec.specs import Line\n\n spec = Line(\"z\", 1, 2, 3) * Line(\"y\", 3, 4, 5).zip(Line(\"x\", 4, 5, 5))", "properties": { "left": { - "allOf": [ - { - "$ref": "#/$defs/Spec" - } - ], + "$ref": "#/$defs/Spec", "description": "The left-hand Spec to Zip, will appear earlier in axes" }, "right": { - "allOf": [ - { - "$ref": "#/$defs/Spec" - } - ], + "$ref": "#/$defs/Spec", "description": "The right-hand Spec to Zip, will appear later in axes" }, "type": { diff --git a/tests/unit_tests/core/fake_device_module.py b/tests/unit_tests/core/fake_device_module.py index d27af8480..0e2ea690c 100644 --- a/tests/unit_tests/core/fake_device_module.py +++ b/tests/unit_tests/core/fake_device_module.py @@ -32,7 +32,7 @@ def _mock_with_name(name: str) -> MagicMock: return mock -def wrong_return_type() -> int: +def wrong_return_type(*args, **kwargs) -> int: return "str" # type: ignore diff --git a/tests/unit_tests/service/test_interface.py b/tests/unit_tests/service/test_interface.py index 36c6b69e9..bc55d6d06 100644 --- a/tests/unit_tests/service/test_interface.py +++ b/tests/unit_tests/service/test_interface.py @@ -1,6 +1,6 @@ import uuid from dataclasses import dataclass -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import ANY, MagicMock, Mock, patch import pytest from bluesky_stomp.messaging import StompClient @@ -16,6 +16,10 @@ from blueapi.worker.task import Task from blueapi.worker.task_worker import TrackableTask +TEST_CARRIER = { + "traceparent": "00-6fc19ca6e39c1fc845bb7c6fe28c7fdc-00babae1b7c9591d-01" +} + @pytest.fixture def mock_connection() -> Mock: @@ -36,7 +40,7 @@ def ensure_worker_stopped(): of an assertion error. The start_worker method is not managed by a fixture as some of the tests require it to be customised.""" yield - interface.teardown() + interface.teardown(TEST_CARRIER) def my_plan() -> MsgGenerator: @@ -56,7 +60,7 @@ def test_get_plans(context_mock: MagicMock): context.plan(my_second_plan) context_mock.return_value = context - assert interface.get_plans() == [ + assert interface.get_plans(TEST_CARRIER) == [ PlanModel( name="my_plan", description="My plan does cool stuff.", @@ -88,7 +92,7 @@ def test_get_plan(context_mock: MagicMock): context.plan(my_second_plan) context_mock.return_value = context - assert interface.get_plan("my_plan") == PlanModel( + assert interface.get_plan(TEST_CARRIER, "my_plan") == PlanModel( name="my_plan", description="My plan does cool stuff.", schema={ @@ -100,7 +104,7 @@ def test_get_plan(context_mock: MagicMock): ) with pytest.raises(KeyError): - interface.get_plan("non_existing_plan") + interface.get_plan(TEST_CARRIER, "non_existing_plan") @dataclass @@ -115,7 +119,7 @@ def test_get_devices(context_mock: MagicMock): context.device(SynAxis(name="my_axis")) context_mock.return_value = context - assert interface.get_devices() == [ + assert interface.get_devices(TEST_CARRIER) == [ DeviceModel(name="my_device", protocols=["HasName"]), DeviceModel( name="my_axis", @@ -143,12 +147,12 @@ def test_get_device(context_mock: MagicMock): context.device(MyDevice(name="my_device")) context_mock.return_value = context - assert interface.get_device("my_device") == DeviceModel( + assert interface.get_device(TEST_CARRIER, "my_device") == DeviceModel( name="my_device", protocols=["HasName"] ) with pytest.raises(KeyError): - assert interface.get_device("non_existing_device") + assert interface.get_device(TEST_CARRIER, "non_existing_device") @patch("blueapi.service.interface.context") @@ -160,7 +164,7 @@ def test_submit_task(context_mock: MagicMock): mock_uuid_value = "8dfbb9c2-7a15-47b6-bea8-b6b77c31d3d9" with patch.object(uuid, "uuid4") as uuid_mock: uuid_mock.return_value = uuid.UUID(mock_uuid_value) - task_uuid = interface.submit_task(task) + task_uuid = interface.submit_task(TEST_CARRIER, task) assert task_uuid == mock_uuid_value @@ -173,9 +177,9 @@ def test_clear_task(context_mock: MagicMock): mock_uuid_value = "3d858a62-b40a-400f-82af-8d2603a4e59a" with patch.object(uuid, "uuid4") as uuid_mock: uuid_mock.return_value = uuid.UUID(mock_uuid_value) - interface.submit_task(task) + interface.submit_task(TEST_CARRIER, task) - clear_task_return = interface.clear_task(mock_uuid_value) + clear_task_return = interface.clear_task(TEST_CARRIER, mock_uuid_value) assert clear_task_return == mock_uuid_value @@ -183,7 +187,7 @@ def test_clear_task(context_mock: MagicMock): def test_begin_task(worker_mock: MagicMock): uuid_value = "350043fd-597e-41a7-9a92-5d5478232cf7" task = WorkerTask(task_id=uuid_value) - returned_task = interface.begin_task(task) + returned_task = interface.begin_task(TEST_CARRIER, task) assert task == returned_task worker_mock.assert_called_once_with(uuid_value) @@ -191,7 +195,7 @@ def test_begin_task(worker_mock: MagicMock): @patch("blueapi.service.interface.TaskWorker.begin_task") def test_begin_task_no_task_id(worker_mock: MagicMock): task = WorkerTask(task_id=None) - returned_task = interface.begin_task(task) + returned_task = interface.begin_task(TEST_CARRIER, task) assert task == returned_task worker_mock.assert_not_called() @@ -212,35 +216,37 @@ def mock_tasks_by_status(status: TaskStatusEnum) -> list[TrackableTask]: get_tasks_by_status_mock.side_effect = mock_tasks_by_status - assert interface.get_tasks_by_status(TaskStatusEnum.PENDING) == [ + assert interface.get_tasks_by_status(TEST_CARRIER, TaskStatusEnum.PENDING) == [ pending_task1, pending_task2, ] - assert interface.get_tasks_by_status(TaskStatusEnum.RUNNING) == [running_task] - assert interface.get_tasks_by_status(TaskStatusEnum.COMPLETE) == [] + assert interface.get_tasks_by_status(TEST_CARRIER, TaskStatusEnum.RUNNING) == [ + running_task + ] + assert interface.get_tasks_by_status(TEST_CARRIER, TaskStatusEnum.COMPLETE) == [] def test_get_active_task(): - assert interface.get_active_task() is None + assert interface.get_active_task(TEST_CARRIER) is None def test_get_worker_state(): - assert interface.get_worker_state() == WorkerState.IDLE + assert interface.get_worker_state(TEST_CARRIER) == WorkerState.IDLE @patch("blueapi.service.interface.TaskWorker.pause") def test_pause_worker(pause_worker_mock: MagicMock): - interface.pause_worker(False) + interface.pause_worker(TEST_CARRIER, False) pause_worker_mock.assert_called_once_with(False) pause_worker_mock.reset_mock() - interface.pause_worker(True) + interface.pause_worker(TEST_CARRIER, True) pause_worker_mock.assert_called_once_with(True) @patch("blueapi.service.interface.TaskWorker.resume") def test_resume_worker(resume_worker_mock: MagicMock): - interface.resume_worker() + interface.resume_worker(TEST_CARRIER) resume_worker_mock.assert_called_once() @@ -250,7 +256,7 @@ def test_cancel_active_task(cancel_active_task_mock: MagicMock): reason = "End of session" task_id = "789" cancel_active_task_mock.return_value = task_id - assert interface.cancel_active_task(fail, reason) == task_id + assert interface.cancel_active_task(TEST_CARRIER, fail, reason) == task_id cancel_active_task_mock.assert_called_once_with(fail, reason) @@ -263,7 +269,7 @@ def test_get_tasks(get_tasks_mock: MagicMock): ] get_tasks_mock.return_value = tasks - assert interface.get_tasks() == tasks + assert interface.get_tasks(TEST_CARRIER) == tasks @patch("blueapi.service.interface.context") @@ -272,10 +278,13 @@ def test_get_task_by_id(context_mock: MagicMock): context.plan(my_plan) context_mock.return_value = context - task_id = interface.submit_task(Task(name="my_plan")) + task_id = interface.submit_task(TEST_CARRIER, Task(name="my_plan")) - assert interface.get_task_by_id(task_id) == TrackableTask( + assert interface.get_task_by_id( + TEST_CARRIER, task_id + ) == TrackableTask.model_construct( task_id=task_id, + request_id=ANY, task=Task(name="my_plan", params={}), is_complete=False, is_pending=True, diff --git a/tests/unit_tests/service/test_rest_api.py b/tests/unit_tests/service/test_rest_api.py index 6bb08cb01..3eba9cdde 100644 --- a/tests/unit_tests/service/test_rest_api.py +++ b/tests/unit_tests/service/test_rest_api.py @@ -23,6 +23,22 @@ from blueapi.worker.task_worker import TrackableTask +class _ANY_DICT(dict): + "A helper object that compares equal to all dictionaries." + + def __eq__(self, other): + return isinstance(other, dict) + + def __ne__(self, other): + return False + + def __repr__(self): + return "" + + +ANY_DICT = _ANY_DICT() + + @pytest.fixture def client() -> Iterator[TestClient]: with ( @@ -70,7 +86,7 @@ class MyModel(BaseModel): response = client.get("/plans/my-plan") - get_plan_mock.assert_called_once_with("my-plan") + get_plan_mock.assert_called_once_with(ANY_DICT, "my-plan") assert response.status_code == status.HTTP_200_OK assert response.json() == { "description": None, @@ -128,7 +144,7 @@ class MyDevice: get_device_mock.return_value = DeviceModel.from_device(device) response = client.get("/devices/my-device") - get_device_mock.assert_called_once_with("my-device") + get_device_mock.assert_called_once_with(ANY_DICT, "my-device") assert response.status_code == status.HTTP_200_OK assert response.json() == { "name": "my-device", @@ -159,7 +175,7 @@ def test_create_task( response = client.post("/tasks", json=task.model_dump()) - submit_task_mock.assert_called_once_with(task) + submit_task_mock.assert_called_once_with(ANY_DICT, task) assert response.json() == {"task_id": task_id} @@ -252,6 +268,7 @@ def test_get_tasks(get_tasks_mock: MagicMock, client: TestClient) -> None: "errors": [], "is_complete": False, "is_pending": True, + "request_id": "", "task": {"name": "sleep", "params": {"time": 0.0}}, "task_id": "0", }, @@ -259,6 +276,7 @@ def test_get_tasks(get_tasks_mock: MagicMock, client: TestClient) -> None: "errors": [], "is_complete": False, "is_pending": True, + "request_id": "", "task": {"name": "first_task", "params": {}}, "task_id": "1", }, @@ -288,6 +306,7 @@ def test_get_tasks_by_status( "errors": [], "is_complete": True, "is_pending": False, + "request_id": "", "task": {"name": "third_task", "params": {}}, "task_id": "3", } @@ -379,6 +398,7 @@ def test_get_task(get_task_by_id: MagicMock, client: TestClient): "errors": [], "is_complete": False, "is_pending": True, + "request_id": "", "task": {"name": "third_task", "params": {}}, "task_id": f"{task_id}", } @@ -404,6 +424,7 @@ def test_get_all_tasks(get_all_tasks: MagicMock, client: TestClient): "task": {"name": "third_task", "params": {}}, "is_complete": False, "is_pending": True, + "request_id": "", "errors": [], } ] @@ -464,7 +485,7 @@ def test_set_state_running_to_paused( "/worker/state", json=StateChangeRequest(new_state=final_state).model_dump() ) - pause_worker_mock.assert_called_once_with(False) + pause_worker_mock.assert_called_once_with(ANY_DICT, False) assert response.status_code == status.HTTP_202_ACCEPTED assert response.json() == final_state @@ -502,7 +523,7 @@ def test_set_state_running_to_aborting( "/worker/state", json=StateChangeRequest(new_state=final_state).model_dump() ) - cancel_active_task_mock.assert_called_once_with(True, None) + cancel_active_task_mock.assert_called_once_with(ANY_DICT, True, None) assert response.status_code == status.HTTP_202_ACCEPTED assert response.json() == final_state @@ -524,7 +545,7 @@ def test_set_state_running_to_stopping_including_reason( json=StateChangeRequest(new_state=final_state, reason=reason).model_dump(), ) - cancel_active_task_mock.assert_called_once_with(False, reason) + cancel_active_task_mock.assert_called_once_with(ANY_DICT, False, reason) assert response.status_code == status.HTTP_202_ACCEPTED assert response.json() == final_state diff --git a/tests/unit_tests/service/test_runner.py b/tests/unit_tests/service/test_runner.py index af65e9b0a..bcce9874d 100644 --- a/tests/unit_tests/service/test_runner.py +++ b/tests/unit_tests/service/test_runner.py @@ -186,42 +186,42 @@ class GenericModel(BaseModel, Generic[T]): b: str -def return_int() -> int: +def return_int(*args, **kwargs) -> int: return 1 -def return_str() -> str: +def return_str(*args, **kwargs) -> str: return "hello" -def return_list() -> list[int]: +def return_list(*args, **kwargs) -> list[int]: return [1, 2, 3] -def return_dict() -> dict[str, int]: +def return_dict(*args, **kwargs) -> dict[str, int]: return { "test": 1, "other_test": 2, } -def return_simple_model() -> SimpleModel: +def return_simple_model(*args, **kwargs) -> SimpleModel: return SimpleModel(a=1, b="hi") -def return_nested_model() -> NestedModel: +def return_nested_model(*args, **kwargs) -> NestedModel: return NestedModel(nested=return_simple_model(), c=False) -def return_unbound_generic_model() -> GenericModel: +def return_unbound_generic_model(*args, **kwargs) -> GenericModel: return GenericModel(a="foo", b="bar") -def return_bound_generic_model() -> GenericModel[int]: +def return_bound_generic_model(*args, **kwargs) -> GenericModel[int]: return GenericModel(a=1, b="hi") -def return_explicitly_bound_generic_model() -> GenericModel[int]: +def return_explicitly_bound_generic_model(*args, **kwargs) -> GenericModel[int]: return GenericModel[int](a=1, b="hi") diff --git a/tests/unit_tests/worker/test_task_worker.py b/tests/unit_tests/worker/test_task_worker.py index cfb606424..601a38d27 100644 --- a/tests/unit_tests/worker/test_task_worker.py +++ b/tests/unit_tests/worker/test_task_worker.py @@ -4,7 +4,7 @@ from concurrent.futures import Future from queue import Full from typing import Any, TypeVar -from unittest.mock import MagicMock, patch +from unittest.mock import ANY, MagicMock, patch import pytest @@ -115,7 +115,9 @@ def test_submit_task(worker: TaskWorker) -> None: assert worker.get_tasks() == [] task_id = worker.submit_task(_SIMPLE_TASK) assert worker.get_tasks() == [ - TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + TrackableTask.model_construct( + task_id=task_id, request_id=ANY, task=_SIMPLE_TASK + ) ] @@ -123,12 +125,18 @@ def test_submit_multiple_tasks(worker: TaskWorker) -> None: assert worker.get_tasks() == [] task_id_1 = worker.submit_task(_SIMPLE_TASK) assert worker.get_tasks() == [ - TrackableTask(task_id=task_id_1, request_id="None", task=_SIMPLE_TASK) + TrackableTask.model_construct( + task_id=task_id_1, request_id=ANY, task=_SIMPLE_TASK + ) ] task_id_2 = worker.submit_task(_LONG_TASK) assert worker.get_tasks() == [ - TrackableTask(task_id=task_id_1, request_id="None", task=_SIMPLE_TASK), - TrackableTask(task_id=task_id_2, request_id="None", task=_LONG_TASK), + TrackableTask.model_construct( + task_id=task_id_1, request_id=ANY, task=_SIMPLE_TASK + ), + TrackableTask.model_construct( + task_id=task_id_2, request_id=ANY, task=_LONG_TASK + ), ] @@ -141,12 +149,16 @@ def test_stop_with_task_pending(inert_worker: TaskWorker) -> None: def test_restart_leaves_task_pending(worker: TaskWorker) -> None: task_id = worker.submit_task(_SIMPLE_TASK) assert worker.get_tasks() == [ - TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + TrackableTask.model_construct( + task_id=task_id, request_id=ANY, task=_SIMPLE_TASK + ) ] worker.stop() worker.start() assert worker.get_tasks() == [ - TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + TrackableTask.model_construct( + task_id=task_id, request_id=ANY, task=_SIMPLE_TASK + ) ] @@ -154,18 +166,24 @@ def test_submit_before_start_pending(inert_worker: TaskWorker) -> None: task_id = inert_worker.submit_task(_SIMPLE_TASK) inert_worker.start() assert inert_worker.get_tasks() == [ - TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + TrackableTask.model_construct( + task_id=task_id, request_id=ANY, task=_SIMPLE_TASK + ) ] inert_worker.stop() assert inert_worker.get_tasks() == [ - TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + TrackableTask.model_construct( + task_id=task_id, request_id=ANY, task=_SIMPLE_TASK + ) ] def test_clear_task(worker: TaskWorker) -> None: task_id = worker.submit_task(_SIMPLE_TASK) assert worker.get_tasks() == [ - TrackableTask(task_id=task_id, request_id="None", task=_SIMPLE_TASK) + TrackableTask.model_construct( + task_id=task_id, request_id=ANY, task=_SIMPLE_TASK + ) ] assert worker.clear_task(task_id) assert worker.get_tasks() == [] From 58cb2a9d5e6da189e554b32eb75e4e8aba2cc5bb Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Fri, 25 Oct 2024 09:16:24 +0000 Subject: [PATCH 04/12] Review comments changes --- .devcontainer/devcontainer.json | 6 ++- .vscode/launch.json | 25 +++++++-- Dockerfile | 9 ---- helm/blueapi/values.yaml | 13 +---- pyproject.toml | 6 ++- src/blueapi/cli/cli.py | 5 +- src/blueapi/service/interface.py | 20 -------- src/blueapi/service/main.py | 21 ++++++-- src/blueapi/service/runner.py | 21 +++++--- src/blueapi/worker/task_worker.py | 36 +++++++------ tests/unit_tests/core/fake_device_module.py | 2 +- tests/unit_tests/service/test_interface.py | 56 +++++++++------------ tests/unit_tests/service/test_rest_api.py | 28 +++-------- tests/unit_tests/service/test_runner.py | 18 +++---- 14 files changed, 118 insertions(+), 148 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4be3c2073..b66046bc2 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -5,6 +5,10 @@ "dockerfile": "../Dockerfile", "target": "developer" }, + "containerEnv": { + "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "http/protobuf", + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://127.0.0.1:4318", + }, "remoteEnv": { // Allow X11 apps to run inside the container "DISPLAY": "${localEnv:DISPLAY}" @@ -39,8 +43,6 @@ // Make sure SELinux does not disable with access to host filesystems like tmp "--security-opt=label=disable" ], - // Remove this before pushing, only to allow local testing with editable library - "mounts": [], // Mount the parent as /workspaces so we can pip install peers as editable "workspaceMount": "source=${localWorkspaceFolder}/..,target=/workspaces,type=bind", // After the container is created, install the python project in editable form diff --git a/.vscode/launch.json b/.vscode/launch.json index 00d42fb9b..f47618117 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -25,10 +25,20 @@ "request": "launch", "justMyCode": false, "module": "blueapi", + "args": [ + "serve" + ] + }, + { + "name": "Blueapi Server (Custom Config)", + "type": "debugpy", + "request": "launch", + "justMyCode": false, + "module": "blueapi", + "args": "--config ${input:config_path} serve", "env": { "OTLP_EXPORT_ENABLED": "false" }, - "args": "-c ${input:config_path} serve" }, { "name": "Blueapi Server (Custom Config)", @@ -44,9 +54,14 @@ "request": "launch", "justMyCode": false, "module": "blueapi", - "env": { - "OTLP_EXPORT_ENABLED": "false" - }, + "args": "controller ${input:controller_args}" + }, + { + "name": "Blueapi Controller (Custom Config)", + "type": "debugpy", + "request": "launch", + "justMyCode": false, + "module": "blueapi", "args": "-c ${input:config_path} controller ${input:args}" }, ], @@ -58,7 +73,7 @@ "default": "" }, { - "id": "args", + "id": "controller_args", "type": "promptString", "description": "Arguments to pass to controller", "default": "" diff --git a/Dockerfile b/Dockerfile index 601323921..dd1a2398f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,15 +12,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ RUN python -m venv /venv ENV PATH=/venv/bin:$PATH -ENV OTLP_EXPORT_ENABLED=false -# enable opentelemetry support -ENV OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http/protobuf -# Change this to point to Jaeger server before merging e.g. https://daq-services-jaeger -ENV OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:4318 -# Ensure that all Http headers are captured -ENV OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST=".*" -ENV OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE=".*" - # The build stage installs the context into the venv FROM developer as build COPY . /context diff --git a/helm/blueapi/values.yaml b/helm/blueapi/values.yaml index ee7d5b2b0..cd4e99ecb 100644 --- a/helm/blueapi/values.yaml +++ b/helm/blueapi/values.yaml @@ -78,15 +78,11 @@ listener: # Additional envVars to mount to the pod as a String extraEnvVars: | - name: OTLP_EXPORT_ENABLED - value: {{ .Values.tracing.otlp.export_enabled] }} + value: {{ .Values.tracing.otlp.export_enabled }} - name: OTEL_EXPORTER_OTLP_TRACES_PROTOCOL value: {{ .Values.tracing.otlp.protocol }} - name: OTEL_EXPORTER_OTLP_ENDPOINT value: "{{ .Values.tracing.otlp.host }}:{{ .Values.tracing.otlp.port }}" - - name: OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST - value: {{ .Values.tracing.http.request.headers }} - - name: OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE - value: {{ .Values.tracing.http.response.headers }} # - name: RABBITMQ_PASSWORD # valueFrom: # secretKeyRef: @@ -99,11 +95,6 @@ tracing: protocol: http/protobuf host: https://daq-services-jaeger # replace with central instance port: 4318 - http: - request: - headers: ".*" - response: - headers: ".*" # Config for the worker goes here, will be mounted into a config file worker: @@ -126,8 +117,6 @@ worker: password: guest host: rabbitmq port: 61613 - tracing_exporter: - host: "" initContainer: scratch: diff --git a/pyproject.toml b/pyproject.toml index bdf71adf0..e356d52e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "pyepics", "aioca", "pydantic>=2.0", - "scanspec>=0.7.2", + "scanspec>=0.7.6", "pydantic-settings", "stomp-py", "aiohttp", @@ -83,7 +83,9 @@ write_to = "src/blueapi/_version.py" [tool.mypy] ignore_missing_imports = true # Ignore missing stubs in imported modules -namespace_packages = true # necessary for tracing sdk to work with mypy + +# necessary for tracing sdk to work with mypy, set false once migraion to pyright complete +namespace_packages = true [tool.pytest.ini_options] # Run pytest with all our checkers, and don't spam us with massive tracebacks on error diff --git a/src/blueapi/cli/cli.py b/src/blueapi/cli/cli.py index c40d26beb..c98800c20 100644 --- a/src/blueapi/cli/cli.py +++ b/src/blueapi/cli/cli.py @@ -10,8 +10,6 @@ from bluesky_stomp.messaging import MessageContext, StompClient from bluesky_stomp.models import Broker from observability_utils.tracing import setup_tracing -from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor -from opentelemetry.trace import get_tracer_provider from pydantic import ValidationError from requests.exceptions import ConnectionError @@ -98,14 +96,13 @@ def start_application(obj: dict): """Only import the service functions when starting the service or generating the schema, not the controller as a new FastAPI app will be started each time. """ - from blueapi.service.main import app, start + from blueapi.service.main import start """ Set up basic automated instrumentation for the FastAPI app, creating the observability context. """ setup_tracing("BlueAPI", OTLP_EXPORT_ENABLED) - FastAPIInstrumentor().instrument_app(app, tracer_provider=get_tracer_provider()) start(config) diff --git a/src/blueapi/service/interface.py b/src/blueapi/service/interface.py index 739384fcb..2fe820abb 100644 --- a/src/blueapi/service/interface.py +++ b/src/blueapi/service/interface.py @@ -5,7 +5,6 @@ from bluesky_stomp.messaging import StompClient from bluesky_stomp.models import Broker, DestinationBase, MessageTopic -from observability_utils.tracing import get_tracer, use_propagated_context from blueapi.config import ApplicationConfig from blueapi.core.context import BlueskyContext @@ -18,8 +17,6 @@ """This module provides interface between web application and underlying Bluesky context and worker""" -TRACER = get_tracer("interface") -PROP_KEY = "carrier" _CONFIG: ApplicationConfig = ApplicationConfig() @@ -77,7 +74,6 @@ def stomp_client() -> StompClient | None: return None -@use_propagated_context def setup(config: ApplicationConfig) -> None: """Creates and starts a worker with supplied config""" @@ -90,7 +86,6 @@ def setup(config: ApplicationConfig) -> None: stomp_client() -@use_propagated_context def teardown() -> None: worker().stop() if (template := stomp_client()) is not None: @@ -115,43 +110,36 @@ def forward_message(event: Any, correlation_id: str | None) -> None: stream.subscribe(forward_message) -@use_propagated_context def get_plans() -> list[PlanModel]: """Get all available plans in the BlueskyContext""" return [PlanModel.from_plan(plan) for plan in context().plans.values()] -@use_propagated_context def get_plan(name: str) -> PlanModel: """Get plan by name from the BlueskyContext""" return PlanModel.from_plan(context().plans[name]) -@use_propagated_context def get_devices() -> list[DeviceModel]: """Get all available devices in the BlueskyContext""" return [DeviceModel.from_device(device) for device in context().devices.values()] -@use_propagated_context def get_device(name: str) -> DeviceModel: """Retrieve device by name from the BlueskyContext""" return DeviceModel.from_device(context().devices[name]) -@use_propagated_context def submit_task(task: Task) -> str: """Submit a task to be run on begin_task""" return worker().submit_task(task) -@use_propagated_context def clear_task(task_id: str) -> str: """Remove a task from the worker""" return worker().clear_task(task_id) -@use_propagated_context def begin_task(task: WorkerTask) -> WorkerTask: """Trigger a task. Will fail if the worker is busy""" if task.task_id is not None: @@ -159,51 +147,43 @@ def begin_task(task: WorkerTask) -> WorkerTask: return task -@use_propagated_context def get_tasks_by_status(status: TaskStatusEnum) -> list[TrackableTask]: """Retrieve a list of tasks based on their status.""" return worker().get_tasks_by_status(status) -@use_propagated_context def get_active_task() -> TrackableTask | None: """Task the worker is currently running""" return worker().get_active_task() -@use_propagated_context def get_worker_state() -> WorkerState: """State of the worker""" return worker().state -@use_propagated_context def pause_worker(defer: bool | None) -> None: """Command the worker to pause""" worker().pause(defer or False) -@use_propagated_context def resume_worker() -> None: """Command the worker to resume""" worker().resume() -@use_propagated_context def cancel_active_task(failure: bool, reason: str | None) -> str: """Remove the currently active task from the worker if there is one Returns the task_id of the active task""" return worker().cancel_active_task(failure, reason) -@use_propagated_context def get_tasks() -> list[TrackableTask]: """Return a list of all tasks on the worker, any one of which can be triggered with begin_task""" return worker().get_tasks() -@use_propagated_context def get_task_by_id(task_id: str) -> TrackableTask | None: """Returns a task matching the task ID supplied, if the worker knows of it""" diff --git a/src/blueapi/service/main.py b/src/blueapi/service/main.py index 99e120f96..c6f31359d 100644 --- a/src/blueapi/service/main.py +++ b/src/blueapi/service/main.py @@ -1,3 +1,4 @@ +from collections.abc import Callable from contextlib import asynccontextmanager from fastapi import ( @@ -16,7 +17,9 @@ start_as_current_span, ) from opentelemetry.context import attach +from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor from opentelemetry.propagate import get_global_textmap +from opentelemetry.trace import get_tracer_provider from pydantic import ValidationError from starlette.responses import JSONResponse from super_state_machine.errors import TransitionError @@ -43,7 +46,7 @@ RUNNER: WorkerDispatcher | None = None -CONTEXT_HDR = "traceparent" +CONTEXT_HEADER = "traceparent" def _runner() -> WorkerDispatcher: @@ -369,6 +372,12 @@ def start(config: ApplicationConfig): "%(asctime)s %(levelprefix)s %(client_addr)s" + " - '%(request_line)s' %(status_code)s" ) + FastAPIInstrumentor().instrument_app( + app, + tracer_provider=get_tracer_provider(), + http_capture_headers_server_request=[",*"], + http_capture_headers_server_response=[",*"], + ) app.state.config = config uvicorn.run(app, host=config.api.host, port=config.api.port) @@ -381,12 +390,16 @@ async def add_api_version_header(request: Request, call_next): @app.middleware("http") -async def inject_propagated_observability_context(request: Request, call_next): +async def inject_propagated_observability_context( + request: Request, call_next: Callable[[Request], Response] +) -> Response: """Middleware to extract the any prorpagated observability context from the HTTP headers and attatch it to the local one. """ - if CONTEXT_HDR in request.headers: - ctx = get_global_textmap().extract({CONTEXT_HDR: request.headers[CONTEXT_HDR]}) + if CONTEXT_HEADER in request.headers: + ctx = get_global_textmap().extract( + {CONTEXT_HEADER: request.headers[CONTEXT_HEADER]} + ) attach(ctx) response = await call_next(request) return response diff --git a/src/blueapi/service/runner.py b/src/blueapi/service/runner.py index 293cae2ee..8e2cd2958 100644 --- a/src/blueapi/service/runner.py +++ b/src/blueapi/service/runner.py @@ -13,6 +13,8 @@ get_tracer, start_as_current_span, ) +from opentelemetry.context import attach +from opentelemetry.propagate import get_global_textmap from pydantic import TypeAdapter from blueapi.config import ApplicationConfig @@ -113,7 +115,7 @@ def run( if self._use_subprocess: return self._run_in_subprocess(function, *args, **kwargs) else: - return function(get_context_propagator(), *args, **kwargs) + return function(*args, **kwargs) @start_as_current_span(TRACER, "function", "args", "kwargs") def _run_in_subprocess( @@ -122,10 +124,10 @@ def _run_in_subprocess( *args: P.args, **kwargs: P.kwargs, ) -> T: - """Call the supplied function, prepending its parameter list with the current - Span ID from the observability context, if one exists. this will allow functions - decorated with @use_propagated_context to use the corresponding span as their - parent span.""" + """Call the supplied function, passing the current Span ID, if one + exists,from the observability context inro the _rpc caller function. + When this is deserialized in and run by the subprocess, this will allow + its functions to use the corresponding span as their parent span.""" if self._subprocess is None: raise InvalidRunnerStateError("Subprocess runner has not been started") if not (hasattr(function, "__name__") and hasattr(function, "__module__")): @@ -143,7 +145,8 @@ def _run_in_subprocess( function.__module__, function.__name__, return_type, - (get_context_propagator(), *args), + get_context_propagator(), + *args, ), kwargs, ) @@ -165,9 +168,13 @@ def _rpc( module_name: str, function_name: str, expected_type: type[T] | None, - args: Any, + carrier: dict[str, Any] | None, + *args: Any, **kwargs: Any, ) -> T: + if carrier: + ctx = get_global_textmap().extract(carrier) + attach(ctx) mod = import_module(module_name) func: Callable[P, T] = _validate_function( mod.__dict__.get(function_name, None), function_name diff --git a/src/blueapi/worker/task_worker.py b/src/blueapi/worker/task_worker.py index 97e63e034..6e2c765d3 100644 --- a/src/blueapi/worker/task_worker.py +++ b/src/blueapi/worker/task_worker.py @@ -51,6 +51,7 @@ """ Initialise a Tracer for this module provided by the app's global TracerProvider. """ DEFAULT_START_STOP_TIMEOUT: float = 30.0 +WORKER_THREAD_STATE = "worker thread state" T = TypeVar("T") @@ -61,8 +62,8 @@ class TrackableTask(BlueapiBaseModel, Generic[T]): """ task_id: str - request_id: str = "" task: T + request_id: str = "" is_complete: bool = False is_pending: bool = True errors: list[str] = Field(default_factory=list) @@ -130,7 +131,7 @@ def __init__( setup_tracing("BlueAPIWorker", OTLP_EXPORT_ENABLED) @start_as_current_span(TRACER, "task_id") - def clear_task(self, task_id: str, carr: dict[str, Any] | None = None) -> str: + def clear_task(self, task_id: str) -> str: task = self._tasks.pop(task_id) return task.task_id @@ -139,7 +140,6 @@ def cancel_active_task( self, failure: bool = False, reason: str | None = None, - carr: dict[str, Any] | None = None, ) -> str: if self._current is None: # Persuades mypy that self._current is not None @@ -154,19 +154,15 @@ def cancel_active_task( return self._current.task_id @start_as_current_span(TRACER) - def get_tasks(self, carr: dict[str, Any] | None = None) -> list[TrackableTask]: + def get_tasks(self) -> list[TrackableTask]: return list(self._tasks.values()) @start_as_current_span(TRACER, "task_id") - def get_task_by_id( - self, task_id: str, carr: dict[str, Any] | None = None - ) -> TrackableTask | None: + def get_task_by_id(self, task_id: str) -> TrackableTask | None: return self._tasks.get(task_id) @start_as_current_span(TRACER, "status") - def get_tasks_by_status( - self, status: TaskStatusEnum, carr: dict[str, Any] | None = None - ) -> list[TrackableTask]: + def get_tasks_by_status(self, status: TaskStatusEnum) -> list[TrackableTask]: if status == TaskStatusEnum.RUNNING: return [ task @@ -180,16 +176,14 @@ def get_tasks_by_status( return [] @start_as_current_span(TRACER) - def get_active_task( - self, carr: dict[str, Any] | None = None - ) -> TrackableTask[Task] | None: + def get_active_task(self) -> TrackableTask[Task] | None: current = self._current if current is not None: add_span_attributes({"Active Task": current.task_id}) return current @start_as_current_span(TRACER, "task_id") - def begin_task(self, task_id: str, carr: dict[str, Any] | None = None) -> None: + def begin_task(self, task_id: str) -> None: task = self._tasks.get(task_id) if task is not None: self._submit_trackable_task(task) @@ -197,7 +191,7 @@ def begin_task(self, task_id: str, carr: dict[str, Any] | None = None) -> None: raise KeyError(f"No pending task with ID {task_id}") @start_as_current_span(TRACER, "task.name", "task.params") - def submit_task(self, task: Task, carr: dict[str, Any] | None = None) -> str: + def submit_task(self, task: Task) -> str: task.prepare_params(self._ctx) # Will raise if parameters are invalid task_id: str = str(uuid.uuid4()) add_span_attributes({"TaskId": task_id}) @@ -249,7 +243,8 @@ def start(self) -> None: self._wait_until_stopped() fut = run_worker_in_own_thread(self) self._wait_until_started() - add_span_attributes({"worker thread state": fut._state}) + + add_span_attributes({WORKER_THREAD_STATE: fut._state}) @start_as_current_span(TRACER) def stop(self) -> None: @@ -261,7 +256,7 @@ def stop(self) -> None: else: LOGGER.info("Stopping worker: nothing to do") self._wait_until_stopped() - add_span_attributes({"worker thread state": "STOPPED"}) + add_span_attributes({WORKER_THREAD_STATE: "STOPPED"}) LOGGER.info("Stopped") @start_as_current_span(TRACER) @@ -299,12 +294,12 @@ def run(self) -> None: self._stopped.set() @start_as_current_span(TRACER, "defer") - def pause(self, defer=False, carr: dict[str, Any] | None = None): + def pause(self, defer=False): LOGGER.info("Requesting to pause the worker") self._ctx.run_engine.request_pause(defer) @start_as_current_span(TRACER) - def resume(self, carr: dict[str, Any] | None = None): + def resume(self): LOGGER.info("Requesting to resume the worker") self._ctx.run_engine.resume() @@ -336,6 +331,9 @@ def _cycle(self) -> None: raise KeyError(f"Unknown command: {next_task}") except Exception as err: self._report_error(err) + finally: + if self._current is not None: + self._context_register.pop(self._current.task_id) if self._current is not None: self._current.is_complete = True diff --git a/tests/unit_tests/core/fake_device_module.py b/tests/unit_tests/core/fake_device_module.py index 0e2ea690c..d27af8480 100644 --- a/tests/unit_tests/core/fake_device_module.py +++ b/tests/unit_tests/core/fake_device_module.py @@ -32,7 +32,7 @@ def _mock_with_name(name: str) -> MagicMock: return mock -def wrong_return_type(*args, **kwargs) -> int: +def wrong_return_type() -> int: return "str" # type: ignore diff --git a/tests/unit_tests/service/test_interface.py b/tests/unit_tests/service/test_interface.py index bc55d6d06..654881916 100644 --- a/tests/unit_tests/service/test_interface.py +++ b/tests/unit_tests/service/test_interface.py @@ -16,10 +16,6 @@ from blueapi.worker.task import Task from blueapi.worker.task_worker import TrackableTask -TEST_CARRIER = { - "traceparent": "00-6fc19ca6e39c1fc845bb7c6fe28c7fdc-00babae1b7c9591d-01" -} - @pytest.fixture def mock_connection() -> Mock: @@ -40,7 +36,7 @@ def ensure_worker_stopped(): of an assertion error. The start_worker method is not managed by a fixture as some of the tests require it to be customised.""" yield - interface.teardown(TEST_CARRIER) + interface.teardown() def my_plan() -> MsgGenerator: @@ -60,7 +56,7 @@ def test_get_plans(context_mock: MagicMock): context.plan(my_second_plan) context_mock.return_value = context - assert interface.get_plans(TEST_CARRIER) == [ + assert interface.get_plans() == [ PlanModel( name="my_plan", description="My plan does cool stuff.", @@ -92,7 +88,7 @@ def test_get_plan(context_mock: MagicMock): context.plan(my_second_plan) context_mock.return_value = context - assert interface.get_plan(TEST_CARRIER, "my_plan") == PlanModel( + assert interface.get_plan("my_plan") == PlanModel( name="my_plan", description="My plan does cool stuff.", schema={ @@ -104,7 +100,7 @@ def test_get_plan(context_mock: MagicMock): ) with pytest.raises(KeyError): - interface.get_plan(TEST_CARRIER, "non_existing_plan") + interface.get_plan("non_existing_plan") @dataclass @@ -119,7 +115,7 @@ def test_get_devices(context_mock: MagicMock): context.device(SynAxis(name="my_axis")) context_mock.return_value = context - assert interface.get_devices(TEST_CARRIER) == [ + assert interface.get_devices() == [ DeviceModel(name="my_device", protocols=["HasName"]), DeviceModel( name="my_axis", @@ -147,12 +143,12 @@ def test_get_device(context_mock: MagicMock): context.device(MyDevice(name="my_device")) context_mock.return_value = context - assert interface.get_device(TEST_CARRIER, "my_device") == DeviceModel( + assert interface.get_device("my_device") == DeviceModel( name="my_device", protocols=["HasName"] ) with pytest.raises(KeyError): - assert interface.get_device(TEST_CARRIER, "non_existing_device") + assert interface.get_device("non_existing_device") @patch("blueapi.service.interface.context") @@ -164,7 +160,7 @@ def test_submit_task(context_mock: MagicMock): mock_uuid_value = "8dfbb9c2-7a15-47b6-bea8-b6b77c31d3d9" with patch.object(uuid, "uuid4") as uuid_mock: uuid_mock.return_value = uuid.UUID(mock_uuid_value) - task_uuid = interface.submit_task(TEST_CARRIER, task) + task_uuid = interface.submit_task(task) assert task_uuid == mock_uuid_value @@ -177,9 +173,9 @@ def test_clear_task(context_mock: MagicMock): mock_uuid_value = "3d858a62-b40a-400f-82af-8d2603a4e59a" with patch.object(uuid, "uuid4") as uuid_mock: uuid_mock.return_value = uuid.UUID(mock_uuid_value) - interface.submit_task(TEST_CARRIER, task) + interface.submit_task(task) - clear_task_return = interface.clear_task(TEST_CARRIER, mock_uuid_value) + clear_task_return = interface.clear_task(mock_uuid_value) assert clear_task_return == mock_uuid_value @@ -187,7 +183,7 @@ def test_clear_task(context_mock: MagicMock): def test_begin_task(worker_mock: MagicMock): uuid_value = "350043fd-597e-41a7-9a92-5d5478232cf7" task = WorkerTask(task_id=uuid_value) - returned_task = interface.begin_task(TEST_CARRIER, task) + returned_task = interface.begin_task(task) assert task == returned_task worker_mock.assert_called_once_with(uuid_value) @@ -195,7 +191,7 @@ def test_begin_task(worker_mock: MagicMock): @patch("blueapi.service.interface.TaskWorker.begin_task") def test_begin_task_no_task_id(worker_mock: MagicMock): task = WorkerTask(task_id=None) - returned_task = interface.begin_task(TEST_CARRIER, task) + returned_task = interface.begin_task(task) assert task == returned_task worker_mock.assert_not_called() @@ -216,37 +212,35 @@ def mock_tasks_by_status(status: TaskStatusEnum) -> list[TrackableTask]: get_tasks_by_status_mock.side_effect = mock_tasks_by_status - assert interface.get_tasks_by_status(TEST_CARRIER, TaskStatusEnum.PENDING) == [ + assert interface.get_tasks_by_status(TaskStatusEnum.PENDING) == [ pending_task1, pending_task2, ] - assert interface.get_tasks_by_status(TEST_CARRIER, TaskStatusEnum.RUNNING) == [ - running_task - ] - assert interface.get_tasks_by_status(TEST_CARRIER, TaskStatusEnum.COMPLETE) == [] + assert interface.get_tasks_by_status(TaskStatusEnum.RUNNING) == [running_task] + assert interface.get_tasks_by_status(TaskStatusEnum.COMPLETE) == [] def test_get_active_task(): - assert interface.get_active_task(TEST_CARRIER) is None + assert interface.get_active_task() is None def test_get_worker_state(): - assert interface.get_worker_state(TEST_CARRIER) == WorkerState.IDLE + assert interface.get_worker_state() == WorkerState.IDLE @patch("blueapi.service.interface.TaskWorker.pause") def test_pause_worker(pause_worker_mock: MagicMock): - interface.pause_worker(TEST_CARRIER, False) + interface.pause_worker(False) pause_worker_mock.assert_called_once_with(False) pause_worker_mock.reset_mock() - interface.pause_worker(TEST_CARRIER, True) + interface.pause_worker(True) pause_worker_mock.assert_called_once_with(True) @patch("blueapi.service.interface.TaskWorker.resume") def test_resume_worker(resume_worker_mock: MagicMock): - interface.resume_worker(TEST_CARRIER) + interface.resume_worker() resume_worker_mock.assert_called_once() @@ -256,7 +250,7 @@ def test_cancel_active_task(cancel_active_task_mock: MagicMock): reason = "End of session" task_id = "789" cancel_active_task_mock.return_value = task_id - assert interface.cancel_active_task(TEST_CARRIER, fail, reason) == task_id + assert interface.cancel_active_task(fail, reason) == task_id cancel_active_task_mock.assert_called_once_with(fail, reason) @@ -269,7 +263,7 @@ def test_get_tasks(get_tasks_mock: MagicMock): ] get_tasks_mock.return_value = tasks - assert interface.get_tasks(TEST_CARRIER) == tasks + assert interface.get_tasks() == tasks @patch("blueapi.service.interface.context") @@ -278,11 +272,9 @@ def test_get_task_by_id(context_mock: MagicMock): context.plan(my_plan) context_mock.return_value = context - task_id = interface.submit_task(TEST_CARRIER, Task(name="my_plan")) + task_id = interface.submit_task(Task(name="my_plan")) - assert interface.get_task_by_id( - TEST_CARRIER, task_id - ) == TrackableTask.model_construct( + assert interface.get_task_by_id(task_id) == TrackableTask.model_construct( task_id=task_id, request_id=ANY, task=Task(name="my_plan", params={}), diff --git a/tests/unit_tests/service/test_rest_api.py b/tests/unit_tests/service/test_rest_api.py index 3eba9cdde..decf2a483 100644 --- a/tests/unit_tests/service/test_rest_api.py +++ b/tests/unit_tests/service/test_rest_api.py @@ -23,22 +23,6 @@ from blueapi.worker.task_worker import TrackableTask -class _ANY_DICT(dict): - "A helper object that compares equal to all dictionaries." - - def __eq__(self, other): - return isinstance(other, dict) - - def __ne__(self, other): - return False - - def __repr__(self): - return "" - - -ANY_DICT = _ANY_DICT() - - @pytest.fixture def client() -> Iterator[TestClient]: with ( @@ -86,7 +70,7 @@ class MyModel(BaseModel): response = client.get("/plans/my-plan") - get_plan_mock.assert_called_once_with(ANY_DICT, "my-plan") + get_plan_mock.assert_called_once_with("my-plan") assert response.status_code == status.HTTP_200_OK assert response.json() == { "description": None, @@ -144,7 +128,7 @@ class MyDevice: get_device_mock.return_value = DeviceModel.from_device(device) response = client.get("/devices/my-device") - get_device_mock.assert_called_once_with(ANY_DICT, "my-device") + get_device_mock.assert_called_once_with("my-device") assert response.status_code == status.HTTP_200_OK assert response.json() == { "name": "my-device", @@ -175,7 +159,7 @@ def test_create_task( response = client.post("/tasks", json=task.model_dump()) - submit_task_mock.assert_called_once_with(ANY_DICT, task) + submit_task_mock.assert_called_once_with(task) assert response.json() == {"task_id": task_id} @@ -485,7 +469,7 @@ def test_set_state_running_to_paused( "/worker/state", json=StateChangeRequest(new_state=final_state).model_dump() ) - pause_worker_mock.assert_called_once_with(ANY_DICT, False) + pause_worker_mock.assert_called_once_with(False) assert response.status_code == status.HTTP_202_ACCEPTED assert response.json() == final_state @@ -523,7 +507,7 @@ def test_set_state_running_to_aborting( "/worker/state", json=StateChangeRequest(new_state=final_state).model_dump() ) - cancel_active_task_mock.assert_called_once_with(ANY_DICT, True, None) + cancel_active_task_mock.assert_called_once_with(True, None) assert response.status_code == status.HTTP_202_ACCEPTED assert response.json() == final_state @@ -545,7 +529,7 @@ def test_set_state_running_to_stopping_including_reason( json=StateChangeRequest(new_state=final_state, reason=reason).model_dump(), ) - cancel_active_task_mock.assert_called_once_with(ANY_DICT, False, reason) + cancel_active_task_mock.assert_called_once_with(False, reason) assert response.status_code == status.HTTP_202_ACCEPTED assert response.json() == final_state diff --git a/tests/unit_tests/service/test_runner.py b/tests/unit_tests/service/test_runner.py index bcce9874d..af65e9b0a 100644 --- a/tests/unit_tests/service/test_runner.py +++ b/tests/unit_tests/service/test_runner.py @@ -186,42 +186,42 @@ class GenericModel(BaseModel, Generic[T]): b: str -def return_int(*args, **kwargs) -> int: +def return_int() -> int: return 1 -def return_str(*args, **kwargs) -> str: +def return_str() -> str: return "hello" -def return_list(*args, **kwargs) -> list[int]: +def return_list() -> list[int]: return [1, 2, 3] -def return_dict(*args, **kwargs) -> dict[str, int]: +def return_dict() -> dict[str, int]: return { "test": 1, "other_test": 2, } -def return_simple_model(*args, **kwargs) -> SimpleModel: +def return_simple_model() -> SimpleModel: return SimpleModel(a=1, b="hi") -def return_nested_model(*args, **kwargs) -> NestedModel: +def return_nested_model() -> NestedModel: return NestedModel(nested=return_simple_model(), c=False) -def return_unbound_generic_model(*args, **kwargs) -> GenericModel: +def return_unbound_generic_model() -> GenericModel: return GenericModel(a="foo", b="bar") -def return_bound_generic_model(*args, **kwargs) -> GenericModel[int]: +def return_bound_generic_model() -> GenericModel[int]: return GenericModel(a=1, b="hi") -def return_explicitly_bound_generic_model(*args, **kwargs) -> GenericModel[int]: +def return_explicitly_bound_generic_model() -> GenericModel[int]: return GenericModel[int](a=1, b="hi") From 41f2d0f8a24986d7cc975d4c0a3ce3c6b6159ef1 Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Fri, 25 Oct 2024 09:27:46 +0000 Subject: [PATCH 05/12] yaml format --- helm/blueapi/values.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/helm/blueapi/values.yaml b/helm/blueapi/values.yaml index cd4e99ecb..fe35c33e2 100644 --- a/helm/blueapi/values.yaml +++ b/helm/blueapi/values.yaml @@ -103,14 +103,14 @@ worker: port: 8000 env: sources: - - kind: deviceFunctions - module: blueapi.startup.example_devices - - kind: planFunctions - module: blueapi.startup.example_plans - - kind: planFunctions - module: dls_bluesky_core.plans - - kind: planFunctions - module: dls_bluesky_core.stubs + - kind: deviceFunctions + module: blueapi.startup.example_devices + - kind: planFunctions + module: blueapi.startup.example_plans + - kind: planFunctions + module: dls_bluesky_core.plans + - kind: planFunctions + module: dls_bluesky_core.stubs stomp: auth: username: guest From bbcd9533be54001fb51c0e9198662a4c4991819e Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Fri, 25 Oct 2024 13:26:17 +0000 Subject: [PATCH 06/12] Correct typedef --- src/blueapi/service/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/blueapi/service/main.py b/src/blueapi/service/main.py index c6f31359d..80c0a330e 100644 --- a/src/blueapi/service/main.py +++ b/src/blueapi/service/main.py @@ -1,4 +1,4 @@ -from collections.abc import Callable +from collections.abc import Awaitable, Callable from contextlib import asynccontextmanager from fastapi import ( @@ -391,7 +391,7 @@ async def add_api_version_header(request: Request, call_next): @app.middleware("http") async def inject_propagated_observability_context( - request: Request, call_next: Callable[[Request], Response] + request: Request, call_next: Callable[[Request], Awaitable[Response]] ) -> Response: """Middleware to extract the any prorpagated observability context from the HTTP headers and attatch it to the local one. From d43546507c273b4da6f92cf89d50b1fa0b4c35d4 Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Fri, 25 Oct 2024 14:39:38 +0000 Subject: [PATCH 07/12] avoid race --- src/blueapi/worker/task_worker.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/blueapi/worker/task_worker.py b/src/blueapi/worker/task_worker.py index 6e2c765d3..f4a7914c7 100644 --- a/src/blueapi/worker/task_worker.py +++ b/src/blueapi/worker/task_worker.py @@ -223,8 +223,8 @@ def mark_task_as_started(event: WorkerEvent, _: str | None) -> None: LOGGER.info(f"Submitting: {trackable_task}") try: - sub = self.worker_events.subscribe(mark_task_as_started) self._context_register[trackable_task.task_id] = get_trace_context() + sub = self.worker_events.subscribe(mark_task_as_started) """ Cache the current trace context as the one for this task id """ self._task_channel.put_nowait(trackable_task) task_started.wait(timeout=5.0) @@ -332,7 +332,10 @@ def _cycle(self) -> None: except Exception as err: self._report_error(err) finally: - if self._current is not None: + if ( + self._current is not None + and self._current.task_id in self._context_register + ): self._context_register.pop(self._current.task_id) if self._current is not None: From 8cb2e0be0b23ee655e085f32540bafc52a8ed6ac Mon Sep 17 00:00:00 2001 From: Callum Forrester Date: Mon, 28 Oct 2024 14:03:12 +0000 Subject: [PATCH 08/12] Remove context register (#693) --- src/blueapi/worker/task_worker.py | 63 +++++++++++++++++-------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/src/blueapi/worker/task_worker.py b/src/blueapi/worker/task_worker.py index f4a7914c7..10cee7017 100644 --- a/src/blueapi/worker/task_worker.py +++ b/src/blueapi/worker/task_worker.py @@ -98,7 +98,7 @@ class TaskWorker: _started: Event _stopping: Event _stopped: Event - _context_register: dict[str, Context] + _current_task_otel_context: Context | None def __init__( self, @@ -127,7 +127,7 @@ def __init__( self._stopped = Event() self._stopped.set() self._broadcast_statuses = broadcast_statuses - self._context_register = {} + self._current_task_otel_context = None setup_tracing("BlueAPIWorker", OTLP_EXPORT_ENABLED) @start_as_current_span(TRACER, "task_id") @@ -217,13 +217,11 @@ def mark_task_as_started(event: WorkerEvent, _: str | None) -> None: if ( event.task_status is not None and event.task_status.task_id == trackable_task.task_id - and trackable_task.task_id in self._context_register ): task_started.set() LOGGER.info(f"Submitting: {trackable_task}") try: - self._context_register[trackable_task.task_id] = get_trace_context() sub = self.worker_events.subscribe(mark_task_as_started) """ Cache the current trace context as the one for this task id """ self._task_channel.put_nowait(trackable_task) @@ -316,9 +314,12 @@ def _cycle(self) -> None: LOGGER.info("Awaiting task") next_task: TrackableTask | KillSignal = self._task_channel.get() if isinstance(next_task, TrackableTask): - add_span_attributes({"next_task.task_id": next_task.task_id}) LOGGER.info(f"Got new task: {next_task}") self._current = next_task # Informing mypy that the task is not None + + self._current_task_otel_context = get_trace_context() + add_span_attributes({"next_task.task_id": next_task.task_id}) + self._current.is_pending = False self._current.task.do_task(self._ctx) elif isinstance(next_task, KillSignal): @@ -332,11 +333,8 @@ def _cycle(self) -> None: except Exception as err: self._report_error(err) finally: - if ( - self._current is not None - and self._current.task_id in self._context_register - ): - self._context_register.pop(self._current.task_id) + if self._current_task_otel_context is not None: + self._current_task_otel_context = None if self._current is not None: self._current.is_complete = True @@ -413,26 +411,35 @@ def _report_status( def _on_document(self, name: str, document: Mapping[str, Any]) -> None: if self._current is not None: - with TRACER.start_as_current_span( - "_on_document", - context=self._context_register[self._current.task_id], - kind=SpanKind.PRODUCER, - ): - """Start a new span but inject the context cached when the current task - was created. This will make the documents received part of the same - trace.""" - add_span_attributes( - { - "task_id": self._current.task_id, - "name": name, - "document": str(document), - } + if self._current_task_otel_context is not None: + with TRACER.start_as_current_span( + "_on_document", + context=self._current_task_otel_context, + kind=SpanKind.PRODUCER, + ): + """ + Start a new span but inject the context cached when the current task + was created. This will make the documents received part of the same + trace. + """ + add_span_attributes( + { + "task_id": self._current.task_id, + "name": name, + "document": str(document), + } + ) + + correlation_id = self._current.request_id + self._data_events.publish( + DataEvent(name=name, doc=document), correlation_id + ) + else: + raise ValueError( + "There is no context set for tracing despite the fact that a task" + " is running, something has gone wrong..." ) - correlation_id = self._current.request_id - self._data_events.publish( - DataEvent(name=name, doc=document), correlation_id - ) else: raise KeyError( "Trying to emit a document despite the fact that the RunEngine is idle" From 5f2a54005639dd8061b3a21b2eb890d74c86ab7a Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Thu, 31 Oct 2024 13:30:14 +0000 Subject: [PATCH 09/12] Test failure diagnosis --- src/blueapi/service/runner.py | 4 +- src/blueapi/worker/task_worker.py | 25 ++- tests/conftest.py | 20 +++ tests/unit_tests/client/test_client.py | 167 ++++++++++++++++++++ tests/unit_tests/service/test_runner.py | 9 ++ tests/unit_tests/utils/test_tracing.py | 65 ++++++++ tests/unit_tests/worker/test_task_worker.py | 36 ++++- 7 files changed, 315 insertions(+), 11 deletions(-) create mode 100644 tests/unit_tests/utils/test_tracing.py diff --git a/src/blueapi/service/runner.py b/src/blueapi/service/runner.py index 8e2cd2958..658fe8a5e 100644 --- a/src/blueapi/service/runner.py +++ b/src/blueapi/service/runner.py @@ -5,7 +5,7 @@ from importlib import import_module from multiprocessing import Pool, set_start_method from multiprocessing.pool import Pool as PoolClass -from typing import Any, Concatenate, ParamSpec, TypeVar +from typing import Any, ParamSpec, TypeVar from observability_utils.tracing import ( add_span_attributes, @@ -104,7 +104,7 @@ def stop(self): @start_as_current_span(TRACER, "function", "args", "kwargs") def run( self, - function: Callable[Concatenate[dict[str, Any], P], T], + function: Callable[P, T], *args: P.args, **kwargs: P.kwargs, ) -> T: diff --git a/src/blueapi/worker/task_worker.py b/src/blueapi/worker/task_worker.py index 10cee7017..2c8ba35d7 100644 --- a/src/blueapi/worker/task_worker.py +++ b/src/blueapi/worker/task_worker.py @@ -222,6 +222,7 @@ def mark_task_as_started(event: WorkerEvent, _: str | None) -> None: LOGGER.info(f"Submitting: {trackable_task}") try: + self._current_task_otel_context = get_trace_context() sub = self.worker_events.subscribe(mark_task_as_started) """ Cache the current trace context as the one for this task id """ self._task_channel.put_nowait(trackable_task) @@ -314,14 +315,22 @@ def _cycle(self) -> None: LOGGER.info("Awaiting task") next_task: TrackableTask | KillSignal = self._task_channel.get() if isinstance(next_task, TrackableTask): - LOGGER.info(f"Got new task: {next_task}") - self._current = next_task # Informing mypy that the task is not None - - self._current_task_otel_context = get_trace_context() - add_span_attributes({"next_task.task_id": next_task.task_id}) - - self._current.is_pending = False - self._current.task.do_task(self._ctx) + if self._current_task_otel_context is not None: + with TRACER.start_as_current_span( + "_cycle", + context=self._current_task_otel_context, + kind=SpanKind.SERVER, + ): + LOGGER.info(f"Got new task: {next_task}") + self._current = ( + next_task # Informing mypy that the task is not None + ) + + self._current_task_otel_context = get_trace_context() + add_span_attributes({"next_task.task_id": next_task.task_id}) + + self._current.is_pending = False + self._current.task.do_task(self._ctx) elif isinstance(next_task, KillSignal): # If we receive a kill signal we begin to shut the worker down. # Note that the kill signal is explicitly not a type of task as we don't diff --git a/tests/conftest.py b/tests/conftest.py index 838d4b219..7ff58847e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,16 @@ import asyncio +from typing import cast # Based on https://docs.pytest.org/en/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option # noqa: E501 import pytest from bluesky import RunEngine from bluesky.run_engine import TransitionError +from observability_utils.tracing import setup_tracing +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.trace import get_tracer_provider + +from tests.unit_tests.utils.test_tracing import JsonObjectSpanExporter @pytest.fixture(scope="function") @@ -24,3 +31,16 @@ def clean_event_loop(): request.addfinalizer(clean_event_loop) return RE + + +@pytest.fixture +def provider() -> TracerProvider: + setup_tracing("test", False) + return cast(TracerProvider, get_tracer_provider()) + + +@pytest.fixture +def exporter(provider: TracerProvider) -> JsonObjectSpanExporter: + exporter = JsonObjectSpanExporter() + provider.add_span_processor(BatchSpanProcessor(exporter)) + return exporter diff --git a/tests/unit_tests/client/test_client.py b/tests/unit_tests/client/test_client.py index f2ea986fd..1cff09ace 100644 --- a/tests/unit_tests/client/test_client.py +++ b/tests/unit_tests/client/test_client.py @@ -3,6 +3,7 @@ import pytest from bluesky_stomp.messaging import MessageContext +from tests.unit_tests.utils.test_tracing import JsonObjectSpanExporter, span_exporter from blueapi.client.client import BlueapiClient from blueapi.client.event_bus import AnyEvent, BlueskyStreamingError, EventBusClient @@ -98,10 +99,20 @@ def test_get_plans(client: BlueapiClient): assert client.get_plans() == PLANS +def test_get_plans_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with span_exporter(exporter, "get_plans"): + assert client.get_plans() == PLANS + + def test_get_plan(client: BlueapiClient): assert client.get_plan("foo") == PLAN +def test_get_plan_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with span_exporter(exporter, "get_plan", "name"): + assert client.get_plan("foo") == PLAN + + def test_get_nonexistant_plan( client: BlueapiClient, mock_rest: Mock, @@ -115,10 +126,20 @@ def test_get_devices(client: BlueapiClient): assert client.get_devices() == DEVICES +def test_get_devices_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with span_exporter(exporter, "get_devices"): + assert client.get_devices() == DEVICES + + def test_get_device(client: BlueapiClient): assert client.get_device("foo") == DEVICE +def test_get_device_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with span_exporter(exporter, "get_device", "name"): + assert client.get_device("foo") == DEVICE + + def test_get_nonexistant_device( client: BlueapiClient, mock_rest: Mock, @@ -132,10 +153,20 @@ def test_get_state(client: BlueapiClient): assert client.get_state() == WorkerState.IDLE +def test_get_state_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with span_exporter(exporter, "get_state"): + assert client.get_state() == WorkerState.IDLE + + def test_get_task(client: BlueapiClient): assert client.get_task("foo") == TASK +def test_get_task_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with span_exporter(exporter, "get_task", "task_id"): + assert client.get_task("foo") == TASK + + def test_get_nonexistent_task( client: BlueapiClient, mock_rest: Mock, @@ -157,6 +188,14 @@ def test_get_all_tasks( assert client.get_all_tasks() == TASKS +def test_get_all_tasks_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, +): + with span_exporter(exporter, "get_all_tasks"): + assert client.get_all_tasks() == TASKS + + def test_create_task( client: BlueapiClient, mock_rest: Mock, @@ -165,6 +204,16 @@ def test_create_task( mock_rest.create_task.assert_called_once_with(Task(name="foo")) +def test_create_task_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with span_exporter(exporter, "create_task", "task"): + client.create_task(task=Task(name="foo")) + mock_rest.create_task.assert_called_once_with(Task(name="foo")) + + def test_create_task_does_not_start_task( client: BlueapiClient, mock_rest: Mock, @@ -181,10 +230,27 @@ def test_clear_task( mock_rest.clear_task.assert_called_once_with("foo") +def test_clear_task_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with span_exporter(exporter, "clear_task"): + client.clear_task(task_id="foo") + mock_rest.clear_task.assert_called_once_with("foo") + + def test_get_active_task(client: BlueapiClient): assert client.get_active_task() == ACTIVE_TASK +def test_get_active_task_span_ok( + exporter: JsonObjectSpanExporter, client: BlueapiClient +): + with span_exporter(exporter, "get_active_task"): + assert client.get_active_task() == ACTIVE_TASK + + def test_start_task( client: BlueapiClient, mock_rest: Mock, @@ -193,6 +259,16 @@ def test_start_task( mock_rest.update_worker_task.assert_called_once_with(WorkerTask(task_id="bar")) +def test_start_task_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with span_exporter(exporter, "start_task", "task"): + client.start_task(task=WorkerTask(task_id="bar")) + mock_rest.update_worker_task.assert_called_once_with(WorkerTask(task_id="bar")) + + def test_start_nonexistant_task( client: BlueapiClient, mock_rest: Mock, @@ -213,6 +289,19 @@ def test_create_and_start_task_calls_both_creating_and_starting_endpoints( mock_rest.update_worker_task.assert_called_once_with(WorkerTask(task_id="baz")) +def test_create_and_start_task_calls_both_creating_and_starting_endpoints_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + mock_rest.create_task.return_value = TaskResponse(task_id="baz") + mock_rest.update_worker_task.return_value = TaskResponse(task_id="baz") + with span_exporter(exporter, "create_and_start_task", "task"): + client.create_and_start_task(Task(name="baz")) + mock_rest.create_task.assert_called_once_with(Task(name="baz")) + mock_rest.update_worker_task.assert_called_once_with(WorkerTask(task_id="baz")) + + def test_create_and_start_task_fails_if_task_creation_fails( client: BlueapiClient, mock_rest: Mock, @@ -246,6 +335,13 @@ def test_get_environment(client: BlueapiClient): assert client.get_environment() == ENV +def test_get_environment_span_ok( + exporter: JsonObjectSpanExporter, client: BlueapiClient +): + with span_exporter(exporter, "get_environment"): + assert client.get_environment() == ENV + + def test_reload_environment( client: BlueapiClient, mock_rest: Mock, @@ -255,6 +351,17 @@ def test_reload_environment( mock_rest.delete_environment.assert_called_once() +def test_reload_environment_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with span_exporter(exporter, "reload_environment"): + client.reload_environment() + mock_rest.get_environment.assert_called_once() + mock_rest.delete_environment.assert_called_once() + + def test_reload_environment_failure( client: BlueapiClient, mock_rest: Mock, @@ -277,6 +384,19 @@ def test_abort( ) +def test_abort_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with span_exporter(exporter, "abort", "reason"): + client.abort(reason="foo") + mock_rest.cancel_current_task.assert_called_once_with( + WorkerState.ABORTING, + reason="foo", + ) + + def test_stop( client: BlueapiClient, mock_rest: Mock, @@ -285,6 +405,16 @@ def test_stop( mock_rest.cancel_current_task.assert_called_once_with(WorkerState.STOPPING) +def test_stop_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with span_exporter(exporter, "stop"): + client.stop() + mock_rest.cancel_current_task.assert_called_once_with(WorkerState.STOPPING) + + def test_pause( client: BlueapiClient, mock_rest: Mock, @@ -296,6 +426,19 @@ def test_pause( ) +def test_pause_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with span_exporter(exporter, "pause"): + client.pause(defer=True) + mock_rest.set_state.assert_called_once_with( + WorkerState.PAUSED, + defer=True, + ) + + def test_resume( client: BlueapiClient, mock_rest: Mock, @@ -307,6 +450,19 @@ def test_resume( ) +def test_resume_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with span_exporter(exporter, "resume"): + client.resume() + mock_rest.set_state.assert_called_once_with( + WorkerState.RUNNING, + defer=False, + ) + + def test_cannot_run_task_without_message_bus(client: BlueapiClient): with pytest.raises( RuntimeError, @@ -315,6 +471,17 @@ def test_cannot_run_task_without_message_bus(client: BlueapiClient): client.run_task(Task(name="foo")) +def test_cannot_run_task_without_message_bus_span_ok( + exporter: JsonObjectSpanExporter, client: BlueapiClient +): + with pytest.raises( + RuntimeError, + match="Cannot run plans without Stomp configuration to track progress", + ): + with span_exporter(exporter, "grun_task"): + client.run_task(Task(name="foo")) + + def test_run_task_sets_up_control( client_with_events: BlueapiClient, mock_rest: Mock, diff --git a/tests/unit_tests/service/test_runner.py b/tests/unit_tests/service/test_runner.py index af65e9b0a..2a308003c 100644 --- a/tests/unit_tests/service/test_runner.py +++ b/tests/unit_tests/service/test_runner.py @@ -5,6 +5,7 @@ import pytest from ophyd import Callable from pydantic import BaseModel, ValidationError +from tests.unit_tests.utils.test_tracing import JsonObjectSpanExporter, span_exporter from blueapi.service import interface from blueapi.service.model import EnvironmentResponse @@ -55,6 +56,14 @@ def test_raises_if_used_before_started(runner: WorkerDispatcher): runner.run(interface.get_plans) +def test_raises_if_used_before_started_span_ok( + exporter: JsonObjectSpanExporter, runner: WorkerDispatcher +): + with pytest.raises(InvalidRunnerStateError): + with span_exporter(exporter, "run", "function", "args", "kwargs"): + runner.run(interface.get_plans) + + def test_error_on_runner_setup(local_runner: WorkerDispatcher): expected_state = EnvironmentResponse( initialized=False, diff --git a/tests/unit_tests/utils/test_tracing.py b/tests/unit_tests/utils/test_tracing.py new file mode 100644 index 000000000..c94b3cde4 --- /dev/null +++ b/tests/unit_tests/utils/test_tracing.py @@ -0,0 +1,65 @@ +from collections.abc import Callable, Sequence +from concurrent.futures import Future +from contextlib import contextmanager +from typing import IO + +from opentelemetry.sdk.trace import ReadableSpan +from opentelemetry.sdk.trace.export import ( + SpanExporter, + SpanExportResult, +) + + +class JsonObjectSpanExporter(SpanExporter): + """A custom span exporter to allow spans created by open telemetry tracing code to + be examined and verified during normal testing + """ + + def __init__( + self, + service_name: str | None = "Test", + out: IO | None = None, + formatter: Callable[[ReadableSpan], str] | None = None, + ): + self.service_name = service_name + self.top_span = Future() + + # def prime(self): + # if self.top_span is not None: + # try: + # self.top_span.result(timeout=0.0) + # except: + # pass + + def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult: + if self.top_span is not None and not self.top_span.done(): + self.top_span.set_result(spans[-1]) + return SpanExportResult.SUCCESS + + def force_flush(self, timeout_millis: int = 30000) -> bool: + return True + + +# setup_tracing("test", False) +# PROVIDER = cast(TracerProvider, get_tracer_provider()) +# EXPORTER = JsonObjectSpanExporter() +# PROVIDER.add_span_processor(BatchSpanProcessor(EXPORTER)) + + +@contextmanager +def span_exporter(exporter: JsonObjectSpanExporter, func_name: str, *span_args: str): + """Use as a with block around the function under test decorated with + start_as_current_span to check span creation and content. + + params: + func_name: The name of the function being tested + span_args: The arguments specified in its start_as_current_span decorator + """ + # EXPORTER.prime() + yield + if exporter.top_span is not None: + span = exporter.top_span.result(10) + exporter.top_span = None + assert span.name == func_name + for param in span_args: + assert param in span.attributes.keys() diff --git a/tests/unit_tests/worker/test_task_worker.py b/tests/unit_tests/worker/test_task_worker.py index 601a38d27..afa9b2902 100644 --- a/tests/unit_tests/worker/test_task_worker.py +++ b/tests/unit_tests/worker/test_task_worker.py @@ -7,6 +7,7 @@ from unittest.mock import ANY, MagicMock, patch import pytest +from tests.unit_tests.utils.test_tracing import JsonObjectSpanExporter, span_exporter from blueapi.config import EnvironmentConfig, Source, SourceKind from blueapi.core import BlueskyContext, EventStream, MsgGenerator @@ -87,6 +88,15 @@ def test_stop_doesnt_hang(inert_worker: TaskWorker) -> None: inert_worker.stop() +def test_stop_doesnt_hang_span_ok( + exporter: JsonObjectSpanExporter, inert_worker: TaskWorker +) -> None: + with span_exporter(exporter, "start"): + inert_worker.start() + with span_exporter(exporter, "stop"): + inert_worker.stop() + + def test_stop_is_idempontent_if_worker_not_started(inert_worker: TaskWorker) -> None: inert_worker.stop() @@ -111,7 +121,9 @@ def test_multi_start(inert_worker: TaskWorker) -> None: inert_worker.stop() -def test_submit_task(worker: TaskWorker) -> None: +def test_submit_task( + worker: TaskWorker, +) -> None: assert worker.get_tasks() == [] task_id = worker.submit_task(_SIMPLE_TASK) assert worker.get_tasks() == [ @@ -121,6 +133,20 @@ def test_submit_task(worker: TaskWorker) -> None: ] +def test_submit_task_span_ok( + exporter: JsonObjectSpanExporter, + worker: TaskWorker, +) -> None: + assert worker.get_tasks() == [] + with span_exporter(exporter, "submit_task", "task.name", "task.params"): + task_id = worker.submit_task(_SIMPLE_TASK) + assert worker.get_tasks() == [ + TrackableTask.model_construct( + task_id=task_id, request_id=ANY, task=_SIMPLE_TASK + ) + ] + + def test_submit_multiple_tasks(worker: TaskWorker) -> None: assert worker.get_tasks() == [] task_id_1 = worker.submit_task(_SIMPLE_TASK) @@ -194,6 +220,14 @@ def test_clear_nonexistent_task(worker: TaskWorker) -> None: worker.clear_task("foo") +def test_clear_nonexistent_task_span_ok( + exporter: JsonObjectSpanExporter, worker: TaskWorker +) -> None: + with pytest.raises(KeyError): + with span_exporter(exporter, "clear_task", "task_id"): + worker.clear_task("foo") + + def test_does_not_allow_simultaneous_running_tasks( worker: TaskWorker, fake_device: FakeDevice, From 9f52398cb692f1a3147b719df454215f4a41bcc8 Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Thu, 31 Oct 2024 15:00:31 +0000 Subject: [PATCH 10/12] tidy up --- .vscode/launch.json | 36 +++++++++++--------------- tests/unit_tests/utils/test_tracing.py | 13 ---------- 2 files changed, 15 insertions(+), 34 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index f47618117..8291038c7 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -25,9 +25,10 @@ "request": "launch", "justMyCode": false, "module": "blueapi", - "args": [ - "serve" - ] + "args": "serve", + "env": { + "OTLP_EXPORT_ENABLED": "false" + }, }, { "name": "Blueapi Server (Custom Config)", @@ -40,21 +41,16 @@ "OTLP_EXPORT_ENABLED": "false" }, }, - { - "name": "Blueapi Server (Custom Config)", - "type": "debugpy", - "request": "launch", - "justMyCode": false, - "module": "blueapi", - "args": "--config ${input:config_path} serve" - }, { "name": "Blueapi Controller", "type": "debugpy", "request": "launch", "justMyCode": false, "module": "blueapi", - "args": "controller ${input:controller_args}" + "args": "controller ${input:controller_args}", + "env": { + "OTLP_EXPORT_ENABLED": "false" + }, }, { "name": "Blueapi Controller (Custom Config)", @@ -62,16 +58,13 @@ "request": "launch", "justMyCode": false, "module": "blueapi", - "args": "-c ${input:config_path} controller ${input:args}" + "args": "-c ${input:config_path} controller ${input:controller_args}", + "env": { + "OTLP_EXPORT_ENABLED": "false" + }, }, ], "inputs": [ - { - "id": "config_path", - "type": "promptString", - "description": "Path to configuration YAML file", - "default": "" - }, { "id": "controller_args", "type": "promptString", @@ -81,7 +74,8 @@ { "id": "config_path", "type": "promptString", - "description": "Server config file path", - } + "description": "Path to configuration YAML file", + "default": "" + }, ] } \ No newline at end of file diff --git a/tests/unit_tests/utils/test_tracing.py b/tests/unit_tests/utils/test_tracing.py index c94b3cde4..f3b224653 100644 --- a/tests/unit_tests/utils/test_tracing.py +++ b/tests/unit_tests/utils/test_tracing.py @@ -24,13 +24,6 @@ def __init__( self.service_name = service_name self.top_span = Future() - # def prime(self): - # if self.top_span is not None: - # try: - # self.top_span.result(timeout=0.0) - # except: - # pass - def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult: if self.top_span is not None and not self.top_span.done(): self.top_span.set_result(spans[-1]) @@ -40,12 +33,6 @@ def force_flush(self, timeout_millis: int = 30000) -> bool: return True -# setup_tracing("test", False) -# PROVIDER = cast(TracerProvider, get_tracer_provider()) -# EXPORTER = JsonObjectSpanExporter() -# PROVIDER.add_span_processor(BatchSpanProcessor(EXPORTER)) - - @contextmanager def span_exporter(exporter: JsonObjectSpanExporter, func_name: str, *span_args: str): """Use as a with block around the function under test decorated with From d7499ea745d9dc35052e3a7cbf2a3eaa59796263 Mon Sep 17 00:00:00 2001 From: Keith Ralphs Date: Thu, 31 Oct 2024 16:20:51 +0000 Subject: [PATCH 11/12] rearrange tests and fix typing probs --- tests/unit_tests/client/test_client.py | 317 +++++++++----------- tests/unit_tests/service/test_runner.py | 20 +- tests/unit_tests/utils/test_tracing.py | 8 +- tests/unit_tests/worker/test_task_worker.py | 68 +++-- 4 files changed, 201 insertions(+), 212 deletions(-) diff --git a/tests/unit_tests/client/test_client.py b/tests/unit_tests/client/test_client.py index 1cff09ace..fc110a1b6 100644 --- a/tests/unit_tests/client/test_client.py +++ b/tests/unit_tests/client/test_client.py @@ -3,7 +3,10 @@ import pytest from bluesky_stomp.messaging import MessageContext -from tests.unit_tests.utils.test_tracing import JsonObjectSpanExporter, span_exporter +from tests.unit_tests.utils.test_tracing import ( + JsonObjectSpanExporter, + asserting_span_exporter, +) from blueapi.client.client import BlueapiClient from blueapi.client.event_bus import AnyEvent, BlueskyStreamingError, EventBusClient @@ -99,20 +102,10 @@ def test_get_plans(client: BlueapiClient): assert client.get_plans() == PLANS -def test_get_plans_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): - with span_exporter(exporter, "get_plans"): - assert client.get_plans() == PLANS - - def test_get_plan(client: BlueapiClient): assert client.get_plan("foo") == PLAN -def test_get_plan_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): - with span_exporter(exporter, "get_plan", "name"): - assert client.get_plan("foo") == PLAN - - def test_get_nonexistant_plan( client: BlueapiClient, mock_rest: Mock, @@ -126,20 +119,10 @@ def test_get_devices(client: BlueapiClient): assert client.get_devices() == DEVICES -def test_get_devices_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): - with span_exporter(exporter, "get_devices"): - assert client.get_devices() == DEVICES - - def test_get_device(client: BlueapiClient): assert client.get_device("foo") == DEVICE -def test_get_device_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): - with span_exporter(exporter, "get_device", "name"): - assert client.get_device("foo") == DEVICE - - def test_get_nonexistant_device( client: BlueapiClient, mock_rest: Mock, @@ -153,20 +136,10 @@ def test_get_state(client: BlueapiClient): assert client.get_state() == WorkerState.IDLE -def test_get_state_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): - with span_exporter(exporter, "get_state"): - assert client.get_state() == WorkerState.IDLE - - def test_get_task(client: BlueapiClient): assert client.get_task("foo") == TASK -def test_get_task_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): - with span_exporter(exporter, "get_task", "task_id"): - assert client.get_task("foo") == TASK - - def test_get_nonexistent_task( client: BlueapiClient, mock_rest: Mock, @@ -188,14 +161,6 @@ def test_get_all_tasks( assert client.get_all_tasks() == TASKS -def test_get_all_tasks_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, -): - with span_exporter(exporter, "get_all_tasks"): - assert client.get_all_tasks() == TASKS - - def test_create_task( client: BlueapiClient, mock_rest: Mock, @@ -204,16 +169,6 @@ def test_create_task( mock_rest.create_task.assert_called_once_with(Task(name="foo")) -def test_create_task_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, - mock_rest: Mock, -): - with span_exporter(exporter, "create_task", "task"): - client.create_task(task=Task(name="foo")) - mock_rest.create_task.assert_called_once_with(Task(name="foo")) - - def test_create_task_does_not_start_task( client: BlueapiClient, mock_rest: Mock, @@ -230,27 +185,10 @@ def test_clear_task( mock_rest.clear_task.assert_called_once_with("foo") -def test_clear_task_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, - mock_rest: Mock, -): - with span_exporter(exporter, "clear_task"): - client.clear_task(task_id="foo") - mock_rest.clear_task.assert_called_once_with("foo") - - def test_get_active_task(client: BlueapiClient): assert client.get_active_task() == ACTIVE_TASK -def test_get_active_task_span_ok( - exporter: JsonObjectSpanExporter, client: BlueapiClient -): - with span_exporter(exporter, "get_active_task"): - assert client.get_active_task() == ACTIVE_TASK - - def test_start_task( client: BlueapiClient, mock_rest: Mock, @@ -259,16 +197,6 @@ def test_start_task( mock_rest.update_worker_task.assert_called_once_with(WorkerTask(task_id="bar")) -def test_start_task_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, - mock_rest: Mock, -): - with span_exporter(exporter, "start_task", "task"): - client.start_task(task=WorkerTask(task_id="bar")) - mock_rest.update_worker_task.assert_called_once_with(WorkerTask(task_id="bar")) - - def test_start_nonexistant_task( client: BlueapiClient, mock_rest: Mock, @@ -289,19 +217,6 @@ def test_create_and_start_task_calls_both_creating_and_starting_endpoints( mock_rest.update_worker_task.assert_called_once_with(WorkerTask(task_id="baz")) -def test_create_and_start_task_calls_both_creating_and_starting_endpoints_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, - mock_rest: Mock, -): - mock_rest.create_task.return_value = TaskResponse(task_id="baz") - mock_rest.update_worker_task.return_value = TaskResponse(task_id="baz") - with span_exporter(exporter, "create_and_start_task", "task"): - client.create_and_start_task(Task(name="baz")) - mock_rest.create_task.assert_called_once_with(Task(name="baz")) - mock_rest.update_worker_task.assert_called_once_with(WorkerTask(task_id="baz")) - - def test_create_and_start_task_fails_if_task_creation_fails( client: BlueapiClient, mock_rest: Mock, @@ -335,13 +250,6 @@ def test_get_environment(client: BlueapiClient): assert client.get_environment() == ENV -def test_get_environment_span_ok( - exporter: JsonObjectSpanExporter, client: BlueapiClient -): - with span_exporter(exporter, "get_environment"): - assert client.get_environment() == ENV - - def test_reload_environment( client: BlueapiClient, mock_rest: Mock, @@ -351,17 +259,6 @@ def test_reload_environment( mock_rest.delete_environment.assert_called_once() -def test_reload_environment_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, - mock_rest: Mock, -): - with span_exporter(exporter, "reload_environment"): - client.reload_environment() - mock_rest.get_environment.assert_called_once() - mock_rest.delete_environment.assert_called_once() - - def test_reload_environment_failure( client: BlueapiClient, mock_rest: Mock, @@ -384,19 +281,6 @@ def test_abort( ) -def test_abort_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, - mock_rest: Mock, -): - with span_exporter(exporter, "abort", "reason"): - client.abort(reason="foo") - mock_rest.cancel_current_task.assert_called_once_with( - WorkerState.ABORTING, - reason="foo", - ) - - def test_stop( client: BlueapiClient, mock_rest: Mock, @@ -405,16 +289,6 @@ def test_stop( mock_rest.cancel_current_task.assert_called_once_with(WorkerState.STOPPING) -def test_stop_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, - mock_rest: Mock, -): - with span_exporter(exporter, "stop"): - client.stop() - mock_rest.cancel_current_task.assert_called_once_with(WorkerState.STOPPING) - - def test_pause( client: BlueapiClient, mock_rest: Mock, @@ -426,19 +300,6 @@ def test_pause( ) -def test_pause_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, - mock_rest: Mock, -): - with span_exporter(exporter, "pause"): - client.pause(defer=True) - mock_rest.set_state.assert_called_once_with( - WorkerState.PAUSED, - defer=True, - ) - - def test_resume( client: BlueapiClient, mock_rest: Mock, @@ -450,19 +311,6 @@ def test_resume( ) -def test_resume_span_ok( - exporter: JsonObjectSpanExporter, - client: BlueapiClient, - mock_rest: Mock, -): - with span_exporter(exporter, "resume"): - client.resume() - mock_rest.set_state.assert_called_once_with( - WorkerState.RUNNING, - defer=False, - ) - - def test_cannot_run_task_without_message_bus(client: BlueapiClient): with pytest.raises( RuntimeError, @@ -471,17 +319,6 @@ def test_cannot_run_task_without_message_bus(client: BlueapiClient): client.run_task(Task(name="foo")) -def test_cannot_run_task_without_message_bus_span_ok( - exporter: JsonObjectSpanExporter, client: BlueapiClient -): - with pytest.raises( - RuntimeError, - match="Cannot run plans without Stomp configuration to track progress", - ): - with span_exporter(exporter, "grun_task"): - client.run_task(Task(name="foo")) - - def test_run_task_sets_up_control( client_with_events: BlueapiClient, mock_rest: Mock, @@ -593,3 +430,149 @@ def callback(on_event: Callable[[AnyEvent, MessageContext], None]): client_with_events.run_task(Task(name="foo"), on_event=mock_on_event) mock_on_event.assert_called_once_with(COMPLETE_EVENT) + + +def test_get_plans_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with asserting_span_exporter(exporter, "get_plans"): + client.get_plans() + + +def test_get_plan_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with asserting_span_exporter(exporter, "get_plan", "name"): + client.get_plan("foo") + + +def test_get_devices_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with asserting_span_exporter(exporter, "get_devices"): + client.get_devices() + + +def test_get_device_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with asserting_span_exporter(exporter, "get_device", "name"): + client.get_device("foo") + + +def test_get_state_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with asserting_span_exporter(exporter, "get_state"): + client.get_state() + + +def test_get_task_span_ok(exporter: JsonObjectSpanExporter, client: BlueapiClient): + with asserting_span_exporter(exporter, "get_task", "task_id"): + client.get_task("foo") + + +def test_get_all_tasks_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, +): + with asserting_span_exporter(exporter, "get_all_tasks"): + client.get_all_tasks() + + +def test_create_task_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with asserting_span_exporter(exporter, "create_task", "task"): + client.create_task(task=Task(name="foo")) + + +def test_clear_task_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with asserting_span_exporter(exporter, "clear_task"): + client.clear_task(task_id="foo") + + +def test_get_active_task_span_ok( + exporter: JsonObjectSpanExporter, client: BlueapiClient +): + with asserting_span_exporter(exporter, "get_active_task"): + client.get_active_task() + + +def test_start_task_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with asserting_span_exporter(exporter, "start_task", "task"): + client.start_task(task=WorkerTask(task_id="bar")) + + +def test_create_and_start_task_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + mock_rest.create_task.return_value = TaskResponse(task_id="baz") + mock_rest.update_worker_task.return_value = TaskResponse(task_id="baz") + with asserting_span_exporter(exporter, "create_and_start_task", "task"): + client.create_and_start_task(Task(name="baz")) + + +def test_get_environment_span_ok( + exporter: JsonObjectSpanExporter, client: BlueapiClient +): + with asserting_span_exporter(exporter, "get_environment"): + client.get_environment() + + +def test_reload_environment_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with asserting_span_exporter(exporter, "reload_environment"): + client.reload_environment() + + +def test_abort_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with asserting_span_exporter(exporter, "abort", "reason"): + client.abort(reason="foo") + + +def test_stop_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with asserting_span_exporter(exporter, "stop"): + client.stop() + + +def test_pause_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with asserting_span_exporter(exporter, "pause"): + client.pause(defer=True) + + +def test_resume_span_ok( + exporter: JsonObjectSpanExporter, + client: BlueapiClient, + mock_rest: Mock, +): + with asserting_span_exporter(exporter, "resume"): + client.resume() + + +def test_cannot_run_task_span_ok( + exporter: JsonObjectSpanExporter, client: BlueapiClient +): + with pytest.raises( + RuntimeError, + match="Cannot run plans without Stomp configuration to track progress", + ): + with asserting_span_exporter(exporter, "grun_task"): + client.run_task(Task(name="foo")) diff --git a/tests/unit_tests/service/test_runner.py b/tests/unit_tests/service/test_runner.py index 2a308003c..82e06bc24 100644 --- a/tests/unit_tests/service/test_runner.py +++ b/tests/unit_tests/service/test_runner.py @@ -5,7 +5,10 @@ import pytest from ophyd import Callable from pydantic import BaseModel, ValidationError -from tests.unit_tests.utils.test_tracing import JsonObjectSpanExporter, span_exporter +from tests.unit_tests.utils.test_tracing import ( + JsonObjectSpanExporter, + asserting_span_exporter, +) from blueapi.service import interface from blueapi.service.model import EnvironmentResponse @@ -56,14 +59,6 @@ def test_raises_if_used_before_started(runner: WorkerDispatcher): runner.run(interface.get_plans) -def test_raises_if_used_before_started_span_ok( - exporter: JsonObjectSpanExporter, runner: WorkerDispatcher -): - with pytest.raises(InvalidRunnerStateError): - with span_exporter(exporter, "run", "function", "args", "kwargs"): - runner.run(interface.get_plans) - - def test_error_on_runner_setup(local_runner: WorkerDispatcher): expected_state = EnvironmentResponse( initialized=False, @@ -253,3 +248,10 @@ def test_accepts_return_type( rpc_function: Callable[[], Any], ): started_runner.run(rpc_function) + + +def test_run_span_ok( + exporter: JsonObjectSpanExporter, started_runner: WorkerDispatcher +): + with asserting_span_exporter(exporter, "run", "function", "args", "kwargs"): + started_runner.run(interface.get_plans) diff --git a/tests/unit_tests/utils/test_tracing.py b/tests/unit_tests/utils/test_tracing.py index f3b224653..59adcfe9f 100644 --- a/tests/unit_tests/utils/test_tracing.py +++ b/tests/unit_tests/utils/test_tracing.py @@ -22,7 +22,7 @@ def __init__( formatter: Callable[[ReadableSpan], str] | None = None, ): self.service_name = service_name - self.top_span = Future() + self.top_span: Future = Future() def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult: if self.top_span is not None and not self.top_span.done(): @@ -34,7 +34,9 @@ def force_flush(self, timeout_millis: int = 30000) -> bool: @contextmanager -def span_exporter(exporter: JsonObjectSpanExporter, func_name: str, *span_args: str): +def asserting_span_exporter( + exporter: JsonObjectSpanExporter, func_name: str, *span_args: str +): """Use as a with block around the function under test decorated with start_as_current_span to check span creation and content. @@ -42,11 +44,9 @@ def span_exporter(exporter: JsonObjectSpanExporter, func_name: str, *span_args: func_name: The name of the function being tested span_args: The arguments specified in its start_as_current_span decorator """ - # EXPORTER.prime() yield if exporter.top_span is not None: span = exporter.top_span.result(10) - exporter.top_span = None assert span.name == func_name for param in span_args: assert param in span.attributes.keys() diff --git a/tests/unit_tests/worker/test_task_worker.py b/tests/unit_tests/worker/test_task_worker.py index afa9b2902..97e6a9ee8 100644 --- a/tests/unit_tests/worker/test_task_worker.py +++ b/tests/unit_tests/worker/test_task_worker.py @@ -7,7 +7,10 @@ from unittest.mock import ANY, MagicMock, patch import pytest -from tests.unit_tests.utils.test_tracing import JsonObjectSpanExporter, span_exporter +from tests.unit_tests.utils.test_tracing import ( + JsonObjectSpanExporter, + asserting_span_exporter, +) from blueapi.config import EnvironmentConfig, Source, SourceKind from blueapi.core import BlueskyContext, EventStream, MsgGenerator @@ -88,15 +91,6 @@ def test_stop_doesnt_hang(inert_worker: TaskWorker) -> None: inert_worker.stop() -def test_stop_doesnt_hang_span_ok( - exporter: JsonObjectSpanExporter, inert_worker: TaskWorker -) -> None: - with span_exporter(exporter, "start"): - inert_worker.start() - with span_exporter(exporter, "stop"): - inert_worker.stop() - - def test_stop_is_idempontent_if_worker_not_started(inert_worker: TaskWorker) -> None: inert_worker.stop() @@ -133,20 +127,6 @@ def test_submit_task( ] -def test_submit_task_span_ok( - exporter: JsonObjectSpanExporter, - worker: TaskWorker, -) -> None: - assert worker.get_tasks() == [] - with span_exporter(exporter, "submit_task", "task.name", "task.params"): - task_id = worker.submit_task(_SIMPLE_TASK) - assert worker.get_tasks() == [ - TrackableTask.model_construct( - task_id=task_id, request_id=ANY, task=_SIMPLE_TASK - ) - ] - - def test_submit_multiple_tasks(worker: TaskWorker) -> None: assert worker.get_tasks() == [] task_id_1 = worker.submit_task(_SIMPLE_TASK) @@ -220,14 +200,6 @@ def test_clear_nonexistent_task(worker: TaskWorker) -> None: worker.clear_task("foo") -def test_clear_nonexistent_task_span_ok( - exporter: JsonObjectSpanExporter, worker: TaskWorker -) -> None: - with pytest.raises(KeyError): - with span_exporter(exporter, "clear_task", "task_id"): - worker.clear_task("foo") - - def test_does_not_allow_simultaneous_running_tasks( worker: TaskWorker, fake_device: FakeDevice, @@ -549,3 +521,35 @@ def test_get_tasks_by_status(worker: TaskWorker, status, expected_task_ids): result_ids = [task_id for task_id, task in worker._tasks.items() if task in result] assert result_ids == expected_task_ids + + +def test_start_span_ok( + exporter: JsonObjectSpanExporter, inert_worker: TaskWorker +) -> None: + with asserting_span_exporter(exporter, "start"): + inert_worker.start() + + +def test_stop_span_ok( + exporter: JsonObjectSpanExporter, inert_worker: TaskWorker +) -> None: + inert_worker.start() + with asserting_span_exporter(exporter, "stop"): + inert_worker.stop() + + +def test_submit_task_span_ok( + exporter: JsonObjectSpanExporter, + worker: TaskWorker, +) -> None: + assert worker.get_tasks() == [] + with asserting_span_exporter(exporter, "submit_task", "task.name", "task.params"): + worker.submit_task(_SIMPLE_TASK) + + +def test_clear_task_span_ok( + exporter: JsonObjectSpanExporter, worker: TaskWorker +) -> None: + with pytest.raises(KeyError): + with asserting_span_exporter(exporter, "clear_task", "task_id"): + worker.clear_task("foo") From 40aed1c1d45dec84d43581055b69f42aedcc3732 Mon Sep 17 00:00:00 2001 From: Callum Forrester Date: Thu, 31 Oct 2024 18:02:37 +0000 Subject: [PATCH 12/12] Update dependencies (#696) --- dev-requirements.txt | 24 ++++++++++++++++++++- tests/conftest.py | 3 ++- tests/unit_tests/utils/test_tracing.py | 2 +- tests/unit_tests/worker/test_task_worker.py | 1 + 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 2d16b8b48..cdcad8fd0 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -11,7 +11,9 @@ appdirs==1.4.4 asciitree==0.3.3 asgiref==3.8.1 asttokens==2.4.1 +async-timeout==4.0.3 attrs==24.2.0 +autodoc_pydantic==2.2.0 babel==2.16.0 beautifulsoup4==4.12.3 bidict==0.23.1 @@ -21,6 +23,7 @@ bluesky-kafka==0.10.0 bluesky-live==0.0.8 bluesky-stomp==0.1.2 boltons==24.0.0 +bump-pydantic==0.8.0 cachetools==5.5.0 caproto==1.1.1 certifi==2024.8.30 @@ -54,11 +57,14 @@ docopt==0.6.2 doct==1.1.0 docutils==0.21.2 dunamai==1.22.0 +email_validator==2.2.0 entrypoints==0.4 epicscorelibs==7.0.7.99.1.1 event-model==1.21.0 +exceptiongroup==1.2.2 executing==2.1.0 fastapi==0.115.3 +fastapi-cli==0.0.5 fasteners==0.19 filelock==3.16.1 flexcache==0.3 @@ -77,6 +83,7 @@ h5py==3.12.1 HeapDict==1.0.1 historydict==1.2.6 httpcore==1.0.6 +httptools==0.6.1 httpx==0.27.2 humanize==4.11.0 identify==2.6.1 @@ -89,14 +96,18 @@ iniconfig==2.0.0 intake==0.6.4 ipython==8.18.0 ipywidgets==8.1.5 +itsdangerous==2.2.0 jedi==0.19.1 Jinja2==3.1.4 jinja2-ansible-filters==1.3.2 +jsonpointer==3.0.0 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 jupyterlab_widgets==3.0.13 kiwisolver==1.4.7 ldap3==2.9.1 +libcst==1.4.0 +livereload==2.7.0 locket==1.0.0 lz4==4.3.3 markdown-it-py==3.0.0 @@ -171,6 +182,7 @@ py==1.11.0 pyasn1==0.6.1 pycryptodome==3.21.0 pydantic==2.9.2 +pydantic-extra-types==2.9.0 pydantic-settings==2.6.0 pydantic_core==2.23.4 pydantic_numpy==5.0.2 @@ -187,22 +199,25 @@ pytest-cov==5.0.0 pytest-random-order==1.1.1 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 +python-multipart==0.0.9 pytz==2024.2 PyYAML==6.0.2 +pyyaml-include==2.1 questionary==2.0.1 redis==5.2.0 redis-json-dict==0.2.1 referencing==0.35.1 requests==2.32.3 responses==0.25.3 +rich==13.7.1 rpds-py==0.20.0 ruamel.yaml==0.18.6 ruamel.yaml.clib==0.2.12 ruff==0.7.1 scanspec==0.7.6 semver==3.0.2 -setuptools==75.2.0 setuptools-dso==2.11 +shellingham==1.5.4 six==1.16.0 slicerator==1.1.0 smmap==5.0.1 @@ -214,6 +229,8 @@ sphinx-autobuild==2024.10.3 sphinx-autodoc-typehints==2.3.0 sphinx-click==6.0.0 sphinx-copybutton==0.5.2 +sphinx-jsonschema==1.19.1 +sphinx-pydantic==0.1.1 sphinx_design==0.6.1 sphinx_mdinclude==0.6.2 sphinxcontrib-applehelp==2.0.0 @@ -233,11 +250,14 @@ suitcase-msgpack==0.3.0 suitcase-utils==0.5.4 super-state-machine==2.0.2 tifffile==2024.9.20 +tomli==2.0.1 toolz==1.0.0 +tornado==6.4.1 tox==3.28.0 tox-direct==0.4 tqdm==4.66.5 traitlets==5.14.3 +typer==0.12.4 types-aiofiles==24.1.0.20240626 types-mock==5.1.0.20240425 types-PyYAML==6.0.12.20240917 @@ -247,8 +267,10 @@ typing-inspect==0.9.0 typing_extensions==4.12.2 tzdata==2024.2 tzlocal==5.2 +ujson==5.10.0 urllib3==2.2.3 uvicorn==0.32.0 +uvloop==0.19.0 virtualenv==20.27.0 watchfiles==0.24.0 wcwidth==0.2.13 diff --git a/tests/conftest.py b/tests/conftest.py index 7ff58847e..006dc6a57 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,5 +42,6 @@ def provider() -> TracerProvider: @pytest.fixture def exporter(provider: TracerProvider) -> JsonObjectSpanExporter: exporter = JsonObjectSpanExporter() - provider.add_span_processor(BatchSpanProcessor(exporter)) + processor = BatchSpanProcessor(exporter) + provider.add_span_processor(processor) return exporter diff --git a/tests/unit_tests/utils/test_tracing.py b/tests/unit_tests/utils/test_tracing.py index 59adcfe9f..7cd83640c 100644 --- a/tests/unit_tests/utils/test_tracing.py +++ b/tests/unit_tests/utils/test_tracing.py @@ -46,7 +46,7 @@ def asserting_span_exporter( """ yield if exporter.top_span is not None: - span = exporter.top_span.result(10) + span = exporter.top_span.result(timeout=5.0) assert span.name == func_name for param in span_args: assert param in span.attributes.keys() diff --git a/tests/unit_tests/worker/test_task_worker.py b/tests/unit_tests/worker/test_task_worker.py index 97e6a9ee8..9d9028a18 100644 --- a/tests/unit_tests/worker/test_task_worker.py +++ b/tests/unit_tests/worker/test_task_worker.py @@ -528,6 +528,7 @@ def test_start_span_ok( ) -> None: with asserting_span_exporter(exporter, "start"): inert_worker.start() + inert_worker.stop() def test_stop_span_ok(