From 0639985828c00f8f6d1d1fbe396d39647dfd5927 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 14:47:12 -0500 Subject: [PATCH 01/29] add observability config section --- README.md | 4 ++++ main.star | 10 ++++++++++ src/package_io/input_parser.star | 14 +++++++++++++- src/package_io/sanity_check.star | 11 +++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d7b36a93..9f8500a2 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,10 @@ The full YAML schema that can be passed in is as follows with the defaults provi ```yaml optimism_package: + # Observability configuration + observability: + # Whether or not to configure observability (e.g. prometheus) + enabled: true # Interop configuration interop: # Whether or not to enable interop mode diff --git a/main.star b/main.star index 923a7f12..fca916a3 100644 --- a/main.star +++ b/main.star @@ -40,6 +40,7 @@ def run(plan, args): global_log_level = optimism_args_with_right_defaults.global_log_level persistent = optimism_args_with_right_defaults.persistent + observability_params = optimism_args_with_right_defaults.observability interop_params = optimism_args_with_right_defaults.interop # Deploy the L1 @@ -112,6 +113,15 @@ def run(plan, args): interop_params, ) + if observability_params.enabled: + plan.print("Launching prometheus...") + # prometheus_private_url = prometheus.launch_prometheus( + # plan, + # metrics_jobs, + # global_node_selectors, + # observability_params, + # ) + if interop_params.enabled: op_supervisor_launcher.launch( plan, diff --git a/src/package_io/input_parser.star b/src/package_io/input_parser.star index f4a046ea..91a33c09 100644 --- a/src/package_io/input_parser.star +++ b/src/package_io/input_parser.star @@ -62,6 +62,9 @@ def input_parser(plan, input_args): results["persistent"] = False return struct( + observability=struct( + enabled=results["observability"]["enabled"], + ), interop=struct( enabled=results["interop"]["enabled"], supervisor_params=struct( @@ -175,6 +178,11 @@ def input_parser(plan, input_args): def parse_network_params(plan, input_args): results = {} + # configure observability + + results["observability"] = default_observability_args() + results["observability"].update(input_args.get("observability", {})) + # configure interop results["interop"] = default_interop_args() @@ -304,6 +312,7 @@ def parse_network_params(plan, input_args): def default_optimism_args(): return { + "observability": default_observability_args(), "interop": default_interop_args(), "chains": default_chains(), "op_contract_deployer_params": default_op_contract_deployer_params(), @@ -313,13 +322,16 @@ def default_optimism_args(): "persistent": False, } +def default_observability_args(): + return { + "enabled": True, + } def default_interop_args(): return { "enabled": False, } - def default_supervisor_params(): return { "image": DEFAULT_SUPERVISOR_IMAGES["op-supervisor"], diff --git a/src/package_io/sanity_check.star b/src/package_io/sanity_check.star index 8e5b6ea2..75e73511 100644 --- a/src/package_io/sanity_check.star +++ b/src/package_io/sanity_check.star @@ -1,3 +1,6 @@ +OBSERVABILITY_PARAMS = [ + "enabled", +] INTEROP_PARAMS = [ "enabled", "supervisor_params", @@ -134,6 +137,14 @@ def sanity_check(plan, optimism_config): if key not in ROOT_PARAMS: fail("Invalid parameter {0}, allowed fields: {1}".format(key, ROOT_PARAMS)) + if "observability" in optimism_config: + validate_params( + plan, + optimism_config, + "observability", + OBSERVABILITY_PARAMS, + ) + if "interop" in optimism_config: validate_params( plan, From 2ac4186311949f4735f9f8fe5977092b1cdfb99e Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 14:48:50 -0500 Subject: [PATCH 02/29] copy over prometheus_launcher from ethereum-package and trim down --- main.star | 14 +- src/prometheus/prometheus_launcher.star | 186 ++++++++++++++++++++++++ 2 files changed, 194 insertions(+), 6 deletions(-) create mode 100644 src/prometheus/prometheus_launcher.star diff --git a/main.star b/main.star index fca916a3..536ec505 100644 --- a/main.star +++ b/main.star @@ -4,6 +4,8 @@ l2_launcher = import_module("./src/l2.star") op_supervisor_launcher = import_module( "./src/interop/op-supervisor/op_supervisor_launcher.star" ) +prometheus = import_module("./src/prometheus/prometheus_launcher.star") + wait_for_sync = import_module("./src/wait/wait_for_sync.star") input_parser = import_module("./src/package_io/input_parser.star") ethereum_package_static_files = import_module( @@ -115,12 +117,12 @@ def run(plan, args): if observability_params.enabled: plan.print("Launching prometheus...") - # prometheus_private_url = prometheus.launch_prometheus( - # plan, - # metrics_jobs, - # global_node_selectors, - # observability_params, - # ) + prometheus_private_url = prometheus.launch_prometheus( + plan, + metrics_jobs, + global_node_selectors, + observability_params, + ) if interop_params.enabled: op_supervisor_launcher.launch( diff --git a/src/prometheus/prometheus_launcher.star b/src/prometheus/prometheus_launcher.star new file mode 100644 index 00000000..34264ddc --- /dev/null +++ b/src/prometheus/prometheus_launcher.star @@ -0,0 +1,186 @@ +prometheus = import_module("github.com/kurtosis-tech/prometheus-package/main.star") + +EXECUTION_CLIENT_TYPE = "execution" +BEACON_CLIENT_TYPE = "beacon" +VC_TYPE = "validator" +REMOTE_SIGNER_TYPE = "remote-signer" + +METRICS_INFO_NAME_KEY = "name" +METRICS_INFO_URL_KEY = "url" +METRICS_INFO_PATH_KEY = "path" +METRICS_INFO_ADDITIONAL_CONFIG_KEY = "config" + +PROMETHEUS_DEFAULT_SCRAPE_INTERVAL = "15s" + + +def launch_prometheus( + plan, + metrics_jobs, + global_node_selectors, + prometheus_params, +): + # metrics_jobs = get_metrics_jobs(metrics_jobs) + + if REGISTERED_METRICS_JOBS.length == 0: + return None + + prometheus_url = prometheus.run( + plan, + REGISTERED_METRICS_JOBS, + "prometheus", + min_cpu=prometheus_params.min_cpu, + max_cpu=prometheus_params.max_cpu, + min_memory=prometheus_params.min_mem, + max_memory=prometheus_params.max_mem, + node_selectors=global_node_selectors, + storage_tsdb_retention_time=prometheus_params.storage_tsdb_retention_time, + storage_tsdb_retention_size=prometheus_params.storage_tsdb_retention_size, + image=prometheus_params.image, + ) + + return prometheus_url + + +def new_metrics_job( + job_name, + endpoint, + metrics_path, + labels, + scrape_interval=PROMETHEUS_DEFAULT_SCRAPE_INTERVAL, +): + return { + "Name": job_name, + "Endpoint": endpoint, + "MetricsPath": metrics_path, + "Labels": labels, + "ScrapeInterval": scrape_interval, + } + +def register_node_metrics_job(node_metrics_info): + labels = { + "service": el_context.service_name, + "client_type": EXECUTION_CLIENT_TYPE, + "client_name": el_context.client_name, + } + + scrape_interval = PROMETHEUS_DEFAULT_SCRAPE_INTERVAL + + additional_config = node_metrics_info[ + METRICS_INFO_ADDITIONAL_CONFIG_KEY + ] + + if additional_config != None: + if additional_config.labels != None: + labels.update(additional_config.labels) + if ( + additional_config.scrape_interval != None + and additional_config.scrape_interval != "" + ): + scrape_interval = additional_config.scrape_interval + + register_metrics_job( + new_metrics_job( + job_name=node_metrics_info[METRICS_INFO_NAME_KEY], + endpoint=node_metrics_info[METRICS_INFO_URL_KEY], + metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY], + labels=labels, + scrape_interval=scrape_interval, + ) + ) + +REGISTERED_METRICS_JOBS = [] + +def register_metrics_job(metrics_job): + REGISTERED_METRICS_JOBS.append(metrics_job) + + + metrics_jobs = [] + + # Adding validator clients metrics jobs + for context in vc_contexts: + if context == None: + continue + metrics_info = context.metrics_info + + scrape_interval = PROMETHEUS_DEFAULT_SCRAPE_INTERVAL + labels = { + "service": context.service_name, + "client_type": VC_TYPE, + "client_name": context.client_name, + } + + metrics_jobs.append( + new_metrics_job( + job_name=metrics_info[METRICS_INFO_NAME_KEY], + endpoint=metrics_info[METRICS_INFO_URL_KEY], + metrics_path=metrics_info[METRICS_INFO_PATH_KEY], + labels=labels, + scrape_interval=scrape_interval, + ) + ) + + # Adding validator clients metrics jobs + for context in remote_signer_contexts: + if context == None: + continue + metrics_info = context.metrics_info + + scrape_interval = PROMETHEUS_DEFAULT_SCRAPE_INTERVAL + labels = { + "service": context.service_name, + "client_type": REMOTE_SIGNER_TYPE, + "client_name": context.client_name, + } + + metrics_jobs.append( + new_metrics_job( + job_name=metrics_info[METRICS_INFO_NAME_KEY], + endpoint=metrics_info[METRICS_INFO_URL_KEY], + metrics_path=metrics_info[METRICS_INFO_PATH_KEY], + labels=labels, + scrape_interval=scrape_interval, + ) + ) + + # Adding ethereum-metrics-exporter metrics jobs + for context in ethereum_metrics_exporter_contexts: + if context != None: + metrics_jobs.append( + new_metrics_job( + job_name="ethereum-metrics-exporter-{0}".format(context.pair_name), + endpoint="{}:{}".format( + context.ip_addr, + context.metrics_port_num, + ), + metrics_path="/metrics", + labels={ + "instance": context.pair_name, + "consensus_client": context.cl_name, + "execution_client": context.el_name, + }, + ) + ) + # Adding Xatu Sentry metrics jobs + for context in xatu_sentry_contexts: + if context != None: + metrics_jobs.append( + new_metrics_job( + job_name="xatu-sentry-{0}".format(context.pair_name), + endpoint="{}:{}".format( + context.ip_addr, + context.metrics_port_num, + ), + metrics_path="/metrics", + labels={ + "pair": context.pair_name, + }, + ) + ) + + # Adding additional metrics jobs + for job in metrics_jobs: + if job == None: + continue + metrics_jobs.append(job) + + return metrics_jobs From 9fc315aa93edf5d48e484bb7bf02f318be9fd873 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 14:51:49 -0500 Subject: [PATCH 03/29] register el/cl metrics jobs --- main.star | 1 + src/el/op-geth/op_geth_launcher.star | 12 ++++++++---- src/el_cl_launcher.star | 21 +++++++++++++++++++++ src/l2.star | 2 ++ src/participant_network.star | 2 ++ 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/main.star b/main.star index 536ec505..52d2f725 100644 --- a/main.star +++ b/main.star @@ -112,6 +112,7 @@ def run(plan, args): global_node_selectors, global_tolerations, persistent, + observability_params, interop_params, ) diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index 7de38d9f..c73f8ca3 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -22,6 +22,7 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") +prometheus = import_module("../../prometheus/prometheus_launcher.star") interop_constants = import_module("../../interop/constants.star") RPC_PORT_NUM = 8545 @@ -105,6 +106,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_params, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -135,10 +137,12 @@ def launch( plan, service_name, RPC_PORT_ID ) - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - geth_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url - ) + geth_metrics_info = None + if observability_params.enabled: + metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) + geth_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( + service_name, METRICS_PATH, metrics_url + ) http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) diff --git a/src/el_cl_launcher.star b/src/el_cl_launcher.star index 2bedd199..142f7f4d 100644 --- a/src/el_cl_launcher.star +++ b/src/el_cl_launcher.star @@ -7,6 +7,8 @@ ethereum_package_input_parser = import_module( ) input_parser = import_module("./package_io/input_parser.star") +prometheus = import_module("./prometheus/prometheus_launcher.star") + # EL op_geth = import_module("./el/op-geth/op_geth_launcher.star") @@ -37,6 +39,7 @@ def launch( global_tolerations, persistent, additional_services, + observability_params, interop_params, ): el_launchers = { @@ -229,9 +232,17 @@ def launch( all_el_contexts, sequencer_enabled, sequencer_context, + observability_params, interop_params, ) + if observability_params.enabled: + for metrics_info in el_context.el_metrics_info: + if(metrics_info == None): + continue + + prometheus.register_node_metrics_job(metrics_info) + if rollup_boost_enabled: plan.print("Rollup boost enabled") @@ -293,6 +304,16 @@ def launch( interop_params, ) + if observability_params.enabled: + for metrics_info in cl_context.cl_metrics_info: + if(metrics_info == None): + continue + + metrics_info[prometheus.METRICS_INFO_ADDITIONAL_CONFIG_KEY].update({ + "supernode": str(cl_context.supernode), + }) + prometheus.register_node_metrics_job(metrics_info) + sequencer_enabled = False all_el_contexts.append(el_context) diff --git a/src/l2.star b/src/l2.star index 532a7dde..760f370c 100644 --- a/src/l2.star +++ b/src/l2.star @@ -19,6 +19,7 @@ def launch_l2( global_node_selectors, global_tolerations, persistent, + observability_params, interop_params, ): network_params = l2_args.network_params @@ -47,6 +48,7 @@ def launch_l2( global_tolerations, persistent, l2_args.additional_services, + observability_params, interop_params, ) diff --git a/src/participant_network.star b/src/participant_network.star index 37b79733..1afd534c 100644 --- a/src/participant_network.star +++ b/src/participant_network.star @@ -27,6 +27,7 @@ def launch_participant_network( global_tolerations, persistent, additional_services, + observability_params, interop_params, ): num_participants = len(participants) @@ -46,6 +47,7 @@ def launch_participant_network( global_tolerations, persistent, additional_services, + observability_params, interop_params, ) From f5aee04ca1930cacb2eb7b05c2d698288e0a6197 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 15:38:05 -0500 Subject: [PATCH 04/29] conditionally expose metrics port in geth --- src/el/op-geth/op_geth_launcher.star | 112 ++++++++++++++++----------- src/el_cl_launcher.star | 9 ++- 2 files changed, 70 insertions(+), 51 deletions(-) diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index c73f8ca3..40ac0b07 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -73,9 +73,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL, ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -137,15 +134,15 @@ def launch( plan, service_name, RPC_PORT_ID ) - geth_metrics_info = None + http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) + + metrics_info = None if observability_params.enabled: metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - geth_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( + metrics_info = ethereum_package_node_metrics.new_node_metrics_info( service_name, METRICS_PATH, metrics_url ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) - return ethereum_package_el_context.new_el_context( client_name="op-geth", enode=enode, @@ -156,7 +153,7 @@ def launch( rpc_http_url=http_url, enr=enr, service_name=service_name, - el_metrics_info=[geth_metrics_info], + el_metrics_info=[metrics_info], ) @@ -173,16 +170,13 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, interop_params, ): - init_datadir_cmd_str = "geth init --datadir={0} --state.scheme=hash {1}".format( - EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/genesis-{0}.json".format(launcher.network_id), - ) - discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) + + subcommand_strs = [] cmd = [ "geth", @@ -209,44 +203,17 @@ def get_config( "--syncmode=full", "--nat=extip:" + ethereum_package_constants.PRIVATE_IP_ADDRESS_PLACEHOLDER, "--rpc.allow-unprotected-txs", - "--metrics", - "--metrics.addr=0.0.0.0", - "--metrics.port={0}".format(METRICS_PORT_NUM), "--discovery.port={0}".format(discovery_port), "--port={0}".format(discovery_port), ] - if not sequencer_enabled: - cmd.append("--rollup.sequencerhttp={0}".format(sequencer_context.rpc_http_url)) - - if len(existing_el_clients) > 0: - cmd.append( - "--bootnodes=" - + ",".join( - [ - ctx.enode - for ctx in existing_el_clients[ - : ethereum_package_constants.MAX_ENODE_ENTRIES - ] - ] - ) - ) - - cmd += participant.el_extra_params - cmd_str = " ".join(cmd) - if launcher.network not in ethereum_package_constants.PUBLIC_NETWORKS: - subcommand_strs = [ - init_datadir_cmd_str, - cmd_str, - ] - command_str = " && ".join(subcommand_strs) - else: - command_str = cmd_str - + # configure files + files = { ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, } + if persistent: files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( persistent_key="data-{0}".format(service_name), @@ -257,14 +224,62 @@ def get_config( ], ) - env_vars = dict(participant.cl_extra_env_vars) + if launcher.network not in ethereum_package_constants.PUBLIC_NETWORKS: + init_datadir_cmd_str = "geth init --datadir={0} --state.scheme=hash {1}".format( + EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, + "{0}/genesis-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + launcher.network_id + ), + ) + + subcommand_strs.append(init_datadir_cmd_str) + + # configure environment variables + + env_vars = dict(participant.el_extra_env_vars) + + # apply customizations + + if observability_params.enabled: + cmd += [ + "--metrics", + "--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(METRICS_PORT_NUM), + ] + + ports[METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( + METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL + ) if interop_params.enabled: env_vars["GETH_ROLLUP_INTEROPRPC"] = interop_constants.SUPERVISOR_ENDPOINT + if not sequencer_enabled: + cmd.append("--rollup.sequencerhttp={0}".format(sequencer_context.rpc_http_url)) + + if len(existing_el_clients) > 0: + cmd.append( + "--bootnodes=" + + ",".join( + [ + ctx.enode + for ctx in existing_el_clients[ + : ethereum_package_constants.MAX_ENODE_ENTRIES + ] + ] + ) + ) + + # construct command string + + cmd += participant.el_extra_params + subcommand_strs.append(" ".join(cmd)) + command_str = " && ".join(subcommand_strs) + config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": [command_str], "files": files, "entrypoint": ENTRYPOINT_ARGS, @@ -281,6 +296,8 @@ def get_config( "node_selectors": node_selectors, } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -289,6 +306,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) diff --git a/src/el_cl_launcher.star b/src/el_cl_launcher.star index 142f7f4d..c6dfa5d6 100644 --- a/src/el_cl_launcher.star +++ b/src/el_cl_launcher.star @@ -259,6 +259,7 @@ def launch( all_el_contexts, sequencer_enabled, sequencer_context, + observability_params, interop_params, ) else: @@ -301,17 +302,16 @@ def launch( all_cl_contexts, l1_config_env_vars, sequencer_enabled, + observability_params, interop_params, ) if observability_params.enabled: - for metrics_info in cl_context.cl_metrics_info: - if(metrics_info == None): - continue - + for metrics_info in filter(lambda x: x is not None, cl_context.cl_metrics_info): metrics_info[prometheus.METRICS_INFO_ADDITIONAL_CONFIG_KEY].update({ "supernode": str(cl_context.supernode), }) + prometheus.register_node_metrics_job(metrics_info) sequencer_enabled = False @@ -333,6 +333,7 @@ def launch( all_cl_contexts, l1_config_env_vars, False, + observability_params, interop_params, ) all_cl_contexts.append(cl_builder_context) From 7887d4366026e33e756e8944b063c998e84c95a1 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 16:09:31 -0500 Subject: [PATCH 05/29] nest prometheus module in observability and add constants --- src/el/op-geth/op_geth_launcher.star | 17 +- src/observability/constants.star | 3 + .../prometheus/prometheus_launcher.star | 88 +++++++++ src/prometheus/prometheus_launcher.star | 186 ------------------ 4 files changed, 98 insertions(+), 196 deletions(-) create mode 100644 src/observability/constants.star create mode 100644 src/observability/prometheus/prometheus_launcher.star delete mode 100644 src/prometheus/prometheus_launcher.star diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index 40ac0b07..f14d8e8a 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -22,14 +22,13 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") -prometheus = import_module("../../prometheus/prometheus_launcher.star") +observability = import_module("../../observability/constants.star") interop_constants = import_module("../../interop/constants.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 8551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 300 @@ -42,13 +41,11 @@ TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" ENGINE_WS_PORT_ID = "engineWs" -METRICS_PORT_ID = "metrics" + # TODO(old) Scale this dynamically based on CPUs available and Geth nodes mining NUM_MINING_THREADS = 1 -METRICS_PATH = "/debug/metrics/prometheus" - # The dirpath of the execution data directory on the client container EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER = "/data/geth/execution-data" @@ -138,9 +135,9 @@ def launch( metrics_info = None if observability_params.enabled: - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) + metrics_url = "{0}:{1}".format(service.ip_address, observability.METRICS_PORT_NUM) metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url + service_name, observability.METRICS_PATH, metrics_url ) return ethereum_package_el_context.new_el_context( @@ -245,11 +242,11 @@ def get_config( cmd += [ "--metrics", "--metrics.addr=0.0.0.0", - "--metrics.port={0}".format(METRICS_PORT_NUM), + "--metrics.port={0}".format(observability.METRICS_PORT_NUM), ] - ports[METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL + ports[observability.METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( + observability.METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL ) if interop_params.enabled: diff --git a/src/observability/constants.star b/src/observability/constants.star new file mode 100644 index 00000000..a9c2b9b0 --- /dev/null +++ b/src/observability/constants.star @@ -0,0 +1,3 @@ +METRICS_PORT_ID = "metrics" +METRICS_PORT_NUM = 9001 +METRICS_PATH = "/debug/metrics/prometheus" diff --git a/src/observability/prometheus/prometheus_launcher.star b/src/observability/prometheus/prometheus_launcher.star new file mode 100644 index 00000000..fdc2b0f6 --- /dev/null +++ b/src/observability/prometheus/prometheus_launcher.star @@ -0,0 +1,88 @@ +prometheus = import_module("github.com/kurtosis-tech/prometheus-package/main.star") + +EXECUTION_CLIENT_TYPE = "execution" + +METRICS_INFO_NAME_KEY = "name" +METRICS_INFO_URL_KEY = "url" +METRICS_INFO_PATH_KEY = "path" +METRICS_INFO_ADDITIONAL_CONFIG_KEY = "config" + +PROMETHEUS_DEFAULT_SCRAPE_INTERVAL = "15s" + + +REGISTERED_METRICS_JOBS = [] + +def register_metrics_job(metrics_job): + REGISTERED_METRICS_JOBS.append(metrics_job) + +def launch_prometheus( + plan, + metrics_jobs, + global_node_selectors, + prometheus_params, +): + if REGISTERED_METRICS_JOBS.length == 0: + return None + + prometheus_url = prometheus.run( + plan, + REGISTERED_METRICS_JOBS, + "prometheus", + min_cpu=prometheus_params.min_cpu, + max_cpu=prometheus_params.max_cpu, + min_memory=prometheus_params.min_mem, + max_memory=prometheus_params.max_mem, + node_selectors=global_node_selectors, + storage_tsdb_retention_time=prometheus_params.storage_tsdb_retention_time, + storage_tsdb_retention_size=prometheus_params.storage_tsdb_retention_size, + image=prometheus_params.image, + ) + + return prometheus_url + +def new_metrics_job( + job_name, + endpoint, + metrics_path, + labels, + scrape_interval=PROMETHEUS_DEFAULT_SCRAPE_INTERVAL, +): + return { + "Name": job_name, + "Endpoint": endpoint, + "MetricsPath": metrics_path, + "Labels": labels, + "ScrapeInterval": scrape_interval, + } + +def register_node_metrics_job(node_metrics_info): + labels = { + "service": el_context.service_name, + "client_type": EXECUTION_CLIENT_TYPE, + "client_name": el_context.client_name, + } + + scrape_interval = PROMETHEUS_DEFAULT_SCRAPE_INTERVAL + + additional_config = node_metrics_info[ + METRICS_INFO_ADDITIONAL_CONFIG_KEY + ] + + if additional_config != None: + if additional_config.labels != None: + labels.update(additional_config.labels) + if ( + additional_config.scrape_interval != None + and additional_config.scrape_interval != "" + ): + scrape_interval = additional_config.scrape_interval + + register_metrics_job( + new_metrics_job( + job_name=node_metrics_info[METRICS_INFO_NAME_KEY], + endpoint=node_metrics_info[METRICS_INFO_URL_KEY], + metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY], + labels=labels, + scrape_interval=scrape_interval, + ) + ) diff --git a/src/prometheus/prometheus_launcher.star b/src/prometheus/prometheus_launcher.star deleted file mode 100644 index 34264ddc..00000000 --- a/src/prometheus/prometheus_launcher.star +++ /dev/null @@ -1,186 +0,0 @@ -prometheus = import_module("github.com/kurtosis-tech/prometheus-package/main.star") - -EXECUTION_CLIENT_TYPE = "execution" -BEACON_CLIENT_TYPE = "beacon" -VC_TYPE = "validator" -REMOTE_SIGNER_TYPE = "remote-signer" - -METRICS_INFO_NAME_KEY = "name" -METRICS_INFO_URL_KEY = "url" -METRICS_INFO_PATH_KEY = "path" -METRICS_INFO_ADDITIONAL_CONFIG_KEY = "config" - -PROMETHEUS_DEFAULT_SCRAPE_INTERVAL = "15s" - - -def launch_prometheus( - plan, - metrics_jobs, - global_node_selectors, - prometheus_params, -): - # metrics_jobs = get_metrics_jobs(metrics_jobs) - - if REGISTERED_METRICS_JOBS.length == 0: - return None - - prometheus_url = prometheus.run( - plan, - REGISTERED_METRICS_JOBS, - "prometheus", - min_cpu=prometheus_params.min_cpu, - max_cpu=prometheus_params.max_cpu, - min_memory=prometheus_params.min_mem, - max_memory=prometheus_params.max_mem, - node_selectors=global_node_selectors, - storage_tsdb_retention_time=prometheus_params.storage_tsdb_retention_time, - storage_tsdb_retention_size=prometheus_params.storage_tsdb_retention_size, - image=prometheus_params.image, - ) - - return prometheus_url - - -def new_metrics_job( - job_name, - endpoint, - metrics_path, - labels, - scrape_interval=PROMETHEUS_DEFAULT_SCRAPE_INTERVAL, -): - return { - "Name": job_name, - "Endpoint": endpoint, - "MetricsPath": metrics_path, - "Labels": labels, - "ScrapeInterval": scrape_interval, - } - -def register_node_metrics_job(node_metrics_info): - labels = { - "service": el_context.service_name, - "client_type": EXECUTION_CLIENT_TYPE, - "client_name": el_context.client_name, - } - - scrape_interval = PROMETHEUS_DEFAULT_SCRAPE_INTERVAL - - additional_config = node_metrics_info[ - METRICS_INFO_ADDITIONAL_CONFIG_KEY - ] - - if additional_config != None: - if additional_config.labels != None: - labels.update(additional_config.labels) - if ( - additional_config.scrape_interval != None - and additional_config.scrape_interval != "" - ): - scrape_interval = additional_config.scrape_interval - - register_metrics_job( - new_metrics_job( - job_name=node_metrics_info[METRICS_INFO_NAME_KEY], - endpoint=node_metrics_info[METRICS_INFO_URL_KEY], - metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY], - labels=labels, - scrape_interval=scrape_interval, - ) - ) - -REGISTERED_METRICS_JOBS = [] - -def register_metrics_job(metrics_job): - REGISTERED_METRICS_JOBS.append(metrics_job) - - - metrics_jobs = [] - - # Adding validator clients metrics jobs - for context in vc_contexts: - if context == None: - continue - metrics_info = context.metrics_info - - scrape_interval = PROMETHEUS_DEFAULT_SCRAPE_INTERVAL - labels = { - "service": context.service_name, - "client_type": VC_TYPE, - "client_name": context.client_name, - } - - metrics_jobs.append( - new_metrics_job( - job_name=metrics_info[METRICS_INFO_NAME_KEY], - endpoint=metrics_info[METRICS_INFO_URL_KEY], - metrics_path=metrics_info[METRICS_INFO_PATH_KEY], - labels=labels, - scrape_interval=scrape_interval, - ) - ) - - # Adding validator clients metrics jobs - for context in remote_signer_contexts: - if context == None: - continue - metrics_info = context.metrics_info - - scrape_interval = PROMETHEUS_DEFAULT_SCRAPE_INTERVAL - labels = { - "service": context.service_name, - "client_type": REMOTE_SIGNER_TYPE, - "client_name": context.client_name, - } - - metrics_jobs.append( - new_metrics_job( - job_name=metrics_info[METRICS_INFO_NAME_KEY], - endpoint=metrics_info[METRICS_INFO_URL_KEY], - metrics_path=metrics_info[METRICS_INFO_PATH_KEY], - labels=labels, - scrape_interval=scrape_interval, - ) - ) - - # Adding ethereum-metrics-exporter metrics jobs - for context in ethereum_metrics_exporter_contexts: - if context != None: - metrics_jobs.append( - new_metrics_job( - job_name="ethereum-metrics-exporter-{0}".format(context.pair_name), - endpoint="{}:{}".format( - context.ip_addr, - context.metrics_port_num, - ), - metrics_path="/metrics", - labels={ - "instance": context.pair_name, - "consensus_client": context.cl_name, - "execution_client": context.el_name, - }, - ) - ) - # Adding Xatu Sentry metrics jobs - for context in xatu_sentry_contexts: - if context != None: - metrics_jobs.append( - new_metrics_job( - job_name="xatu-sentry-{0}".format(context.pair_name), - endpoint="{}:{}".format( - context.ip_addr, - context.metrics_port_num, - ), - metrics_path="/metrics", - labels={ - "pair": context.pair_name, - }, - ) - ) - - # Adding additional metrics jobs - for job in metrics_jobs: - if job == None: - continue - metrics_jobs.append(job) - - return metrics_jobs From 317610d9e7abdac8523fbcee59ea94537c1e7c2a Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 16:10:24 -0500 Subject: [PATCH 06/29] expose metrics on op-node --- src/cl/op-node/op_node_launcher.star | 99 ++++++++++++++++++---------- 1 file changed, 64 insertions(+), 35 deletions(-) diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index c09967ef..71e90d62 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -17,6 +17,7 @@ ethereum_package_input_parser = import_module( constants = import_module("../../package_io/constants.star") util = import_module("../../util.star") +observability = import_module("../../observability/constants.star") interop_constants = import_module("../../interop/constants.star") # ---------------------------------- Beacon client ------------------------------------- @@ -32,7 +33,6 @@ BEACON_HTTP_PORT_ID = "http" BEACON_DISCOVERY_PORT_NUM = 9003 BEACON_HTTP_PORT_NUM = 8547 - def get_used_ports(discovery_port): used_ports = { BEACON_TCP_DISCOVERY_PORT_ID: ethereum_package_shared_utils.new_port_spec( @@ -74,6 +74,7 @@ def launch( existing_cl_clients, l1_config_env_vars, sequencer_enabled, + observability_params, interop_params, ): beacon_node_identity_recipe = PostHttpRequestRecipe( @@ -106,6 +107,7 @@ def launch( l1_config_env_vars, beacon_node_identity_recipe, sequencer_enabled, + observability_params, interop_params, ) @@ -116,6 +118,13 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) + metrics_info = None + if observability_params.enabled: + metrics_url = "{0}:{1}".format(service.ip_address, observability.METRICS_PORT_NUM) + metrics_info = ethereum_package_node_metrics.new_node_metrics_info( + service_name, observability.METRICS_PATH, metrics_url + ) + response = plan.request( recipe=beacon_node_identity_recipe, service_name=service_name ) @@ -130,7 +139,7 @@ def launch( ip_addr=beacon_service.ip_address, http_port=beacon_http_port.number, beacon_http_url=beacon_http_url, - cl_nodes_metrics_info=None, + cl_nodes_metrics_info=[metrics_info], beacon_service_name=service_name, multiaddr=beacon_multiaddr, peer_id=beacon_peer_id, @@ -151,23 +160,25 @@ def get_beacon_config( l1_config_env_vars, beacon_node_identity_recipe, sequencer_enabled, + observability_params, interop_params, ): + ports = dict(get_used_ports(BEACON_DISCOVERY_PORT_NUM)) + EXECUTION_ENGINE_ENDPOINT = "http://{0}:{1}".format( el_context.ip_addr, el_context.engine_rpc_port_num, ) - used_ports = get_used_ports(BEACON_DISCOVERY_PORT_NUM) - cmd = [ "op-node", "--l2={0}".format(EXECUTION_ENGINE_ENDPOINT), "--l2.jwt-secret=" + ethereum_package_constants.JWT_MOUNT_PATH_ON_CONTAINER, "--verifier.l1-confs=4", - "--rollup.config=" - + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/rollup-{0}.json".format(launcher.network_params.network_id), + "--rollup.config=" + "{0}/rollup-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + launcher.network_params.network_id + ), "--rpc.addr=0.0.0.0", "--rpc.port={0}".format(BEACON_HTTP_PORT_NUM), "--rpc.enable-admin", @@ -185,33 +196,8 @@ def get_beacon_config( "--safedb.path={0}".format(BEACON_DATA_DIRPATH_ON_SERVICE_CONTAINER), ] - sequencer_private_key = util.read_network_config_value( - plan, - launcher.deployment_output, - "sequencer-{0}".format(launcher.network_params.network_id), - ".privateKey", - ) - - if sequencer_enabled: - cmd.append("--p2p.sequencer.key=" + sequencer_private_key) - cmd.append("--sequencer.enabled") - cmd.append("--sequencer.l1-confs=5") - - if len(existing_cl_clients) > 0: - cmd.append( - "--p2p.bootnodes=" - + ",".join( - [ - ctx.enr - for ctx in existing_cl_clients[ - : ethereum_package_constants.MAX_ENR_ENTRIES - ] - ] - ) - ) - - cmd += participant.cl_extra_params - + # configure files + files = { ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, @@ -227,10 +213,23 @@ def get_beacon_config( ], ) - ports = dict(used_ports) + # configure environment variables env_vars = dict(participant.cl_extra_env_vars) + # apply customizations + + if observability_params.enabled: + cmd += [ + "--metrics.enabled=true", + "--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(observability.METRICS_PORT_NUM), + ] + + ports[observability.METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( + observability.METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL + ) + if interop_params.enabled: ports[ interop_constants.INTEROP_WS_PORT_ID @@ -248,6 +247,33 @@ def get_beacon_config( } ) + sequencer_private_key = util.read_network_config_value( + plan, + launcher.deployment_output, + "sequencer-{0}".format(launcher.network_params.network_id), + ".privateKey", + ) + + if sequencer_enabled: + cmd.append("--p2p.sequencer.key=" + sequencer_private_key) + cmd.append("--sequencer.enabled") + cmd.append("--sequencer.l1-confs=5") + + if len(existing_cl_clients) > 0: + cmd.append( + "--p2p.bootnodes=" + + ",".join( + [ + ctx.enr + for ctx in existing_cl_clients[ + : ethereum_package_constants.MAX_ENR_ENTRIES + ] + ] + ) + ) + + cmd += participant.cl_extra_params + config_args = { "image": participant.cl_image, "ports": ports, @@ -273,6 +299,8 @@ def get_beacon_config( "node_selectors": node_selectors, } + # configure resources + if participant.cl_min_cpu > 0: config_args["min_cpu"] = participant.cl_min_cpu if participant.cl_max_cpu > 0: @@ -281,6 +309,7 @@ def get_beacon_config( config_args["min_memory"] = participant.cl_min_mem if participant.cl_max_mem > 0: config_args["max_memory"] = participant.cl_max_mem + return ServiceConfig(**config_args) From 14bca3e6928549a9fa3eeb09d38c4674a411d1b5 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 16:19:16 -0500 Subject: [PATCH 07/29] conditionally expose metrics on op-reth --- src/el/op-reth/op_reth_launcher.star | 75 ++++++++++++------- .../prometheus/prometheus_launcher.star | 1 + 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/src/el/op-reth/op_reth_launcher.star b/src/el/op-reth/op_reth_launcher.star index 70211929..d8721eff 100644 --- a/src/el/op-reth/op_reth_launcher.star +++ b/src/el/op-reth/op_reth_launcher.star @@ -21,12 +21,12 @@ ethereum_package_input_parser = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/constants.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 9551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 100 @@ -38,7 +38,6 @@ WS_PORT_ID = "ws" TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" -METRICS_PORT_ID = "metrics" # Paths METRICS_PATH = "/metrics" @@ -66,9 +65,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_ID: ethereum_package_shared_utils.new_port_spec( ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -94,6 +90,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_params, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -115,6 +112,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, ) service = plan.add_service(service_name, config) @@ -123,12 +121,14 @@ def launch( plan, service_name, RPC_PORT_ID ) - metric_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - op_reth_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metric_url - ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) + + metrics_info = None + if observability_params.enabled: + metrics_url = "{0}:{1}".format(service.ip_address, observability.METRICS_PORT_NUM) + metrics_info = ethereum_package_node_metrics.new_node_metrics_info( + service_name, METRICS_PATH, metrics_url + ) return ethereum_package_el_context.new_el_context( client_name="reth", @@ -139,7 +139,7 @@ def launch( engine_rpc_port_num=ENGINE_RPC_PORT_NUM, rpc_http_url=http_url, service_name=service_name, - el_metrics_info=[op_reth_metrics_info], + el_metrics_info=[metrics_info], ) @@ -156,10 +156,10 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, ): - public_ports = {} discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) cmd = [ "node", @@ -186,12 +186,40 @@ def get_config( "--authrpc.port={0}".format(ENGINE_RPC_PORT_NUM), "--authrpc.jwtsecret=" + ethereum_package_constants.JWT_MOUNT_PATH_ON_CONTAINER, "--authrpc.addr=0.0.0.0", - "--metrics=0.0.0.0:{0}".format(METRICS_PORT_NUM), "--discovery.port={0}".format(discovery_port), "--port={0}".format(discovery_port), "--rpc.eth-proof-window=302400", ] + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.el_volume_size) + if int(participant.el_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.EL_TYPE.op_reth + "_volume_size" + ], + ) + + # configure environment variables + + env_vars = participant.el_extra_env_vars + + # apply customizations + + if observability_params.enabled: + cmd.append("--metrics=0.0.0.0:{0}".format(observability.METRICS_PORT_NUM)) + + ports[observability.METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( + observability.METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL + ) + if not sequencer_enabled: cmd.append("--rollup.sequencer-http={0}".format(sequencer_context.rpc_http_url)) @@ -208,22 +236,8 @@ def get_config( ) ) - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.el_volume_size) - if int(participant.el_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.EL_TYPE.op_reth + "_volume_size" - ], - ) - cmd += participant.el_extra_params - env_vars = participant.el_extra_env_vars + config_args = { "image": participant.el_image, "ports": used_ports, @@ -242,6 +256,8 @@ def get_config( "node_selectors": node_selectors, } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -250,6 +266,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) diff --git a/src/observability/prometheus/prometheus_launcher.star b/src/observability/prometheus/prometheus_launcher.star index fdc2b0f6..7a628334 100644 --- a/src/observability/prometheus/prometheus_launcher.star +++ b/src/observability/prometheus/prometheus_launcher.star @@ -71,6 +71,7 @@ def register_node_metrics_job(node_metrics_info): if additional_config != None: if additional_config.labels != None: labels.update(additional_config.labels) + if ( additional_config.scrape_interval != None and additional_config.scrape_interval != "" From 00bccb60b71e2793ea39b661adcdf201de9e3bce Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 16:26:09 -0500 Subject: [PATCH 08/29] remove interop_params from el/cl launchers --- src/cl/op-node/op_node_launcher.star | 3 +-- src/el/op-besu/op_besu_launcher.star | 2 -- src/el/op-erigon/op_erigon_launcher.star | 2 -- src/el/op-geth/op_geth_launcher.star | 3 +-- src/el/op-nethermind/op_nethermind_launcher.star | 2 -- src/el/op-reth/op_reth_launcher.star | 2 -- src/el_cl_launcher.star | 7 +------ 7 files changed, 3 insertions(+), 18 deletions(-) diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index 71e90d62..170d7ab7 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -313,10 +313,9 @@ def get_beacon_config( return ServiceConfig(**config_args) -def new_op_node_launcher(deployment_output, jwt_file, network_params, interop_params): +def new_op_node_launcher(deployment_output, jwt_file, network_params): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network_params=network_params, - interop_params=interop_params, ) diff --git a/src/el/op-besu/op_besu_launcher.star b/src/el/op-besu/op_besu_launcher.star index d47dc3d0..bb6d56e9 100644 --- a/src/el/op-besu/op_besu_launcher.star +++ b/src/el/op-besu/op_besu_launcher.star @@ -276,12 +276,10 @@ def new_op_besu_launcher( jwt_file, network, network_id, - interop_params, ): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el/op-erigon/op_erigon_launcher.star b/src/el/op-erigon/op_erigon_launcher.star index 6d67972d..5e9d9310 100644 --- a/src/el/op-erigon/op_erigon_launcher.star +++ b/src/el/op-erigon/op_erigon_launcher.star @@ -271,12 +271,10 @@ def new_op_erigon_launcher( jwt_file, network, network_id, - interop_params, ): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index f14d8e8a..9a0b2d3f 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -308,12 +308,11 @@ def get_config( def new_op_geth_launcher( - deployment_output, jwt_file, network, network_id, interop_params + deployment_output, jwt_file, network, network_id ): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el/op-nethermind/op_nethermind_launcher.star b/src/el/op-nethermind/op_nethermind_launcher.star index 414fd12f..d6da0304 100644 --- a/src/el/op-nethermind/op_nethermind_launcher.star +++ b/src/el/op-nethermind/op_nethermind_launcher.star @@ -261,12 +261,10 @@ def new_nethermind_launcher( jwt_file, network, network_id, - interop_params, ): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el/op-reth/op_reth_launcher.star b/src/el/op-reth/op_reth_launcher.star index d8721eff..5796913c 100644 --- a/src/el/op-reth/op_reth_launcher.star +++ b/src/el/op-reth/op_reth_launcher.star @@ -275,12 +275,10 @@ def new_op_reth_launcher( jwt_file, network, network_id, - interop_params, ): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el_cl_launcher.star b/src/el_cl_launcher.star index c6dfa5d6..1af402bc 100644 --- a/src/el_cl_launcher.star +++ b/src/el_cl_launcher.star @@ -49,7 +49,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_geth.launch, }, @@ -59,7 +58,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_reth.launch, }, @@ -69,7 +67,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_erigon.launch, }, @@ -79,7 +76,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_nethermind.launch, }, @@ -89,7 +85,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_besu.launch, }, @@ -98,7 +93,7 @@ def launch( cl_launchers = { "op-node": { "launcher": op_node.new_op_node_launcher( - deployment_output, jwt_file, network_params, interop_params + deployment_output, jwt_file, network_params ), "launch_method": op_node.launch, }, From 6859065e5d6ab5e72d63335942283929c9fedae3 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 16:44:46 -0500 Subject: [PATCH 09/29] conditionally expose metrics in op-nethermind --- src/cl/op-node/op_node_launcher.star | 7 +- src/el/op-geth/op_geth_launcher.star | 9 +- .../op-nethermind/op_nethermind_launcher.star | 86 +++++++++++-------- src/observability/constants.star | 8 ++ 4 files changed, 62 insertions(+), 48 deletions(-) diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index 170d7ab7..00d57ca6 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -118,12 +118,7 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) - metrics_info = None - if observability_params.enabled: - metrics_url = "{0}:{1}".format(service.ip_address, observability.METRICS_PORT_NUM) - metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, observability.METRICS_PATH, metrics_url - ) + metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None response = plan.request( recipe=beacon_node_identity_recipe, service_name=service_name diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index 9a0b2d3f..7ddc39db 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -133,13 +133,8 @@ def launch( http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) - metrics_info = None - if observability_params.enabled: - metrics_url = "{0}:{1}".format(service.ip_address, observability.METRICS_PORT_NUM) - metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, observability.METRICS_PATH, metrics_url - ) - + metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None + return ethereum_package_el_context.new_el_context( client_name="op-geth", enode=enode, diff --git a/src/el/op-nethermind/op_nethermind_launcher.star b/src/el/op-nethermind/op_nethermind_launcher.star index d6da0304..f515d07e 100644 --- a/src/el/op-nethermind/op_nethermind_launcher.star +++ b/src/el/op-nethermind/op_nethermind_launcher.star @@ -22,12 +22,12 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/constants.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 8551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 300 @@ -40,13 +40,10 @@ TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" ENGINE_WS_PORT_ID = "engineWs" -METRICS_PORT_ID = "metrics" # TODO(old) Scale this dynamically based on CPUs available and Nethermind nodes mining NUM_MINING_THREADS = 1 -METRICS_PATH = "/debug/metrics/prometheus" - # The dirpath of the execution data directory on the client container EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER = "/data/nethermind/execution-data" @@ -71,9 +68,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL, ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -99,6 +93,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_params, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -120,6 +115,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, ) service = plan.add_service(service_name, config) @@ -128,14 +124,11 @@ def launch( plan, service_name, RPC_PORT_ID ) - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - nethermind_metrics_info = ethereum_package_el_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url - ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) ws_url = "ws://{0}:{1}".format(service.ip_address, WS_PORT_NUM) + metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None + return ethereum_package_el_context.new_el_context( client_name="op-nethermind", enode=enode, @@ -146,7 +139,7 @@ def launch( rpc_http_url=http_url, ws_url=ws_url, service_name=service_name, - el_metrics_info=[nethermind_metrics_info], + el_metrics_info=[metrics_info], ) @@ -163,9 +156,11 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, ): discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) + cmd = [ "--log=debug", "--datadir=" + EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, @@ -183,10 +178,40 @@ def get_config( "--Network.P2PPort={0}".format(discovery_port), "--JsonRpc.JwtSecretFile=" + ethereum_package_constants.JWT_MOUNT_PATH_ON_CONTAINER, - "--Metrics.Enabled=true", - "--Metrics.ExposePort={0}".format(METRICS_PORT_NUM), - "--Metrics.ExposeHost=0.0.0.0", ] + + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.el_volume_size) + if int(participant.el_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.EL_TYPE.op_nethermind + "_volume_size" + ], + ) + # configure environment variables + + env_vars = dict(participant.el_extra_env_vars) + + # apply customizations + + if observability_params.enabled: + cmd += [ + "--Metrics.Enabled=true", + "--Metrics.ExposeHost=0.0.0.0", + "--Metrics.ExposePort={0}".format(observability.METRICS_PORT_NUM), + ] + + ports[observability.METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( + observability.METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL + ) + if not sequencer_enabled: cmd.append("--Optimism.SequencerUrl={0}".format(sequencer_context.rpc_http_url)) @@ -207,29 +232,17 @@ def get_config( cmd.append("--config=none.cfg") cmd.append( "--Init.ChainSpecPath=" - + ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER - + "/chainspec-{0}.json".format(launcher.network_id) + + "{0}/chainspec-{1}.json".format( + ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER, + launcher.network_id + ), ) - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.el_volume_size) - if int(participant.el_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.EL_TYPE.op_nethermind + "_volume_size" - ], - ) - cmd += participant.el_extra_params - env_vars = participant.el_extra_env_vars + config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": cmd, "files": files, "private_ip_address_placeholder": ethereum_package_constants.PRIVATE_IP_ADDRESS_PLACEHOLDER, @@ -245,6 +258,8 @@ def get_config( "node_selectors": node_selectors, } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -253,6 +268,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) diff --git a/src/observability/constants.star b/src/observability/constants.star index a9c2b9b0..e2c45ef4 100644 --- a/src/observability/constants.star +++ b/src/observability/constants.star @@ -1,3 +1,11 @@ METRICS_PORT_ID = "metrics" METRICS_PORT_NUM = 9001 METRICS_PATH = "/debug/metrics/prometheus" + +def new_metrics_info(service): + metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) + metrics_info = ethereum_package_node_metrics.new_node_metrics_info( + service.name, METRICS_PATH, metrics_url + ) + + return metrics_info From 20c6a75dc42736123832e2d1d5faf106554bf70c Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 16:57:25 -0500 Subject: [PATCH 10/29] extract observability.expose_metrics_port --- src/cl/op-node/op_node_launcher.star | 4 +--- src/el/op-geth/op_geth_launcher.star | 6 ++---- src/el/op-nethermind/op_nethermind_launcher.star | 4 +--- src/el/op-reth/op_reth_launcher.star | 4 +--- src/observability/constants.star | 5 +++++ 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index 00d57ca6..83cbdac1 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -221,9 +221,7 @@ def get_beacon_config( "--metrics.port={0}".format(observability.METRICS_PORT_NUM), ] - ports[observability.METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( - observability.METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ) + observability.expose_metrics_port(ports) if interop_params.enabled: ports[ diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index 7ddc39db..eac2cee8 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -239,10 +239,8 @@ def get_config( "--metrics.addr=0.0.0.0", "--metrics.port={0}".format(observability.METRICS_PORT_NUM), ] - - ports[observability.METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( - observability.METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ) + + observability.expose_metrics_port(ports) if interop_params.enabled: env_vars["GETH_ROLLUP_INTEROPRPC"] = interop_constants.SUPERVISOR_ENDPOINT diff --git a/src/el/op-nethermind/op_nethermind_launcher.star b/src/el/op-nethermind/op_nethermind_launcher.star index f515d07e..73e27bd9 100644 --- a/src/el/op-nethermind/op_nethermind_launcher.star +++ b/src/el/op-nethermind/op_nethermind_launcher.star @@ -208,9 +208,7 @@ def get_config( "--Metrics.ExposePort={0}".format(observability.METRICS_PORT_NUM), ] - ports[observability.METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( - observability.METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ) + observability.expose_metrics_port(ports) if not sequencer_enabled: cmd.append("--Optimism.SequencerUrl={0}".format(sequencer_context.rpc_http_url)) diff --git a/src/el/op-reth/op_reth_launcher.star b/src/el/op-reth/op_reth_launcher.star index 5796913c..95af1320 100644 --- a/src/el/op-reth/op_reth_launcher.star +++ b/src/el/op-reth/op_reth_launcher.star @@ -216,9 +216,7 @@ def get_config( if observability_params.enabled: cmd.append("--metrics=0.0.0.0:{0}".format(observability.METRICS_PORT_NUM)) - ports[observability.METRICS_PORT_ID] = ethereum_package_shared_utils.new_port_spec( - observability.METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ) + observability.expose_metrics_port(ports) if not sequencer_enabled: cmd.append("--rollup.sequencer-http={0}".format(sequencer_context.rpc_http_url)) diff --git a/src/observability/constants.star b/src/observability/constants.star index e2c45ef4..d4a3e5a8 100644 --- a/src/observability/constants.star +++ b/src/observability/constants.star @@ -9,3 +9,8 @@ def new_metrics_info(service): ) return metrics_info + +def expose_metrics_port(ports, port_id=METRICS_PORT_ID, port_num=METRICS_PORT_NUM): + ports[port_id] = ethereum_package_shared_utils.new_port_spec( + port_num, ethereum_package_shared_utils.TCP_PROTOCOL + ) From 8bf05a54093e9a24565e7da312010e9605dbed13 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 16:57:48 -0500 Subject: [PATCH 11/29] support custom metrics_path in observability.new_metrics_info --- src/el/op-reth/op_reth_launcher.star | 7 +------ src/observability/constants.star | 4 ++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/el/op-reth/op_reth_launcher.star b/src/el/op-reth/op_reth_launcher.star index 95af1320..bd842078 100644 --- a/src/el/op-reth/op_reth_launcher.star +++ b/src/el/op-reth/op_reth_launcher.star @@ -123,12 +123,7 @@ def launch( http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) - metrics_info = None - if observability_params.enabled: - metrics_url = "{0}:{1}".format(service.ip_address, observability.METRICS_PORT_NUM) - metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url - ) + metrics_info = observability.new_metrics_info(service, METRICS_PATH) if observability_params.enabled else None return ethereum_package_el_context.new_el_context( client_name="reth", diff --git a/src/observability/constants.star b/src/observability/constants.star index d4a3e5a8..59ee8654 100644 --- a/src/observability/constants.star +++ b/src/observability/constants.star @@ -2,10 +2,10 @@ METRICS_PORT_ID = "metrics" METRICS_PORT_NUM = 9001 METRICS_PATH = "/debug/metrics/prometheus" -def new_metrics_info(service): +def new_metrics_info(service, metrics_path=METRICS_PATH): metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service.name, METRICS_PATH, metrics_url + service.name, metrics_path, metrics_url ) return metrics_info From 9feb5a6441ebbf685cc8e88a3bb5db01d2df3707 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 16:58:07 -0500 Subject: [PATCH 12/29] conditionally expose metrics in op-besu --- src/el/op-besu/op_besu_launcher.star | 81 ++++++++++++++++------------ 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/src/el/op-besu/op_besu_launcher.star b/src/el/op-besu/op_besu_launcher.star index bb6d56e9..831835a3 100644 --- a/src/el/op-besu/op_besu_launcher.star +++ b/src/el/op-besu/op_besu_launcher.star @@ -23,12 +23,12 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/constants.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 8551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 300 @@ -41,13 +41,10 @@ TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" ENGINE_WS_PORT_ID = "engineWs" -METRICS_PORT_ID = "metrics" # TODO(old) Scale this dynamically based on CPUs available and Geth nodes mining NUM_MINING_THREADS = 1 -METRICS_PATH = "/debug/metrics/prometheus" - # The dirpath of the execution data directory on the client container EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER = "/data/besu/execution-data" @@ -72,9 +69,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL, ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -105,6 +99,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_params, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -126,6 +121,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, ) service = plan.add_service(service_name, config) @@ -134,13 +130,10 @@ def launch( plan, service_name, RPC_PORT_ID ) - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - besu_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url - ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) + metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None + return ethereum_package_el_context.new_el_context( client_name="op-besu", enode=enode, @@ -150,7 +143,7 @@ def launch( engine_rpc_port_num=ENGINE_RPC_PORT_NUM, rpc_http_url=http_url, service_name=service_name, - el_metrics_info=[besu_metrics_info], + el_metrics_info=[metrics_info], ) @@ -167,15 +160,18 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, ): discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) cmd = [ "besu", "--genesis-file=" - + ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER - + "/genesis-{0}.json".format(launcher.network_id), + + "{0}/genesis-{1}.json".format( + ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER, + launcher.network_id + ), "--network-id={0}".format(launcher.network_id), # "--logging=" + log_level, "--data-path=" + EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, @@ -198,13 +194,41 @@ def get_config( "--engine-host-allowlist=*", "--engine-rpc-port={0}".format(ENGINE_RPC_PORT_NUM), "--sync-mode=FULL", - "--metrics-enabled=true", - "--metrics-host=0.0.0.0", - "--metrics-port={0}".format(METRICS_PORT_NUM), "--bonsai-limit-trie-logs-enabled=false", "--version-compatibility-protection=false", ] + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.el_volume_size) + if int(participant.el_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.EL_TYPE.op_besu + "_volume_size" + ], + ) + + # configure environment variables + + env_vars = dict(participant.el_extra_env_vars) + + # apply customizations + + if observability_params.enabled: + cmd += [ + "--metrics-enabled=true", + "--metrics-host=0.0.0.0", + "--metrics-port={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) + # if not sequencer_enabled: # cmd.append( # "--rollup.sequencerhttp={0}".format(sequencer_context.rpc_http_url) @@ -225,21 +249,7 @@ def get_config( cmd += participant.el_extra_params cmd_str = " ".join(cmd) - - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.el_volume_size) - if int(participant.el_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.EL_TYPE.op_besu + "_volume_size" - ], - ) - env_vars = participant.el_extra_env_vars + config_args = { "image": participant.el_image, "ports": used_ports, @@ -260,6 +270,8 @@ def get_config( "user": User(uid=0, gid=0), } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -268,6 +280,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) From 1b17b97e05dc69ffa0fe35f1dc86b5ebb47e493e Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 17:13:15 -0500 Subject: [PATCH 13/29] conditionally expose metrics on hildr --- src/cl/hildr/hildr_launcher.star | 82 ++++++++++++++++++---------- src/cl/op-node/op_node_launcher.star | 22 ++++---- 2 files changed, 65 insertions(+), 39 deletions(-) diff --git a/src/cl/hildr/hildr_launcher.star b/src/cl/hildr/hildr_launcher.star index b169c07f..d87fe5ed 100644 --- a/src/cl/hildr/hildr_launcher.star +++ b/src/cl/hildr/hildr_launcher.star @@ -15,6 +15,7 @@ ethereum_package_input_parser = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/constants.star") util = import_module("../../util.star") @@ -31,6 +32,7 @@ BEACON_HTTP_PORT_ID = "http" BEACON_DISCOVERY_PORT_NUM = 9003 BEACON_HTTP_PORT_NUM = 8547 +METRICS_PATH = "/metrics" def get_used_ports(discovery_port): used_ports = { @@ -73,6 +75,7 @@ def launch( existing_cl_clients, l1_config_env_vars, sequencer_enabled, + observability_params, interop_params, ): # beacon_node_identity_recipe = PostHttpRequestRecipe( @@ -104,6 +107,7 @@ def launch( existing_cl_clients, l1_config_env_vars, sequencer_enabled, + observability_params, ) beacon_service = plan.add_service(service_name, config) @@ -113,6 +117,8 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) + metrics_info = observability.new_metrics_info(service, METRICS_PATH) if observability_params.enabled else None + # response = plan.request( # recipe=beacon_node_identity_recipe, service_name=service_name # ) @@ -127,6 +133,7 @@ def launch( ip_addr=beacon_service.ip_address, http_port=beacon_http_port.number, beacon_http_url=beacon_http_url, + cl_nodes_metrics_info=[metrics_info], beacon_service_name=service_name, ) @@ -144,6 +151,7 @@ def get_beacon_config( existing_cl_clients, l1_config_env_vars, sequencer_enabled, + observability_params, ): EXECUTION_ENGINE_ENDPOINT = "http://{0}:{1}".format( el_context.ip_addr, @@ -154,7 +162,7 @@ def get_beacon_config( el_context.rpc_port_num, ) - used_ports = get_used_ports(BEACON_DISCOVERY_PORT_NUM) + ports = dict(get_used_ports(BEACON_DISCOVERY_PORT_NUM)) cmd = [ "--devnet", @@ -168,21 +176,52 @@ def get_beacon_config( "--rpc-port={0}".format(BEACON_HTTP_PORT_NUM), "--sync-mode=full", "--network=" - + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/rollup-{0}.json".format(launcher.network_params.network_id), + + "{0}/rollup-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + launcher.network_params.network_id + ), ] - sequencer_private_key = util.read_network_config_value( - plan, - launcher.deployment_output, - "sequencer-{0}".format(launcher.network_params.network_id), - ".privateKey", - ) + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[BEACON_DATA_DIRPATH_ON_SERVICE_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.cl_volume_size) + if int(participant.cl_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.CL_TYPE.hildr + "_volume_size" + ], + ) + + # configure environment variables + + env_vars = dict(participant.cl_extra_env_vars) + + # apply customizations + + if observability_params.enabled: + cmd += [ + "--metrics-enable", + "--metrics-port={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) if sequencer_enabled: - cmd.append("--sequencer-enable") + # sequencer private key can't be used by hildr yet + # sequencer_private_key = util.read_network_config_value( + # plan, + # launcher.deployment_output, + # "sequencer-{0}".format(launcher.network_params.network_id), + # ".privateKey", + # ) - # sequencer private key can't be used by hildr yet + cmd.append("--sequencer-enable") if len(existing_cl_clients) == 1: cmd.append( @@ -199,24 +238,6 @@ def get_beacon_config( cmd += participant.cl_extra_params - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[BEACON_DATA_DIRPATH_ON_SERVICE_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.cl_volume_size) - if int(participant.cl_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.CL_TYPE.hildr + "_volume_size" - ], - ) - - ports = {} - ports.update(used_ports) - - env_vars = participant.cl_extra_env_vars config_args = { "image": participant.cl_image, "ports": ports, @@ -235,6 +256,8 @@ def get_beacon_config( "node_selectors": node_selectors, } + # configure resources + if participant.cl_min_cpu > 0: config_args["min_cpu"] = participant.cl_min_cpu if participant.cl_max_cpu > 0: @@ -243,6 +266,7 @@ def get_beacon_config( config_args["min_memory"] = participant.cl_min_mem if participant.cl_max_mem > 0: config_args["max_memory"] = participant.cl_max_mem + return ServiceConfig(**config_args) diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index 83cbdac1..27514619 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -240,17 +240,19 @@ def get_beacon_config( } ) - sequencer_private_key = util.read_network_config_value( - plan, - launcher.deployment_output, - "sequencer-{0}".format(launcher.network_params.network_id), - ".privateKey", - ) - if sequencer_enabled: - cmd.append("--p2p.sequencer.key=" + sequencer_private_key) - cmd.append("--sequencer.enabled") - cmd.append("--sequencer.l1-confs=5") + sequencer_private_key = util.read_network_config_value( + plan, + launcher.deployment_output, + "sequencer-{0}".format(launcher.network_params.network_id), + ".privateKey", + ) + + cmd += [ + "--p2p.sequencer.key=" + sequencer_private_key + "--sequencer.enabled", + "--sequencer.l1-confs=5", + ] if len(existing_cl_clients) > 0: cmd.append( From 2e8b9b630bea6f41d8eea0507d1f917fcf517db9 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Wed, 8 Jan 2025 18:36:17 -0500 Subject: [PATCH 14/29] conditionally expose metrics on op-batcher --- .../op-batcher/op_batcher_launcher.star | 26 +++++++++- src/el_cl_launcher.star | 6 +-- src/observability/constants.star | 5 +- .../prometheus/prometheus_launcher.star | 50 ++++++++++++------- src/participant_network.star | 1 + 5 files changed, 63 insertions(+), 25 deletions(-) diff --git a/src/batcher/op-batcher/op_batcher_launcher.star b/src/batcher/op-batcher/op_batcher_launcher.star index e20d0654..3ee16e5a 100644 --- a/src/batcher/op-batcher/op_batcher_launcher.star +++ b/src/batcher/op-batcher/op_batcher_launcher.star @@ -6,6 +6,9 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) +observability = import_module("../../observability/constants.star") +prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") + # # ---------------------------------- Batcher client ------------------------------------- # The Docker container runs as the "op-batcher" user so we can't write to root @@ -41,6 +44,7 @@ def launch( l1_config_env_vars, gs_batcher_private_key, batcher_params, + observability_params, ): batcher_service_name = "{0}".format(service_name) @@ -53,6 +57,7 @@ def launch( l1_config_env_vars, gs_batcher_private_key, batcher_params, + observability_params, ) batcher_service = plan.add_service(service_name, config) @@ -62,6 +67,12 @@ def launch( batcher_service.ip_address, batcher_http_port.number ) + if observability_params.enabled: + prometheus.register_service_metrics_job( + service_name=batcher_service.name + endpoint=prometheus.make_metrics_url(batcher_service), + ) + return "op_batcher" @@ -74,7 +85,10 @@ def get_batcher_config( l1_config_env_vars, gs_batcher_private_key, batcher_params, + observability_params, ): + ports = dict(get_used_ports()) + cmd = [ "op-batcher", "--l2-eth-rpc=" + el_context.rpc_http_url, @@ -93,9 +107,19 @@ def get_batcher_config( "--data-availability-type=blobs", ] + # apply customizations + + if observability_params.enabled: + cmd += [ + "--metrics.enabled", + "--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) + cmd += batcher_params.extra_params - ports = get_used_ports() return ServiceConfig( image=image, ports=ports, diff --git a/src/el_cl_launcher.star b/src/el_cl_launcher.star index 1af402bc..cf6c36ba 100644 --- a/src/el_cl_launcher.star +++ b/src/el_cl_launcher.star @@ -236,7 +236,7 @@ def launch( if(metrics_info == None): continue - prometheus.register_node_metrics_job(metrics_info) + prometheus.register_node_metrics_job(el_context.client_name, "execution", metrics_info) if rollup_boost_enabled: plan.print("Rollup boost enabled") @@ -303,12 +303,10 @@ def launch( if observability_params.enabled: for metrics_info in filter(lambda x: x is not None, cl_context.cl_metrics_info): - metrics_info[prometheus.METRICS_INFO_ADDITIONAL_CONFIG_KEY].update({ + prometheus.register_node_metrics_job(cl_context.client_name, "beacon", metrics_info, { "supernode": str(cl_context.supernode), }) - prometheus.register_node_metrics_job(metrics_info) - sequencer_enabled = False all_el_contexts.append(el_context) diff --git a/src/observability/constants.star b/src/observability/constants.star index 59ee8654..57785778 100644 --- a/src/observability/constants.star +++ b/src/observability/constants.star @@ -2,8 +2,11 @@ METRICS_PORT_ID = "metrics" METRICS_PORT_NUM = 9001 METRICS_PATH = "/debug/metrics/prometheus" +def make_metrics_url(service, metrics_port_num=METRICS_PORT_NUM): + return "{0}:{1}".format(service.ip_address, metrics_port_num) + def new_metrics_info(service, metrics_path=METRICS_PATH): - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) + metrics_url = make_metrics_url(service.ip_address) metrics_info = ethereum_package_node_metrics.new_node_metrics_info( service.name, metrics_path, metrics_url ) diff --git a/src/observability/prometheus/prometheus_launcher.star b/src/observability/prometheus/prometheus_launcher.star index 7a628334..297527d0 100644 --- a/src/observability/prometheus/prometheus_launcher.star +++ b/src/observability/prometheus/prometheus_launcher.star @@ -1,7 +1,5 @@ prometheus = import_module("github.com/kurtosis-tech/prometheus-package/main.star") -EXECUTION_CLIENT_TYPE = "execution" - METRICS_INFO_NAME_KEY = "name" METRICS_INFO_URL_KEY = "url" METRICS_INFO_PATH_KEY = "path" @@ -12,9 +10,6 @@ PROMETHEUS_DEFAULT_SCRAPE_INTERVAL = "15s" REGISTERED_METRICS_JOBS = [] -def register_metrics_job(metrics_job): - REGISTERED_METRICS_JOBS.append(metrics_job) - def launch_prometheus( plan, metrics_jobs, @@ -55,12 +50,31 @@ def new_metrics_job( "ScrapeInterval": scrape_interval, } -def register_node_metrics_job(node_metrics_info): +def register_metrics_job(metrics_job): + REGISTERED_METRICS_JOBS.append(metrics_job) + +def register_service_metrics_job(service_name, endpoint, metrics_path="", additional_labels={}, scrape_interval=PROMETHEUS_DEFAULT_SCRAPE_INTERVAL): labels = { - "service": el_context.service_name, - "client_type": EXECUTION_CLIENT_TYPE, - "client_name": el_context.client_name, + "service": service_name, } + labels.update(additional_labels) + + register_metrics_job( + new_metrics_job( + job_name=service_name, + endpoint=endpoint, + metrics_path=metrics_path, + labels=labels, + scrape_interval=scrape_interval, + ) + ) + +def register_node_metrics_job(client_name, client_type, node_metrics_info, additional_labels={}): + labels = { + "client_type": client_type, + "client_name": client_name, + } + labels.update(additional_labels) scrape_interval = PROMETHEUS_DEFAULT_SCRAPE_INTERVAL @@ -71,19 +85,17 @@ def register_node_metrics_job(node_metrics_info): if additional_config != None: if additional_config.labels != None: labels.update(additional_config.labels) - + if ( additional_config.scrape_interval != None and additional_config.scrape_interval != "" ): scrape_interval = additional_config.scrape_interval - - register_metrics_job( - new_metrics_job( - job_name=node_metrics_info[METRICS_INFO_NAME_KEY], - endpoint=node_metrics_info[METRICS_INFO_URL_KEY], - metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY], - labels=labels, - scrape_interval=scrape_interval, - ) + + register_service_metrics_job( + service_name=node_metrics_info[METRICS_INFO_NAME_KEY], + endpoint=node_metrics_info[METRICS_INFO_URL_KEY] + metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY] + additional_labels=labels, + scrape_interval=scrape_interval, ) diff --git a/src/participant_network.star b/src/participant_network.star index 1afd534c..03362b63 100644 --- a/src/participant_network.star +++ b/src/participant_network.star @@ -88,6 +88,7 @@ def launch_participant_network( l1_config_env_vars, batcher_key, batcher_params, + observability_params, ) game_factory_address = util.read_network_config_value( From f39715f645111e63a15062fbb7ebd8909d6ba6b6 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 13:02:09 -0500 Subject: [PATCH 15/29] use standard input_parser pattern for batcher/challenger --- src/package_io/input_parser.star | 4 ++-- src/participant_network.star | 16 ++++------------ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/package_io/input_parser.star b/src/package_io/input_parser.star index 91a33c09..e6855d9c 100644 --- a/src/package_io/input_parser.star +++ b/src/package_io/input_parser.star @@ -379,14 +379,14 @@ def default_network_params(): def default_batcher_params(): return { - "image": "", + "image": DEFAULT_BATCHER_IMAGES["op-batcher"], "extra_params": [], } def default_challenger_params(): return { - "image": "", + "image": DEFAULT_CHALLENGER_IMAGES["op-challenger"], "extra_params": [], "cannon_prestate_path": "", "cannon_prestates_url": "https://storage.googleapis.com/oplabs-network-data/proofs/op-program/cannon", diff --git a/src/participant_network.star b/src/participant_network.star index 03362b63..6896cdaa 100644 --- a/src/participant_network.star +++ b/src/participant_network.star @@ -74,15 +74,11 @@ def launch_participant_network( "batcher-{0}".format(network_params.network_id), ".privateKey", ) - op_batcher_image = ( - batcher_params.image - if batcher_params.image != "" - else input_parser.DEFAULT_BATCHER_IMAGES["op-batcher"] - ) + op_batcher_launcher.launch( plan, "op-batcher-{0}".format(l2_services_suffix), - op_batcher_image, + batcher_params.image, all_el_contexts[0], all_cl_contexts[0], l1_config_env_vars, @@ -103,15 +99,11 @@ def launch_participant_network( "challenger-{0}".format(network_params.network_id), ".privateKey", ) - op_challenger_image = ( - challenger_params.image - if challenger_params.image != "" - else input_parser.DEFAULT_CHALLENGER_IMAGES["op-challenger"] - ) + op_challenger_launcher.launch( plan, "op-challenger-{0}".format(l2_services_suffix), - op_challenger_image, + challenger_params.image, all_el_contexts[0], all_cl_contexts[0], l1_config_env_vars, From 20a38683bd9e841d06e7481fe026ddb19be13a8b Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 13:15:12 -0500 Subject: [PATCH 16/29] conditionally expose metrics in op-challenger --- .../op-batcher/op_batcher_launcher.star | 13 ++------ .../op-challenger/op_challenger_launcher.star | 32 ++++++++++++++++--- src/observability/constants.star | 10 ++++++ .../prometheus/prometheus_launcher.star | 6 ++++ 4 files changed, 45 insertions(+), 16 deletions(-) diff --git a/src/batcher/op-batcher/op_batcher_launcher.star b/src/batcher/op-batcher/op_batcher_launcher.star index 3ee16e5a..f989bb8d 100644 --- a/src/batcher/op-batcher/op_batcher_launcher.star +++ b/src/batcher/op-batcher/op_batcher_launcher.star @@ -68,10 +68,7 @@ def launch( ) if observability_params.enabled: - prometheus.register_service_metrics_job( - service_name=batcher_service.name - endpoint=prometheus.make_metrics_url(batcher_service), - ) + prometheus.register_op_service_metrics_job(batcher_service) return "op_batcher" @@ -110,13 +107,7 @@ def get_batcher_config( # apply customizations if observability_params.enabled: - cmd += [ - "--metrics.enabled", - "--metrics.addr=0.0.0.0", - "--metrics.port={0}".format(observability.METRICS_PORT_NUM), - ] - - observability.expose_metrics_port(ports) + observability.configure_op_service_metrics(cmd, ports) cmd += batcher_params.extra_params diff --git a/src/challenger/op-challenger/op_challenger_launcher.star b/src/challenger/op-challenger/op_challenger_launcher.star index 49d656bf..9e5e2cca 100644 --- a/src/challenger/op-challenger/op_challenger_launcher.star +++ b/src/challenger/op-challenger/op_challenger_launcher.star @@ -6,6 +6,9 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) +observability = import_module("../../observability/constants.star") +prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") + # # ---------------------------------- Challenger client ------------------------------------- CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER = "/data/op-challenger/op-challenger-data" @@ -29,6 +32,7 @@ def launch( deployment_output, network_params, challenger_params, + observability_params, ): challenger_service_name = "{0}".format(service_name) @@ -44,10 +48,14 @@ def launch( deployment_output, network_params, challenger_params, + observability_params, ) challenger_service = plan.add_service(service_name, config) + if observability_params.enabled: + prometheus.register_op_service_metrics_job(challenger_service) + return "op_challenger" @@ -63,15 +71,22 @@ def get_challenger_config( deployment_output, network_params, challenger_params, + observability_params, ): + ports = dict(get_used_ports()) + cmd = [ "op-challenger", "--cannon-l2-genesis=" - + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/genesis-{0}.json".format(network_params.network_id), + + "{0}/genesis-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + network_params.network_id + ), "--cannon-rollup-config=" - + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/rollup-{0}.json".format(network_params.network_id), + + "{0}/rollup-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + network_params.network_id + ), "--game-factory-address=" + game_factory_address, "--datadir=" + CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER, "--l1-beacon=" + l1_config_env_vars["CL_RPC_URL"], @@ -81,10 +96,18 @@ def get_challenger_config( "--rollup-rpc=" + cl_context.beacon_http_url, "--trace-type=" + "cannon,permissioned", ] + + # configure files + files = { ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: deployment_output, } + # apply customizations + + if observability_params.enabled: + observability.configure_op_service_metrics(cmd, ports) + if ( challenger_params.cannon_prestate_path and challenger_params.cannon_prestates_url @@ -107,7 +130,6 @@ def get_challenger_config( CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER, " ".join(cmd) ) - ports = get_used_ports() return ServiceConfig( image=image, ports=ports, diff --git a/src/observability/constants.star b/src/observability/constants.star index 57785778..0de9a809 100644 --- a/src/observability/constants.star +++ b/src/observability/constants.star @@ -17,3 +17,13 @@ def expose_metrics_port(ports, port_id=METRICS_PORT_ID, port_num=METRICS_PORT_NU ports[port_id] = ethereum_package_shared_utils.new_port_spec( port_num, ethereum_package_shared_utils.TCP_PROTOCOL ) + +# configures the CLI flags and ports for a service using the standard op-service setup +def configure_op_service_metrics(cmd, ports): + cmd += [ + "--metrics.enabled", + "--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(METRICS_PORT_NUM), + ] + + expose_metrics_port(ports) diff --git a/src/observability/prometheus/prometheus_launcher.star b/src/observability/prometheus/prometheus_launcher.star index 297527d0..2a776016 100644 --- a/src/observability/prometheus/prometheus_launcher.star +++ b/src/observability/prometheus/prometheus_launcher.star @@ -53,6 +53,12 @@ def new_metrics_job( def register_metrics_job(metrics_job): REGISTERED_METRICS_JOBS.append(metrics_job) +def register_op_service_metrics_job(service): + register_service_metrics_job( + service_name=service.name + endpoint=prometheus.make_metrics_url(service), + ) + def register_service_metrics_job(service_name, endpoint, metrics_path="", additional_labels={}, scrape_interval=PROMETHEUS_DEFAULT_SCRAPE_INTERVAL): labels = { "service": service_name, From 64ac50a22601c5233e43cef5b1acaf6e11fa51d5 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 13:16:55 -0500 Subject: [PATCH 17/29] use standard input_parser pattern for proposer --- src/package_io/input_parser.star | 2 +- src/participant_network.star | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/package_io/input_parser.star b/src/package_io/input_parser.star index e6855d9c..9355c6b0 100644 --- a/src/package_io/input_parser.star +++ b/src/package_io/input_parser.star @@ -395,7 +395,7 @@ def default_challenger_params(): def default_proposer_params(): return { - "image": "", + "image": DEFAULT_PROPOSER_IMAGES["op-proposer"], "extra_params": [], "game_type": 1, "proposal_interval": "10m", diff --git a/src/participant_network.star b/src/participant_network.star index 6896cdaa..d1591417 100644 --- a/src/participant_network.star +++ b/src/participant_network.star @@ -120,15 +120,11 @@ def launch_participant_network( "proposer-{0}".format(network_params.network_id), ".privateKey", ) - op_proposer_image = ( - proposer_params.image - if proposer_params.image != "" - else input_parser.DEFAULT_PROPOSER_IMAGES["op-proposer"] - ) + op_proposer_launcher.launch( plan, "op-proposer-{0}".format(l2_services_suffix), - op_proposer_image, + proposer_params.image, all_cl_contexts[0], l1_config_env_vars, proposer_key, From fe3ec2f43adc3ec7ee73309709ff297d3cd01d18 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 13:19:30 -0500 Subject: [PATCH 18/29] conditionally expose metrics on op-proposer --- src/participant_network.star | 2 ++ .../op-proposer/op_proposer_launcher.star | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/participant_network.star b/src/participant_network.star index d1591417..edc3f386 100644 --- a/src/participant_network.star +++ b/src/participant_network.star @@ -112,6 +112,7 @@ def launch_participant_network( deployment_output, network_params, challenger_params, + observability_params, ) proposer_key = util.read_network_config_value( @@ -130,6 +131,7 @@ def launch_participant_network( proposer_key, game_factory_address, proposer_params, + observability_params, ) return all_participants diff --git a/src/proposer/op-proposer/op_proposer_launcher.star b/src/proposer/op-proposer/op_proposer_launcher.star index c12fb34b..55dbf67d 100644 --- a/src/proposer/op-proposer/op_proposer_launcher.star +++ b/src/proposer/op-proposer/op_proposer_launcher.star @@ -6,6 +6,9 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) +observability = import_module("../../observability/constants.star") +prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") + # # ---------------------------------- Batcher client ------------------------------------- # The Docker container runs as the "op-proposer" user so we can't write to root @@ -41,6 +44,7 @@ def launch( gs_proposer_private_key, game_factory_address, proposer_params, + observability_params, ): proposer_service_name = "{0}".format(service_name) @@ -53,6 +57,7 @@ def launch( gs_proposer_private_key, game_factory_address, proposer_params, + observability_params, ) proposer_service = plan.add_service(service_name, config) @@ -62,6 +67,9 @@ def launch( proposer_service.ip_address, proposer_http_port.number ) + if observability_params.enabled: + prometheus.register_op_service_metrics_job(proposer_service) + return "op_proposer" @@ -74,7 +82,10 @@ def get_proposer_config( gs_proposer_private_key, game_factory_address, proposer_params, + observability_params, ): + ports = dict(get_used_ports()) + cmd = [ "op-proposer", "--poll-interval=12s", @@ -89,6 +100,11 @@ def get_proposer_config( "--wait-node-sync=true", ] + # apply customizations + + if observability_params.enabled: + observability.configure_op_service_metrics(cmd, ports) + cmd += proposer_params.extra_params ports = get_used_ports() From 801a6edaa52d93f02985be7015a62e8547b4a559 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 13:27:29 -0500 Subject: [PATCH 19/29] conditionally expose metrics on op-supervisor --- main.star | 1 + .../op-supervisor/op_supervisor_launcher.star | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/main.star b/main.star index 52d2f725..10a2610f 100644 --- a/main.star +++ b/main.star @@ -133,6 +133,7 @@ def run(plan, args): all_participants, jwt_file, interop_params.supervisor_params, + observability_params, ) diff --git a/src/interop/op-supervisor/op_supervisor_launcher.star b/src/interop/op-supervisor/op_supervisor_launcher.star index 24f612d1..628e453a 100644 --- a/src/interop/op-supervisor/op_supervisor_launcher.star +++ b/src/interop/op-supervisor/op_supervisor_launcher.star @@ -8,6 +8,9 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) +observability = import_module("../../observability/constants.star") +prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") + interop_constants = import_module("../constants.star") @@ -47,6 +50,7 @@ def launch( all_participants, jwt_file, supervisor_params, + observability_params, ): dependency_set_json = supervisor_params.dependency_set if not dependency_set_json: @@ -64,12 +68,16 @@ def launch( jwt_file, dependency_set_artifact, supervisor_params, + observability_params, ) supervisor_service = plan.add_service( interop_constants.SUPERVISOR_SERVICE_NAME, config ) + if observability_params.enabled: + prometheus.register_op_service_metrics_job(supervisor_service) + return "op_supervisor" @@ -80,10 +88,17 @@ def get_supervisor_config( jwt_file, dependency_set_artifact, supervisor_params, + observability_params, ): + ports = dict(get_used_ports()) + cmd = ["op-supervisor"] + supervisor_params.extra_params - ports = get_used_ports() + # apply customizations + + if observability_params.enabled: + observability.configure_op_service_metrics(cmd, ports) + return ServiceConfig( image=supervisor_params.image, ports=ports, From 13764a73a6e58ae3533b107cac510065c1f6d3a5 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 13:47:39 -0500 Subject: [PATCH 20/29] add prometheus_params --- README.md | 14 +++++++ main.star | 16 ++++---- .../prometheus/prometheus_launcher.star | 1 - src/package_io/input_parser.star | 39 +++++++++++++++---- src/package_io/sanity_check.star | 22 +++++++++++ 5 files changed, 75 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 9f8500a2..59966ab5 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,20 @@ optimism_package: observability: # Whether or not to configure observability (e.g. prometheus) enabled: true + # Default prometheus configuration + prometheus_params: + storage_tsdb_retention_time: "1d" + storage_tsdb_retention_size: "512MB" + # Resource management for prometheus container + # CPU is milicores + # RAM is in MB + min_cpu: 10 + max_cpu: 1000 + min_mem: 128 + max_mem: 2048 + # Prometheus docker image to use + # Defaults to the latest image + image: "prom/prometheus:latest" # Interop configuration interop: # Whether or not to enable interop mode diff --git a/main.star b/main.star index 10a2610f..0ec4c02d 100644 --- a/main.star +++ b/main.star @@ -116,15 +116,6 @@ def run(plan, args): interop_params, ) - if observability_params.enabled: - plan.print("Launching prometheus...") - prometheus_private_url = prometheus.launch_prometheus( - plan, - metrics_jobs, - global_node_selectors, - observability_params, - ) - if interop_params.enabled: op_supervisor_launcher.launch( plan, @@ -136,6 +127,13 @@ def run(plan, args): observability_params, ) + if observability_params.enabled: + plan.print("Launching prometheus...") + prometheus_private_url = prometheus.launch_prometheus( + plan, + global_node_selectors, + observability_params.prometheus_params, + ) def get_l1_config(all_l1_participants, l1_network_params, l1_network_id): env_vars = {} diff --git a/src/observability/prometheus/prometheus_launcher.star b/src/observability/prometheus/prometheus_launcher.star index 2a776016..b7cd1841 100644 --- a/src/observability/prometheus/prometheus_launcher.star +++ b/src/observability/prometheus/prometheus_launcher.star @@ -12,7 +12,6 @@ REGISTERED_METRICS_JOBS = [] def launch_prometheus( plan, - metrics_jobs, global_node_selectors, prometheus_params, ): diff --git a/src/package_io/input_parser.star b/src/package_io/input_parser.star index 9355c6b0..ebefaed5 100644 --- a/src/package_io/input_parser.star +++ b/src/package_io/input_parser.star @@ -64,6 +64,15 @@ def input_parser(plan, input_args): return struct( observability=struct( enabled=results["observability"]["enabled"], + prometheus_params=struct( + image=results["observability"]["prometheus_params"]["image"], + storage_tsdb_retention_time=results["observability"]["prometheus_params"]["storage_tsdb_retention_time"], + storage_tsdb_retention_size=results["observability"]["prometheus_params"]["storage_tsdb_retention_size"], + min_cpu=results["observability"]["prometheus_params"]["min_cpu"], + max_cpu=results["observability"]["prometheus_params"]["max_cpu"], + min_mem=results["observability"]["prometheus_params"]["min_mem"], + max_mem=results["observability"]["prometheus_params"]["max_mem"], + ), ), interop=struct( enabled=results["interop"]["enabled"], @@ -180,12 +189,17 @@ def parse_network_params(plan, input_args): # configure observability - results["observability"] = default_observability_args() + results["observability"] = default_observability_params() results["observability"].update(input_args.get("observability", {})) + results["observability"]["prometheus_params"] = default_prometheus_params() + results["observability"]["prometheus_params"].update( + input_args.get("observability", {}).get("prometheus_params", {}) + ) + # configure interop - results["interop"] = default_interop_args() + results["interop"] = default_interop_params() results["interop"].update(input_args.get("interop", {})) results["interop"]["supervisor_params"] = default_supervisor_params() @@ -310,10 +324,10 @@ def parse_network_params(plan, input_args): return results -def default_optimism_args(): +def default_optimism_params(): return { - "observability": default_observability_args(), - "interop": default_interop_args(), + "observability": default_observability_params(), + "interop": default_interop_params(), "chains": default_chains(), "op_contract_deployer_params": default_op_contract_deployer_params(), "global_log_level": "info", @@ -322,12 +336,23 @@ def default_optimism_args(): "persistent": False, } -def default_observability_args(): +def default_observability_params(): return { "enabled": True, } -def default_interop_args(): +def default_prometheus_params(): + return { + "image": "prom/prometheus:latest", + "storage_tsdb_retention_time": "1d", + "storage_tsdb_retention_size": "512MB", + "min_cpu": 10, + "max_cpu": 1000, + "min_mem": 128, + "max_mem": 2048, + } + +def default_interop_params(): return { "enabled": False, } diff --git a/src/package_io/sanity_check.star b/src/package_io/sanity_check.star index 75e73511..f14f5ed9 100644 --- a/src/package_io/sanity_check.star +++ b/src/package_io/sanity_check.star @@ -1,6 +1,18 @@ OBSERVABILITY_PARAMS = [ "enabled", + "prometheus_params", ] + +PROMETHEUS_PARAMS = [ + "image", + "storage_tsdb_retention_time", + "storage_tsdb_retention_size", + "min_cpu", + "max_cpu", + "min_mem", + "max_mem", +] + INTEROP_PARAMS = [ "enabled", "supervisor_params", @@ -87,6 +99,7 @@ ADDITIONAL_SERVICES_PARAMS = [ ] ROOT_PARAMS = [ + "observability", "interop", "chains", "op_contract_deployer_params", @@ -145,6 +158,15 @@ def sanity_check(plan, optimism_config): OBSERVABILITY_PARAMS, ) + if "prometheus_params" in optimism_config["observability"]: + validate_params( + plan, + optimism_config["observability"]:, + "prometheus_params", + PROMETHEUS_PARAMS, + ) + + if "interop" in optimism_config: validate_params( plan, From e665f6b020f48f5428e8c336f1edf76372d6ac31 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 14:11:19 -0500 Subject: [PATCH 21/29] conditionally expose metrics on op-erigon --- src/el/op-erigon/op_erigon_launcher.star | 110 +++++++++++++---------- 1 file changed, 62 insertions(+), 48 deletions(-) diff --git a/src/el/op-erigon/op_erigon_launcher.star b/src/el/op-erigon/op_erigon_launcher.star index 5e9d9310..ac738694 100644 --- a/src/el/op-erigon/op_erigon_launcher.star +++ b/src/el/op-erigon/op_erigon_launcher.star @@ -21,12 +21,12 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/constants.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 8551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 300 @@ -39,9 +39,6 @@ TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" ENGINE_WS_PORT_ID = "engineWs" -METRICS_PORT_ID = "metrics" - -METRICS_PATH = "/debug/metrics/prometheus" # The dirpath of the execution data directory on the client container EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER = "/data/op-erigon/execution-data" @@ -67,9 +64,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL, ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -97,6 +91,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_params, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -118,6 +113,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, ) service = plan.add_service(service_name, config) @@ -126,13 +122,10 @@ def launch( plan, service_name, RPC_PORT_ID ) - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - erigon_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url - ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) + metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None + return ethereum_package_el_context.new_el_context( client_name="op-erigon", enode=enode, @@ -143,7 +136,7 @@ def launch( enr=enr, rpc_http_url=http_url, service_name=service_name, - el_metrics_info=[erigon_metrics_info], + el_metrics_info=[metrics_info], ) @@ -160,15 +153,12 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, ): - init_datadir_cmd_str = "erigon init --datadir={0} {1}".format( - EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, - ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER - + "/genesis-{0}.json".format(launcher.network_id), - ) - discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) + + subcommand_strs = [] cmd = [ "erigon", @@ -188,12 +178,51 @@ def get_config( "--authrpc.jwtsecret=" + ethereum_package_constants.JWT_MOUNT_PATH_ON_CONTAINER, "--nat=extip:" + ethereum_package_constants.PRIVATE_IP_ADDRESS_PLACEHOLDER, "--rpc.allow-unprotected-txs", - "--metrics", - "--metrics.addr=0.0.0.0", - "--metrics.port={0}".format(METRICS_PORT_NUM), "--port={0}".format(discovery_port), ] + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.el_volume_size) + if int(participant.el_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.EL_TYPE.op_erigon + "_volume_size" + ], + ) + + if launcher.network not in ethereum_package_constants.PUBLIC_NETWORKS: + init_datadir_cmd_str = "erigon init --datadir={0} {1}".format( + EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, + "{0}/genesis-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + launcher.network_id + ), + ) + + subcommand_strs.append(init_datadir_cmd_str) + + # configure environment variables + + env_vars = dict(participant.el_extra_env_vars) + + # apply customizations + + if observability_params.enabled: + cmd += [ + "--metrics", + "--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) + if not sequencer_enabled: cmd.append("--rollup.sequencerhttp={0}".format(sequencer_context.rpc_http_url)) @@ -210,34 +239,16 @@ def get_config( ) ) - cmd += participant.el_extra_params - cmd_str = " ".join(cmd) - if launcher.network not in ethereum_package_constants.PUBLIC_NETWORKS: - subcommand_strs = [ - init_datadir_cmd_str, - cmd_str, - ] - command_str = " && ".join(subcommand_strs) - else: - command_str = cmd_str + + # construct command string - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.el_volume_size) - if int(participant.el_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.EL_TYPE.op_erigon + "_volume_size" - ], - ) - env_vars = participant.el_extra_env_vars + cmd += participant.el_extra_params + subcommand_strs.append(" ".join(cmd)) + command_str = " && ".join(subcommand_strs) + config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": [command_str], "files": files, "entrypoint": ENTRYPOINT_ARGS, @@ -255,6 +266,8 @@ def get_config( "user": User(uid=0, gid=0), } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -263,6 +276,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) From 19da2087edba06ad0bc8f9202967caa810b0aa32 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 14:14:35 -0500 Subject: [PATCH 22/29] fix errors --- main.star | 2 +- src/cl/hildr/hildr_launcher.star | 2 +- src/cl/op-node/op_node_launcher.star | 4 ++-- src/el/op-besu/op_besu_launcher.star | 2 +- src/el/op-geth/op_geth_launcher.star | 1 + src/el/op-reth/op_reth_launcher.star | 2 +- src/el_cl_launcher.star | 9 +++------ src/observability/constants.star | 9 +++++++++ src/observability/prometheus/prometheus_launcher.star | 6 +++--- src/package_io/sanity_check.star | 2 +- 10 files changed, 23 insertions(+), 16 deletions(-) diff --git a/main.star b/main.star index 0ec4c02d..c402d152 100644 --- a/main.star +++ b/main.star @@ -4,7 +4,7 @@ l2_launcher = import_module("./src/l2.star") op_supervisor_launcher = import_module( "./src/interop/op-supervisor/op_supervisor_launcher.star" ) -prometheus = import_module("./src/prometheus/prometheus_launcher.star") +prometheus = import_module("./src/observability/prometheus/prometheus_launcher.star") wait_for_sync = import_module("./src/wait/wait_for_sync.star") input_parser = import_module("./src/package_io/input_parser.star") diff --git a/src/cl/hildr/hildr_launcher.star b/src/cl/hildr/hildr_launcher.star index d87fe5ed..fd0694ab 100644 --- a/src/cl/hildr/hildr_launcher.star +++ b/src/cl/hildr/hildr_launcher.star @@ -117,7 +117,7 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) - metrics_info = observability.new_metrics_info(service, METRICS_PATH) if observability_params.enabled else None + metrics_info = observability.new_metrics_info(beacon_service, METRICS_PATH) if observability_params.enabled else None # response = plan.request( # recipe=beacon_node_identity_recipe, service_name=service_name diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index 27514619..f9ce1c9c 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -118,7 +118,7 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) - metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None + metrics_info = observability.new_metrics_info(beacon_service) if observability_params.enabled else None response = plan.request( recipe=beacon_node_identity_recipe, service_name=service_name @@ -249,7 +249,7 @@ def get_beacon_config( ) cmd += [ - "--p2p.sequencer.key=" + sequencer_private_key + "--p2p.sequencer.key=" + sequencer_private_key, "--sequencer.enabled", "--sequencer.l1-confs=5", ] diff --git a/src/el/op-besu/op_besu_launcher.star b/src/el/op-besu/op_besu_launcher.star index 831835a3..127455a5 100644 --- a/src/el/op-besu/op_besu_launcher.star +++ b/src/el/op-besu/op_besu_launcher.star @@ -252,7 +252,7 @@ def get_config( config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": [cmd_str], "files": files, "entrypoint": ENTRYPOINT_ARGS, diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index eac2cee8..32397df1 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -122,6 +122,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_params, interop_params, ) diff --git a/src/el/op-reth/op_reth_launcher.star b/src/el/op-reth/op_reth_launcher.star index bd842078..b4535c43 100644 --- a/src/el/op-reth/op_reth_launcher.star +++ b/src/el/op-reth/op_reth_launcher.star @@ -233,7 +233,7 @@ def get_config( config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": cmd, "files": files, "private_ip_address_placeholder": ethereum_package_constants.PRIVATE_IP_ADDRESS_PLACEHOLDER, diff --git a/src/el_cl_launcher.star b/src/el_cl_launcher.star index cf6c36ba..a5c399ff 100644 --- a/src/el_cl_launcher.star +++ b/src/el_cl_launcher.star @@ -7,7 +7,7 @@ ethereum_package_input_parser = import_module( ) input_parser = import_module("./package_io/input_parser.star") -prometheus = import_module("./prometheus/prometheus_launcher.star") +prometheus = import_module("./observability/prometheus/prometheus_launcher.star") # EL @@ -232,10 +232,7 @@ def launch( ) if observability_params.enabled: - for metrics_info in el_context.el_metrics_info: - if(metrics_info == None): - continue - + for metrics_info in [x for x in el_context.el_metrics_info if x != None]: prometheus.register_node_metrics_job(el_context.client_name, "execution", metrics_info) if rollup_boost_enabled: @@ -302,7 +299,7 @@ def launch( ) if observability_params.enabled: - for metrics_info in filter(lambda x: x is not None, cl_context.cl_metrics_info): + for metrics_info in [x for x in cl_context.cl_metrics_info if x != None]: prometheus.register_node_metrics_job(cl_context.client_name, "beacon", metrics_info, { "supernode": str(cl_context.supernode), }) diff --git a/src/observability/constants.star b/src/observability/constants.star index 0de9a809..66bddade 100644 --- a/src/observability/constants.star +++ b/src/observability/constants.star @@ -1,3 +1,12 @@ +ethereum_package_shared_utils = import_module( + "github.com/ethpandaops/ethereum-package/src/shared_utils/shared_utils.star" +) + +ethereum_package_node_metrics = import_module( + "github.com/ethpandaops/ethereum-package/src/node_metrics_info.star" +) + + METRICS_PORT_ID = "metrics" METRICS_PORT_NUM = 9001 METRICS_PATH = "/debug/metrics/prometheus" diff --git a/src/observability/prometheus/prometheus_launcher.star b/src/observability/prometheus/prometheus_launcher.star index b7cd1841..7131078a 100644 --- a/src/observability/prometheus/prometheus_launcher.star +++ b/src/observability/prometheus/prometheus_launcher.star @@ -54,7 +54,7 @@ def register_metrics_job(metrics_job): def register_op_service_metrics_job(service): register_service_metrics_job( - service_name=service.name + service_name=service.name, endpoint=prometheus.make_metrics_url(service), ) @@ -99,8 +99,8 @@ def register_node_metrics_job(client_name, client_type, node_metrics_info, addit register_service_metrics_job( service_name=node_metrics_info[METRICS_INFO_NAME_KEY], - endpoint=node_metrics_info[METRICS_INFO_URL_KEY] - metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY] + endpoint=node_metrics_info[METRICS_INFO_URL_KEY], + metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY], additional_labels=labels, scrape_interval=scrape_interval, ) diff --git a/src/package_io/sanity_check.star b/src/package_io/sanity_check.star index f14f5ed9..b9738136 100644 --- a/src/package_io/sanity_check.star +++ b/src/package_io/sanity_check.star @@ -161,7 +161,7 @@ def sanity_check(plan, optimism_config): if "prometheus_params" in optimism_config["observability"]: validate_params( plan, - optimism_config["observability"]:, + optimism_config["observability"], "prometheus_params", PROMETHEUS_PARAMS, ) From 82b662ba93eadd7c54352a1eb414a80094c06961 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 16:38:53 -0500 Subject: [PATCH 23/29] rework observability structure due to starlark limitations --- main.star | 12 ++- .../op-batcher/op_batcher_launcher.star | 11 +-- .../op-challenger/op_challenger_launcher.star | 11 +-- src/cl/hildr/hildr_launcher.star | 10 +- src/cl/op-node/op_node_launcher.star | 10 +- src/el/op-besu/op_besu_launcher.star | 10 +- src/el/op-erigon/op_erigon_launcher.star | 10 +- src/el/op-geth/op_geth_launcher.star | 10 +- .../op-nethermind/op_nethermind_launcher.star | 10 +- src/el/op-reth/op_reth_launcher.star | 10 +- src/el_cl_launcher.star | 26 +++--- .../op-supervisor/op_supervisor_launcher.star | 11 +-- src/l2.star | 4 +- src/observability/constants.star | 91 ++++++++++++++++++- .../prometheus/prometheus_launcher.star | 89 +----------------- src/participant_network.star | 10 +- .../op-proposer/op_proposer_launcher.star | 11 +-- 17 files changed, 176 insertions(+), 170 deletions(-) diff --git a/main.star b/main.star index c402d152..fffe6491 100644 --- a/main.star +++ b/main.star @@ -4,6 +4,8 @@ l2_launcher = import_module("./src/l2.star") op_supervisor_launcher = import_module( "./src/interop/op-supervisor/op_supervisor_launcher.star" ) + +observability = import_module("./src/observability/constants.star") prometheus = import_module("./src/observability/prometheus/prometheus_launcher.star") wait_for_sync = import_module("./src/wait/wait_for_sync.star") @@ -45,6 +47,8 @@ def run(plan, args): observability_params = optimism_args_with_right_defaults.observability interop_params = optimism_args_with_right_defaults.interop + observability_helper = observability.make_helper(observability_params) + # Deploy the L1 l1_network = "" if external_l1_args: @@ -112,7 +116,7 @@ def run(plan, args): global_node_selectors, global_tolerations, persistent, - observability_params, + observability_helper, interop_params, ) @@ -124,15 +128,15 @@ def run(plan, args): all_participants, jwt_file, interop_params.supervisor_params, - observability_params, + observability_helper, ) - if observability_params.enabled: + if observability_helper.enabled: plan.print("Launching prometheus...") prometheus_private_url = prometheus.launch_prometheus( plan, + observability_helper, global_node_selectors, - observability_params.prometheus_params, ) def get_l1_config(all_l1_participants, l1_network_params, l1_network_id): diff --git a/src/batcher/op-batcher/op_batcher_launcher.star b/src/batcher/op-batcher/op_batcher_launcher.star index f989bb8d..8cb6f80e 100644 --- a/src/batcher/op-batcher/op_batcher_launcher.star +++ b/src/batcher/op-batcher/op_batcher_launcher.star @@ -44,7 +44,7 @@ def launch( l1_config_env_vars, gs_batcher_private_key, batcher_params, - observability_params, + observability_helper, ): batcher_service_name = "{0}".format(service_name) @@ -57,7 +57,7 @@ def launch( l1_config_env_vars, gs_batcher_private_key, batcher_params, - observability_params, + observability_helper, ) batcher_service = plan.add_service(service_name, config) @@ -67,8 +67,7 @@ def launch( batcher_service.ip_address, batcher_http_port.number ) - if observability_params.enabled: - prometheus.register_op_service_metrics_job(batcher_service) + observability.register_op_service_metrics_job(observability_helper, batcher_service) return "op_batcher" @@ -82,7 +81,7 @@ def get_batcher_config( l1_config_env_vars, gs_batcher_private_key, batcher_params, - observability_params, + observability_helper, ): ports = dict(get_used_ports()) @@ -106,7 +105,7 @@ def get_batcher_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: observability.configure_op_service_metrics(cmd, ports) cmd += batcher_params.extra_params diff --git a/src/challenger/op-challenger/op_challenger_launcher.star b/src/challenger/op-challenger/op_challenger_launcher.star index 9e5e2cca..9e80a958 100644 --- a/src/challenger/op-challenger/op_challenger_launcher.star +++ b/src/challenger/op-challenger/op_challenger_launcher.star @@ -32,7 +32,7 @@ def launch( deployment_output, network_params, challenger_params, - observability_params, + observability_helper, ): challenger_service_name = "{0}".format(service_name) @@ -48,13 +48,12 @@ def launch( deployment_output, network_params, challenger_params, - observability_params, + observability_helper, ) challenger_service = plan.add_service(service_name, config) - if observability_params.enabled: - prometheus.register_op_service_metrics_job(challenger_service) + observability.register_op_service_metrics_job(observability_helper, challenger_service) return "op_challenger" @@ -71,7 +70,7 @@ def get_challenger_config( deployment_output, network_params, challenger_params, - observability_params, + observability_helper, ): ports = dict(get_used_ports()) @@ -105,7 +104,7 @@ def get_challenger_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: observability.configure_op_service_metrics(cmd, ports) if ( diff --git a/src/cl/hildr/hildr_launcher.star b/src/cl/hildr/hildr_launcher.star index fd0694ab..4ca9c732 100644 --- a/src/cl/hildr/hildr_launcher.star +++ b/src/cl/hildr/hildr_launcher.star @@ -75,7 +75,7 @@ def launch( existing_cl_clients, l1_config_env_vars, sequencer_enabled, - observability_params, + observability_helper, interop_params, ): # beacon_node_identity_recipe = PostHttpRequestRecipe( @@ -107,7 +107,7 @@ def launch( existing_cl_clients, l1_config_env_vars, sequencer_enabled, - observability_params, + observability_helper, ) beacon_service = plan.add_service(service_name, config) @@ -117,7 +117,7 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) - metrics_info = observability.new_metrics_info(beacon_service, METRICS_PATH) if observability_params.enabled else None + metrics_info = observability.new_metrics_info(observability_helper, beacon_service, METRICS_PATH) # response = plan.request( # recipe=beacon_node_identity_recipe, service_name=service_name @@ -151,7 +151,7 @@ def get_beacon_config( existing_cl_clients, l1_config_env_vars, sequencer_enabled, - observability_params, + observability_helper, ): EXECUTION_ENGINE_ENDPOINT = "http://{0}:{1}".format( el_context.ip_addr, @@ -204,7 +204,7 @@ def get_beacon_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: cmd += [ "--metrics-enable", "--metrics-port={0}".format(observability.METRICS_PORT_NUM), diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index f9ce1c9c..6da16800 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -74,7 +74,7 @@ def launch( existing_cl_clients, l1_config_env_vars, sequencer_enabled, - observability_params, + observability_helper, interop_params, ): beacon_node_identity_recipe = PostHttpRequestRecipe( @@ -107,7 +107,7 @@ def launch( l1_config_env_vars, beacon_node_identity_recipe, sequencer_enabled, - observability_params, + observability_helper, interop_params, ) @@ -118,7 +118,7 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) - metrics_info = observability.new_metrics_info(beacon_service) if observability_params.enabled else None + metrics_info = observability.new_metrics_info(observability_helper, beacon_service) response = plan.request( recipe=beacon_node_identity_recipe, service_name=service_name @@ -155,7 +155,7 @@ def get_beacon_config( l1_config_env_vars, beacon_node_identity_recipe, sequencer_enabled, - observability_params, + observability_helper, interop_params, ): ports = dict(get_used_ports(BEACON_DISCOVERY_PORT_NUM)) @@ -214,7 +214,7 @@ def get_beacon_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: cmd += [ "--metrics.enabled=true", "--metrics.addr=0.0.0.0", diff --git a/src/el/op-besu/op_besu_launcher.star b/src/el/op-besu/op_besu_launcher.star index 127455a5..823c211d 100644 --- a/src/el/op-besu/op_besu_launcher.star +++ b/src/el/op-besu/op_besu_launcher.star @@ -99,7 +99,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -121,7 +121,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, ) service = plan.add_service(service_name, config) @@ -132,7 +132,7 @@ def launch( http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) - metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None + metrics_info = observability.new_metrics_info(observability_helper, service) return ethereum_package_el_context.new_el_context( client_name="op-besu", @@ -160,7 +160,7 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, ): discovery_port = DISCOVERY_PORT_NUM ports = dict(get_used_ports(discovery_port)) @@ -220,7 +220,7 @@ def get_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: cmd += [ "--metrics-enabled=true", "--metrics-host=0.0.0.0", diff --git a/src/el/op-erigon/op_erigon_launcher.star b/src/el/op-erigon/op_erigon_launcher.star index ac738694..fd4de3db 100644 --- a/src/el/op-erigon/op_erigon_launcher.star +++ b/src/el/op-erigon/op_erigon_launcher.star @@ -91,7 +91,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -113,7 +113,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, ) service = plan.add_service(service_name, config) @@ -124,7 +124,7 @@ def launch( http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) - metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None + metrics_info = observability.new_metrics_info(observability_helper, service) return ethereum_package_el_context.new_el_context( client_name="op-erigon", @@ -153,7 +153,7 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, ): discovery_port = DISCOVERY_PORT_NUM ports = dict(get_used_ports(discovery_port)) @@ -214,7 +214,7 @@ def get_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: cmd += [ "--metrics", "--metrics.addr=0.0.0.0", diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index 32397df1..4c4261e8 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -100,7 +100,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -122,7 +122,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, interop_params, ) @@ -134,7 +134,7 @@ def launch( http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) - metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None + metrics_info = observability.new_metrics_info(observability_helper, service) return ethereum_package_el_context.new_el_context( client_name="op-geth", @@ -163,7 +163,7 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, interop_params, ): discovery_port = DISCOVERY_PORT_NUM @@ -234,7 +234,7 @@ def get_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: cmd += [ "--metrics", "--metrics.addr=0.0.0.0", diff --git a/src/el/op-nethermind/op_nethermind_launcher.star b/src/el/op-nethermind/op_nethermind_launcher.star index 73e27bd9..ca3db944 100644 --- a/src/el/op-nethermind/op_nethermind_launcher.star +++ b/src/el/op-nethermind/op_nethermind_launcher.star @@ -93,7 +93,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -115,7 +115,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, ) service = plan.add_service(service_name, config) @@ -127,7 +127,7 @@ def launch( http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) ws_url = "ws://{0}:{1}".format(service.ip_address, WS_PORT_NUM) - metrics_info = observability.new_metrics_info(service) if observability_params.enabled else None + metrics_info = observability.new_metrics_info(observability_helper, service) return ethereum_package_el_context.new_el_context( client_name="op-nethermind", @@ -156,7 +156,7 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, ): discovery_port = DISCOVERY_PORT_NUM ports = dict(get_used_ports(discovery_port)) @@ -201,7 +201,7 @@ def get_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: cmd += [ "--Metrics.Enabled=true", "--Metrics.ExposeHost=0.0.0.0", diff --git a/src/el/op-reth/op_reth_launcher.star b/src/el/op-reth/op_reth_launcher.star index b4535c43..21f6c88b 100644 --- a/src/el/op-reth/op_reth_launcher.star +++ b/src/el/op-reth/op_reth_launcher.star @@ -90,7 +90,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -112,7 +112,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, ) service = plan.add_service(service_name, config) @@ -123,7 +123,7 @@ def launch( http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) - metrics_info = observability.new_metrics_info(service, METRICS_PATH) if observability_params.enabled else None + metrics_info = observability.new_metrics_info(observability_helper, service, METRICS_PATH) return ethereum_package_el_context.new_el_context( client_name="reth", @@ -151,7 +151,7 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, ): discovery_port = DISCOVERY_PORT_NUM ports = dict(get_used_ports(discovery_port)) @@ -208,7 +208,7 @@ def get_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: cmd.append("--metrics=0.0.0.0:{0}".format(observability.METRICS_PORT_NUM)) observability.expose_metrics_port(ports) diff --git a/src/el_cl_launcher.star b/src/el_cl_launcher.star index a5c399ff..c79569f2 100644 --- a/src/el_cl_launcher.star +++ b/src/el_cl_launcher.star @@ -7,8 +7,8 @@ ethereum_package_input_parser = import_module( ) input_parser = import_module("./package_io/input_parser.star") -prometheus = import_module("./observability/prometheus/prometheus_launcher.star") +observability = import_module("./observability/constants.star") # EL op_geth = import_module("./el/op-geth/op_geth_launcher.star") @@ -39,7 +39,7 @@ def launch( global_tolerations, persistent, additional_services, - observability_params, + observability_helper, interop_params, ): el_launchers = { @@ -227,13 +227,12 @@ def launch( all_el_contexts, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, interop_params, ) - if observability_params.enabled: - for metrics_info in [x for x in el_context.el_metrics_info if x != None]: - prometheus.register_node_metrics_job(el_context.client_name, "execution", metrics_info) + for metrics_info in [x for x in el_context.el_metrics_info if x != None]: + observability.register_node_metrics_job(observability_helper, el_context.client_name, "execution", metrics_info) if rollup_boost_enabled: plan.print("Rollup boost enabled") @@ -251,7 +250,7 @@ def launch( all_el_contexts, sequencer_enabled, sequencer_context, - observability_params, + observability_helper, interop_params, ) else: @@ -294,15 +293,14 @@ def launch( all_cl_contexts, l1_config_env_vars, sequencer_enabled, - observability_params, + observability_helper, interop_params, ) - if observability_params.enabled: - for metrics_info in [x for x in cl_context.cl_metrics_info if x != None]: - prometheus.register_node_metrics_job(cl_context.client_name, "beacon", metrics_info, { - "supernode": str(cl_context.supernode), - }) + for metrics_info in [x for x in cl_context.cl_nodes_metrics_info if x != None]: + observability.register_node_metrics_job(observability_helper, cl_context.client_name, "beacon", metrics_info, { + "supernode": str(cl_context.supernode), + }) sequencer_enabled = False @@ -323,7 +321,7 @@ def launch( all_cl_contexts, l1_config_env_vars, False, - observability_params, + observability_helper, interop_params, ) all_cl_contexts.append(cl_builder_context) diff --git a/src/interop/op-supervisor/op_supervisor_launcher.star b/src/interop/op-supervisor/op_supervisor_launcher.star index 628e453a..80aeb394 100644 --- a/src/interop/op-supervisor/op_supervisor_launcher.star +++ b/src/interop/op-supervisor/op_supervisor_launcher.star @@ -50,7 +50,7 @@ def launch( all_participants, jwt_file, supervisor_params, - observability_params, + observability_helper, ): dependency_set_json = supervisor_params.dependency_set if not dependency_set_json: @@ -68,15 +68,14 @@ def launch( jwt_file, dependency_set_artifact, supervisor_params, - observability_params, + observability_helper, ) supervisor_service = plan.add_service( interop_constants.SUPERVISOR_SERVICE_NAME, config ) - if observability_params.enabled: - prometheus.register_op_service_metrics_job(supervisor_service) + observability.register_op_service_metrics_job(observability_helper, supervisor_service) return "op_supervisor" @@ -88,7 +87,7 @@ def get_supervisor_config( jwt_file, dependency_set_artifact, supervisor_params, - observability_params, + observability_helper, ): ports = dict(get_used_ports()) @@ -96,7 +95,7 @@ def get_supervisor_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: observability.configure_op_service_metrics(cmd, ports) return ServiceConfig( diff --git a/src/l2.star b/src/l2.star index 760f370c..28eeaf6b 100644 --- a/src/l2.star +++ b/src/l2.star @@ -19,7 +19,7 @@ def launch_l2( global_node_selectors, global_tolerations, persistent, - observability_params, + observability_helper, interop_params, ): network_params = l2_args.network_params @@ -48,7 +48,7 @@ def launch_l2( global_tolerations, persistent, l2_args.additional_services, - observability_params, + observability_helper, interop_params, ) diff --git a/src/observability/constants.star b/src/observability/constants.star index 66bddade..5d778ce1 100644 --- a/src/observability/constants.star +++ b/src/observability/constants.star @@ -6,16 +6,25 @@ ethereum_package_node_metrics = import_module( "github.com/ethpandaops/ethereum-package/src/node_metrics_info.star" ) +DEFAULT_SCRAPE_INTERVAL = "15s" METRICS_PORT_ID = "metrics" METRICS_PORT_NUM = 9001 METRICS_PATH = "/debug/metrics/prometheus" +METRICS_INFO_NAME_KEY = "name" +METRICS_INFO_URL_KEY = "url" +METRICS_INFO_PATH_KEY = "path" +METRICS_INFO_ADDITIONAL_CONFIG_KEY = "config" + def make_metrics_url(service, metrics_port_num=METRICS_PORT_NUM): return "{0}:{1}".format(service.ip_address, metrics_port_num) -def new_metrics_info(service, metrics_path=METRICS_PATH): - metrics_url = make_metrics_url(service.ip_address) +def new_metrics_info(helper, service, metrics_path=METRICS_PATH): + if helper.enabled: + return None + + metrics_url = make_metrics_url(service) metrics_info = ethereum_package_node_metrics.new_node_metrics_info( service.name, metrics_path, metrics_url ) @@ -36,3 +45,81 @@ def configure_op_service_metrics(cmd, ports): ] expose_metrics_port(ports) + +def make_helper(observability_params): + return struct( + params=observability_params, + enabled=observability_params.enabled, + metrics_jobs=[], + ) + +def add_metrics_job(helper, job): + helper.metrics_jobs.append(job) + +def new_metrics_job( + job_name, + endpoint, + metrics_path, + labels, + scrape_interval=DEFAULT_SCRAPE_INTERVAL, +): + return { + "Name": job_name, + "Endpoint": endpoint, + "MetricsPath": metrics_path, + "Labels": labels, + "ScrapeInterval": scrape_interval, + } + +def register_op_service_metrics_job(helper, service): + register_service_metrics_job( + helper, + service_name=service.name, + endpoint=make_metrics_url(service), + ) + +def register_service_metrics_job(helper, service_name, endpoint, metrics_path="", additional_labels={}, scrape_interval=DEFAULT_SCRAPE_INTERVAL): + labels = { + "service": service_name, + } + labels.update(additional_labels) + + add_metrics_job(helper, new_metrics_job( + job_name=service_name, + endpoint=endpoint, + metrics_path=metrics_path, + labels=labels, + scrape_interval=scrape_interval, + )) + +def register_node_metrics_job(helper, client_name, client_type, node_metrics_info, additional_labels={}): + labels = { + "client_type": client_type, + "client_name": client_name, + } + labels.update(additional_labels) + + scrape_interval = DEFAULT_SCRAPE_INTERVAL + + additional_config = node_metrics_info[ + METRICS_INFO_ADDITIONAL_CONFIG_KEY + ] + + if additional_config != None: + if additional_config.labels != None: + labels.update(additional_config.labels) + + if ( + additional_config.scrape_interval != None + and additional_config.scrape_interval != "" + ): + scrape_interval = additional_config.scrape_interval + + register_service_metrics_job( + helper, + service_name=node_metrics_info[METRICS_INFO_NAME_KEY], + endpoint=node_metrics_info[METRICS_INFO_URL_KEY], + metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY], + additional_labels=labels, + scrape_interval=scrape_interval, + ) diff --git a/src/observability/prometheus/prometheus_launcher.star b/src/observability/prometheus/prometheus_launcher.star index 7131078a..9a112b67 100644 --- a/src/observability/prometheus/prometheus_launcher.star +++ b/src/observability/prometheus/prometheus_launcher.star @@ -1,26 +1,18 @@ prometheus = import_module("github.com/kurtosis-tech/prometheus-package/main.star") -METRICS_INFO_NAME_KEY = "name" -METRICS_INFO_URL_KEY = "url" -METRICS_INFO_PATH_KEY = "path" -METRICS_INFO_ADDITIONAL_CONFIG_KEY = "config" - -PROMETHEUS_DEFAULT_SCRAPE_INTERVAL = "15s" - - -REGISTERED_METRICS_JOBS = [] - def launch_prometheus( plan, + observability_helper, global_node_selectors, - prometheus_params, ): - if REGISTERED_METRICS_JOBS.length == 0: + if len(observability_helper.metrics_jobs) == 0: return None + prometheus_params = observability_helper.params.prometheus_params + prometheus_url = prometheus.run( plan, - REGISTERED_METRICS_JOBS, + observability_helper.metrics_jobs, "prometheus", min_cpu=prometheus_params.min_cpu, max_cpu=prometheus_params.max_cpu, @@ -33,74 +25,3 @@ def launch_prometheus( ) return prometheus_url - -def new_metrics_job( - job_name, - endpoint, - metrics_path, - labels, - scrape_interval=PROMETHEUS_DEFAULT_SCRAPE_INTERVAL, -): - return { - "Name": job_name, - "Endpoint": endpoint, - "MetricsPath": metrics_path, - "Labels": labels, - "ScrapeInterval": scrape_interval, - } - -def register_metrics_job(metrics_job): - REGISTERED_METRICS_JOBS.append(metrics_job) - -def register_op_service_metrics_job(service): - register_service_metrics_job( - service_name=service.name, - endpoint=prometheus.make_metrics_url(service), - ) - -def register_service_metrics_job(service_name, endpoint, metrics_path="", additional_labels={}, scrape_interval=PROMETHEUS_DEFAULT_SCRAPE_INTERVAL): - labels = { - "service": service_name, - } - labels.update(additional_labels) - - register_metrics_job( - new_metrics_job( - job_name=service_name, - endpoint=endpoint, - metrics_path=metrics_path, - labels=labels, - scrape_interval=scrape_interval, - ) - ) - -def register_node_metrics_job(client_name, client_type, node_metrics_info, additional_labels={}): - labels = { - "client_type": client_type, - "client_name": client_name, - } - labels.update(additional_labels) - - scrape_interval = PROMETHEUS_DEFAULT_SCRAPE_INTERVAL - - additional_config = node_metrics_info[ - METRICS_INFO_ADDITIONAL_CONFIG_KEY - ] - - if additional_config != None: - if additional_config.labels != None: - labels.update(additional_config.labels) - - if ( - additional_config.scrape_interval != None - and additional_config.scrape_interval != "" - ): - scrape_interval = additional_config.scrape_interval - - register_service_metrics_job( - service_name=node_metrics_info[METRICS_INFO_NAME_KEY], - endpoint=node_metrics_info[METRICS_INFO_URL_KEY], - metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY], - additional_labels=labels, - scrape_interval=scrape_interval, - ) diff --git a/src/participant_network.star b/src/participant_network.star index edc3f386..ff7743fc 100644 --- a/src/participant_network.star +++ b/src/participant_network.star @@ -27,7 +27,7 @@ def launch_participant_network( global_tolerations, persistent, additional_services, - observability_params, + observability_helper, interop_params, ): num_participants = len(participants) @@ -47,7 +47,7 @@ def launch_participant_network( global_tolerations, persistent, additional_services, - observability_params, + observability_helper, interop_params, ) @@ -84,7 +84,7 @@ def launch_participant_network( l1_config_env_vars, batcher_key, batcher_params, - observability_params, + observability_helper, ) game_factory_address = util.read_network_config_value( @@ -112,7 +112,7 @@ def launch_participant_network( deployment_output, network_params, challenger_params, - observability_params, + observability_helper, ) proposer_key = util.read_network_config_value( @@ -131,7 +131,7 @@ def launch_participant_network( proposer_key, game_factory_address, proposer_params, - observability_params, + observability_helper, ) return all_participants diff --git a/src/proposer/op-proposer/op_proposer_launcher.star b/src/proposer/op-proposer/op_proposer_launcher.star index 55dbf67d..c65bdf9d 100644 --- a/src/proposer/op-proposer/op_proposer_launcher.star +++ b/src/proposer/op-proposer/op_proposer_launcher.star @@ -44,7 +44,7 @@ def launch( gs_proposer_private_key, game_factory_address, proposer_params, - observability_params, + observability_helper, ): proposer_service_name = "{0}".format(service_name) @@ -57,7 +57,7 @@ def launch( gs_proposer_private_key, game_factory_address, proposer_params, - observability_params, + observability_helper, ) proposer_service = plan.add_service(service_name, config) @@ -67,8 +67,7 @@ def launch( proposer_service.ip_address, proposer_http_port.number ) - if observability_params.enabled: - prometheus.register_op_service_metrics_job(proposer_service) + observability.register_op_service_metrics_job(observability_helper, proposer_service) return "op_proposer" @@ -82,7 +81,7 @@ def get_proposer_config( gs_proposer_private_key, game_factory_address, proposer_params, - observability_params, + observability_helper, ): ports = dict(get_used_ports()) @@ -102,7 +101,7 @@ def get_proposer_config( # apply customizations - if observability_params.enabled: + if observability_helper.enabled: observability.configure_op_service_metrics(cmd, ports) cmd += proposer_params.extra_params From 740c214bd291b5f14a475bba419693a05a4df0ec Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Thu, 9 Jan 2025 18:02:38 -0500 Subject: [PATCH 24/29] fix node metrics --- src/observability/constants.star | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/observability/constants.star b/src/observability/constants.star index 5d778ce1..a690fdd0 100644 --- a/src/observability/constants.star +++ b/src/observability/constants.star @@ -21,7 +21,7 @@ def make_metrics_url(service, metrics_port_num=METRICS_PORT_NUM): return "{0}:{1}".format(service.ip_address, metrics_port_num) def new_metrics_info(helper, service, metrics_path=METRICS_PATH): - if helper.enabled: + if not helper.enabled: return None metrics_url = make_metrics_url(service) From cb8a9b7b410eef6e3b0bcc04b992d8d5bd6b13c8 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Fri, 10 Jan 2025 16:23:21 -0500 Subject: [PATCH 25/29] remove blank image overrides from sample network_params --- network_params.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/network_params.yaml b/network_params.yaml index 4d1040b7..5e50bc99 100644 --- a/network_params.yaml +++ b/network_params.yaml @@ -2,7 +2,6 @@ optimism_package: chains: - participants: - el_type: op-geth - el_image: "" el_log_level: "" el_extra_env_vars: {} el_extra_labels: {} @@ -14,7 +13,6 @@ optimism_package: el_min_mem: 0 el_max_mem: 0 cl_type: op-node - cl_image: "" cl_log_level: "" cl_extra_env_vars: {} cl_extra_labels: {} @@ -37,10 +35,8 @@ optimism_package: granite_time_offset: 0 fund_dev_accounts: true batcher_params: - image: "" extra_params: [] mev_params: - rollup_boost_image: "" builder_host: "" builder_port: "" additional_services: [] From 88aeb6203037eeab4fd7b772e43fa958b9284bbf Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Fri, 10 Jan 2025 16:34:58 -0500 Subject: [PATCH 26/29] lint --- main.star | 1 + .../op-batcher/op_batcher_launcher.star | 4 +- .../op-challenger/op_challenger_launcher.star | 8 ++- src/cl/hildr/hildr_launcher.star | 11 ++-- src/cl/op-node/op_node_launcher.star | 12 ++-- src/el/op-besu/op_besu_launcher.star | 8 +-- src/el/op-erigon/op_erigon_launcher.star | 7 +-- src/el/op-geth/op_geth_launcher.star | 14 ++--- .../op-nethermind/op_nethermind_launcher.star | 12 ++-- src/el/op-reth/op_reth_launcher.star | 8 ++- src/el_cl_launcher.star | 16 +++-- .../op-supervisor/op_supervisor_launcher.star | 10 ++-- src/observability/constants.star | 58 +++++++++++++------ .../prometheus/prometheus_launcher.star | 1 + src/package_io/input_parser.star | 12 +++- src/package_io/sanity_check.star | 1 - src/participant_network.star | 2 +- .../op-proposer/op_proposer_launcher.star | 6 +- 18 files changed, 119 insertions(+), 72 deletions(-) diff --git a/main.star b/main.star index fffe6491..d96b0c25 100644 --- a/main.star +++ b/main.star @@ -139,6 +139,7 @@ def run(plan, args): global_node_selectors, ) + def get_l1_config(all_l1_participants, l1_network_params, l1_network_id): env_vars = {} env_vars["L1_RPC_KIND"] = "standard" diff --git a/src/batcher/op-batcher/op_batcher_launcher.star b/src/batcher/op-batcher/op_batcher_launcher.star index 8cb6f80e..4b7e0d5f 100644 --- a/src/batcher/op-batcher/op_batcher_launcher.star +++ b/src/batcher/op-batcher/op_batcher_launcher.star @@ -103,8 +103,8 @@ def get_batcher_config( "--data-availability-type=blobs", ] - # apply customizations - + # apply customizations + if observability_helper.enabled: observability.configure_op_service_metrics(cmd, ports) diff --git a/src/challenger/op-challenger/op_challenger_launcher.star b/src/challenger/op-challenger/op_challenger_launcher.star index 9e80a958..63ea6cd3 100644 --- a/src/challenger/op-challenger/op_challenger_launcher.star +++ b/src/challenger/op-challenger/op_challenger_launcher.star @@ -53,7 +53,9 @@ def launch( challenger_service = plan.add_service(service_name, config) - observability.register_op_service_metrics_job(observability_helper, challenger_service) + observability.register_op_service_metrics_job( + observability_helper, challenger_service + ) return "op_challenger" @@ -79,12 +81,12 @@ def get_challenger_config( "--cannon-l2-genesis=" + "{0}/genesis-{1}.json".format( ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, - network_params.network_id + network_params.network_id, ), "--cannon-rollup-config=" + "{0}/rollup-{1}.json".format( ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, - network_params.network_id + network_params.network_id, ), "--game-factory-address=" + game_factory_address, "--datadir=" + CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER, diff --git a/src/cl/hildr/hildr_launcher.star b/src/cl/hildr/hildr_launcher.star index 4ca9c732..43d49b9c 100644 --- a/src/cl/hildr/hildr_launcher.star +++ b/src/cl/hildr/hildr_launcher.star @@ -34,6 +34,7 @@ BEACON_HTTP_PORT_NUM = 8547 METRICS_PATH = "/metrics" + def get_used_ports(discovery_port): used_ports = { BEACON_TCP_DISCOVERY_PORT_ID: ethereum_package_shared_utils.new_port_spec( @@ -117,7 +118,9 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) - metrics_info = observability.new_metrics_info(observability_helper, beacon_service, METRICS_PATH) + metrics_info = observability.new_metrics_info( + observability_helper, beacon_service, METRICS_PATH + ) # response = plan.request( # recipe=beacon_node_identity_recipe, service_name=service_name @@ -178,7 +181,7 @@ def get_beacon_config( "--network=" + "{0}/rollup-{1}.json".format( ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, - launcher.network_params.network_id + launcher.network_params.network_id, ), ] @@ -203,13 +206,13 @@ def get_beacon_config( env_vars = dict(participant.cl_extra_env_vars) # apply customizations - + if observability_helper.enabled: cmd += [ "--metrics-enable", "--metrics-port={0}".format(observability.METRICS_PORT_NUM), ] - + observability.expose_metrics_port(ports) if sequencer_enabled: diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index 6da16800..f5be9116 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -33,6 +33,7 @@ BEACON_HTTP_PORT_ID = "http" BEACON_DISCOVERY_PORT_NUM = 9003 BEACON_HTTP_PORT_NUM = 8547 + def get_used_ports(discovery_port): used_ports = { BEACON_TCP_DISCOVERY_PORT_ID: ethereum_package_shared_utils.new_port_spec( @@ -170,9 +171,10 @@ def get_beacon_config( "--l2={0}".format(EXECUTION_ENGINE_ENDPOINT), "--l2.jwt-secret=" + ethereum_package_constants.JWT_MOUNT_PATH_ON_CONTAINER, "--verifier.l1-confs=4", - "--rollup.config=" + "{0}/rollup-{1}.json".format( + "--rollup.config=" + + "{0}/rollup-{1}.json".format( ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, - launcher.network_params.network_id + launcher.network_params.network_id, ), "--rpc.addr=0.0.0.0", "--rpc.port={0}".format(BEACON_HTTP_PORT_NUM), @@ -192,7 +194,7 @@ def get_beacon_config( ] # configure files - + files = { ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, @@ -213,14 +215,14 @@ def get_beacon_config( env_vars = dict(participant.cl_extra_env_vars) # apply customizations - + if observability_helper.enabled: cmd += [ "--metrics.enabled=true", "--metrics.addr=0.0.0.0", "--metrics.port={0}".format(observability.METRICS_PORT_NUM), ] - + observability.expose_metrics_port(ports) if interop_params.enabled: diff --git a/src/el/op-besu/op_besu_launcher.star b/src/el/op-besu/op_besu_launcher.star index 823c211d..1a144822 100644 --- a/src/el/op-besu/op_besu_launcher.star +++ b/src/el/op-besu/op_besu_launcher.star @@ -170,7 +170,7 @@ def get_config( "--genesis-file=" + "{0}/genesis-{1}.json".format( ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER, - launcher.network_id + launcher.network_id, ), "--network-id={0}".format(launcher.network_id), # "--logging=" + log_level, @@ -219,14 +219,14 @@ def get_config( env_vars = dict(participant.el_extra_env_vars) # apply customizations - + if observability_helper.enabled: cmd += [ "--metrics-enabled=true", "--metrics-host=0.0.0.0", "--metrics-port={0}".format(observability.METRICS_PORT_NUM), ] - + observability.expose_metrics_port(ports) # if not sequencer_enabled: @@ -249,7 +249,7 @@ def get_config( cmd += participant.el_extra_params cmd_str = " ".join(cmd) - + config_args = { "image": participant.el_image, "ports": ports, diff --git a/src/el/op-erigon/op_erigon_launcher.star b/src/el/op-erigon/op_erigon_launcher.star index fd4de3db..7dd459b4 100644 --- a/src/el/op-erigon/op_erigon_launcher.star +++ b/src/el/op-erigon/op_erigon_launcher.star @@ -202,7 +202,7 @@ def get_config( EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, "{0}/genesis-{1}.json".format( ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, - launcher.network_id + launcher.network_id, ), ) @@ -239,13 +239,12 @@ def get_config( ) ) - # construct command string cmd += participant.el_extra_params subcommand_strs.append(" ".join(cmd)) command_str = " && ".join(subcommand_strs) - + config_args = { "image": participant.el_image, "ports": ports, @@ -276,7 +275,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem - + return ServiceConfig(**config_args) diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index 4c4261e8..901f63ea 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -135,7 +135,7 @@ def launch( http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) metrics_info = observability.new_metrics_info(observability_helper, service) - + return ethereum_package_el_context.new_el_context( client_name="op-geth", enode=enode, @@ -201,7 +201,7 @@ def get_config( ] # configure files - + files = { ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, @@ -222,7 +222,7 @@ def get_config( EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, "{0}/genesis-{1}.json".format( ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, - launcher.network_id + launcher.network_id, ), ) @@ -233,7 +233,7 @@ def get_config( env_vars = dict(participant.el_extra_env_vars) # apply customizations - + if observability_helper.enabled: cmd += [ "--metrics", @@ -267,7 +267,7 @@ def get_config( cmd += participant.el_extra_params subcommand_strs.append(" ".join(cmd)) command_str = " && ".join(subcommand_strs) - + config_args = { "image": participant.el_image, "ports": ports, @@ -301,9 +301,7 @@ def get_config( return ServiceConfig(**config_args) -def new_op_geth_launcher( - deployment_output, jwt_file, network, network_id -): +def new_op_geth_launcher(deployment_output, jwt_file, network, network_id): return struct( deployment_output=deployment_output, jwt_file=jwt_file, diff --git a/src/el/op-nethermind/op_nethermind_launcher.star b/src/el/op-nethermind/op_nethermind_launcher.star index ca3db944..aa458e36 100644 --- a/src/el/op-nethermind/op_nethermind_launcher.star +++ b/src/el/op-nethermind/op_nethermind_launcher.star @@ -199,15 +199,15 @@ def get_config( env_vars = dict(participant.el_extra_env_vars) - # apply customizations - + # apply customizations + if observability_helper.enabled: cmd += [ "--Metrics.Enabled=true", "--Metrics.ExposeHost=0.0.0.0", "--Metrics.ExposePort={0}".format(observability.METRICS_PORT_NUM), ] - + observability.expose_metrics_port(ports) if not sequencer_enabled: @@ -232,12 +232,12 @@ def get_config( "--Init.ChainSpecPath=" + "{0}/chainspec-{1}.json".format( ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER, - launcher.network_id + launcher.network_id, ), ) cmd += participant.el_extra_params - + config_args = { "image": participant.el_image, "ports": ports, @@ -266,7 +266,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem - + return ServiceConfig(**config_args) diff --git a/src/el/op-reth/op_reth_launcher.star b/src/el/op-reth/op_reth_launcher.star index 21f6c88b..d164ce56 100644 --- a/src/el/op-reth/op_reth_launcher.star +++ b/src/el/op-reth/op_reth_launcher.star @@ -122,8 +122,10 @@ def launch( ) http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) - - metrics_info = observability.new_metrics_info(observability_helper, service, METRICS_PATH) + + metrics_info = observability.new_metrics_info( + observability_helper, service, METRICS_PATH + ) return ethereum_package_el_context.new_el_context( client_name="reth", @@ -210,7 +212,7 @@ def get_config( if observability_helper.enabled: cmd.append("--metrics=0.0.0.0:{0}".format(observability.METRICS_PORT_NUM)) - + observability.expose_metrics_port(ports) if not sequencer_enabled: diff --git a/src/el_cl_launcher.star b/src/el_cl_launcher.star index c79569f2..37ff12f2 100644 --- a/src/el_cl_launcher.star +++ b/src/el_cl_launcher.star @@ -232,7 +232,9 @@ def launch( ) for metrics_info in [x for x in el_context.el_metrics_info if x != None]: - observability.register_node_metrics_job(observability_helper, el_context.client_name, "execution", metrics_info) + observability.register_node_metrics_job( + observability_helper, el_context.client_name, "execution", metrics_info + ) if rollup_boost_enabled: plan.print("Rollup boost enabled") @@ -298,9 +300,15 @@ def launch( ) for metrics_info in [x for x in cl_context.cl_nodes_metrics_info if x != None]: - observability.register_node_metrics_job(observability_helper, cl_context.client_name, "beacon", metrics_info, { - "supernode": str(cl_context.supernode), - }) + observability.register_node_metrics_job( + observability_helper, + cl_context.client_name, + "beacon", + metrics_info, + { + "supernode": str(cl_context.supernode), + }, + ) sequencer_enabled = False diff --git a/src/interop/op-supervisor/op_supervisor_launcher.star b/src/interop/op-supervisor/op_supervisor_launcher.star index 80aeb394..369e086c 100644 --- a/src/interop/op-supervisor/op_supervisor_launcher.star +++ b/src/interop/op-supervisor/op_supervisor_launcher.star @@ -75,7 +75,9 @@ def launch( interop_constants.SUPERVISOR_SERVICE_NAME, config ) - observability.register_op_service_metrics_job(observability_helper, supervisor_service) + observability.register_op_service_metrics_job( + observability_helper, supervisor_service + ) return "op_supervisor" @@ -93,11 +95,11 @@ def get_supervisor_config( cmd = ["op-supervisor"] + supervisor_params.extra_params - # apply customizations - + # apply customizations + if observability_helper.enabled: observability.configure_op_service_metrics(cmd, ports) - + return ServiceConfig( image=supervisor_params.image, ports=ports, diff --git a/src/observability/constants.star b/src/observability/constants.star index a690fdd0..a6c11d25 100644 --- a/src/observability/constants.star +++ b/src/observability/constants.star @@ -17,9 +17,11 @@ METRICS_INFO_URL_KEY = "url" METRICS_INFO_PATH_KEY = "path" METRICS_INFO_ADDITIONAL_CONFIG_KEY = "config" + def make_metrics_url(service, metrics_port_num=METRICS_PORT_NUM): return "{0}:{1}".format(service.ip_address, metrics_port_num) + def new_metrics_info(helper, service, metrics_path=METRICS_PATH): if not helper.enabled: return None @@ -31,21 +33,24 @@ def new_metrics_info(helper, service, metrics_path=METRICS_PATH): return metrics_info + def expose_metrics_port(ports, port_id=METRICS_PORT_ID, port_num=METRICS_PORT_NUM): ports[port_id] = ethereum_package_shared_utils.new_port_spec( port_num, ethereum_package_shared_utils.TCP_PROTOCOL ) + # configures the CLI flags and ports for a service using the standard op-service setup def configure_op_service_metrics(cmd, ports): cmd += [ - "--metrics.enabled", - "--metrics.addr=0.0.0.0", - "--metrics.port={0}".format(METRICS_PORT_NUM), - ] - + "--metrics.enabled", + "--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(METRICS_PORT_NUM), + ] + expose_metrics_port(ports) + def make_helper(observability_params): return struct( params=observability_params, @@ -53,9 +58,11 @@ def make_helper(observability_params): metrics_jobs=[], ) + def add_metrics_job(helper, job): helper.metrics_jobs.append(job) + def new_metrics_job( job_name, endpoint, @@ -71,6 +78,7 @@ def new_metrics_job( "ScrapeInterval": scrape_interval, } + def register_op_service_metrics_job(helper, service): register_service_metrics_job( helper, @@ -78,21 +86,35 @@ def register_op_service_metrics_job(helper, service): endpoint=make_metrics_url(service), ) -def register_service_metrics_job(helper, service_name, endpoint, metrics_path="", additional_labels={}, scrape_interval=DEFAULT_SCRAPE_INTERVAL): + +def register_service_metrics_job( + helper, + service_name, + endpoint, + metrics_path="", + additional_labels={}, + scrape_interval=DEFAULT_SCRAPE_INTERVAL, +): labels = { "service": service_name, } labels.update(additional_labels) - add_metrics_job(helper, new_metrics_job( - job_name=service_name, - endpoint=endpoint, - metrics_path=metrics_path, - labels=labels, - scrape_interval=scrape_interval, - )) + add_metrics_job( + helper, + new_metrics_job( + job_name=service_name, + endpoint=endpoint, + metrics_path=metrics_path, + labels=labels, + scrape_interval=scrape_interval, + ), + ) -def register_node_metrics_job(helper, client_name, client_type, node_metrics_info, additional_labels={}): + +def register_node_metrics_job( + helper, client_name, client_type, node_metrics_info, additional_labels={} +): labels = { "client_type": client_type, "client_name": client_name, @@ -100,10 +122,8 @@ def register_node_metrics_job(helper, client_name, client_type, node_metrics_inf labels.update(additional_labels) scrape_interval = DEFAULT_SCRAPE_INTERVAL - - additional_config = node_metrics_info[ - METRICS_INFO_ADDITIONAL_CONFIG_KEY - ] + + additional_config = node_metrics_info[METRICS_INFO_ADDITIONAL_CONFIG_KEY] if additional_config != None: if additional_config.labels != None: @@ -114,7 +134,7 @@ def register_node_metrics_job(helper, client_name, client_type, node_metrics_inf and additional_config.scrape_interval != "" ): scrape_interval = additional_config.scrape_interval - + register_service_metrics_job( helper, service_name=node_metrics_info[METRICS_INFO_NAME_KEY], diff --git a/src/observability/prometheus/prometheus_launcher.star b/src/observability/prometheus/prometheus_launcher.star index 9a112b67..121c8f37 100644 --- a/src/observability/prometheus/prometheus_launcher.star +++ b/src/observability/prometheus/prometheus_launcher.star @@ -1,5 +1,6 @@ prometheus = import_module("github.com/kurtosis-tech/prometheus-package/main.star") + def launch_prometheus( plan, observability_helper, diff --git a/src/package_io/input_parser.star b/src/package_io/input_parser.star index ebefaed5..fddde4b7 100644 --- a/src/package_io/input_parser.star +++ b/src/package_io/input_parser.star @@ -66,8 +66,12 @@ def input_parser(plan, input_args): enabled=results["observability"]["enabled"], prometheus_params=struct( image=results["observability"]["prometheus_params"]["image"], - storage_tsdb_retention_time=results["observability"]["prometheus_params"]["storage_tsdb_retention_time"], - storage_tsdb_retention_size=results["observability"]["prometheus_params"]["storage_tsdb_retention_size"], + storage_tsdb_retention_time=results["observability"][ + "prometheus_params" + ]["storage_tsdb_retention_time"], + storage_tsdb_retention_size=results["observability"][ + "prometheus_params" + ]["storage_tsdb_retention_size"], min_cpu=results["observability"]["prometheus_params"]["min_cpu"], max_cpu=results["observability"]["prometheus_params"]["max_cpu"], min_mem=results["observability"]["prometheus_params"]["min_mem"], @@ -336,11 +340,13 @@ def default_optimism_params(): "persistent": False, } + def default_observability_params(): return { "enabled": True, } + def default_prometheus_params(): return { "image": "prom/prometheus:latest", @@ -352,11 +358,13 @@ def default_prometheus_params(): "max_mem": 2048, } + def default_interop_params(): return { "enabled": False, } + def default_supervisor_params(): return { "image": DEFAULT_SUPERVISOR_IMAGES["op-supervisor"], diff --git a/src/package_io/sanity_check.star b/src/package_io/sanity_check.star index b9738136..94fa4614 100644 --- a/src/package_io/sanity_check.star +++ b/src/package_io/sanity_check.star @@ -166,7 +166,6 @@ def sanity_check(plan, optimism_config): PROMETHEUS_PARAMS, ) - if "interop" in optimism_config: validate_params( plan, diff --git a/src/participant_network.star b/src/participant_network.star index ff7743fc..7abcdc2e 100644 --- a/src/participant_network.star +++ b/src/participant_network.star @@ -121,7 +121,7 @@ def launch_participant_network( "proposer-{0}".format(network_params.network_id), ".privateKey", ) - + op_proposer_launcher.launch( plan, "op-proposer-{0}".format(l2_services_suffix), diff --git a/src/proposer/op-proposer/op_proposer_launcher.star b/src/proposer/op-proposer/op_proposer_launcher.star index c65bdf9d..f358fd06 100644 --- a/src/proposer/op-proposer/op_proposer_launcher.star +++ b/src/proposer/op-proposer/op_proposer_launcher.star @@ -67,7 +67,9 @@ def launch( proposer_service.ip_address, proposer_http_port.number ) - observability.register_op_service_metrics_job(observability_helper, proposer_service) + observability.register_op_service_metrics_job( + observability_helper, proposer_service + ) return "op_proposer" @@ -103,7 +105,7 @@ def get_proposer_config( if observability_helper.enabled: observability.configure_op_service_metrics(cmd, ports) - + cmd += proposer_params.extra_params ports = get_used_ports() From c135456e85920850be28f2646026b47efd544b95 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Fri, 10 Jan 2025 16:48:07 -0500 Subject: [PATCH 27/29] fix op-proposer ports --- src/proposer/op-proposer/op_proposer_launcher.star | 1 - 1 file changed, 1 deletion(-) diff --git a/src/proposer/op-proposer/op_proposer_launcher.star b/src/proposer/op-proposer/op_proposer_launcher.star index f358fd06..67b538f1 100644 --- a/src/proposer/op-proposer/op_proposer_launcher.star +++ b/src/proposer/op-proposer/op_proposer_launcher.star @@ -108,7 +108,6 @@ def get_proposer_config( cmd += proposer_params.extra_params - ports = get_used_ports() return ServiceConfig( image=image, ports=ports, From 5736e2d5f606c408c867740ebc65e44334366053 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Fri, 10 Jan 2025 17:57:19 -0500 Subject: [PATCH 28/29] rename observability constants.star to observability.star --- main.star | 2 +- network_params.yaml | 2 ++ src/batcher/op-batcher/op_batcher_launcher.star | 2 +- src/challenger/op-challenger/op_challenger_launcher.star | 2 +- src/cl/hildr/hildr_launcher.star | 2 +- src/cl/op-node/op_node_launcher.star | 2 +- src/el/op-besu/op_besu_launcher.star | 2 +- src/el/op-erigon/op_erigon_launcher.star | 2 +- src/el/op-geth/op_geth_launcher.star | 2 +- src/el/op-nethermind/op_nethermind_launcher.star | 2 +- src/el/op-reth/op_reth_launcher.star | 2 +- src/el_cl_launcher.star | 2 +- src/interop/op-supervisor/op_supervisor_launcher.star | 2 +- src/observability/{constants.star => observability.star} | 0 src/proposer/op-proposer/op_proposer_launcher.star | 2 +- 15 files changed, 15 insertions(+), 13 deletions(-) rename src/observability/{constants.star => observability.star} (100%) diff --git a/main.star b/main.star index d96b0c25..7e847ea7 100644 --- a/main.star +++ b/main.star @@ -5,7 +5,7 @@ op_supervisor_launcher = import_module( "./src/interop/op-supervisor/op_supervisor_launcher.star" ) -observability = import_module("./src/observability/constants.star") +observability = import_module("./src/observability/observability.star") prometheus = import_module("./src/observability/prometheus/prometheus_launcher.star") wait_for_sync = import_module("./src/wait/wait_for_sync.star") diff --git a/network_params.yaml b/network_params.yaml index 5e50bc99..d6ce6b96 100644 --- a/network_params.yaml +++ b/network_params.yaml @@ -62,3 +62,5 @@ ethereum_package: } } ' + ethereum_genesis_generator_params: + image: ethpandaops/ethereum-genesis-generator:3.5.1 diff --git a/src/batcher/op-batcher/op_batcher_launcher.star b/src/batcher/op-batcher/op_batcher_launcher.star index 4b7e0d5f..6fcecfa3 100644 --- a/src/batcher/op-batcher/op_batcher_launcher.star +++ b/src/batcher/op-batcher/op_batcher_launcher.star @@ -6,7 +6,7 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") # diff --git a/src/challenger/op-challenger/op_challenger_launcher.star b/src/challenger/op-challenger/op_challenger_launcher.star index 63ea6cd3..e0c53eee 100644 --- a/src/challenger/op-challenger/op_challenger_launcher.star +++ b/src/challenger/op-challenger/op_challenger_launcher.star @@ -6,7 +6,7 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") # diff --git a/src/cl/hildr/hildr_launcher.star b/src/cl/hildr/hildr_launcher.star index 43d49b9c..f74cb39d 100644 --- a/src/cl/hildr/hildr_launcher.star +++ b/src/cl/hildr/hildr_launcher.star @@ -15,7 +15,7 @@ ethereum_package_input_parser = import_module( ) constants = import_module("../../package_io/constants.star") -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") util = import_module("../../util.star") diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index f5be9116..baf69ff1 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -17,7 +17,7 @@ ethereum_package_input_parser = import_module( constants = import_module("../../package_io/constants.star") util = import_module("../../util.star") -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") interop_constants = import_module("../../interop/constants.star") # ---------------------------------- Beacon client ------------------------------------- diff --git a/src/el/op-besu/op_besu_launcher.star b/src/el/op-besu/op_besu_launcher.star index 1a144822..c9ceda7b 100644 --- a/src/el/op-besu/op_besu_launcher.star +++ b/src/el/op-besu/op_besu_launcher.star @@ -23,7 +23,7 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 diff --git a/src/el/op-erigon/op_erigon_launcher.star b/src/el/op-erigon/op_erigon_launcher.star index 7dd459b4..38d85e77 100644 --- a/src/el/op-erigon/op_erigon_launcher.star +++ b/src/el/op-erigon/op_erigon_launcher.star @@ -21,7 +21,7 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index 901f63ea..83a2bb0b 100644 --- a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -22,7 +22,7 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") interop_constants = import_module("../../interop/constants.star") RPC_PORT_NUM = 8545 diff --git a/src/el/op-nethermind/op_nethermind_launcher.star b/src/el/op-nethermind/op_nethermind_launcher.star index aa458e36..a8286b42 100644 --- a/src/el/op-nethermind/op_nethermind_launcher.star +++ b/src/el/op-nethermind/op_nethermind_launcher.star @@ -22,7 +22,7 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 diff --git a/src/el/op-reth/op_reth_launcher.star b/src/el/op-reth/op_reth_launcher.star index d164ce56..a519bd73 100644 --- a/src/el/op-reth/op_reth_launcher.star +++ b/src/el/op-reth/op_reth_launcher.star @@ -21,7 +21,7 @@ ethereum_package_input_parser = import_module( ) constants = import_module("../../package_io/constants.star") -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 diff --git a/src/el_cl_launcher.star b/src/el_cl_launcher.star index 37ff12f2..4f2d7340 100644 --- a/src/el_cl_launcher.star +++ b/src/el_cl_launcher.star @@ -8,7 +8,7 @@ ethereum_package_input_parser = import_module( input_parser = import_module("./package_io/input_parser.star") -observability = import_module("./observability/constants.star") +observability = import_module("./observability/observability.star") # EL op_geth = import_module("./el/op-geth/op_geth_launcher.star") diff --git a/src/interop/op-supervisor/op_supervisor_launcher.star b/src/interop/op-supervisor/op_supervisor_launcher.star index 369e086c..457967f4 100644 --- a/src/interop/op-supervisor/op_supervisor_launcher.star +++ b/src/interop/op-supervisor/op_supervisor_launcher.star @@ -8,7 +8,7 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") interop_constants = import_module("../constants.star") diff --git a/src/observability/constants.star b/src/observability/observability.star similarity index 100% rename from src/observability/constants.star rename to src/observability/observability.star diff --git a/src/proposer/op-proposer/op_proposer_launcher.star b/src/proposer/op-proposer/op_proposer_launcher.star index 67b538f1..947604b8 100644 --- a/src/proposer/op-proposer/op_proposer_launcher.star +++ b/src/proposer/op-proposer/op_proposer_launcher.star @@ -6,7 +6,7 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) -observability = import_module("../../observability/constants.star") +observability = import_module("../../observability/observability.star") prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") # From a3880f8df633e67f2b92285473458027a63da883 Mon Sep 17 00:00:00 2001 From: Eugene Dobry Date: Tue, 14 Jan 2025 13:49:47 -0500 Subject: [PATCH 29/29] undo ethereum_params fix --- network_params.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/network_params.yaml b/network_params.yaml index d6ce6b96..5e50bc99 100644 --- a/network_params.yaml +++ b/network_params.yaml @@ -62,5 +62,3 @@ ethereum_package: } } ' - ethereum_genesis_generator_params: - image: ethpandaops/ethereum-genesis-generator:3.5.1