diff --git a/README.md b/README.md index d7b36a93..59966ab5 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,24 @@ The full YAML schema that can be passed in is as follows with the defaults provi ```yaml optimism_package: + # Observability configuration + observability: + # Whether or not to configure observability (e.g. prometheus) + enabled: true + # Default prometheus configuration + prometheus_params: + storage_tsdb_retention_time: "1d" + storage_tsdb_retention_size: "512MB" + # Resource management for prometheus container + # CPU is in millicores + # RAM is in MB + min_cpu: 10 + max_cpu: 1000 + min_mem: 128 + max_mem: 2048 + # Prometheus docker image to use + # Defaults to the latest image + image: "prom/prometheus:latest" # Interop configuration interop: # Whether or not to enable interop mode diff --git a/main.star b/main.star index 923a7f12..7e847ea7 100644 --- a/main.star +++ b/main.star @@ -4,6 +4,10 @@ l2_launcher = import_module("./src/l2.star") op_supervisor_launcher = import_module( "./src/interop/op-supervisor/op_supervisor_launcher.star" ) + +observability = import_module("./src/observability/observability.star") +prometheus = import_module("./src/observability/prometheus/prometheus_launcher.star") + wait_for_sync = import_module("./src/wait/wait_for_sync.star") input_parser = import_module("./src/package_io/input_parser.star") ethereum_package_static_files = import_module( @@ -40,8 +44,11 @@ def run(plan, args): global_log_level = optimism_args_with_right_defaults.global_log_level persistent = optimism_args_with_right_defaults.persistent + observability_params = optimism_args_with_right_defaults.observability interop_params = optimism_args_with_right_defaults.interop + observability_helper = observability.make_helper(observability_params) + # Deploy the L1 l1_network = "" if external_l1_args: @@ -109,6 +116,7 @@ def run(plan, args): global_node_selectors, global_tolerations, persistent, + observability_helper, interop_params, ) @@ -120,6 +128,15 @@
def run(plan, args): all_participants, jwt_file, interop_params.supervisor_params, + observability_helper, + ) + + if observability_helper.enabled: + plan.print("Launching prometheus...") + prometheus_private_url = prometheus.launch_prometheus( + plan, + observability_helper, + global_node_selectors, ) diff --git a/network_params.yaml b/network_params.yaml index 4d1040b7..5e50bc99 100644 --- a/network_params.yaml +++ b/network_params.yaml @@ -2,7 +2,6 @@ optimism_package: chains: - participants: - el_type: op-geth - el_image: "" el_log_level: "" el_extra_env_vars: {} el_extra_labels: {} @@ -14,7 +13,6 @@ optimism_package: el_min_mem: 0 el_max_mem: 0 cl_type: op-node - cl_image: "" cl_log_level: "" cl_extra_env_vars: {} cl_extra_labels: {} @@ -37,10 +35,8 @@ optimism_package: granite_time_offset: 0 fund_dev_accounts: true batcher_params: - image: "" extra_params: [] mev_params: - rollup_boost_image: "" builder_host: "" builder_port: "" additional_services: [] diff --git a/src/batcher/op-batcher/op_batcher_launcher.star b/src/batcher/op-batcher/op_batcher_launcher.star index e20d0654..6fcecfa3 100644 --- a/src/batcher/op-batcher/op_batcher_launcher.star +++ b/src/batcher/op-batcher/op_batcher_launcher.star @@ -6,6 +6,9 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) +observability = import_module("../../observability/observability.star") +prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") + # # ---------------------------------- Batcher client ------------------------------------- # The Docker container runs as the "op-batcher" user so we can't write to root @@ -41,6 +44,7 @@ def launch( l1_config_env_vars, gs_batcher_private_key, batcher_params, + observability_helper, ): batcher_service_name = "{0}".format(service_name) @@ -53,6 +57,7 @@ def launch( l1_config_env_vars, gs_batcher_private_key, batcher_params, + observability_helper, ) batcher_service = 
plan.add_service(service_name, config) @@ -62,6 +67,8 @@ def launch( batcher_service.ip_address, batcher_http_port.number ) + observability.register_op_service_metrics_job(observability_helper, batcher_service) + return "op_batcher" @@ -74,7 +81,10 @@ def get_batcher_config( l1_config_env_vars, gs_batcher_private_key, batcher_params, + observability_helper, ): + ports = dict(get_used_ports()) + cmd = [ "op-batcher", "--l2-eth-rpc=" + el_context.rpc_http_url, @@ -93,9 +103,13 @@ def get_batcher_config( "--data-availability-type=blobs", ] + # apply customizations + + if observability_helper.enabled: + observability.configure_op_service_metrics(cmd, ports) + cmd += batcher_params.extra_params - ports = get_used_ports() return ServiceConfig( image=image, ports=ports, diff --git a/src/challenger/op-challenger/op_challenger_launcher.star b/src/challenger/op-challenger/op_challenger_launcher.star index 49d656bf..e0c53eee 100644 --- a/src/challenger/op-challenger/op_challenger_launcher.star +++ b/src/challenger/op-challenger/op_challenger_launcher.star @@ -6,6 +6,9 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) +observability = import_module("../../observability/observability.star") +prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") + # # ---------------------------------- Challenger client ------------------------------------- CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER = "/data/op-challenger/op-challenger-data" @@ -29,6 +32,7 @@ def launch( deployment_output, network_params, challenger_params, + observability_helper, ): challenger_service_name = "{0}".format(service_name) @@ -44,10 +48,15 @@ def launch( deployment_output, network_params, challenger_params, + observability_helper, ) challenger_service = plan.add_service(service_name, config) + observability.register_op_service_metrics_job( + observability_helper, challenger_service + ) + return "op_challenger" @@ 
-63,15 +72,22 @@ def get_challenger_config( deployment_output, network_params, challenger_params, + observability_helper, ): + ports = dict(get_used_ports()) + cmd = [ "op-challenger", "--cannon-l2-genesis=" - + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/genesis-{0}.json".format(network_params.network_id), + + "{0}/genesis-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + network_params.network_id, + ), "--cannon-rollup-config=" - + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/rollup-{0}.json".format(network_params.network_id), + + "{0}/rollup-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + network_params.network_id, + ), "--game-factory-address=" + game_factory_address, "--datadir=" + CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER, "--l1-beacon=" + l1_config_env_vars["CL_RPC_URL"], @@ -81,10 +97,18 @@ def get_challenger_config( "--rollup-rpc=" + cl_context.beacon_http_url, "--trace-type=" + "cannon,permissioned", ] + + # configure files + files = { ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: deployment_output, } + # apply customizations + + if observability_helper.enabled: + observability.configure_op_service_metrics(cmd, ports) + if ( challenger_params.cannon_prestate_path and challenger_params.cannon_prestates_url @@ -107,7 +131,6 @@ def get_challenger_config( CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER, " ".join(cmd) ) - ports = get_used_ports() return ServiceConfig( image=image, ports=ports, diff --git a/src/cl/hildr/hildr_launcher.star b/src/cl/hildr/hildr_launcher.star index b169c07f..f74cb39d 100644 --- a/src/cl/hildr/hildr_launcher.star +++ b/src/cl/hildr/hildr_launcher.star @@ -15,6 +15,7 @@ ethereum_package_input_parser = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/observability.star") util = import_module("../../util.star") @@ -31,6 
+32,8 @@ BEACON_HTTP_PORT_ID = "http" BEACON_DISCOVERY_PORT_NUM = 9003 BEACON_HTTP_PORT_NUM = 8547 +METRICS_PATH = "/metrics" + def get_used_ports(discovery_port): used_ports = { @@ -73,6 +76,7 @@ def launch( existing_cl_clients, l1_config_env_vars, sequencer_enabled, + observability_helper, interop_params, ): # beacon_node_identity_recipe = PostHttpRequestRecipe( @@ -104,6 +108,7 @@ def launch( existing_cl_clients, l1_config_env_vars, sequencer_enabled, + observability_helper, ) beacon_service = plan.add_service(service_name, config) @@ -113,6 +118,10 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) + metrics_info = observability.new_metrics_info( + observability_helper, beacon_service, METRICS_PATH + ) + # response = plan.request( # recipe=beacon_node_identity_recipe, service_name=service_name # ) @@ -127,6 +136,7 @@ def launch( ip_addr=beacon_service.ip_address, http_port=beacon_http_port.number, beacon_http_url=beacon_http_url, + cl_nodes_metrics_info=[metrics_info], beacon_service_name=service_name, ) @@ -144,6 +154,7 @@ def get_beacon_config( existing_cl_clients, l1_config_env_vars, sequencer_enabled, + observability_helper, ): EXECUTION_ENGINE_ENDPOINT = "http://{0}:{1}".format( el_context.ip_addr, @@ -154,7 +165,7 @@ def get_beacon_config( el_context.rpc_port_num, ) - used_ports = get_used_ports(BEACON_DISCOVERY_PORT_NUM) + ports = dict(get_used_ports(BEACON_DISCOVERY_PORT_NUM)) cmd = [ "--devnet", @@ -168,21 +179,52 @@ def get_beacon_config( "--rpc-port={0}".format(BEACON_HTTP_PORT_NUM), "--sync-mode=full", "--network=" - + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/rollup-{0}.json".format(launcher.network_params.network_id), + + "{0}/rollup-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + launcher.network_params.network_id, + ), ] - sequencer_private_key = util.read_network_config_value( - plan, - launcher.deployment_output, - 
"sequencer-{0}".format(launcher.network_params.network_id), - ".privateKey", - ) + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[BEACON_DATA_DIRPATH_ON_SERVICE_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.cl_volume_size) + if int(participant.cl_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.CL_TYPE.hildr + "_volume_size" + ], + ) + + # configure environment variables + + env_vars = dict(participant.cl_extra_env_vars) + + # apply customizations + + if observability_helper.enabled: + cmd += [ + "--metrics-enable", + "--metrics-port={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) if sequencer_enabled: - cmd.append("--sequencer-enable") + # sequencer private key can't be used by hildr yet + # sequencer_private_key = util.read_network_config_value( + # plan, + # launcher.deployment_output, + # "sequencer-{0}".format(launcher.network_params.network_id), + # ".privateKey", + # ) - # sequencer private key can't be used by hildr yet + cmd.append("--sequencer-enable") if len(existing_cl_clients) == 1: cmd.append( @@ -199,24 +241,6 @@ def get_beacon_config( cmd += participant.cl_extra_params - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[BEACON_DATA_DIRPATH_ON_SERVICE_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.cl_volume_size) - if int(participant.cl_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.CL_TYPE.hildr + "_volume_size" - ], - ) - - ports = {} - ports.update(used_ports) - - env_vars = participant.cl_extra_env_vars 
config_args = { "image": participant.cl_image, "ports": ports, @@ -235,6 +259,8 @@ def get_beacon_config( "node_selectors": node_selectors, } + # configure resources + if participant.cl_min_cpu > 0: config_args["min_cpu"] = participant.cl_min_cpu if participant.cl_max_cpu > 0: @@ -243,6 +269,7 @@ def get_beacon_config( config_args["min_memory"] = participant.cl_min_mem if participant.cl_max_mem > 0: config_args["max_memory"] = participant.cl_max_mem + return ServiceConfig(**config_args) diff --git a/src/cl/op-node/op_node_launcher.star b/src/cl/op-node/op_node_launcher.star index c09967ef..baf69ff1 100644 --- a/src/cl/op-node/op_node_launcher.star +++ b/src/cl/op-node/op_node_launcher.star @@ -17,6 +17,7 @@ ethereum_package_input_parser = import_module( constants = import_module("../../package_io/constants.star") util = import_module("../../util.star") +observability = import_module("../../observability/observability.star") interop_constants = import_module("../../interop/constants.star") # ---------------------------------- Beacon client ------------------------------------- @@ -74,6 +75,7 @@ def launch( existing_cl_clients, l1_config_env_vars, sequencer_enabled, + observability_helper, interop_params, ): beacon_node_identity_recipe = PostHttpRequestRecipe( @@ -106,6 +108,7 @@ def launch( l1_config_env_vars, beacon_node_identity_recipe, sequencer_enabled, + observability_helper, interop_params, ) @@ -116,6 +119,8 @@ def launch( beacon_service.ip_address, beacon_http_port.number ) + metrics_info = observability.new_metrics_info(observability_helper, beacon_service) + response = plan.request( recipe=beacon_node_identity_recipe, service_name=service_name ) @@ -130,7 +135,7 @@ def launch( ip_addr=beacon_service.ip_address, http_port=beacon_http_port.number, beacon_http_url=beacon_http_url, - cl_nodes_metrics_info=None, + cl_nodes_metrics_info=[metrics_info], beacon_service_name=service_name, multiaddr=beacon_multiaddr, peer_id=beacon_peer_id, @@ -151,23 +156,26 @@ def 
get_beacon_config( l1_config_env_vars, beacon_node_identity_recipe, sequencer_enabled, + observability_helper, interop_params, ): + ports = dict(get_used_ports(BEACON_DISCOVERY_PORT_NUM)) + EXECUTION_ENGINE_ENDPOINT = "http://{0}:{1}".format( el_context.ip_addr, el_context.engine_rpc_port_num, ) - used_ports = get_used_ports(BEACON_DISCOVERY_PORT_NUM) - cmd = [ "op-node", "--l2={0}".format(EXECUTION_ENGINE_ENDPOINT), "--l2.jwt-secret=" + ethereum_package_constants.JWT_MOUNT_PATH_ON_CONTAINER, "--verifier.l1-confs=4", "--rollup.config=" - + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/rollup-{0}.json".format(launcher.network_params.network_id), + + "{0}/rollup-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + launcher.network_params.network_id, + ), "--rpc.addr=0.0.0.0", "--rpc.port={0}".format(BEACON_HTTP_PORT_NUM), "--rpc.enable-admin", @@ -185,32 +193,7 @@ def get_beacon_config( "--safedb.path={0}".format(BEACON_DATA_DIRPATH_ON_SERVICE_CONTAINER), ] - sequencer_private_key = util.read_network_config_value( - plan, - launcher.deployment_output, - "sequencer-{0}".format(launcher.network_params.network_id), - ".privateKey", - ) - - if sequencer_enabled: - cmd.append("--p2p.sequencer.key=" + sequencer_private_key) - cmd.append("--sequencer.enabled") - cmd.append("--sequencer.l1-confs=5") - - if len(existing_cl_clients) > 0: - cmd.append( - "--p2p.bootnodes=" - + ",".join( - [ - ctx.enr - for ctx in existing_cl_clients[ - : ethereum_package_constants.MAX_ENR_ENTRIES - ] - ] - ) - ) - - cmd += participant.cl_extra_params + # configure files files = { ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, @@ -227,10 +210,21 @@ def get_beacon_config( ], ) - ports = dict(used_ports) + # configure environment variables env_vars = dict(participant.cl_extra_env_vars) + # apply customizations + + if observability_helper.enabled: + cmd += [ + "--metrics.enabled=true", + 
"--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) + if interop_params.enabled: ports[ interop_constants.INTEROP_WS_PORT_ID @@ -248,6 +242,35 @@ def get_beacon_config( } ) + if sequencer_enabled: + sequencer_private_key = util.read_network_config_value( + plan, + launcher.deployment_output, + "sequencer-{0}".format(launcher.network_params.network_id), + ".privateKey", + ) + + cmd += [ + "--p2p.sequencer.key=" + sequencer_private_key, + "--sequencer.enabled", + "--sequencer.l1-confs=5", + ] + + if len(existing_cl_clients) > 0: + cmd.append( + "--p2p.bootnodes=" + + ",".join( + [ + ctx.enr + for ctx in existing_cl_clients[ + : ethereum_package_constants.MAX_ENR_ENTRIES + ] + ] + ) + ) + + cmd += participant.cl_extra_params + config_args = { "image": participant.cl_image, "ports": ports, @@ -273,6 +296,8 @@ def get_beacon_config( "node_selectors": node_selectors, } + # configure resources + if participant.cl_min_cpu > 0: config_args["min_cpu"] = participant.cl_min_cpu if participant.cl_max_cpu > 0: @@ -281,13 +306,13 @@ def get_beacon_config( config_args["min_memory"] = participant.cl_min_mem if participant.cl_max_mem > 0: config_args["max_memory"] = participant.cl_max_mem + return ServiceConfig(**config_args) -def new_op_node_launcher(deployment_output, jwt_file, network_params, interop_params): +def new_op_node_launcher(deployment_output, jwt_file, network_params): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network_params=network_params, - interop_params=interop_params, ) diff --git a/src/el/op-besu/op_besu_launcher.star b/src/el/op-besu/op_besu_launcher.star index d47dc3d0..c9ceda7b 100644 --- a/src/el/op-besu/op_besu_launcher.star +++ b/src/el/op-besu/op_besu_launcher.star @@ -23,12 +23,12 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") +observability = 
import_module("../../observability/observability.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 8551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 300 @@ -41,13 +41,10 @@ TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" ENGINE_WS_PORT_ID = "engineWs" -METRICS_PORT_ID = "metrics" # TODO(old) Scale this dynamically based on CPUs available and Geth nodes mining NUM_MINING_THREADS = 1 -METRICS_PATH = "/debug/metrics/prometheus" - # The dirpath of the execution data directory on the client container EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER = "/data/besu/execution-data" @@ -72,9 +69,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL, ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -105,6 +99,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_helper, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -126,6 +121,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, ) service = plan.add_service(service_name, config) @@ -134,13 +130,10 @@ def launch( plan, service_name, RPC_PORT_ID ) - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - besu_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url - ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) + metrics_info = observability.new_metrics_info(observability_helper, service) + return ethereum_package_el_context.new_el_context( client_name="op-besu", enode=enode, @@ -150,7 +143,7 @@ def launch( engine_rpc_port_num=ENGINE_RPC_PORT_NUM, rpc_http_url=http_url, 
service_name=service_name, - el_metrics_info=[besu_metrics_info], + el_metrics_info=[metrics_info], ) @@ -167,15 +160,18 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, ): discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) cmd = [ "besu", "--genesis-file=" - + ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER - + "/genesis-{0}.json".format(launcher.network_id), + + "{0}/genesis-{1}.json".format( + ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER, + launcher.network_id, + ), "--network-id={0}".format(launcher.network_id), # "--logging=" + log_level, "--data-path=" + EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, @@ -198,13 +194,41 @@ def get_config( "--engine-host-allowlist=*", "--engine-rpc-port={0}".format(ENGINE_RPC_PORT_NUM), "--sync-mode=FULL", - "--metrics-enabled=true", - "--metrics-host=0.0.0.0", - "--metrics-port={0}".format(METRICS_PORT_NUM), "--bonsai-limit-trie-logs-enabled=false", "--version-compatibility-protection=false", ] + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.el_volume_size) + if int(participant.el_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.EL_TYPE.op_besu + "_volume_size" + ], + ) + + # configure environment variables + + env_vars = dict(participant.el_extra_env_vars) + + # apply customizations + + if observability_helper.enabled: + cmd += [ + "--metrics-enabled=true", + "--metrics-host=0.0.0.0", + "--metrics-port={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) + # if not sequencer_enabled: # 
cmd.append( # "--rollup.sequencerhttp={0}".format(sequencer_context.rpc_http_url) @@ -226,23 +250,9 @@ def get_config( cmd += participant.el_extra_params cmd_str = " ".join(cmd) - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.el_volume_size) - if int(participant.el_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.EL_TYPE.op_besu + "_volume_size" - ], - ) - env_vars = participant.el_extra_env_vars config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": [cmd_str], "files": files, "entrypoint": ENTRYPOINT_ARGS, @@ -260,6 +270,8 @@ def get_config( "user": User(uid=0, gid=0), } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -268,6 +280,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) @@ -276,12 +289,10 @@ def new_op_besu_launcher( jwt_file, network, network_id, - interop_params, ): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el/op-erigon/op_erigon_launcher.star b/src/el/op-erigon/op_erigon_launcher.star index 6d67972d..38d85e77 100644 --- a/src/el/op-erigon/op_erigon_launcher.star +++ b/src/el/op-erigon/op_erigon_launcher.star @@ -21,12 +21,12 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/observability.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 
DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 8551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 300 @@ -39,9 +39,6 @@ TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" ENGINE_WS_PORT_ID = "engineWs" -METRICS_PORT_ID = "metrics" - -METRICS_PATH = "/debug/metrics/prometheus" # The dirpath of the execution data directory on the client container EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER = "/data/op-erigon/execution-data" @@ -67,9 +64,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL, ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -97,6 +91,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_helper, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -118,6 +113,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, ) service = plan.add_service(service_name, config) @@ -126,13 +122,10 @@ def launch( plan, service_name, RPC_PORT_ID ) - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - erigon_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url - ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) + metrics_info = observability.new_metrics_info(observability_helper, service) + return ethereum_package_el_context.new_el_context( client_name="op-erigon", enode=enode, @@ -143,7 +136,7 @@ def launch( enr=enr, rpc_http_url=http_url, service_name=service_name, - el_metrics_info=[erigon_metrics_info], + el_metrics_info=[metrics_info], ) @@ -160,15 +153,12 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, ): - 
init_datadir_cmd_str = "erigon init --datadir={0} {1}".format( - EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, - ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER - + "/genesis-{0}.json".format(launcher.network_id), - ) - discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) + + subcommand_strs = [] cmd = [ "erigon", @@ -188,12 +178,51 @@ def get_config( "--authrpc.jwtsecret=" + ethereum_package_constants.JWT_MOUNT_PATH_ON_CONTAINER, "--nat=extip:" + ethereum_package_constants.PRIVATE_IP_ADDRESS_PLACEHOLDER, "--rpc.allow-unprotected-txs", - "--metrics", - "--metrics.addr=0.0.0.0", - "--metrics.port={0}".format(METRICS_PORT_NUM), "--port={0}".format(discovery_port), ] + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.el_volume_size) + if int(participant.el_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.EL_TYPE.op_erigon + "_volume_size" + ], + ) + + if launcher.network not in ethereum_package_constants.PUBLIC_NETWORKS: + init_datadir_cmd_str = "erigon init --datadir={0} {1}".format( + EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, + "{0}/genesis-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + launcher.network_id, + ), + ) + + subcommand_strs.append(init_datadir_cmd_str) + + # configure environment variables + + env_vars = dict(participant.el_extra_env_vars) + + # apply customizations + + if observability_helper.enabled: + cmd += [ + "--metrics", + "--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) + if not sequencer_enabled: 
cmd.append("--rollup.sequencerhttp={0}".format(sequencer_context.rpc_http_url)) @@ -210,34 +239,15 @@ def get_config( ) ) + # construct command string + cmd += participant.el_extra_params - cmd_str = " ".join(cmd) - if launcher.network not in ethereum_package_constants.PUBLIC_NETWORKS: - subcommand_strs = [ - init_datadir_cmd_str, - cmd_str, - ] - command_str = " && ".join(subcommand_strs) - else: - command_str = cmd_str + subcommand_strs.append(" ".join(cmd)) + command_str = " && ".join(subcommand_strs) - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.el_volume_size) - if int(participant.el_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.EL_TYPE.op_erigon + "_volume_size" - ], - ) - env_vars = participant.el_extra_env_vars config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": [command_str], "files": files, "entrypoint": ENTRYPOINT_ARGS, @@ -255,6 +265,8 @@ def get_config( "user": User(uid=0, gid=0), } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -263,6 +275,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) @@ -271,12 +284,10 @@ def new_op_erigon_launcher( jwt_file, network, network_id, - interop_params, ): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el/op-geth/op_geth_launcher.star b/src/el/op-geth/op_geth_launcher.star index 7de38d9f..83a2bb0b 100644 --- 
a/src/el/op-geth/op_geth_launcher.star +++ b/src/el/op-geth/op_geth_launcher.star @@ -22,13 +22,13 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/observability.star") interop_constants = import_module("../../interop/constants.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 8551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 300 @@ -41,13 +41,11 @@ TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" ENGINE_WS_PORT_ID = "engineWs" -METRICS_PORT_ID = "metrics" + # TODO(old) Scale this dynamically based on CPUs available and Geth nodes mining NUM_MINING_THREADS = 1 -METRICS_PATH = "/debug/metrics/prometheus" - # The dirpath of the execution data directory on the client container EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER = "/data/geth/execution-data" @@ -72,9 +70,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL, ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -105,6 +100,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_helper, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -126,6 +122,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, interop_params, ) @@ -135,13 +132,10 @@ def launch( plan, service_name, RPC_PORT_ID ) - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - geth_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url - ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) + 
metrics_info = observability.new_metrics_info(observability_helper, service) + return ethereum_package_el_context.new_el_context( client_name="op-geth", enode=enode, @@ -152,7 +146,7 @@ def launch( rpc_http_url=http_url, enr=enr, service_name=service_name, - el_metrics_info=[geth_metrics_info], + el_metrics_info=[metrics_info], ) @@ -169,16 +163,13 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, interop_params, ): - init_datadir_cmd_str = "geth init --datadir={0} --state.scheme=hash {1}".format( - EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS - + "/genesis-{0}.json".format(launcher.network_id), - ) - discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) + + subcommand_strs = [] cmd = [ "geth", @@ -205,44 +196,17 @@ def get_config( "--syncmode=full", "--nat=extip:" + ethereum_package_constants.PRIVATE_IP_ADDRESS_PLACEHOLDER, "--rpc.allow-unprotected-txs", - "--metrics", - "--metrics.addr=0.0.0.0", - "--metrics.port={0}".format(METRICS_PORT_NUM), "--discovery.port={0}".format(discovery_port), "--port={0}".format(discovery_port), ] - if not sequencer_enabled: - cmd.append("--rollup.sequencerhttp={0}".format(sequencer_context.rpc_http_url)) - - if len(existing_el_clients) > 0: - cmd.append( - "--bootnodes=" - + ",".join( - [ - ctx.enode - for ctx in existing_el_clients[ - : ethereum_package_constants.MAX_ENODE_ENTRIES - ] - ] - ) - ) - - cmd += participant.el_extra_params - cmd_str = " ".join(cmd) - if launcher.network not in ethereum_package_constants.PUBLIC_NETWORKS: - subcommand_strs = [ - init_datadir_cmd_str, - cmd_str, - ] - command_str = " && ".join(subcommand_strs) - else: - command_str = cmd_str + # configure files files = { ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: 
launcher.jwt_file, } + if persistent: files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( persistent_key="data-{0}".format(service_name), @@ -253,14 +217,60 @@ def get_config( ], ) - env_vars = dict(participant.cl_extra_env_vars) + if launcher.network not in ethereum_package_constants.PUBLIC_NETWORKS: + init_datadir_cmd_str = "geth init --datadir={0} --state.scheme=hash {1}".format( + EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, + "{0}/genesis-{1}.json".format( + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS, + launcher.network_id, + ), + ) + + subcommand_strs.append(init_datadir_cmd_str) + + # configure environment variables + + env_vars = dict(participant.el_extra_env_vars) + + # apply customizations + + if observability_helper.enabled: + cmd += [ + "--metrics", + "--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) if interop_params.enabled: env_vars["GETH_ROLLUP_INTEROPRPC"] = interop_constants.SUPERVISOR_ENDPOINT + if not sequencer_enabled: + cmd.append("--rollup.sequencerhttp={0}".format(sequencer_context.rpc_http_url)) + + if len(existing_el_clients) > 0: + cmd.append( + "--bootnodes=" + + ",".join( + [ + ctx.enode + for ctx in existing_el_clients[ + : ethereum_package_constants.MAX_ENODE_ENTRIES + ] + ] + ) + ) + + # construct command string + + cmd += participant.el_extra_params + subcommand_strs.append(" ".join(cmd)) + command_str = " && ".join(subcommand_strs) + config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": [command_str], "files": files, "entrypoint": ENTRYPOINT_ARGS, @@ -277,6 +287,8 @@ def get_config( "node_selectors": node_selectors, } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -285,16 +297,14 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: 
config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) -def new_op_geth_launcher( - deployment_output, jwt_file, network, network_id, interop_params -): +def new_op_geth_launcher(deployment_output, jwt_file, network, network_id): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el/op-nethermind/op_nethermind_launcher.star b/src/el/op-nethermind/op_nethermind_launcher.star index 414fd12f..a8286b42 100644 --- a/src/el/op-nethermind/op_nethermind_launcher.star +++ b/src/el/op-nethermind/op_nethermind_launcher.star @@ -22,12 +22,12 @@ ethereum_package_constants = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/observability.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 8551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 300 @@ -40,13 +40,10 @@ TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" ENGINE_WS_PORT_ID = "engineWs" -METRICS_PORT_ID = "metrics" # TODO(old) Scale this dynamically based on CPUs available and Nethermind nodes mining NUM_MINING_THREADS = 1 -METRICS_PATH = "/debug/metrics/prometheus" - # The dirpath of the execution data directory on the client container EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER = "/data/nethermind/execution-data" @@ -71,9 +68,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL, ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -99,6 +93,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_helper, interop_params, ): log_level = 
ethereum_package_input_parser.get_client_log_level_or_default( @@ -120,6 +115,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, ) service = plan.add_service(service_name, config) @@ -128,14 +124,11 @@ def launch( plan, service_name, RPC_PORT_ID ) - metrics_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - nethermind_metrics_info = ethereum_package_el_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metrics_url - ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) ws_url = "ws://{0}:{1}".format(service.ip_address, WS_PORT_NUM) + metrics_info = observability.new_metrics_info(observability_helper, service) + return ethereum_package_el_context.new_el_context( client_name="op-nethermind", enode=enode, @@ -146,7 +139,7 @@ def launch( rpc_http_url=http_url, ws_url=ws_url, service_name=service_name, - el_metrics_info=[nethermind_metrics_info], + el_metrics_info=[metrics_info], ) @@ -163,9 +156,11 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, ): discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) + cmd = [ "--log=debug", "--datadir=" + EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, @@ -183,10 +178,38 @@ def get_config( "--Network.P2PPort={0}".format(discovery_port), "--JsonRpc.JwtSecretFile=" + ethereum_package_constants.JWT_MOUNT_PATH_ON_CONTAINER, - "--Metrics.Enabled=true", - "--Metrics.ExposePort={0}".format(METRICS_PORT_NUM), - "--Metrics.ExposeHost=0.0.0.0", ] + + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.el_volume_size) + if 
int(participant.el_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.EL_TYPE.op_nethermind + "_volume_size" + ], + ) + # configure environment variables + + env_vars = dict(participant.el_extra_env_vars) + + # apply customizations + + if observability_helper.enabled: + cmd += [ + "--Metrics.Enabled=true", + "--Metrics.ExposeHost=0.0.0.0", + "--Metrics.ExposePort={0}".format(observability.METRICS_PORT_NUM), + ] + + observability.expose_metrics_port(ports) + if not sequencer_enabled: cmd.append("--Optimism.SequencerUrl={0}".format(sequencer_context.rpc_http_url)) @@ -207,29 +230,17 @@ def get_config( cmd.append("--config=none.cfg") cmd.append( "--Init.ChainSpecPath=" - + ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER - + "/chainspec-{0}.json".format(launcher.network_id) + + "{0}/chainspec-{1}.json".format( + ethereum_package_constants.GENESIS_CONFIG_MOUNT_PATH_ON_CONTAINER, + launcher.network_id, + ), ) - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.el_volume_size) - if int(participant.el_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.EL_TYPE.op_nethermind + "_volume_size" - ], - ) - cmd += participant.el_extra_params - env_vars = participant.el_extra_env_vars + config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": cmd, "files": files, "private_ip_address_placeholder": ethereum_package_constants.PRIVATE_IP_ADDRESS_PLACEHOLDER, @@ -245,6 +256,8 @@ def get_config( "node_selectors": node_selectors, } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -253,6 +266,7 @@ def get_config( 
config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) @@ -261,12 +275,10 @@ def new_nethermind_launcher( jwt_file, network, network_id, - interop_params, ): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el/op-reth/op_reth_launcher.star b/src/el/op-reth/op_reth_launcher.star index 70211929..a519bd73 100644 --- a/src/el/op-reth/op_reth_launcher.star +++ b/src/el/op-reth/op_reth_launcher.star @@ -21,12 +21,12 @@ ethereum_package_input_parser = import_module( ) constants = import_module("../../package_io/constants.star") +observability = import_module("../../observability/observability.star") RPC_PORT_NUM = 8545 WS_PORT_NUM = 8546 DISCOVERY_PORT_NUM = 30303 ENGINE_RPC_PORT_NUM = 9551 -METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 100 @@ -38,7 +38,6 @@ WS_PORT_ID = "ws" TCP_DISCOVERY_PORT_ID = "tcp-discovery" UDP_DISCOVERY_PORT_ID = "udp-discovery" ENGINE_RPC_PORT_ID = "engine-rpc" -METRICS_PORT_ID = "metrics" # Paths METRICS_PATH = "/metrics" @@ -66,9 +65,6 @@ def get_used_ports(discovery_port=DISCOVERY_PORT_NUM): ENGINE_RPC_PORT_ID: ethereum_package_shared_utils.new_port_spec( ENGINE_RPC_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL ), - METRICS_PORT_ID: ethereum_package_shared_utils.new_port_spec( - METRICS_PORT_NUM, ethereum_package_shared_utils.TCP_PROTOCOL - ), } return used_ports @@ -94,6 +90,7 @@ def launch( existing_el_clients, sequencer_enabled, sequencer_context, + observability_helper, interop_params, ): log_level = ethereum_package_input_parser.get_client_log_level_or_default( @@ -115,6 +112,7 @@ def launch( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, ) service = plan.add_service(service_name, config) @@ -123,13 +121,12 @@ def 
launch( plan, service_name, RPC_PORT_ID ) - metric_url = "{0}:{1}".format(service.ip_address, METRICS_PORT_NUM) - op_reth_metrics_info = ethereum_package_node_metrics.new_node_metrics_info( - service_name, METRICS_PATH, metric_url - ) - http_url = "http://{0}:{1}".format(service.ip_address, RPC_PORT_NUM) + metrics_info = observability.new_metrics_info( + observability_helper, service, METRICS_PATH + ) + return ethereum_package_el_context.new_el_context( client_name="reth", enode=enode, @@ -139,7 +136,7 @@ def launch( engine_rpc_port_num=ENGINE_RPC_PORT_NUM, rpc_http_url=http_url, service_name=service_name, - el_metrics_info=[op_reth_metrics_info], + el_metrics_info=[metrics_info], ) @@ -156,10 +153,10 @@ def get_config( cl_client_name, sequencer_enabled, sequencer_context, + observability_helper, ): - public_ports = {} discovery_port = DISCOVERY_PORT_NUM - used_ports = get_used_ports(discovery_port) + ports = dict(get_used_ports(discovery_port)) cmd = [ "node", @@ -186,12 +183,38 @@ def get_config( "--authrpc.port={0}".format(ENGINE_RPC_PORT_NUM), "--authrpc.jwtsecret=" + ethereum_package_constants.JWT_MOUNT_PATH_ON_CONTAINER, "--authrpc.addr=0.0.0.0", - "--metrics=0.0.0.0:{0}".format(METRICS_PORT_NUM), "--discovery.port={0}".format(discovery_port), "--port={0}".format(discovery_port), "--rpc.eth-proof-window=302400", ] + # configure files + + files = { + ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, + ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, + } + if persistent: + files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( + persistent_key="data-{0}".format(service_name), + size=int(participant.el_volume_size) + if int(participant.el_volume_size) > 0 + else constants.VOLUME_SIZE[launcher.network][ + constants.EL_TYPE.op_reth + "_volume_size" + ], + ) + + # configure environment variables + + env_vars = participant.el_extra_env_vars + + # apply customizations + + if 
observability_helper.enabled: + cmd.append("--metrics=0.0.0.0:{0}".format(observability.METRICS_PORT_NUM)) + + observability.expose_metrics_port(ports) + if not sequencer_enabled: cmd.append("--rollup.sequencer-http={0}".format(sequencer_context.rpc_http_url)) @@ -208,25 +231,11 @@ def get_config( ) ) - files = { - ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: launcher.deployment_output, - ethereum_package_constants.JWT_MOUNTPOINT_ON_CLIENTS: launcher.jwt_file, - } - if persistent: - files[EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER] = Directory( - persistent_key="data-{0}".format(service_name), - size=int(participant.el_volume_size) - if int(participant.el_volume_size) > 0 - else constants.VOLUME_SIZE[launcher.network][ - constants.EL_TYPE.op_reth + "_volume_size" - ], - ) - cmd += participant.el_extra_params - env_vars = participant.el_extra_env_vars + config_args = { "image": participant.el_image, - "ports": used_ports, + "ports": ports, "cmd": cmd, "files": files, "private_ip_address_placeholder": ethereum_package_constants.PRIVATE_IP_ADDRESS_PLACEHOLDER, @@ -242,6 +251,8 @@ def get_config( "node_selectors": node_selectors, } + # configure resources + if participant.el_min_cpu > 0: config_args["min_cpu"] = participant.el_min_cpu if participant.el_max_cpu > 0: @@ -250,6 +261,7 @@ def get_config( config_args["min_memory"] = participant.el_min_mem if participant.el_max_mem > 0: config_args["max_memory"] = participant.el_max_mem + return ServiceConfig(**config_args) @@ -258,12 +270,10 @@ def new_op_reth_launcher( jwt_file, network, network_id, - interop_params, ): return struct( deployment_output=deployment_output, jwt_file=jwt_file, network=network, network_id=network_id, - interop_params=interop_params, ) diff --git a/src/el_cl_launcher.star b/src/el_cl_launcher.star index 2bedd199..4f2d7340 100644 --- a/src/el_cl_launcher.star +++ b/src/el_cl_launcher.star @@ -8,6 +8,8 @@ ethereum_package_input_parser = import_module( input_parser = 
import_module("./package_io/input_parser.star") +observability = import_module("./observability/observability.star") + # EL op_geth = import_module("./el/op-geth/op_geth_launcher.star") op_reth = import_module("./el/op-reth/op_reth_launcher.star") @@ -37,6 +39,7 @@ def launch( global_tolerations, persistent, additional_services, + observability_helper, interop_params, ): el_launchers = { @@ -46,7 +49,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_geth.launch, }, @@ -56,7 +58,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_reth.launch, }, @@ -66,7 +67,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_erigon.launch, }, @@ -76,7 +76,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_nethermind.launch, }, @@ -86,7 +85,6 @@ def launch( jwt_file, network_params.network, network_params.network_id, - interop_params, ), "launch_method": op_besu.launch, }, @@ -95,7 +93,7 @@ def launch( cl_launchers = { "op-node": { "launcher": op_node.new_op_node_launcher( - deployment_output, jwt_file, network_params, interop_params + deployment_output, jwt_file, network_params ), "launch_method": op_node.launch, }, @@ -229,9 +227,15 @@ def launch( all_el_contexts, sequencer_enabled, sequencer_context, + observability_helper, interop_params, ) + for metrics_info in [x for x in el_context.el_metrics_info if x != None]: + observability.register_node_metrics_job( + observability_helper, el_context.client_name, "execution", metrics_info + ) + if rollup_boost_enabled: plan.print("Rollup boost enabled") @@ -248,6 +252,7 @@ def launch( all_el_contexts, sequencer_enabled, sequencer_context, + observability_helper, interop_params, ) else: @@ -290,9 +295,21 @@ def launch( all_cl_contexts, l1_config_env_vars, 
sequencer_enabled, + observability_helper, interop_params, ) + for metrics_info in [x for x in cl_context.cl_nodes_metrics_info if x != None]: + observability.register_node_metrics_job( + observability_helper, + cl_context.client_name, + "beacon", + metrics_info, + { + "supernode": str(cl_context.supernode), + }, + ) + sequencer_enabled = False all_el_contexts.append(el_context) @@ -312,6 +329,7 @@ def launch( all_cl_contexts, l1_config_env_vars, False, + observability_helper, interop_params, ) all_cl_contexts.append(cl_builder_context) diff --git a/src/interop/op-supervisor/op_supervisor_launcher.star b/src/interop/op-supervisor/op_supervisor_launcher.star index 24f612d1..457967f4 100644 --- a/src/interop/op-supervisor/op_supervisor_launcher.star +++ b/src/interop/op-supervisor/op_supervisor_launcher.star @@ -8,6 +8,9 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) +observability = import_module("../../observability/observability.star") +prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") + interop_constants = import_module("../constants.star") @@ -47,6 +50,7 @@ def launch( all_participants, jwt_file, supervisor_params, + observability_helper, ): dependency_set_json = supervisor_params.dependency_set if not dependency_set_json: @@ -64,12 +68,17 @@ def launch( jwt_file, dependency_set_artifact, supervisor_params, + observability_helper, ) supervisor_service = plan.add_service( interop_constants.SUPERVISOR_SERVICE_NAME, config ) + observability.register_op_service_metrics_job( + observability_helper, supervisor_service + ) + return "op_supervisor" @@ -80,10 +89,17 @@ def get_supervisor_config( jwt_file, dependency_set_artifact, supervisor_params, + observability_helper, ): + ports = dict(get_used_ports()) + cmd = ["op-supervisor"] + supervisor_params.extra_params - ports = get_used_ports() + # apply customizations + + if observability_helper.enabled: + 
observability.configure_op_service_metrics(cmd, ports) + return ServiceConfig( image=supervisor_params.image, ports=ports, diff --git a/src/l2.star b/src/l2.star index 532a7dde..28eeaf6b 100644 --- a/src/l2.star +++ b/src/l2.star @@ -19,6 +19,7 @@ def launch_l2( global_node_selectors, global_tolerations, persistent, + observability_helper, interop_params, ): network_params = l2_args.network_params @@ -47,6 +48,7 @@ def launch_l2( global_tolerations, persistent, l2_args.additional_services, + observability_helper, interop_params, ) diff --git a/src/observability/observability.star b/src/observability/observability.star new file mode 100644 index 00000000..a6c11d25 --- /dev/null +++ b/src/observability/observability.star @@ -0,0 +1,145 @@ +ethereum_package_shared_utils = import_module( + "github.com/ethpandaops/ethereum-package/src/shared_utils/shared_utils.star" +) + +ethereum_package_node_metrics = import_module( + "github.com/ethpandaops/ethereum-package/src/node_metrics_info.star" +) + +DEFAULT_SCRAPE_INTERVAL = "15s" + +METRICS_PORT_ID = "metrics" +METRICS_PORT_NUM = 9001 +METRICS_PATH = "/debug/metrics/prometheus" + +METRICS_INFO_NAME_KEY = "name" +METRICS_INFO_URL_KEY = "url" +METRICS_INFO_PATH_KEY = "path" +METRICS_INFO_ADDITIONAL_CONFIG_KEY = "config" + + +def make_metrics_url(service, metrics_port_num=METRICS_PORT_NUM): + return "{0}:{1}".format(service.ip_address, metrics_port_num) + + +def new_metrics_info(helper, service, metrics_path=METRICS_PATH): + if not helper.enabled: + return None + + metrics_url = make_metrics_url(service) + metrics_info = ethereum_package_node_metrics.new_node_metrics_info( + service.name, metrics_path, metrics_url + ) + + return metrics_info + + +def expose_metrics_port(ports, port_id=METRICS_PORT_ID, port_num=METRICS_PORT_NUM): + ports[port_id] = ethereum_package_shared_utils.new_port_spec( + port_num, ethereum_package_shared_utils.TCP_PROTOCOL + ) + + +# configures the CLI flags and ports for a service using the standard 
op-service setup +def configure_op_service_metrics(cmd, ports): + cmd += [ + "--metrics.enabled", + "--metrics.addr=0.0.0.0", + "--metrics.port={0}".format(METRICS_PORT_NUM), + ] + + expose_metrics_port(ports) + + +def make_helper(observability_params): + return struct( + params=observability_params, + enabled=observability_params.enabled, + metrics_jobs=[], + ) + + +def add_metrics_job(helper, job): + helper.metrics_jobs.append(job) + + +def new_metrics_job( + job_name, + endpoint, + metrics_path, + labels, + scrape_interval=DEFAULT_SCRAPE_INTERVAL, +): + return { + "Name": job_name, + "Endpoint": endpoint, + "MetricsPath": metrics_path, + "Labels": labels, + "ScrapeInterval": scrape_interval, + } + + +def register_op_service_metrics_job(helper, service): + register_service_metrics_job( + helper, + service_name=service.name, + endpoint=make_metrics_url(service), + ) + + +def register_service_metrics_job( + helper, + service_name, + endpoint, + metrics_path="", + additional_labels={}, + scrape_interval=DEFAULT_SCRAPE_INTERVAL, +): + labels = { + "service": service_name, + } + labels.update(additional_labels) + + add_metrics_job( + helper, + new_metrics_job( + job_name=service_name, + endpoint=endpoint, + metrics_path=metrics_path, + labels=labels, + scrape_interval=scrape_interval, + ), + ) + + +def register_node_metrics_job( + helper, client_name, client_type, node_metrics_info, additional_labels={} +): + labels = { + "client_type": client_type, + "client_name": client_name, + } + labels.update(additional_labels) + + scrape_interval = DEFAULT_SCRAPE_INTERVAL + + additional_config = node_metrics_info[METRICS_INFO_ADDITIONAL_CONFIG_KEY] + + if additional_config != None: + if additional_config.labels != None: + labels.update(additional_config.labels) + + if ( + additional_config.scrape_interval != None + and additional_config.scrape_interval != "" + ): + scrape_interval = additional_config.scrape_interval + + register_service_metrics_job( + helper, + 
service_name=node_metrics_info[METRICS_INFO_NAME_KEY], + endpoint=node_metrics_info[METRICS_INFO_URL_KEY], + metrics_path=node_metrics_info[METRICS_INFO_PATH_KEY], + additional_labels=labels, + scrape_interval=scrape_interval, + ) diff --git a/src/observability/prometheus/prometheus_launcher.star b/src/observability/prometheus/prometheus_launcher.star new file mode 100644 index 00000000..121c8f37 --- /dev/null +++ b/src/observability/prometheus/prometheus_launcher.star @@ -0,0 +1,28 @@ +prometheus = import_module("github.com/kurtosis-tech/prometheus-package/main.star") + + +def launch_prometheus( + plan, + observability_helper, + global_node_selectors, +): + if len(observability_helper.metrics_jobs) == 0: + return None + + prometheus_params = observability_helper.params.prometheus_params + + prometheus_url = prometheus.run( + plan, + observability_helper.metrics_jobs, + "prometheus", + min_cpu=prometheus_params.min_cpu, + max_cpu=prometheus_params.max_cpu, + min_memory=prometheus_params.min_mem, + max_memory=prometheus_params.max_mem, + node_selectors=global_node_selectors, + storage_tsdb_retention_time=prometheus_params.storage_tsdb_retention_time, + storage_tsdb_retention_size=prometheus_params.storage_tsdb_retention_size, + image=prometheus_params.image, + ) + + return prometheus_url diff --git a/src/package_io/input_parser.star b/src/package_io/input_parser.star index f4a046ea..fddde4b7 100644 --- a/src/package_io/input_parser.star +++ b/src/package_io/input_parser.star @@ -62,6 +62,22 @@ def input_parser(plan, input_args): results["persistent"] = False return struct( + observability=struct( + enabled=results["observability"]["enabled"], + prometheus_params=struct( + image=results["observability"]["prometheus_params"]["image"], + storage_tsdb_retention_time=results["observability"][ + "prometheus_params" + ]["storage_tsdb_retention_time"], + storage_tsdb_retention_size=results["observability"][ + "prometheus_params" + ]["storage_tsdb_retention_size"], + 
min_cpu=results["observability"]["prometheus_params"]["min_cpu"], + max_cpu=results["observability"]["prometheus_params"]["max_cpu"], + min_mem=results["observability"]["prometheus_params"]["min_mem"], + max_mem=results["observability"]["prometheus_params"]["max_mem"], + ), + ), interop=struct( enabled=results["interop"]["enabled"], supervisor_params=struct( @@ -175,9 +191,19 @@ def input_parser(plan, input_args): def parse_network_params(plan, input_args): results = {} + # configure observability + + results["observability"] = default_observability_params() + results["observability"].update(input_args.get("observability", {})) + + results["observability"]["prometheus_params"] = default_prometheus_params() + results["observability"]["prometheus_params"].update( + input_args.get("observability", {}).get("prometheus_params", {}) + ) + # configure interop - results["interop"] = default_interop_args() + results["interop"] = default_interop_params() results["interop"].update(input_args.get("interop", {})) results["interop"]["supervisor_params"] = default_supervisor_params() @@ -302,9 +328,10 @@ def parse_network_params(plan, input_args): return results -def default_optimism_args(): +def default_optimism_params(): return { - "interop": default_interop_args(), + "observability": default_observability_params(), + "interop": default_interop_params(), "chains": default_chains(), "op_contract_deployer_params": default_op_contract_deployer_params(), "global_log_level": "info", @@ -314,7 +341,25 @@ def default_optimism_args(): } -def default_interop_args(): +def default_observability_params(): + return { + "enabled": True, + } + + +def default_prometheus_params(): + return { + "image": "prom/prometheus:latest", + "storage_tsdb_retention_time": "1d", + "storage_tsdb_retention_size": "512MB", + "min_cpu": 10, + "max_cpu": 1000, + "min_mem": 128, + "max_mem": 2048, + } + + +def default_interop_params(): return { "enabled": False, } @@ -367,14 +412,14 @@ def 
default_network_params(): def default_batcher_params(): return { - "image": "", + "image": DEFAULT_BATCHER_IMAGES["op-batcher"], "extra_params": [], } def default_challenger_params(): return { - "image": "", + "image": DEFAULT_CHALLENGER_IMAGES["op-challenger"], "extra_params": [], "cannon_prestate_path": "", "cannon_prestates_url": "https://storage.googleapis.com/oplabs-network-data/proofs/op-program/cannon", @@ -383,7 +428,7 @@ def default_challenger_params(): def default_proposer_params(): return { - "image": "", + "image": DEFAULT_PROPOSER_IMAGES["op-proposer"], "extra_params": [], "game_type": 1, "proposal_interval": "10m", diff --git a/src/package_io/sanity_check.star b/src/package_io/sanity_check.star index 8e5b6ea2..94fa4614 100644 --- a/src/package_io/sanity_check.star +++ b/src/package_io/sanity_check.star @@ -1,3 +1,18 @@ +OBSERVABILITY_PARAMS = [ + "enabled", + "prometheus_params", +] + +PROMETHEUS_PARAMS = [ + "image", + "storage_tsdb_retention_time", + "storage_tsdb_retention_size", + "min_cpu", + "max_cpu", + "min_mem", + "max_mem", +] + INTEROP_PARAMS = [ "enabled", "supervisor_params", @@ -84,6 +99,7 @@ ADDITIONAL_SERVICES_PARAMS = [ ] ROOT_PARAMS = [ + "observability", "interop", "chains", "op_contract_deployer_params", @@ -134,6 +150,22 @@ def sanity_check(plan, optimism_config): if key not in ROOT_PARAMS: fail("Invalid parameter {0}, allowed fields: {1}".format(key, ROOT_PARAMS)) + if "observability" in optimism_config: + validate_params( + plan, + optimism_config, + "observability", + OBSERVABILITY_PARAMS, + ) + + if "prometheus_params" in optimism_config["observability"]: + validate_params( + plan, + optimism_config["observability"], + "prometheus_params", + PROMETHEUS_PARAMS, + ) + if "interop" in optimism_config: validate_params( plan, diff --git a/src/participant_network.star b/src/participant_network.star index 37b79733..7abcdc2e 100644 --- a/src/participant_network.star +++ b/src/participant_network.star @@ -27,6 +27,7 @@ def 
launch_participant_network( global_tolerations, persistent, additional_services, + observability_helper, interop_params, ): num_participants = len(participants) @@ -46,6 +47,7 @@ def launch_participant_network( global_tolerations, persistent, additional_services, + observability_helper, interop_params, ) @@ -72,20 +74,17 @@ def launch_participant_network( "batcher-{0}".format(network_params.network_id), ".privateKey", ) - op_batcher_image = ( - batcher_params.image - if batcher_params.image != "" - else input_parser.DEFAULT_BATCHER_IMAGES["op-batcher"] - ) + op_batcher_launcher.launch( plan, "op-batcher-{0}".format(l2_services_suffix), - op_batcher_image, + batcher_params.image, all_el_contexts[0], all_cl_contexts[0], l1_config_env_vars, batcher_key, batcher_params, + observability_helper, ) game_factory_address = util.read_network_config_value( @@ -100,15 +99,11 @@ def launch_participant_network( "challenger-{0}".format(network_params.network_id), ".privateKey", ) - op_challenger_image = ( - challenger_params.image - if challenger_params.image != "" - else input_parser.DEFAULT_CHALLENGER_IMAGES["op-challenger"] - ) + op_challenger_launcher.launch( plan, "op-challenger-{0}".format(l2_services_suffix), - op_challenger_image, + challenger_params.image, all_el_contexts[0], all_cl_contexts[0], l1_config_env_vars, @@ -117,6 +112,7 @@ def launch_participant_network( deployment_output, network_params, challenger_params, + observability_helper, ) proposer_key = util.read_network_config_value( @@ -125,20 +121,17 @@ def launch_participant_network( "proposer-{0}".format(network_params.network_id), ".privateKey", ) - op_proposer_image = ( - proposer_params.image - if proposer_params.image != "" - else input_parser.DEFAULT_PROPOSER_IMAGES["op-proposer"] - ) + op_proposer_launcher.launch( plan, "op-proposer-{0}".format(l2_services_suffix), - op_proposer_image, + proposer_params.image, all_cl_contexts[0], l1_config_env_vars, proposer_key, game_factory_address, proposer_params, + 
observability_helper, ) return all_participants diff --git a/src/proposer/op-proposer/op_proposer_launcher.star b/src/proposer/op-proposer/op_proposer_launcher.star index c12fb34b..947604b8 100644 --- a/src/proposer/op-proposer/op_proposer_launcher.star +++ b/src/proposer/op-proposer/op_proposer_launcher.star @@ -6,6 +6,9 @@ ethereum_package_constants = import_module( "github.com/ethpandaops/ethereum-package/src/package_io/constants.star" ) +observability = import_module("../../observability/observability.star") +prometheus = import_module("../../observability/prometheus/prometheus_launcher.star") + # # ---------------------------------- Batcher client ------------------------------------- # The Docker container runs as the "op-proposer" user so we can't write to root @@ -41,6 +44,7 @@ def launch( gs_proposer_private_key, game_factory_address, proposer_params, + observability_helper, ): proposer_service_name = "{0}".format(service_name) @@ -53,6 +57,7 @@ def launch( gs_proposer_private_key, game_factory_address, proposer_params, + observability_helper, ) proposer_service = plan.add_service(service_name, config) @@ -62,6 +67,10 @@ def launch( proposer_service.ip_address, proposer_http_port.number ) + observability.register_op_service_metrics_job( + observability_helper, proposer_service + ) + return "op_proposer" @@ -74,7 +83,10 @@ def get_proposer_config( gs_proposer_private_key, game_factory_address, proposer_params, + observability_helper, ): + ports = dict(get_used_ports()) + cmd = [ "op-proposer", "--poll-interval=12s", @@ -89,9 +101,13 @@ def get_proposer_config( "--wait-node-sync=true", ] + # apply customizations + + if observability_helper.enabled: + observability.configure_op_service_metrics(cmd, ports) + cmd += proposer_params.extra_params - ports = get_used_ports() return ServiceConfig( image=image, ports=ports,