From 643632f962b86665801e1a2e3d5c39bbd4f2c5f1 Mon Sep 17 00:00:00 2001 From: Omid Azizi Date: Wed, 26 Jan 2022 14:45:03 -0800 Subject: [PATCH] Stirling Socket Tracer: Flags to control perf buffer sizes. Summary: Expose some flags so users can reduce the perf buffer sizes. This may be useful for a future low-memory mode. Test Plan: Rely on existing tests to validate the default values for now. Reviewers: #stirling, jamesbartlett, yzhao Reviewed By: #stirling, yzhao Subscribers: yzhao Signed-off-by: Omid Azizi Differential Revision: https://phab.corp.pixielabs.ai/D10599 GitOrigin-RevId: 9653025b05223b8f89eac03f0edf05d809f7ca1d --- src/stirling/bpf_tools/bcc_wrapper.cc | 6 +- .../socket_tracer/socket_trace_connector.cc | 136 ++++++++++++++---- .../socket_tracer/socket_trace_connector.h | 82 +---------- 3 files changed, 117 insertions(+), 107 deletions(-) diff --git a/src/stirling/bpf_tools/bcc_wrapper.cc b/src/stirling/bpf_tools/bcc_wrapper.cc index 64348b9e1ca..a60031bd504 100644 --- a/src/stirling/bpf_tools/bcc_wrapper.cc +++ b/src/stirling/bpf_tools/bcc_wrapper.cc @@ -329,9 +329,9 @@ Status BCCWrapper::OpenPerfBuffer(const PerfBufferSpec& perf_buffer, void* cb_co // Perf buffers must be sized to a power of 2. 
num_pages = IntRoundUpToPow2(num_pages); - VLOG(1) << absl::Substitute("Opening perf buffer: $0 [requested_size=$1 num_pages=$2 size=$3]", - perf_buffer.name, perf_buffer.size_bytes, num_pages, - num_pages * kPageSizeBytes); + LOG(INFO) << absl::Substitute( + "Opening perf buffer: $0 [requested_size=$1 num_pages=$2 size=$3] (per cpu)", + perf_buffer.name, perf_buffer.size_bytes, num_pages, num_pages * kPageSizeBytes); PL_RETURN_IF_ERROR(bpf_.open_perf_buffer(std::string(perf_buffer.name), perf_buffer.probe_output_fn, perf_buffer.probe_loss_fn, cb_cookie, num_pages)); diff --git a/src/stirling/source_connectors/socket_tracer/socket_trace_connector.cc b/src/stirling/source_connectors/socket_tracer/socket_trace_connector.cc index 6e0fe44d187..16725086516 100644 --- a/src/stirling/source_connectors/socket_tracer/socket_trace_connector.cc +++ b/src/stirling/source_connectors/socket_tracer/socket_trace_connector.cc @@ -93,6 +93,14 @@ DEFINE_bool(stirling_enable_mux_tracing, DEFINE_bool(stirling_disable_self_tracing, true, "If true, stirling will not trace and process syscalls made by itself."); +// Assume a moderate default network bandwidth peak of 100MiB/s across socket connections for data. +DEFINE_uint32(stirling_socket_tracer_target_data_bw_percpu, 100 * 1024 * 1024, + "Target bytes/sec of data per CPU"); + +// Assume a default of 5MiB/s across socket connections for control events. 
+DEFINE_uint32(stirling_socket_tracer_target_control_bw_percpu, 5 * 1024 * 1024, + "Target bytes/sec of control events per CPU"); + DEFINE_uint32(messages_expiration_duration_secs, 10 * 60, "The duration for which a cached message to be erased."); DEFINE_uint32(messages_size_limit_bytes, 1024 * 1024, @@ -190,37 +198,96 @@ void SocketTraceConnector::InitProtocolTransferSpecs() { } } -Status SocketTraceConnector::InitImpl() { - sampling_freq_mgr_.set_period(kSamplingPeriod); - push_freq_mgr_.set_period(kPushPeriod); +using ProbeType = bpf_tools::BPFProbeAttachType; +const auto kProbeSpecs = MakeArray( + {{"connect", ProbeType::kEntry, "syscall__probe_entry_connect"}, + {"connect", ProbeType::kReturn, "syscall__probe_ret_connect"}, + {"accept", ProbeType::kEntry, "syscall__probe_entry_accept"}, + {"accept", ProbeType::kReturn, "syscall__probe_ret_accept"}, + {"accept4", ProbeType::kEntry, "syscall__probe_entry_accept4"}, + {"accept4", ProbeType::kReturn, "syscall__probe_ret_accept4"}, + {"write", ProbeType::kEntry, "syscall__probe_entry_write"}, + {"write", ProbeType::kReturn, "syscall__probe_ret_write"}, + {"writev", ProbeType::kEntry, "syscall__probe_entry_writev"}, + {"writev", ProbeType::kReturn, "syscall__probe_ret_writev"}, + {"send", ProbeType::kEntry, "syscall__probe_entry_send"}, + {"send", ProbeType::kReturn, "syscall__probe_ret_send"}, + {"sendto", ProbeType::kEntry, "syscall__probe_entry_sendto"}, + {"sendto", ProbeType::kReturn, "syscall__probe_ret_sendto"}, + {"sendmsg", ProbeType::kEntry, "syscall__probe_entry_sendmsg"}, + {"sendmsg", ProbeType::kReturn, "syscall__probe_ret_sendmsg"}, + {"sendmmsg", ProbeType::kEntry, "syscall__probe_entry_sendmmsg"}, + {"sendmmsg", ProbeType::kReturn, "syscall__probe_ret_sendmmsg"}, + {"sendfile", ProbeType::kEntry, "syscall__probe_entry_sendfile"}, + {"sendfile", ProbeType::kReturn, "syscall__probe_ret_sendfile"}, + {"sendfile64", ProbeType::kEntry, "syscall__probe_entry_sendfile"}, + {"sendfile64", 
ProbeType::kReturn, "syscall__probe_ret_sendfile"}, + {"read", ProbeType::kEntry, "syscall__probe_entry_read"}, + {"read", ProbeType::kReturn, "syscall__probe_ret_read"}, + {"readv", ProbeType::kEntry, "syscall__probe_entry_readv"}, + {"readv", ProbeType::kReturn, "syscall__probe_ret_readv"}, + {"recv", ProbeType::kEntry, "syscall__probe_entry_recv"}, + {"recv", ProbeType::kReturn, "syscall__probe_ret_recv"}, + {"recvfrom", ProbeType::kEntry, "syscall__probe_entry_recvfrom"}, + {"recvfrom", ProbeType::kReturn, "syscall__probe_ret_recvfrom"}, + {"recvmsg", ProbeType::kEntry, "syscall__probe_entry_recvmsg"}, + {"recvmsg", ProbeType::kReturn, "syscall__probe_ret_recvmsg"}, + {"recvmmsg", ProbeType::kEntry, "syscall__probe_entry_recvmmsg"}, + {"recvmmsg", ProbeType::kReturn, "syscall__probe_ret_recvmmsg"}, + {"close", ProbeType::kEntry, "syscall__probe_entry_close"}, + {"close", ProbeType::kReturn, "syscall__probe_ret_close"}, + {"mmap", ProbeType::kEntry, "syscall__probe_entry_mmap"}, + {"sock_alloc", ProbeType::kReturn, "probe_ret_sock_alloc", /*is_syscall*/ false}, + {"security_socket_sendmsg", ProbeType::kEntry, "probe_entry_security_socket_sendmsg", + /*is_syscall*/ false}, + {"security_socket_recvmsg", ProbeType::kEntry, "probe_entry_security_socket_recvmsg", + /*is_syscall*/ false}}); + +auto SocketTraceConnector::InitPerfBufferSpecs() { + const double kSecondsPerPeriod = // Sampling period expressed in seconds. + std::chrono::duration_cast<std::chrono::milliseconds>(kSamplingPeriod).count() / 1000.0; + const int kTargetDataBufferSize = + static_cast<int>(FLAGS_stirling_socket_tracer_target_data_bw_percpu * kSecondsPerPeriod); + const int kTargetControlBufferSize = // Sized from the control-event bandwidth flag. + static_cast<int>(FLAGS_stirling_socket_tracer_target_control_bw_percpu * kSecondsPerPeriod); + + return MakeArray<bpf_tools::PerfBufferSpec>({ + // For data events. The order must be consistent with output tables. + {"socket_data_events", HandleDataEvent, HandleDataEventLoss, kTargetDataBufferSize}, + // For non-data events. Must not mix with the above perf buffers for data events. 
+ {"socket_control_events", HandleControlEvent, HandleControlEventLoss, + kTargetControlBufferSize}, + {"conn_stats_events", HandleConnStatsEvent, HandleConnStatsEventLoss, + kTargetControlBufferSize}, + {"mmap_events", HandleMMapEvent, HandleMMapEventLoss, kTargetControlBufferSize}, + {"go_grpc_header_events", HandleHTTP2HeaderEvent, HandleHTTP2HeaderEventLoss, + kTargetDataBufferSize / 10}, + {"go_grpc_data_events", HandleHTTP2Data, HandleHTTP2DataLoss, kTargetDataBufferSize}, + }); +} - constexpr uint64_t kNanosPerSecond = 1000 * 1000 * 1000; - if (kNanosPerSecond % sysconfig_.KernelTicksPerSecond() != 0) { - return error::Internal( - "SC_CLK_TCK aka USER_HZ must be 100, otherwise our BPF code may not generate proper " - "timestamps in a way that matches how /proc/stat does it"); - } +Status SocketTraceConnector::InitBPF() { + // PROTOCOL_LIST: Requires update on new protocols. + std::vector<std::string> defines = { + absl::StrCat("-DENABLE_HTTP_TRACING=", FLAGS_stirling_enable_http_tracing), + absl::StrCat("-DENABLE_CQL_TRACING=", FLAGS_stirling_enable_cass_tracing), + absl::StrCat("-DENABLE_MUX_TRACING=", FLAGS_stirling_enable_mux_tracing), + absl::StrCat("-DENABLE_PGSQL_TRACING=", FLAGS_stirling_enable_pgsql_tracing), + absl::StrCat("-DENABLE_MYSQL_TRACING=", FLAGS_stirling_enable_mysql_tracing), + absl::StrCat("-DENABLE_KAFKA_TRACING=", FLAGS_stirling_enable_kafka_tracing), + absl::StrCat("-DENABLE_DNS_TRACING=", FLAGS_stirling_enable_dns_tracing), + absl::StrCat("-DENABLE_REDIS_TRACING=", FLAGS_stirling_enable_redis_tracing), + absl::StrCat("-DENABLE_NATS_TRACING=", FLAGS_stirling_enable_nats_tracing), + absl::StrCat("-DENABLE_MUX_TRACING=", FLAGS_stirling_enable_mux_tracing), + absl::StrCat("-DENABLE_MONGO_TRACING=", "true"), + }; + PL_RETURN_IF_ERROR(InitBPFProgram(socket_trace_bcc_script, defines)); - PL_RETURN_IF_ERROR(InitBPFProgram( - socket_trace_bcc_script, - // PROTOCOL_LIST: Requires update on new protocols. 
- { - absl::StrCat("-DENABLE_HTTP_TRACING=", FLAGS_stirling_enable_http_tracing), - absl::StrCat("-DENABLE_CQL_TRACING=", FLAGS_stirling_enable_cass_tracing), - absl::StrCat("-DENABLE_MUX_TRACING=", FLAGS_stirling_enable_mux_tracing), - absl::StrCat("-DENABLE_PGSQL_TRACING=", FLAGS_stirling_enable_pgsql_tracing), - absl::StrCat("-DENABLE_MYSQL_TRACING=", FLAGS_stirling_enable_mysql_tracing), - absl::StrCat("-DENABLE_KAFKA_TRACING=", FLAGS_stirling_enable_kafka_tracing), - absl::StrCat("-DENABLE_DNS_TRACING=", FLAGS_stirling_enable_dns_tracing), - absl::StrCat("-DENABLE_REDIS_TRACING=", FLAGS_stirling_enable_redis_tracing), - absl::StrCat("-DENABLE_NATS_TRACING=", FLAGS_stirling_enable_nats_tracing), - absl::StrCat("-DENABLE_MUX_TRACING=", FLAGS_stirling_enable_mux_tracing), - absl::StrCat("-DENABLE_MONGO_TRACING=", "true"), - })); PL_RETURN_IF_ERROR(AttachKProbes(kProbeSpecs)); LOG(INFO) << absl::Substitute("Number of kprobes deployed = $0", kProbeSpecs.size()); LOG(INFO) << "Probes successfully deployed."; + const auto kPerfBufferSpecs = InitPerfBufferSpecs(); PL_RETURN_IF_ERROR(OpenPerfBuffers(kPerfBufferSpecs, this)); LOG(INFO) << absl::Substitute("Number of perf buffers opened = $0", kPerfBufferSpecs.size()); @@ -243,12 +310,27 @@ Status SocketTraceConnector::InitImpl() { SetupOutput(FLAGS_perf_buffer_events_output_path); } + return Status::OK(); +} + +Status SocketTraceConnector::InitImpl() { + sampling_freq_mgr_.set_period(kSamplingPeriod); + push_freq_mgr_.set_period(kPushPeriod); + + constexpr uint64_t kNanosPerSecond = 1000 * 1000 * 1000; + if (kNanosPerSecond % sysconfig_.KernelTicksPerSecond() != 0) { + return error::Internal( + "SC_CLK_TCK aka USER_HZ must be 100, otherwise our BPF code may not generate proper " + "timestamps in a way that matches how /proc/stat does it"); + } + + PL_RETURN_IF_ERROR(InitBPF()); + StatusOr<std::unique_ptr<system::SocketInfoManager>> s = system::SocketInfoManager::Create(system::Config::GetInstance().proc_path(), system::kTCPEstablishedState | 
system::kTCPListeningState); if (!s.ok()) { - LOG(WARNING) << absl::Substitute("Failed to set up socket prober manager. Message: $0", - s.msg()); + LOG(WARNING) << absl::Substitute("Failed to set up SocketInfoManager. Message: $0", s.msg()); } else { socket_info_mgr_ = s.ConsumeValueOrDie(); } diff --git a/src/stirling/source_connectors/socket_tracer/socket_trace_connector.h b/src/stirling/source_connectors/socket_tracer/socket_trace_connector.h index b23102a431c..2acb48cf94c 100644 --- a/src/stirling/source_connectors/socket_tracer/socket_trace_connector.h +++ b/src/stirling/source_connectors/socket_tracer/socket_trace_connector.h @@ -62,6 +62,9 @@ DECLARE_bool(stirling_enable_mux_tracing); DECLARE_bool(stirling_disable_self_tracing); DECLARE_string(stirling_role_to_trace); +DECLARE_uint32(stirling_socket_tracer_target_data_bw_percpu); +DECLARE_uint32(stirling_socket_tracer_target_control_bw_percpu); + DECLARE_uint32(messages_expiration_duration_secs); DECLARE_uint32(messages_size_limit_bytes); @@ -145,82 +148,6 @@ class SocketTraceConnector : public SourceConnector, public bpf_tools::BCCWrappe static void HandleHTTP2Data(void* cb_cookie, void* data, int data_size); static void HandleHTTP2DataLoss(void* cb_cookie, uint64_t lost); - static constexpr auto kProbeSpecs = MakeArray( - {{"connect", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_connect"}, - {"connect", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_connect"}, - {"accept", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_accept"}, - {"accept", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_accept"}, - {"accept4", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_accept4"}, - {"accept4", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_accept4"}, - {"write", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_write"}, - {"write", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_write"}, - {"writev", 
bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_writev"}, - {"writev", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_writev"}, - {"send", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_send"}, - {"send", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_send"}, - {"sendto", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_sendto"}, - {"sendto", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_sendto"}, - {"sendmsg", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_sendmsg"}, - {"sendmsg", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_sendmsg"}, - {"sendmmsg", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_sendmmsg"}, - {"sendmmsg", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_sendmmsg"}, - {"sendfile", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_sendfile"}, - {"sendfile", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_sendfile"}, - {"sendfile64", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_sendfile"}, - {"sendfile64", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_sendfile"}, - {"read", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_read"}, - {"read", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_read"}, - {"readv", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_readv"}, - {"readv", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_readv"}, - {"recv", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_recv"}, - {"recv", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_recv"}, - {"recvfrom", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_recvfrom"}, - {"recvfrom", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_recvfrom"}, - {"recvmsg", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_recvmsg"}, - {"recvmsg", bpf_tools::BPFProbeAttachType::kReturn, 
"syscall__probe_ret_recvmsg"}, - {"recvmmsg", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_recvmmsg"}, - {"recvmmsg", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_recvmmsg"}, - {"close", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_close"}, - {"close", bpf_tools::BPFProbeAttachType::kReturn, "syscall__probe_ret_close"}, - {"mmap", bpf_tools::BPFProbeAttachType::kEntry, "syscall__probe_entry_mmap"}, - {"sock_alloc", bpf_tools::BPFProbeAttachType::kReturn, "probe_ret_sock_alloc", - /*is_syscall*/ false}, - {"security_socket_sendmsg", bpf_tools::BPFProbeAttachType::kEntry, - "probe_entry_security_socket_sendmsg", - /*is_syscall*/ false}, - {"security_socket_recvmsg", bpf_tools::BPFProbeAttachType::kEntry, - "probe_entry_security_socket_recvmsg", - /*is_syscall*/ false}}); - - // TODO(oazizi): Remove send and recv probes once we are confident that they don't trace anything. - // Note that send/recv are not in the syscall table - // (https://filippo.io/linux-syscall-table/), but are defined as SYSCALL_DEFINE4 in - // https://elixir.bootlin.com/linux/latest/source/net/socket.c. - - // Assume a moderate network bandwidth peak of 100MiB/s across socket connections for data. - inline static constexpr int64_t kTargetDataBytesPerSec = 100 * 1024 * 1024; - inline static constexpr int64_t kTargetDataBufferSize = - kTargetDataBytesPerSec * kSamplingPeriod.count() / 1000; - - // Assume a 5MiB/s across socket connections for control events. - inline static constexpr int64_t kTargetControlBytesPerSec = 5 * 1024 * 1024; - inline static constexpr int64_t kTargetControlBufferSize = - kTargetControlBytesPerSec * kSamplingPeriod.count() / 1000; - - inline static const auto kPerfBufferSpecs = MakeArray({ - // For data events. The order must be consistent with output tables. - {"socket_data_events", HandleDataEvent, HandleDataEventLoss, kTargetDataBufferSize}, - // For non-data events. Must not mix with the above perf buffers for data events. 
- {"socket_control_events", HandleControlEvent, HandleControlEventLoss, - kTargetControlBufferSize}, - {"conn_stats_events", HandleConnStatsEvent, HandleConnStatsEventLoss, - kTargetControlBufferSize}, - {"mmap_events", HandleMMapEvent, HandleMMapEventLoss, kTargetControlBufferSize}, - {"go_grpc_header_events", HandleHTTP2HeaderEvent, HandleHTTP2HeaderEventLoss, - kTargetDataBufferSize / 10}, - {"go_grpc_data_events", HandleHTTP2Data, HandleHTTP2DataLoss, kTargetDataBufferSize}, - }); - // Most HTTP servers support 8K headers, so we truncate after that. // https://stackoverflow.com/questions/686217/maximum-on-http-header-values inline static constexpr size_t kMaxHTTPHeadersBytes = 8192; @@ -230,7 +157,8 @@ class SocketTraceConnector : public SourceConnector, public bpf_tools::BCCWrappe explicit SocketTraceConnector(std::string_view source_name); - // Initialize protocol_transfer_specs_. + Status InitBPF(); + auto InitPerfBufferSpecs(); void InitProtocolTransferSpecs(); ConnTracker& GetOrCreateConnTracker(struct conn_id_t conn_id);