Skip to content

Commit

Permalink
[Add Local Addr & Port 1/3] Capture local addr from socket for accept…
Browse files Browse the repository at this point in the history
… syscalls (#1808)

Summary: Capture the local address from the socket, when one is present, for accept
syscalls. This will support entity relationships for the standalone PEM.

Note that for syscalls without socket information (e.g. `connect`), we
are currently unable to trace the local IP from BPF. For cases where we
fail to trace connect/accept calls, we still try to parse the local address
from socket information via `InferConnInfo`.

Type of change: /kind feature

Test Plan: Ran the standalone PEM on a dev cluster and tracked local address
inference from BPF. Ran a PxL script for HTTP events.

---------

Signed-off-by: Benjamin Kilimnik <[email protected]>
  • Loading branch information
benkilimnik authored Jan 24, 2024
1 parent ecb5353 commit 0ec2c3a
Show file tree
Hide file tree
Showing 11 changed files with 110 additions and 72 deletions.
42 changes: 26 additions & 16 deletions src/stirling/source_connectors/socket_tracer/bcc_bpf/socket_trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ static __inline void init_conn_info(uint32_t tgid, int32_t fd, struct conn_info_
init_conn_id(tgid, fd, &conn_info->conn_id);
// NOTE: BCC code defaults to 0; since kRoleUnknown is not 0, role must be explicitly initialized.
conn_info->role = kRoleUnknown;
conn_info->addr.sa.sa_family = PX_AF_UNKNOWN;
conn_info->laddr.sa.sa_family = PX_AF_UNKNOWN;
conn_info->raddr.sa.sa_family = PX_AF_UNKNOWN;
}

// Be careful calling this function. The automatic creation of BPF map entries can result in a
Expand Down Expand Up @@ -227,7 +228,8 @@ static __inline struct conn_stats_event_t* fill_conn_stats_event(
}

event->conn_id = conn_info->conn_id;
event->addr = conn_info->addr;
event->laddr = conn_info->laddr;
event->raddr = conn_info->raddr;
event->role = conn_info->role;
event->wr_bytes = conn_info->wr_bytes;
event->rd_bytes = conn_info->rd_bytes;
Expand All @@ -251,7 +253,7 @@ static __inline bool should_trace_conn(struct conn_info_t* conn_info) {
// we only send connections on INET or UNKNOWN to user-space.
// Also, it's very important to send the UNKNOWN cases to user-space,
// otherwise we may have a BPF map leak from the earlier call to get_or_create_conn_info().
return should_trace_sockaddr_family(conn_info->addr.sa.sa_family);
return should_trace_sockaddr_family(conn_info->raddr.sa.sa_family);
}

// If this returns false, we still will trace summary stats.
Expand Down Expand Up @@ -351,19 +353,26 @@ static __inline void read_sockaddr_kernel(struct conn_info_t* conn_info,

struct sock_common* sk_common = &sk->__sk_common;
uint16_t family = -1;
uint16_t port = -1;
uint16_t lport = -1;
uint16_t rport = -1;

BPF_PROBE_READ_KERNEL_VAR(family, &sk_common->skc_family);
BPF_PROBE_READ_KERNEL_VAR(port, &sk_common->skc_dport);
BPF_PROBE_READ_KERNEL_VAR(lport, &sk_common->skc_num);
BPF_PROBE_READ_KERNEL_VAR(rport, &sk_common->skc_dport);

conn_info->addr.sa.sa_family = family;
conn_info->laddr.sa.sa_family = family;
conn_info->raddr.sa.sa_family = family;

if (family == AF_INET) {
conn_info->addr.in4.sin_port = port;
BPF_PROBE_READ_KERNEL_VAR(conn_info->addr.in4.sin_addr.s_addr, &sk_common->skc_daddr);
conn_info->laddr.in4.sin_port = lport;
conn_info->raddr.in4.sin_port = rport;
BPF_PROBE_READ_KERNEL_VAR(conn_info->laddr.in4.sin_addr.s_addr, &sk_common->skc_rcv_saddr);
BPF_PROBE_READ_KERNEL_VAR(conn_info->raddr.in4.sin_addr.s_addr, &sk_common->skc_daddr);
} else if (family == AF_INET6) {
conn_info->addr.in6.sin6_port = port;
BPF_PROBE_READ_KERNEL_VAR(conn_info->addr.in6.sin6_addr, &sk_common->skc_v6_daddr);
conn_info->laddr.in6.sin6_port = lport;
conn_info->raddr.in6.sin6_port = rport;
BPF_PROBE_READ_KERNEL_VAR(conn_info->laddr.in6.sin6_addr, &sk_common->skc_v6_rcv_saddr);
BPF_PROBE_READ_KERNEL_VAR(conn_info->raddr.in6.sin6_addr, &sk_common->skc_v6_daddr);
}
}

Expand All @@ -372,10 +381,10 @@ static __inline void submit_new_conn(struct pt_regs* ctx, uint32_t tgid, int32_t
enum endpoint_role_t role, enum source_function_t source_fn) {
struct conn_info_t conn_info = {};
init_conn_info(tgid, fd, &conn_info);
if (addr != NULL) {
conn_info.addr = *((union sockaddr_t*)addr);
} else if (socket != NULL) {
if (socket != NULL) {
read_sockaddr_kernel(&conn_info, socket);
} else if (addr != NULL) {
conn_info.raddr = *((union sockaddr_t*)addr);
}
conn_info.role = role;

Expand All @@ -385,7 +394,7 @@ static __inline void submit_new_conn(struct pt_regs* ctx, uint32_t tgid, int32_t
// While we keep all sa_family types in conn_info_map,
// we only send connections with supported protocols to user-space.
// We use the same filter function to avoid sending data of unwanted connections as well.
if (!should_trace_sockaddr_family(conn_info.addr.sa.sa_family)) {
if (!should_trace_sockaddr_family(conn_info.raddr.sa.sa_family)) {
return;
}

Expand All @@ -394,7 +403,8 @@ static __inline void submit_new_conn(struct pt_regs* ctx, uint32_t tgid, int32_t
control_event.timestamp_ns = bpf_ktime_get_ns();
control_event.conn_id = conn_info.conn_id;
control_event.source_fn = source_fn;
control_event.open.addr = conn_info.addr;
control_event.open.raddr = conn_info.raddr;
control_event.open.laddr = conn_info.laddr;
control_event.open.role = conn_info.role;

socket_control_events.perf_submit(ctx, &control_event, sizeof(struct socket_control_event_t));
Expand Down Expand Up @@ -923,7 +933,7 @@ static __inline void process_syscall_close(struct pt_regs* ctx, uint64_t id,

// Only submit event to user-space if there was a corresponding open or data event reported.
// This is to avoid polluting the perf buffer.
if (should_trace_sockaddr_family(conn_info->addr.sa.sa_family) || conn_info->wr_bytes != 0 ||
if (should_trace_sockaddr_family(conn_info->raddr.sa.sa_family) || conn_info->wr_bytes != 0 ||
conn_info->rd_bytes != 0) {
submit_close_event(ctx, conn_info, kSyscallClose);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,10 @@ struct conn_info_t {
// Connection identifier (PID, FD, etc.).
struct conn_id_t conn_id;

// IP address of the local endpoint.
union sockaddr_t laddr;
// IP address of the remote endpoint.
union sockaddr_t addr;
union sockaddr_t raddr;

// The protocol of traffic on the connection (HTTP, MySQL, etc.).
enum traffic_protocol_t protocol;
Expand Down Expand Up @@ -92,7 +94,8 @@ struct conn_info_t {
// This struct is a subset of conn_info_t. It is used to communicate connect/accept events.
// See conn_info_t for descriptions of the members.
struct conn_event_t {
union sockaddr_t addr;
union sockaddr_t laddr;
union sockaddr_t raddr;
enum endpoint_role_t role;
};

Expand Down Expand Up @@ -222,8 +225,10 @@ struct conn_stats_event_t {

struct conn_id_t conn_id;

// IP address of the local endpoint.
union sockaddr_t laddr;
// IP address of the remote endpoint.
union sockaddr_t addr;
union sockaddr_t raddr;

// The server-client role.
enum endpoint_role_t role;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ inline std::string ToString(const close_event_t& event) {

inline std::string ToString(const conn_event_t& event) {
return absl::Substitute("[addr=$0]",
::px::ToString(reinterpret_cast<const struct sockaddr*>(&event.addr)));
::px::ToString(reinterpret_cast<const struct sockaddr*>(&event.raddr)));
}

inline std::string ToString(const socket_control_event_t& event) {
Expand Down
50 changes: 25 additions & 25 deletions src/stirling/source_connectors/socket_tracer/conn_stats_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ TEST_F(ConnStatsTest, Basic) {
conn_stats_event.timestamp_ns = 0;
conn_stats_event.conn_id = kConnID0;
conn_stats_event.role = kRoleClient;
conn_stats_event.addr.in4.sin_family = AF_INET;
conn_stats_event.addr.in4.sin_port = htons(80);
conn_stats_event.addr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn_stats_event.raddr.in4.sin_family = AF_INET;
conn_stats_event.raddr.in4.sin_port = htons(80);
conn_stats_event.raddr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn_stats_event.conn_events = 0;
conn_stats_event.rd_bytes = 0;
conn_stats_event.wr_bytes = 0;
Expand Down Expand Up @@ -171,9 +171,9 @@ TEST_F(ConnStatsTest, ServerSide) {
conn0_stats_event.timestamp_ns = 0;
conn0_stats_event.conn_id = kConnID0;
conn0_stats_event.role = kRoleServer;
conn0_stats_event.addr.in4.sin_family = AF_INET;
conn0_stats_event.addr.in4.sin_port = 54321;
conn0_stats_event.addr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn0_stats_event.raddr.in4.sin_family = AF_INET;
conn0_stats_event.raddr.in4.sin_port = 54321;
conn0_stats_event.raddr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn0_stats_event.conn_events = 0;
conn0_stats_event.rd_bytes = 0;
conn0_stats_event.wr_bytes = 0;
Expand All @@ -199,9 +199,9 @@ TEST_F(ConnStatsTest, ServerSide) {
conn1_stats_event.timestamp_ns = 0;
conn1_stats_event.conn_id = kConnID1;
conn1_stats_event.role = kRoleServer;
conn1_stats_event.addr.in4.sin_family = AF_INET;
conn1_stats_event.addr.in4.sin_port = 65432;
conn1_stats_event.addr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn1_stats_event.raddr.in4.sin_family = AF_INET;
conn1_stats_event.raddr.in4.sin_port = 65432;
conn1_stats_event.raddr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn1_stats_event.conn_events = 0;
conn1_stats_event.rd_bytes = 0;
conn1_stats_event.wr_bytes = 0;
Expand All @@ -227,9 +227,9 @@ TEST_F(ConnStatsTest, ServerSide) {
conn2_stats_event.timestamp_ns = 0;
conn2_stats_event.conn_id = kConnID3;
conn2_stats_event.role = kRoleServer;
conn2_stats_event.addr.in4.sin_family = AF_INET;
conn2_stats_event.addr.in4.sin_port = 12345;
conn2_stats_event.addr.in4.sin_addr.s_addr = 0x02020202; // 2.2.2.2
conn2_stats_event.raddr.in4.sin_family = AF_INET;
conn2_stats_event.raddr.in4.sin_port = 12345;
conn2_stats_event.raddr.in4.sin_addr.s_addr = 0x02020202; // 2.2.2.2
conn2_stats_event.conn_events = 0;
conn2_stats_event.rd_bytes = 0;
conn2_stats_event.wr_bytes = 0;
Expand Down Expand Up @@ -263,9 +263,9 @@ TEST_F(ConnStatsTest, ClientSide) {
conn0_stats_event.timestamp_ns = 0;
conn0_stats_event.conn_id = kConnID0;
conn0_stats_event.role = kRoleClient;
conn0_stats_event.addr.in4.sin_family = AF_INET;
conn0_stats_event.addr.in4.sin_port = htons(80);
conn0_stats_event.addr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn0_stats_event.raddr.in4.sin_family = AF_INET;
conn0_stats_event.raddr.in4.sin_port = htons(80);
conn0_stats_event.raddr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn0_stats_event.conn_events = 0;
conn0_stats_event.rd_bytes = 0;
conn0_stats_event.wr_bytes = 0;
Expand All @@ -291,9 +291,9 @@ TEST_F(ConnStatsTest, ClientSide) {
conn1_stats_event.timestamp_ns = 0;
conn1_stats_event.conn_id = kConnID1;
conn1_stats_event.role = kRoleClient;
conn1_stats_event.addr.in4.sin_family = AF_INET;
conn1_stats_event.addr.in4.sin_port = htons(80);
conn1_stats_event.addr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn1_stats_event.raddr.in4.sin_family = AF_INET;
conn1_stats_event.raddr.in4.sin_port = htons(80);
conn1_stats_event.raddr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn1_stats_event.conn_events = 0;
conn1_stats_event.rd_bytes = 0;
conn1_stats_event.wr_bytes = 0;
Expand All @@ -319,9 +319,9 @@ TEST_F(ConnStatsTest, ClientSide) {
conn2_stats_event.timestamp_ns = 0;
conn2_stats_event.conn_id = kConnID3;
conn2_stats_event.role = kRoleClient;
conn2_stats_event.addr.in4.sin_family = AF_INET;
conn2_stats_event.addr.in4.sin_port = htons(21);
conn2_stats_event.addr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn2_stats_event.raddr.in4.sin_family = AF_INET;
conn2_stats_event.raddr.in4.sin_port = htons(21);
conn2_stats_event.raddr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn2_stats_event.conn_events = 0;
conn2_stats_event.rd_bytes = 0;
conn2_stats_event.wr_bytes = 0;
Expand Down Expand Up @@ -350,7 +350,7 @@ TEST_F(ConnStatsTest, NoEventsIfNoRemoteAddr) {
conn_stats_event.timestamp_ns = 0;
conn_stats_event.conn_id = kConnID0;
conn_stats_event.role = kRoleClient;
conn_stats_event.addr.in4.sin_family = PX_AF_UNKNOWN;
conn_stats_event.raddr.in4.sin_family = PX_AF_UNKNOWN;
conn_stats_event.conn_events = 0;
conn_stats_event.rd_bytes = 0;
conn_stats_event.wr_bytes = 0;
Expand All @@ -377,9 +377,9 @@ TEST_F(ConnStatsTest, DisabledConnTracker) {
conn_stats_event.timestamp_ns = 0;
conn_stats_event.conn_id = kConnID0;
conn_stats_event.role = kRoleClient;
conn_stats_event.addr.in4.sin_family = AF_INET;
conn_stats_event.addr.in4.sin_port = htons(80);
conn_stats_event.addr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn_stats_event.raddr.in4.sin_family = AF_INET;
conn_stats_event.raddr.in4.sin_port = htons(80);
conn_stats_event.raddr.in4.sin_addr.s_addr = 0x01010101; // 1.1.1.1
conn_stats_event.conn_events = 0;
conn_stats_event.rd_bytes = 0;
conn_stats_event.wr_bytes = 0;
Expand Down
40 changes: 30 additions & 10 deletions src/stirling/source_connectors/socket_tracer/conn_tracker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ void ConnTracker::AddConnOpenEvent(const socket_control_event_t& event) {
}
open_info_.timestamp_ns = event.timestamp_ns;

SetRemoteAddr(event.open.addr, "Inferred from conn_open.");
SetRemoteAddr(event.open.raddr, "Inferred from conn_open.");
SetLocalAddr(event.open.laddr, "Inferred from conn_open.");

SetRole(event.open.role, "Inferred from conn_open.");

Expand Down Expand Up @@ -192,7 +193,8 @@ void UpdateProtocolMetrics(traffic_protocol_t protocol, const conn_stats_event_t

void ConnTracker::AddConnStats(const conn_stats_event_t& event) {
SetRole(event.role, "inferred from conn_stats event");
SetRemoteAddr(event.addr, "conn_stats event");
SetRemoteAddr(event.raddr, "conn_stats event");
SetLocalAddr(event.laddr, "conn_stats event");
UpdateTimestamps(event.timestamp_ns);

CONN_TRACE(1) << absl::Substitute("ConnStats timestamp=$0 wr=$1 rd=$2 close=$3",
Expand Down Expand Up @@ -476,6 +478,17 @@ void ConnTracker::SetRemoteAddr(const union sockaddr_t addr, std::string_view re
}
}

// Records the local endpoint address of this connection in open_info_.local_addr.
//
// @param addr   Raw socket address of the local endpoint, as reported by an event.
// @param reason Free-form description of where the address came from; only used for tracing.
//
// The address is only written while the stored family is still kUnspecified, so once a
// usable local address has been recorded, later calls leave it untouched.
void ConnTracker::SetLocalAddr(const union sockaddr_t addr, std::string_view reason) {
  // A local address has already been recorded; keep it.
  if (open_info_.local_addr.family != SockAddrFamily::kUnspecified) {
    return;
  }

  auto& local = open_info_.local_addr;
  PopulateSockAddr(&addr.sa, &local);

  // An unknown source family carries no usable address: force the stored family back to
  // kUnspecified so that a subsequent call is still allowed to fill it in.
  const bool family_unknown = (addr.sa.sa_family == PX_AF_UNKNOWN);
  if (family_unknown) {
    local.family = SockAddrFamily::kUnspecified;
  }

  CONN_TRACE(1) << absl::Substitute("LocalAddr updated $0, reason=[$1]", local.AddrStr(), reason);
}

bool ConnTracker::SetRole(endpoint_role_t role, std::string_view reason) {
// Don't allow changing active role, unless it is from unknown to something else.
if (role_ != kRoleUnknown) {
Expand Down Expand Up @@ -782,8 +795,8 @@ void ConnTracker::IterationPreTick(
return;
}

// If remote_addr is missing, it means the connect/accept was not traced.
// Attempt to infer the connection information, to populate remote_addr.
// If remote_addr is missing, it means the connect/accept syscall was not traced.
// Attempt to infer the connection information, to populate remote_addr and local_addr.
if (open_info_.remote_addr.family == SockAddrFamily::kUnspecified && socket_info_mgr != nullptr) {
InferConnInfo(proc_parser, socket_info_mgr);

Expand Down Expand Up @@ -888,19 +901,26 @@ double ConnTracker::StitchFailureRate() const {

namespace {

Status ParseSocketInfoRemoteAddr(const system::SocketInfo& socket_info, SockAddr* addr) {
Status ParseSocketInfoAddr(const system::SocketInfo& socket_info, SockAddr* remote_addr,
SockAddr* local_addr) {
switch (socket_info.family) {
case AF_INET:
PopulateInetAddr(std::get<struct in_addr>(socket_info.remote_addr), socket_info.remote_port,
addr);
remote_addr);
PopulateInetAddr(std::get<struct in_addr>(socket_info.local_addr), socket_info.local_port,
local_addr);
break;
case AF_INET6:
PopulateInet6Addr(std::get<struct in6_addr>(socket_info.remote_addr), socket_info.remote_port,
addr);
remote_addr);
PopulateInet6Addr(std::get<struct in6_addr>(socket_info.local_addr), socket_info.local_port,
local_addr);
break;
case AF_UNIX:
PopulateUnixAddr(std::get<struct un_path_t>(socket_info.remote_addr).path,
socket_info.remote_port, addr);
socket_info.remote_port, remote_addr);
PopulateUnixAddr(std::get<struct un_path_t>(socket_info.local_addr).path,
socket_info.local_port, local_addr);
break;
default:
return error::Internal("Unknown socket_info family: $0", socket_info.family);
Expand Down Expand Up @@ -1004,11 +1024,11 @@ void ConnTracker::InferConnInfo(system::ProcParser* proc_parser,

// Success! Now copy the inferred socket information into the ConnTracker.

Status s = ParseSocketInfoRemoteAddr(socket_info, &open_info_.remote_addr);
Status s = ParseSocketInfoAddr(socket_info, &open_info_.remote_addr, &open_info_.local_addr);
if (!s.ok()) {
conn_resolver_.reset();
conn_resolution_failed_ = true;
LOG(ERROR) << absl::Substitute("Remote address (type=$0) parsing failed. Message: $1",
LOG(ERROR) << absl::Substitute("Remote and local address (type=$0) parsing failed. Message: $1",
socket_info.family, s.msg());
return;
}
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/conn_tracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ struct SocketOpen {
uint64_t timestamp_ns = 0;
// TODO(yzhao): Consider using std::optional to indicate the address has not been initialized.
SockAddr remote_addr;
SockAddr local_addr;
};

struct SocketClose {
Expand Down Expand Up @@ -577,6 +578,7 @@ class ConnTracker : NotCopyMoveable {
void SetConnID(struct conn_id_t conn_id);

void SetRemoteAddr(const union sockaddr_t addr, std::string_view reason);
void SetLocalAddr(const union sockaddr_t addr, std::string_view reason);

// Returns false if the protocol change is disallowed.
bool SetProtocol(traffic_protocol_t protocol, std::string_view reason);
Expand Down
Loading

0 comments on commit 0ec2c3a

Please sign in to comment.