diff --git a/src/stirling/source_connectors/socket_tracer/bcc_bpf/socket_trace.c b/src/stirling/source_connectors/socket_tracer/bcc_bpf/socket_trace.c
index ba5df955dde..f51260d2108 100644
--- a/src/stirling/source_connectors/socket_tracer/bcc_bpf/socket_trace.c
+++ b/src/stirling/source_connectors/socket_tracer/bcc_bpf/socket_trace.c
@@ -35,8 +35,8 @@
 #include "src/stirling/upid/upid.h"
 
 // This keeps instruction count below BPF's limit of 4096 per probe.
-#define LOOP_LIMIT 42
 #define PROTOCOL_VEC_LIMIT 3
+#define MAX_FILLER_SIZE (1 * 1024 * 1024)  // 1MiB, taken from socket_trace.hpp
 
 const int32_t kInvalidFD = -1;
 
@@ -476,6 +476,10 @@ static __inline void perf_submit_buf(struct pt_regs* ctx, const enum traffic_dir
   } else if (buf_size_minus_1 < 0x7fffffff) {
     // If-statement condition above is only required to prevent clang from optimizing
     // away the `if (amount_copied > 0)` below.
+
+    // Here we truncate an iovec to MAX_MSG_SIZE (30KiB), then in user space we add a filler
+    // event if msg_size (the size of this iovec) > msg_buf_size. If this difference exceeds our
+    // filler max of 1MiB, we push an event with a gap to the data stream buffer.
     bpf_probe_read(&event->msg, MAX_MSG_SIZE, buf);
     amount_copied = MAX_MSG_SIZE;
   }
@@ -483,6 +487,23 @@ static __inline void perf_submit_buf(struct pt_regs* ctx, const enum traffic_dir
   // If-statement is redundant, but is required to keep the 4.14 verifier happy.
   if (amount_copied > 0) {
     event->attr.msg_buf_size = amount_copied;
+    // bytes_missed should be 0 if we didn't truncate amount_copied to MAX_MSG_SIZE above.
+    // Note that perf_submit_buf won't correctly set bytes_missed for perf_submit_iovecs
+    // when bytes_remaining > iov_size and we've reached the loop limit,
+    // because it takes only the size of the current iovec into account,
+    // and not the bytes remaining across all iovecs, which we drop due to the loop limit.
+    // In those cases we rely on the value set in perf_submit_iovecs.
+
+    // For older kernels < 5.1, we can't record gap metadata without exceeding the instruction
+    // limit.
+    if (LOOP_LIMIT > 42 || CHUNK_LIMIT > 4) {
+      if (event->attr.incomplete_chunk != kExceededLoopLimit) {
+        event->attr.bytes_missed = event->attr.msg_size - event->attr.msg_buf_size;
+      }
+      if (event->attr.bytes_missed > 0 && event->attr.incomplete_chunk == kFullyFormed) {
+        event->attr.incomplete_chunk = kUnknownGapReason;
+      }
+    }
     socket_data_events.perf_submit(ctx, event, sizeof(event->attr) + amount_copied);
   }
 }
@@ -493,12 +514,23 @@ static __inline void perf_submit_wrapper(struct pt_regs* ctx,
                                          struct socket_data_event_t* event) {
   int bytes_sent = 0;
   unsigned int i;
-
+  event->attr.incomplete_chunk = kFullyFormed;
+  event->attr.bytes_missed = 0;
 #pragma unroll
   for (i = 0; i < CHUNK_LIMIT; ++i) {
     const int bytes_remaining = buf_size - bytes_sent;
     const size_t current_size =
         (bytes_remaining > MAX_MSG_SIZE && (i != CHUNK_LIMIT - 1)) ? MAX_MSG_SIZE : bytes_remaining;
+    // For older kernels < 5.1, we can't record gap metadata without exceeding the instruction
+    // limit.
+    if (LOOP_LIMIT > 42 || CHUNK_LIMIT > 4) {
+      // Check if we have reached the chunk limit, but there are bytes left to capture beyond our
+      // max msg size.
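+      // (On the final iteration, the ternary above caps current_size at bytes_remaining rather
+      // than MAX_MSG_SIZE, so current_size > MAX_MSG_SIZE here implies leftover data that
+      // perf_submit_buf will truncate.)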
+      const bool chunks_not_fully_captured = i == CHUNK_LIMIT - 1 && current_size > MAX_MSG_SIZE;
+      if (chunks_not_fully_captured) {
+        event->attr.incomplete_chunk = kExceededChunkLimitAndMaxMsgSize;
+      }
+    }
     perf_submit_buf(ctx, direction, buf + bytes_sent, current_size, conn_info, event);
     bytes_sent += current_size;
 
@@ -516,15 +548,41 @@ static __inline void perf_submit_iovecs(struct pt_regs* ctx,
 // array order. That means they read or fill iov[0], then iov[1], and so on. They return the total
 // size of the written or read data. Therefore, when loop through the buffers, both the number of
 // buffers and the total size need to be checked. More details can be found on their man pages.
+  event->attr.incomplete_chunk = kFullyFormed;
+  event->attr.bytes_missed = 0;
   int bytes_sent = 0;
+  unsigned int i;
 #pragma unroll
-  for (int i = 0; i < LOOP_LIMIT && i < iovlen && bytes_sent < total_size; ++i) {
+  for (i = 0; i < LOOP_LIMIT && i < iovlen && bytes_sent < total_size; ++i) {
     struct iovec iov_cpy;
     BPF_PROBE_READ_VAR(iov_cpy, &iov[i]);
-
+    // Total bytes we have left to copy across all iovecs.
     const int bytes_remaining = total_size - bytes_sent;
+    // Bytes contained in this iovec (either the bytes we have left or the size of the iovec,
+    // whichever is smaller). This can be > MAX_MSG_SIZE and is then truncated in perf_submit_buf.
     const size_t iov_size = min_size_t(iov_cpy.iov_len, bytes_remaining);
+    // For older kernels < 5.1, we can't record gap metadata without exceeding the instruction
+    // limit.
+    if (LOOP_LIMIT > 42 || CHUNK_LIMIT > 4) {
+      // We have reached the loop limit, but there are iovecs left to capture.
+      const bool iovec_not_fully_captured = i == LOOP_LIMIT - 1 && i + 1 < iovlen;
+      // This iov exceeds MAX_MSG_SIZE and will be truncated in perf_submit_buf.
+      const bool iov_size_exceeds_max_msg_size = iov_size > MAX_MSG_SIZE;
+
+      if (iovec_not_fully_captured && iov_size_exceeds_max_msg_size) {
+        event->attr.incomplete_chunk = kExceededLoopLimitAndMaxMsgSize;
+      } else if (iovec_not_fully_captured) {
+        event->attr.incomplete_chunk = kExceededLoopLimit;
+        // perf_submit_buf won't correctly set bytes_missed for perf_submit_iovecs
+        // if bytes_remaining > iov_size and we've reached the loop limit,
+        // because it takes only the size of the current iovec into account;
+        // see min(iov_len, bytes_remaining) above.
+        event->attr.bytes_missed = bytes_remaining - iov_size;
+      } else if (iov_size_exceeds_max_msg_size) {
+        event->attr.incomplete_chunk = kIovSizeExceededMaxMsgSize;
+      }
+    }
     // TODO(oazizi/yzhao): Should switch this to go through perf_submit_wrapper.
     // We don't have the BPF instruction count to do so right now.
     perf_submit_buf(ctx, direction, iov_cpy.iov_base, iov_size, conn_info, event);
@@ -533,9 +591,6 @@ static __inline void perf_submit_iovecs(struct pt_regs* ctx,
     // Move the position for the next event.
     event->attr.pos += iov_size;
   }
-
-  // TODO(oazizi): If there is data left after the loop limit, we should still report the remainder
-  // with a data-less event.
 }
 
 /***********************************************************
@@ -887,6 +942,22 @@ static __inline void process_syscall_sendfile(struct pt_regs* ctx, uint64_t id,
   }
 
   event->attr.pos = conn_info->wr_bytes;
+  // For older kernels < 5.1, we can't record gap metadata without exceeding the instruction
+  // limit.
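+  // Note: sendfile(2) moves data between file descriptors inside the kernel, so the payload
+  // never passes through a user-space buffer that we could copy here.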
+  if (LOOP_LIMIT > 42 || CHUNK_LIMIT > 4) {
+    // Technically we drop all the data and just send the gap event; user space fills the gap
+    // with \0 bytes, up to 1MiB.
+    if (bytes_count > MAX_FILLER_SIZE) {
+      // If we exceed the max filler size (1MiB), we'll create a gap in the data stream buffer.
+      event->attr.incomplete_chunk = kSendFileExceededMaxFillerSize;
+    } else {
+      // If we don't exceed the max filler size for this sendfile, we record a complete
+      // kSendFile to keep track of it in our metrics. If filler is enabled (lazy parsing off),
+      // we will allocate a filler event in user space to fill the gap left by an empty sendfile.
+      event->attr.incomplete_chunk = kSendFile;
+    }
+    event->attr.bytes_missed = bytes_count;
+  }
   event->attr.msg_size = bytes_count;
   event->attr.msg_buf_size = 0;
   socket_data_events.perf_submit(ctx, event, sizeof(event->attr));
diff --git a/src/stirling/source_connectors/socket_tracer/bcc_bpf_intf/socket_trace.h b/src/stirling/source_connectors/socket_tracer/bcc_bpf_intf/socket_trace.h
index 8fe90b6230d..14fc411fb73 100644
--- a/src/stirling/source_connectors/socket_tracer/bcc_bpf_intf/socket_trace.h
+++ b/src/stirling/source_connectors/socket_tracer/bcc_bpf_intf/socket_trace.h
@@ -129,6 +129,7 @@ struct close_event_t {
 // This applies to messages that are over MAX_MSG_SIZE,
 // and effectively makes the maximum message size to be CHUNK_LIMIT*MAX_MSG_SIZE.
 #define CHUNK_LIMIT 4
+#define LOOP_LIMIT 42
 
 // Unique ID to all syscalls and a few other notable functions.
 // This applies to events sent to user-space.
@@ -162,6 +163,29 @@ enum source_function_t {
   kSSLRead,
 };
 
+// Keeps track of the reasons for missed data from BPF, resulting in
+// a gap in the data stream buffer (which we sometimes fill with null bytes).
+enum chunk_t {
+  kFullyFormed = 0,
+  // perf_submit_iovecs
+  kExceededLoopLimit = 1,
+  kIovSizeExceededMaxMsgSize = 2,
+  kExceededLoopLimitAndMaxMsgSize = 3,
+  // perf_submit_wrapper
+  kExceededChunkLimitAndMaxMsgSize = 4,
+  // process_syscall_sendfile
+  kSendFile = 5,
+  kSendFileExceededMaxFillerSize = 6,
+  // Filler event (populated in socket_trace.hpp) with size bytes_missed.
+  // TODO(@benkilimnik): Eventually we should remove the filler event
+  // and use lazy parsing across the board.
+  kFiller = 7,
+  // The gap we tried to fill was larger than the max filler size
+  // (kMaxFilledSizeBytes, currently 1MiB).
+  kIncompleteFiller = 8,
+  kHeaderEvent = 9,  // no gap
+  kUnknownGapReason = 10,
+};
+
 struct socket_data_event_t {
   // We split attributes into a separate struct, because BPF gets upset if you do lots of
   // size arithmetic. This makes it so that it's attributes followed by message.
@@ -195,8 +219,9 @@ struct socket_data_event_t {
   // Note that write/send have separate sequences than read/recv.
   uint64_t pos;
 
-  // The size of the original message. We use this to truncate msg field to minimize the amount
-  // of data being transferred.
+  // The size of the original message (or a chunk of a message if iovlen > 1,
+  // since each perf_submit passes one iovec as an event). We use
+  // this to truncate the msg field to minimize the amount of data being transferred.
   uint32_t msg_size;
 
   // The amount of data actually being sent to user space. This may be less than msg_size if
   // data was truncated, or if the data was not sent at all
   // (e.g. if the connection data tracking has been disabled).
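+  // If data is missing, bytes_missed below records the gap size and incomplete_chunk
+  // records the reason.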
   uint32_t msg_buf_size;
 
+  // Bytes we could not capture (the gap size in the data stream buffer).
+  // Currently tracks cases where we exceed CHUNK_LIMIT or LOOP_LIMIT, or truncate in such
+  // a way that we create a gap. Should be 0 if the incomplete_chunk enum is kFullyFormed.
+  uint32_t bytes_missed;
+
+  // Reason for the incomplete chunk, if present.
+  enum chunk_t incomplete_chunk;
+
   // Whether to prepend length header to the buffer for messages first inferred as Kafka. MySQL
   // may also use this in this future.
   // See infer_kafka_message in protocol_inference.h for details.
diff --git a/src/stirling/source_connectors/socket_tracer/bcc_bpf_intf/socket_trace.hpp b/src/stirling/source_connectors/socket_tracer/bcc_bpf_intf/socket_trace.hpp
index 372581ef150..ce7e0058e42 100644
--- a/src/stirling/source_connectors/socket_tracer/bcc_bpf_intf/socket_trace.hpp
+++ b/src/stirling/source_connectors/socket_tracer/bcc_bpf_intf/socket_trace.hpp
@@ -35,12 +35,22 @@
 // The file name is kept identical to its BPF counterpart as well.
 
 inline std::string ToString(const socket_data_event_t::attr_t& attr) {
-  return absl::Substitute(
+  // absl::Substitute handles at most 10 arguments after the format string,
+  // so we append bytes_missed and incomplete_chunk separately.
+  std::string base_str = absl::Substitute(
       "[ts=$0 conn_id=$1 protocol=$2 role=$3 dir=$4 ssl=$5 source_fn=$6 pos=$7 size=$8 "
-      "buf_size=$9]",
+      "buf_size=$9",
       attr.timestamp_ns, ToString(attr.conn_id), magic_enum::enum_name(attr.protocol),
       magic_enum::enum_name(attr.role), magic_enum::enum_name(attr.direction), attr.ssl,
       magic_enum::enum_name(attr.source_fn), attr.pos, attr.msg_size, attr.msg_buf_size);
+
+  std::string second_part = absl::Substitute(" bytes_missed=$0 incomplete_chunk=$1]",
+                                             attr.bytes_missed,
+                                             magic_enum::enum_name(attr.incomplete_chunk));
+
+  // second_part already begins with a separating space, so concatenate directly.
+  return absl::StrCat(base_str, second_part);
 }
 
 inline std::string ToString(const close_event_t& event) {
@@ -104,6 +114,8 @@ struct SocketDataEvent {
       header_event_ptr->attr.pos = attr.pos - kHeaderBufSize;
       header_event_ptr->attr.msg_buf_size = kHeaderBufSize;
       header_event_ptr->attr.msg_size = kHeaderBufSize;
+      header_event_ptr->attr.incomplete_chunk = kHeaderEvent;
+      header_event_ptr->attr.bytes_missed = 0;
 
       // Take the length_header from the original, fix byte ordering, and place
       // into length_header of the header_event.
@@ -124,7 +136,7 @@ struct SocketDataEvent {
   // For events that which couldn't transfer all its data, we have two options:
   // 1) A missing event.
   // 2) A filler event.
-  // A desired filler event is indicated by a msg_size > msg_buf_size when creating the BPF event.
+  // A desired filler event is indicated by bytes_missed > 0 when creating the BPF event.
   //
   // A filler event is used in particular for sendfile data.
   // We need a better long-term solution for this,
@@ -134,28 +146,51 @@ struct SocketDataEvent {
 
   DCHECK_GE(attr.msg_size, attr.msg_buf_size);
 
-  if (attr.msg_size > attr.msg_buf_size) {
-    VLOG(1) << "Adding filler to event";
+  // Note that msg_size - msg_buf_size != bytes_missed in the case where we exceed LOOP_LIMIT
+  // in perf_submit_iovecs, because one call to perf_submit_buf takes only the size of the
+  // current iovec into account, omitting the rest of the iovecs, which could not be submitted.
+  // As a result, we need to use bytes_missed to determine the size of the filler event.
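+  // For example, if the loop limit cuts off three 1KiB iovecs, the final submitted event has
+  // msg_size == msg_buf_size (its own iovec was copied in full), yet 3KiB never reached user
+  // space; only bytes_missed reflects that gap.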
+
+  // For kernels < 5.1, we cannot track the bytes missed in socket_trace.c properly, and thus
+  // preserve the previous behavior of encoding the bytes missed via msg_size.
+  // If our loop and chunk limits are at most 42 and 4, then we know that we can
+  // stay below the verifier instruction limit for kernels < 5.1.
+  if (LOOP_LIMIT <= 42 && CHUNK_LIMIT <= 4) {
+    if (attr.msg_size > attr.msg_buf_size) {
+      DCHECK_EQ(attr.bytes_missed, 0);
+      attr.bytes_missed = attr.msg_size - attr.msg_buf_size;
+    }
+  }
+  if (attr.bytes_missed > 0) {
+    VLOG(1) << absl::Substitute("Adding filler event for incomplete_chunk: $0, bytes_missed: $1",
+                                magic_enum::enum_name(attr.incomplete_chunk), attr.bytes_missed);
     // Limit the size so we don't have huge allocations.
     constexpr uint32_t kMaxFilledSizeBytes = 1 * 1024 * 1024;
     static char kZeros[kMaxFilledSizeBytes] = {0};
 
-    size_t filler_size = attr.msg_size - attr.msg_buf_size;
+    filler_event_ptr = std::make_unique<SocketDataEvent>();
+    filler_event_ptr->attr = attr;
+    size_t filler_size = attr.bytes_missed;
     if (filler_size > kMaxFilledSizeBytes) {
       VLOG(1) << absl::Substitute("Truncating filler event: $0->$1", filler_size,
                                   kMaxFilledSizeBytes);
       filler_size = kMaxFilledSizeBytes;
+      // Incomplete even after the filler (bytes_missed > 1MiB).
+      filler_event_ptr->attr.incomplete_chunk = kIncompleteFiller;
+      filler_event_ptr->attr.bytes_missed -= kMaxFilledSizeBytes;
+    } else {
+      // We encode the filler size in bytes_missed for filler events that completely plug a gap
+      // (chunk_t kFiller), so we can track them in our metrics. (In reality, bytes missed is 0,
+      // since the filler plugs the gap.) In all other circumstances bytes_missed represents the
+      // size of the gap.
+      filler_event_ptr->attr.incomplete_chunk = kFiller;
     }
 
-    filler_event_ptr = std::make_unique<SocketDataEvent>();
-    filler_event_ptr->attr = attr;
     filler_event_ptr->attr.pos = attr.pos + attr.msg_buf_size;
     filler_event_ptr->attr.msg_buf_size = filler_size;
     filler_event_ptr->attr.msg_size = filler_size;
    filler_event_ptr->msg = std::string_view(kZeros, filler_size);
 
     // We've created the filler event, so adjust the original event accordingly.
+    DCHECK_LE(filler_size, attr.bytes_missed);
     attr.msg_size = attr.msg_buf_size;
   }
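
A minimal sketch of the user-space filler policy implemented above, for review purposes.
DecideFiller and the trimmed-down enum are hypothetical illustrations (not part of the patch);
the 1MiB cap mirrors kMaxFilledSizeBytes in socket_trace.hpp.

  #include <cstdint>
  #include <cstdio>

  enum chunk_t { kFullyFormed = 0, kFiller = 7, kIncompleteFiller = 8 };

  struct FillerDecision {
    chunk_t chunk;          // what the filler event would be tagged as
    uint32_t filler_size;   // bytes of '\0' filler to emit
    uint32_t residual_gap;  // bytes still missing after the filler
  };

  // Plug up to 1MiB of a gap with zeros; anything beyond that remains a gap
  // in the data stream buffer.
  FillerDecision DecideFiller(uint32_t bytes_missed) {
    constexpr uint32_t kMaxFilledSizeBytes = 1 * 1024 * 1024;
    if (bytes_missed == 0) return {kFullyFormed, 0, 0};
    if (bytes_missed > kMaxFilledSizeBytes) {
      return {kIncompleteFiller, kMaxFilledSizeBytes, bytes_missed - kMaxFilledSizeBytes};
    }
    return {kFiller, bytes_missed, 0};
  }

  int main() {
    // A 3MiB sendfile gap: expect kIncompleteFiller with a 1MiB filler and a 2MiB residual gap.
    FillerDecision d = DecideFiller(3 * 1024 * 1024);
    std::printf("chunk=%d filler=%u residual=%u\n", static_cast<int>(d.chunk),
                static_cast<unsigned>(d.filler_size), static_cast<unsigned>(d.residual_gap));
    return 0;
  }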