Add safety checks when rendering kernel key strings

Summary:

The old code assumed that it was handed a buffer of `KernelKey::MAX_SIZE` bytes, and that the list of TensorMeta values would never render to a string longer than that size.

This PR adds explicit buffer-size tracking and an `Error` return value to `internal::make_kernel_key_string()`, which now returns an error (`Error::InvalidArgument`) if the buffer is too small for the provided values.

While I'm here, move MAX_SIZE out of the public API (it is replaced by the internal constant `internal::kKernelKeyBufSize`), since it's not an intrinsic aspect of kernel keys. This is technically a backward-compatibility (BC) breaking change, but I don't expect that any users are actually depending on it.

Add unit tests for all modified code.

Differential Revision: D69324821
pytorch · Feb 8, 2025 · b6fbd2c · b6fbd2c
1 parent d99970b
commit b6fbd2c
Show file tree

Hide file tree

Showing 4 changed files with 356 additions and 68 deletions.
diff --git a/runtime/kernel/operator_registry.cpp b/runtime/kernel/operator_registry.cpp
@@ -114,44 +114,106 @@ Error register_kernels(const Span<const Kernel> kernels) {
 }
 
 namespace {
-int copy_char_as_number_to_buf(char num, char* buf) {
-  if ((char)num < 10) {
+/**
+ * Writes `num` as a decimal string to `buf` and returns the number of bytes
+ * written. Returns -1 if `buf` is too small or if `num` is not supported.
+ */
+int copy_char_as_number_to_buf(int num, char* buf, size_t buf_size) {
+  if (num < 0) {
+    return -1;
+  }
+  if (num < 10) {
+    if (buf_size < 1) {
+      return -1;
+    }
     *buf = '0' + (char)num;
-    buf += 1;
     return 1;
-  } else {
-    *buf = '0' + ((char)num) / 10;
-    buf += 1;
+  }
+  if (num < 100) {
+    if (buf_size < 2) {
+      return -1;
+    }
+    *buf++ = '0' + ((char)num) / 10;
     *buf = '0' + ((char)num) % 10;
-    buf += 1;
     return 2;
   }
+  return -1;
 }
 } // namespace
 
 namespace internal {
-void make_kernel_key_string(Span<const TensorMeta> key, char* buf) {
+Error make_kernel_key_string(
+    Span<const TensorMeta> key,
+    char* buf,
+    size_t buf_size) {
   if (key.empty()) {
-    // If no tensor is present in an op, kernel key does not apply
-    return;
+    // If no tensor is present in an op, kernel key does not apply.
+    if (buf_size > 0) {
+      buf[0] = '\0';
+    }
+    return Error::Ok;
   }
-  strncpy(buf, "v1/", 3);
+
+  // Reserve one byte for null terminator.
+  if (buf_size < 1) {
+    return Error::InvalidArgument;
+  }
+  buf_size -= 1;
+
+  // Add prefix.
+  if (buf_size < 3) {
+    return Error::InvalidArgument;
+  }
+  memcpy(buf, "v1/", 3);
   buf += 3;
+  buf_size -= 3;
+
+  // Add tensor meta.
   for (size_t i = 0; i < key.size(); i++) {
     auto& meta = key[i];
-    buf += copy_char_as_number_to_buf((char)meta.dtype_, buf);
-    *buf = ';';
-    buf += 1;
+
+    // Add dtype.
+    int n = copy_char_as_number_to_buf((int)meta.dtype_, buf, buf_size);
+    if (n < 0) {
+      return Error::InvalidArgument;
+    }
+    buf += n;
+    buf_size -= n;
+
+    // Add separator between dtype and dim order.
+    if (buf_size < 1) {
+      return Error::InvalidArgument;
+    }
+    *buf++ = ';';
+    buf_size -= 1;
+
+    // Add dim order.
     for (int j = 0; j < meta.dim_order_.size(); j++) {
-      buf += copy_char_as_number_to_buf((char)meta.dim_order_[j], buf);
-      if (j != meta.dim_order_.size() - 1) {
-        *buf = ',';
-        buf += 1;
+      n = copy_char_as_number_to_buf((int)meta.dim_order_[j], buf, buf_size);
+      if (n < 0) {
+        return Error::InvalidArgument;
+      }
+      buf += n;
+      buf_size -= n;
+
+      if (j < meta.dim_order_.size() - 1) {
+        if (buf_size < 1) {
+          return Error::InvalidArgument;
+        }
+        *buf++ = ',';
+        buf_size -= 1;
+      }
+    }
+    if (i < key.size() - 1) {
+      if (buf_size < 1) {
+        return Error::InvalidArgument;
       }
+      *buf++ = '|';
+      buf_size -= 1;
     }
-    *buf = (i < (key.size() - 1)) ? '|' : 0x00;
-    buf += 1;
   }
+  *buf = '\0'; // Space for this was reserved above.
+  return Error::Ok;
 }
 } // namespace internal
 
@@ -165,8 +227,12 @@ Result<OpFunction> get_op_function_from_registry(
     const char* name,
     Span<const TensorMeta> meta_list) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  char buf[KernelKey::MAX_SIZE] = {0};
-  internal::make_kernel_key_string(meta_list, buf);
+  char buf[internal::kKernelKeyBufSize];
+  Error err = internal::make_kernel_key_string(meta_list, buf, sizeof(buf));
+  if (err != Error::Ok) {
+    ET_LOG(Error, "Failed to make kernel key string");
+    return err;
+  }
   KernelKey kernel_key = KernelKey(buf);
 
   int32_t fallback_idx = -1;

diff --git a/runtime/kernel/operator_registry.h b/runtime/kernel/operator_registry.h
@@ -96,39 +96,43 @@ struct TensorMeta {
 
 /**
  * Describes which dtype & dim order specialized kernel to be bound to an
- * operator. If `is_fallback_` is true, it means this kernel can be used as a
- * fallback, if false, it means this kernel can only be used if all the
- * `TensorMeta` are matched. Fallback means this kernel will be used for
- * all input tensor dtypes and dim orders, if the specialized kernel is not
- * registered.
+ * operator.
  *
- * The format of a kernel key data is a string:
- *                              "v<version>/<tensor_meta>|<tensor_meta>..."
- * Size: Up to 691               1    1    1     (42     +1) * 16
- *           Assuming max number of tensors is 16               ^
- * Kernel key version is v1 for now. If the kernel key format changes,
- * update the version to avoid breaking pre-existing kernel keys.
- * Example: v1/7;0,1,2,3
- * The kernel key has only one tensor: a double tensor with dimension 0, 1, 2, 3
+ * Kernel key data is a string with the format:
+ *
+ *     "v<version>/<tensor_meta>|<tensor_meta>..."
+ *
+ * The version is v1 for now. If the kernel key format changes, update the
+ * version to avoid breaking pre-existing kernel keys.
  *
  * Each tensor_meta has the following format: "<dtype>;<dim_order,...>"
- * Size: Up to 42                               1-2   1    24 (1 byte for 0-9; 2
- * for 10-15) + 15 commas Assuming that the max number of dims is 16 ^ Example:
- * 7;0,1,2,3 for [double; 0, 1, 2, 3]
+ *
+ * Example kernel key data: "v1/7;0,1,2,3|1;0,1,2,3,4,5,6,7"
+ *
+ * This has two tensors: the first with dtype=7 and dim order 0,1,2,3, and the
+ * second with dtype=1 and dim order 0,1,2,3,4,5,6,7.
  *
  * IMPORTANT:
  * Users should not construct a kernel key manually. Instead, it should be
  * generated from kernel yaml.
  */
 struct KernelKey {
  public:
+  /**
+   * Creates a fallback (non-specialized) kernel key: this kernel can be used
+   * for all input tensor dtypes and dim orders if the specialized kernel is not
+   * registered.
+   */
   KernelKey() : is_fallback_(true) {}
 
+  /**
+   * Creates a specialized (non-fallback) kernel key that matches a specific
+   * set of input tensor dtypes and dim orders. See the class comment for the
+   * expected format of `kernel_key_data`.
+   */
   /* implicit */ KernelKey(const char* kernel_key_data)
       : kernel_key_data_(kernel_key_data), is_fallback_(false) {}
 
-  constexpr static int MAX_SIZE = 691;
-
   bool operator==(const KernelKey& other) const {
     return this->equals(other);
   }
@@ -144,7 +148,7 @@ struct KernelKey {
     if (is_fallback_) {
       return true;
     }
-    return strncmp(kernel_key_data_, other.kernel_key_data_, MAX_SIZE) == 0;
+    return strcmp(kernel_key_data_, other.kernel_key_data_) == 0;
   }
 
   bool is_fallback() const {
@@ -194,7 +198,23 @@ struct Kernel {
 };
 
 namespace internal {
-void make_kernel_key_string(Span<const TensorMeta> key, char* buf);
+
+/**
+ * A make_kernel_key_string buffer size that is large enough to hold a kernel
+ * key string with 16 tensors of 16 dimensions, plus the trailing NUL byte.
+ */
+constexpr size_t kKernelKeyBufSize = 659;
+
+/**
+ * Given the list of input tensor dtypes + dim orders, writes the kernel key
+ * string into the buffer. Returns an error if the buffer is too small or if the
+ * tensors cannot be represented as a valid key string.
+ */
+Error make_kernel_key_string(
+    Span<const TensorMeta> key,
+    char* buf,
+    size_t buf_size);
+
 } // namespace internal
 
 /**