diff --git a/package.xml b/package.xml
index 72da5db..4488299 100644
--- a/package.xml
+++ b/package.xml
@@ -10,11 +10,11 @@
tandre@php.net
yes
- 2022-10-10
+ 2022-10-22
- 1.2.6
- 1.2.6
+ 1.2.7
+ 1.2.7
stable
@@ -22,8 +22,10 @@
BSD-3-Clause
-* Fix a build warning in clang for placeholders indicating that a data structure was constructed/unserialized.
-* Fix a build warning after change to expected return type of `count_elements` object handler implementation.
+* Fix an edge case in Teds\strict_hash for different arrays with reference cycles
+ where php's strict equality operator would return true: '==='.
+ (and for Teds\unique_values, Teds\StrictHashSet, and Teds\StrictHashMap)
+ '$x === $y' should now always imply that 'Teds\strict_hash($x) === Teds\strict_hash($y)'
@@ -383,6 +385,8 @@
+
+
@@ -404,6 +408,23 @@
teds
+
+ 2022-10-10
+
+
+ 1.2.6
+ 1.2.6
+
+
+ stable
+ stable
+
+ BSD-3-Clause
+
+* Fix a build warning in clang for placeholders indicating that a data structure was constructed/unserialized.
+* Fix a build warning after change to expected return type of `count_elements` object handler implementation.
+
+
2022-10-10
diff --git a/php_teds.h b/php_teds.h
index 765f04a..cf69734 100644
--- a/php_teds.h
+++ b/php_teds.h
@@ -29,7 +29,7 @@ struct _teds_intrusive_dllist;
PHP_MINIT_FUNCTION(teds);
-# define PHP_TEDS_VERSION "1.2.6"
+# define PHP_TEDS_VERSION "1.2.7"
# if defined(ZTS) && defined(COMPILE_DL_TEDS)
ZEND_TSRMLS_CACHE_EXTERN()
diff --git a/teds.c b/teds.c
index 7608f98..3a6228c 100644
--- a/teds.c
+++ b/teds.c
@@ -1051,42 +1051,79 @@ PHP_FUNCTION(stable_compare)
}
/* }}} */
-zend_long teds_strict_hash_array(HashTable *ht, teds_strict_hash_node *node) {
+ZEND_COLD zend_never_inline uint64_t teds_strict_hash_cyclic_reference_fallback(zval *value) {
+ ZVAL_DEINDIRECT(value);
+ ZVAL_DEREF(value);
+ ZEND_ASSERT(Z_TYPE_P(value) == IS_ARRAY);
+
+ HashTable *ht = Z_ARR_P(value);
+ uint64_t result = 1;
zend_long num_key;
zend_string *str_key;
zval *field_value;
+ ZEND_HASH_FOREACH_KEY_VAL(ht, num_key, str_key, field_value) {
+ /* str_key is in a hash table, meaning that the hash was already computed. */
+ result += str_key ? ZSTR_H(str_key) : (zend_ulong) num_key;
- uint64_t result = 1;
+ ZVAL_DEINDIRECT(field_value);
+ ZVAL_DEREF(field_value);
+
+ if (Z_TYPE_P(field_value) == IS_ARRAY) {
+ /* Don't recurse into arrays */
+ continue;
+ }
+ ZEND_ASSERT(!Z_ISREF_P(field_value) && Z_TYPE_P(field_value) != IS_INDIRECT);
+
+ zend_long field_hash = teds_strict_hash_inner(field_value, NULL, NULL);
+
+ result += (field_hash + (result << 7));
+ result = teds_inline_hash_of_uint64(result);
+ } ZEND_HASH_FOREACH_END();
+ return result;
+}
+
+zend_long teds_strict_hash_array(HashTable *const ht, const teds_strict_hash_node *const node, bool *const out_has_cyclic_reference) {
+ ZEND_ASSERT(!*out_has_cyclic_reference);
if (zend_hash_num_elements(ht) == 0) {
return 8313;
}
+ zend_long num_key;
+ zend_string *str_key;
+ zval *field_value;
+
+ uint64_t result = 1;
bool protected_recursion = false;
teds_strict_hash_node new_node;
teds_strict_hash_node *new_node_ptr = NULL;
if (!(GC_FLAGS(ht) & GC_IMMUTABLE)) {
+ new_node.prev = node;
+ new_node.ht = ht;
if (UNEXPECTED(GC_IS_RECURSIVE(ht))) {
- for (zend_long i = 8712; node != NULL; node = node->prev, i++) {
- if (node->ht == ht) {
- return i;
+ for (const teds_strict_hash_node *tmp = node; tmp != NULL; tmp = tmp->prev) {
+ if (tmp->ht == ht) {
+ *out_has_cyclic_reference = true;
+ return 0;
}
}
} else {
protected_recursion = true;
GC_PROTECT_RECURSION(ht);
}
- new_node.prev = node;
- new_node.ht = ht;
new_node_ptr = &new_node;
}
- /* teds_strict_hash_inner has code to dereference IS_INDIRECT/IS_REFERENCE,
- * but IS_INDIRECT is probably impossible as of php 8.1's removal of direct access to $GLOBALS? */
+ /* teds_strict_hash_inner has code to dereference IS_INDIRECT/IS_REFERENCE.
+ * Aside: IS_INDIRECT is probably impossible as of php 8.1's removal of direct access to $GLOBALS? */
ZEND_HASH_FOREACH_KEY_VAL(ht, num_key, str_key, field_value) {
- /* str_key is in a hash table meaning the hash was already computed. */
+ /* str_key is in a hash table, meaning that the hash was already computed. */
result += str_key ? ZSTR_H(str_key) : (zend_ulong) num_key;
- zend_long field_hash = teds_strict_hash_inner(field_value, new_node_ptr);
+ zend_long field_hash = teds_strict_hash_inner(field_value, new_node_ptr, out_has_cyclic_reference);
+ if (UNEXPECTED(*out_has_cyclic_reference)) {
+ /* Result will not be used, its value won't matter */
+ break;
+ }
result += (field_hash + (result << 7));
result = teds_inline_hash_of_uint64(result);
} ZEND_HASH_FOREACH_END();
diff --git a/teds.h b/teds.h
index 0deaece..5164fa7 100644
--- a/teds.h
+++ b/teds.h
@@ -139,11 +139,11 @@ static zend_always_inline zend_long teds_get_offset(const zval *offset) {
} while(0)
typedef struct _teds_strict_hash_node {
- zend_array *ht;
- struct _teds_strict_hash_node *prev;
+ const zend_array *ht;
+ const struct _teds_strict_hash_node *prev;
} teds_strict_hash_node;
-zend_long teds_strict_hash_array(HashTable *ht, teds_strict_hash_node *node);
+zend_long teds_strict_hash_array(HashTable *const ht, const teds_strict_hash_node *const node, bool *const out_has_cyclic_reference);
inline static uint64_t teds_convert_double_to_uint64_t(double *value) {
double tmp = *value;
@@ -190,8 +190,9 @@ inline static uint64_t teds_convert_double_to_uint64_t(double *value) {
#endif
}
-static zend_always_inline zend_long teds_strict_hash_inner(zval *value, teds_strict_hash_node *node) {
+static zend_always_inline zend_long teds_strict_hash_inner(zval *value, teds_strict_hash_node *node, bool *const out_has_cyclic_reference) {
again:
+ ZEND_ASSERT(out_has_cyclic_reference == NULL || !*out_has_cyclic_reference);
switch (Z_TYPE_P(value)) {
case IS_NULL:
return 8310;
@@ -207,7 +208,7 @@ static zend_always_inline zend_long teds_strict_hash_inner(zval *value, teds_str
/* Compute the hash if needed, return it */
return ZSTR_HASH(Z_STR_P(value));
case IS_ARRAY:
- return teds_strict_hash_array(Z_ARR_P(value), node);
+ return teds_strict_hash_array(Z_ARR_P(value), node, out_has_cyclic_reference);
case IS_OBJECT:
/* Avoid hash collisions between objects and small numbers. */
return Z_OBJ_HANDLE_P(value) + 31415926;
@@ -233,9 +234,16 @@ static zend_always_inline uint64_t teds_inline_hash_of_uint64(uint64_t orig) {
return bswap_64(data);
}
+/* Generate a hash of top level array keys and top level non-array values of an array containing reference cycles somewhere */
+ZEND_COLD zend_never_inline uint64_t teds_strict_hash_cyclic_reference_fallback(zval *value);
+
/* {{{ Generate a hash */
static zend_always_inline zend_long teds_strict_hash(zval *value) {
- uint64_t raw_data = teds_strict_hash_inner(value, NULL);
+ bool out_has_cyclic_reference = false;
+ uint64_t raw_data = teds_strict_hash_inner(value, NULL, &out_has_cyclic_reference);
+ if (UNEXPECTED(out_has_cyclic_reference)) {
+ raw_data = teds_strict_hash_cyclic_reference_fallback(value);
+ }
return teds_inline_hash_of_uint64(raw_data);
}
/* }}} */
diff --git a/tests/misc/strict_hash_array_recursion.phpt b/tests/misc/strict_hash_array_recursion.phpt
index 7c06fd0..4ba83fc 100644
--- a/tests/misc/strict_hash_array_recursion.phpt
+++ b/tests/misc/strict_hash_array_recursion.phpt
@@ -33,14 +33,14 @@ array(1) {
[0]=>
*RECURSION*
}
-int(-8312374854449992677)
-int(-7608636289052184164)
+int(-2744736492267492002)
+int(-4521862704837642663)
array(1) {
[0]=>
*RECURSION*
}
-int(-8312374854449992677)
-int(-7608636289052184164)
+int(-2744736492267492002)
+int(-4521862704837642663)
Recursive arrays hashed in a predictable way
array(2) {
[0]=>
@@ -58,7 +58,7 @@ array(2) {
int(123)
}
}
-int(5570903236879094712)
-int(5570903236879094712)
+int(-5561530578556069188)
+int(-5561530578556069188)
Test deeper recursion level
-int(-6092345340150249341)
\ No newline at end of file
+int(-5561530578556069188)
\ No newline at end of file
diff --git a/tests/misc/strict_hash_array_recursion_32bit.phpt b/tests/misc/strict_hash_array_recursion_32bit.phpt
index a274277..6b7daf3 100644
--- a/tests/misc/strict_hash_array_recursion_32bit.phpt
+++ b/tests/misc/strict_hash_array_recursion_32bit.phpt
@@ -33,14 +33,14 @@ array(1) {
[0]=>
*RECURSION*
}
-int(1357152471)
-int(944359473)
+int(-665309858)
+int(-351226279)
array(1) {
[0]=>
*RECURSION*
}
-int(1357152471)
-int(944359473)
+int(-665309858)
+int(-351226279)
Recursive arrays hashed in a predictable way
array(2) {
[0]=>
@@ -58,7 +58,7 @@ array(2) {
int(123)
}
}
-int(-2005905482)
-int(-2005905482)
+int(-1313776964)
+int(-1313776964)
Test deeper recursion level
-int(-2086164413)
\ No newline at end of file
+int(-1313776964)
\ No newline at end of file
diff --git a/tests/misc/strict_hash_cyclic_recursion_fallback.phpt b/tests/misc/strict_hash_cyclic_recursion_fallback.phpt
new file mode 100644
index 0000000..97076c5
--- /dev/null
+++ b/tests/misc/strict_hash_cyclic_recursion_fallback.phpt
@@ -0,0 +1,112 @@
+--TEST--
+Test strict_hash() handling of cyclic recursion edge cases
+--SKIPIF--
+
+--FILE--
+ $x];
+
+var_dump($x === $y); // true, so we expect the strict_hash to be the same. Only hash the key 1 but don't hash the top level array
+
+// Expected: same hashes are computed
+// Observed: Different hashes in Teds 1.2.6
+var_dump(Teds\strict_hash($x), Teds\strict_hash($y));
+var_dump($x);
+echo "Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere\n";
+$x[2] = [];
+$x[1] = &$x;
+var_dump(Teds\strict_hash($x));
+$x[2][] = 1;
+var_dump(Teds\strict_hash($x));
+var_dump($x);
+$y[2] = [1];
+$y[1] = &$x;
+echo "x, y values:\n";
+var_dump($x);
+var_dump($y);
+echo "x === y: ";
+var_export($x === $y);
+echo "\n";
+var_dump(Teds\strict_hash($x));
+var_dump(Teds\strict_hash($y));
+$other = [];
+$other[1] = &$other;
+var_dump(new Teds\StrictHashSet([$x, $y, null, &$other, $other, [1 => null]]));
+?>
+--EXPECT--
+bool(true)
+int(-5561530578556069188)
+int(-5561530578556069188)
+array(1) {
+ [1]=>
+ *RECURSION*
+}
+Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere
+int(7251907892736144760)
+int(7251907892736144760)
+array(2) {
+ [1]=>
+ *RECURSION*
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+}
+x, y values:
+array(2) {
+ [1]=>
+ *RECURSION*
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+}
+array(2) {
+ [1]=>
+ &array(2) {
+ [1]=>
+ *RECURSION*
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+ }
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+}
+x === y: true
+int(7251907892736144760)
+int(7251907892736144760)
+object(Teds\StrictHashSet)#1 (4) {
+ [0]=>
+ array(2) {
+ [1]=>
+ *RECURSION*
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+ }
+ [1]=>
+ NULL
+ [2]=>
+ array(1) {
+ [1]=>
+ *RECURSION*
+ }
+ [3]=>
+ array(1) {
+ [1]=>
+ NULL
+ }
+}
\ No newline at end of file
diff --git a/tests/misc/strict_hash_cyclic_recursion_fallback_32bit.phpt b/tests/misc/strict_hash_cyclic_recursion_fallback_32bit.phpt
new file mode 100644
index 0000000..2606b4d
--- /dev/null
+++ b/tests/misc/strict_hash_cyclic_recursion_fallback_32bit.phpt
@@ -0,0 +1,112 @@
+--TEST--
+Test strict_hash() handling of cyclic recursion edge cases
+--SKIPIF--
+ 4) echo "skip 32-bit only\n"; ?>
+--FILE--
+ $x];
+
+var_dump($x === $y); // true, so we expect the strict_hash to be the same. Only hash the key 1 but don't hash the top level array
+
+// Expected: same hashes are computed
+// Observed: Different hashes in Teds 1.2.6
+var_dump(Teds\strict_hash($x), Teds\strict_hash($y));
+var_dump($x);
+echo "Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere\n";
+$x[2] = [];
+$x[1] = &$x;
+var_dump(Teds\strict_hash($x));
+$x[2][] = 1;
+var_dump(Teds\strict_hash($x));
+var_dump($x);
+$y[2] = [1];
+$y[1] = &$x;
+echo "x, y values:\n";
+var_dump($x);
+var_dump($y);
+echo "x === y: ";
+var_export($x === $y);
+echo "\n";
+var_dump(Teds\strict_hash($x));
+var_dump(Teds\strict_hash($y));
+$other = [];
+$other[1] = &$other;
+var_dump(new Teds\StrictHashSet([$x, $y, null, &$other, $other, [1 => null]]));
+?>
+--EXPECT--
+bool(true)
+int(-1313776964)
+int(-1313776964)
+array(1) {
+ [1]=>
+ *RECURSION*
+}
+Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere
+int(1650701688)
+int(1650701688)
+array(2) {
+ [1]=>
+ *RECURSION*
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+}
+x, y values:
+array(2) {
+ [1]=>
+ *RECURSION*
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+}
+array(2) {
+ [1]=>
+ &array(2) {
+ [1]=>
+ *RECURSION*
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+ }
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+}
+x === y: true
+int(1650701688)
+int(1650701688)
+object(Teds\StrictHashSet)#1 (4) {
+ [0]=>
+ array(2) {
+ [1]=>
+ *RECURSION*
+ [2]=>
+ array(1) {
+ [0]=>
+ int(1)
+ }
+ }
+ [1]=>
+ NULL
+ [2]=>
+ array(1) {
+ [1]=>
+ *RECURSION*
+ }
+ [3]=>
+ array(1) {
+ [1]=>
+ NULL
+ }
+}
\ No newline at end of file