diff --git a/package.xml b/package.xml index 72da5db..4488299 100644 --- a/package.xml +++ b/package.xml @@ -10,11 +10,11 @@ tandre@php.net yes - 2022-10-10 + 2022-10-22 - 1.2.6 - 1.2.6 + 1.2.7 + 1.2.7 stable @@ -22,8 +22,10 @@ BSD-3-Clause -* Fix a build warning in clang for placeholders indicating that a data structure was constructed/unserialized. -* Fix a build warning after change to expected return type of `count_elements` object handler implementation. +* Fix an edge case in Teds\strict_hash for different arrays with reference cycles + where php's strict equality operator would return true: '==='. + (and for Teds\unique_values, Teds\StrictHashSet, and Teds\StrictHashMap) + '$x === $y' should now always imply that 'Teds\strict_hash($x) === Teds\strict_hash($y)' @@ -383,6 +385,8 @@ + + @@ -404,6 +408,23 @@ teds + + 2022-10-10 + + + 1.2.6 + 1.2.6 + + + stable + stable + + BSD-3-Clause + +* Fix a build warning in clang for placeholders indicating that a data structure was constructed/unserialized. +* Fix a build warning after change to expected return type of `count_elements` object handler implementation. + + 2022-10-10 diff --git a/php_teds.h b/php_teds.h index 765f04a..cf69734 100644 --- a/php_teds.h +++ b/php_teds.h @@ -29,7 +29,7 @@ struct _teds_intrusive_dllist; PHP_MINIT_FUNCTION(teds); -# define PHP_TEDS_VERSION "1.2.6" +# define PHP_TEDS_VERSION "1.2.7" # if defined(ZTS) && defined(COMPILE_DL_TEDS) ZEND_TSRMLS_CACHE_EXTERN() diff --git a/teds.c b/teds.c index 7608f98..3a6228c 100644 --- a/teds.c +++ b/teds.c @@ -1051,42 +1051,79 @@ PHP_FUNCTION(stable_compare) } /* }}} */ -zend_long teds_strict_hash_array(HashTable *ht, teds_strict_hash_node *node) { +ZEND_COLD zend_never_inline uint64_t teds_strict_hash_cyclic_reference_fallback(zval *value) { + ZVAL_DEINDIRECT(value); + ZVAL_DEREF(value); + ZEND_ASSERT(Z_TYPE_P(value) == IS_ARRAY); + + HashTable *ht = Z_ARR_P(value); + uint64_t result = 1; zend_long num_key; zend_string *str_key; zval *field_value; + ZEND_HASH_FOREACH_KEY_VAL(ht, num_key, str_key, field_value) { + /* str_key is in a hash table, meaning that the hash was already computed. */ + result += str_key ? ZSTR_H(str_key) : (zend_ulong) num_key; - uint64_t result = 1; + ZVAL_DEINDIRECT(field_value); + ZVAL_DEREF(field_value); + + if (Z_TYPE_P(field_value) == IS_ARRAY) { + /* Don't recurse into arrays */ + continue; + } + ZEND_ASSERT(!Z_ISREF_P(field_value) && Z_TYPE_P(field_value) != IS_INDIRECT); + + zend_long field_hash = teds_strict_hash_inner(field_value, NULL, NULL); + + result += (field_hash + (result << 7)); + result = teds_inline_hash_of_uint64(result); + } ZEND_HASH_FOREACH_END(); + return result; +} + +zend_long teds_strict_hash_array(HashTable *const ht, const teds_strict_hash_node *const node, bool *const out_has_cyclic_reference) { + ZEND_ASSERT(!*out_has_cyclic_reference); if (zend_hash_num_elements(ht) == 0) { return 8313; } + zend_long num_key; + zend_string *str_key; + zval *field_value; + + uint64_t result = 1; bool protected_recursion = false; teds_strict_hash_node new_node; teds_strict_hash_node *new_node_ptr = NULL; if (!(GC_FLAGS(ht) & GC_IMMUTABLE)) { + new_node.prev = node; + new_node.ht = ht; if (UNEXPECTED(GC_IS_RECURSIVE(ht))) { - for (zend_long i = 8712; node != NULL; node = node->prev, i++) { - if (node->ht == ht) { - return i; + for (const teds_strict_hash_node *tmp = node; tmp != NULL; tmp = tmp->prev) { + if (tmp->ht == ht) { + *out_has_cyclic_reference = true; + return 0; } } } else { protected_recursion = true; GC_PROTECT_RECURSION(ht); } - new_node.prev = node; - new_node.ht = ht; new_node_ptr = &new_node; } - /* teds_strict_hash_inner has code to dereference IS_INDIRECT/IS_REFERENCE, - * but IS_INDIRECT is probably impossible as of php 8.1's removal of direct access to $GLOBALS? */ + /* teds_strict_hash_inner has code to dereference IS_INDIRECT/IS_REFERENCE. + * Aside: IS_INDIRECT is probably impossible as of php 8.1's removal of direct access to $GLOBALS? */ ZEND_HASH_FOREACH_KEY_VAL(ht, num_key, str_key, field_value) { - /* str_key is in a hash table meaning the hash was already computed. */ + /* str_key is in a hash table, meaning that the hash was already computed. */ result += str_key ? ZSTR_H(str_key) : (zend_ulong) num_key; - zend_long field_hash = teds_strict_hash_inner(field_value, new_node_ptr); + zend_long field_hash = teds_strict_hash_inner(field_value, new_node_ptr, out_has_cyclic_reference); + if (UNEXPECTED(*out_has_cyclic_reference)) { + /* Result will not be used, its value won't matter */ + break; + } result += (field_hash + (result << 7)); result = teds_inline_hash_of_uint64(result); } ZEND_HASH_FOREACH_END(); diff --git a/teds.h b/teds.h index 0deaece..5164fa7 100644 --- a/teds.h +++ b/teds.h @@ -139,11 +139,11 @@ static zend_always_inline zend_long teds_get_offset(const zval *offset) { } while(0) typedef struct _teds_strict_hash_node { - zend_array *ht; - struct _teds_strict_hash_node *prev; + const zend_array *ht; + const struct _teds_strict_hash_node *prev; } teds_strict_hash_node; -zend_long teds_strict_hash_array(HashTable *ht, teds_strict_hash_node *node); +zend_long teds_strict_hash_array(HashTable *const ht, const teds_strict_hash_node *const node, bool *const out_has_cyclic_reference); inline static uint64_t teds_convert_double_to_uint64_t(double *value) { double tmp = *value; @@ -190,8 +190,9 @@ inline static uint64_t teds_convert_double_to_uint64_t(double *value) { #endif } -static zend_always_inline zend_long teds_strict_hash_inner(zval *value, teds_strict_hash_node *node) { +static zend_always_inline zend_long teds_strict_hash_inner(zval *value, teds_strict_hash_node *node, bool *const out_has_cyclic_reference) { again: + ZEND_ASSERT(out_has_cyclic_reference == NULL || !*out_has_cyclic_reference); switch (Z_TYPE_P(value)) { case IS_NULL: return 8310; @@ -207,7 +208,7 @@ static zend_always_inline zend_long teds_strict_hash_inner(zval *value, teds_str /* Compute the hash if needed, return it */ return ZSTR_HASH(Z_STR_P(value)); case IS_ARRAY: - return teds_strict_hash_array(Z_ARR_P(value), node); + return teds_strict_hash_array(Z_ARR_P(value), node, out_has_cyclic_reference); case IS_OBJECT: /* Avoid hash collisions between objects and small numbers. */ return Z_OBJ_HANDLE_P(value) + 31415926; @@ -233,9 +234,16 @@ static zend_always_inline uint64_t teds_inline_hash_of_uint64(uint64_t orig) { return bswap_64(data); } +/* Generate a hash of top level array keys and top level non-array values of an array containing reference cycles somewhere */ +ZEND_COLD zend_never_inline uint64_t teds_strict_hash_cyclic_reference_fallback(zval *value); + /* {{{ Generate a hash */ static zend_always_inline zend_long teds_strict_hash(zval *value) { - uint64_t raw_data = teds_strict_hash_inner(value, NULL); + bool out_has_cyclic_reference = false; + uint64_t raw_data = teds_strict_hash_inner(value, NULL, &out_has_cyclic_reference); + if (UNEXPECTED(out_has_cyclic_reference)) { + raw_data = teds_strict_hash_cyclic_reference_fallback(value); + } return teds_inline_hash_of_uint64(raw_data); } /* }}} */ diff --git a/tests/misc/strict_hash_array_recursion.phpt b/tests/misc/strict_hash_array_recursion.phpt index 7c06fd0..4ba83fc 100644 --- a/tests/misc/strict_hash_array_recursion.phpt +++ b/tests/misc/strict_hash_array_recursion.phpt @@ -33,14 +33,14 @@ array(1) { [0]=> *RECURSION* } -int(-8312374854449992677) -int(-7608636289052184164) +int(-2744736492267492002) +int(-4521862704837642663) array(1) { [0]=> *RECURSION* } -int(-8312374854449992677) -int(-7608636289052184164) +int(-2744736492267492002) +int(-4521862704837642663) Recursive arrays hashed in a predictable way array(2) { [0]=> @@ -58,7 +58,7 @@ array(2) { int(123) } } -int(5570903236879094712) -int(5570903236879094712) +int(-5561530578556069188) +int(-5561530578556069188) Test deeper recursion level -int(-6092345340150249341) \ No newline at end of file +int(-5561530578556069188) \ No newline at end of file diff --git a/tests/misc/strict_hash_array_recursion_32bit.phpt b/tests/misc/strict_hash_array_recursion_32bit.phpt index a274277..6b7daf3 100644 --- a/tests/misc/strict_hash_array_recursion_32bit.phpt +++ b/tests/misc/strict_hash_array_recursion_32bit.phpt @@ -33,14 +33,14 @@ array(1) { [0]=> *RECURSION* } -int(1357152471) -int(944359473) +int(-665309858) +int(-351226279) array(1) { [0]=> *RECURSION* } -int(1357152471) -int(944359473) +int(-665309858) +int(-351226279) Recursive arrays hashed in a predictable way array(2) { [0]=> @@ -58,7 +58,7 @@ array(2) { int(123) } } -int(-2005905482) -int(-2005905482) +int(-1313776964) +int(-1313776964) Test deeper recursion level -int(-2086164413) \ No newline at end of file +int(-1313776964) \ No newline at end of file diff --git a/tests/misc/strict_hash_cyclic_recursion_fallback.phpt b/tests/misc/strict_hash_cyclic_recursion_fallback.phpt new file mode 100644 index 0000000..97076c5 --- /dev/null +++ b/tests/misc/strict_hash_cyclic_recursion_fallback.phpt @@ -0,0 +1,112 @@ +--TEST-- +Test strict_hash() handling of cyclic recursion edge cases +--SKIPIF-- + +--FILE-- + $x]; + +var_dump($x === $y); // true, so we expect the strict_hash to be the same. Only hash the key 1 but don't hash the top level array + +// Expected: same hashes are computed +// Observed: Different hashes in Teds 1.2.6 +var_dump(Teds\strict_hash($x), Teds\strict_hash($y)); +var_dump($x); +echo "Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere\n"; +$x[2] = []; +$x[1] = &$x; +var_dump(Teds\strict_hash($x)); +$x[2][] = 1; +var_dump(Teds\strict_hash($x)); +var_dump($x); +$y[2] = [1]; +$y[1] = &$x; +echo "x, y values:\n"; +var_dump($x); +var_dump($y); +echo "x === y: "; +var_export($x === $y); +echo "\n"; +var_dump(Teds\strict_hash($x)); +var_dump(Teds\strict_hash($y)); +$other = []; +$other[1] = &$other; +var_dump(new Teds\StrictHashSet([$x, $y, null, &$other, $other, [1 => null]])); +?> +--EXPECT-- +bool(true) +int(-5561530578556069188) +int(-5561530578556069188) +array(1) { + [1]=> + *RECURSION* +} +Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere +int(7251907892736144760) +int(7251907892736144760) +array(2) { + [1]=> + *RECURSION* + [2]=> + array(1) { + [0]=> + int(1) + } +} +x, y values: +array(2) { + [1]=> + *RECURSION* + [2]=> + array(1) { + [0]=> + int(1) + } +} +array(2) { + [1]=> + &array(2) { + [1]=> + *RECURSION* + [2]=> + array(1) { + [0]=> + int(1) + } + } + [2]=> + array(1) { + [0]=> + int(1) + } +} +x === y: true +int(7251907892736144760) +int(7251907892736144760) +object(Teds\StrictHashSet)#1 (4) { + [0]=> + array(2) { + [1]=> + *RECURSION* + [2]=> + array(1) { + [0]=> + int(1) + } + } + [1]=> + NULL + [2]=> + array(1) { + [1]=> + *RECURSION* + } + [3]=> + array(1) { + [1]=> + NULL + } +} \ No newline at end of file diff --git a/tests/misc/strict_hash_cyclic_recursion_fallback_32bit.phpt b/tests/misc/strict_hash_cyclic_recursion_fallback_32bit.phpt new file mode 100644 index 0000000..2606b4d --- /dev/null +++ b/tests/misc/strict_hash_cyclic_recursion_fallback_32bit.phpt @@ -0,0 +1,112 @@ +--TEST-- +Test strict_hash() handling of cyclic recursion edge cases +--SKIPIF-- + 4) echo "skip 32-bit only\n"; ?> +--FILE-- + $x]; + +var_dump($x === $y); // true, so we expect the strict_hash to be the same. Only hash the key 1 but don't hash the top level array + +// Expected: same hashes are computed +// Observed: Different hashes in Teds 1.2.6 +var_dump(Teds\strict_hash($x), Teds\strict_hash($y)); +var_dump($x); +echo "Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere\n"; +$x[2] = []; +$x[1] = &$x; +var_dump(Teds\strict_hash($x)); +$x[2][] = 1; +var_dump(Teds\strict_hash($x)); +var_dump($x); +$y[2] = [1]; +$y[1] = &$x; +echo "x, y values:\n"; +var_dump($x); +var_dump($y); +echo "x === y: "; +var_export($x === $y); +echo "\n"; +var_dump(Teds\strict_hash($x)); +var_dump(Teds\strict_hash($y)); +$other = []; +$other[1] = &$other; +var_dump(new Teds\StrictHashSet([$x, $y, null, &$other, $other, [1 => null]])); +?> +--EXPECT-- +bool(true) +int(-1313776964) +int(-1313776964) +array(1) { + [1]=> + *RECURSION* +} +Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere +int(1650701688) +int(1650701688) +array(2) { + [1]=> + *RECURSION* + [2]=> + array(1) { + [0]=> + int(1) + } +} +x, y values: +array(2) { + [1]=> + *RECURSION* + [2]=> + array(1) { + [0]=> + int(1) + } +} +array(2) { + [1]=> + &array(2) { + [1]=> + *RECURSION* + [2]=> + array(1) { + [0]=> + int(1) + } + } + [2]=> + array(1) { + [0]=> + int(1) + } +} +x === y: true +int(1650701688) +int(1650701688) +object(Teds\StrictHashSet)#1 (4) { + [0]=> + array(2) { + [1]=> + *RECURSION* + [2]=> + array(1) { + [0]=> + int(1) + } + } + [1]=> + NULL + [2]=> + array(1) { + [1]=> + *RECURSION* + } + [3]=> + array(1) { + [1]=> + NULL + } +} \ No newline at end of file