Skip to content

Commit

Permalink
Merge pull request #194 from TysonAndre/strict_hash-change
Browse files Browse the repository at this point in the history
Fix handling of reference cycles in Teds\strict_hash
  • Loading branch information
TysonAndre authored Oct 22, 2022
2 parents 0f7ee2e + d71499e commit 6dfb2a3
Show file tree
Hide file tree
Showing 8 changed files with 327 additions and 37 deletions.
31 changes: 26 additions & 5 deletions package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,22 @@
<email>[email protected]</email>
<active>yes</active>
</lead>
<date>2022-10-10</date>
<date>2022-10-22</date>
<time>16:00:00</time>
<version>
<release>1.2.6</release>
<api>1.2.6</api>
<release>1.2.7</release>
<api>1.2.7</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="https://github.com/TysonAndre/teds/blob/main/COPYING">BSD-3-Clause</license>
<notes>
* Fix a build warning in clang for placeholders indicating that a data structure was constructed/unserialized.
* Fix a build warning after change to expected return type of `count_elements` object handler implementation.
* Fix an edge case in Teds\strict_hash for different arrays with reference cycles
where php's strict equality operator would return true: '==='.
(and for Teds\unique_values, Teds\StrictHashSet, and Teds\StrictHashMap)
'$x === $y' should now always imply that 'Teds\strict_hash($x) === Teds\strict_hash($y)'
</notes>
<contents>
<dir name="/">
Expand Down Expand Up @@ -383,6 +385,8 @@
<file name="misc/strict_hash_array_recursion_32bit.phpt" role="test" />
<file name="misc/strict_hash_array_references.phpt" role="test" />
<file name="misc/strict_hash_array_references_32bit.phpt" role="test" />
<file name="misc/strict_hash_cyclic_recursion_fallback.phpt" role="test" />
<file name="misc/strict_hash_cyclic_recursion_fallback_32bit.phpt" role="test" />
<file name="misc/strict_hash_recursion.phpt" role="test" />
<file name="misc/strict_hash_recursion_32bit.phpt" role="test" />
<file name="misc/strict_hash_typed_property.phpt" role="test" />
Expand All @@ -404,6 +408,23 @@
<providesextension>teds</providesextension>
<extsrcrelease />
<changelog>
<release>
<date>2022-10-10</date>
<time>16:00:00</time>
<version>
<release>1.2.6</release>
<api>1.2.6</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="https://github.com/TysonAndre/teds/blob/main/COPYING">BSD-3-Clause</license>
<notes>
* Fix a build warning in clang for placeholders indicating that a data structure was constructed/unserialized.
* Fix a build warning after change to expected return type of `count_elements` object handler implementation.
</notes>
</release>
<release>
<date>2022-10-10</date>
<time>16:00:00</time>
Expand Down
2 changes: 1 addition & 1 deletion php_teds.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ struct _teds_intrusive_dllist;

PHP_MINIT_FUNCTION(teds);

# define PHP_TEDS_VERSION "1.2.6"
# define PHP_TEDS_VERSION "1.2.7"

# if defined(ZTS) && defined(COMPILE_DL_TEDS)
ZEND_TSRMLS_CACHE_EXTERN()
Expand Down
59 changes: 48 additions & 11 deletions teds.c
Original file line number Diff line number Diff line change
Expand Up @@ -1051,42 +1051,79 @@ PHP_FUNCTION(stable_compare)
}
/* }}} */

zend_long teds_strict_hash_array(HashTable *ht, teds_strict_hash_node *node) {
ZEND_COLD zend_never_inline uint64_t teds_strict_hash_cyclic_reference_fallback(zval *value) {
ZVAL_DEINDIRECT(value);
ZVAL_DEREF(value);
ZEND_ASSERT(Z_TYPE_P(value) == IS_ARRAY);

HashTable *ht = Z_ARR_P(value);
uint64_t result = 1;
zend_long num_key;
zend_string *str_key;
zval *field_value;
ZEND_HASH_FOREACH_KEY_VAL(ht, num_key, str_key, field_value) {
/* str_key is in a hash table, meaning that the hash was already computed. */
result += str_key ? ZSTR_H(str_key) : (zend_ulong) num_key;

uint64_t result = 1;
ZVAL_DEINDIRECT(field_value);
ZVAL_DEREF(field_value);

if (Z_TYPE_P(field_value) == IS_ARRAY) {
/* Don't recurse into arrays */
continue;
}
ZEND_ASSERT(!Z_ISREF_P(field_value) && Z_TYPE_P(field_value) != IS_INDIRECT);

zend_long field_hash = teds_strict_hash_inner(field_value, NULL, NULL);

result += (field_hash + (result << 7));
result = teds_inline_hash_of_uint64(result);
} ZEND_HASH_FOREACH_END();
return result;
}

zend_long teds_strict_hash_array(HashTable *const ht, const teds_strict_hash_node *const node, bool *const out_has_cyclic_reference) {
ZEND_ASSERT(!*out_has_cyclic_reference);

if (zend_hash_num_elements(ht) == 0) {
return 8313;
}
zend_long num_key;
zend_string *str_key;
zval *field_value;

uint64_t result = 1;
bool protected_recursion = false;

teds_strict_hash_node new_node;
teds_strict_hash_node *new_node_ptr = NULL;
if (!(GC_FLAGS(ht) & GC_IMMUTABLE)) {
new_node.prev = node;
new_node.ht = ht;
if (UNEXPECTED(GC_IS_RECURSIVE(ht))) {
for (zend_long i = 8712; node != NULL; node = node->prev, i++) {
if (node->ht == ht) {
return i;
for (const teds_strict_hash_node *tmp = node; tmp != NULL; tmp = tmp->prev) {
if (tmp->ht == ht) {
*out_has_cyclic_reference = true;
return 0;
}
}
} else {
protected_recursion = true;
GC_PROTECT_RECURSION(ht);
}
new_node.prev = node;
new_node.ht = ht;
new_node_ptr = &new_node;
}

/* teds_strict_hash_inner has code to dereference IS_INDIRECT/IS_REFERENCE,
* but IS_INDIRECT is probably impossible as of php 8.1's removal of direct access to $GLOBALS? */
/* teds_strict_hash_inner has code to dereference IS_INDIRECT/IS_REFERENCE.
* Aside: IS_INDIRECT is probably impossible as of php 8.1's removal of direct access to $GLOBALS? */
ZEND_HASH_FOREACH_KEY_VAL(ht, num_key, str_key, field_value) {
/* str_key is in a hash table meaning the hash was already computed. */
/* str_key is in a hash table, meaning that the hash was already computed. */
result += str_key ? ZSTR_H(str_key) : (zend_ulong) num_key;
zend_long field_hash = teds_strict_hash_inner(field_value, new_node_ptr);
zend_long field_hash = teds_strict_hash_inner(field_value, new_node_ptr, out_has_cyclic_reference);
if (UNEXPECTED(*out_has_cyclic_reference)) {
/* Result will not be used, its value won't matter */
break;
}
result += (field_hash + (result << 7));
result = teds_inline_hash_of_uint64(result);
} ZEND_HASH_FOREACH_END();
Expand Down
20 changes: 14 additions & 6 deletions teds.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,11 @@ static zend_always_inline zend_long teds_get_offset(const zval *offset) {
} while(0)

typedef struct _teds_strict_hash_node {
zend_array *ht;
struct _teds_strict_hash_node *prev;
const zend_array *ht;
const struct _teds_strict_hash_node *prev;
} teds_strict_hash_node;

zend_long teds_strict_hash_array(HashTable *ht, teds_strict_hash_node *node);
zend_long teds_strict_hash_array(HashTable *const ht, const teds_strict_hash_node *const node, bool *const out_has_cyclic_reference);

inline static uint64_t teds_convert_double_to_uint64_t(double *value) {
double tmp = *value;
Expand Down Expand Up @@ -190,8 +190,9 @@ inline static uint64_t teds_convert_double_to_uint64_t(double *value) {
#endif
}

static zend_always_inline zend_long teds_strict_hash_inner(zval *value, teds_strict_hash_node *node) {
static zend_always_inline zend_long teds_strict_hash_inner(zval *value, teds_strict_hash_node *node, bool *const out_has_cyclic_reference) {
again:
ZEND_ASSERT(out_has_cyclic_reference == NULL || !*out_has_cyclic_reference);
switch (Z_TYPE_P(value)) {
case IS_NULL:
return 8310;
Expand All @@ -207,7 +208,7 @@ static zend_always_inline zend_long teds_strict_hash_inner(zval *value, teds_str
/* Compute the hash if needed, return it */
return ZSTR_HASH(Z_STR_P(value));
case IS_ARRAY:
return teds_strict_hash_array(Z_ARR_P(value), node);
return teds_strict_hash_array(Z_ARR_P(value), node, out_has_cyclic_reference);
case IS_OBJECT:
/* Avoid hash collisions between objects and small numbers. */
return Z_OBJ_HANDLE_P(value) + 31415926;
Expand All @@ -233,9 +234,16 @@ static zend_always_inline uint64_t teds_inline_hash_of_uint64(uint64_t orig) {
return bswap_64(data);
}

/* Generate a hash of top level array keys and top level non-array values of an array containing reference cycles somewhere */
ZEND_COLD zend_never_inline uint64_t teds_strict_hash_cyclic_reference_fallback(zval *value);

/* {{{ Generate a hash */
static zend_always_inline zend_long teds_strict_hash(zval *value) {
uint64_t raw_data = teds_strict_hash_inner(value, NULL);
bool out_has_cyclic_reference = false;
uint64_t raw_data = teds_strict_hash_inner(value, NULL, &out_has_cyclic_reference);
if (UNEXPECTED(out_has_cyclic_reference)) {
raw_data = teds_strict_hash_cyclic_reference_fallback(value);
}
return teds_inline_hash_of_uint64(raw_data);
}
/* }}} */
Expand Down
14 changes: 7 additions & 7 deletions tests/misc/strict_hash_array_recursion.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ array(1) {
[0]=>
*RECURSION*
}
int(-8312374854449992677)
int(-7608636289052184164)
int(-2744736492267492002)
int(-4521862704837642663)
array(1) {
[0]=>
*RECURSION*
}
int(-8312374854449992677)
int(-7608636289052184164)
int(-2744736492267492002)
int(-4521862704837642663)
Recursive arrays hashed in a predictable way
array(2) {
[0]=>
Expand All @@ -58,7 +58,7 @@ array(2) {
int(123)
}
}
int(5570903236879094712)
int(5570903236879094712)
int(-5561530578556069188)
int(-5561530578556069188)
Test deeper recursion level
int(-6092345340150249341)
int(-5561530578556069188)
14 changes: 7 additions & 7 deletions tests/misc/strict_hash_array_recursion_32bit.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ array(1) {
[0]=>
*RECURSION*
}
int(1357152471)
int(944359473)
int(-665309858)
int(-351226279)
array(1) {
[0]=>
*RECURSION*
}
int(1357152471)
int(944359473)
int(-665309858)
int(-351226279)
Recursive arrays hashed in a predictable way
array(2) {
[0]=>
Expand All @@ -58,7 +58,7 @@ array(2) {
int(123)
}
}
int(-2005905482)
int(-2005905482)
int(-1313776964)
int(-1313776964)
Test deeper recursion level
int(-2086164413)
int(-1313776964)
112 changes: 112 additions & 0 deletions tests/misc/strict_hash_cyclic_recursion_fallback.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
--TEST--
Test strict_hash() handling of cyclic recursion edge cases
--SKIPIF--
<?php if (PHP_INT_SIZE < 8) echo "skip 64-bit only\n"; ?>
--FILE--
<?php

$x = [];
$x[1] = &$x;
$y = [1 => $x];

var_dump($x === $y); // true, so we expect the strict_hash to be the same. Only hash the key 1 but don't hash the top level array

// Expected: same hashes are computed
// Observed: Different hashes in Teds 1.2.6
var_dump(Teds\strict_hash($x), Teds\strict_hash($y));
var_dump($x);
echo "Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere\n";
$x[2] = [];
$x[1] = &$x;
var_dump(Teds\strict_hash($x));
$x[2][] = 1;
var_dump(Teds\strict_hash($x));
var_dump($x);
$y[2] = [1];
$y[1] = &$x;
echo "x, y values:\n";
var_dump($x);
var_dump($y);
echo "x === y: ";
var_export($x === $y);
echo "\n";
var_dump(Teds\strict_hash($x));
var_dump(Teds\strict_hash($y));
$other = [];
$other[1] = &$other;
var_dump(new Teds\StrictHashSet([$x, $y, null, &$other, $other, [1 => null]]));
?>
--EXPECT--
bool(true)
int(-5561530578556069188)
int(-5561530578556069188)
array(1) {
[1]=>
*RECURSION*
}
Test hash skips hashing sub-arrays if array contains cyclic arrays anywhere
int(7251907892736144760)
int(7251907892736144760)
array(2) {
[1]=>
*RECURSION*
[2]=>
array(1) {
[0]=>
int(1)
}
}
x, y values:
array(2) {
[1]=>
*RECURSION*
[2]=>
array(1) {
[0]=>
int(1)
}
}
array(2) {
[1]=>
&array(2) {
[1]=>
*RECURSION*
[2]=>
array(1) {
[0]=>
int(1)
}
}
[2]=>
array(1) {
[0]=>
int(1)
}
}
x === y: true
int(7251907892736144760)
int(7251907892736144760)
object(Teds\StrictHashSet)#1 (4) {
[0]=>
array(2) {
[1]=>
*RECURSION*
[2]=>
array(1) {
[0]=>
int(1)
}
}
[1]=>
NULL
[2]=>
array(1) {
[1]=>
*RECURSION*
}
[3]=>
array(1) {
[1]=>
NULL
}
}
Loading

0 comments on commit 6dfb2a3

Please sign in to comment.