Skip to content

Commit

Permalink
feat: handle concrete keccaks concretely (WIP) (a16z#391)
Browse files Browse the repository at this point in the history
Co-authored-by: Daejun Park <[email protected]>
  • Loading branch information
0xkarmacoma and daejunpark authored Oct 14, 2024
1 parent eed8f93 commit 9a83860
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 19 deletions.
2 changes: 2 additions & 0 deletions packages/halmos-builder/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
python3-pip \
wget \
python3-venv \
build-essential \
python3.12-dev \
&& rm -rf /var/lib/apt/lists/*

# Install Foundry
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies = [
"sortedcontainers>=2.4.0",
"toml>=0.10.2",
"z3-solver==4.12.6.0",
"eth_hash[pysha3]>=0.7.0"
]
dynamic = ["version"]

Expand Down
85 changes: 66 additions & 19 deletions src/halmos/sevm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
TypeVar,
)

from eth_hash.auto import keccak
from z3 import (
UGE,
UGT,
Expand Down Expand Up @@ -87,6 +88,7 @@
assert_address,
assert_bv,
assert_uint256,
bv_value_to_bytes,
byte_length,
bytes_to_bv_value,
con,
Expand All @@ -98,6 +100,7 @@
f_ecrecover,
f_sha3_256_name,
f_sha3_512_name,
f_sha3_empty,
f_sha3_name,
hexify,
int_of,
Expand Down Expand Up @@ -129,7 +132,7 @@
Steps = dict[int, dict[str, Any]] # execution tree

EMPTY_BYTES = ByteVec()
EMPTY_KECCAK = con(0xC5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470)
EMPTY_KECCAK = 0xC5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470
ZERO, ONE = con(0), con(1)
MAX_CALL_DEPTH = 1024

Expand Down Expand Up @@ -1111,29 +1114,68 @@ def sha3(self) -> None:
sha3_image = self.sha3_data(data)
self.st.push(sha3_image)

def sha3_data(self, data: Bytes) -> Word:
def sha3_hash(self, data: Bytes) -> bytes | None:
"""return concrete bytes if the hash can be evaluated, otherwise None"""

size = byte_length(data)

if size > 0:
if isinstance(data, bytes):
data = bytes_to_bv_value(data)
if size == 0:
return EMPTY_KECCAK.to_bytes(32, byteorder="big")

f_sha3 = Function(
f_sha3_name(size * 8), BitVecSorts[size * 8], BitVecSort256
)
sha3_expr = f_sha3(data)
else:
sha3_expr = EMPTY_KECCAK
if isinstance(data, bytes):
return keccak(data)

if is_bv_value(data):
return keccak(bv_value_to_bytes(data))

if isinstance(data, int):
# the problem here is that we're not sure about the bit-width of the int
# this is not supposed to happen, so just log and move on
debug(f"eval_sha3 received unexpected int value ({data})")

return None

def sha3_expr(self, data: Bytes) -> Word:
"""return a symbolic sha3 expression, e.g. f_sha3_256(data)"""

# assume hash values are sufficiently smaller than the uint max
self.path.append(ULE(sha3_expr, 2**256 - 2**64))
bitsize = byte_length(data) * 8
if bitsize == 0:
return f_sha3_empty

if isinstance(data, bytes):
data = bytes_to_bv_value(data)

fname = f_sha3_name(bitsize)
f_sha3 = Function(fname, BitVecSorts[bitsize], BitVecSort256)
return f_sha3(data)

def sha3_data(self, data: Bytes) -> Word:
sha3_expr = self.sha3_expr(data)
sha3_hash = self.sha3_hash(data)

if sha3_hash is not None:
self.path.append(sha3_expr == bytes_to_bv_value(sha3_hash))

# ensure the hash value is within the safe range assumed below
sha3_hash_int = int.from_bytes(sha3_hash, "big")
if sha3_hash_int == 0 or sha3_hash_int > 2**256 - 2**64:
error_msg = f"hash value outside expected range: {sha3_hash.hex()}"
raise HalmosException(error_msg)

else:
# assume hash values are non-zero and sufficiently small to prevent overflow when adding reasonable offsets
self.path.append(sha3_expr != ZERO)
self.path.append(ULE(sha3_expr, 2**256 - 2**64))

# assume no hash collision
self.assume_sha3_distinct(sha3_expr)

# handle create2 hash
if size == 85 and eq(extract_bytes(data, 0, 1), con(0xFF, size_bits=8)):
return con(create2_magic_address + self.sha3s[sha3_expr])
size = byte_length(data)
if size == 85:
first_byte = unbox_int(ByteVec(data).get_byte(0))
if isinstance(first_byte, int) and first_byte == 0xFF:
return con(create2_magic_address + self.sha3s[sha3_expr])
else:
return sha3_expr

Expand All @@ -1159,7 +1201,6 @@ def assume_sha3_distinct(self, sha3_expr) -> None:
# inputs have different sizes: assume the outputs are different
self.path.append(sha3_expr != prev_sha3_expr)

self.path.append(sha3_expr != ZERO)
self.sha3s[sha3_expr] = len(self.sha3s)

def new_gas_id(self) -> int:
Expand Down Expand Up @@ -3019,9 +3060,15 @@ def finalize(ex: Exec):
elif EVM.PUSH1 <= opcode <= EVM.PUSH32:
val = unbox_int(insn.operand)
if isinstance(val, int):
if opcode == EVM.PUSH32 and val in sha3_inv:
# restore precomputed hashes
ex.st.push(ex.sha3_data(con(sha3_inv[val])))
if opcode == EVM.PUSH32:
if val in sha3_inv:
# restore precomputed hashes
ex.st.push(ex.sha3_data(con(sha3_inv[val])))
# TODO: support more commonly used concrete keccak values
elif val == EMPTY_KECCAK:
ex.st.push(ex.sha3_data(b""))
else:
ex.st.push(val)
else:
ex.st.push(val)
else:
Expand Down
8 changes: 8 additions & 0 deletions src/halmos/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Z3_OP_BADD,
Z3_OP_CONCAT,
Z3_OP_ULEQ,
BitVec,
BitVecNumRef,
BitVecRef,
BitVecSort,
Expand Down Expand Up @@ -108,9 +109,16 @@ def f_sha3_name(bitsize: int) -> str:
return f"f_sha3_{bitsize}"


f_sha3_0_name = f_sha3_name(0)
f_sha3_256_name = f_sha3_name(256)
f_sha3_512_name = f_sha3_name(512)

# NOTE: another way to encode the empty keccak is to use 0-ary function like:
# f_sha3_empty = Function(f_sha3_0_name, BitVecSort256)
# then `f_sha3_empty()` is equivalent to `BitVec(f_sha3_0_name, BitVecSort256)`.
# in both cases, decl() == f_sha3_0_name, and num_args() == 0.
f_sha3_empty = BitVec(f_sha3_0_name, BitVecSort256)


def uid() -> str:
return uuid.uuid4().hex[:7]
Expand Down
45 changes: 45 additions & 0 deletions tests/expected/all.json
Original file line number Diff line number Diff line change
Expand Up @@ -2279,6 +2279,33 @@
}
],
"test/Sha3.t.sol:Sha3Test": [
{
"name": "check_concrete_keccak_does_not_split_paths()",
"exitcode": 0,
"num_models": 0,
"models": null,
"num_paths": null,
"time": null,
"num_bounded_loops": null
},
{
"name": "check_concrete_keccak_memory_lookup()",
"exitcode": 0,
"num_models": 0,
"models": null,
"num_paths": null,
"time": null,
"num_bounded_loops": null
},
{
"name": "check_empty_hash_value()",
"exitcode": 0,
"num_models": 0,
"models": null,
"num_paths": null,
"time": null,
"num_bounded_loops": null
},
{
"name": "check_hash()",
"exitcode": 0,
Expand All @@ -2288,6 +2315,15 @@
"time": null,
"num_bounded_loops": null
},
{
"name": "check_hash_collision_with_empty()",
"exitcode": 0,
"num_models": 0,
"models": null,
"num_paths": null,
"time": null,
"num_bounded_loops": null
},
{
"name": "check_no_hash_collision_assumption()",
"exitcode": 0,
Expand All @@ -2296,6 +2332,15 @@
"num_paths": null,
"time": null,
"num_bounded_loops": null
},
{
"name": "check_only_empty_bytes_matches_empty_hash(bytes)",
"exitcode": 0,
"num_models": 0,
"models": null,
"num_paths": null,
"time": null,
"num_bounded_loops": null
}
],
"test/SignExtend.t.sol:SignExtendTest": [
Expand Down
50 changes: 50 additions & 0 deletions tests/regression/test/Sha3.t.sol
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import "forge-std/Test.sol";
import {SymTest} from "halmos-cheatcodes/SymTest.sol";

contract Sha3Test is Test, SymTest {
// 0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470
bytes32 constant EMPTY_HASH = keccak256("");

function check_hash() public {
_assert_eq("", "");
_assert_eq("1", "1");
Expand All @@ -26,6 +29,53 @@ contract Sha3Test is Test, SymTest {
assert(data32_1[0] == data32_2[0]);
}

function check_hash_collision_with_empty() public {
bytes memory data = svm.createBytes(1, "data");
assertNotEq(keccak256(data), keccak256(""));
}

function check_empty_hash_value() public {
assertEq(keccak256(""), EMPTY_HASH);

// TODO: uncomment when we support empty bytes
// bytes memory data = svm.createBytes(0, "data");
// assertEq(keccak256(data), EMPTY_HASH);
}

function check_only_empty_bytes_matches_empty_hash(bytes memory data) public {
// empty hash value
vm.assume(keccak256(data) == EMPTY_HASH);
assertEq(data.length, 0);
}

function check_concrete_keccak_does_not_split_paths() external {
bytes32 hash = keccak256("data");
uint256 bit = uint256(hash) & 1;

// this tests that the hash value is concrete
// if it was symbolic, we would split paths and fail in the even case
// (keccak("data") is odd)
if (uint256(hash) & 1 == 0) {
console2.log("even");
assert(false);
} else {
console2.log("odd");
assert(true);
}
}

function check_concrete_keccak_memory_lookup() external {
bytes32 hash = keccak256(abi.encodePacked(uint256(3)));
uint256 bit = uint256(hash) & 1;

string[] memory x = new string[](2);
x[0] = "even";
x[1] = "odd";

// checks that we don't fail with symbolic memory offset error
console2.log(x[bit]);
}

function _assert_eq(bytes memory data1, bytes memory data2) internal {
assert(keccak256(data1) == keccak256(data2));
}
Expand Down

0 comments on commit 9a83860

Please sign in to comment.