Compute jumpdests using the KCFGSemantics.custom_step heuristic (#2441)

* kevm.py: implement custom_step heuristic * Set Version: 1.0.567 * Set Version: 1.0.568 * update claims * Set Version: 1.0.569 * Set Version: 1.0.578 * test/specs/kontrol/: reorder Sets in claims * tests/specs/kontrol: update claims * tests/specs/kontrol: update remaining claims * Set Version: 1.0.579 * Update kevm-pyk/src/kevm_pyk/kevm.py Co-authored-by: Petar Maksimović <[email protected]> * kevm.py: add more tests * Set Version: 1.0.580 * test_kevm.py: apply review suggestions * Set Version: 1.0.581 --------- Co-authored-by: devops <[email protected]> Co-authored-by: Petar Maksimović <[email protected]>
runtimeverification · Jun 1, 2024 · 088be1c · 088be1c
1 parent e8860f2
commit 088be1c
Show file tree

Hide file tree

Showing 19 changed files with 191 additions and 34 deletions.
diff --git a/kevm-pyk/pyproject.toml b/kevm-pyk/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "kevm-pyk"
-version = "1.0.580"
+version = "1.0.581"
 description = ""
 authors = [
     "Runtime Verification, Inc. <[email protected]>",

diff --git a/kevm-pyk/src/kevm_pyk/__init__.py b/kevm-pyk/src/kevm_pyk/__init__.py
@@ -5,4 +5,4 @@
 if TYPE_CHECKING:
     from typing import Final
 
-VERSION: Final = '1.0.580'
+VERSION: Final = '1.0.581'
diff --git a/kevm-pyk/src/kevm_pyk/kevm.py b/kevm-pyk/src/kevm_pyk/kevm.py
@@ -17,13 +17,15 @@
     build_cons,
     top_down,
 )
-from pyk.kast.manip import abstract_term_safely, flatten_label
+from pyk.kast.manip import abstract_term_safely, flatten_label, set_cell
 from pyk.kast.pretty import paren
+from pyk.kcfg.kcfg import Step
 from pyk.kcfg.semantics import KCFGSemantics
 from pyk.kcfg.show import NodePrinter
 from pyk.ktool.kprove import KProve
 from pyk.ktool.krun import KRun
 from pyk.prelude.bytes import BYTES, pretty_bytes
+from pyk.prelude.collections import set_of
 from pyk.prelude.kbool import notBool
 from pyk.prelude.kint import INT, eqInt, intToken, ltInt
 from pyk.prelude.ml import mlEqualsFalse, mlEqualsTrue
@@ -45,7 +47,6 @@
 
 _LOGGER: Final = logging.getLogger(__name__)
 
-
 # KEVM class
 
 
@@ -149,13 +150,31 @@ def _replace(term: KInner) -> KInner:
         return CTerm(config=bottom_up(_replace, cterm.config), constraints=cterm.constraints)
 
     def custom_step(self, cterm: CTerm) -> KCFGExtendResult | None:
+        """Given a CTerm, update the JUMPDESTS_CELL and PROGRAM_CELL if the rule 'EVM.program.load' is at the top of the K_CELL.
+
+        :param cterm: CTerm of a proof node.
+        :type cterm: CTerm
+        :return: If the K_CELL matches the load_pattern, a Step with depth 1 is returned together with the new configuration, also registering that the `EVM.program.load` rule has been applied. Otherwise, None is returned.
+        :rtype: KCFGExtendResult | None
+        """
+        load_pattern = KSequence(
+            [KApply('#loadProgram__EVM_KItem_Bytes', KVariable('###BYTECODE')), KVariable('###CONTINUATION')]
+        )
+        subst = load_pattern.match(cterm.cell('K_CELL'))
+        if subst is not None:
+            bytecode_sections = flatten_label('_+Bytes__BYTES-HOOKED_Bytes_Bytes_Bytes', subst['###BYTECODE'])
+            jumpdests_set = compute_jumpdests(bytecode_sections)
+            new_cterm = CTerm.from_kast(set_cell(cterm.kast, 'JUMPDESTS_CELL', jumpdests_set))
+            new_cterm = CTerm.from_kast(set_cell(new_cterm.kast, 'PROGRAM_CELL', subst['###BYTECODE']))
+            new_cterm = CTerm.from_kast(set_cell(new_cterm.kast, 'K_CELL', KSequence(subst['###CONTINUATION'])))
+            return Step(new_cterm, 1, (), ['EVM.program.load'], cut=True)
         return None
 
     @staticmethod
     def cut_point_rules(
         break_on_jumpi: bool, break_on_calls: bool, break_on_storage: bool, break_on_basic_blocks: bool
     ) -> list[str]:
-        cut_point_rules = []
+        cut_point_rules = ['EVM.program.load']
         if break_on_jumpi:
             cut_point_rules.extend(['EVM.jumpi.true', 'EVM.jumpi.false'])
         if break_on_basic_blocks:
@@ -548,8 +567,8 @@ def bytes_empty() -> KApply:
         return KApply('.Bytes_BYTES-HOOKED_Bytes')
 
     @staticmethod
-    def buf(width: int, v: KInner) -> KApply:
-        return KApply('#buf(_,_)_BUF-SYNTAX_Bytes_Int_Int', [intToken(width), v])
+    def buf(width: KInner, v: KInner) -> KApply:
+        return KApply('#buf(_,_)_BUF-SYNTAX_Bytes_Int_Int', [width, v])
 
     @staticmethod
     def intlist(ints: list[KInner]) -> KApply:
@@ -644,3 +663,40 @@ def kevm_node_printer(kevm: KEVM, proof: APRProof) -> NodePrinter:
     if type(proof) is APRProof:
         return KEVMAPRNodePrinter(kevm, proof)
     raise ValueError(f'Cannot build NodePrinter for proof type: {type(proof)}')
+
+
+def compute_jumpdests(sections: list[KInner]) -> KInner:
+    """Analyzes a list of KInner objects representing sections of bytecode to compute jump destinations.
+
+    :param sections: A section is expected to be either a concrete sequence of bytes (Bytes) or a symbolic buffer of concrete width (#buf(WIDTH, _)).
+    :return: This function iterates over each section, appending the jump destinations (0x5B) from the bytecode in a KAst Set.
+    :rtype: KInner
+    """
+    offset = 0
+    jumpdests = []
+    for s in sections:
+        if type(s) is KApply and s.label == KLabel('#buf(_,_)_BUF-SYNTAX_Bytes_Int_Int'):
+            width_token = s.args[0]
+            assert type(width_token) is KToken
+            offset += int(width_token.token)
+        elif type(s) is KToken and s.sort == BYTES:
+            bytecode = pretty_bytes(s)
+            jumpdests.extend(_process_jumpdests(bytecode, offset))
+            offset += len(bytecode)
+        else:
+            raise ValueError(f'Cannot compute jumpdests for type: {type(s)}')
+
+    return set_of(jumpdests)
+
+
+def _process_jumpdests(bytecode: bytes, offset: int) -> list[KToken]:
+    """Computes the location of JUMPDEST opcodes from a given bytecode.
+
+    :param bytecode: The bytecode of the contract as bytes.
+    :type bytecode: bytes
+    :param offset: The offset to add to each position index to align it with the broader code structure.
+    :type offset: int
+    :return:  A list of intToken instances representing the positions of all found jump destinations in the bytecode adjusted by the offset.
+    :rtype: list[KToken]
+    """
+    return [intToken(offset + i) for i, byte in enumerate(bytecode) if byte == 0x5B]
diff --git a/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/evm.md b/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/evm.md
@@ -1350,7 +1350,8 @@ The various `CALL*` (and other inter-contract control flow) operations will be d
 
     syntax KItem ::= "#loadProgram" Bytes
  // -------------------------------------
-    rule <k> #loadProgram BYTES => .K ... </k>
+    rule [program.load]:
+         <k> #loadProgram BYTES => .K ... </k>
          <program> _ => BYTES </program>
          <jumpDests> _ => #computeValidJumpDests(BYTES) </jumpDests>
 

diff --git a/kevm-pyk/src/tests/unit/test_kevm.py b/kevm-pyk/src/tests/unit/test_kevm.py
@@ -8,26 +8,28 @@
     from typing import Final
     from pyk.kast.inner import KInner
 
-from pyk.kast.inner import KApply, KToken
+from pyk.kast.inner import KApply, KToken, KVariable
+from pyk.prelude.collections import set_of
+from pyk.prelude.utils import token
 
-from kevm_pyk.kevm import KEVM
+from kevm_pyk.kevm import KEVM, compute_jumpdests
 
 TEST_DATA: Final = [
-    ('single-ktoken', KToken('0', 'Int'), KToken('0x0', 'Int')),
-    ('bytes-to-hex-empty', KApply('<k>', [KToken('b""', 'Bytes')]), KApply('<k>', KToken('0x', 'Bytes'))),
+    ('single-ktoken', token(0), KToken('0x0', 'Int')),
+    ('bytes-to-hex-empty', KApply('<k>', [token(b'')]), KApply('<k>', KToken('0x', 'Bytes'))),
     (
         'bytes-to-hex-nonempty',
-        KApply('<k>', [KToken('b"\\xa6\\xb9c\\x9d"', 'Bytes')]),
+        KApply('<k>', [token(b'\xa6\xb9c\x9d')]),
         KApply('<k>', KToken('0xa6b9639d', 'Bytes')),
     ),
     (
         'kast-to-hex',
         KApply(
             '<generatedTop>',
-            KApply('<coinbase>', KToken('728815563385977040452943777879061427756277306518', 'Int')),
-            KApply('<pc>', KToken('100', 'Int')),
-            KApply('<output>', KToken('b"\x00\x00\x00\x3c\x60\xf5"', 'Bytes')),
-            KApply('<program>', KToken('b"\xcc\xff\xff\xfac\x60\xf5"', 'Bytes')),
+            KApply('<coinbase>', token(728815563385977040452943777879061427756277306518)),
+            KApply('<pc>', token(100)),
+            KApply('<output>', token(b'\x00\x00\x00\x3c\x60\xf5')),
+            KApply('<program>', token(b'\xcc\xff\xff\xfac\x60\xf5')),
         ),
         KApply(
             '<generatedTop>',
@@ -50,3 +52,71 @@ def test_kinner_to_hex(test_id: str, input: KInner, result: KInner) -> None:
     to_hex = KEVM.kinner_to_hex(input)
     # Then
     assert to_hex == result
+
+
+JUMPDESTS_DATA: Final = [
+    ('empty', [], set_of([])),
+    (
+        'with_buf',
+        [
+            token(
+                b'`\xa0`@R4\x80\x15a\x00\x10W`\x00\x80\xfd[P`@Qa\x01\x038\x03\x80a\x01\x03\x839\x81\x01`@\x81\x90Ra\x00/\x91a\x007V[`\x80Ra\x00PV[`\x00` \x82\x84\x03\x12\x15a\x00IW`\x00\x80\xfd[PQ\x91\x90PV[`\x80Q`\x9ba\x00h`\x009`\x00`1\x01R`\x9b`\x00\xf3\xfe`\x80`@R4\x80\x15`\x0fW`\x00\x80\xfd[P`\x046\x10`(W`\x005`\xe0\x1c\x80c\xa5m\xfeJ\x14`-W[`\x00\x80\xfd[`S\x7f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x81V[`@Q\x90\x81R` \x01`@Q\x80\x91\x03\x90\xf3\xfe\xa2dipfsX\"\x12 \xeb\xb3\x99\x11\xbe\x13L\xdeC\xcc\x01/\x849\xe7\xc5\x9aC\xf1\x0f\xf4\xdfE\x14Z\x80\x90(\xeb\xda\xee\xeadsolcC\x00\x08\r\x003'
+            ),
+            KEVM.buf(token(32), KVariable('VV0_x', 'Int')),
+        ],
+        set_of(
+            [
+                token(16),
+                token(47),
+                token(55),
+                token(73),
+                token(80),
+                token(119),
+                token(144),
+                token(149),
+                token(187),
+            ]
+        ),
+    ),
+    (
+        'single_jumpdest',
+        [
+            token(b'['),
+        ],
+        set_of([token(0)]),
+    ),
+    (
+        'multiple_bytes',
+        [
+            token(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00['),
+            token(b'\x00\x00[\x00\x00'),
+            token(b'\x00['),
+        ],
+        set_of([token(16), token(19), token(23)]),
+    ),
+    (
+        'multiple_bufs',
+        [
+            KEVM.buf(token(32), KVariable('VV0_a', 'Int')),
+            token(b'\x00\x00'),
+            KEVM.buf(token(32), KVariable('VV0_x', 'Int')),
+            token(b'\x00\x00[\x00\x00'),
+            KEVM.buf(token(32), KVariable('VV0_y', 'Int')),
+            token(b'\x00[[[\x00[\x00\x00'),
+        ],
+        set_of([token(68), token(104), token(105), token(106), token(108)]),
+    ),
+]
+
+
+@pytest.mark.parametrize(
+    'test_id,input,expected',
+    JUMPDESTS_DATA,
+    ids=[test_id for test_id, *_ in JUMPDESTS_DATA],
+)
+def test_process_jumpdests(test_id: str, input: list[KInner], expected: KInner) -> None:
+    # When
+    result = compute_jumpdests(input)
+
+    # Then
+    assert result == expected
diff --git a/package/version b/package/version
@@ -1 +1 @@
-1.0.580
+1.0.581