From c9d8f5b8e8548b8af88581401a73fb1388040b14 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Dec 2024 01:50:40 +0200 Subject: [PATCH 1/2] fix[venom]: fix `MakeSSA` with existing phis (#4423) This commit improves the `MakeSSA` pass to handle incoming `phi` instructions. Following the implementation of the venom parser it is possible to parse code with existing `phi` instructions in the code. The old code was expecting the `phi` instructions to have been self-placed and in "degenerate form" of output == all branch arguments. The new code does not make this assumption. This commit additionally: - refactors the existing `make_ssa` tests to use new test machinery - adds a phi parsing test (does not test the new code in this commit since it does not run passes, but does make sure we can at least parse phis) - expands the venom round-trip tests to check that we can both a) run venom passes on parsed venom, and b) bytecode generation from round-tripping venom produced by the vyper frontend is equivalent to bytecode generation from the regular pipeline (directly from vyper source code) --------- Co-authored-by: Charles Cooper --- tests/functional/venom/parser/test_parsing.py | 121 +++++++++++++++++- tests/functional/venom/test_venom_repr.py | 86 ++++++++++++- tests/unit/compiler/venom/test_make_ssa.py | 88 +++++++------ vyper/venom/basicblock.py | 17 ++- vyper/venom/passes/make_ssa.py | 8 +- 5 files changed, 263 insertions(+), 57 deletions(-) diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index f18a51fe76..bd536a8cfa 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -1,4 +1,4 @@ -from tests.venom_utils import assert_ctx_eq +from tests.venom_utils import assert_bb_eq, assert_ctx_eq from vyper.venom.basicblock import IRBasicBlock, IRLabel, IRLiteral, IRVariable from vyper.venom.context import DataItem, DataSection, IRContext from vyper.venom.function import IRFunction @@ -231,3 +231,122 @@ def test_multi_function_and_data(): ] assert_ctx_eq(parsed_ctx, expected_ctx) + + +def test_phis(): + # @external + # def _loop() -> uint256: + # res: uint256 = 9 + # for i: uint256 in range(res, bound=10): + # res = res + i + # return res + source = """ + function __main_entry { + __main_entry: ; IN=[] OUT=[fallback, 1_then] => {} + %27 = 0 + %1 = calldataload %27 + %28 = %1 + %29 = 224 + %2 = shr %29, %28 + %31 = %2 + %30 = 1729138561 + %4 = xor %30, %31 + %32 = %4 + jnz %32, @fallback, @1_then + ; (__main_entry) + + + 1_then: ; IN=[__main_entry] OUT=[4_condition] => {%11, %var8_0} + %6 = callvalue + %33 = %6 + %7 = iszero %33 + %34 = %7 + assert %34 + %var8_0 = 9 + %11 = 0 + nop + jmp @4_condition + ; (__main_entry) + + + 4_condition: ; IN=[1_then, 5_body] OUT=[5_body, 7_exit] => {%11:3, %var8_0:2} + %var8_0:2 = phi @1_then, %var8_0, @5_body, %var8_0:3 + %11:3 = phi @1_then, %11, @5_body, %11:4 + %35 = %11:3 + %36 = 9 + %15 = xor %36, %35 + %37 = %15 + jnz %37, @5_body, @7_exit + ; (__main_entry) + + + 5_body: ; IN=[4_condition] OUT=[4_condition] => {%11:4, %var8_0:3} + %38 = %11:3 + %39 = %var8_0:2 + %22 = add %39, %38 + %41 = %22 + %40 = %var8_0:2 + %24 = gt %40, %41 + %42 = %24 + %25 = iszero %42 + %43 = %25 + assert %43 + %var8_0:3 = %22 + %44 = %11:3 + %45 = 1 + %11:4 = add %45, %44 + jmp @4_condition + ; (__main_entry) + + + 7_exit: ; IN=[4_condition] OUT=[] => {} + %46 = %var8_0:2 + %47 = 64 + mstore %47, %46 + %48 = 32 + %49 = 64 + return %49, %48 + ; (__main_entry) + + + fallback: ; IN=[__main_entry] OUT=[] => {} + %50 = 0 + %51 = 0 + revert %51, %50 + stop + ; (__main_entry) + } ; close function __main_entry + """ + ctx = parse_venom(source) + + expected_ctx = IRContext() + expected_ctx.add_function(entry_fn := IRFunction(IRLabel("__main_entry"))) + + expect_bb = IRBasicBlock(IRLabel("4_condition"), entry_fn) + entry_fn.append_basic_block(expect_bb) + + expect_bb.append_instruction( + "phi", + IRLabel("1_then"), + IRVariable("%var8_0"), + IRLabel("5_body"), + IRVariable("%var8_0:3"), + ret=IRVariable("var8_0:2"), + ) + expect_bb.append_instruction( + "phi", + IRLabel("1_then"), + IRVariable("%11"), + IRLabel("5_body"), + IRVariable("%11:4"), + ret=IRVariable("11:3"), + ) + expect_bb.append_instruction("store", IRVariable("11:3"), ret=IRVariable("%35")) + expect_bb.append_instruction("store", IRLiteral(9), ret=IRVariable("%36")) + expect_bb.append_instruction("xor", IRVariable("%35"), IRVariable("%36"), ret=IRVariable("%15")) + expect_bb.append_instruction("store", IRVariable("%15"), ret=IRVariable("%37")) + expect_bb.append_instruction("jnz", IRVariable("%37"), IRLabel("5_body"), IRLabel("7_exit")) + # other basic blocks omitted for brevity + + parsed_fn = next(iter(ctx.functions.values())) + assert_bb_eq(parsed_fn.get_basic_block(expect_bb.label.name), expect_bb) diff --git a/tests/functional/venom/test_venom_repr.py b/tests/functional/venom/test_venom_repr.py index c25ce381d8..5136672a03 100644 --- a/tests/functional/venom/test_venom_repr.py +++ b/tests/functional/venom/test_venom_repr.py @@ -1,9 +1,13 @@ +import copy import glob +import textwrap import pytest from tests.venom_utils import assert_ctx_eq, parse_venom from vyper.compiler import compile_code +from vyper.compiler.phases import generate_bytecode +from vyper.venom import generate_assembly_experimental, run_passes_on from vyper.venom.context import IRContext """ @@ -16,15 +20,95 @@ def get_example_vy_filenames(): @pytest.mark.parametrize("vy_filename", get_example_vy_filenames()) -def test_round_trip(vy_filename, optimize, request): +def test_round_trip_examples(vy_filename, optimize, compiler_settings): + """ + Check all examples round trip + """ path = f"examples/{vy_filename}" with open(path) as f: vyper_source = f.read() + _round_trip_helper(vyper_source, optimize, compiler_settings) + + +# pure vyper sources +vyper_sources = [ + """ + @external + def _loop() -> uint256: + res: uint256 = 9 + for i: uint256 in range(res, bound=10): + res = res + i + return res + """ +] + + +@pytest.mark.parametrize("vyper_source", vyper_sources) +def test_round_trip_sources(vyper_source, optimize, compiler_settings): + """ + Test vyper_sources round trip + """ + vyper_source = textwrap.dedent(vyper_source) + _round_trip_helper(vyper_source, optimize, compiler_settings) + + +def _round_trip_helper(vyper_source, optimize, compiler_settings): + # helper function to test venom round-tripping thru the parser + # use two helpers because run_passes_on and + # generate_assembly_experimental are both destructive (mutating) on + # the IRContext + _helper1(vyper_source, optimize) + _helper2(vyper_source, optimize, compiler_settings) + + +def _helper1(vyper_source, optimize): + """ + Check that we are able to run passes on the round-tripped venom code + and that it is valid (generates bytecode) + """ + # note: compiling any later stage than bb_runtime like `asm` or + # `bytecode` modifies the bb_runtime data structure in place and results + # in normalization of the venom cfg (which breaks again make_ssa) out = compile_code(vyper_source, output_formats=["bb_runtime"]) + + bb_runtime = out["bb_runtime"] + venom_code = IRContext.__repr__(bb_runtime) + + ctx = parse_venom(venom_code) + + assert_ctx_eq(bb_runtime, ctx) + + # check it's valid to run venom passes+analyses + # (note this breaks bytecode equality, in the future we should + # test that separately) + run_passes_on(ctx, optimize) + + # test we can generate assembly+bytecode + asm = generate_assembly_experimental(ctx) + generate_bytecode(asm, compiler_metadata=None) + + +def _helper2(vyper_source, optimize, compiler_settings): + """ + Check that we can compile to bytecode, and without running venom passes, + that the output bytecode is equal to going through the normal vyper pipeline + """ + settings = copy.copy(compiler_settings) + # bytecode equivalence only makes sense if we use venom pipeline + settings.experimental_codegen = True + + out = compile_code(vyper_source, settings=settings, output_formats=["bb_runtime"]) bb_runtime = out["bb_runtime"] venom_code = IRContext.__repr__(bb_runtime) ctx = parse_venom(venom_code) assert_ctx_eq(bb_runtime, ctx) + + # test we can generate assembly+bytecode + asm = generate_assembly_experimental(ctx, optimize=optimize) + bytecode = generate_bytecode(asm, compiler_metadata=None) + + out = compile_code(vyper_source, settings=settings, output_formats=["bytecode_runtime"]) + assert "0x" + bytecode.hex() == out["bytecode_runtime"] diff --git a/tests/unit/compiler/venom/test_make_ssa.py b/tests/unit/compiler/venom/test_make_ssa.py index aa3fead6bf..7f6b2c0cba 100644 --- a/tests/unit/compiler/venom/test_make_ssa.py +++ b/tests/unit/compiler/venom/test_make_ssa.py @@ -1,48 +1,52 @@ +from tests.venom_utils import assert_ctx_eq, parse_venom from vyper.venom.analysis import IRAnalysesCache -from vyper.venom.basicblock import IRBasicBlock, IRLabel -from vyper.venom.context import IRContext from vyper.venom.passes import MakeSSA -def test_phi_case(): - ctx = IRContext() - fn = ctx.create_function("_global") - - bb = fn.get_basic_block() - - bb_cont = IRBasicBlock(IRLabel("condition"), fn) - bb_then = IRBasicBlock(IRLabel("then"), fn) - bb_else = IRBasicBlock(IRLabel("else"), fn) - bb_if_exit = IRBasicBlock(IRLabel("if_exit"), fn) - fn.append_basic_block(bb_cont) - fn.append_basic_block(bb_then) - fn.append_basic_block(bb_else) - fn.append_basic_block(bb_if_exit) - - v = bb.append_instruction("mload", 64) - bb_cont.append_instruction("jnz", v, bb_then.label, bb_else.label) - - bb_if_exit.append_instruction("add", v, 1, ret=v) - bb_if_exit.append_instruction("jmp", bb_cont.label) +def _check_pre_post(pre, post): + ctx = parse_venom(pre) + for fn in ctx.functions.values(): + ac = IRAnalysesCache(fn) + MakeSSA(ac, fn).run_pass() + assert_ctx_eq(ctx, parse_venom(post)) - bb_then.append_instruction("assert", bb_then.append_instruction("mload", 96)) - bb_then.append_instruction("jmp", bb_if_exit.label) - bb_else.append_instruction("jmp", bb_if_exit.label) - bb.append_instruction("jmp", bb_cont.label) - - ac = IRAnalysesCache(fn) - MakeSSA(ac, fn).run_pass() - - condition_block = fn.get_basic_block("condition") - assert len(condition_block.instructions) == 2 - - phi_inst = condition_block.instructions[0] - assert phi_inst.opcode == "phi" - assert phi_inst.operands[0].name == "_global" - assert phi_inst.operands[1].name == "%1" - assert phi_inst.operands[2].name == "if_exit" - assert phi_inst.operands[3].name == "%1" - assert phi_inst.output.name == "%1" - assert phi_inst.output.value != phi_inst.operands[1].value - assert phi_inst.output.value != phi_inst.operands[3].value +def test_phi_case(): + pre = """ + function loop { + main: + %v = mload 64 + jmp @test + test: + jnz %v, @then, @else + then: + %t = mload 96 + assert %t + jmp @if_exit + else: + jmp @if_exit + if_exit: + %v = add %v, 1 + jmp @test + } + """ + post = """ + function loop { + main: + %v = mload 64 + jmp @test + test: + %v:1 = phi @main, %v, @if_exit, %v:2 + jnz %v:1, @then, @else + then: + %t = mload 96 + assert %t + jmp @if_exit + else: + jmp @if_exit + if_exit: + %v:2 = add %v:1, 1 + jmp @test + } + """ + _check_pre_post(pre, post) diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index b0f0b00341..4c75c67700 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -117,7 +117,7 @@ class IROperand: """ value: Any - _hash: Optional[int] + _hash: Optional[int] = None def __init__(self, value: Any) -> None: self.value = value @@ -149,9 +149,8 @@ class IRLiteral(IROperand): value: int def __init__(self, value: int) -> None: - super().__init__(value) assert isinstance(value, int), "value must be an int" - self.value = value + super().__init__(value) class IRVariable(IROperand): @@ -163,17 +162,17 @@ class IRVariable(IROperand): version: Optional[int] def __init__(self, name: str, version: int = 0) -> None: - super().__init__(name) assert isinstance(name, str) - assert isinstance(version, int | None) + # TODO: allow version to be None + assert isinstance(version, int) if not name.startswith("%"): name = f"%{name}" self._name = name self.version = version + value = name if version > 0: - self.value = f"{name}:{version}" - else: - self.value = name + value = f"{name}:{version}" + super().__init__(value) @property def name(self) -> str: @@ -193,8 +192,8 @@ class IRLabel(IROperand): def __init__(self, value: str, is_symbol: bool = False) -> None: assert isinstance(value, str), f"not a str: {value} ({type(value)})" assert len(value) > 0 - super().__init__(value) self.is_symbol = is_symbol + super().__init__(value) _IS_IDENTIFIER = re.compile("[0-9a-zA-Z_]*") diff --git a/vyper/venom/passes/make_ssa.py b/vyper/venom/passes/make_ssa.py index 56d3e1b7d3..ee013e0f1d 100644 --- a/vyper/venom/passes/make_ssa.py +++ b/vyper/venom/passes/make_ssa.py @@ -35,8 +35,8 @@ def _add_phi_nodes(self): Add phi nodes to the function. """ self._compute_defs() - work = {var: 0 for var in self.dom.dfs_walk} - has_already = {var: 0 for var in self.dom.dfs_walk} + work = {bb: 0 for bb in self.dom.dfs_walk} + has_already = {bb: 0 for bb in self.dom.dfs_walk} i = 0 # Iterate over all variables @@ -96,7 +96,6 @@ def _rename_vars(self, basic_block: IRBasicBlock): self.var_name_counters[v_name] = i + 1 inst.output = IRVariable(v_name, version=i) - # note - after previous line, inst.output.name != v_name outs.append(inst.output.name) for bb in basic_block.cfg_out: @@ -106,8 +105,9 @@ def _rename_vars(self, basic_block: IRBasicBlock): assert inst.output is not None, "Phi instruction without output" for i, op in enumerate(inst.operands): if op == basic_block.label: + var = inst.operands[i + 1] inst.operands[i + 1] = IRVariable( - inst.output.name, version=self.var_name_stacks[inst.output.name][-1] + var.name, version=self.var_name_stacks[var.name][-1] ) for bb in self.dom.dominated[basic_block]: From 7caa05590a4c8788b01795d3e18a7aefe1183ebf Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Dec 2024 15:28:59 -0500 Subject: [PATCH 2/2] refactor[venom]: refactor mem2var (#4421) remove special cases which were necessary before introduction of `palloca`. now they represent useless variable read/writes which are safe to remove. results in removal of a few instructions on benchmark contracts. --------- Co-authored-by: Harry Kalogirou --- vyper/venom/passes/mem2var.py | 51 ++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/vyper/venom/passes/mem2var.py b/vyper/venom/passes/mem2var.py index f93924d449..9f985e2b0b 100644 --- a/vyper/venom/passes/mem2var.py +++ b/vyper/venom/passes/mem2var.py @@ -34,31 +34,30 @@ def _mk_varname(self, varname: str): def _process_alloca_var(self, dfg: DFGAnalysis, var: IRVariable): """ - Process alloca allocated variable. If it is only used by mstore/mload/return - instructions, it is promoted to a stack variable. Otherwise, it is left as is. + Process alloca allocated variable. If it is only used by + mstore/mload/return instructions, it is promoted to a stack variable. + Otherwise, it is left as is. """ uses = dfg.get_uses(var) - if all([inst.opcode == "mload" for inst in uses]): - return - elif all([inst.opcode == "mstore" for inst in uses]): + if not all([inst.opcode in ["mstore", "mload", "return"] for inst in uses]): return - elif all([inst.opcode in ["mstore", "mload", "return"] for inst in uses]): - var_name = self._mk_varname(var.name) - for inst in uses: - if inst.opcode == "mstore": - inst.opcode = "store" - inst.output = IRVariable(var_name) - inst.operands = [inst.operands[0]] - elif inst.opcode == "mload": - inst.opcode = "store" - inst.operands = [IRVariable(var_name)] - elif inst.opcode == "return": - bb = inst.parent - idx = len(bb.instructions) - 1 - assert inst == bb.instructions[idx] # sanity - bb.insert_instruction( - IRInstruction("mstore", [IRVariable(var_name), inst.operands[1]]), idx - ) + + var_name = self._mk_varname(var.name) + var = IRVariable(var_name) + for inst in uses: + if inst.opcode == "mstore": + inst.opcode = "store" + inst.output = var + inst.operands = [inst.operands[0]] + elif inst.opcode == "mload": + inst.opcode = "store" + inst.operands = [var] + elif inst.opcode == "return": + bb = inst.parent + idx = len(bb.instructions) - 1 + assert inst == bb.instructions[idx] # sanity + new_inst = IRInstruction("mstore", [var, inst.operands[1]]) + bb.insert_instruction(new_inst, idx) def _process_palloca_var(self, dfg: DFGAnalysis, palloca_inst: IRInstruction, var: IRVariable): """ @@ -70,16 +69,18 @@ def _process_palloca_var(self, dfg: DFGAnalysis, palloca_inst: IRInstruction, va return var_name = self._mk_varname(var.name) + var = IRVariable(var_name) + # some value given to us by the calling convention palloca_inst.opcode = "mload" palloca_inst.operands = [palloca_inst.operands[0]] - palloca_inst.output = IRVariable(var_name) + palloca_inst.output = var for inst in uses: if inst.opcode == "mstore": inst.opcode = "store" - inst.output = IRVariable(var_name) + inst.output = var inst.operands = [inst.operands[0]] elif inst.opcode == "mload": inst.opcode = "store" - inst.operands = [IRVariable(var_name)] + inst.operands = [var]