Skip to content

Commit

Permalink
minif2f: fix & check
Browse files Browse the repository at this point in the history
  • Loading branch information
RexWzh committed Jan 12, 2025
1 parent 70e3a72 commit 52aae67
Show file tree
Hide file tree
Showing 6 changed files with 581 additions and 513 deletions.
488 changes: 244 additions & 244 deletions experiments/minif2f/test.jsonl

Large diffs are not rendered by default.

488 changes: 244 additions & 244 deletions experiments/minif2f/valid.jsonl

Large diffs are not rendered by default.

23 changes: 22 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import pandas as pd
from pathlib import Path
from pantograph import Server
from pantograph.expr import GoalState
from typing import Callable
from loguru import logger
logger.remove()
LOGGER_FORMAT = "<green>{level}</green> | <lvl>{message}</lvl>"
Expand All @@ -15,6 +17,17 @@
MINIF2F_TEST_FILE = PROJECT_ROOT / 'experiments/minif2f/test.jsonl'
MINIF2F_ROOT = PROJECT_ROOT / 'experiments/minif2f/MiniF2F'

# Example Statements
EXAMPLE_STATEMENT = "∀ (p q: Prop), p ∨ q → q ∨ p"
EXAMPLE_TACTICS = [
"intro p q h",
"cases h",
"apply Or.inr",
"assumption",
"apply Or.inl",
"assumption"
]

@pytest.fixture
def minif2f_root():
return MINIF2F_ROOT
Expand All @@ -30,4 +43,12 @@ def minif2f_test():
@pytest.fixture
def minif2f_server():
server = Server(project_path=MINIF2F_ROOT, imports=['Mathlib', 'Aesop'])
return server
return server

@pytest.fixture
def example_statement():
return EXAMPLE_STATEMENT

@pytest.fixture
def example_tactics():
return EXAMPLE_TACTICS
26 changes: 3 additions & 23 deletions tests/test_minif2f.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,8 @@
from tqdm import tqdm
import pytest
from loguru import logger
from .utils import verify_theorem_loading

default_header = """
set_option maxHeartbeats 0
open BigOperators Real Nat Topology Rat
"""

def verify_theorem_loading(server: Server, theorem: str) -> tuple[bool, str]:
"""Helper function to verify theorem loading."""
try:
unit = server.load_sorry(f"{default_header}\n{theorem}")[2]
goal_state, message = unit.goal_state, '\n'.join(unit.messages)
is_valid = (
goal_state is not None and
len(goal_state.goals) == 1 and
'error' not in message.lower()
)
return is_valid, message
except Exception as e:
logger.error(f"Exception while loading theorem: {e}")
return False, str(e)

@pytest.mark.basic
def test_single_case(minif2f_server: Server, minif2f_test: DataFrame):
Expand Down Expand Up @@ -53,6 +35,7 @@ def test_load_theorem(minif2f_server: Server, minif2f_test: DataFrame, minif2f_v
is_valid, _ = verify_theorem_loading(minif2f_server, theorem)
if not is_valid:
failed_valid.append(i)
minif2f_server.restart()
# Test test theorems
logger.info("Testing test theorems...")
failed_test = []
Expand All @@ -62,6 +45,7 @@ def test_load_theorem(minif2f_server: Server, minif2f_test: DataFrame, minif2f_v
is_valid, _ = verify_theorem_loading(minif2f_server, theorem)
if not is_valid:
failed_test.append(i)
minif2f_server.restart()

# Report results
total_valid = len(minif2f_valid)
Expand All @@ -80,7 +64,3 @@ def test_load_theorem(minif2f_server: Server, minif2f_test: DataFrame, minif2f_v
if failed_test:
err_msg += f"\nFailed test theorems: {failed_test}"
raise AssertionError(err_msg)

@pytest.mark.advance
def test_advance_cases():
pass
33 changes: 32 additions & 1 deletion tests/test_special_tactics.py
Original file line number Diff line number Diff line change
@@ -1 +1,32 @@
"""Test for special tactics, such as have, let, etc."""
"""Test for special tactics, such as have, let, etc."""
import pytest
from pantograph import Server
from pantograph.expr import *
from pantograph.server import TacticFailure
from .utils import apply_tactics


def test_simple_have():
server = Server()
state0 = server.goal_start("1 + (1: Nat) = 2")
state1 = server.goal_tactic(state0, goal_id=0, tactic=TacticHave(branch="(2:Nat) = 1 + 1", binder_name="h"))
state2 = server.goal_tactic(state1, goal_id=0, tactic="simp")
state3 = server.goal_tactic(state2, goal_id=0, tactic="simp")
assert state1.goals == [
Goal(
variables=[],
target="2 = 1 + 1",
),
Goal(
variables=[Variable(name="h", t="2 = 1 + 1")],
target="1 + 1 = 2",
),
]
assert state2.goals == [
Goal(
variables=[Variable(name="h", t="2 = 1 + 1")],
target="1 + 1 = 2",
),
]
assert state3.is_solved

36 changes: 36 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from pantograph import Server
from pantograph.expr import GoalState
from loguru import logger

def apply_tactics(server:Server, state:GoalState, tactics:list):
states = [state]
logger.opt(raw=True).debug(f"{state}\n")
for tactic in tactics:
logger.opt(raw=True).debug(f"{tactic}\n")
state = server.goal_tactic(state, 0, tactic)
if not state.is_solved:
logger.opt(raw=True).debug(f"{state}\n")
else:
logger.debug("close proof")
states.append(state)
return states

default_header = """
open BigOperators Real Nat Topology Rat
"""

def verify_theorem_loading(server: Server, theorem: str) -> tuple[bool, str]:
"""Helper function to verify theorem loading."""
try:
# the first argument is for the `open` command
unit = server.load_sorry(f"{default_header}\n{theorem} := by sorry")[1]
goal_state, message = unit.goal_state, '\n'.join(unit.messages)
is_valid = (
goal_state is not None and
len(goal_state.goals) == 1 and
'error' not in message.lower()
)
return is_valid, message
except Exception as e:
logger.error(f"Exception while loading theorem: {e}")
return False, str(e)

0 comments on commit 52aae67

Please sign in to comment.