Skip to content

Commit

Permalink
Optimizations for write_cfg_data (#4569)
Browse files Browse the repository at this point in the history
Makes progress in fixing the slowdown as proofs get large.

I'll call the "sync time" the time in between receiving a step result
and starting to wait for the next step result on the master thread.

As proofs get more and more nodes, calling `to_dict()` on every node
gets progressively harder and this was taking up a substantial portion
of the sync time as proofs got to 500+ nodes.

Instead of generating the entire KCFG dict with `KCFG.to_dict()` before
passing this to `KCFGStore.write_cfg_data()`, which discards all of this
work anyway for the final product and replaces it with just a list of
the node IDs, it is significantly faster to just pass it the list of the
node IDs and have it use the KCFG object, which already has a dictionary
storing nodes by ID to find the vacuous and stuck nodes, and to get the
newly created nodes. This way we can call `to_dict()` on each node only
when it is first created.

To give an idea of a benchmark, I used a `LoopsTest.test_sum_1000()`
(linear and long-running) with `--max-depth 1` and `--max-iterations
1000`. Before this change it gets to 1000 iterations in 59:06, and after
this change it does it in 40:31. Before the change the sync time as this
proof approached 1000 nodes ranged between about 3.4 to 4.2 seconds.
After the change ranged from about 1.39 to 1.54 seconds.

The big remaining chunk of sync time when the proof gets large seems to
be in `get_steps()`.

---------

Co-authored-by: Petar Maksimović <[email protected]>
  • Loading branch information
nwatson22 and PetarMax authored Aug 8, 2024
1 parent 394c95b commit d55c98f
Showing 1 changed file with 25 additions and 10 deletions.
35 changes: 25 additions & 10 deletions pyk/src/pyk/kcfg/kcfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,23 +73,18 @@ def kcfg_node_path(self, node_id: int) -> Path:
return self.kcfg_node_dir / f'{node_id}.json'

def write_cfg_data(
self, dct: dict[str, Any], deleted_nodes: Iterable[int] = (), created_nodes: Iterable[int] = ()
self, kcfg: KCFG, dct: dict[str, Any], deleted_nodes: Iterable[int] = (), created_nodes: Iterable[int] = ()
) -> None:
node_dict = {node_dct['id']: node_dct for node_dct in dct['nodes']}
vacuous_nodes = [
node_id for node_id in node_dict.keys() if KCFGNodeAttr.VACUOUS.value in node_dict[node_id]['attrs']
]
stuck_nodes = [
node_id for node_id in node_dict.keys() if KCFGNodeAttr.STUCK.value in node_dict[node_id]['attrs']
node_id for node_id in kcfg._nodes.keys() if KCFGNodeAttr.VACUOUS in kcfg._nodes[node_id].attrs
]
stuck_nodes = [node_id for node_id in kcfg._nodes.keys() if KCFGNodeAttr.STUCK in kcfg._nodes[node_id].attrs]
dct['vacuous'] = vacuous_nodes
dct['stuck'] = stuck_nodes
for node_id in deleted_nodes:
self.kcfg_node_path(node_id).unlink(missing_ok=True)
for node_id in created_nodes:
del node_dict[node_id]['attrs']
self.kcfg_node_path(node_id).write_text(json.dumps(node_dict[node_id]))
dct['nodes'] = list(node_dict.keys())
self.kcfg_node_path(node_id).write_text(json.dumps(kcfg._nodes[node_id].to_dict()))
self.kcfg_json_path.write_text(json.dumps(dct))

def read_cfg_data(self) -> dict[str, Any]:
Expand Down Expand Up @@ -542,6 +537,26 @@ def extend(
case _:
raise AssertionError()

def to_dict_no_nodes(self) -> dict[str, Any]:
nodes = list(self._nodes.keys())
edges = [edge.to_dict() for edge in self.edges()]
covers = [cover.to_dict() for cover in self.covers()]
splits = [split.to_dict() for split in self.splits()]
ndbranches = [ndbranch.to_dict() for ndbranch in self.ndbranches()]

aliases = dict(sorted(self._aliases.items()))

res = {
'next': self._node_id,
'nodes': nodes,
'edges': edges,
'covers': covers,
'splits': splits,
'ndbranches': ndbranches,
'aliases': aliases,
}
return {k: v for k, v in res.items() if v}

def to_dict(self) -> dict[str, Any]:
nodes = [node.to_dict() for node in self.nodes]
edges = [edge.to_dict() for edge in self.edges()]
Expand Down Expand Up @@ -1261,7 +1276,7 @@ def reachable_nodes(self, source_id: NodeIdLike, *, reverse: bool = False) -> se
def write_cfg_data(self) -> None:
assert self._kcfg_store is not None
self._kcfg_store.write_cfg_data(
self.to_dict(), deleted_nodes=self._deleted_nodes, created_nodes=self._created_nodes
self, self.to_dict_no_nodes(), deleted_nodes=self._deleted_nodes, created_nodes=self._created_nodes
)
self._deleted_nodes.clear()
self._created_nodes.clear()
Expand Down

0 comments on commit d55c98f

Please sign in to comment.