Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[API] Rework the return of a compiler to avoid async and make target optional. #36

Merged
merged 1 commit into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 11 additions & 12 deletions examples/tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"source": [
"To extract machine-learning features from our dataset, we will need to configure a feature extractor. This library provides several feature extractors to either make use of a physical quantum device (QPU), or a variety of emulators.\n",
"\n",
"To configure a feature extractor, we will need to give it a _compiler_, whose task is to take a list of graphs, extract embeddings and compile these embeddings to _sequences of pulses_, the format that can be executed by either a QPU or an emulator. For this tutorial, our dataset is composed of molecule graphs, so we will use the `MoleculeGraphCompiler`:"
"To configure a feature extractor, we will need to give it a _compiler_, whose task is to take a list of graphs, extract embeddings and compile these embeddings to _sequences of pulses_, the format that can be executed by either a QPU or an emulator. For this tutorial, our dataset is composed of molecule graphs encoded with the PTC-FM conventions, so we will use the `PTCFMCompiler`:"
Yoric marked this conversation as resolved.
Show resolved Hide resolved
]
},
{
Expand All @@ -65,14 +65,14 @@
"source": [
"import qek.data.graphs as qek_graphs\n",
"\n",
"compiler = qek_graphs.MoleculeGraphCompiler()"
"compiler = qek_graphs.PTCFMCompiler()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This library provides other compilers from other formats of graphs."
"This library provides other compilers from other formats of graphs, including the `MoleculeGraphCompiler` and general-purpose graph compilers for pytorch_geometric or networkx graphs."
]
},
{
Expand Down Expand Up @@ -126,8 +126,8 @@
"# You can increase this value to higher number of qubits, but this\n",
"# notebook will take longer to execute and may run out of memory.\n",
"max_qubits = 5\n",
"processed_dataset = await extractor.run(max_qubits=max_qubits) # Don't forget to `await`!\n",
"display(\"Extracted features from %s samples\"% (len(processed_dataset), ))"
"processed_dataset = extractor.run(max_qubits=max_qubits)\n",
"display(\"Extracted features from %s samples\"% (len(processed_dataset.states), ))"
]
},
{
Expand Down Expand Up @@ -159,8 +159,6 @@
"HAVE_PASQAL_ACCOUNT = False # If you have a PASQAL Cloud account, fill in the details and set this to `True`.\n",
Yoric marked this conversation as resolved.
Show resolved Hide resolved
"\n",
"if HAVE_PASQAL_ACCOUNT:\n",
" processed_dataset = []\n",
"\n",
" # Use the QPU Extractor.\n",
" extractor = qek_extractors.QPUExtractor(\n",
" # Once computing is complete, data will be saved in this file.\n",
Expand All @@ -179,12 +177,12 @@
" display(\"Compiled %s sequences\" % (len(compiled), ))\n",
"\n",
" # Launch the execution.\n",
" execution = extractor.run()\n",
" display(\"Work enqueued with ids %s\" % (extractor.batch_ids, ))\n",
" processed_dataset = extractor.run()\n",
" display(\"Work enqueued with ids %s\" % (processed_dataset.batch_ids, ))\n",
"\n",
" # ...and wait for the results.\n",
" processed_dataset = await execution\n",
" display(\"Extracted features from %s samples\"% (len(processed_dataset), ))"
" await processed_dataset\n",
" display(\"Extracted states from %s samples\"% (len(processed_dataset.states), ))"
]
},
{
Expand Down Expand Up @@ -221,7 +219,8 @@
"outputs": [],
"source": [
"import qek.data.dataset as qek_dataset\n",
"processed_dataset = qek_dataset.load_dataset(file_path=\"ptcfm_processed_dataset.json\")\n",
"from qek.data.dataset import ProcessedData\n",
"processed_dataset: list[ProcessedData] = qek_dataset.load_dataset(file_path=\"ptcfm_processed_dataset.json\")\n",
"print(f\"Size of the quantum compatible dataset = {len(processed_dataset)}\")"
]
},
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dependencies = [
"torch",
"torch_geometric",
"matplotlib",
"emu-mps",
"emu-mps~=1.2.0",
]

[tool.hatch.metadata]
Expand Down
38 changes: 19 additions & 19 deletions qek/data/dataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import collections
import json
from typing import cast
from typing import Final, cast
import matplotlib

import logging
Expand All @@ -21,8 +21,8 @@ class ProcessedData:
executed on the device.
state_dict: A dictionary {bitstring: number of instances}
for this graph.
target: The machine-learning target (in this case, a value
in {0, 1}, as specified by the original graph).
target: If specified, the machine-learning target, as a
value `0` or `1`.

The state dictionary represents an approximation of the quantum
state of the device for this graph after completion of the
Expand All @@ -40,14 +40,24 @@ class ProcessedData:
specific graph).
"""

sequence: pl.Sequence
state_dict: dict[str, int]
_dist_excitation: np.ndarray
target: int
sequence: Final[pl.Sequence]
state_dict: Final[dict[str, int]]
_dist_excitation: Final[np.ndarray]
target: Final[int | None]

def __init__(self, sequence: pl.Sequence, state_dict: dict[str, np.int64], target: int):
def __init__(
self, sequence: pl.Sequence, state_dict: dict[str, int | np.int64], target: int | None
):
self.sequence = sequence
self.state_dict = _convert_np_int64_to_int(data=state_dict)
# Some emulators will actually return a `dict[str, int64]` instead of a `dict[str, int]`, and
# `int64` is not JSON-serializable.
#
# `int64` is not JSON-serializable because Python's `json` encoder only handles built-in
# numeric types; moreover, JSON ints are only reliably interoperable up to 2^53-1.
Yoric marked this conversation as resolved.
Show resolved Hide resolved
# In practice, this should not be a problem, since the `int`/`int64` in our dict is
# limited to the number of runs, and we don't expect to be launching 2^53 consecutive runs
# for a single sequence on a device in any foreseeable future (even assuming a run of only
# 1ns, this would still take ~104 days of uninterrupted execution).
self.state_dict = {k: int(value) for k, value in state_dict.items()}
self._dist_excitation = dist_excitation(self.state_dict)
self.target = target

Expand Down Expand Up @@ -156,16 +166,6 @@ def dist_excitation(state_dict: dict[str, int], size: int | None = None) -> np.n
return result


def _convert_np_int64_to_int(data: dict[str, np.int64]) -> dict[str, int]:
"""
Utility function: convert the values of a dict from `np.int64` to `int`,
for serialization purposes.
"""
return {
key: (int(value) if isinstance(value, np.integer) else value) for key, value in data.items()
}


def save_dataset(dataset: list[ProcessedData], file_path: str) -> None:
"""Saves a dataset to a JSON file.

Expand Down
Loading