Skip to content

Commit

Permalink
Expose the ASDF history in meta (#513)
Browse files Browse the repository at this point in the history
* Add a meta dict to TiledDataset

* Expose ASDF history in `[Tiled]Dataset.meta`

Also makes sure it doesn't get saved back out.

* Refactor schema and converter for TiledDataset

now there is meta

* Fix figure test

* Add quality to shadow dataset

* Add changelogs

* Fix level_1_dataset_schema
  • Loading branch information
Cadair authored Feb 4, 2025
1 parent 832e1ff commit 5b0cd54
Show file tree
Hide file tree
Showing 13 changed files with 113 additions and 20 deletions.
1 change: 1 addition & 0 deletions changelog/513.feature.1.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
History of the ASDF file, such as the versions of packages and extensions used when writing it, is now exposed in ``TiledDataset.meta["history"]`` and ``Dataset.meta["history"]``.
1 change: 1 addition & 0 deletions changelog/513.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
``TiledDataset`` now has a ``.meta`` dictionary like that of ``Dataset``.
3 changes: 2 additions & 1 deletion dkist/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,8 @@ def simple_tiled_dataset(dataset):
for ds in datasets:
ds.meta["inventory"] = dataset.meta["inventory"]
dataset_array = np.array(datasets).reshape((2,2))
return TiledDataset(dataset_array, dataset.meta["inventory"])
meta = {"inventory": dataset.meta["inventory"]}
return TiledDataset(dataset_array, meta=meta)


@pytest.fixture
Expand Down
5 changes: 2 additions & 3 deletions dkist/dataset/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,7 @@ def _load_from_results(results):
return _load_from_iterable(results)


# In Python 3.11 we can use the Union type here
@load_dataset.register(list)
@load_dataset.register(tuple)
@load_dataset.register(tuple | list)
def _load_from_iterable(iterable):
"""
A list or tuple of valid inputs to ``load_dataset``.
Expand Down Expand Up @@ -245,6 +243,7 @@ def _load_from_asdf(filepath):
with asdf.open(filepath, custom_schema=schema_path.as_posix(),
lazy_load=False, **asdf_open_memory_mapping_kwarg(memmap=False)) as ff:
ds = ff.tree["dataset"]
ds.meta["history"] = ff.tree["history"]
if isinstance(ds, TiledDataset):
for sub in ds.flat:
sub.files.basepath = base_path
Expand Down
7 changes: 4 additions & 3 deletions dkist/dataset/tests/test_tiled_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,17 @@ def test_tiled_dataset_headers(simple_tiled_dataset, dataset):


def test_tiled_dataset_invalid_construction(dataset, dataset_4d):
meta = {"inventory": dataset.meta["inventory"]}
with pytest.raises(ValueError, match="inventory record of the first dataset"):
TiledDataset(np.array((dataset, dataset_4d)))

with pytest.raises(ValueError, match="physical types do not match"):
TiledDataset(np.array((dataset, dataset_4d)), inventory=dataset.meta["inventory"])
TiledDataset(np.array((dataset, dataset_4d)), meta=meta)

ds2 = copy.deepcopy(dataset)
ds2.meta["inventory"] = {"hello": "world"}
with pytest.raises(ValueError, match="inventory records of all the datasets"):
TiledDataset(np.array((dataset, ds2)), dataset.meta["inventory"])
TiledDataset(np.array((dataset, ds2)), meta=meta)


def test_tiled_dataset_from_components(dataset):
Expand Down Expand Up @@ -85,7 +86,7 @@ def test_tileddataset_plot(share_zscale):
# https://github.com/sunpy/ndcube/issues/815
for tile in newtiles:
tile.meta["inventory"] = ori_ds.inventory
ds = TiledDataset(np.array(newtiles).reshape(ori_ds.shape), inventory=newtiles[0].inventory)
ds = TiledDataset(np.array(newtiles).reshape(ori_ds.shape), meta={"inventory": newtiles[0].inventory})
fig = plt.figure(figsize=(12, 15))
ds.plot(0, share_zscale=share_zscale, figure=fig)
return plt.gcf()
Expand Down
37 changes: 27 additions & 10 deletions dkist/dataset/tiled_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
but not representable in a single NDCube derived object as the array data are
not contiguous in the spatial dimensions (due to overlaps and offsets).
"""
import warnings
from textwrap import dedent
from collections.abc import Collection

Expand All @@ -16,6 +17,7 @@

from dkist.io.file_manager import FileManager, StripedExternalArray
from dkist.io.loaders import AstropyFITSLoader
from dkist.utils.exceptions import DKISTDeprecationWarning

from .dataset import Dataset
from .utils import dataset_info_str
Expand All @@ -27,15 +29,15 @@ class TiledDatasetSlicer:
"""
Basic class to provide the slicing
"""
def __init__(self, data, inventory):
def __init__(self, data, meta):
    """Store the tile array and shared metadata used to rebuild sliced copies."""
    self.meta = meta
    self.data = data

def __getitem__(self, slice_):
    """Apply ``slice_`` to every tile and return a new `TiledDataset` of the results."""
    sliced_tiles = [tile[slice_] for tile in self.data.flat]
    new_array = np.array(sliced_tiles).reshape(self.data.shape)
    return TiledDataset(new_array, meta=self.meta)


class TiledDataset(Collection):
Expand Down Expand Up @@ -80,12 +82,20 @@ def _from_components(cls, shape, file_managers, wcses, header_tables, inventory)
datasets[i]._file_manager = fm
datasets = datasets.reshape(shape)

return cls(datasets, inventory)
return cls(datasets, meta={"inventory": inventory})

def __init__(self, dataset_array, inventory=None):
def __init__(self, dataset_array, inventory=None, *, meta=None):
    """
    Parameters
    ----------
    dataset_array
        A (nested) iterable of `.Dataset` objects; coerced to an object ndarray.
    inventory
        Deprecated; pass the inventory record as ``meta["inventory"]`` instead.
    meta
        A dictionary of metadata for the whole tiled dataset.  An
        ``"inventory"`` key is always present after construction.
    """
    if inventory is not None:
        warnings.warn(
            "The inventory= kwarg is deprecated, inventory should be passed as part of the meta argument",
            DKISTDeprecationWarning,
            stacklevel=2,  # point the warning at the caller, not this frame
        )
    self._data = np.array(dataset_array, dtype=object)
    # Shallow-copy meta so we never mutate the dict the caller handed us
    # when we write the "inventory" key below.
    meta = dict(meta) if meta else {}
    # meta["inventory"] wins over the deprecated inventory= kwarg.
    inventory = meta.get("inventory", inventory or {})
    self._validate_component_datasets(self._data, inventory)
    meta["inventory"] = inventory
    self._meta = meta

def __contains__(self, x):
    # Membership is tested by identity (is), not equality, over all tiles.
    for tile in self._data.flat:
        if tile is x:
            return True
    return False
Expand All @@ -101,7 +111,7 @@ def __getitem__(self, aslice):
if isinstance(new_data, Dataset):
return new_data

return type(self)(new_data, inventory=self.inventory)
return type(self)(new_data, meta=self.meta)

@staticmethod
def _validate_component_datasets(datasets, inventory):
Expand All @@ -122,14 +132,21 @@ def flat(self):
"""
Represent this `.TiledDataset` as a 1D array.
"""
return type(self)(self._data.flat, self.inventory)
return type(self)(self._data.flat, meta=self.meta)

@property
def meta(self):
    """
    A dictionary of extra metadata about the dataset.

    Always contains an ``"inventory"`` key (set in ``__init__``).  When
    loaded from an ASDF file a ``"history"`` key may also be injected by
    the loader; that key is stripped again when the dataset is saved.
    """
    return self._meta

@property
def inventory(self):
    """
    The inventory record as kept by the data center for this dataset.
    """
    # Convenience accessor for the "inventory" entry of .meta.
    return self.meta["inventory"]

@property
def combined_headers(self):
Expand Down Expand Up @@ -260,7 +277,7 @@ def slice_tiles(self):
helioprojective latitude | x | x
"""

return TiledDatasetSlicer(self._data, self.inventory)
return TiledDatasetSlicer(self._data, self.meta)

# TODO: def regrid()

Expand Down
7 changes: 6 additions & 1 deletion dkist/io/asdf/converters/dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import copy

from asdf.extension import Converter


Expand Down Expand Up @@ -62,7 +64,10 @@ def to_yaml_tree(self, dataset, tag, ctx):
raise ValueError("This Dataset object can not be saved to asdf as "
"it was not constructed from a set of FITS files.")
node = {}
node["meta"] = dataset.meta or {}
# Copy the meta so we don't pop from the one in memory
node["meta"] = copy.copy(dataset.meta) or {}
# If the history key has been injected into the meta, do not save it
node["meta"].pop("history", None)
node["wcs"] = dataset.wcs
node["data"] = dataset.files
if dataset.unit:
Expand Down
16 changes: 14 additions & 2 deletions dkist/io/asdf/converters/tiled_dataset.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,32 @@
import copy

from asdf.extension import Converter


class TiledDatasetConverter(Converter):
    """ASDF converter serialising `TiledDataset` to and from its YAML tree."""

    tags = [
        "tag:dkist.nso.edu:dkist/tiled_dataset-0.1.0",
        "asdf://dkist.nso.edu/tags/tiled_dataset-1.0.0",
        "asdf://dkist.nso.edu/tags/tiled_dataset-1.1.0",
    ]
    types = ["dkist.dataset.tiled_dataset.TiledDataset"]

    def from_yaml_tree(cls, node, tag, ctx):
        from dkist.dataset.tiled_dataset import TiledDataset

        # Older files have no "meta" but store the inventory record at
        # the top level of the node; fold it into meta for those files.
        meta = node.get("meta", {})
        if "inventory" not in meta:
            inventory = node.get("inventory", None)
            if inventory:
                meta["inventory"] = inventory

        return TiledDataset(node["datasets"], meta=meta)

    def to_yaml_tree(cls, tiled_dataset, tag, ctx):
        # Shallow-copy the meta so the pop below never mutates the
        # in-memory object.
        meta = copy.copy(tiled_dataset.meta)
        # "history" is injected at load time; it must not be written back out.
        meta.pop("history", None)
        return {
            "meta": meta,
            "datasets": tiled_dataset._data.tolist(),
        }
2 changes: 2 additions & 0 deletions dkist/io/asdf/entry_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def get_extensions():
dkist_converters = [FileManagerConverter(), DatasetConverter(), TiledDatasetConverter()]
wcs_converters = [VaryingCelestialConverter(), CoupledCompoundConverter(), RavelConverter(), AsymmetricMappingConverter()]
return [
ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.3.0",
converters=dkist_converters),
ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.2.0",
converters=dkist_converters),
ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.1.0",
Expand Down
14 changes: 14 additions & 0 deletions dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
%YAML 1.1
---
id: asdf://dkist.nso.edu/manifests/dkist-1.3.0
extension_uri: asdf://dkist.nso.edu/dkist/extensions/dkist-1.3.0
title: DKIST extension
description: ASDF schemas and tags for DKIST classes.

tags:
- schema_uri: "asdf://dkist.nso.edu/schemas/file_manager-1.0.0"
tag_uri: "asdf://dkist.nso.edu/tags/file_manager-1.0.0"
- schema_uri: "asdf://dkist.nso.edu/schemas/dataset-1.1.0"
tag_uri: "asdf://dkist.nso.edu/tags/dataset-1.2.0"
- schema_uri: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0"
tag_uri: "asdf://dkist.nso.edu/tags/tiled_dataset-1.1.0"
38 changes: 38 additions & 0 deletions dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
%YAML 1.1
---
$schema: "http://stsci.edu/schemas/yaml-schema/draft-01"
id: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0"

title: |
A DKIST Tiled Dataset object.
description:
The container for a set of Dataset objects.

type: object
properties:
datasets:
description: A nested structure of Dataset objects
type: array
items:
type: array
items:
- tag: "asdf://dkist.nso.edu/tags/dataset-1.*"

meta:
description: Dataset metadata, describing the whole dataset.
type: object
properties:
quality:
description: A copy of the quality report of these observations.
type: object

inventory:
description: A copy of the inventory record for this dataset.
type: object

required: [inventory]
additionalProperties: true

required: [datasets, meta]
additionalProperties: false
...
1 change: 1 addition & 0 deletions dkist/io/asdf/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def test_save_dataset_with_file_schema(tagobj, tmpdir):
tree = {"dataset": tagobj}
with importlib_resources.as_file(importlib_resources.files("dkist.io") / "level_1_dataset_schema.yaml") as schema_path:
with asdf.AsdfFile(tree, custom_schema=schema_path.as_posix()) as afile:
afile.validate() # it seems that asdf 4.0 does not validate the custom schema on write?
afile.write_to(Path(tmpdir / "test.asdf"))


Expand Down
1 change: 1 addition & 0 deletions dkist/io/level_1_dataset_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ properties:
- $ref: "asdf://dkist.nso.edu/schemas/dataset-1.1.0"
- $ref: "asdf://dkist.nso.edu/schemas/tiled_dataset-0.1.0"
- $ref: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.0.0"
- $ref: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0"

required: [dataset]
additionalProperties: true

0 comments on commit 5b0cd54

Please sign in to comment.