Skip to content

Commit

Permalink
Manually specify dates for Network (#167)
Browse files Browse the repository at this point in the history
* allow us to specify dates manually for the ifg network

* use manual dates to allow a max-bandwidth network in `wrapped_phase`

* add changelog entries

* move change log in prep of release
  • Loading branch information
scottstanie authored Nov 29, 2023
1 parent d83b664 commit 3601816
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 22 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
# [Unreleased](https://github.com/isce-framework/dolphin/compare/v0.6.1...main)
# [Unreleased](https://github.com/isce-framework/dolphin/compare/v0.7.0...main)

# [v0.7.0](https://github.com/isce-framework/dolphin/compare/v0.6.1...v0.7.0)

**Added**
- `MiniStackPlanner` and `MiniStackInfo` class which does the planning for how a large stack of SLCs will be processed in batches.
- Previously this was done at run time in `sequential.py`. We want to separate that out to view the plan in advance/allow us to dispatch the work to multiple machines.
- `CompressedSlcInfo` class added to track the attributes of a compressed SLC file created during the workflow.
- This has the `reference_date` as an attribute, which allows us to know what the base phase is even without starting from
the first SLC in the stack (i.e. if we have limited the number of compressed SLCs)
- Added better/more complete metadata to the compressed SLC Geotiff tags, including the phase reference date
- Before we were relying on the filename convention, which was not enough information
- config: `phase_linking.max_compressed_slcs` to cap the number of compressed SLCs added during large-stack sequential workflows
- `interferogram`: Add ability to specify manual dates for a `Network`/`VRTInterferogram`, which lets us re-interfere the phase-linking results

**Changed**
- Date functions have been moved from `dolphin.utils` to `dolphin._dates`. They are accessible at `dolphin.get_dates`, etc
- `get_dates` now uses `datetime.datetime` instead of `datetime.date`.
- This is to allow for more flexibility in the date parsing, and to allow for the use of `datetime.date` or `datetime.datetime` in the output filenames.
- `VRTStack` has been moved to `_readers.py`. The ministack planning functions have been removed to focus the class on just reading input GDAL rasters.

**Fixed**
- When starting with Compressed SLCs in the list of input SLCs, the workflows will now recognize them, find the correct reference date, and form all the correct interferograms

**Removed**
- Extra subsetting functions from `VRTStack` have been removed, as they are not used in the workflow and they reimplement simple GDAL calls.
- `CPURecorder` and `GPURecorder` have been removed to simplify code. May be moved to separate repo.
Expand Down
94 changes: 75 additions & 19 deletions src/dolphin/interferogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ class VRTInterferogram(BaseModel, extra="allow"):
)
ref_slc: Union[Path, str] = Field(..., description="Path to reference SLC file")
sec_slc: Union[Path, str] = Field(..., description="Path to secondary SLC file")
verify_slcs: bool = Field(
True, description="Raise an error if `ref_slc` or `sec_slc` aren't readable."
)
outdir: Optional[Path] = Field(
None,
description=(
Expand All @@ -82,7 +85,10 @@ class VRTInterferogram(BaseModel, extra="allow"):
),
validate_default=True,
)
date_format: str = DEFAULT_DATETIME_FORMAT
date_format: str = Field(
DEFAULT_DATETIME_FORMAT,
description="datetime format used to parse SLC filenames",
)
ref_date: Optional[DateOrDatetime] = Field(
None,
description="Reference date of the interferogram. If not specified,"
Expand All @@ -93,6 +99,9 @@ class VRTInterferogram(BaseModel, extra="allow"):
description="Secondary date of the interferogram. If not specified,"
"will be parsed from `sec_slc` using `date_format`.",
)
resolve_paths: bool = Field(
True, description="Resolve paths of `ref_slc`/`sec_slc` when saving the VRT"
)
write: bool = Field(True, description="Write the VRT file to disk")

pixel_function: Literal["cmul", "mul"] = "cmul"
Expand All @@ -116,11 +125,7 @@ def _check_gdal_string(cls, v: Union[Path, str], info: ValidationInfo):
subdataset = info.data.get("subdataset")
# If we're using a subdataset, create the GDAL-readable string
gdal_str = io.format_nc_filename(v, subdataset)
try:
# First make sure it's openable
gdal.Info(fspath(gdal_str))
except RuntimeError:
raise ValueError(f"File {gdal_str} is not a valid GDAL dataset")

# Then, if we passed a string like 'NETCDF:"file.nc":band', make sure
# the file is absolute
if ":" in str(gdal_str):
Expand All @@ -145,9 +150,19 @@ def _check_output_dir(cls, v, info: ValidationInfo):
def _parse_dates(self) -> "VRTInterferogram":
# Get the dates from the input files if not provided
if self.ref_date is None:
self.ref_date = get_dates(self.ref_slc, fmt=self.date_format)[0]
d = get_dates(self.ref_slc, fmt=self.date_format)
if not d:
raise ValueError(
f"No dates found in '{self.ref_slc}' like {self.date_format}"
)
self.ref_date = d[0]
if self.sec_date is None:
self.sec_date = get_dates(self.sec_slc, fmt=self.date_format)[0]
d = get_dates(self.sec_slc, fmt=self.date_format)
if not d:
raise ValueError(
f"No dates found in '{self.sec_slc}' like {self.date_format}"
)
self.sec_date = d[0]

return self

Expand All @@ -159,6 +174,10 @@ def _validate_files(self) -> "VRTInterferogram":
if not ref_slc or not sec_slc:
# Skip validation if files are not set
return self
# Only run this check if we care to validate the readability
if not self.verify_slcs:
return self

ds1 = gdal.Open(fspath(ref_slc))
ds2 = gdal.Open(fspath(sec_slc))
xsize, ysize = ds1.RasterXSize, ds1.RasterYSize
Expand Down Expand Up @@ -195,15 +214,14 @@ def _form_path(self) -> "VRTInterferogram":
self.path = path
return self

def __init__(self, **data):
"""Create a VRTInterferogram object and write the VRT file."""
super().__init__(**data)
self.dates = (self.ref_date, self.sec_date)
if self.write:
self._write_vrt()
@model_validator(mode="after")
def _write_vrt(self) -> "VRTInterferogram":
"""Write out the VRT if requested."""
if not self.write:
return self

def _write_vrt(self):
xsize, ysize = io.get_raster_xysize(self.ref_slc)
assert self.path is not None
self.path.parent.mkdir(parents=True, exist_ok=True)
with open(self.path, "w") as f:
f.write(
Expand All @@ -216,6 +234,7 @@ def _write_vrt(self):
)
)
io.copy_projection(self.ref_slc, self.path)
return self

def load(self):
"""Load the interferogram as a numpy array."""
Expand All @@ -226,6 +245,10 @@ def shape(self):
xsize, ysize = io.get_raster_xysize(self.path)
return (ysize, xsize)

@property
def dates(self):
    """Return the `(ref_date, sec_date)` pair of this interferogram as a tuple."""
    return self.ref_date, self.sec_date

@classmethod
def from_vrt_file(cls, path: Filename) -> "VRTInterferogram":
"""Load a VRTInterferogram from an existing VRT file.
Expand Down Expand Up @@ -274,8 +297,13 @@ class Network:
max_temporal_baseline : Optional[float], default = None
Maximum temporal baseline to include in an interferogram, in days.
date_format : str, optional
Date format to use when parsing dates from the input files (for temporal baseline).
Date format to use when parsing dates from the input files (only
used if setting `max_temporal_baseline`).
defaults to [`dolphin._dates.DEFAULT_DATETIME_FORMAT`][]
dates: Sequence[DateOrDatetime], optional
Alternative to `date_format`: manually specify the date/datetime of each item in
`slc_list` instead of parsing the name.
Only used for `max_temporal_baseline` networks.
reference_idx : int | None, optional
Index of the SLC to use as the reference for all interferograms.
Defaults to None.
Expand All @@ -285,6 +313,8 @@ class Network:
If passing NetCDF files in `slc_list`, the subdataset of the image data
within the file.
Can also pass a sequence of one subdataset per entry in `slc_list`
verify_slcs : bool, default = True
Raise an error if any SLCs aren't GDAL-readable.
write : bool
Whether to write the VRT files to disk. Defaults to True.
"""
Expand All @@ -294,18 +324,18 @@ class Network:
max_bandwidth: Optional[int] = None
max_temporal_baseline: Optional[float] = None
date_format: str = DEFAULT_DATETIME_FORMAT
dates: Optional[Sequence[DateOrDatetime]] = None
reference_idx: Optional[int] = None
indexes: Optional[Sequence[tuple[int, int]]] = None
subdataset: Optional[Union[str, Sequence[str]]] = None
verify_slcs: bool = True
write: bool = True

def __post_init__(
self,
*args,
**kwargs,
):
self.slc_file_pairs = self._make_ifg_pairs()

if self.subdataset is None or isinstance(self.subdataset, str):
self._slc_to_subdataset = {slc: self.subdataset for slc in self.slc_list}
else:
Expand All @@ -317,18 +347,36 @@ def __post_init__(

if self.outdir is None:
self.outdir = Path(self.slc_list[0]).parent

# Set the dates to be used for each ifg
if self.dates is None:
# Use the first one we find in the name
self.dates = [get_dates(f, fmt=self.date_format)[0] for f in self.slc_list]
if len(self.dates) != len(self.slc_list):
raise ValueError(f"{len(self.dates) = }, but {len(self.slc_list) = }")
self._slc_to_date = {slc: d for slc, d in zip(self.slc_list, self.dates)}

# Run the appropriate network creation based on the options we passed
self.slc_file_pairs = self._make_ifg_pairs()

# Create each VRT file
self.ifg_list: list[VRTInterferogram] = self._create_vrt_ifgs()

def _create_vrt_ifgs(self) -> list[VRTInterferogram]:
"""Write out a VRTInterferogram for each ifg."""
ifg_list: list[VRTInterferogram] = []
for ref, sec in self._gdal_file_strings:
date_pairs = self._get_ifg_date_pairs()
for idx, (ref, sec) in enumerate(self._gdal_file_strings):
ref_date, sec_date = date_pairs[idx]

v = VRTInterferogram(
ref_slc=ref,
sec_slc=sec,
date_format=self.date_format,
outdir=self.outdir,
ref_date=ref_date,
sec_date=sec_date,
verify_slcs=self.verify_slcs,
write=self.write,
)
ifg_list.append(v)
Expand All @@ -346,6 +394,14 @@ def _gdal_file_strings(self):
)
return out

def _get_ifg_date_pairs(self):
date_pairs = []
for slc1, slc2 in self.slc_file_pairs:
d1 = self._slc_to_date[slc1]
d2 = self._slc_to_date[slc2]
date_pairs.append((d1, d2))
return date_pairs

def __repr__(self):
return (
f"Network(ifg_list={self.ifg_list}, slc_list={self.slc_list},"
Expand Down
23 changes: 21 additions & 2 deletions src/dolphin/workflows/wrapped_phase.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import datetime
from pathlib import Path
from typing import Optional, Sequence
from typing import Optional, Sequence, cast

import numpy as np
from opera_utils import make_nodata_mask
Expand Down Expand Up @@ -221,7 +221,26 @@ def _create_ifgs(
return ifg_file_list

# For other networks, we have to combine other ones formed from the `Network`
# TODO
if network_type == InterferogramNetworkType.MAX_BANDWIDTH:
max_b = cfg.interferogram_network.max_bandwidth
# Max bandwidth is easier because we just take the first `max_b` from `phase_linked_slcs`
# (which are the (ref_date, ...) interferograms),...
ifgs_ref_date = ifg_file_list[:max_b]
# ...then combine it with the results from the `Network`
# Manually specify the dates, which come from the names of `phase_linked_slcs`
secondary_dates = [get_dates(f)[0] for f in phase_linked_slcs]
network_rest = interferogram.Network(
slc_list=phase_linked_slcs,
max_bandwidth=max_b,
outdir=ifg_dir,
dates=secondary_dates,
)
# Using `cast` to assert that the paths are not None
ifgs_others = cast(list[Path], [ifg.path for ifg in network_rest.ifg_list])

return ifgs_ref_date + ifgs_others

# Other types: TODO
raise NotImplementedError(
"Only single-reference interferograms are supported when"
" starting with compressed SLCs"
Expand Down
42 changes: 42 additions & 0 deletions tests/test_interferogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,45 @@ def test_create_vrt_conj(tmp_path, slc_file_list_nc_wgs84):
assert io.get_raster_xysize(out) == io.get_raster_xysize(infile)
assert io.get_raster_crs(out) == io.get_raster_crs(infile)
np.testing.assert_array_equal(io.load_gdal(out).conj(), io.load_gdal(infile))


def test_network_manual_dates(four_slc_files):
    """Smoke test: a `Network` accepts one manually specified date per SLC."""
    manual_dates = ["20210101", "20210107", "20210101", "20210109"]
    Network(four_slc_files, max_bandwidth=1, write=False, dates=manual_dates)


def test_network_manual_wrong_len_dates(four_slc_files):
    """Passing fewer manual dates than SLC files must raise a `ValueError`."""
    too_few_dates = ["20210101", "20210109"]
    with pytest.raises(ValueError):
        Network(four_slc_files, max_bandwidth=1, write=False, dates=too_few_dates)


def test_network_no_verify(tmpdir):
    """Smoke test: with `verify_slcs=False`, plain date strings work as the SLC list."""
    datestrs = ["20210101", "20210107", "20210108", "20210109"]
    options = {"max_bandwidth": 1, "write": False, "verify_slcs": False}
    Network(datestrs, **options)


def test_network_from_ifgs(tmp_path):
    """Check that the `Network` can work when passing in ifgs.

    The inputs are named like single-reference interferograms, so the dates
    used for the re-formed network are given manually through `dates` (one
    per input) rather than parsed from the names.
    """
    # All three inputs share the 20210101 reference date in their names
    ifg_files = ["20210101_20210107", "20210101_20210108", "20210101_20210109"]
    n = Network(
        ifg_files,
        max_bandwidth=10,
        write=False,
        verify_slcs=False,
        dates=["2021-01-07", "2021-01-08", "2021-01-09"],
    )
    # Comparing the full ordered list checks both the count and each name
    expected_names = [
        "20210107_20210108.int.vrt",
        "20210107_20210109.int.vrt",
        "20210108_20210109.int.vrt",
    ]
    assert [ifg.path.name for ifg in n.ifg_list] == expected_names

0 comments on commit 3601816

Please sign in to comment.