Skip to content

Commit

Permalink
Publish_L2 / RID LUT Manager (#385)
Browse files Browse the repository at this point in the history
* bump all product versions to 2
* RID LUT data manager update and use in processing for trigger descaling
* Update stixcore/io/RidLutManager.py
* fixed API mock via static files on pup99

---------

Co-authored-by: Shane Maloney <[email protected]>
  • Loading branch information
nicHoch and samaloney authored Mar 22, 2024
1 parent de54821 commit f5b77dc
Show file tree
Hide file tree
Showing 20 changed files with 542 additions and 162 deletions.
5 changes: 5 additions & 0 deletions stixcore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,9 @@
except ImportError:
__version__ = "unknown"

# Expose the version of the configuration package as `__version_conf__`;
# fall back to "unknown" when the `version_conf` module cannot be imported.
try:
from .version_conf import __version_conf__
except ImportError:
__version_conf__ = "unknown"

logger = get_logger(__name__)
9 changes: 9 additions & 0 deletions stixcore/data/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(self, data_dir):
self.DIR / "solo_L1_stix-ql-lightcurve_20210116_V01.fits"]
self.L1_fits = list(self.DIR.glob('solo_L1_stix-*.fits'))
self.LB_21_6_30_fits = self.DIR / "solo_LB_stix-21-6-30_0664156800_V01.fits"
self.LB_21_6_21_fits = self.DIR / "solo_LB_stix-21-6-21_0000000000-9999999999_V02_2312148821-53879.fits" # noqa
self.__doc__ = "\n".join([f'{str(k)}: {repr(v)}\n\n' for k, v in self.__dict__.items()])


Expand All @@ -59,6 +60,13 @@ def __init__(self, data_dir):
self.__doc__ = "\n".join([f'{str(k)}: {repr(v)}\n\n' for k, v in self.__dict__.items()])


class RidLutTestData:
    """Paths to the RID LUT test fixtures located below ``<data_dir>/publish``."""

    def __init__(self, data_dir):
        """Collect the fixture paths relative to the test data root.

        Parameters
        ----------
        data_dir : Path
            root directory of the test data
        """
        self.PUB_DIR = data_dir / "publish"
        self.RID_LUT = self.PUB_DIR / "rid_lut.csv"
        self.RID_LUT_UPDATE = self.PUB_DIR / "update_rid_lut.csv"
        # Build the same per-instance summary as the sibling test data classes do;
        # TestData assembles its own documentation from each member's __doc__ and
        # would otherwise show "None" for this entry.
        self.__doc__ = "\n".join([f'{str(k)}: {repr(v)}\n\n' for k, v in self.__dict__.items()])


class TestData:
def __init__(self, data_dir):
self.ephemeris = EphemerisTestData(data_dir)
Expand All @@ -67,6 +75,7 @@ def __init__(self, data_dir):
self.products = IDBTestProduct(data_dir)
self.io = IOTestData(data_dir)
self.soop = SOOPTestData(data_dir)
self.rid_lut = RidLutTestData(data_dir)

self.__doc__ = "\n".join([f"{k}\n******************\n\n{v.__doc__}\n\n\n"
for k, v in self.__dict__.items()])
Expand Down
Git LFS file not shown
4 changes: 2 additions & 2 deletions stixcore/data/test/publish/rid_lut.csv
Git LFS file not shown
4 changes: 2 additions & 2 deletions stixcore/data/test/publish/update_rid_lut.csv
Git LFS file not shown
187 changes: 187 additions & 0 deletions stixcore/io/RidLutManager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
import sys
import time
import tempfile
import urllib.request
from datetime import date, datetime, timedelta

import numpy as np

from astropy.io import ascii
from astropy.table import Table
from astropy.table.operations import unique, vstack

from stixcore.config.config import CONFIG
from stixcore.util.logging import get_logger
from stixcore.util.singleton import Singleton

__all__ = ['RidLutManager']

logger = get_logger(__name__)


class RidLutManager(metaclass=Singleton):
    """Manages metadata for BSD requests.

    The rid is used for a lookup in a csv table file where additional data
    connected to a BSD request is stored. Such as a description of the request
    purpose or state dependent configurations that are not part of the TM data.
    Most important the trigger scaling factor that was used if the trigger scaling
    schema is active.

    The data of the LUT is requested from the API endpoint:
    https://datacenter.stix.i4ds.net/api/bsd/info/
    """

    def __init__(self, file, update=False):
        """Creates the manager by pointing to the LUT file and setting the update strategy.

        Parameters
        ----------
        file : Path
            points to the LUT file
        update : bool, optional
            Update strategy: is the LUT file updated via API?, by default False
        """
        self.file = file
        self.update = update
        self.rid_lut = RidLutManager.read_rid_lut(self.file, self.update)

    def __str__(self) -> str:
        return f"file: {self.file} update: {self.update} size: {len(self.rid_lut)}"

    def update_lut(self):
        """Reloads the LUT using the update strategy chosen at construction.

        If the manager was created with `update=True` or the LUT file does not exist,
        `read_rid_lut` queries the API: it creates a new file if not available or does an
        incremental update otherwise, using the last entry time stamp. With `update=False`
        and an existing file the local file is only read again.
        """
        self.rid_lut = RidLutManager.read_rid_lut(self.file, update=self.update)

    def get_reason(self, rid):
        """Gets the verbal description of the request purpose.

        The description combines several descriptive columns; if more than one LUT
        entry matches the rid, the descriptions are joined by a blank.

        Parameters
        ----------
        rid : int
            the BSD request id

        Returns
        -------
        str
            verbal description of the request purpose
        """
        request = self.rid_lut.loc[rid]
        reason = " ".join(np.atleast_1d(request['description']))
        return reason

    def get_scaling_factor(self, rid):
        """Gets the trigger descaling factor connected to the BSD request.

        Parameters
        ----------
        rid : int
            the BSD request id

        Returns
        -------
        int
            the proposed trigger descaling factor to use for the BSD processing;
            30 if the LUT entry carries an empty scaling factor

        Raises
        ------
        ValueError
            if no or too many entries are found for the given rid
        """
        try:
            request = self.rid_lut.loc[rid]
        except KeyError as err:
            # f-string so that the offending rid actually shows up in the message
            raise ValueError("can't get scaling factor: "
                             f"no request founds for rid: {rid}") from err
        scaling_factor = np.atleast_1d(request['scaling_factor'])
        if len(scaling_factor) > 1:
            raise ValueError("can't get scaling factor: "
                             f"to many request founds for rid: {rid}")
        scf = scaling_factor[0].strip()
        return 30 if scf == '' else int(float(scf))

    @classmethod
    def read_rid_lut(cls, file, update=False):
        """Reads or creates the LUT of all BSD RIDs and the request reason comment.

        On creation or update an api endpoint from the STIX data center is used
        to get the information and persists as a LUT locally. A failing API request is
        logged and ends the download loop; everything collected so far is still written.

        Parameters
        ----------
        file : Path
            path to the LUT file.
        update : bool, optional
            should the LUT be updated at start up?, by default False

        Returns
        -------
        Table
            the LUT of RIDs and request reasons, with an additional 'description'
            column and an index on 'unique_id'.
        """
        converters = {'_id': np.uint, 'unique_id': np.uint, 'start_utc': datetime,
                      'duration': np.uint, 'type': str, 'subject': str,
                      'purpose': str, 'scaling_factor': str, 'ior_id': str, 'comment': str}

        if update or not file.exists():
            # local import: only needed to build the temporary download path
            import os

            rid_lut = Table(names=converters.keys(), dtype=converters.values())
            # the api is limited to batch sizes of a month. in order to get the full table
            # we have to read each month after the start of STIX
            last_date = date(2019, 1, 1)
            today = date.today()
            if file.exists():
                rid_lut = ascii.read(file, delimiter=",", converters=converters,
                                     guess=False, quotechar='"')
                # continue incrementally from the latest entry already stored
                mds = rid_lut['start_utc'].max()
                try:
                    last_date = datetime.strptime(mds, '%Y-%m-%dT%H:%M:%S').date()
                except ValueError:
                    last_date = datetime.strptime(mds, '%Y-%m-%dT%H:%M:%S.%f').date()

            if not file.parent.exists():
                logger.info(f'path not found to rid lut file dir: {file.parent} creating dir')
                file.parent.mkdir(parents=True, exist_ok=True)
            rid_lut_file_update_url = CONFIG.get('Publish', 'rid_lut_file_update_url')

            try:
                # one private temp directory for all downloads: it is removed on exit,
                # so no download files are left behind and no file name is reused after
                # its owner was already deleted
                with tempfile.TemporaryDirectory() as tmp_dir:
                    updatefile = os.path.join(tmp_dir, "rid_lut_update.csv")
                    while last_date < today:
                        last_date_1m = last_date + timedelta(days=30)
                        ldf = last_date.strftime('%Y%m%d')
                        ld1mf = last_date_1m.strftime('%Y%m%d')
                        update_url = f"{rid_lut_file_update_url}{ldf}/{ld1mf}"
                        logger.info(f'download publish lut file: {update_url}')
                        last_date = last_date_1m
                        urllib.request.urlretrieve(update_url, updatefile)
                        batch = ascii.read(updatefile, delimiter=",", converters=converters,
                                           guess=False, quotechar='"')

                        if len(batch) > 0:
                            logger.info(f'found {len(batch)} entries')
                            rid_lut = vstack([rid_lut, batch])
                        # the stix datacenter API is throttled to 2 calls per second;
                        # wait after every call, also when the batch was empty
                        time.sleep(0.5)
            except Exception:
                # best effort: keep what was downloaded so far
                logger.error("RID API ERROR", exc_info=True)

            # consecutive request windows share their boundary day: drop duplicates
            rid_lut = unique(rid_lut, silent=True)
            ascii.write(rid_lut, file, overwrite=True, delimiter=",", quotechar='"')
            logger.info(f'write total {len(rid_lut)} entries to local storage')
        else:
            logger.info(f"read rid-lut from {file}")
            rid_lut = ascii.read(file, delimiter=",", converters=converters)

        rid_lut['description'] = [", ".join(r.values()) for r in
                                  rid_lut['subject', 'purpose', 'comment'].filled()]
        rid_lut.add_index('unique_id')

        return rid_lut


if 'pytest' in sys.modules:
# Only when pytest is loaded: register a manager instance backed by the
# test LUT file, with API updates disabled (update=False). The test data
# import is kept inside this branch so it only happens in that scenario.
from stixcore.data.test import test_data
RidLutManager.instance = RidLutManager(test_data.rid_lut.RID_LUT, update=False)
10 changes: 8 additions & 2 deletions stixcore/io/fits/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import stixcore
from stixcore.ephemeris.manager import Spice
from stixcore.products.level0.scienceL0 import Aspect
from stixcore.products.product import Product
from stixcore.products.product import FitsHeaderMixin, Product
from stixcore.soop.manager import SOOPManager, SoopObservationType
from stixcore.time.datetime import SEC_IN_DAY
from stixcore.util.logging import get_logger
Expand Down Expand Up @@ -135,7 +135,8 @@ def generate_common_header(cls, filename, product, *, version=0):
('ORIGIN', 'STIX Team, FHNW', 'FHNW'),
('CREATOR', 'stixcore', 'FITS creation software'),
('VERS_SW', str(stixcore.__version__), 'Version of SW that provided FITS file'),
# ('VERS_CAL', '', 'Version of the calibration pack'),
('VERS_CFG', str(stixcore.__version_conf__),
'Version of the common instrument configuration package'),
('VERSION', version_format(version), 'Version of data product'),
('OBSRVTRY', 'Solar Orbiter', 'Satellite name'),
('TELESCOP', 'SOLO/STIX', 'Telescope/Sensor name'),
Expand Down Expand Up @@ -558,6 +559,9 @@ def write_fits(self, product, path=None, *, version=0):
primary_hdu = fits.PrimaryHDU()
primary_hdu.header.update(primary_header)

if isinstance(product, FitsHeaderMixin):
primary_hdu.header.update(product.get_additional_header_keywords())

# Add comment and history
[primary_hdu.header.add_comment(com) for com in prod.comment]
[primary_hdu.header.add_history(com) for com in prod.history]
Expand Down Expand Up @@ -965,6 +969,8 @@ def generate_primary_header(self, filename, product, *, version=0):
# Name, Value, Comment
('LEVEL', 'L2', 'Processing level of the data'),
('VERS_SW', str(stixcore.__version__), 'Version of SW that provided FITS file'),
('VERS_CFG', str(stixcore.__version_conf__),
'Version of the common instrument configuration package'),
('HISTORY', 'Processed by STIXCore L2'),
)

Expand Down
34 changes: 34 additions & 0 deletions stixcore/io/tests/test_rid_lut_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pytest

from stixcore.io.RidLutManager import RidLutManager


def test_singleton():
    """A manager instance must be registered on the class."""
    manager = RidLutManager.instance
    assert manager


def test_get_reason():
    """The reason for rid 1 is the expected joined description."""
    expected = 'subject, purpose1, r1'
    assert RidLutManager.instance.get_reason(1) == expected


def test_get_reason_multi():
    """The reason for rid 223 concatenates the descriptions of its entries."""
    expected = 'subject, purpose, r223 , c2 subject, purpose_again, r223 , c2'
    assert RidLutManager.instance.get_reason(223) == expected


def test_get_scaling_factor():
    """The scaling factor for rid 1 is returned as the expected number."""
    manager = RidLutManager.instance
    assert manager.get_scaling_factor(1) == 1234


def test_get_scaling_factor_not_found():
    """Requesting the scaling factor for rid 123344 raises a ValueError."""
    manager = RidLutManager.instance
    with pytest.raises(ValueError) as excinfo:
        manager.get_scaling_factor(123344)
    message = str(excinfo.value)
    assert message.startswith("can't get scaling factor")


def test_get_scaling_factor_to_many():
    """Requesting the scaling factor for rid 223 raises a ValueError."""
    manager = RidLutManager.instance
    with pytest.raises(ValueError) as excinfo:
        manager.get_scaling_factor(223)
    message = str(excinfo.value)
    assert message.startswith("can't get scaling factor")
4 changes: 4 additions & 0 deletions stixcore/processing/LBtoL0.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from stixcore.ephemeris.manager import Spice, SpiceKernelManager
from stixcore.idb.manager import IDBManager
from stixcore.io.fits.processors import FitsL0Processor
from stixcore.io.RidLutManager import RidLutManager
from stixcore.products.level0.scienceL0 import NotCombineException
from stixcore.products.product import Product
from stixcore.util.logging import get_logger
Expand Down Expand Up @@ -73,6 +74,9 @@ def process_tm_type(files, tm_type, processor, spice_kernel_path, config, idbm):
IDBManager.instance = idbm
CONFIG = config

RidLutManager.instance = RidLutManager(Path(CONFIG.get('Publish', 'rid_lut_file')),
update=False)

# Stand alone packet data
if (tm_type[0] == 21 and tm_type[-2] not in {20, 21, 22, 23, 24, 42}) or tm_type[0] != 21:
for file in files:
Expand Down
Loading

0 comments on commit f5b77dc

Please sign in to comment.