Skip to content

Commit

Permalink
PI24 run script: metadata generation added, refactoring (#632)
Browse files Browse the repository at this point in the history
* WIP Include metadata creation to run script, refactor script 🔑

* WIP run script refactoring done, rename files to final names before metadata creation 🈵

* Established OUTPUT_ROOT_DIR where all output is written to. Changed params back to full run params. 🐊

* Ignore mypy error 👛

* Fixed type: ignore 🌕
  • Loading branch information
mpluess authored Nov 28, 2024
1 parent a04d637 commit ae7b59f
Showing 1 changed file with 164 additions and 29 deletions.
193 changes: 164 additions & 29 deletions karabo/examples/SRCNet_v0.1_simulation_2_AAstar.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# This script generates simulated visibilities and images resembling SKAO data.
# It also outputs corresponding ObsCore metadata ready to be ingested to Rucio.
#
# Images: dirty image and cleaned image using WSClean.
# These are MFS images (frequency channels aggregated into one channel),
Expand All @@ -9,8 +10,12 @@
# - 3 TB visibilities (after image cleaning)
# - 12 GB images
import math
import os
from datetime import datetime, timedelta, timezone

from karabo.data.obscore import ObsCoreMeta
from karabo.data.src import RucioMeta
from karabo.imaging.image import Image
from karabo.imaging.imager_base import DirtyImagerConfig
from karabo.imaging.imager_wsclean import (
WscleanDirtyImager,
Expand All @@ -22,26 +27,59 @@
from karabo.simulation.sky_model import SkyModel
from karabo.simulation.telescope import Telescope
from karabo.simulation.telescope_versions import SKAMidAAStarVersions
from karabo.simulation.visibility import Visibility
from karabo.simulator_backend import SimulatorBackend
from karabo.util.helpers import get_rnd_str

if __name__ == "__main__":
print(f"{datetime.now()} Script started")
# Simulation
# Phase center: should be mean of coverage
# Means of values from sky model description
PHASE_CENTER_RA = 150.12
PHASE_CENTER_DEC = 2.21

# Imaging
# Image size in degrees should be smaller than FOV
# Bigger baseline -> higher resolution
# Image resolution from SKAO's generate_visibilities.ipynb
IMAGING_NPIXEL = 20000
# -> Cellsize < FOV / 20000 -> 9.32190458333e-7
IMAGING_CELLSIZE = 9.3e-7

# Rucio
RUCIO_NAMESPACE = "chocolate"
# in seconds
RUCIO_LIFETIME = 31536000

# Metadata
OBS_COLLECTION = "SKAO/SKAMID"

obs_sim_id = 0 # inc/change for new simulation
user_rnd_str = get_rnd_str(k=7, seed=os.environ.get("USER"))
OBS_ID = f"karabo-{user_rnd_str}-{obs_sim_id}" # unique ID per user & simulation

# Simulate using OSKAR
SIMULATOR_BACKEND = SimulatorBackend.OSKAR
# Prefix for name part of Rucio DIDs.
# Example: visibilities DID will be [RUCIO_NAMESPACE]:[RUCIO_NAME_PREFIX]measurements.MS
# Keep in mind that DIDs need to be globally unique.
# Would probably make sense to make OBS_ID part of this for future runs.
RUCIO_NAME_PREFIX = "pi24_run_1_"

# Output root dir, this is just a default, set to your liking
OUTPUT_ROOT_DIR = os.path.join(os.environ["SCRATCH"], f"{RUCIO_NAME_PREFIX}output")
os.makedirs(OUTPUT_ROOT_DIR, exist_ok=False)
print(f"Output will be written under output root dir {OUTPUT_ROOT_DIR}")


def generate_visibilities() -> Visibility:
simulator_backend = SimulatorBackend.OSKAR

# Phase center: should be mean of coverage
# Link to metadata of survey:
# https://archive.sarao.ac.za/search/MIGHTEE%20COSMOS/target/J0408-6545/captureblockid/1587911796/
sky_model = SkyModel.get_MIGHTEE_Sky()
# Means of values from sky model description
phase_center_ra = 150.12
phase_center_dec = 2.21

telescope = Telescope.constructor( # type: ignore[call-overload]
name="SKA-MID-AAstar",
version=SKAMidAAStarVersions.SKA_OST_ARRAY_CONFIG_2_3_1,
backend=SIMULATOR_BACKEND,
backend=simulator_backend,
)

# From sky model description
Expand Down Expand Up @@ -79,8 +117,8 @@
)

observation = Observation(
phase_centre_ra_deg=phase_center_ra,
phase_centre_dec_deg=phase_center_dec,
phase_centre_ra_deg=PHASE_CENTER_RA,
phase_centre_dec_deg=PHASE_CENTER_DEC,
# During the chosen time range [start, start + length]
# sources shouldn't be behind horizon, otherwise we won't see much.
# Original survey: 2020-04-26 14:36:50.820 UTC to 2020-04-26 22:35:42.665 UTC
Expand All @@ -93,42 +131,139 @@
frequency_increment_hz=frequency_increment_hz,
)

print(f"{datetime.now()} Starting simulation")
visibility = simulation.run_simulation(
return simulation.run_simulation( # type: ignore[no-any-return]
telescope,
sky_model,
observation,
backend=SIMULATOR_BACKEND,
backend=simulator_backend,
visibility_path=os.path.join(
OUTPUT_ROOT_DIR,
f"{RUCIO_NAME_PREFIX}measurements.MS",
),
) # type: ignore[call-overload]

# Imaging using WSClean
# Image size in degrees should be smaller than FOV
# Bigger baseline -> higher resolution
# Image resolution from SKAO's generate_visibilities.ipynb
imaging_npixel = 20000
# -> Cellsize < FOV / 20000 -> 9.32190458333e-7
imaging_cellsize = 9.3e-7

print(f"{datetime.now()} Creating dirty image")
def create_visibilities_metadata(visibility: Visibility) -> None:
ocm = ObsCoreMeta.from_visibility(
vis=visibility,
calibrated=False,
)
# remove path-infos for `name`
name = os.path.split(visibility.path)[-1]
rm = RucioMeta(
namespace=RUCIO_NAMESPACE, # needs to be specified by Rucio service
name=name,
lifetime=RUCIO_LIFETIME,
dataset_name=None,
meta=ocm,
)
# ObsCore mandatory fields
ocm.obs_collection = OBS_COLLECTION
ocm.obs_id = OBS_ID
obs_publisher_did = RucioMeta.get_ivoid( # rest args are defaults
namespace=rm.namespace,
name=rm.name,
)
ocm.obs_publisher_did = obs_publisher_did

ocm.access_url = create_access_url(RUCIO_NAMESPACE, name)

path_meta = RucioMeta.get_meta_fname(fname=visibility.path)
_ = rm.to_dict(fpath=path_meta)
print(f"Created {path_meta=}")


def create_dirty_image(visibility: Visibility) -> Image:
dirty_imager = WscleanDirtyImager(
DirtyImagerConfig(
imaging_npixel=imaging_npixel,
imaging_cellsize=imaging_cellsize,
imaging_npixel=IMAGING_NPIXEL,
imaging_cellsize=IMAGING_CELLSIZE,
combine_across_frequencies=True,
)
)
dirty_image = dirty_imager.create_dirty_image(visibility)

print(f"{datetime.now()} Creating cleaned image")
return dirty_imager.create_dirty_image(
visibility,
output_fits_path=os.path.join(
OUTPUT_ROOT_DIR,
f"{RUCIO_NAME_PREFIX}dirty.fits",
),
)


def create_cleaned_image(visibility: Visibility, dirty_image: Image) -> Image:
image_cleaner = WscleanImageCleaner(
WscleanImageCleanerConfig(
imaging_npixel=imaging_npixel,
imaging_cellsize=imaging_cellsize,
imaging_npixel=IMAGING_NPIXEL,
imaging_cellsize=IMAGING_CELLSIZE,
)
)
cleaned_image = image_cleaner.create_cleaned_image(

return image_cleaner.create_cleaned_image(
visibility,
dirty_fits_path=dirty_image.path,
output_fits_path=os.path.join(
OUTPUT_ROOT_DIR,
f"{RUCIO_NAME_PREFIX}cleaned.fits",
),
)


def create_image_metadata(image: Image) -> None:
# Create image metadata
ocm = ObsCoreMeta.from_image(img=image)
# remove path-infos for `name`
name = os.path.split(image.path)[-1]
rm = RucioMeta(
namespace=RUCIO_NAMESPACE, # needs to be specified by Rucio service
name=name,
lifetime=RUCIO_LIFETIME,
dataset_name=None,
meta=ocm,
)
# ObsCore mandatory fields
# some of the metadata is taken from the visibilities, since both data-products
# originate from the same observation
ocm.obs_collection = OBS_COLLECTION
ocm.obs_id = OBS_ID
obs_publisher_did = RucioMeta.get_ivoid( # rest args are defaults
namespace=rm.namespace,
name=rm.name,
)
ocm.obs_publisher_did = obs_publisher_did

ocm.s_ra = PHASE_CENTER_RA
ocm.s_dec = PHASE_CENTER_DEC
ocm.access_url = create_access_url(RUCIO_NAMESPACE, name)

path_meta = RucioMeta.get_meta_fname(fname=image.path)
_ = rm.to_dict(fpath=path_meta)
print(f"Created {path_meta=}")


def create_access_url(namespace: str, name: str) -> str:
return f"https://datalink.ivoa.srcdev.skao.int/rucio/links?id={namespace}:{name}"


if __name__ == "__main__":
print(f"{datetime.now()} Script started")

print(f"{datetime.now()} Starting simulation")
visibility = generate_visibilities()

print(f"{datetime.now()} Creating visibility metadata")
create_visibilities_metadata(visibility)

print(f"{datetime.now()} Creating dirty image")
dirty_image = create_dirty_image(visibility)

print(f"{datetime.now()} Creating dirty image metadata")
create_image_metadata(dirty_image)

print(f"{datetime.now()} Creating cleaned image")
cleaned_image = create_cleaned_image(visibility, dirty_image)

print(f"{datetime.now()} Creating cleaned image metadata")
create_image_metadata(cleaned_image)

print(f"{datetime.now()} Script finished")

0 comments on commit ae7b59f

Please sign in to comment.