diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 2bdbfd0..cf7251f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -34,4 +34,4 @@ jobs: run: cargo fmt -- --check - name: Hygiene | Clippy - run: cargo clippy --all-targets --all-features -- -Dwarnings -Dclippy::all -Dclippy::pedantic -Aclippy::module_name_repetitions -Aclippy::missing_panics_doc -Aclippy::missing_errors_doc -Aclippy::must_use_candidate -Aclippy::similar_names + run: cargo clippy --all-targets --all-features -- -Dwarnings -Dclippy::all -Dclippy::pedantic -Aclippy::missing_errors_doc -Aclippy::missing_panics_doc -Aclippy::module_name_repetitions -Aclippy::must_use_candidate -Aclippy::similar_names -Aclippy::unreadable_literal diff --git a/Cargo.toml b/Cargo.toml index 804ce5d..67e4e7f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ log = "0.4.20" memmap2 = "0.7.1" num-traits = "0.2.16" serde = { version = "1", features = ["derive"] } +serde_json = "1" thiserror = "1.0.48" # We want meaninful stack traces when profiling/debugging diff --git a/hexit/Cargo.toml b/hexit/Cargo.toml index b60173e..e152742 100644 --- a/hexit/Cargo.toml +++ b/hexit/Cargo.toml @@ -18,7 +18,8 @@ itertools = "0.10" nasadem = { path = "../nasadem" } num-traits = { workspace = true } rayon = "1.8.0" - +serde = { workspace = true } +serde_json = { workspace = true } [target.'cfg(not(target_env = "msvc"))'.dependencies] tikv-jemallocator = "0.5" diff --git a/hexit/src/combine.rs b/hexit/src/combine.rs index 6e4846e..561f0e7 100644 --- a/hexit/src/combine.rs +++ b/hexit/src/combine.rs @@ -1,28 +1,59 @@ -use crate::{options::Combine, progress}; +use crate::{ + elevation::{Elevation, ReducedElevation}, + options::Combine, + progress, +}; use anyhow::Result; use byteorder::{LittleEndian as LE, ReadBytesExt, WriteBytesExt}; use flate2::bufread::GzDecoder; -use hextree::{compaction::EqCompactor, disktree::DiskTree, Cell, HexTreeMap}; +use hextree::{compaction::Compactor, Cell, HexTreeMap}; use indicatif::MultiProgress; use std::{ffi::OsStr, fs::File, io::BufReader, path::Path}; +struct ReductionCompactor { + target_resolution: u8, + source_resolution: u8, +} + +impl Compactor for ReductionCompactor { + fn compact(&mut self, cell: Cell, children: [Option<&Elevation>; 7]) -> Option { + if cell.res() < self.target_resolution { + None + } else if let [Some(v0), Some(v1), Some(v2), Some(v3), Some(v4), Some(v5), Some(v6)] = + children + { + Some(Elevation::concat( + self.source_resolution, + cell.res(), + &[*v0, *v1, *v2, *v3, *v4, *v5, *v6], + )) + } else { + None + } + } +} + impl Combine { pub fn run(&self) -> Result<()> { assert!(!self.input.is_empty()); - let mut hextree: HexTreeMap = HexTreeMap::with_compactor(EqCompactor); + let mut hextree: HexTreeMap = + HexTreeMap::with_compactor(ReductionCompactor { + source_resolution: self.source_resolution as u8, + target_resolution: self.target_resolution as u8, + }); let progress_group = MultiProgress::new(); for tess_file_path in &self.input { Self::read_tessellation(tess_file_path, &progress_group, &mut hextree)?; } + let hextree = self.reduce_hextree(&hextree, &progress_group); self.write_disktree(&hextree, &progress_group)?; - self.verify_disktree(&hextree, &progress_group)?; Ok(()) } fn read_tessellation( tess_file_path: &Path, progress_group: &MultiProgress, - hextree: &mut HexTreeMap, + hextree: &mut HexTreeMap, ) -> Result<()> { let tess_file = File::open(tess_file_path)?; let tess_buf_rdr = BufReader::new(tess_file); @@ -38,7 +69,7 @@ impl Combine { let raw_cell = rdr.read_u64::()?; let cell = hextree::Cell::from_raw(raw_cell)?; let elevation = rdr.read_i16::()?; - hextree.insert(cell, elevation); + hextree.insert(cell, Elevation::Plain(elevation)); pb.inc(1); } assert!( @@ -49,9 +80,55 @@ impl Combine { Ok(()) } + fn reduce_hextree( + &self, + hextree: &HexTreeMap, + _progress_group: &MultiProgress, + ) -> HexTreeMap { + // let mut res_n_cells = HashSet::new(); + let mut reduced_hextree = HexTreeMap::new(); + let max_child_cnt = + 7_usize.pow(self.source_resolution as u32 - self.target_resolution as u32); + for (cell, elev) in hextree.iter() { + match elev { + Elevation::Intermediate(intermediate) + if cell.res() == self.target_resolution as u8 => + { + assert_eq!(intermediate.n, max_child_cnt); + let reduction = intermediate.reduce(); + reduced_hextree.insert(cell, reduction); + } + _ => { + // res_n_cells.insert( + // cell.to_parent(self.target_resolution as u8) + // .unwrap() + // .into_raw(), + // ); + } + }; + } + // println!("["); + // for index in res_n_cells.into_iter() { + // let cell = Cell::try_from(index).unwrap(); + // println!("{{\"h3_id\":\"{index:x}\",\"res\":{},\"val\":\"\"}},", cell.res()); + // let subtree_iter = hextree.subtree_iter(cell); + // for (cell, val) in subtree_iter { + // println!( + // "{{\"h3_id\":\"{:x}\",\"res\":{},\"val\":\"{val:?}\"}},", + // cell.into_raw(), + // cell.res() + // ); + // } + // } + // println!("]"); + // assert!(false); + // assert!(!reduced_hextree.is_empty()); + reduced_hextree + } + fn write_disktree( &self, - hextree: &HexTreeMap, + hextree: &HexTreeMap, progress_group: &MultiProgress, ) -> Result<()> { let disktree_file = File::create(&self.out)?; @@ -65,41 +142,42 @@ impl Combine { format!("Writing {disktree_file_name}"), disktree_len as u64, )); - hextree.to_disktree(disktree_file, |wtr, val| { + hextree.to_disktree(disktree_file, |wtr, ReducedElevation { min, avg, max }| { pb.inc(1); - wtr.write_i16::(*val) + wtr.write_i16::(*min) + .and_then(|()| wtr.write_i16::(*avg)) + .and_then(|()| wtr.write_i16::(*max)) })?; Ok(()) } - fn verify_disktree( - &self, - hextree: &HexTreeMap, - progress_group: &MultiProgress, - ) -> Result<()> { - fn value_reader(res: hextree::Result<(Cell, &mut File)>) -> Result<(Cell, i16)> { - let (cell, rdr) = res?; - Ok(rdr.read_i16::().map(|val| (cell, val))?) - } - - let mut disktree = DiskTree::open(&self.out)?; - let disktree_file_name = self - .out - .file_name() - .and_then(OsStr::to_str) - .expect("already opened, therefore path must be a file"); - let pb = progress_group.add(progress::bar( - format!("Validating {disktree_file_name}"), - hextree.len() as u64, - )); - let mut count = 0; - for res in disktree.iter(value_reader)? { - let (cell, value) = res?; - assert_eq!(Some((cell, &value)), hextree.get(cell)); - pb.inc(1); - count += 1; - } - assert_eq!(hextree.len(), count); - Ok(()) - } + // fn verify_disktree( + // &self, + // hextree: &HexTreeMap, + // progress_group: &MultiProgress, + // ) -> Result<()> { + // fn value_reader(res: hextree::Result<(Cell, &mut File)>) -> Result<(Cell, i16)> { + // let (cell, rdr) = res?; + // Ok(rdr.read_i16::().map(|val| (cell, val))?) + // } + // let mut disktree = DiskTree::open(&self.out)?; + // let disktree_file_name = self + // .out + // .file_name() + // .and_then(OsStr::to_str) + // .expect("already opened, therefore path must be a file"); + // let pb = progress_group.add(progress::bar( + // format!("Validating {disktree_file_name}"), + // hextree.len() as u64, + // )); + // let mut count = 0; + // for res in disktree.iter(value_reader)? { + // let (cell, value) = res?; + // assert_eq!(Some((cell, &value)), hextree.get(cell)); + // pb.inc(1); + // count += 1; + // } + // assert_eq!(hextree.len(), count); + // Ok(()) + // } } diff --git a/hexit/src/elevation.rs b/hexit/src/elevation.rs new file mode 100644 index 0000000..3cbd2bc --- /dev/null +++ b/hexit/src/elevation.rs @@ -0,0 +1,81 @@ +use anyhow::Result; +use byteorder::{LittleEndian as LE, ReadBytesExt}; +use std::io::Read; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ReducedElevation { + pub min: i16, + pub avg: i16, + pub max: i16, +} + +impl ReducedElevation { + pub fn from_reader(mut rdr: R) -> Result { + let mut buf = [0_u8; 3 * std::mem::size_of::()]; + rdr.read_exact(&mut buf)?; + let rdr = &mut &buf[..]; + let min = rdr.read_i16::()?; + let avg = rdr.read_i16::()?; + let max = rdr.read_i16::()?; + Ok(Self { min, avg, max }) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct IntermediateElevation { + pub min: i16, + pub sum: i32, + pub max: i16, + pub n: usize, +} + +impl IntermediateElevation { + pub fn reduce(&self) -> ReducedElevation { + let min = self.min; + let avg = i16::try_from(self.sum / i32::try_from(self.n).unwrap()).unwrap(); + let max = self.max; + assert!(min <= avg && avg <= max); + ReducedElevation { min, avg, max } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Elevation { + Plain(i16), + Intermediate(IntermediateElevation), +} + +impl Elevation { + pub fn concat(source_resolution: u8, this_resolution: u8, items: &[Self]) -> Self { + let mut new_min = i16::MAX; + let mut new_sum: i32 = 0; + let mut new_max = i16::MIN; + let mut new_n = 0_usize; + for item in items { + match item { + Elevation::Plain(elev) => { + let n = 7_usize.pow(u32::from(source_resolution - this_resolution - 1)); + assert_ne!(n, 0); + let sum = i32::from(*elev) * i32::try_from(n).unwrap(); + new_sum += sum; + new_min = i16::min(new_min, *elev); + new_max = i16::max(new_max, *elev); + new_n += n; + } + + Elevation::Intermediate(IntermediateElevation { min, sum, max, n }) => { + new_sum += *sum; + new_min = i16::min(new_min, *min); + new_max = i16::max(new_max, *max); + new_n += n; + } + } + } + Elevation::Intermediate(IntermediateElevation { + min: new_min, + sum: new_sum, + max: new_max, + n: new_n, + }) + } +} diff --git a/hexit/src/json.rs b/hexit/src/json.rs new file mode 100644 index 0000000..3c90faf --- /dev/null +++ b/hexit/src/json.rs @@ -0,0 +1,78 @@ +use crate::{elevation::ReducedElevation, mask, options::Json}; +use anyhow::Result; +use geo::geometry::GeometryCollection; +use h3o::{ + geom::{PolyfillConfig, ToCells}, + Resolution, +}; +use hextree::{disktree::DiskTree, Cell, HexTreeMap}; +use serde::Serialize; +use serde_json::{json, Value}; +use std::fs::File; + +impl Json { + pub fn run(&self) -> Result<()> { + let mut disktree = DiskTree::open(&self.disktree)?; + let mask = mask::open(Some(&self.mask))?.unwrap(); + let target_cells = Self::polyfill_mask(mask, self.source_resolution)?; + let mut hextree = HexTreeMap::new(); + for h3idx in target_cells { + let cell = Cell::try_from(h3idx)?; + if let Some((cell, reduction)) = Self::get(cell, &mut disktree)? { + hextree.insert(cell, reduction); + } + } + let json = Self::gen_json(&hextree); + Self::output_json(&json)?; + Ok(()) + } + + fn polyfill_mask(mask: GeometryCollection, resolution: Resolution) -> Result> { + let polygon = h3o::geom::GeometryCollection::from_degrees(mask)?; + let mut cells: Vec = polygon + .to_cells(PolyfillConfig::new(resolution)) + .map(u64::from) + .collect(); + cells.sort_unstable(); + cells.dedup(); + Ok(cells) + } + + fn get(cell: Cell, disktree: &mut DiskTree) -> Result> { + match disktree.seek_to_cell(cell)? { + None => Ok(None), + Some((cell, rdr)) => { + let reduction = ReducedElevation::from_reader(rdr)?; + Ok(Some((cell, reduction))) + } + } + } + + fn gen_json(hextree: &HexTreeMap) -> Value { + #[derive(Serialize)] + struct JsonEntry { + h3_id: String, + min: i16, + avg: i16, + max: i16, + } + impl From<(Cell, &ReducedElevation)> for JsonEntry { + fn from((cell, reduction): (Cell, &ReducedElevation)) -> JsonEntry { + JsonEntry { + h3_id: cell.to_string(), + min: reduction.min, + avg: reduction.avg, + max: reduction.max, + } + } + } + let samples = hextree.iter().map(JsonEntry::from).collect::>(); + json!(samples) + } + + fn output_json(json: &Value) -> Result<()> { + let out = std::io::stdout(); + serde_json::to_writer(out, json)?; + Ok(()) + } +} diff --git a/hexit/src/main.rs b/hexit/src/main.rs index ce183f5..d075238 100644 --- a/hexit/src/main.rs +++ b/hexit/src/main.rs @@ -1,4 +1,6 @@ mod combine; +mod elevation; +mod json; mod lookup; mod mask; mod options; @@ -21,5 +23,6 @@ fn main() -> Result<()> { Cli::Tessellate(tesselate) => tesselate.run(), Cli::Combine(combine) => combine.run(), Cli::Lookup(lookup) => lookup.run(), + Cli::Json(json) => json.run(), } } diff --git a/hexit/src/options.rs b/hexit/src/options.rs index 4c94352..75c3505 100644 --- a/hexit/src/options.rs +++ b/hexit/src/options.rs @@ -11,6 +11,8 @@ pub enum Cli { Combine(Combine), Lookup(Lookup), + + Json(Json), } /// Generate a tessellated list of (cell, elevation) for each @@ -52,8 +54,11 @@ pub struct Combine { #[arg(short, long)] pub mask: Option, - #[arg(short, long, default_value_t = Resolution::Ten)] - pub resolution: Resolution, + #[arg(short, long)] + pub source_resolution: Resolution, + + #[arg(short, long)] + pub target_resolution: Resolution, #[arg(short, long)] pub out: PathBuf, @@ -71,3 +76,17 @@ pub struct Lookup { pub disktree: PathBuf, pub cell: String, } + +/// Output kepler.gl compatible JSON within the given mask. +#[derive(Debug, Clone, Args)] +pub struct Json { + #[arg(short, long)] + pub source_resolution: Resolution, + + /// Path GeoJSON mask. + /// + /// Any samples which do not intersect the mask are ignored. + pub mask: PathBuf, + + pub disktree: PathBuf, +} diff --git a/hexit/src/tesselate.rs b/hexit/src/tesselate.rs index 037c897..4f33372 100644 --- a/hexit/src/tesselate.rs +++ b/hexit/src/tesselate.rs @@ -7,7 +7,7 @@ use h3o::{ geom::{PolyfillConfig, Polygon, ToCells}, Resolution, }; -use hextree::{compaction::EqCompactor, Cell, HexTreeMap}; +use hextree::{Cell, HexTreeMap}; use indicatif::{MultiProgress, ProgressBar}; use nasadem::{Sample, Tile}; use rayon::prelude::*; @@ -72,7 +72,7 @@ impl Tesselate { let wtr = BufWriter::new(tmp_out_wtr); let pb = progress_group.add(progress::bar( format!("Write {out_file_name}"), - tile.len() as u64, + hextree.len() as u64, )); Self::write_to_disk(&hextree, &pb, wtr)?; fs::rename(out_file_tmp_path, out_file_path)?; @@ -81,12 +81,8 @@ impl Tesselate { Ok(()) } - fn tesselate_tile( - &self, - tile: &Tile, - progress_bar: &ProgressBar, - ) -> Result> { - let mut hextree: HexTreeMap = HexTreeMap::with_compactor(EqCompactor); + fn tesselate_tile(&self, tile: &Tile, progress_bar: &ProgressBar) -> Result> { + let mut hextree: HexTreeMap = HexTreeMap::new(); for sample in tile.iter() { let (elev, hexes) = Self::tesselate_sample(&sample, self.resolution)?; for hex in hexes { @@ -110,7 +106,7 @@ impl Tesselate { } fn write_to_disk( - hextree: &HexTreeMap, + hextree: &HexTreeMap, progress_bar: &ProgressBar, mut out: impl Write, ) -> Result<()> {