diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 2bdbfd0..cf7251f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -34,4 +34,4 @@ jobs: run: cargo fmt -- --check - name: Hygiene | Clippy - run: cargo clippy --all-targets --all-features -- -Dwarnings -Dclippy::all -Dclippy::pedantic -Aclippy::module_name_repetitions -Aclippy::missing_panics_doc -Aclippy::missing_errors_doc -Aclippy::must_use_candidate -Aclippy::similar_names + run: cargo clippy --all-targets --all-features -- -Dwarnings -Dclippy::all -Dclippy::pedantic -Aclippy::missing_errors_doc -Aclippy::missing_panics_doc -Aclippy::module_name_repetitions -Aclippy::must_use_candidate -Aclippy::similar_names -Aclippy::unreadable_literal diff --git a/Cargo.toml b/Cargo.toml index 7e0710b..67e4e7f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "geopath", + "hexit", "itm", "nasadem", "propah", @@ -19,6 +20,7 @@ log = "0.4.20" memmap2 = "0.7.1" num-traits = "0.2.16" serde = { version = "1", features = ["derive"] } +serde_json = "1" thiserror = "1.0.48" # We want meaninful stack traces when profiling/debugging diff --git a/hexit/Cargo.toml b/hexit/Cargo.toml new file mode 100644 index 0000000..9b391c0 --- /dev/null +++ b/hexit/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "hexit" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1" +byteorder = { workspace = true } +clap = { version = "4.4.2", features = ["derive"] } +env_logger = "0.10" +flate2 = "1.0.28" +geo = { workspace = true } +geojson = "0.24.1" +h3o = { version = "0.4.0", features = ["geo"] } +hextree = { git = "https://github.com/JayKickliter/HexTree.git", branch = "main", features = ["disktree"] } +indicatif = "0.17.7" +itertools = "0.10" +nasadem = { path = "../nasadem" } +num-traits = { workspace = true } +rayon = "1.8.0" +serde = { workspace = true } +serde_json = { workspace = true } + +[target.'cfg(not(target_env = "msvc"))'.dependencies] +tikv-jemallocator = "0.5" diff --git a/hexit/src/combine.rs b/hexit/src/combine.rs new file mode 100644 index 0000000..99ec4b2 --- /dev/null +++ b/hexit/src/combine.rs @@ -0,0 +1,81 @@ +use crate::{ + elevation::{CloseEnoughCompactor, Elevation}, + options::Combine, + progress, +}; +use anyhow::Result; +use byteorder::{LittleEndian as LE, ReadBytesExt}; +use flate2::bufread::GzDecoder; +use hextree::HexTreeMap; +use indicatif::MultiProgress; +use std::{ffi::OsStr, fs::File, io::BufReader, path::Path}; + +impl Combine { + pub fn run(&self) -> Result<()> { + assert!(!self.input.is_empty()); + let mut hextree: HexTreeMap = + HexTreeMap::with_compactor(CloseEnoughCompactor { + tolerance: self.tolerance, + }); + let progress_group = MultiProgress::new(); + for tess_file_path in &self.input { + Self::read_tessellation(tess_file_path, &progress_group, &mut hextree)?; + } + self.write_disktree(&hextree, &progress_group)?; + Ok(()) + } + + fn read_tessellation( + tess_file_path: &Path, + progress_group: &MultiProgress, + hextree: &mut HexTreeMap, + ) -> Result<()> { + let tess_file = File::open(tess_file_path)?; + let tess_buf_rdr = BufReader::new(tess_file); + let mut rdr = GzDecoder::new(tess_buf_rdr); + let tess_file_name = tess_file_path + .file_name() + .and_then(OsStr::to_str) + .expect("already opened, therefore path must be a file"); + + let n_samples = rdr.read_u64::()?; + let pb = progress_group.add(progress::bar(tess_file_name.to_string(), n_samples)); + for _sample_n in 0..n_samples { + let raw_cell = rdr.read_u64::()?; + let cell = hextree::Cell::from_raw(raw_cell)?; + let raw_elevation = rdr.read_i16::()?; + let elevation = Elevation::new(raw_elevation); + hextree.insert(cell, elevation); + pb.inc(1); + } + assert!( + rdr.read_u8().is_err(), + "We should have read all samples out of the file" + ); + + Ok(()) + } + + fn write_disktree( + &self, + hextree: &HexTreeMap, + progress_group: &MultiProgress, + ) -> Result<()> { + let disktree_file = File::create(&self.out)?; + let disktree_file_name = self + .out + .file_name() + .and_then(OsStr::to_str) + .expect("already opened, therefore path must be a file"); + let disktree_len = hextree.len(); + let pb = progress_group.add(progress::bar( + format!("Writing {disktree_file_name}"), + disktree_len as u64, + )); + hextree.to_disktree(disktree_file, |wtr, elev| { + pb.inc(1); + elev.to_writer(wtr) + })?; + Ok(()) + } +} diff --git a/hexit/src/elevation.rs b/hexit/src/elevation.rs new file mode 100644 index 0000000..d38b3d4 --- /dev/null +++ b/hexit/src/elevation.rs @@ -0,0 +1,129 @@ +use byteorder::{LittleEndian as LE, ReadBytesExt, WriteBytesExt}; +use hextree::{compaction::Compactor, Cell}; +use std::{ + io::{Read, Write}, + mem::size_of, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Elevation { + pub min: i16, + pub max: i16, + pub sum: i32, + pub n: i32, +} + +impl Elevation { + const BUF_LEN: usize = + size_of::() + size_of::() + size_of::() + size_of::(); + + pub fn new(raw: i16) -> Elevation { + Elevation { + min: raw, + sum: i32::from(raw), + max: raw, + n: 1, + } + } + + pub fn from_reader(mut rdr: R) -> std::io::Result { + debug_assert_eq!(Self::BUF_LEN, size_of::()); + let mut buf = [0_u8; Self::BUF_LEN]; + rdr.read_exact(&mut buf)?; + let rdr = &mut &buf[..]; + let min = rdr.read_i16::()?; + let max = rdr.read_i16::()?; + let sum = rdr.read_i32::()?; + let n = rdr.read_i32::()?; + Ok(Self { min, max, sum, n }) + } + + pub fn to_writer(&self, mut wtr: W) -> std::io::Result<()> { + assert_eq!(Self::BUF_LEN, size_of::()); + let mut buf = [0_u8; Self::BUF_LEN]; + { + let mut buf_wtr = &mut buf[..]; + buf_wtr.write_i16::(self.min)?; + buf_wtr.write_i16::(self.max)?; + buf_wtr.write_i32::(self.sum)?; + buf_wtr.write_i32::(self.n)?; + } + wtr.write_all(&buf) + } +} + +impl Elevation { + pub fn concat(items: &[&Self]) -> Self { + let mut min = i16::MAX; + let mut sum: i32 = 0; + let mut max = i16::MIN; + let mut n = 0_i32; + for item in items { + sum += item.sum; + min = i16::min(min, item.min); + max = i16::max(max, item.max); + n += item.n; + } + Elevation { min, max, sum, n } + } +} + +pub struct ReductionCompactor { + pub target_resolution: u8, + pub source_resolution: u8, +} + +impl Compactor for ReductionCompactor { + fn compact(&mut self, cell: Cell, children: [Option<&Elevation>; 7]) -> Option { + if cell.res() < self.target_resolution { + None + } else if let [Some(v0), Some(v1), Some(v2), Some(v3), Some(v4), Some(v5), Some(v6)] = + children + { + Some(Elevation::concat(&[v0, v1, v2, v3, v4, v5, v6])) + } else { + None + } + } +} + +pub struct CloseEnoughCompactor { + // Maximum differance between min and max child elevations + // allowable for a cell to be coalesced. + pub tolerance: i16, +} + +impl Compactor for CloseEnoughCompactor { + fn compact(&mut self, _cell: Cell, children: [Option<&Elevation>; 7]) -> Option { + if let [Some(v0), Some(v1), Some(v2), Some(v3), Some(v4), Some(v5), Some(v6)] = children { + let mut n_min = i16::MAX; + let mut n_sum = 0; + let mut n_max = i16::MIN; + let mut n_n = 0; + for Elevation { min, sum, max, n } in [v0, v1, v2, v3, v4, v5, v6] { + // HACK: Ignore voids that snuck through. + if [min, max].contains(&&i16::MIN) { + continue; + } + n_min = i16::min(n_min, *min); + n_sum += sum; + n_max = i16::max(n_max, *max); + n_n += n; + } + let error = n_max - n_min; + assert!(error >= 0, "error can't be negative"); + if error <= self.tolerance { + Some(Elevation { + min: n_min, + sum: n_sum, + max: n_max, + n: n_n, + }) + } else { + None + } + } else { + None + } + } +} diff --git a/hexit/src/json.rs b/hexit/src/json.rs new file mode 100644 index 0000000..b09f890 --- /dev/null +++ b/hexit/src/json.rs @@ -0,0 +1,85 @@ +use crate::{elevation::Elevation, mask, options::Json}; +use anyhow::Result; +use geo::geometry::GeometryCollection; +use h3o::{ + geom::{PolyfillConfig, ToCells}, + Resolution, +}; +use hextree::{disktree::DiskTreeMap, memmap::Mmap, Cell, HexTreeMap}; +use serde::Serialize; +use serde_json::Value; + +impl Json { + pub fn run(&self) -> Result<()> { + let disktree = DiskTreeMap::open(&self.disktree)?; + let mask = mask::open(Some(&self.mask))?.unwrap(); + let target_cells = Self::polyfill_mask(mask, self.resolution)?; + let mut hextree = HexTreeMap::new(); + for h3idx in target_cells { + let cell = Cell::try_from(h3idx)?; + if let Some((cell, reduction)) = Self::get(cell, &disktree)? { + hextree.insert(cell, reduction); + } + } + let json = Self::gen_json(&hextree)?; + Self::output_json(&json)?; + Ok(()) + } + + fn polyfill_mask(mask: GeometryCollection, resolution: Resolution) -> Result> { + let polygon = h3o::geom::GeometryCollection::from_degrees(mask)?; + let mut cells: Vec = polygon + .to_cells(PolyfillConfig::new(resolution)) + .map(u64::from) + .collect(); + cells.sort_unstable(); + cells.dedup(); + Ok(cells) + } + + fn get(cell: Cell, disktree: &DiskTreeMap) -> Result> { + match disktree.get(cell)? { + None => Ok(None), + Some((cell, bytes)) => { + let reduction = Elevation::from_reader(&mut &bytes[..])?; + Ok(Some((cell, reduction))) + } + } + } + + fn gen_json(hextree: &HexTreeMap) -> Result { + #[derive(Serialize)] + struct JsonEntry { + h3_id: String, + min: i16, + avg: i16, + sum: i32, + max: i16, + n: i32, + } + impl From<(Cell, &Elevation)> for JsonEntry { + fn from((cell, elev): (Cell, &Elevation)) -> JsonEntry { + JsonEntry { + avg: i16::try_from(elev.sum / elev.n).unwrap(), + h3_id: cell.to_string(), + max: elev.max, + min: elev.min, + n: elev.n, + sum: elev.sum, + } + } + } + let samples = hextree + .iter() + .map(JsonEntry::from) + .map(serde_json::to_value) + .collect::, _>>()?; + Ok(Value::Array(samples)) + } + + fn output_json(json: &Value) -> Result<()> { + let out = std::io::stdout().lock(); + serde_json::to_writer(out, json)?; + Ok(()) + } +} diff --git a/hexit/src/lookup.rs b/hexit/src/lookup.rs new file mode 100644 index 0000000..8da23e3 --- /dev/null +++ b/hexit/src/lookup.rs @@ -0,0 +1,35 @@ +use crate::{elevation::Elevation, options::Lookup}; +use anyhow::Result; +use hextree::{disktree::DiskTreeMap, memmap::Mmap, Cell}; + +impl Lookup { + pub fn run(&self) -> Result<()> { + let raw_cell: u64 = self + .cell + .parse::() + .or_else(|_| u64::from_str_radix(&self.cell, 16))?; + let cell = Cell::try_from(raw_cell)?; + let mut disktree = DiskTreeMap::open(&self.disktree)?; + + Self::by_get(cell, &mut disktree) + } + + fn by_get(cell: Cell, disktree: &mut DiskTreeMap) -> Result<()> { + let t0 = std::time::Instant::now(); + match disktree.get(cell)? { + None => (), + Some((cell, bytes)) => { + let t_seek = t0.elapsed(); + let Elevation { min, max, sum, n } = Elevation::from_reader(&mut &bytes[..])?; + let avg = sum / n; + println!("cell: {cell} (res {})", cell.res()); + println!("min: {min}"); + println!("avg: {avg}"); + println!("max: {max}"); + println!("n: {n}"); + println!("seek: {t_seek:?}"); + } + } + Ok(()) + } +} diff --git a/hexit/src/main.rs b/hexit/src/main.rs new file mode 100644 index 0000000..d075238 --- /dev/null +++ b/hexit/src/main.rs @@ -0,0 +1,28 @@ +mod combine; +mod elevation; +mod json; +mod lookup; +mod mask; +mod options; +mod progress; +mod tesselate; + +use anyhow::Result; +use clap::Parser; +use options::Cli; +#[cfg(not(target_env = "msvc"))] +use tikv_jemallocator::Jemalloc; + +#[cfg(not(target_env = "msvc"))] +#[global_allocator] +static GLOBAL: Jemalloc = Jemalloc; + +fn main() -> Result<()> { + let cli = Cli::parse(); + match cli { + Cli::Tessellate(tesselate) => tesselate.run(), + Cli::Combine(combine) => combine.run(), + Cli::Lookup(lookup) => lookup.run(), + Cli::Json(json) => json.run(), + } +} diff --git a/hexit/src/mask.rs b/hexit/src/mask.rs new file mode 100644 index 0000000..5482226 --- /dev/null +++ b/hexit/src/mask.rs @@ -0,0 +1,16 @@ +use anyhow::Result; +use geo::GeometryCollection; +use geojson::{quick_collection, GeoJson}; +use std::{fs::File, path::Path}; + +pub fn open(maybe_path: Option<&Path>) -> Result> { + match maybe_path { + None => Ok(None), + Some(path) => { + let mask_file = File::open(path)?; + let mask_json = GeoJson::from_reader(mask_file)?; + let mask: GeometryCollection = quick_collection(&mask_json)?; + Ok(Some(mask)) + } + } +} diff --git a/hexit/src/options.rs b/hexit/src/options.rs new file mode 100644 index 0000000..29d6157 --- /dev/null +++ b/hexit/src/options.rs @@ -0,0 +1,84 @@ +use clap::{Args, Parser}; +use h3o::Resolution; +use std::path::PathBuf; + +/// Generate H3 tessellated polyfills from raster data. +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub enum Cli { + Tessellate(Tesselate), + + Combine(Combine), + + Lookup(Lookup), + + Json(Json), +} + +/// Generate a tessellated list of (cell, elevation) for each +/// input file. +#[derive(Debug, Clone, Args)] +pub struct Tesselate { + /// Path GeoJSON mask. + /// + /// Any tiles which do not intersect the mask are ignored. + #[arg(short, long)] + pub mask: Option, + + /// Reprocess height file even if corresponding output already + /// exists. + #[arg(short = 'O', long)] + pub overwrite: bool, + + /// Amount of compression. + #[arg(short, long, default_value_t = 6)] + pub compression: u32, + + #[arg(short, long, default_value_t = Resolution::Twelve)] + pub resolution: Resolution, + + /// Output directory. + #[arg(short, long)] + pub out_dir: PathBuf, + + /// Input SRTM elevation (.hgt) files. + pub input: Vec, +} + +/// Combine previously tesselated files into a single +#[derive(Debug, Clone, Args)] +pub struct Combine { + #[arg(short, long)] + pub tolerance: i16, + + #[arg(short, long)] + pub out: PathBuf, + + /// Input tessaltions. + pub input: Vec, +} + +/// Lookup value for H3 cell in a disktree. +#[derive(Debug, Clone, Args)] +pub struct Lookup { + /// Iterate through the disktree instead of `get`ting the value. + #[arg(short, long)] + pub iter: bool, + pub disktree: PathBuf, + pub cell: String, +} + +/// Output kepler.gl compatible JSON within the given mask. +#[derive(Debug, Clone, Args)] +pub struct Json { + /// Source resolution. + #[arg(short, long)] + pub resolution: Resolution, + + /// Path GeoJSON mask. + /// + /// Any samples which do not intersect the mask are ignored. + pub mask: PathBuf, + + pub disktree: PathBuf, +} diff --git a/hexit/src/progress.rs b/hexit/src/progress.rs new file mode 100644 index 0000000..1fddba6 --- /dev/null +++ b/hexit/src/progress.rs @@ -0,0 +1,13 @@ +use indicatif::{ProgressBar, ProgressStyle}; + +pub fn bar(header: String, length: u64) -> ProgressBar { + let pb = ProgressBar::hidden(); + pb.set_prefix(header); + pb.set_length(length); + pb.set_style( + ProgressStyle::with_template("{prefix}...\n[{wide_bar:.cyan/blue}]") + .expect("incorrect progress bar format string") + .progress_chars("#>-"), + ); + pb +} diff --git a/hexit/src/tesselate.rs b/hexit/src/tesselate.rs new file mode 100644 index 0000000..22a71f3 --- /dev/null +++ b/hexit/src/tesselate.rs @@ -0,0 +1,122 @@ +use crate::{mask, options::Tesselate, progress}; +use anyhow::Result; +use byteorder::{LittleEndian as LE, WriteBytesExt}; +use flate2::{write::GzEncoder, Compression}; +use geo::{GeometryCollection, Intersects}; +use h3o::{ + geom::{PolyfillConfig, Polygon, ToCells}, + Resolution, +}; +use hextree::{Cell, HexTreeMap}; +use indicatif::{MultiProgress, ProgressBar, ProgressDrawTarget}; +use nasadem::{Sample, Tile}; +use rayon::prelude::*; +use std::{ + ffi::OsStr, + fs::{self, File}, + io::{BufWriter, Write}, + path::Path, +}; + +impl Tesselate { + pub fn run(&self) -> Result<()> { + let progress_group = MultiProgress::with_draw_target(ProgressDrawTarget::stderr_with_hz(4)); + let mask = mask::open(self.mask.as_deref())?; + self.input.par_iter().try_for_each(|height_file_path| { + self._run(height_file_path, mask.as_ref(), &progress_group) + })?; + Ok(()) + } + + fn _run( + &self, + height_file_path: &Path, + mask: Option<&GeometryCollection>, + progress_group: &MultiProgress, + ) -> Result<()> { + let (in_file_name, out_file_name) = { + let file_name = height_file_path + .file_name() + .and_then(OsStr::to_str) + .expect("already opened, therefore path must be a file"); + ( + file_name, + format!("{file_name}.res{}.h3tez", self.resolution), + ) + }; + let out_file_path = self.out_dir.clone().join(&out_file_name); + + if out_file_path.exists() && !self.overwrite { + return Ok(()); + } + + let out_file_tmp_path = { + let mut p = out_file_path.clone(); + p.set_extension("tmp"); + p + }; + + let tile = Tile::memmap(height_file_path)?; + let intersects = mask.as_ref().map_or(true, |mask| { + let polygon = tile.polygon(); + mask.intersects(&polygon) + }); + if intersects { + let pb = progress_group.add(progress::bar( + format!("Tesselate {in_file_name}"), + tile.len() as u64, + )); + let hextree = self.tesselate_tile(&tile, &pb)?; + let tmp_out_file = File::create(&out_file_tmp_path)?; + let tmp_out_wtr = GzEncoder::new(tmp_out_file, Compression::new(self.compression)); + let wtr = BufWriter::new(tmp_out_wtr); + let pb = progress_group.add(progress::bar( + format!("Write {out_file_name}"), + hextree.len() as u64, + )); + Self::write_to_disk(&hextree, &pb, wtr)?; + fs::rename(out_file_tmp_path, out_file_path)?; + } + + Ok(()) + } + + fn tesselate_tile(&self, tile: &Tile, progress_bar: &ProgressBar) -> Result> { + let mut hextree: HexTreeMap = HexTreeMap::new(); + for sample in tile.iter() { + assert_ne!(sample.elevation(), i16::MIN); + let (elev, hexes) = Self::tesselate_sample(&sample, self.resolution)?; + for hex in hexes { + hextree.insert(Cell::try_from(hex)?, elev); + } + progress_bar.inc(1); + } + Ok(hextree) + } + + fn tesselate_sample(sample: &Sample, resolution: Resolution) -> Result<(i16, Vec)> { + let elevation = sample.elevation(); + let polygon = Polygon::from_degrees(sample.polygon())?; + let mut cells: Vec = polygon + .to_cells(PolyfillConfig::new(resolution)) + .map(u64::from) + .collect(); + cells.sort_unstable(); + cells.dedup(); + Ok((elevation, cells)) + } + + fn write_to_disk( + hextree: &HexTreeMap, + progress_bar: &ProgressBar, + mut out: impl Write, + ) -> Result<()> { + out.write_u64::(hextree.len() as u64)?; + for (cell, elev) in hextree.iter() { + out.write_u64::(cell.into_raw())?; + out.write_i16::(*elev)?; + progress_bar.inc(1); + } + Ok(()) + } +} diff --git a/nasadem/src/lib.rs b/nasadem/src/lib.rs index 40dc1af..a269b6a 100755 --- a/nasadem/src/lib.rs +++ b/nasadem/src/lib.rs @@ -281,7 +281,7 @@ impl Tile { self.samples.get_unchecked(idx_1d) } - /// Returns and iterator over `self`'s grid squares. + /// Returns an iterator over `self`'s grid squares. pub fn iter(&self) -> impl Iterator> + '_ { (0..(self.dimensions.0 * self.dimensions.1)).map(|index| Sample { tile: self, index }) }