Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hexit binary project #18

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ jobs:
run: cargo fmt -- --check

- name: Hygiene | Clippy
run: cargo clippy --all-targets --all-features -- -Dwarnings -Dclippy::all -Dclippy::pedantic -Aclippy::module_name_repetitions -Aclippy::missing_panics_doc -Aclippy::missing_errors_doc -Aclippy::must_use_candidate -Aclippy::similar_names
run: cargo clippy --all-targets --all-features -- -Dwarnings -Dclippy::all -Dclippy::pedantic -Aclippy::missing_errors_doc -Aclippy::missing_panics_doc -Aclippy::module_name_repetitions -Aclippy::must_use_candidate -Aclippy::similar_names -Aclippy::unreadable_literal
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[workspace]
members = [
"geopath",
"hexit",
"itm",
"nasadem",
"propah",
Expand All @@ -19,6 +20,7 @@ log = "0.4.20"
memmap2 = "0.7.1"
num-traits = "0.2.16"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "1.0.48"

# We want meaningful stack traces when profiling/debugging
Expand Down
25 changes: 25 additions & 0 deletions hexit/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[package]
name = "hexit"
version = "0.1.0"
edition = "2021"

[dependencies]
anyhow = "1"
byteorder = { workspace = true }
clap = { version = "4.4.2", features = ["derive"] }
env_logger = "0.10"
flate2 = "1.0.28"
geo = { workspace = true }
geojson = "0.24.1"
h3o = { version = "0.4.0", features = ["geo"] }
hextree = { git = "https://github.com/JayKickliter/HexTree.git", branch = "main", features = ["disktree"] }
indicatif = "0.17.7"
itertools = "0.10"
nasadem = { path = "../nasadem" }
num-traits = { workspace = true }
rayon = "1.8.0"
serde = { workspace = true }
serde_json = { workspace = true }

[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.5"
81 changes: 81 additions & 0 deletions hexit/src/combine.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use crate::{
elevation::{CloseEnoughCompactor, Elevation},
options::Combine,
progress,
};
use anyhow::Result;
use byteorder::{LittleEndian as LE, ReadBytesExt};
use flate2::bufread::GzDecoder;
use hextree::HexTreeMap;
use indicatif::MultiProgress;
use std::{ffi::OsStr, fs::File, io::BufReader, path::Path};

impl Combine {
pub fn run(&self) -> Result<()> {
assert!(!self.input.is_empty());
let mut hextree: HexTreeMap<Elevation, CloseEnoughCompactor> =
HexTreeMap::with_compactor(CloseEnoughCompactor {
tolerance: self.tolerance,
});
let progress_group = MultiProgress::new();
for tess_file_path in &self.input {
Self::read_tessellation(tess_file_path, &progress_group, &mut hextree)?;
}
self.write_disktree(&hextree, &progress_group)?;
Ok(())
}

fn read_tessellation(
tess_file_path: &Path,
progress_group: &MultiProgress,
hextree: &mut HexTreeMap<Elevation, CloseEnoughCompactor>,
) -> Result<()> {
let tess_file = File::open(tess_file_path)?;
let tess_buf_rdr = BufReader::new(tess_file);
let mut rdr = GzDecoder::new(tess_buf_rdr);
let tess_file_name = tess_file_path
.file_name()
.and_then(OsStr::to_str)
.expect("already opened, therefore path must be a file");

let n_samples = rdr.read_u64::<LE>()?;
let pb = progress_group.add(progress::bar(tess_file_name.to_string(), n_samples));
for _sample_n in 0..n_samples {
let raw_cell = rdr.read_u64::<LE>()?;
let cell = hextree::Cell::from_raw(raw_cell)?;
let raw_elevation = rdr.read_i16::<LE>()?;
let elevation = Elevation::new(raw_elevation);
hextree.insert(cell, elevation);
pb.inc(1);
}
assert!(
rdr.read_u8().is_err(),
"We should have read all samples out of the file"
);

Ok(())
}

fn write_disktree(
&self,
hextree: &HexTreeMap<Elevation, CloseEnoughCompactor>,
progress_group: &MultiProgress,
) -> Result<()> {
let disktree_file = File::create(&self.out)?;
let disktree_file_name = self
.out
.file_name()
.and_then(OsStr::to_str)
.expect("already opened, therefore path must be a file");
let disktree_len = hextree.len();
let pb = progress_group.add(progress::bar(
format!("Writing {disktree_file_name}"),
disktree_len as u64,
));
hextree.to_disktree(disktree_file, |wtr, elev| {
pb.inc(1);
elev.to_writer(wtr)
})?;
Ok(())
}
}
129 changes: 129 additions & 0 deletions hexit/src/elevation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
use byteorder::{LittleEndian as LE, ReadBytesExt, WriteBytesExt};
use hextree::{compaction::Compactor, Cell};
use std::{
io::{Read, Write},
mem::size_of,
};

/// Aggregate elevation statistics for one cell.
///
/// A value created by `Elevation::new` describes a single raw sample; values
/// are merged by `Elevation::concat` and by the compactors in this module.
// NOTE(review): the unit of the elevation values is not stated in this file —
// presumably metres; confirm against the data source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Elevation {
    /// Smallest elevation among the samples folded into this value.
    pub min: i16,
    /// Largest elevation among the samples folded into this value.
    pub max: i16,
    /// Sum of all sample elevations; divided by `n` to obtain the average.
    pub sum: i32,
    /// Number of samples folded into this value.
    pub n: i32,
}

impl Elevation {
    /// Length in bytes of the serialized form: `min`, `max`, `sum` and `n`
    /// written back to back.
    ///
    /// Evaluating this constant also verifies, at compile time, that the
    /// serialized length equals the in-memory size of `Elevation`. Both
    /// `from_reader` and `to_writer` use the constant, so the invariant is
    /// enforced identically for reading and writing without a per-call check.
    const BUF_LEN: usize = {
        let len = size_of::<i16>() + size_of::<i16>() + size_of::<i32>() + size_of::<i32>();
        assert!(
            len == size_of::<Elevation>(),
            "serialized length must equal the in-memory size of Elevation"
        );
        len
    };

    /// Creates the statistics for a single raw sample: `min`, `max` and `sum`
    /// all equal `raw`, and the sample count is 1.
    pub fn new(raw: i16) -> Elevation {
        Elevation {
            min: raw,
            sum: i32::from(raw),
            max: raw,
            n: 1,
        }
    }

    /// Decodes one value from `rdr`.
    ///
    /// Exactly `BUF_LEN` bytes are consumed and interpreted as little-endian
    /// `min`, `max`, `sum`, `n`, in that order.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if `rdr` cannot supply `BUF_LEN` bytes.
    pub fn from_reader<R: Read>(mut rdr: R) -> std::io::Result<Self> {
        let mut buf = [0_u8; Self::BUF_LEN];
        rdr.read_exact(&mut buf)?;
        // Reading from a byte slice advances the slice past each field.
        let mut fields = &buf[..];
        let min = fields.read_i16::<LE>()?;
        let max = fields.read_i16::<LE>()?;
        let sum = fields.read_i32::<LE>()?;
        let n = fields.read_i32::<LE>()?;
        Ok(Self { min, max, sum, n })
    }

    /// Encodes this value to `wtr` as `BUF_LEN` bytes: little-endian `min`,
    /// `max`, `sum`, `n`, in that order.
    ///
    /// The fields are staged in a stack buffer so `wtr` receives a single
    /// `write_all` call.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if `wtr` rejects the write.
    pub fn to_writer<W: Write>(&self, mut wtr: W) -> std::io::Result<()> {
        let mut buf = [0_u8; Self::BUF_LEN];
        {
            // Writing into a mutable byte slice advances it past each field.
            let mut buf_wtr = &mut buf[..];
            buf_wtr.write_i16::<LE>(self.min)?;
            buf_wtr.write_i16::<LE>(self.max)?;
            buf_wtr.write_i32::<LE>(self.sum)?;
            buf_wtr.write_i32::<LE>(self.n)?;
        }
        wtr.write_all(&buf)
    }
}

impl Elevation {
    /// Merges `items` into a single value: the smallest `min`, the largest
    /// `max`, and the totals of `sum` and `n`.
    ///
    /// For an empty slice the result is the fold's starting value
    /// (`min == i16::MAX`, `max == i16::MIN`, `sum == 0`, `n == 0`).
    pub fn concat(items: &[&Self]) -> Self {
        let identity = Elevation {
            min: i16::MAX,
            max: i16::MIN,
            sum: 0,
            n: 0,
        };
        items.iter().fold(identity, |acc, item| Elevation {
            min: acc.min.min(item.min),
            max: acc.max.max(item.max),
            sum: acc.sum + item.sum,
            n: acc.n + item.n,
        })
    }
}

/// Compactor that merges complete sets of seven children, but only for cells
/// whose resolution is at least `target_resolution`.
pub struct ReductionCompactor {
    /// Cells with a resolution below this value are never merged.
    pub target_resolution: u8,
    /// Not read by `compact`.
    pub source_resolution: u8,
}

impl Compactor<Elevation> for ReductionCompactor {
    /// Returns the concatenation of all seven children when every child is
    /// present and `cell` is not coarser than the target resolution;
    /// otherwise returns `None`.
    fn compact(&mut self, cell: Cell, children: [Option<&Elevation>; 7]) -> Option<Elevation> {
        match children {
            [Some(c0), Some(c1), Some(c2), Some(c3), Some(c4), Some(c5), Some(c6)]
                if cell.res() >= self.target_resolution =>
            {
                Some(Elevation::concat(&[c0, c1, c2, c3, c4, c5, c6]))
            }
            _ => None,
        }
    }
}

/// Compactor that merges seven sibling cells when their elevations are close
/// enough to each other.
pub struct CloseEnoughCompactor {
    /// Maximum difference between min and max child elevations
    /// allowable for a cell to be coalesced.
    pub tolerance: i16,
}

impl Compactor<Elevation> for CloseEnoughCompactor {
    /// Merges the children into one value when all seven are present and the
    /// spread between the smallest `min` and largest `max` of the non-void
    /// children does not exceed `tolerance`.
    ///
    /// Children whose `min` or `max` equals `i16::MIN` are treated as voids
    /// and ignored. If every child is a void there is nothing to merge and
    /// `None` is returned, so no value with a zero sample count is produced.
    ///
    /// # Panics
    ///
    /// Panics if the computed spread is negative, which can only happen when
    /// a child has `min > max`.
    fn compact(&mut self, _cell: Cell, children: [Option<&Elevation>; 7]) -> Option<Elevation> {
        if let [Some(v0), Some(v1), Some(v2), Some(v3), Some(v4), Some(v5), Some(v6)] = children {
            let mut n_min = i16::MAX;
            let mut n_sum: i32 = 0;
            let mut n_max = i16::MIN;
            let mut n_n: i32 = 0;
            let mut any_valid = false;
            for Elevation { min, sum, max, n } in [v0, v1, v2, v3, v4, v5, v6] {
                // HACK: Ignore voids that snuck through.
                if [min, max].contains(&&i16::MIN) {
                    continue;
                }
                any_valid = true;
                n_min = i16::min(n_min, *min);
                n_sum += sum;
                n_max = i16::max(n_max, *max);
                n_n += n;
            }
            // With no valid child the accumulators still hold their sentinel
            // values; merging would yield a meaningless cell with `n == 0`.
            if !any_valid {
                return None;
            }
            // Widen before subtracting: the spread of two `i16` values can
            // exceed `i16::MAX`.
            let error = i32::from(n_max) - i32::from(n_min);
            assert!(error >= 0, "error can't be negative");
            if error <= i32::from(self.tolerance) {
                Some(Elevation {
                    min: n_min,
                    sum: n_sum,
                    max: n_max,
                    n: n_n,
                })
            } else {
                None
            }
        } else {
            None
        }
    }
}
85 changes: 85 additions & 0 deletions hexit/src/json.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use crate::{elevation::Elevation, mask, options::Json};
use anyhow::Result;
use geo::geometry::GeometryCollection;
use h3o::{
geom::{PolyfillConfig, ToCells},
Resolution,
};
use hextree::{disktree::DiskTreeMap, memmap::Mmap, Cell, HexTreeMap};
use serde::Serialize;
use serde_json::Value;

impl Json {
pub fn run(&self) -> Result<()> {
let disktree = DiskTreeMap::open(&self.disktree)?;
let mask = mask::open(Some(&self.mask))?.unwrap();
let target_cells = Self::polyfill_mask(mask, self.resolution)?;
let mut hextree = HexTreeMap::new();
for h3idx in target_cells {
let cell = Cell::try_from(h3idx)?;
if let Some((cell, reduction)) = Self::get(cell, &disktree)? {
hextree.insert(cell, reduction);
}
}
let json = Self::gen_json(&hextree)?;
Self::output_json(&json)?;
Ok(())
}

fn polyfill_mask(mask: GeometryCollection, resolution: Resolution) -> Result<Vec<u64>> {
let polygon = h3o::geom::GeometryCollection::from_degrees(mask)?;
let mut cells: Vec<u64> = polygon
.to_cells(PolyfillConfig::new(resolution))
.map(u64::from)
.collect();
cells.sort_unstable();
cells.dedup();
Ok(cells)
}

fn get(cell: Cell, disktree: &DiskTreeMap<Mmap>) -> Result<Option<(Cell, Elevation)>> {
match disktree.get(cell)? {
None => Ok(None),
Some((cell, bytes)) => {
let reduction = Elevation::from_reader(&mut &bytes[..])?;
Ok(Some((cell, reduction)))
}
}
}

fn gen_json(hextree: &HexTreeMap<Elevation>) -> Result<Value> {
#[derive(Serialize)]
struct JsonEntry {
h3_id: String,
min: i16,
avg: i16,
sum: i32,
max: i16,
n: i32,
}
impl From<(Cell, &Elevation)> for JsonEntry {
fn from((cell, elev): (Cell, &Elevation)) -> JsonEntry {
JsonEntry {
avg: i16::try_from(elev.sum / elev.n).unwrap(),
h3_id: cell.to_string(),
max: elev.max,
min: elev.min,
n: elev.n,
sum: elev.sum,
}
}
}
let samples = hextree
.iter()
.map(JsonEntry::from)
.map(serde_json::to_value)
.collect::<Result<Vec<Value>, _>>()?;
Ok(Value::Array(samples))
}

fn output_json(json: &Value) -> Result<()> {
let out = std::io::stdout().lock();
serde_json::to_writer(out, json)?;
Ok(())
}
}
35 changes: 35 additions & 0 deletions hexit/src/lookup.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use crate::{elevation::Elevation, options::Lookup};
use anyhow::Result;
use hextree::{disktree::DiskTreeMap, memmap::Mmap, Cell};

impl Lookup {
pub fn run(&self) -> Result<()> {
let raw_cell: u64 = self
.cell
.parse::<u64>()
.or_else(|_| u64::from_str_radix(&self.cell, 16))?;
let cell = Cell::try_from(raw_cell)?;
let mut disktree = DiskTreeMap::open(&self.disktree)?;

Self::by_get(cell, &mut disktree)
}

fn by_get(cell: Cell, disktree: &mut DiskTreeMap<Mmap>) -> Result<()> {
let t0 = std::time::Instant::now();
match disktree.get(cell)? {
None => (),
Some((cell, bytes)) => {
let t_seek = t0.elapsed();
let Elevation { min, max, sum, n } = Elevation::from_reader(&mut &bytes[..])?;
let avg = sum / n;
println!("cell: {cell} (res {})", cell.res());
println!("min: {min}");
println!("avg: {avg}");
println!("max: {max}");
println!("n: {n}");
println!("seek: {t_seek:?}");
}
}
Ok(())
}
}
Loading
Loading