-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Loading status checks…
Async Layouts (#1866)
Our layouts are implemented with a `poll(&mut segments) -> Poll::NeedMore(segment_ids)` style API, abstracted behind an `Operation` trait. In practice, this is almost identical to Rust's `Future` trait, so why not benefit from the ecosystem of async utilities? I figured it was worth seeing what the code looks like.
Showing
36 changed files
with
655 additions
and
851 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
//! The segment reader provides an async interface to layouts for resolving individual segments. | ||
use std::sync::{Arc, RwLock}; | ||
|
||
use async_trait::async_trait; | ||
use bytes::Bytes; | ||
use futures::channel::oneshot; | ||
use futures_util::future::try_join_all; | ||
use itertools::Itertools; | ||
use vortex_array::aliases::hash_map::HashMap; | ||
use vortex_error::{vortex_err, VortexResult}; | ||
use vortex_io::VortexReadAt; | ||
use vortex_layout::segments::{AsyncSegmentReader, SegmentId}; | ||
|
||
use crate::v2::footer::Segment; | ||
|
||
/// Resolves segments from an underlying reader and tracks the callers waiting on each segment. | ||
pub(crate) struct SegmentCache<R> { | ||
/// Underlying reader that segment bytes are fetched from. | ||
read: R, | ||
/// Segment descriptors; a segment ID is used as an index into this slice when reading. | ||
segments: Arc<[Segment]>, | ||
/// Channels of the callers waiting for each requested segment, keyed by segment ID. | ||
inflight: RwLock<HashMap<SegmentId, Vec<oneshot::Sender<Bytes>>>>, | ||
} | ||
|
||
impl<R> SegmentCache<R> { | ||
    /// Creates a cache over `read` and `segments` with no segment requests pending. | ||
    pub fn new(read: R, segments: Arc<[Segment]>) -> Self { | ||
        let inflight = RwLock::new(HashMap::new()); | ||
        Self { | ||
            read, | ||
            segments, | ||
            inflight, | ||
        } | ||
    } | ||
|
||
    /// Placeholder for storing the bytes of a segment. | ||
    /// | ||
    /// Both arguments are currently ignored and the call always succeeds. | ||
    pub fn set(&mut self, _segment_id: SegmentId, _bytes: Bytes) -> VortexResult<()> { | ||
        // Do nothing for now | ||
        Ok(()) | ||
    } | ||
} | ||
|
||
impl<R: VortexReadAt> SegmentCache<R> { | ||
    /// Drives the segment cache: reads every segment that currently has waiting callers and | ||
    /// delivers the bytes to each of them. | ||
    /// | ||
    /// The set of segment IDs is snapshotted once at the start of the call. IDs that are first | ||
    /// requested after the snapshot are left in the in-flight map for a later call. | ||
    /// | ||
    /// # Errors | ||
    /// | ||
    /// Returns an error if the in-flight lock is poisoned, if a requested segment ID does not | ||
    /// index into the known segments, if any read fails, or if a snapshotted segment is no | ||
    /// longer registered when its bytes are about to be delivered. | ||
    pub(crate) async fn drive(&self) -> VortexResult<()> | ||
    where | ||
        Self: Unpin, | ||
    { | ||
        // Snapshot the requested IDs under a read lock. The guard is a temporary of this | ||
        // statement, so it is released before any read is awaited. | ||
        let segment_ids = self | ||
            .inflight | ||
            .read() | ||
            .map_err(|_| vortex_err!("poisoned"))? | ||
            .iter() | ||
            .filter_map(|(id, channels)| (!channels.is_empty()).then_some(*id)) | ||
            .collect::<Vec<_>>(); | ||
|
||
        // Resolve every ID up front so that an unknown segment surfaces as an error instead of | ||
        // an out-of-bounds panic. | ||
        let segments = segment_ids | ||
            .iter() | ||
            .map(|id| { | ||
                let index = **id as usize; | ||
                self.segments | ||
                    .get(index) | ||
                    .ok_or_else(|| vortex_err!("segment index {} out of bounds", index)) | ||
            }) | ||
            .collect::<VortexResult<Vec<_>>>()?; | ||
|
||
        // Read all the segments. | ||
        let buffers = try_join_all(segments.into_iter().map(|segment| { | ||
            self.read | ||
                .read_byte_range(segment.offset, segment.length as u64) | ||
        })) | ||
        .await?; | ||
|
||
        // Send the buffers to the waiting channels. | ||
        let mut inflight = self.inflight.write().map_err(|_| vortex_err!("poisoned"))?; | ||
        for (id, buffer) in segment_ids.into_iter().zip_eq(buffers) { | ||
            let channels = inflight | ||
                .remove(&id) | ||
                .ok_or_else(|| vortex_err!("missing inflight segment"))?; | ||
            for sender in channels { | ||
                // `send` fails only when the receiving side has been dropped, i.e. that caller | ||
                // stopped waiting. Ignore it so one cancelled caller cannot abort delivery to | ||
                // the remaining waiters. | ||
                let _ = sender.send(buffer.clone()); | ||
            } | ||
        } | ||
|
||
        Ok(()) | ||
    } | ||
} | ||
|
||
#[async_trait] | ||
impl<R: VortexReadAt> AsyncSegmentReader for SegmentCache<R> { | ||
    /// Registers a request for segment `id` and waits until its bytes are sent back. | ||
    /// | ||
    /// The request is only fulfilled once `drive` reads the segment. If the sending side is | ||
    /// dropped first, an error is returned. | ||
    async fn get(&self, id: SegmentId) -> VortexResult<Bytes> { | ||
        let (tx, rx) = oneshot::channel(); | ||
|
||
        // Scope the write guard so that it is released before awaiting the receiver. | ||
        { | ||
            let mut inflight = self.inflight.write().map_err(|_| vortex_err!("poisoned"))?; | ||
            inflight.entry(id).or_default().push(tx); | ||
        } | ||
|
||
        match rx.await { | ||
            Ok(bytes) => Ok(bytes), | ||
            Err(cancelled) => Err(vortex_err!("segment read cancelled: {:?}", cancelled)), | ||
        } | ||
    } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
pub(crate) mod cache; | ||
pub(crate) mod writer; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,39 +1,38 @@ | ||
use bytes::Bytes; | ||
use futures_executor::block_on; | ||
use vortex_array::array::ChunkedArray; | ||
use vortex_array::stream::ArrayStreamExt; | ||
use vortex_array::{ContextRef, IntoArrayData, IntoArrayVariant}; | ||
use vortex_buffer::buffer; | ||
use vortex_error::VortexResult; | ||
use vortex_scan::Scan; | ||
|
||
use crate::v2::{OpenOptions, WriteOptions}; | ||
use crate::v2::*; | ||
|
||
#[tokio::test] | ||
async fn write_read() { | ||
let arr = ChunkedArray::from_iter(vec![ | ||
buffer![0, 1, 2].into_array(), | ||
buffer![3, 4, 5].into_array(), | ||
]) | ||
.into_array(); | ||
#[test] | ||
fn basic_file_roundtrip() -> VortexResult<()> { | ||
block_on(async { | ||
let array = ChunkedArray::from_iter([ | ||
buffer![0, 1, 2].into_array(), | ||
buffer![3, 4, 5].into_array(), | ||
buffer![6, 7, 8].into_array(), | ||
]) | ||
.into_array(); | ||
|
||
let written = WriteOptions::default() | ||
.write_async(vec![], arr.into_array_stream()) | ||
.await | ||
.unwrap(); | ||
let buffer: Bytes = WriteOptions::default() | ||
.write_async(vec![], array.into_array_stream()) | ||
.await? | ||
.into(); | ||
|
||
// TODO(ngates): no need to wrap Vec<u8> in Bytes if VortexReadAt doesn't require clone. | ||
let vxf = OpenOptions::new(ContextRef::default()) | ||
.open(Bytes::from(written)) | ||
.await | ||
.unwrap(); | ||
let vxf = OpenOptions::new(ContextRef::default()).open(buffer).await?; | ||
let result = vxf | ||
.scan(Scan::all())? | ||
.into_array_data() | ||
.await? | ||
.into_primitive()?; | ||
|
||
let result = vxf | ||
.scan(Scan::all()) | ||
.unwrap() | ||
.into_array_data() | ||
.await | ||
.unwrap() | ||
.into_primitive() | ||
.unwrap(); | ||
assert_eq!(result.as_slice::<i32>(), &[0, 1, 2, 3, 4, 5, 6, 7, 8]); | ||
|
||
assert_eq!(result.as_slice::<i32>(), &[0, 1, 2, 3, 4, 5]); | ||
Ok(()) | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// ## A note on the API of Evaluator. | ||
// | ||
// We have chosen a general "run this expression" API instead of separate | ||
// `filter(row_mask, expr) -> row_mask` + `project(row_mask, field_mask)` APIs. The reason for | ||
// this is so we can eventually support cell-level push-down. | ||
// | ||
// If we only projected using a field mask, then we would need to download all of the data | ||
// for each projected field across the rows present in the row mask. By cell-level push-down, | ||
// I mean that we can slice the cell directly out of storage using an API like | ||
// `SegmentReader::read(segment_id, byte_range: Range<usize>)`. | ||
// | ||
// Admittedly, this is a highly advanced use-case, but can prove invaluable for large cell values | ||
// such as images and video. | ||
// | ||
// If instead we make the projection API `project(row_mask, expr)`, then the project API is | ||
// identical to the filter API and there's no point having both. Hence, a single | ||
// `evaluate(row_mask, expr)` API. | ||
use async_trait::async_trait; | ||
use vortex_array::ArrayData; | ||
use vortex_error::VortexResult; | ||
use vortex_expr::ExprRef; | ||
|
||
use crate::RowMask; | ||
|
||
/// A synchronous evaluator that can evaluate expressions against a row mask. | ||
pub trait Evaluator { | ||
/// Evaluates `expr` against `row_mask`, returning the result as an [`ArrayData`]. | ||
fn evaluate(&self, row_mask: RowMask, expr: ExprRef) -> VortexResult<ArrayData>; | ||
} | ||
|
||
/// An async evaluator that can evaluate expressions against a row mask. | ||
/// | ||
/// For now, we make this a non-Send trait since it's desirable for us to pin this CPU-heavy | ||
/// work to a single thread. Having a `Send` future doesn't prevent this model, but it makes | ||
/// it easy to accidentally spawn this on e.g. a multithreaded Tokio runtime that would cause | ||
/// thrashing of the CPU cache. | ||
#[async_trait(?Send)] | ||
pub trait AsyncEvaluator { | ||
/// Evaluates `expr` against `row_mask`, resolving to the result as an [`ArrayData`]. | ||
async fn evaluate(&self, row_mask: RowMask, expr: ExprRef) -> VortexResult<ArrayData>; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters