Skip to content

Commit

Permalink
Default to opaque encoding (#2060)
Browse files Browse the repository at this point in the history
Fixes #1003
  • Loading branch information
gatesn authored Jan 23, 2025
1 parent dcb00df commit b44d751
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 19 deletions.
11 changes: 11 additions & 0 deletions vortex-array/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::array::{
BoolEncoding, ChunkedEncoding, ConstantEncoding, ExtensionEncoding, ListEncoding, NullEncoding,
PrimitiveEncoding, SparseEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
};
use crate::encoding::opaque::OpaqueEncoding;
use crate::encoding::EncodingRef;

/// A mapping between an encoding's ID to an [`EncodingRef`], used to have a shared view of all available encoding schemes.
Expand Down Expand Up @@ -35,6 +36,16 @@ impl Context {
pub fn lookup_encoding(&self, encoding_code: u16) -> Option<EncodingRef> {
self.encodings.get(&encoding_code).cloned()
}

pub fn lookup_encoding_or_opaque(&self, encoding_id: u16) -> EncodingRef {
self.lookup_encoding(encoding_id).unwrap_or_else(|| {
// We must return an EncodingRef, which requires a static reference.
// OpaqueEncoding however must be created dynamically, since we do not know ahead
// of time which of the ~65,000 unknown code IDs we will end up seeing. Thus, we
// allocate (and leak) 2 bytes of memory to create a new encoding.
Box::leak(Box::new(OpaqueEncoding(encoding_id)))
})
}
}

impl Default for Context {
Expand Down
9 changes: 1 addition & 8 deletions vortex-array/src/data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,7 @@ impl ArrayData {
{
let array = flatbuffer_init(flatbuffer.as_ref())?;
let flatbuffer_loc = array._tab.loc();

let encoding = ctx.lookup_encoding(array.encoding()).ok_or_else(
|| {
let pretty_known_encodings = ctx.encodings()
.format_with("\n", |e, f| f(&format_args!("- {}", e.id())));
vortex_err!(InvalidSerde: "Unknown encoding with ID {:#02x}. Known encodings:\n{pretty_known_encodings}", array.encoding())
},
)?;
let encoding = ctx.lookup_encoding_or_opaque(array.encoding());

let view = ViewedArrayData {
encoding,
Expand Down
12 changes: 1 addition & 11 deletions vortex-array/src/data/viewed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,7 @@ impl ViewedArrayData {
.ok_or_else(|| vortex_err!("ArrayView: array_child({idx}) not found"))?;
let flatbuffer_loc = child._tab.loc();

let encoding = self
.ctx
.lookup_encoding(child.encoding())
.unwrap_or_else(|| {
// We must return an EncodingRef, which requires a static reference.
// OpaqueEncoding however must be created dynamically, since we do not know ahead
// of time which of the ~65,000 unknown code IDs we will end up seeing. Thus, we
// allocate (and leak) 2 bytes of memory to create a new encoding.
Box::leak(Box::new(OpaqueEncoding(child.encoding())))
});

let encoding = self.ctx.lookup_encoding_or_opaque(child.encoding());
Ok(Self {
encoding,
dtype: dtype.clone(),
Expand Down

0 comments on commit b44d751

Please sign in to comment.