diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs index 58ceb5c29784..3441166bb33f 100644 --- a/crates/polars-core/src/frame/row/av_buffer.rs +++ b/crates/polars-core/src/frame/row/av_buffer.rs @@ -1,5 +1,6 @@ use std::hint::unreachable_unchecked; +use arrow::bitmap::BitmapBuilder; #[cfg(feature = "dtype-struct")] use polars_utils::pl_str::PlSmallStr; @@ -340,7 +341,7 @@ pub enum AnyValueBufferTrusted<'a> { String(StringChunkedBuilder), #[cfg(feature = "dtype-struct")] // not the trusted variant! - Struct(Vec<(AnyValueBuffer<'a>, PlSmallStr)>), + Struct(BitmapBuilder, Vec<(AnyValueBuffer<'a>, PlSmallStr)>), Null(NullChunkedBuilder), All(DataType, Vec>), } @@ -371,7 +372,8 @@ impl<'a> AnyValueBufferTrusted<'a> { Float64(builder) => builder.append_null(), String(builder) => builder.append_null(), #[cfg(feature = "dtype-struct")] - Struct(builders) => { + Struct(outer_validity, builders) => { + outer_validity.push(false); for (b, _) in builders.iter_mut() { b.add(AnyValue::Null); } @@ -486,7 +488,7 @@ impl<'a> AnyValueBufferTrusted<'a> { builder.append_value(v.as_str()) }, #[cfg(feature = "dtype-struct")] - Struct(builders) => { + Struct(outer_validity, builders) => { let AnyValue::StructOwned(payload) = val else { unreachable_unchecked() }; @@ -501,6 +503,7 @@ impl<'a> AnyValueBufferTrusted<'a> { builder.add(av.clone()); } } + outer_validity.push(true); }, All(_, vals) => vals.push(val.clone().into_static()), _ => self.add_physical(val), @@ -525,7 +528,7 @@ impl<'a> AnyValueBufferTrusted<'a> { builder.append_value(v) }, #[cfg(feature = "dtype-struct")] - Struct(builders) => { + Struct(outer_validity, builders) => { let AnyValue::Struct(idx, arr, fields) = val else { unreachable_unchecked() }; @@ -542,6 +545,7 @@ impl<'a> AnyValueBufferTrusted<'a> { builder.add(av); } } + outer_validity.push(true); }, All(_, vals) => vals.push(val.clone().into_static()), _ => self.add_physical(val), @@ -619,7 +623,7 @@ impl<'a> AnyValueBufferTrusted<'a> { new.finish().into_series() }, #[cfg(feature = "dtype-struct")] - Struct(b) => { + Struct(outer_validity, b) => { // @Q? Maybe we need to add a length parameter here for ZFS's. I am not very happy // with just setting the length to zero for that case. if b.is_empty() { @@ -646,8 +650,12 @@ impl<'a> AnyValueBufferTrusted<'a> { let length = if min_len == 0 { 0 } else { max_len }; + let old_outer_validity = core::mem::take(outer_validity); + outer_validity.reserve(capacity); + StructChunked::from_series(PlSmallStr::EMPTY, length, v.iter()) .unwrap() + .with_outer_validity(Some(old_outer_validity.freeze())) .into_series() }, Null(b) => { @@ -716,6 +724,7 @@ impl From<(&DataType, usize)> for AnyValueBufferTrusted<'_> { }, #[cfg(feature = "dtype-struct")] Struct(fields) => { + let outer_validity = BitmapBuilder::with_capacity(len); let buffers = fields .iter() .map(|field| { @@ -724,7 +733,7 @@ impl From<(&DataType, usize)> for AnyValueBufferTrusted<'_> { (buffer, field.name.clone()) }) .collect::>(); - AnyValueBufferTrusted::Struct(buffers) + AnyValueBufferTrusted::Struct(outer_validity, buffers) }, // List can be recursive so use AnyValues for that dt => AnyValueBufferTrusted::All(dt.clone(), Vec::with_capacity(len)), diff --git a/py-polars/tests/unit/operations/test_pivot.py b/py-polars/tests/unit/operations/test_pivot.py index 94c48244225a..c2d453cd4e41 100644 --- a/py-polars/tests/unit/operations/test_pivot.py +++ b/py-polars/tests/unit/operations/test_pivot.py @@ -473,22 +473,22 @@ def test_pivot_struct() -> None: "id": ["a", "b", "c"], "1": [ {"num1": 1, "num2": 4}, - {"num1": None, "num2": None}, + None, {"num1": 6, "num2": 6}, ], "2": [ {"num1": 3, "num2": 5}, - {"num1": None, "num2": None}, - {"num1": None, "num2": None}, + None, + None, ], "3": [ - {"num1": None, "num2": None}, + None, {"num1": 5, "num2": 3}, {"num1": 3, "num2": 6}, ], "4": [ - {"num1": None, "num2": None}, - {"num1": None, "num2": None}, + None, + None, {"num1": 4, "num2": 4}, ], }