diff --git a/encodings/zigzag/public-api.lock b/encodings/zigzag/public-api.lock index a366ce1d631..08761c5987b 100644 --- a/encodings/zigzag/public-api.lock +++ b/encodings/zigzag/public-api.lock @@ -4,122 +4,46 @@ pub struct vortex_zigzag::ZigZag impl vortex_zigzag::ZigZag -pub const vortex_zigzag::ZigZag::ID: vortex_array::array::ArrayId - -pub fn vortex_zigzag::ZigZag::try_new(encoded: vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult +pub const vortex_zigzag::ZigZag::ID: vortex_array::scalar_fn::ScalarFnId impl core::clone::Clone for vortex_zigzag::ZigZag pub fn vortex_zigzag::ZigZag::clone(&self) -> vortex_zigzag::ZigZag -impl core::fmt::Debug for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result - -impl vortex_array::array::vtable::VTable for vortex_zigzag::ZigZag - -pub type vortex_zigzag::ZigZag::ArrayData = vortex_zigzag::ZigZagData - -pub type vortex_zigzag::ZigZag::OperationsVTable = vortex_zigzag::ZigZag - -pub type vortex_zigzag::ZigZag::ValidityVTable = vortex_array::array::vtable::validity::ValidityVTableFromChild - -pub fn vortex_zigzag::ZigZag::buffer(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> vortex_array::buffer::BufferHandle - -pub fn vortex_zigzag::ZigZag::buffer_name(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> core::option::Option - -pub fn vortex_zigzag::ZigZag::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren, _session: &vortex_session::VortexSession) -> vortex_error::VortexResult> - -pub fn vortex_zigzag::ZigZag::execute(array: vortex_array::array::typed::Array, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult - -pub fn vortex_zigzag::ZigZag::execute_parent(array: vortex_array::array::view::ArrayView<'_, Self>, parent: &vortex_array::array::erased::ArrayRef, child_idx: usize, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> - -pub fn vortex_zigzag::ZigZag::id(&self) -> vortex_array::array::ArrayId - -pub fn vortex_zigzag::ZigZag::nbuffers(_array: vortex_array::array::view::ArrayView<'_, Self>) -> usize - -pub fn vortex_zigzag::ZigZag::reduce_parent(array: vortex_array::array::view::ArrayView<'_, Self>, parent: &vortex_array::array::erased::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> - -pub fn vortex_zigzag::ZigZag::serialize(_array: vortex_array::array::view::ArrayView<'_, Self>, _session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> - -pub fn vortex_zigzag::ZigZag::slot_name(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> alloc::string::String - -pub fn vortex_zigzag::ZigZag::validate(&self, _data: &Self::ArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> - -impl vortex_array::array::vtable::operations::OperationsVTable for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::scalar_at(array: vortex_array::array::view::ArrayView<'_, vortex_zigzag::ZigZag>, index: usize, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult - -impl vortex_array::array::vtable::validity::ValidityChild for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::validity_child(array: vortex_array::array::view::ArrayView<'_, vortex_zigzag::ZigZag>) -> vortex_array::array::erased::ArrayRef - -impl vortex_array::arrays::dict::take::TakeExecute for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::take(array: vortex_array::array::view::ArrayView<'_, Self>, indices: &vortex_array::array::erased::ArrayRef, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> - -impl vortex_array::arrays::filter::kernel::FilterReduce for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::filter(array: vortex_array::array::view::ArrayView<'_, Self>, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> - -impl vortex_array::arrays::slice::SliceReduce for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::slice(array: vortex_array::array::view::ArrayView<'_, Self>, range: core::ops::range::Range) -> vortex_error::VortexResult> - -impl vortex_array::scalar_fn::fns::cast::kernel::CastReduce for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::cast(array: vortex_array::array::view::ArrayView<'_, Self>, dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult> - -impl vortex_array::scalar_fn::fns::mask::kernel::MaskReduce for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::mask(array: vortex_array::array::view::ArrayView<'_, Self>, mask: &vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult> - -pub struct vortex_zigzag::ZigZagData - -impl vortex_zigzag::ZigZagData - -pub fn vortex_zigzag::ZigZagData::new() -> Self - -pub fn vortex_zigzag::ZigZagData::try_new(encoded_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult - -impl core::clone::Clone for vortex_zigzag::ZigZagData - -pub fn vortex_zigzag::ZigZagData::clone(&self) -> vortex_zigzag::ZigZagData - -impl core::default::Default for vortex_zigzag::ZigZagData +impl vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable for vortex_zigzag::ZigZag -pub fn vortex_zigzag::ZigZagData::default() -> Self +pub fn vortex_zigzag::ZigZag::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], children: &dyn vortex_array::serde::ArrayChildren, _session: &vortex_session::VortexSession) -> vortex_error::VortexResult> -impl core::fmt::Debug for vortex_zigzag::ZigZagData +pub fn vortex_zigzag::ZigZag::serialize(&self, _view: &vortex_array::arrays::scalar_fn::vtable::ScalarFnArrayView<'_, Self>, _session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> -pub fn vortex_zigzag::ZigZagData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_zigzag::ZigZag -impl core::fmt::Display for vortex_zigzag::ZigZagData +pub type vortex_zigzag::ZigZag::Options = vortex_array::scalar_fn::vtable::EmptyOptions -pub fn vortex_zigzag::ZigZagData::fmt(&self, _f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub fn vortex_zigzag::ZigZag::arity(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions) -> vortex_array::scalar_fn::vtable::Arity -impl vortex_array::hash::ArrayEq for vortex_zigzag::ZigZagData +pub fn vortex_zigzag::ZigZag::child_name(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, child_idx: usize) -> vortex_array::scalar_fn::vtable::ChildName -pub fn vortex_zigzag::ZigZagData::array_eq(&self, _other: &Self, _precision: vortex_array::hash::Precision) -> bool +pub fn vortex_zigzag::ZigZag::deserialize(&self, _metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult -impl vortex_array::hash::ArrayHash for vortex_zigzag::ZigZagData +pub fn vortex_zigzag::ZigZag::execute(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, args: &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult -pub fn vortex_zigzag::ZigZagData::array_hash(&self, _state: &mut H, _precision: vortex_array::hash::Precision) +pub fn vortex_zigzag::ZigZag::fmt_sql(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, expr: &vortex_array::expr::expression::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result -pub trait vortex_zigzag::ZigZagArrayExt: vortex_array::array::typed::TypedArrayRef +pub fn vortex_zigzag::ZigZag::id(&self) -> vortex_array::scalar_fn::ScalarFnId -pub fn vortex_zigzag::ZigZagArrayExt::encoded(&self) -> &vortex_array::array::erased::ArrayRef +pub fn vortex_zigzag::ZigZag::is_fallible(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions) -> bool -pub fn vortex_zigzag::ZigZagArrayExt::ptype(&self) -> vortex_array::dtype::ptype::PType +pub fn vortex_zigzag::ZigZag::is_null_sensitive(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions) -> bool -impl> vortex_zigzag::ZigZagArrayExt for T +pub fn vortex_zigzag::ZigZag::return_dtype(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult -pub fn T::encoded(&self) -> &vortex_array::array::erased::ArrayRef +pub fn vortex_zigzag::ZigZag::serialize(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions) -> vortex_error::VortexResult>> -pub fn T::ptype(&self) -> vortex_array::dtype::ptype::PType +pub fn vortex_zigzag::ZigZag::validity(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, expression: &vortex_array::expr::expression::Expression) -> vortex_error::VortexResult> pub fn vortex_zigzag::zigzag_decode(parray: vortex_array::arrays::primitive::vtable::PrimitiveArray) -> vortex_array::arrays::primitive::vtable::PrimitiveArray -pub fn vortex_zigzag::zigzag_encode(parray: vortex_array::arrays::primitive::vtable::PrimitiveArray) -> vortex_error::VortexResult +pub fn vortex_zigzag::zigzag_encode(parray: vortex_array::arrays::primitive::vtable::PrimitiveArray) -> vortex_error::VortexResult -pub type vortex_zigzag::ZigZagArray = vortex_array::array::typed::Array +pub fn vortex_zigzag::zigzag_try_new(encoded: vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index 34b1d2d19f6..3e633f121fb 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -1,321 +1,159 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::fmt::Display; use std::fmt::Formatter; -use std::hash::Hasher; -use vortex_array::Array; -use vortex_array::ArrayEq; -use vortex_array::ArrayHash; -use vortex_array::ArrayId; -use vortex_array::ArrayParts; use vortex_array::ArrayRef; -use vortex_array::ArrayView; use vortex_array::ExecutionCtx; -use vortex_array::ExecutionResult; use vortex_array::IntoArray; -use vortex_array::Precision; -use vortex_array::TypedArrayRef; -use vortex_array::buffer::BufferHandle; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::scalar_fn::ScalarFnArrayView; +use vortex_array::arrays::scalar_fn::ScalarFnFactoryExt; +use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts; +use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable; use vortex_array::dtype::DType; use vortex_array::dtype::PType; -use vortex_array::match_each_unsigned_integer_ptype; -use vortex_array::scalar::Scalar; -use vortex_array::serde::ArrayChildren; -use vortex_array::vtable::OperationsVTable; -use vortex_array::vtable::VTable; -use vortex_array::vtable::ValidityChild; -use vortex_array::vtable::ValidityVTableFromChild; -use vortex_error::VortexExpect; +use vortex_array::scalar_fn::Arity; +use vortex_array::scalar_fn::ChildName; +use vortex_array::scalar_fn::EmptyOptions; +use vortex_array::scalar_fn::ExecutionArgs; +use vortex_array::scalar_fn::ScalarFnId; +use vortex_array::scalar_fn::ScalarFnVTable; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_ensure; -use vortex_error::vortex_panic; use vortex_session::VortexSession; -use zigzag::ZigZag as ExternalZigZag; -use crate::compute::ZigZagEncoded; -use crate::kernel::PARENT_KERNELS; -use crate::rules::RULES; -use crate::zigzag_decode; +use crate::compress::zigzag_decode; -/// A [`ZigZag`]-encoded Vortex array. -pub type ZigZagArray = Array; - -impl VTable for ZigZag { - type ArrayData = ZigZagData; - - type OperationsVTable = Self; - type ValidityVTable = ValidityVTableFromChild; - - fn id(&self) -> ArrayId { - Self::ID - } - - fn validate( - &self, - _data: &Self::ArrayData, - dtype: &DType, - len: usize, - slots: &[Option], - ) -> VortexResult<()> { - let encoded = slots[ENCODED_SLOT] - .as_ref() - .vortex_expect("ZigZagArray encoded slot"); - let expected_dtype = ZigZagData::dtype_from_encoded_dtype(encoded.dtype())?; - vortex_ensure!( - dtype == &expected_dtype, - "expected dtype {expected_dtype}, got {dtype}" - ); - vortex_ensure!( - encoded.len() == len, - "expected len {len}, got {}", - encoded.len() - ); - Ok(()) - } +/// ZigZag encoding maps signed integers to unsigned integers so that small absolute values +/// have small encoded values. +#[derive(Clone)] +pub struct ZigZag; - fn nbuffers(_array: ArrayView<'_, Self>) -> usize { - 0 - } +impl ZigZag { + pub const ID: ScalarFnId = ScalarFnId::new_ref("vortex.zigzag"); +} - fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle { - vortex_panic!("ZigZagArray buffer index {idx} out of bounds") - } +impl ScalarFnVTable for ZigZag { + type Options = EmptyOptions; - fn buffer_name(_array: ArrayView<'_, Self>, idx: usize) -> Option { - vortex_panic!("ZigZagArray buffer_name index {idx} out of bounds") + fn id(&self) -> ScalarFnId { + ZigZag::ID } - fn serialize( - _array: ArrayView<'_, Self>, - _session: &VortexSession, - ) -> VortexResult>> { + fn serialize(&self, _options: &EmptyOptions) -> VortexResult>> { Ok(Some(vec![])) } fn deserialize( &self, - dtype: &DType, - len: usize, - metadata: &[u8], - _buffers: &[BufferHandle], - children: &dyn ArrayChildren, + _metadata: &[u8], _session: &VortexSession, - ) -> VortexResult> { - if !metadata.is_empty() { - vortex_bail!( - "ZigZagArray expects empty metadata, got {} bytes", - metadata.len() - ); - } - if children.len() != 1 { - vortex_bail!("Expected 1 child, got {}", children.len()); - } - - let ptype = PType::try_from(dtype)?; - let encoded_type = DType::Primitive(ptype.to_unsigned(), dtype.nullability()); - - let encoded = children.get(0, &encoded_type, len)?; - let slots = vec![Some(encoded.clone())]; - let data = ZigZagData::try_new(encoded.dtype())?; - Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) + ) -> VortexResult { + Ok(EmptyOptions) } - fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { - SLOT_NAMES[idx].to_string() + fn arity(&self, _options: &EmptyOptions) -> Arity { + Arity::Exact(1) } - fn execute(array: Array, ctx: &mut ExecutionCtx) -> VortexResult { - Ok(ExecutionResult::done( - zigzag_decode(array.encoded().clone().execute(ctx)?).into_array(), - )) + fn child_name(&self, _options: &EmptyOptions, child_idx: usize) -> ChildName { + match child_idx { + 0 => ChildName::from("encoded"), + _ => unreachable!("Invalid child index {child_idx} for ZigZag"), + } } - fn reduce_parent( - array: ArrayView<'_, Self>, - parent: &ArrayRef, - child_idx: usize, - ) -> VortexResult> { - RULES.evaluate(array, parent, child_idx) + fn fmt_sql( + &self, + _options: &EmptyOptions, + expr: &vortex_array::expr::Expression, + f: &mut Formatter<'_>, + ) -> std::fmt::Result { + write!(f, "zigzag_decode(")?; + expr.children()[0].fmt_sql(f)?; + write!(f, ")") + } + + fn return_dtype(&self, _options: &EmptyOptions, arg_dtypes: &[DType]) -> VortexResult { + let encoded_dtype = &arg_dtypes[0]; + let ptype = PType::try_from(encoded_dtype)?; + vortex_ensure!( + ptype.is_unsigned_int(), + "ZigZag encoded child must be unsigned integer, got {encoded_dtype}" + ); + Ok(DType::from(ptype.to_signed()).with_nullability(encoded_dtype.nullability())) } - fn execute_parent( - array: ArrayView<'_, Self>, - parent: &ArrayRef, - child_idx: usize, + fn execute( + &self, + _options: &EmptyOptions, + args: &dyn ExecutionArgs, ctx: &mut ExecutionCtx, - ) -> VortexResult> { - PARENT_KERNELS.execute(array, parent, child_idx, ctx) - } -} - -impl ArrayHash for ZigZagData { - fn array_hash(&self, _state: &mut H, _precision: Precision) {} -} - -impl ArrayEq for ZigZagData { - fn array_eq(&self, _other: &Self, _precision: Precision) -> bool { - true - } -} - -/// The zigzag-encoded values (signed integers mapped to unsigned). -pub(super) const ENCODED_SLOT: usize = 0; -pub(super) const NUM_SLOTS: usize = 1; -pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["encoded"]; - -#[derive(Clone, Debug)] -pub struct ZigZagData {} - -impl Display for ZigZagData { - fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { - Ok(()) - } -} - -pub trait ZigZagArrayExt: TypedArrayRef { - fn encoded(&self) -> &ArrayRef { - self.as_ref().slots()[ENCODED_SLOT] - .as_ref() - .vortex_expect("ZigZagArray encoded slot") + ) -> VortexResult { + let encoded = args.get(0)?; + let decoded = zigzag_decode(encoded.execute::(ctx)?); + Ok(decoded.into_array()) } - fn ptype(&self) -> PType { - PType::try_from(self.encoded().dtype()) - .vortex_expect("ZigZagArray encoded dtype") - .to_signed() - } -} - -impl> ZigZagArrayExt for T {} - -#[derive(Clone, Debug)] -pub struct ZigZag; - -impl ZigZag { - pub const ID: ArrayId = ArrayId::new_ref("vortex.zigzag"); - - /// Construct a new [`ZigZagArray`] from an encoded unsigned integer array. - pub fn try_new(encoded: ArrayRef) -> VortexResult { - let dtype = ZigZagData::dtype_from_encoded_dtype(encoded.dtype())?; - let len = encoded.len(); - let slots = vec![Some(encoded.clone())]; - let data = ZigZagData::try_new(encoded.dtype())?; - Ok(unsafe { - Array::from_parts_unchecked(ArrayParts::new(ZigZag, dtype, len, data).with_slots(slots)) - }) - } -} - -impl ZigZagData { - fn dtype_from_encoded_dtype(encoded_dtype: &DType) -> VortexResult { - Ok(DType::from(PType::try_from(encoded_dtype)?.to_signed()) - .with_nullability(encoded_dtype.nullability())) - } - - pub fn new() -> Self { - Self {} - } - - pub fn try_new(encoded_dtype: &DType) -> VortexResult { - if !encoded_dtype.is_unsigned_int() { - vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype); - } - - Self::dtype_from_encoded_dtype(encoded_dtype)?; - - Ok(Self {}) + fn validity( + &self, + _options: &EmptyOptions, + expression: &vortex_array::expr::Expression, + ) -> VortexResult> { + Ok(Some(expression.child(0).validity()?)) } -} -impl Default for ZigZagData { - fn default() -> Self { - Self::new() + fn is_null_sensitive(&self, _options: &EmptyOptions) -> bool { + false } -} - -impl OperationsVTable for ZigZag { - fn scalar_at( - array: ArrayView<'_, ZigZag>, - index: usize, - _ctx: &mut ExecutionCtx, - ) -> VortexResult { - let scalar = array.encoded().scalar_at(index)?; - if scalar.is_null() { - return scalar.primitive_reinterpret_cast(ZigZagArrayExt::ptype(&array)); - } - let pscalar = scalar.as_primitive(); - Ok(match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| { - Scalar::primitive( - <

::Int>::decode( - pscalar - .typed_value::

() - .vortex_expect("zigzag corruption"), - ), - array.dtype().nullability(), - ) - })) + fn is_fallible(&self, _options: &EmptyOptions) -> bool { + false } } -impl ValidityChild for ZigZag { - fn validity_child(array: ArrayView<'_, ZigZag>) -> ArrayRef { - array.encoded().clone() +impl ScalarFnArrayVTable for ZigZag { + fn serialize( + &self, + _view: &ScalarFnArrayView, + _session: &VortexSession, + ) -> VortexResult>> { + Ok(Some(vec![])) } -} - -#[cfg(test)] -mod test { - use vortex_array::IntoArray; - use vortex_array::ToCanonical; - use vortex_array::scalar::Scalar; - use vortex_buffer::buffer; - - use super::*; - use crate::zigzag_encode; - #[test] - fn test_compute_statistics() -> VortexResult<()> { - let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10] - .into_array() - .to_primitive(); - let zigzag = zigzag_encode(array.clone())?; - - assert_eq!( - zigzag.statistics().compute_max::(), - array.statistics().compute_max::() - ); - assert_eq!( - zigzag.statistics().compute_null_count(), - array.statistics().compute_null_count() + fn deserialize( + &self, + dtype: &DType, + len: usize, + metadata: &[u8], + children: &dyn vortex_array::serde::ArrayChildren, + _session: &VortexSession, + ) -> VortexResult> { + vortex_ensure!( + metadata.is_empty(), + "ZigZag expects empty metadata, got {} bytes", + metadata.len() ); - assert_eq!( - zigzag.statistics().compute_is_constant(), - array.statistics().compute_is_constant() + vortex_ensure!( + children.len() == 1, + "ZigZag expects 1 child, got {}", + children.len() ); - let sliced = zigzag.slice(0..2).unwrap(); - let sliced = sliced.as_::(); - assert_eq!( - sliced.array().scalar_at(sliced.len() - 1).unwrap(), - Scalar::from(-5i32) - ); + let ptype = PType::try_from(dtype)?; + let encoded_dtype = DType::Primitive(ptype.to_unsigned(), dtype.nullability()); + let encoded = children.get(0, &encoded_dtype, len)?; - assert_eq!( - sliced.statistics().compute_min::(), - array.statistics().compute_min::() - ); - assert_eq!( - sliced.statistics().compute_null_count(), - array.statistics().compute_null_count() - ); - assert_eq!( - sliced.statistics().compute_is_constant(), - array.statistics().compute_is_constant() - ); - Ok(()) + Ok(ScalarFnArrayParts { + options: EmptyOptions, + children: vec![encoded], + }) } } + +/// Construct a ZigZag-encoded array from an unsigned encoded child. +pub fn zigzag_try_new(encoded: ArrayRef) -> VortexResult { + let len = encoded.len(); + ZigZag.try_new_array(len, EmptyOptions, [encoded]) +} diff --git a/encodings/zigzag/src/compress.rs b/encodings/zigzag/src/compress.rs index 998a39f2b49..82e1022b228 100644 --- a/encodings/zigzag/src/compress.rs +++ b/encodings/zigzag/src/compress.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex_array::ArrayRef; use vortex_array::IntoArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::dtype::NativePType; @@ -13,9 +14,9 @@ use vortex_error::vortex_bail; use vortex_error::vortex_panic; use zigzag::ZigZag as ExternalZigZag; -use crate::ZigZag; -use crate::ZigZagArray; -pub fn zigzag_encode(parray: PrimitiveArray) -> VortexResult { +use crate::array::zigzag_try_new; + +pub fn zigzag_encode(parray: PrimitiveArray) -> VortexResult { let validity = parray.validity()?; let encoded = match parray.ptype() { PType::I8 => zigzag_encode_primitive::(parray.into_buffer_mut(), validity), @@ -27,7 +28,7 @@ pub fn zigzag_encode(parray: PrimitiveArray) -> VortexResult { parray.ptype() ), }; - ZigZag::try_new(encoded.into_array()) + zigzag_try_new(encoded.into_array()) } fn zigzag_encode_primitive( @@ -74,19 +75,15 @@ where #[cfg(test)] mod test { - use vortex_array::IntoArray; use vortex_array::ToCanonical; use vortex_array::assert_arrays_eq; use super::*; - use crate::ZigZag; #[test] fn test_compress_i8() { - let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i8..100)) - .unwrap() - .into_array(); - assert!(compressed.is::()); + let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i8..100)).unwrap(); + assert_eq!(compressed.encoding_id().as_ref(), "vortex.zigzag"); assert_arrays_eq!( compressed.to_primitive(), PrimitiveArray::from_iter(-100_i8..100) @@ -94,10 +91,8 @@ mod test { } #[test] fn test_compress_i16() { - let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i16..100)) - .unwrap() - .into_array(); - assert!(compressed.is::()); + let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i16..100)).unwrap(); + assert_eq!(compressed.encoding_id().as_ref(), "vortex.zigzag"); assert_arrays_eq!( compressed.to_primitive(), PrimitiveArray::from_iter(-100_i16..100) @@ -105,10 +100,8 @@ mod test { } #[test] fn test_compress_i32() { - let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i32..100)) - .unwrap() - .into_array(); - assert!(compressed.is::()); + let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i32..100)).unwrap(); + assert_eq!(compressed.encoding_id().as_ref(), "vortex.zigzag"); assert_arrays_eq!( compressed.to_primitive(), PrimitiveArray::from_iter(-100_i32..100) @@ -116,10 +109,8 @@ mod test { } #[test] fn test_compress_i64() { - let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i64..100)) - .unwrap() - .into_array(); - assert!(compressed.is::()); + let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i64..100)).unwrap(); + assert_eq!(compressed.encoding_id().as_ref(), "vortex.zigzag"); assert_arrays_eq!( compressed.to_primitive(), PrimitiveArray::from_iter(-100_i64..100) diff --git a/encodings/zigzag/src/compute/cast.rs b/encodings/zigzag/src/compute/cast.rs deleted file mode 100644 index 7f6741d9aa1..00000000000 --- a/encodings/zigzag/src/compute/cast.rs +++ /dev/null @@ -1,132 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_array::ArrayRef; -use vortex_array::ArrayView; -use vortex_array::IntoArray; -use vortex_array::builtins::ArrayBuiltins; -use vortex_array::dtype::DType; -use vortex_array::scalar_fn::fns::cast::CastReduce; -use vortex_error::VortexResult; - -use crate::ZigZag; -use crate::array::ZigZagArrayExt; -impl CastReduce for ZigZag { - fn cast(array: ArrayView<'_, Self>, dtype: &DType) -> VortexResult> { - if !dtype.is_signed_int() { - return Ok(None); - } - - let new_encoded_dtype = - DType::Primitive(dtype.as_ptype().to_unsigned(), dtype.nullability()); - let new_encoded = array.encoded().cast(new_encoded_dtype)?; - Ok(Some(ZigZag::try_new(new_encoded)?.into_array())) - } -} - -#[cfg(test)] -mod tests { - use rstest::rstest; - use vortex_array::IntoArray; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::assert_arrays_eq; - use vortex_array::builtins::ArrayBuiltins; - use vortex_array::compute::conformance::cast::test_cast_conformance; - use vortex_array::dtype::DType; - use vortex_array::dtype::Nullability; - use vortex_array::dtype::PType; - - use crate::ZigZagArray; - use crate::zigzag_encode; - - #[test] - fn test_cast_zigzag_i32_to_i64() { - let values = PrimitiveArray::from_iter([-100i32, -1, 0, 1, 100]); - let zigzag = zigzag_encode(values).unwrap(); - - let casted = zigzag - .into_array() - .cast(DType::Primitive(PType::I64, Nullability::NonNullable)) - .unwrap(); - assert_eq!( - casted.dtype(), - &DType::Primitive(PType::I64, Nullability::NonNullable) - ); - - // Verify the result is still a ZigZagArray (not decoded) - // Note: The result might be wrapped, so let's check the encoding ID - assert_eq!( - casted.encoding_id().as_ref(), - "vortex.zigzag", - "Cast should preserve ZigZag encoding" - ); - - assert_arrays_eq!(casted, PrimitiveArray::from_iter([-100i64, -1, 0, 1, 100])); - } - - #[test] - fn test_cast_zigzag_width_changes() { - // Test i32 to i16 (narrowing) - let values = PrimitiveArray::from_iter([100i32, -50, 0, 25, -100]); - let zigzag = zigzag_encode(values).unwrap(); - - let casted = zigzag - .into_array() - .cast(DType::Primitive(PType::I16, Nullability::NonNullable)) - .unwrap(); - assert_eq!( - casted.encoding_id().as_ref(), - "vortex.zigzag", - "Should remain ZigZag encoded" - ); - - assert_arrays_eq!( - casted, - PrimitiveArray::from_iter([100i16, -50, 0, 25, -100]) - ); - - // Test i16 to i64 (widening) - let values16 = PrimitiveArray::from_iter([1000i16, -500, 0, 250, -1000]); - let zigzag16 = zigzag_encode(values16).unwrap(); - - let casted64 = zigzag16 - .into_array() - .cast(DType::Primitive(PType::I64, Nullability::NonNullable)) - .unwrap(); - assert_eq!( - casted64.encoding_id().as_ref(), - "vortex.zigzag", - "Should remain ZigZag encoded" - ); - - assert_arrays_eq!( - casted64, - PrimitiveArray::from_iter([1000i64, -500, 0, 250, -1000]) - ); - } - - #[test] - fn test_cast_zigzag_nullable() { - let values = - PrimitiveArray::from_option_iter([Some(-10i32), None, Some(0), Some(10), None]); - let zigzag = zigzag_encode(values).unwrap(); - - let casted = zigzag - .into_array() - .cast(DType::Primitive(PType::I64, Nullability::Nullable)) - .unwrap(); - assert_eq!( - casted.dtype(), - &DType::Primitive(PType::I64, Nullability::Nullable) - ); - } - - #[rstest] - #[case(zigzag_encode(PrimitiveArray::from_iter([-100i32, -50, -1, 0, 1, 50, 100])).unwrap())] - #[case(zigzag_encode(PrimitiveArray::from_iter([-1000i64, -1, 0, 1, 1000])).unwrap())] - #[case(zigzag_encode(PrimitiveArray::from_option_iter([Some(-5i16), None, Some(0), Some(5), None])).unwrap())] - #[case(zigzag_encode(PrimitiveArray::from_iter([i32::MIN, -1, 0, 1, i32::MAX])).unwrap())] - fn test_cast_zigzag_conformance(#[case] array: ZigZagArray) { - test_cast_conformance(&array.into_array()); - } -} diff --git a/encodings/zigzag/src/compute/mod.rs b/encodings/zigzag/src/compute/mod.rs deleted file mode 100644 index 71938dbcc18..00000000000 --- a/encodings/zigzag/src/compute/mod.rs +++ /dev/null @@ -1,222 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -mod cast; - -use vortex_array::ArrayRef; -use vortex_array::ArrayView; -use vortex_array::ExecutionCtx; -use vortex_array::IntoArray; -use vortex_array::arrays::dict::TakeExecute; -use vortex_array::arrays::filter::FilterReduce; -use vortex_array::arrays::scalar_fn::ScalarFnFactoryExt; -use vortex_array::scalar_fn::EmptyOptions; -use vortex_array::scalar_fn::fns::mask::Mask as MaskExpr; -use vortex_array::scalar_fn::fns::mask::MaskReduce; -use vortex_error::VortexResult; -use vortex_mask::Mask; - -use crate::ZigZag; -use crate::array::ZigZagArrayExt; - -impl FilterReduce for ZigZag { - fn filter(array: ArrayView<'_, Self>, mask: &Mask) -> VortexResult> { - let encoded = array.encoded().filter(mask.clone())?; - Ok(Some(ZigZag::try_new(encoded)?.into_array())) - } -} - -impl TakeExecute for ZigZag { - fn take( - array: ArrayView<'_, Self>, - indices: &ArrayRef, - _ctx: &mut ExecutionCtx, - ) -> VortexResult> { - let encoded = array.encoded().take(indices.clone())?; - Ok(Some(ZigZag::try_new(encoded)?.into_array())) - } -} - -impl MaskReduce for ZigZag { - fn mask(array: ArrayView<'_, Self>, mask: &ArrayRef) -> VortexResult> { - let masked_encoded = MaskExpr.try_new_array( - array.encoded().len(), - EmptyOptions, - [array.encoded().clone(), mask.clone()], - )?; - Ok(Some(ZigZag::try_new(masked_encoded)?.into_array())) - } -} - -pub(crate) trait ZigZagEncoded { - type Int: zigzag::ZigZag; -} - -impl ZigZagEncoded for u8 { - type Int = i8; -} - -impl ZigZagEncoded for u16 { - type Int = i16; -} - -impl ZigZagEncoded for u32 { - type Int = i32; -} - -impl ZigZagEncoded for u64 { - type Int = i64; -} - -#[cfg(test)] -mod tests { - use rstest::rstest; - use vortex_array::ArrayRef; - use vortex_array::IntoArray; - use vortex_array::ToCanonical; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::assert_arrays_eq; - use vortex_array::compute::conformance::binary_numeric::test_binary_numeric_array; - use vortex_array::compute::conformance::consistency::test_array_consistency; - use vortex_array::dtype::Nullability; - use vortex_array::scalar::Scalar; - use vortex_array::validity::Validity; - use vortex_buffer::BitBuffer; - use vortex_buffer::buffer; - use vortex_error::VortexResult; - - use crate::ZigZagArray; - use crate::zigzag_encode; - - #[test] - pub fn nullable_scalar_at() -> VortexResult<()> { - let zigzag = zigzag_encode(PrimitiveArray::new( - buffer![-189, -160, 1], - Validity::AllValid, - ))?; - assert_eq!( - zigzag.scalar_at(1)?, - Scalar::primitive(-160, Nullability::Nullable) - ); - Ok(()) - } - - #[test] - fn take_zigzag() -> VortexResult<()> { - let zigzag = zigzag_encode(PrimitiveArray::new( - buffer![-189, -160, 1], - Validity::AllValid, - ))?; - - let indices = buffer![0, 2].into_array(); - let actual = zigzag.take(indices).unwrap(); - let expected = - zigzag_encode(PrimitiveArray::new(buffer![-189, 1], Validity::AllValid))?.into_array(); - assert_arrays_eq!(actual, expected); - Ok(()) - } - - #[test] - fn filter_zigzag() -> VortexResult<()> { - let zigzag = zigzag_encode(PrimitiveArray::new( - buffer![-189, -160, 1], - Validity::AllValid, - ))?; - - let filter_mask = BitBuffer::from(vec![true, false, true]).into(); - let actual = zigzag.filter(filter_mask).unwrap(); - let expected = - zigzag_encode(PrimitiveArray::new(buffer![-189, 1], Validity::AllValid))?.into_array(); - assert_arrays_eq!(actual, expected); - Ok(()) - } - - #[test] - fn test_filter_conformance() -> VortexResult<()> { - use vortex_array::compute::conformance::filter::test_filter_conformance; - - // Test with i32 values - let zigzag = zigzag_encode(PrimitiveArray::new( - buffer![-189i32, -160, 1, 42, -73], - Validity::AllValid, - ))?; - test_filter_conformance(&zigzag.into_array()); - - // Test with i64 values - let zigzag = zigzag_encode(PrimitiveArray::new( - buffer![1000i64, -2000, 3000, -4000, 5000], - Validity::AllValid, - ))?; - test_filter_conformance(&zigzag.into_array()); - - // Test with nullable values - let array = - PrimitiveArray::from_option_iter([Some(-10i16), None, Some(20), Some(-30), None]); - let zigzag = zigzag_encode(array)?; - test_filter_conformance(&zigzag.into_array()); - Ok(()) - } - - #[test] - fn test_mask_conformance() -> VortexResult<()> { - use vortex_array::compute::conformance::mask::test_mask_conformance; - - // Test with i32 values - let zigzag = zigzag_encode(PrimitiveArray::new( - buffer![-100i32, 200, -300, 400, -500], - Validity::AllValid, - ))?; - test_mask_conformance(&zigzag.into_array()); - - // Test with i8 values - let zigzag = zigzag_encode(PrimitiveArray::new( - buffer![-127i8, 0, 127, -1, 1], - Validity::AllValid, - ))?; - test_mask_conformance(&zigzag.into_array()); - Ok(()) - } - - #[rstest] - #[case(buffer![-189i32, -160, 1, 42, -73].into_array())] - #[case(buffer![1000i64, -2000, 3000, -4000, 5000].into_array())] - #[case(PrimitiveArray::from_option_iter([Some(-10i16), None, Some(20), Some(-30), None]).into_array() - )] - #[case(buffer![42i32].into_array())] - fn test_take_zigzag_conformance(#[case] array: ArrayRef) -> VortexResult<()> { - use vortex_array::compute::conformance::take::test_take_conformance; - - let zigzag = zigzag_encode(array.to_primitive())?; - test_take_conformance(&zigzag.into_array()); - Ok(()) - } - - #[rstest] - // Basic ZigZag arrays - #[case::zigzag_i8(zigzag_encode(PrimitiveArray::from_iter([-128i8, -1, 0, 1, 127])).unwrap())] - #[case::zigzag_i16(zigzag_encode(PrimitiveArray::from_iter([-1000i16, -100, 0, 100, 1000])).unwrap())] - #[case::zigzag_i32(zigzag_encode(PrimitiveArray::from_iter([-100000i32, -1000, 0, 1000, 100000])).unwrap())] - #[case::zigzag_i64(zigzag_encode(PrimitiveArray::from_iter([-1000000i64, -10000, 0, 10000, 1000000])).unwrap())] - // Nullable arrays - #[case::zigzag_nullable_i32(zigzag_encode(PrimitiveArray::from_option_iter([Some(-100i32), None, Some(0), Some(100), None])).unwrap())] - #[case::zigzag_nullable_i64(zigzag_encode(PrimitiveArray::from_option_iter([Some(-1000i64), None, Some(0), Some(1000), None])).unwrap())] - // Edge cases - #[case::zigzag_single(zigzag_encode(PrimitiveArray::from_iter([-42i32])).unwrap())] - #[case::zigzag_alternating(zigzag_encode(PrimitiveArray::from_iter([-1i32, 1, -2, 2, -3, 3])).unwrap())] - // Large arrays - #[case::zigzag_large_i32(zigzag_encode(PrimitiveArray::from_iter(-500..500)).unwrap())] - #[case::zigzag_large_i64(zigzag_encode(PrimitiveArray::from_iter((-1000..1000).map(|i| i as i64 * 100))).unwrap())] - fn test_zigzag_consistency(#[case] array: ZigZagArray) { - test_array_consistency(&array.into_array()); - } - - #[rstest] - #[case::zigzag_i8_basic(zigzag_encode(PrimitiveArray::from_iter([-10i8, -5, 0, 5, 10])).unwrap())] - #[case::zigzag_i16_basic(zigzag_encode(PrimitiveArray::from_iter([-100i16, -50, 0, 50, 100])).unwrap())] - #[case::zigzag_i32_basic(zigzag_encode(PrimitiveArray::from_iter([-1000i32, -500, 0, 500, 1000])).unwrap())] - #[case::zigzag_i64_basic(zigzag_encode(PrimitiveArray::from_iter([-10000i64, -5000, 0, 5000, 10000])).unwrap())] - #[case::zigzag_i32_large(zigzag_encode(PrimitiveArray::from_iter((-50..50).map(|i| i * 10))).unwrap())] - fn test_zigzag_binary_numeric(#[case] array: ZigZagArray) { - test_binary_numeric_array(array.into_array()); - } -} diff --git a/encodings/zigzag/src/kernel.rs b/encodings/zigzag/src/kernel.rs deleted file mode 100644 index d0096abaae1..00000000000 --- a/encodings/zigzag/src/kernel.rs +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_array::arrays::dict::TakeExecuteAdaptor; -use vortex_array::kernel::ParentKernelSet; - -use crate::ZigZag; - -pub(crate) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&TakeExecuteAdaptor(ZigZag))]); diff --git a/encodings/zigzag/src/lib.rs b/encodings/zigzag/src/lib.rs index 89da8bd6069..67f4d3deeff 100644 --- a/encodings/zigzag/src/lib.rs +++ b/encodings/zigzag/src/lib.rs @@ -6,7 +6,3 @@ pub use compress::*; mod array; mod compress; -mod compute; -mod kernel; -mod rules; -mod slice; diff --git a/encodings/zigzag/src/rules.rs b/encodings/zigzag/src/rules.rs deleted file mode 100644 index c3b612d101d..00000000000 --- a/encodings/zigzag/src/rules.rs +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_array::arrays::filter::FilterReduceAdaptor; -use vortex_array::arrays::slice::SliceReduceAdaptor; -use vortex_array::optimizer::rules::ParentRuleSet; -use vortex_array::scalar_fn::fns::cast::CastReduceAdaptor; -use vortex_array::scalar_fn::fns::mask::MaskReduceAdaptor; - -use crate::ZigZag; - -pub(crate) static RULES: ParentRuleSet = ParentRuleSet::new(&[ - ParentRuleSet::lift(&CastReduceAdaptor(ZigZag)), - ParentRuleSet::lift(&FilterReduceAdaptor(ZigZag)), - ParentRuleSet::lift(&MaskReduceAdaptor(ZigZag)), - ParentRuleSet::lift(&SliceReduceAdaptor(ZigZag)), -]); diff --git a/encodings/zigzag/src/slice.rs b/encodings/zigzag/src/slice.rs deleted file mode 100644 index 6a7128d5111..00000000000 --- a/encodings/zigzag/src/slice.rs +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::ops::Range; - -use vortex_array::ArrayRef; -use vortex_array::ArrayView; -use vortex_array::IntoArray; -use vortex_array::arrays::slice::SliceReduce; -use vortex_error::VortexResult; - -use crate::ZigZag; -use crate::array::ZigZagArrayExt; - -impl SliceReduce for ZigZag { - fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { - Ok(Some( - ZigZag::try_new(array.encoded().slice(range)?)?.into_array(), - )) - } -} diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 08dffbab678..207721413c0 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -3762,6 +3762,34 @@ pub type vortex_array::arrays::primitive::PrimitiveArray = vortex_array::Array + +pub vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts::children: alloc::vec::Vec + +pub vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts::options: ::Options + +pub struct vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin(_) + +impl vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::new(vtable: V) -> Self + +impl vortex_array::ArrayPlugin for vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren, session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::id(&self) -> vortex_array::ArrayId + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::serialize(&self, array: &vortex_array::ArrayRef, session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> + +pub trait vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable: vortex_array::scalar_fn::ScalarFnVTable + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], children: &dyn vortex_array::serde::ArrayChildren, session: &vortex_session::VortexSession) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable::serialize(&self, view: &vortex_array::arrays::scalar_fn::ScalarFnArrayView<'_, Self>, session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> + pub struct vortex_array::arrays::scalar_fn::AnyScalarFn impl core::fmt::Debug for vortex_array::arrays::scalar_fn::AnyScalarFn @@ -3806,34 +3834,6 @@ pub type vortex_array::arrays::scalar_fn::ScalarFnArrayView<'_, F>::Target = vor pub fn vortex_array::arrays::scalar_fn::ScalarFnArrayView<'_, F>::deref(&self) -> &Self::Target -pub struct vortex_array::arrays::scalar_fn::ScalarFnData - -impl vortex_array::arrays::scalar_fn::ScalarFnData - -pub fn vortex_array::arrays::scalar_fn::ScalarFnData::build(scalar_fn: vortex_array::scalar_fn::ScalarFnRef, children: alloc::vec::Vec, len: usize) -> vortex_error::VortexResult - -pub fn vortex_array::arrays::scalar_fn::ScalarFnData::scalar_fn(&self) -> &vortex_array::scalar_fn::ScalarFnRef - -impl core::clone::Clone for vortex_array::arrays::scalar_fn::ScalarFnData - -pub fn vortex_array::arrays::scalar_fn::ScalarFnData::clone(&self) -> vortex_array::arrays::scalar_fn::ScalarFnData - -impl core::fmt::Debug for vortex_array::arrays::scalar_fn::ScalarFnData - -pub fn vortex_array::arrays::scalar_fn::ScalarFnData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result - -impl core::fmt::Display for vortex_array::arrays::scalar_fn::ScalarFnData - -pub fn vortex_array::arrays::scalar_fn::ScalarFnData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result - -impl vortex_array::ArrayEq for vortex_array::arrays::scalar_fn::ScalarFnData - -pub fn vortex_array::arrays::scalar_fn::ScalarFnData::array_eq(&self, other: &Self, _precision: vortex_array::Precision) -> bool - -impl vortex_array::ArrayHash for vortex_array::arrays::scalar_fn::ScalarFnData - -pub fn vortex_array::arrays::scalar_fn::ScalarFnData::array_hash(&self, state: &mut H, _precision: vortex_array::Precision) - pub struct vortex_array::arrays::scalar_fn::ScalarFnVTable impl core::clone::Clone for vortex_array::arrays::scalar_fn::ScalarFnVTable @@ -3850,7 +3850,7 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::scalar_at(array: vortex_ impl vortex_array::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable -pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::ScalarFnData +pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::array::ScalarFnData pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::OperationsVTable = vortex_array::arrays::scalar_fn::ScalarFnVTable @@ -3886,7 +3886,7 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::serialize(_array: vortex pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String -pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> +pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::array::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> impl vortex_array::ValidityVTable for vortex_array::arrays::scalar_fn::ScalarFnVTable @@ -6284,7 +6284,7 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::scalar_at(array: vortex_ impl vortex_array::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable -pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::ScalarFnData +pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::array::ScalarFnData pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::OperationsVTable = vortex_array::arrays::scalar_fn::ScalarFnVTable @@ -6320,7 +6320,7 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::serialize(_array: vortex pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String -pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> +pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::array::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> impl vortex_array::ValidityVTable for vortex_array::arrays::scalar_fn::ScalarFnVTable @@ -19098,6 +19098,14 @@ pub fn V::id(&self) -> arcref::ArcRef pub fn V::serialize(&self, array: &vortex_array::ArrayRef, session: &vortex_session::VortexSession) -> core::result::Result>, vortex_error::VortexError> +impl vortex_array::ArrayPlugin for vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren, session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::id(&self) -> vortex_array::ArrayId + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::serialize(&self, array: &vortex_array::ArrayRef, session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> + pub trait vortex_array::vtable::ArrayVTable: 'static + core::clone::Clone + core::marker::Sized + core::marker::Send + core::marker::Sync + core::fmt::Debug pub type vortex_array::vtable::ArrayVTable::ArrayData: 'static + core::marker::Send + core::marker::Sync + core::clone::Clone + core::fmt::Debug + core::fmt::Display + vortex_array::ArrayHash + vortex_array::ArrayEq @@ -19900,7 +19908,7 @@ pub fn vortex_array::arrays::patched::Patched::validate(&self, data: &vortex_arr impl vortex_array::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable -pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::ScalarFnData +pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::array::ScalarFnData pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::OperationsVTable = vortex_array::arrays::scalar_fn::ScalarFnVTable @@ -19936,7 +19944,7 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::serialize(_array: vortex pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String -pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> +pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::array::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> impl vortex_array::VTable for vortex_array::arrays::slice::Slice @@ -20872,7 +20880,7 @@ pub fn vortex_array::arrays::patched::Patched::validate(&self, data: &vortex_arr impl vortex_array::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable -pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::ScalarFnData +pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::array::ScalarFnData pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::OperationsVTable = vortex_array::arrays::scalar_fn::ScalarFnVTable @@ -20908,7 +20916,7 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::serialize(_array: vortex pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String -pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> +pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::array::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> impl vortex_array::VTable for vortex_array::arrays::slice::Slice @@ -22558,10 +22566,6 @@ impl vortex_array::ArrayEq for vortex_array::arrays::primitive::PrimitiveData pub fn vortex_array::arrays::primitive::PrimitiveData::array_eq(&self, other: &Self, precision: vortex_array::Precision) -> bool -impl vortex_array::ArrayEq for vortex_array::arrays::scalar_fn::ScalarFnData - -pub fn vortex_array::arrays::scalar_fn::ScalarFnData::array_eq(&self, other: &Self, _precision: vortex_array::Precision) -> bool - impl vortex_array::ArrayEq for vortex_array::arrays::shared::SharedData pub fn vortex_array::arrays::shared::SharedData::array_eq(&self, _other: &Self, _precision: vortex_array::Precision) -> bool @@ -22670,10 +22674,6 @@ impl vortex_array::ArrayHash for vortex_array::arrays::primitive::PrimitiveData pub fn vortex_array::arrays::primitive::PrimitiveData::array_hash(&self, state: &mut H, precision: vortex_array::Precision) -impl vortex_array::ArrayHash for vortex_array::arrays::scalar_fn::ScalarFnData - -pub fn vortex_array::arrays::scalar_fn::ScalarFnData::array_hash(&self, state: &mut H, _precision: vortex_array::Precision) - impl vortex_array::ArrayHash for vortex_array::arrays::shared::SharedData pub fn vortex_array::arrays::shared::SharedData::array_hash(&self, _state: &mut H, _precision: vortex_array::Precision) @@ -22750,6 +22750,22 @@ pub fn V::serialize(&self, array: &vortex_array::ArrayRef, session: &vortex_sess pub fn V::serialize(&self, array: &vortex_array::ArrayRef, session: &vortex_session::VortexSession) -> core::result::Result>, vortex_error::VortexError> +impl vortex_array::ArrayPlugin for vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin + +impl vortex_array::ArrayPlugin for vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren, session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren, session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::id(&self) -> vortex_array::ArrayId + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::id(&self) -> vortex_array::ArrayId + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::serialize(&self, array: &vortex_array::ArrayRef, session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin::serialize(&self, array: &vortex_array::ArrayRef, session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> + pub trait vortex_array::ArrayVTable: 'static + core::clone::Clone + core::marker::Sized + core::marker::Send + core::marker::Sync + core::fmt::Debug pub type vortex_array::ArrayVTable::ArrayData: 'static + core::marker::Send + core::marker::Sync + core::clone::Clone + core::fmt::Debug + core::fmt::Display + vortex_array::ArrayHash + vortex_array::ArrayEq @@ -23552,7 +23568,7 @@ pub fn vortex_array::arrays::patched::Patched::validate(&self, data: &vortex_arr impl vortex_array::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable -pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::ScalarFnData +pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::array::ScalarFnData pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::OperationsVTable = vortex_array::arrays::scalar_fn::ScalarFnVTable @@ -23588,7 +23604,7 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::serialize(_array: vortex pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String -pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> +pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::array::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> impl vortex_array::VTable for vortex_array::arrays::slice::Slice @@ -24772,7 +24788,7 @@ pub fn vortex_array::arrays::patched::Patched::validate(&self, data: &vortex_arr impl vortex_array::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable -pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::ScalarFnData +pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::ArrayData = vortex_array::arrays::scalar_fn::array::ScalarFnData pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::OperationsVTable = vortex_array::arrays::scalar_fn::ScalarFnVTable @@ -24808,7 +24824,7 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::serialize(_array: vortex pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String -pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> +pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::validate(&self, data: &vortex_array::arrays::scalar_fn::array::ScalarFnData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> impl vortex_array::VTable for vortex_array::arrays::slice::Slice diff --git a/vortex-array/src/arrays/scalar_fn/array.rs b/vortex-array/src/arrays/scalar_fn/array.rs index 301713761b5..d91f105d282 100644 --- a/vortex-array/src/arrays/scalar_fn/array.rs +++ b/vortex-array/src/arrays/scalar_fn/array.rs @@ -95,7 +95,7 @@ impl Array { let arg_dtypes: Vec<_> = children.iter().map(|c| c.dtype().clone()).collect(); let dtype = scalar_fn.return_dtype(&arg_dtypes)?; let data = ScalarFnData::build(scalar_fn.clone(), children.clone(), len)?; - let vtable = ScalarFnVTable { scalar_fn }; + let vtable = ScalarFnVTable { id: scalar_fn.id() }; Ok(unsafe { Array::from_parts_unchecked( ArrayParts::new(vtable, dtype, len, data) diff --git a/vortex-array/src/arrays/scalar_fn/mod.rs b/vortex-array/src/arrays/scalar_fn/mod.rs index 9d2e9e66ed8..6c508dcad96 100644 --- a/vortex-array/src/arrays/scalar_fn/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/mod.rs @@ -2,10 +2,10 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod array; +pub mod plugin; mod rules; mod vtable; pub use array::ScalarFnArrayExt; -pub use array::ScalarFnData; pub use vtable::ScalarFnFactoryExt; pub use vtable::*; diff --git a/vortex-array/src/arrays/scalar_fn/plugin.rs b/vortex-array/src/arrays/scalar_fn/plugin.rs new file mode 100644 index 00000000000..1fb656323d9 --- /dev/null +++ b/vortex-array/src/arrays/scalar_fn/plugin.rs @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; +use vortex_session::VortexSession; + +use crate::ArrayId; +use crate::ArrayPlugin; +use crate::ArrayRef; +use crate::IntoArray; +use crate::arrays::ScalarFnArray; +use crate::arrays::scalar_fn::ExactScalarFn; +use crate::arrays::scalar_fn::ScalarFnArrayView; +use crate::buffer::BufferHandle; +use crate::dtype::DType; +use crate::scalar_fn::ScalarFn; +use crate::scalar_fn::ScalarFnVTable; +use crate::serde::ArrayChildren; + +/// An adapter for enabling a scalar function to be serialized as an array. +pub struct ScalarFnArrayPlugin(V); + +impl ScalarFnArrayPlugin { + /// Create a new plugin for the given scalar function vtable. + pub fn new(vtable: V) -> Self { + Self(vtable) + } +} + +pub trait ScalarFnArrayVTable: ScalarFnVTable { + /// Serialize metadata for storing the scalar function as an array. + /// + /// Notably, this metadata needs enough information to reconstruct the child DTypes, as well + /// as the scalar function's own options. + fn serialize( + &self, + view: &ScalarFnArrayView, + session: &VortexSession, + ) -> VortexResult>>; + + /// Deserialize a scalar function array from its serialized components. + fn deserialize( + &self, + dtype: &DType, + len: usize, + metadata: &[u8], + children: &dyn ArrayChildren, + session: &VortexSession, + ) -> VortexResult>; +} + +/// The parts used to construct a ScalarFnArray. +pub struct ScalarFnArrayParts { + pub options: V::Options, + pub children: Vec, +} + +impl ArrayPlugin for ScalarFnArrayPlugin { + fn id(&self) -> ArrayId { + self.0.id() + } + + fn serialize( + &self, + array: &ArrayRef, + session: &VortexSession, + ) -> VortexResult>> { + // We serialize the scalar function options, along with any scalar function array data. + let scalar_fn = array.as_::>(); + ::serialize(&self.0, &scalar_fn, session) + } + + fn deserialize( + &self, + dtype: &DType, + len: usize, + metadata: &[u8], + _buffers: &[BufferHandle], + children: &dyn ArrayChildren, + session: &VortexSession, + ) -> VortexResult { + let parts = ::deserialize( + &self.0, dtype, len, metadata, children, session, + )?; + Ok(ScalarFnArray::try_new( + ScalarFn::new(self.0.clone(), parts.options).erased(), + parts.children, + len, + )? + .into_array()) + } +} diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 996fbee1046..c663ddfc64e 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -41,7 +41,6 @@ use crate::scalar_fn::Arity; use crate::scalar_fn::ChildName; use crate::scalar_fn::ExecutionArgs; use crate::scalar_fn::ScalarFnId; -use crate::scalar_fn::ScalarFnRef; use crate::scalar_fn::ScalarFnVTableExt; use crate::scalar_fn::VecExecutionArgs; use crate::serde::ArrayChildren; @@ -51,7 +50,7 @@ pub type ScalarFnArray = Array; #[derive(Clone, Debug)] pub struct ScalarFnVTable { - pub(super) scalar_fn: ScalarFnRef, + pub(super) id: ScalarFnId, } impl ArrayHash for ScalarFnData { @@ -72,7 +71,7 @@ impl VTable for ScalarFnVTable { type ValidityVTable = Self; fn id(&self) -> ArrayId { - self.scalar_fn.id() + self.id.clone() } fn validate( @@ -83,7 +82,7 @@ impl VTable for ScalarFnVTable { slots: &[Option], ) -> VortexResult<()> { vortex_ensure!( - data.scalar_fn == self.scalar_fn, + data.scalar_fn.id() == self.id, "ScalarFnArray data scalar_fn does not match vtable" ); vortex_ensure!( @@ -97,7 +96,7 @@ impl VTable for ScalarFnVTable { .map(|c| c.dtype().clone()) .collect_vec(); vortex_ensure!( - self.scalar_fn.return_dtype(&child_dtypes)? == *dtype, + data.scalar_fn.return_dtype(&child_dtypes)? == *dtype, "ScalarFnArray dtype does not match scalar function return dtype" ); Ok(()) @@ -128,7 +127,6 @@ impl VTable for ScalarFnVTable { _dtype: &DType, _len: usize, _metadata: &[u8], - _buffers: &[BufferHandle], _children: &dyn ArrayChildren, _session: &VortexSession, @@ -189,7 +187,7 @@ pub trait ScalarFnFactoryExt: scalar_fn::ScalarFnVTable { let data = ScalarFnData { scalar_fn: scalar_fn.clone(), }; - let vtable = ScalarFnVTable { scalar_fn }; + let vtable = ScalarFnVTable { id: scalar_fn.id() }; Ok(unsafe { Array::from_parts_unchecked( ArrayParts::new(vtable, dtype, len, data) diff --git a/vortex-btrblocks/src/schemes/integer.rs b/vortex-btrblocks/src/schemes/integer.rs index af809350a7f..3859e35be52 100644 --- a/vortex-btrblocks/src/schemes/integer.rs +++ b/vortex-btrblocks/src/schemes/integer.rs @@ -36,9 +36,8 @@ use vortex_runend::RunEnd; use vortex_runend::compress::runend_encode; use vortex_sequence::sequence_encode; use vortex_sparse::Sparse; -use vortex_zigzag::ZigZag; -use vortex_zigzag::ZigZagArrayExt; use vortex_zigzag::zigzag_encode; +use vortex_zigzag::zigzag_try_new; use crate::ArrayAndStats; use crate::CascadingCompressor; @@ -282,13 +281,13 @@ impl Scheme for ZigZagScheme { ) -> VortexResult { // Zigzag encode the values, then recursively compress the inner values. let zag = zigzag_encode(data.array_as_primitive())?; - let encoded = zag.encoded().to_primitive(); + let encoded = zag.nth_child(0).unwrap().to_primitive(); let compressed = compressor.compress_child(&encoded.into_array(), &ctx, self.id(), 0)?; tracing::debug!("zigzag output: {}", compressed.encoding_id()); - Ok(ZigZag::try_new(compressed)?.into_array()) + Ok(zigzag_try_new(compressed)?) } } diff --git a/vortex-cuda/src/dynamic_dispatch/mod.rs b/vortex-cuda/src/dynamic_dispatch/mod.rs index e6444327683..a3ddbe6b5b6 100644 --- a/vortex-cuda/src/dynamic_dispatch/mod.rs +++ b/vortex-cuda/src/dynamic_dispatch/mod.rs @@ -498,7 +498,7 @@ mod tests { use vortex::encodings::fastlanes::FoR; use vortex::encodings::fastlanes::FoRArrayExt; use vortex::encodings::runend::RunEnd; - use vortex::encodings::zigzag::ZigZag; + use vortex::encodings::zigzag::zigzag_try_new; use vortex::error::VortexExpect; use vortex::error::VortexResult; use vortex::session::VortexSession; @@ -894,10 +894,10 @@ mod tests { let prim = PrimitiveArray::new(Buffer::from(raw), NonNullable); let bp = BitPacked::encode(&prim.into_array(), bit_width)?; - let zz = ZigZag::try_new(bp.into_array())?; + let zz = zigzag_try_new(bp.into_array())?; let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = dispatch_plan(&zz.into_array(), &cuda_ctx)?; + let plan = dispatch_plan(&zz, &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; @@ -1197,9 +1197,9 @@ mod tests { let prim = PrimitiveArray::new(Buffer::from(raw), NonNullable); let bp = BitPacked::encode(&prim.into_array(), bit_width)?; - let zz = ZigZag::try_new(bp.into_array())?; + let zz = zigzag_try_new(bp.into_array())?; - let sliced = zz.into_array().slice(slice_start..slice_end)?; + let sliced = zz.slice(slice_start..slice_end)?; let expected: Vec = all_decoded[slice_start..slice_end].to_vec(); let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; diff --git a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs index 4bec033d4ff..106bfe97cee 100644 --- a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs +++ b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs @@ -8,6 +8,7 @@ use itertools::zip_eq; use tracing::trace; +use vortex::array::ArrayId; use vortex::array::ArrayRef; use vortex::array::arrays::Dict; use vortex::array::arrays::Primitive; @@ -27,7 +28,6 @@ use vortex::encodings::runend::RunEnd; use vortex::encodings::runend::RunEndArrayExt; use vortex::encodings::sequence::Sequence; use vortex::encodings::zigzag::ZigZag; -use vortex::encodings::zigzag::ZigZagArrayExt; use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::error::vortex_err; @@ -523,8 +523,7 @@ impl FusedPlan { array: ArrayRef, pending_subtrees: &mut Vec, ) -> VortexResult { - let zz = array.as_::(); - let encoded = zz.encoded().clone(); + let encoded = array.nth_child(0).unwrap(); let output_ptype = ptype_to_tag(PType::try_from(array.dtype()).map_err(|_| { vortex_err!("ZigZag must have primitive dtype, got {:?}", array.dtype()) })?); diff --git a/vortex-cuda/src/kernel/encodings/zigzag.rs b/vortex-cuda/src/kernel/encodings/zigzag.rs index f13a19bf0e1..fb2a5d3776c 100644 --- a/vortex-cuda/src/kernel/encodings/zigzag.rs +++ b/vortex-cuda/src/kernel/encodings/zigzag.rs @@ -10,13 +10,15 @@ use tracing::instrument; use vortex::array::ArrayRef; use vortex::array::Canonical; use vortex::array::arrays::PrimitiveArray; +use vortex::array::arrays::ScalarFnVTable; use vortex::array::arrays::primitive::PrimitiveDataParts; +use vortex::array::arrays::scalar_fn::ExactScalarFn; +use vortex::array::arrays::scalar_fn::ScalarFnArrayExt; use vortex::array::match_each_unsigned_integer_ptype; +use vortex::array::matcher::Matcher; use vortex::dtype::NativePType; use vortex::dtype::PType; use vortex::encodings::zigzag::ZigZag; -use vortex::encodings::zigzag::ZigZagArray; -use vortex::encodings::zigzag::ZigZagArrayExt; use vortex::error::VortexResult; use vortex::error::vortex_ensure; use vortex::error::vortex_err; @@ -31,8 +33,8 @@ use crate::executor::CudaExecutionCtx; pub(crate) struct ZigZagExecutor; impl ZigZagExecutor { - fn try_specialize(array: ArrayRef) -> Option { - array.try_downcast::().ok() + fn try_specialize(array: &ArrayRef) -> bool { + ExactScalarFn::::matches(array) } } @@ -44,32 +46,36 @@ impl CudaExecute for ZigZagExecutor { array: ArrayRef, ctx: &mut CudaExecutionCtx, ) -> VortexResult { - let array = - Self::try_specialize(array).ok_or_else(|| vortex_err!("Expected ZigZagArray"))?; + if !Self::try_specialize(&array) { + return Err(vortex_err!("Expected ZigZag ScalarFnArray")); + } + + let sfn_view = array.as_::(); // The encoded array is unsigned, we decode to signed of the same width. - let encoded_ptype = array.encoded().dtype().as_ptype(); + let encoded = sfn_view.child_at(0); + let encoded_ptype = encoded.dtype().as_ptype(); let output_ptype = PType::try_from(array.dtype())?; match_each_unsigned_integer_ptype!(encoded_ptype, |U| { - decode_zigzag::(array, output_ptype, ctx).await + decode_zigzag::(encoded, output_ptype, ctx).await }) } } async fn decode_zigzag( - array: ZigZagArray, + encoded: &ArrayRef, output_ptype: PType, ctx: &mut CudaExecutionCtx, ) -> VortexResult where U: NativePType + DeviceRepr + Send + Sync + 'static, { - let array_len = array.encoded().len(); + let array_len = encoded.len(); vortex_ensure!(array_len > 0, "ZigZag array must not be empty"); // Execute child and copy to device - let canonical = array.encoded().clone().execute_cuda(ctx).await?; + let canonical = encoded.clone().execute_cuda(ctx).await?; let primitive = canonical.into_primitive(); let PrimitiveDataParts { buffer, validity, .. @@ -103,7 +109,7 @@ mod tests { use vortex::array::assert_arrays_eq; use vortex::array::validity::Validity::NonNullable; use vortex::buffer::Buffer; - use vortex::encodings::zigzag::ZigZag; + use vortex::encodings::zigzag::zigzag_try_new; use vortex::error::VortexExpect; use vortex::session::VortexSession; @@ -120,14 +126,14 @@ mod tests { // So encoded [0, 2, 4, 1, 3] should decode to [0, 1, 2, -1, -2] let encoded_data: Vec = vec![0, 2, 4, 1, 3]; - let zigzag_array = ZigZag::try_new( + let zigzag_array = zigzag_try_new( PrimitiveArray::new(Buffer::from(encoded_data), NonNullable).into_array(), )?; let cpu_result = zigzag_array.to_canonical()?; let gpu_result = ZigZagExecutor - .execute(zigzag_array.into_array(), &mut cuda_ctx) + .execute(zigzag_array, &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() diff --git a/vortex-cuda/src/lib.rs b/vortex-cuda/src/lib.rs index 274fe31b18f..cfebc476682 100644 --- a/vortex-cuda/src/lib.rs +++ b/vortex-cuda/src/lib.rs @@ -59,6 +59,7 @@ pub use session::CudaSession; pub use session::CudaSessionExt; pub use stream::VortexCudaStream; pub use stream_pool::VortexCudaStreamPool; +use vortex::array::ArrayId; use vortex::array::arrays::Constant; use vortex::array::arrays::Dict; use vortex::array::arrays::Filter; diff --git a/vortex-file/src/lib.rs b/vortex-file/src/lib.rs index 000df4a7bcb..e33fbdd12e9 100644 --- a/vortex-file/src/lib.rs +++ b/vortex-file/src/lib.rs @@ -110,6 +110,7 @@ pub use forever_constant::*; pub use open::*; pub use strategy::*; use vortex_array::arrays::Dict; +use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin; use vortex_array::session::ArraySessionExt; use vortex_bytebool::ByteBool; use vortex_fsst::FSST; @@ -163,7 +164,7 @@ pub fn register_default_encodings(session: &VortexSession) { arrays.register(FSST); arrays.register(Pco); arrays.register(Sparse); - arrays.register(ZigZag); + arrays.register(ScalarFnArrayPlugin::new(ZigZag)); #[cfg(feature = "zstd")] arrays.register(vortex_zstd::Zstd); #[cfg(all(feature = "zstd", feature = "unstable_encodings"))] diff --git a/vortex-file/src/strategy.rs b/vortex-file/src/strategy.rs index 826b7773a78..d42ddbe79cf 100644 --- a/vortex-file/src/strategy.rs +++ b/vortex-file/src/strategy.rs @@ -28,6 +28,7 @@ use vortex_array::arrays::Struct; use vortex_array::arrays::VarBin; use vortex_array::arrays::VarBinView; use vortex_array::dtype::FieldPath; +use vortex_array::scalar_fn::ScalarFnVTable; use vortex_btrblocks::BtrBlocksCompressorBuilder; use vortex_btrblocks::SchemeExt; use vortex_btrblocks::schemes::integer::IntDictScheme; diff --git a/vortex-python/src/arrays/compressed.rs b/vortex-python/src/arrays/compressed.rs index 96abfa4f1a1..2ae76a7d542 100644 --- a/vortex-python/src/arrays/compressed.rs +++ b/vortex-python/src/arrays/compressed.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use pyo3::prelude::*; -use vortex::array::IntoArray; use vortex::array::ToCanonical; use vortex::array::arrays::Dict; use vortex::encodings::alp::ALP; @@ -12,7 +11,6 @@ use vortex::encodings::fsst::FSST; use vortex::encodings::runend::RunEnd; use vortex::encodings::sequence::Sequence; use vortex::encodings::sparse::Sparse; -use vortex::encodings::zigzag::ZigZag; use vortex::encodings::zigzag::zigzag_encode; use crate::PyVortex; @@ -81,17 +79,13 @@ impl EncodingSubclass for PySparseArray { #[pyclass(name = "ZigZagArray", module = "vortex", extends=PyNativeArray, frozen)] pub(crate) struct PyZigZagArray; -impl EncodingSubclass for PyZigZagArray { - type VTable = ZigZag; -} - #[pymethods] impl PyZigZagArray { #[staticmethod] pub fn encode(array: PyArrayRef) -> PyVortexResult { - Ok(PyVortex( - zigzag_encode(array.inner().clone().to_primitive())?.into_array(), - )) + Ok(PyVortex(zigzag_encode( + array.inner().clone().to_primitive(), + )?)) } } diff --git a/vortex-python/src/arrays/native.rs b/vortex-python/src/arrays/native.rs index 2ff7945fc55..f6c8c9b16a2 100644 --- a/vortex-python/src/arrays/native.rs +++ b/vortex-python/src/arrays/native.rs @@ -31,7 +31,6 @@ use vortex::encodings::fsst::FSST; use vortex::encodings::runend::RunEnd; use vortex::encodings::sequence::Sequence; use vortex::encodings::sparse::Sparse; -use vortex::encodings::zigzag::ZigZag; use vortex::error::VortexExpect; use crate::arrays::PyArray; @@ -56,7 +55,6 @@ use crate::arrays::compressed::PyFsstArray; use crate::arrays::compressed::PyRunEndArray; use crate::arrays::compressed::PySequenceArray; use crate::arrays::compressed::PySparseArray; -use crate::arrays::compressed::PyZigZagArray; use crate::arrays::fastlanes::PyFastLanesBitPackedArray; use crate::arrays::fastlanes::PyFastLanesDeltaArray; use crate::arrays::fastlanes::PyFastLanesFoRArray; @@ -153,10 +151,6 @@ impl PyNativeArray { return Self::with_subclass(py, array, PyRunEndArray); } - if array.is::() { - return Self::with_subclass(py, array, PyZigZagArray); - } - if array.is::() { return Self::with_subclass(py, array, PyFastLanesBitPackedArray); } diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/zigzag.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/zigzag.rs index 2e1bb972f5d..e0325504903 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/zigzag.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/zigzag.rs @@ -11,6 +11,7 @@ use vortex::array::validity::Validity; use vortex::encodings::zigzag::ZigZag; use vortex::encodings::zigzag::zigzag_encode; use vortex::error::VortexResult; +use vortex::scalar_fn::ScalarFnVTable; use super::N; use crate::fixtures::FlatLayoutFixture; @@ -27,7 +28,7 @@ impl FlatLayoutFixture for ZigZagFixture { } fn expected_encodings(&self) -> Vec { - vec![ZigZag::ID] + vec![ZigZag.id()] } fn build(&self) -> VortexResult { @@ -80,17 +81,17 @@ impl FlatLayoutFixture for ZigZagFixture { "head_tail_nulls", ]), vec![ - zigzag_encode(alternating_i32)?.into_array(), - zigzag_encode(small_i64)?.into_array(), - zigzag_encode(deltas_i32)?.into_array(), - zigzag_encode(small_i16)?.into_array(), - zigzag_encode(small_i8)?.into_array(), - zigzag_encode(nullable_zigzag)?.into_array(), - zigzag_encode(extremes_i32)?.into_array(), - zigzag_encode(zero_heavy_outliers)?.into_array(), - zigzag_encode(repeated_negative)?.into_array(), - zigzag_encode(zero_crossing)?.into_array(), - zigzag_encode(head_tail_nulls)?.into_array(), + zigzag_encode(alternating_i32)?, + zigzag_encode(small_i64)?, + zigzag_encode(deltas_i32)?, + zigzag_encode(small_i16)?, + zigzag_encode(small_i8)?, + zigzag_encode(nullable_zigzag)?, + zigzag_encode(extremes_i32)?, + zigzag_encode(zero_heavy_outliers)?, + zigzag_encode(repeated_negative)?, + zigzag_encode(zero_crossing)?, + zigzag_encode(head_tail_nulls)?, ], N, Validity::NonNullable, diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs index 091b9a6747f..50d6986bf52 100644 --- a/vortex/benches/single_encoding_throughput.rs +++ b/vortex/benches/single_encoding_throughput.rs @@ -223,7 +223,7 @@ fn bench_zigzag_compress_i32(bencher: Bencher) { #[divan::bench(name = "zigzag_decompress_i32")] fn bench_zigzag_decompress_i32(bencher: Bencher) { let (_, int_array, _) = setup_primitive_arrays(); - let compressed = zigzag_encode(int_array).unwrap().into_array(); + let compressed = zigzag_encode(int_array).unwrap(); with_byte_counter(bencher, NUM_VALUES * 4) .with_inputs(|| &compressed)