Skip to content

Commit

Permalink
fix!: nans should compare greater than any other float, and equal to …
Browse files Browse the repository at this point in the history
…themselves (pola-rs#12721)
  • Loading branch information
orlp authored Nov 30, 2023
1 parent 4c50e41 commit 8c4b392
Show file tree
Hide file tree
Showing 59 changed files with 1,514 additions and 3,065 deletions.
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ zstd = "0.13"
polars = { version = "0.35.4", path = "crates/polars", default-features = false }
polars-algo = { version = "0.35.4", path = "crates/polars-algo", default-features = false }
polars-core = { version = "0.35.4", path = "crates/polars-core", default-features = false }
polars-compute = { version = "0.35.4", path = "crates/polars-compute", default-features = false }
polars-error = { version = "0.35.4", path = "crates/polars-error", default-features = false }
polars-ffi = { version = "0.35.4", path = "crates/polars-ffi", default-features = false }
polars-io = { version = "0.35.4", path = "crates/polars-io", default-features = false }
Expand Down
19 changes: 19 additions & 0 deletions crates/polars-arrow/src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,25 @@ impl<O: Offset> BinaryArray<O> {
})
}

/// Creates a new [`BinaryArray`] without checking invariants.
///
/// The four arguments are stored as-is in the corresponding fields; no
/// validation of any kind is performed here.
///
/// # Safety
///
/// The invariants must be valid (see try_new).
// NOTE(review): presumably this means the offsets must be in bounds for
// `values` and the validity length must match the number of elements —
// confirm against `try_new`, which is not visible from this hunk.
pub unsafe fn new_unchecked(
data_type: ArrowDataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
Self {
data_type,
offsets,
values,
validity,
}
}

/// Creates a new [`BinaryArray`] from slices of `&[u8]`.
pub fn from_slice<T: AsRef<[u8]>, P: AsRef<[T]>>(slice: P) -> Self {
Self::from_trusted_len_values_iter(slice.as_ref().iter())
Expand Down
10 changes: 10 additions & 0 deletions crates/polars-arrow/src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,13 @@ impl Array for BooleanArray {
Box::new(self.clone().with_validity(validity))
}
}

/// Wraps a [`Bitmap`] as a [`BooleanArray`] with no validity mask.
impl From<Bitmap> for BooleanArray {
    /// Uses `bits` as the array's values, with data type
    /// `ArrowDataType::Boolean` and `validity` set to `None`.
    fn from(bits: Bitmap) -> Self {
        Self {
            data_type: ArrowDataType::Boolean,
            values: bits,
            validity: None,
        }
    }
}
1 change: 1 addition & 0 deletions crates/polars-arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ mod null;
mod primitive;
pub mod specification;
mod struct_;
mod total_ord;
mod union;
mod utf8;

Expand Down
9 changes: 9 additions & 0 deletions crates/polars-arrow/src/array/total_ord.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use polars_utils::total_ord::TotalEq;

use crate::array::Array;

/// Total equality for boxed arrays, delegating to the existing `PartialEq`
/// implementation for `Box<dyn Array>`.
impl TotalEq for Box<dyn Array> {
    /// Returns `true` when `self == other` according to `PartialEq`.
    fn tot_eq(&self, other: &Self) -> bool {
        PartialEq::eq(self, other)
    }
}
13 changes: 13 additions & 0 deletions crates/polars-arrow/src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use either::Either;

use super::specification::try_check_utf8;
use super::{Array, GenericBinaryArray};
use crate::array::BinaryArray;
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::bitmap::Bitmap;
use crate::buffer::Buffer;
Expand Down Expand Up @@ -479,6 +480,18 @@ impl<O: Offset> Utf8Array<O> {
self.set_validity(Some(f(validity)))
}
}

/// Converts this [`Utf8Array`] to a [`BinaryArray`].
///
/// The offsets, values and validity of `self` are cloned and handed to
/// [`BinaryArray::new_unchecked`] together with the default binary data
/// type for the offset type `O`.
pub fn to_binary(&self) -> BinaryArray<O> {
    let data_type = BinaryArray::<O>::default_data_type();
    let offsets = self.offsets.clone();
    let values = self.values.clone();
    let validity = self.validity.clone();
    // SAFETY: the components come unchanged from an existing `Utf8Array`.
    // NOTE(review): this presumes a valid `Utf8Array` already satisfies every
    // `BinaryArray` invariant — confirm against `BinaryArray::try_new`.
    unsafe { BinaryArray::new_unchecked(data_type, offsets, values, validity) }
}
}

impl<O: Offset> Array for Utf8Array<O> {
Expand Down
18 changes: 14 additions & 4 deletions crates/polars-arrow/src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,10 +282,20 @@ impl Bitmap {
/// Initializes a new [`Bitmap`] filled with unset values.
///
/// This is shorthand for `Self::new_with_value(false, length)`; the bitmap
/// is built in one place so the two constructors cannot drift apart.
#[inline]
pub fn new_zeroed(length: usize) -> Self {
    Self::new_with_value(false, length)
}

/// Initializes a new [`Bitmap`] of `length` bits, all set to `value`.
#[inline]
pub fn new_with_value(value: bool, length: usize) -> Self {
    // Build the backing bytes by hand: going through
    // `MutableBitmap::from_len_zeroed().into()` triggers a bitcount.
    let byte_len = length.saturating_add(7) / 8;
    // Pick the fill byte together with the matching number of unset bits, so
    // the count passed below never has to be computed by scanning the buffer.
    let (fill, unset_bits) = if value {
        (u8::MAX, 0)
    } else {
        (0u8, length)
    };
    let bytes = vec![fill; byte_len];
    // SAFETY: the buffer holds `(length + 7) / 8` bytes, the offset is 0, and
    // `unset_bits` matches the uniform fill.
    // NOTE(review): presumably these are the invariants `from_inner_unchecked`
    // expects — confirm against its documentation.
    unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, unset_bits) }
}

/// Counts the nulls (unset bits) starting from `offset` bits and for `length` bits.
Expand Down
238 changes: 0 additions & 238 deletions crates/polars-arrow/src/compute/comparison/binary.rs

This file was deleted.

Loading

0 comments on commit 8c4b392

Please sign in to comment.