Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix!: Fix NaN ordering to make NaNs compare greater than any other float, and equal to themselves #12721

Merged
merged 35 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
434a7d7
move TotalOrd to polars-utils
orlp Oct 16, 2023
a309b3b
add polars-compute
orlp Oct 16, 2023
eaed1e7
wip
orlp Oct 17, 2023
c1912d6
remove nans_compare_equal
orlp Nov 23, 2023
913a4f1
remove float comparison exceptions
orlp Nov 23, 2023
d37d9c0
mostly fix broadcasting comparisons
orlp Nov 24, 2023
01fb1bb
remove inconsistent null equality optimization
orlp Nov 27, 2023
33f09b9
add warning to always-null comparisons
orlp Nov 27, 2023
3c5e101
fmt
orlp Nov 27, 2023
14e43eb
fix warnings in tests
orlp Nov 27, 2023
5c4a943
fix _missing comparison ops
orlp Nov 27, 2023
467b7fc
clippy
orlp Nov 27, 2023
d7fb115
remove not_equal_and_validity
orlp Nov 28, 2023
5354923
add new string comparison kernels
orlp Nov 28, 2023
da58d14
define gt/ge in terms of lt/le
orlp Nov 28, 2023
74f4e3f
add _missing kernels
orlp Nov 28, 2023
b6359c4
add array support to comparison kernels
orlp Nov 28, 2023
a50997d
fmt/clippy
orlp Nov 28, 2023
5a2c321
add boolean comparison kernels
orlp Nov 29, 2023
06b9426
expand comparison tests
orlp Nov 29, 2023
80a12d9
fix test
orlp Nov 29, 2023
0b93380
user new string broadcast comparison kernels
orlp Nov 29, 2023
ed030c3
remove old comparison kernels
orlp Nov 29, 2023
ba409a4
clippy
orlp Nov 29, 2023
ea99583
fix bad/outdated tests
orlp Nov 29, 2023
9013166
fix trait bounds
orlp Nov 29, 2023
3c63fec
fix conditional import
orlp Nov 29, 2023
cb4da95
fix another bad test
orlp Nov 29, 2023
1bd4957
fix failing doctest
orlp Nov 29, 2023
ba8c5e2
address review comments
orlp Nov 30, 2023
d58dc7c
fix mypy
orlp Nov 30, 2023
e2e8b85
fix incorrect bitcount
orlp Nov 30, 2023
3b00eb1
add missing inline
orlp Nov 30, 2023
733c634
add missing comment
orlp Nov 30, 2023
7a340e4
fmt
orlp Nov 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add _missing kernels
  • Loading branch information
orlp committed Nov 28, 2023
commit 74f4e3fcb44347db579950977fcfeee6f7859f8d
74 changes: 61 additions & 13 deletions crates/polars-compute/src/comparisons/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use arrow::bitmap::Bitmap;
use arrow::array::Array;
use arrow::bitmap::{Bitmap, self};

// Low-level comparison kernel.
// Ignores validity (results for nulls are unspecified but initialized).
pub trait TotalOrdKernel: Sized {
pub trait TotalOrdKernel: Sized + Array {
type Scalar: ?Sized;

// These kernels ignore validity entirely (results for nulls are unspecified
// but initialized).
fn tot_eq_kernel(&self, other: &Self) -> Bitmap;
fn tot_ne_kernel(&self, other: &Self) -> Bitmap;
fn tot_lt_kernel(&self, other: &Self) -> Bitmap;
Expand All @@ -16,28 +18,74 @@ pub trait TotalOrdKernel: Sized {
other.tot_le_kernel(self)
}

// These kernels ignore validity entirely (results for nulls are unspecified
// but initialized).
fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
}

trait NotSimd {}

// These kernels treat null as an ordinary value: it is equal to itself but
// unequal to every other value.
/// Null-aware equality between `self` and `other`.
///
/// Starts from the validity-ignoring result of `tot_eq_kernel` and then
/// corrects it with the validity masks of both sides: a position is set when
/// both values are valid and compare equal, or when both values are null.
fn tot_eq_missing_kernel(&self, other: &Self) -> Bitmap {
    let q = self.tot_eq_kernel(other);
    match (self.validity(), other.validity()) {
        // No nulls on either side: the raw kernel result is already correct.
        (None, None) => q,
        // Only one side can hold nulls: a null there never equals the
        // (always valid) value on the other side, so mask those bits out.
        (None, Some(r)) => &q & r,
        (Some(l), None) => &q & l,
        // Both valid and equal, or both null.
        (Some(l), Some(r)) => bitmap::ternary(&q, l, r, |q, l, r| (q & l & r) | !(l | r)),
    }
}

/// Null-aware inequality between `self` and `other`.
///
/// Starts from the validity-ignoring result of `tot_ne_kernel` and then
/// corrects it with the validity masks of both sides: a position is set when
/// both values are valid and compare unequal, or when exactly one of the two
/// values is null.
fn tot_ne_missing_kernel(&self, other: &Self) -> Bitmap {
    let q = self.tot_ne_kernel(other);
    match (self.validity(), other.validity()) {
        // No nulls on either side: the raw kernel result is already correct.
        (None, None) => q,
        // Only one side can hold nulls: a null there always differs from the
        // (always valid) value on the other side, so force those bits on.
        (None, Some(r)) => &q | &!r,
        (Some(l), None) => &q | &!l,
        // Both valid and unequal, or exactly one side null.
        (Some(l), Some(r)) => bitmap::ternary(&q, l, r, |q, l, r| (q & l & r) | (l ^ r)),
    }
}

#[allow(unused)]
macro_rules! impl_not_simd {
($($T:ty,)*) => {
$(impl NotSimd for $T { })*
};
// These kernels treat null as an ordinary value: it is equal to itself but
// unequal to every other value. `other` is assumed to be non-null.
/// Null-aware equality of every element of `self` against the scalar `other`.
///
/// Uses the validity-ignoring result of `tot_eq_kernel_broadcast` and, when
/// `self` has a validity mask, clears the positions that are null, so a null
/// element is never reported as equal to the scalar.
fn tot_eq_missing_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
    let q = self.tot_eq_kernel_broadcast(other);
    match self.validity() {
        Some(valid) => bitmap::binary(&q, valid, |q, v| q & v),
        None => q,
    }
}

/// Null-aware inequality of every element of `self` against the scalar `other`.
///
/// Uses the validity-ignoring result of `tot_ne_kernel_broadcast` and, when
/// `self` has a validity mask, sets the positions that are null, so a null
/// element is always reported as unequal to the scalar.
fn tot_ne_missing_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
    let q = self.tot_ne_kernel_broadcast(other);
    match self.validity() {
        Some(valid) => bitmap::binary(&q, valid, |q, v| q | !v),
        None => q,
    }
}
}

// Trait to enable the scalar blanket implementation.
trait NotSimdPrimitive {}

#[cfg(not(feature = "simd"))]
impl<T> NotSimd for T {}
impl<T> NotSimdPrimitive for T {}

#[cfg(feature = "simd")]
impl_not_simd!(u128, i128,);
impl NotSimdPrimitive for u128 {}
#[cfg(feature = "simd")]
impl NotSimdPrimitive for i128 {}

mod scalar;

Expand Down
4 changes: 2 additions & 2 deletions crates/polars-compute/src/comparisons/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ use arrow::bitmap::Bitmap;
use arrow::types::NativeType;
use polars_utils::total_ord::{TotalOrd, TotalEq};

use super::{NotSimd, TotalOrdKernel};
use super::{NotSimdPrimitive, TotalOrdKernel};

impl<T: NativeType + NotSimd + TotalOrd> TotalOrdKernel for PrimitiveArray<T> {
impl<T: NativeType + NotSimdPrimitive + TotalOrd> TotalOrdKernel for PrimitiveArray<T> {
type Scalar = T;

fn tot_lt_kernel(&self, other: &Self) -> Bitmap {
Expand Down
74 changes: 5 additions & 69 deletions crates/polars-core/src/chunked_array/comparison/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ mod scalar;
use std::ops::{BitOr, Not};

use arrow::array::{BooleanArray, Utf8Array};
use arrow::bitmap::{self, MutableBitmap};
use arrow::bitmap::MutableBitmap;
use arrow::compute;
use arrow::compute::comparison;
use arrow::legacy::prelude::FromData;
Expand Down Expand Up @@ -60,23 +60,7 @@ where
rhs.is_null()
}
},
_ => arity::binary_mut_with_options(
self,
rhs,
|a, b| {
let q = a.tot_eq_kernel(b);
let combined = match (a.validity(), b.validity()) {
(None, None) => q,
(None, Some(r)) => &q & r,
(Some(l), None) => &q & l,
(Some(l), Some(r)) => {
bitmap::ternary(&q, l, r, |q, l, r| (q & l & r) | !(l | r))
},
};
combined.into()
},
"",
),
_ => arity::binary_mut_with_options(self, rhs, |a, b| a.tot_eq_missing_kernel(b).into(), ""),
}
}

Expand Down Expand Up @@ -118,23 +102,7 @@ where
rhs.is_not_null()
}
},
_ => arity::binary_mut_with_options(
self,
rhs,
|a, b| {
let q = a.tot_ne_kernel(b);
let combined = match (a.validity(), b.validity()) {
(None, None) => q,
(None, Some(r)) => &q | &!r,
(Some(l), None) => &q | &!l,
(Some(l), Some(r)) => {
bitmap::ternary(&q, l, r, |q, l, r| (q & l & r) | (l ^ r))
},
};
combined.into()
},
"",
),
_ => arity::binary_mut_with_options(self, rhs, |a, b| a.tot_ne_missing_kernel(b).into(), ""),
}
}

Expand Down Expand Up @@ -458,23 +426,7 @@ impl ChunkCompare<&BinaryChunked> for BinaryChunked {
rhs.is_null()
}
},
_ => arity::binary_mut_with_options(
self,
rhs,
|a, b| {
let q = a.tot_eq_kernel(b);
let combined = match (a.validity(), b.validity()) {
(None, None) => q,
(None, Some(r)) => &q & r,
(Some(l), None) => &q & l,
(Some(l), Some(r)) => {
bitmap::ternary(&q, l, r, |q, l, r| (q & l & r) | !(l | r))
},
};
combined.into()
},
"",
),
_ => arity::binary_mut_with_options(self, rhs, |a, b| a.tot_eq_missing_kernel(b).into(), ""),
}
}

Expand Down Expand Up @@ -516,23 +468,7 @@ impl ChunkCompare<&BinaryChunked> for BinaryChunked {
rhs.is_not_null()
}
},
_ => arity::binary_mut_with_options(
self,
rhs,
|a, b| {
let q = a.tot_ne_kernel(b);
let combined = match (a.validity(), b.validity()) {
(None, None) => q,
(None, Some(r)) => &q | &!r,
(Some(l), None) => &q | &!l,
(Some(l), Some(r)) => {
bitmap::ternary(&q, l, r, |q, l, r| (q & l & r) | (l ^ r))
},
};
combined.into()
},
"",
),
_ => arity::binary_mut_with_options(self, rhs, |a, b| a.tot_ne_missing_kernel(b).into(), ""),
}
}

Expand Down
20 changes: 2 additions & 18 deletions crates/polars-core/src/chunked_array/comparison/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,7 @@ where

fn equal_missing(&self, rhs: Rhs) -> BooleanChunked {
let rhs: T::Native = NumCast::from(rhs).unwrap();
let iter = self.downcast_iter().map(|arr| {
let eq = arr.tot_eq_kernel_broadcast(&rhs);
if let Some(valid) = arr.validity() {
bitmap::binary(&eq, valid, |e, v| e & v).into()
} else {
eq.into()
}
});
ChunkedArray::from_chunk_iter(self.name(), iter)
arity::unary_mut_with_options(self, |arr| arr.tot_eq_missing_kernel_broadcast(&rhs).into())
}

fn not_equal(&self, rhs: Rhs) -> BooleanChunked {
Expand All @@ -64,15 +56,7 @@ where

fn not_equal_missing(&self, rhs: Rhs) -> BooleanChunked {
let rhs: T::Native = NumCast::from(rhs).unwrap();
let iter = self.downcast_iter().map(|arr| {
let ne = arr.tot_ne_kernel_broadcast(&rhs);
if let Some(valid) = arr.validity() {
bitmap::binary(&ne, valid, |n, v| n | !v).into()
} else {
ne.into()
}
});
ChunkedArray::from_chunk_iter(self.name(), iter)
arity::unary_mut_with_options(self, |arr| arr.tot_ne_missing_kernel_broadcast(&rhs).into())
}

fn gt(&self, rhs: Rhs) -> BooleanChunked {
Expand Down
13 changes: 13 additions & 0 deletions crates/polars-core/src/chunked_array/ops/arity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,19 @@ where
ChunkedArray::from_chunk_iter(ca.name(), iter)
}

/// Runs `op` on each chunk of `ca` and gathers the arrays it returns into a
/// new `ChunkedArray` that carries the same name as `ca`.
#[inline]
pub fn unary_mut_with_options<T, V, F, Arr>(ca: &ChunkedArray<T>, mut op: F) -> ChunkedArray<V>
where
    T: PolarsDataType,
    V: PolarsDataType<Array = Arr>,
    Arr: Array + StaticArray,
    F: FnMut(&T::Array) -> Arr,
{
    ChunkedArray::from_chunk_iter(
        ca.name(),
        ca.downcast_iter().map(|chunk| op(chunk)),
    )
}

#[inline]
pub fn binary_elementwise<T, U, V, F>(
lhs: &ChunkedArray<T>,
Expand Down
Loading