Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix!: Fix NaN ordering to make NaNs compare greater than any other float, and equal to themselves #12721

Merged
merged 35 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
434a7d7
move TotalOrd to polars-utils
orlp Oct 16, 2023
a309b3b
add polars-compute
orlp Oct 16, 2023
eaed1e7
wip
orlp Oct 17, 2023
c1912d6
remove nans_compare_equal
orlp Nov 23, 2023
913a4f1
remove float comparison exceptions
orlp Nov 23, 2023
d37d9c0
mostly fix broadcasting comparisons
orlp Nov 24, 2023
01fb1bb
remove inconsistent null equality optimization
orlp Nov 27, 2023
33f09b9
add warning to always-null comparisons
orlp Nov 27, 2023
3c5e101
fmt
orlp Nov 27, 2023
14e43eb
fix warnings in tests
orlp Nov 27, 2023
5c4a943
fix _missing comparison ops
orlp Nov 27, 2023
467b7fc
clippy
orlp Nov 27, 2023
d7fb115
remove not_equal_and_validity
orlp Nov 28, 2023
5354923
add new string comparison kernels
orlp Nov 28, 2023
da58d14
define gt/ge in terms of lt/le
orlp Nov 28, 2023
74f4e3f
add _missing kernels
orlp Nov 28, 2023
b6359c4
add array support to comparison kernels
orlp Nov 28, 2023
a50997d
fmt/clippy
orlp Nov 28, 2023
5a2c321
add boolean comparison kernels
orlp Nov 29, 2023
06b9426
expand comparison tests
orlp Nov 29, 2023
80a12d9
fix test
orlp Nov 29, 2023
0b93380
user new string broadcast comparison kernels
orlp Nov 29, 2023
ed030c3
remove old comparison kernels
orlp Nov 29, 2023
ba409a4
clippy
orlp Nov 29, 2023
ea99583
fix bad/outdated tests
orlp Nov 29, 2023
9013166
fix trait bounds
orlp Nov 29, 2023
3c63fec
fix conditional import
orlp Nov 29, 2023
cb4da95
fix another bad test
orlp Nov 29, 2023
1bd4957
fix failing doctest
orlp Nov 29, 2023
ba8c5e2
address review comments
orlp Nov 30, 2023
d58dc7c
fix mypy
orlp Nov 30, 2023
e2e8b85
fix incorrect bitcount
orlp Nov 30, 2023
3b00eb1
add missing inline
orlp Nov 30, 2023
733c634
add missing comment
orlp Nov 30, 2023
7a340e4
fmt
orlp Nov 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
mostly fix broadcasting comparisons
  • Loading branch information
orlp committed Nov 27, 2023
commit d37d9c0c427d5da83f6e6afe9d0011136ded2102
18 changes: 13 additions & 5 deletions crates/polars-compute/src/comparisons/mod.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
use arrow::bitmap::Bitmap;

// Low-level comparison kernel.
// Ignores validity (results for nulls are unspecified but initialized).
pub trait TotalOrdKernel : Sized {
type Scalar;

fn tot_eq_kernel(&self, other: &Self) -> Bitmap;
fn tot_ne_kernel(&self, other: &Self) -> Bitmap;
fn tot_lt_kernel(&self, other: &Self) -> Bitmap;
fn tot_le_kernel(&self, other: &Self) -> Bitmap;

fn tot_gt_kernel(&self, other: &Self) -> Bitmap {
other.tot_lt_kernel(self)
}

fn tot_ge_kernel(&self, other: &Self) -> Bitmap {
other.tot_le_kernel(self)
}

fn tot_eq_kernel(&self, other: &Self) -> Bitmap;
fn tot_ne_kernel(&self, other: &Self) -> Bitmap;

fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap;
}

trait NotSimd { }
Expand Down
58 changes: 51 additions & 7 deletions crates/polars-compute/src/comparisons/scalar.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
// fn to_bitmap(

use arrow::array::PrimitiveArray;
use arrow::bitmap::Bitmap;
use arrow::types::NativeType;
use polars_utils::total_ord::TotalOrd;

use super::{TotalOrdKernel, NotSimd};

impl<T: NativeType + NotSimd> TotalOrdKernel for PrimitiveArray<T> {
impl<T: NativeType + NotSimd + TotalOrd> TotalOrdKernel for PrimitiveArray<T> {
type Scalar = T;

fn tot_lt_kernel(&self, other: &Self) -> Bitmap {
assert!(self.len() == other.len());
self.values()
.iter()
.zip(other.values().iter())
.map(|(a, b)| a.tot_lt(b))
.map(|(l, r)| l.tot_lt(r))
.collect()
}

Expand All @@ -21,7 +22,7 @@ impl<T: NativeType + NotSimd> TotalOrdKernel for PrimitiveArray<T> {
self.values()
.iter()
.zip(other.values().iter())
.map(|(a, b)| a.tot_le(b))
.map(|(l, r)| l.tot_le(r))
.collect()
}

Expand All @@ -30,7 +31,7 @@ impl<T: NativeType + NotSimd> TotalOrdKernel for PrimitiveArray<T> {
self.values()
.iter()
.zip(other.values().iter())
.map(|(a, b)| a.tot_eq(b))
.map(|(l, r)| l.tot_eq(r))
.collect()
}

Expand All @@ -39,7 +40,50 @@ impl<T: NativeType + NotSimd> TotalOrdKernel for PrimitiveArray<T> {
self.values()
.iter()
.zip(other.values().iter())
.map(|(a, b)| a.tot_ne(b))
.map(|(l, r)| l.tot_ne(r))
.collect()
}

fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
self.values()
.iter()
.map(|l| l.tot_eq(other))
.collect()
}

fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
self.values()
.iter()
.map(|l| l.tot_ne(other))
.collect()
}

fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
self.values()
.iter()
.map(|l| l.tot_lt(other))
.collect()
}

fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
self.values()
.iter()
.map(|l| l.tot_le(other))
.collect()
}

fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
self.values()
.iter()
.map(|l| l.tot_gt(other))
.collect()
}

fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
self.values()
.iter()
.map(|l| l.tot_ge(other))
.collect()
}

}
167 changes: 150 additions & 17 deletions crates/polars-compute/src/comparisons/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use bytemuck::Pod;
use super::TotalOrdKernel;


fn apply_comp_kernel<const N: usize, M: Pod, T, F>(
fn apply_binary_kernel<const N: usize, M: Pod, T, F>(
lhs: &PrimitiveArray<T>,
rhs: &PrimitiveArray<T>,
mut f: F,
Expand Down Expand Up @@ -45,30 +45,105 @@ where
Bitmap::from_u8_vec(bytemuck::cast_vec(v), n)
}

fn apply_unary_kernel<const N: usize, M: Pod, T, F>(
arg: &PrimitiveArray<T>,
mut f: F,
) -> Bitmap
where
T: NativeType,
F: FnMut(&[T; N]) -> M,
{
assert!(std::mem::size_of::<M>() == N);
let n = arg.len();

let arg_buf = arg.values().as_slice();
let mut arg_chunks = arg_buf.chunks_exact(N);
let mut v = Vec::with_capacity(n.div_ceil(N));
v.extend(
arg_chunks.by_ref()
.map(|l| unsafe {
f(l.try_into().unwrap_unchecked())
})
);

if n % N > 0 {
let mut l: [T; N] = [T::zeroed(); N];
l.copy_from_slice(arg_chunks.remainder());
v.push(f(&l));
}

Bitmap::from_u8_vec(bytemuck::cast_vec(v), n)
}


macro_rules! impl_int_total_ord_kernel {
($T: ty, $width: literal, $mask: ty) => {
impl TotalOrdKernel for PrimitiveArray<$T> {
type Scalar = $T;

fn tot_eq_kernel(&self, other: &Self) -> Bitmap {
apply_binary_kernel::<$width, $mask, _, _>(self, other, |l, r| {
Simd::from(*l).simd_eq(Simd::from(*r)).to_bitmask()
})
}

fn tot_ne_kernel(&self, other: &Self) -> Bitmap {
apply_binary_kernel::<$width, $mask, _, _>(self, other, |l, r| {
Simd::from(*l).simd_ne(Simd::from(*r)).to_bitmask()
})
}

fn tot_lt_kernel(&self, other: &Self) -> Bitmap {
apply_comp_kernel::<$width, $mask, _, _>(self, other, |l, r| {
apply_binary_kernel::<$width, $mask, _, _>(self, other, |l, r| {
Simd::from(*l).simd_lt(Simd::from(*r)).to_bitmask()
})
}

fn tot_le_kernel(&self, other: &Self) -> Bitmap {
apply_comp_kernel::<$width, $mask, _, _>(self, other, |l, r| {
apply_binary_kernel::<$width, $mask, _, _>(self, other, |l, r| {
Simd::from(*l).simd_le(Simd::from(*r)).to_bitmask()
})
}

fn tot_eq_kernel(&self, other: &Self) -> Bitmap {
apply_comp_kernel::<$width, $mask, _, _>(self, other, |l, r| {
Simd::from(*l).simd_eq(Simd::from(*r)).to_bitmask()
fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let r = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
Simd::from(*l).simd_eq(r).to_bitmask()
})
}

fn tot_ne_kernel(&self, other: &Self) -> Bitmap {
apply_comp_kernel::<$width, $mask, _, _>(self, other, |l, r| {
Simd::from(*l).simd_ne(Simd::from(*r)).to_bitmask()
fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let r = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
Simd::from(*l).simd_ne(r).to_bitmask()
})
}

fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let r = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
Simd::from(*l).simd_lt(r).to_bitmask()
})
}

fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let r = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
Simd::from(*l).simd_le(r).to_bitmask()
})
}

fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let r = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
Simd::from(*l).simd_gt(r).to_bitmask()
})
}

fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let r = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
Simd::from(*l).simd_ge(r).to_bitmask()
})
}
}
Expand All @@ -78,8 +153,30 @@ macro_rules! impl_int_total_ord_kernel {
macro_rules! impl_float_total_ord_kernel {
($T: ty, $width: literal, $mask: ty) => {
impl TotalOrdKernel for PrimitiveArray<$T> {
type Scalar = $T;

fn tot_eq_kernel(&self, other: &Self) -> Bitmap {
apply_binary_kernel::<$width, $mask, _, _>(self, other, |l, r| {
let ls = Simd::from(*l);
let rs = Simd::from(*r);
let lhs_is_nan = ls.simd_ne(ls);
let rhs_is_nan = rs.simd_ne(rs);
((lhs_is_nan & rhs_is_nan) | ls.simd_eq(rs)).to_bitmask()
})
}

fn tot_ne_kernel(&self, other: &Self) -> Bitmap {
apply_binary_kernel::<$width, $mask, _, _>(self, other, |l, r| {
let ls = Simd::from(*l);
let rs = Simd::from(*r);
let lhs_is_nan = ls.simd_ne(ls);
let rhs_is_nan = rs.simd_ne(rs);
(!((lhs_is_nan & rhs_is_nan) | ls.simd_eq(rs))).to_bitmask()
})
}

fn tot_lt_kernel(&self, other: &Self) -> Bitmap {
apply_comp_kernel::<$width, $mask, _, _>(self, other, |l, r| {
apply_binary_kernel::<$width, $mask, _, _>(self, other, |l, r| {
let ls = Simd::from(*l);
let rs = Simd::from(*r);
let lhs_is_nan = ls.simd_ne(ls);
Expand All @@ -88,33 +185,69 @@ macro_rules! impl_float_total_ord_kernel {
}

fn tot_le_kernel(&self, other: &Self) -> Bitmap {
apply_comp_kernel::<$width, $mask, _, _>(self, other, |l, r| {
apply_binary_kernel::<$width, $mask, _, _>(self, other, |l, r| {
let ls = Simd::from(*l);
let rs = Simd::from(*r);
let rhs_is_nan = rs.simd_ne(rs);
(rhs_is_nan | ls.simd_le(rs)).to_bitmask()
})
}

fn tot_eq_kernel(&self, other: &Self) -> Bitmap {
apply_comp_kernel::<$width, $mask, _, _>(self, other, |l, r| {
fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let rs = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
let ls = Simd::from(*l);
let rs = Simd::from(*r);
let lhs_is_nan = ls.simd_ne(ls);
let rhs_is_nan = rs.simd_ne(rs);
((lhs_is_nan & rhs_is_nan) | ls.simd_eq(rs)).to_bitmask()
})
}

fn tot_ne_kernel(&self, other: &Self) -> Bitmap {
apply_comp_kernel::<$width, $mask, _, _>(self, other, |l, r| {
fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let rs = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
let ls = Simd::from(*l);
let rs = Simd::from(*r);
let lhs_is_nan = ls.simd_ne(ls);
let rhs_is_nan = rs.simd_ne(rs);
(!((lhs_is_nan & rhs_is_nan) | ls.simd_eq(rs))).to_bitmask()
})
}

fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let rs = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
let ls = Simd::from(*l);
let lhs_is_nan = ls.simd_ne(ls);
(!(lhs_is_nan | ls.simd_ge(rs))).to_bitmask()
})
}

fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let rs = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
let ls = Simd::from(*l);
let rhs_is_nan = rs.simd_ne(rs);
(rhs_is_nan | ls.simd_le(rs)).to_bitmask()
})
}

fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let rs = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
let ls = Simd::from(*l);
let rhs_is_nan = rs.simd_ne(rs);
(!(rhs_is_nan | rs.simd_ge(ls))).to_bitmask()
})
}

fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
let rs = Simd::splat(*other);
apply_unary_kernel::<$width, $mask, _, _>(self, |l| {
let ls = Simd::from(*l);
let lhs_is_nan = ls.simd_ne(ls);
(lhs_is_nan | rs.simd_le(ls)).to_bitmask()
})
}
}
}
}
Expand Down
Loading