Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix!: Fix NaN ordering to make NaNs compare greater than any other float, and equal to themselves #12721

Merged
merged 35 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
434a7d7
move TotalOrd to polars-utils
orlp Oct 16, 2023
a309b3b
add polars-compute
orlp Oct 16, 2023
eaed1e7
wip
orlp Oct 17, 2023
c1912d6
remove nans_compare_equal
orlp Nov 23, 2023
913a4f1
remove float comparison exceptions
orlp Nov 23, 2023
d37d9c0
mostly fix broadcasting comparisons
orlp Nov 24, 2023
01fb1bb
remove inconsistent null equality optimization
orlp Nov 27, 2023
33f09b9
add warning to always-null comparisons
orlp Nov 27, 2023
3c5e101
fmt
orlp Nov 27, 2023
14e43eb
fix warnings in tests
orlp Nov 27, 2023
5c4a943
fix _missing comparison ops
orlp Nov 27, 2023
467b7fc
clippy
orlp Nov 27, 2023
d7fb115
remove not_equal_and_validity
orlp Nov 28, 2023
5354923
add new string comparison kernels
orlp Nov 28, 2023
da58d14
define gt/ge in terms of lt/le
orlp Nov 28, 2023
74f4e3f
add _missing kernels
orlp Nov 28, 2023
b6359c4
add array support to comparison kernels
orlp Nov 28, 2023
a50997d
fmt/clippy
orlp Nov 28, 2023
5a2c321
add boolean comparison kernels
orlp Nov 29, 2023
06b9426
expand comparison tests
orlp Nov 29, 2023
80a12d9
fix test
orlp Nov 29, 2023
0b93380
use new string broadcast comparison kernels
orlp Nov 29, 2023
ed030c3
remove old comparison kernels
orlp Nov 29, 2023
ba409a4
clippy
orlp Nov 29, 2023
ea99583
fix bad/outdated tests
orlp Nov 29, 2023
9013166
fix trait bounds
orlp Nov 29, 2023
3c63fec
fix conditional import
orlp Nov 29, 2023
cb4da95
fix another bad test
orlp Nov 29, 2023
1bd4957
fix failing doctest
orlp Nov 29, 2023
ba8c5e2
address review comments
orlp Nov 30, 2023
d58dc7c
fix mypy
orlp Nov 30, 2023
e2e8b85
fix incorrect bitcount
orlp Nov 30, 2023
3b00eb1
add missing inline
orlp Nov 30, 2023
733c634
add missing comment
orlp Nov 30, 2023
7a340e4
fmt
orlp Nov 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
address review comments
  • Loading branch information
orlp committed Nov 30, 2023
commit ba8c5e2f4e2a3d81e335ebf9b3b4ef84aa1c2894
7 changes: 1 addition & 6 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,7 @@ def _compare_to_non_df(
op: ComparisonOperator,
) -> DataFrame:
"""Compare a DataFrame with a non-DataFrame object."""
_warn_null_comparison(other)
if op == "eq":
return self.select(F.all() == other)
elif op == "neq":
Expand Down Expand Up @@ -1451,27 +1452,21 @@ def __bool__(self) -> NoReturn:
)

def __eq__(self, other: Any) -> DataFrame: # type: ignore[override]
_warn_null_comparison(other)
return self._comp(other, "eq")

def __ne__(self, other: Any) -> DataFrame: # type: ignore[override]
_warn_null_comparison(other)
return self._comp(other, "neq")

def __gt__(self, other: Any) -> DataFrame:
_warn_null_comparison(other)
return self._comp(other, "gt")

def __lt__(self, other: Any) -> DataFrame:
_warn_null_comparison(other)
return self._comp(other, "lt")

def __ge__(self, other: Any) -> DataFrame:
_warn_null_comparison(other)
return self._comp(other, "gt_eq")

def __le__(self, other: Any) -> DataFrame:
_warn_null_comparison(other)
return self._comp(other, "lt_eq")

def __getstate__(self) -> list[Series]:
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/testing/asserts/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def _assert_series_nan_values_match(left: Series, right: Series) -> None:
if nan_value_mismatch.any():
raise_assertion_error(
"Series",
"nan value mismatch - nans compare equal",
"nan value mismatch",
left.to_list(),
right.to_list(),
)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/utils/various.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def is_str_sequence(
def _warn_null_comparison(obj: Any) -> None:
if obj is None:
warnings.warn(
"comparisons with None always result in null, consider using .is_null() or .is_not_null()",
"Comparisons with None always result in null. Consider using `.is_null()` or `.is_not_null()`.",
UserWarning,
stacklevel=find_stacklevel(),
)
Expand Down
26 changes: 16 additions & 10 deletions py-polars/tests/unit/operations/test_comparison.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from __future__ import annotations

import math
import warnings
from typing import Any
from contextlib import nullcontext

import pytest

Expand Down Expand Up @@ -201,11 +201,11 @@ def reference_ordering_missing(lhs: Any, rhs: Any) -> str:
def verify_total_ordering(
lhs: Any, rhs: Any, dummy: Any, dtype: pl.PolarsDataType
) -> None:
assert dummy is not None
ref = reference_ordering_propagating(lhs, rhs)
refmiss = reference_ordering_missing(lhs, rhs)

# Add dummy variable so we don't broadcast or do full-null optimization.
assert dummy is not None
df = pl.DataFrame(
{"l": [lhs, dummy], "r": [rhs, dummy]}, schema={"l": dtype, "r": dtype}
)
Expand Down Expand Up @@ -241,13 +241,11 @@ def verify_total_ordering(
def verify_total_ordering_broadcast(
lhs: Any, rhs: Any, dummy: Any, dtype: pl.PolarsDataType
) -> None:
# We do want to test None comparisons.
warnings.filterwarnings("ignore", category=UserWarning)

ref = reference_ordering_propagating(lhs, rhs)
refmiss = reference_ordering_missing(lhs, rhs)

# Add dummy variable so we don't broadcast inherently.
assert dummy is not None
df = pl.DataFrame(
{"l": [lhs, dummy], "r": [rhs, dummy]}, schema={"l": dtype, "r": dtype}
)
Expand Down Expand Up @@ -310,8 +308,10 @@ def verify_total_ordering_broadcast(
def test_total_ordering_float_series(lhs: float | None, rhs: float | None) -> None:
verify_total_ordering(lhs, rhs, 0.0, pl.Float32)
verify_total_ordering(lhs, rhs, 0.0, pl.Float64)
verify_total_ordering_broadcast(lhs, rhs, 0.0, pl.Float32)
verify_total_ordering_broadcast(lhs, rhs, 0.0, pl.Float64)
context = pytest.warns(UserWarning) if rhs is None else nullcontext()
with context:
verify_total_ordering_broadcast(lhs, rhs, 0.0, pl.Float32)
verify_total_ordering_broadcast(lhs, rhs, 0.0, pl.Float64)


INTERESTING_STRING_VALUES = [
Expand All @@ -329,7 +329,9 @@ def test_total_ordering_float_series(lhs: float | None, rhs: float | None) -> No
@pytest.mark.parametrize("rhs", INTERESTING_STRING_VALUES)
def test_total_ordering_string_series(lhs: str | None, rhs: str | None) -> None:
verify_total_ordering(lhs, rhs, "", pl.Utf8)
verify_total_ordering_broadcast(lhs, rhs, "", pl.Utf8)
context = pytest.warns(UserWarning) if rhs is None else nullcontext()
with context:
verify_total_ordering_broadcast(lhs, rhs, "", pl.Utf8)


@pytest.mark.parametrize("str_lhs", INTERESTING_STRING_VALUES)
Expand All @@ -338,11 +340,15 @@ def test_total_ordering_binary_series(str_lhs: str | None, str_rhs: str | None)
lhs = None if str_lhs is None else str_lhs.encode("utf-8")
rhs = None if str_rhs is None else str_rhs.encode("utf-8")
verify_total_ordering(lhs, rhs, b"", pl.Binary)
verify_total_ordering_broadcast(lhs, rhs, b"", pl.Binary)
context = pytest.warns(UserWarning) if rhs is None else nullcontext()
with context:
verify_total_ordering_broadcast(lhs, rhs, b"", pl.Binary)


@pytest.mark.parametrize("lhs", [None, False, True])
@pytest.mark.parametrize("rhs", [None, False, True])
def test_total_ordering_bool_series(lhs: bool | None, rhs: bool | None) -> None:
verify_total_ordering(lhs, rhs, False, pl.Boolean)
verify_total_ordering_broadcast(lhs, rhs, False, pl.Boolean)
context = pytest.warns(UserWarning) if rhs is None else nullcontext()
with context:
verify_total_ordering_broadcast(lhs, rhs, False, pl.Boolean)