Skip to content

Commit

Permalink
refactor(python,rust): More cleanup for arange (pola-rs#9681)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Jul 5, 2023
1 parent 1f200d7 commit e577509
Show file tree
Hide file tree
Showing 16 changed files with 79 additions and 73 deletions.
4 changes: 2 additions & 2 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ rank = ["polars-core/rank", "polars-lazy/rank"]
diff = ["polars-core/diff", "polars-lazy/diff", "polars-ops/diff"]
pct_change = ["polars-core/pct_change", "polars-lazy/pct_change"]
moment = ["polars-core/moment", "polars-lazy/moment", "polars-ops/moment"]
arange = ["polars-lazy/arange"]
range = ["polars-lazy/range"]
true_div = ["polars-lazy/true_div"]
diagonal_concat = ["polars-core/diagonal_concat", "polars-lazy/diagonal_concat"]
horizontal_concat = ["polars-core/horizontal_concat"]
Expand Down Expand Up @@ -287,7 +287,7 @@ docs-selection = [
"interpolate",
"diff",
"rank",
"arange",
"range",
"diagonal_concat",
"horizontal_concat",
"abs",
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ is_unique = ["polars-plan/is_unique"]
cross_join = ["polars-plan/cross_join", "polars-pipe/cross_join", "polars-ops/cross_join"]
asof_join = ["polars-plan/asof_join", "polars-time"]
concat_str = ["polars-plan/concat_str"]
arange = ["polars-plan/arange"]
range = ["polars-plan/range"]
mode = ["polars-plan/mode"]
cum_agg = ["polars-plan/cum_agg"]
interpolate = ["polars-plan/interpolate"]
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ is_unique = ["polars-ops/is_unique"]
cross_join = ["polars-core/cross_join"]
asof_join = ["polars-core/asof_join", "polars-time", "polars-ops/asof_join"]
concat_str = ["polars-core/concat_str"]
arange = []
range = []
mode = ["polars-core/mode"]
cum_agg = ["polars-core/cum_agg"]
interpolate = ["polars-ops/interpolate"]
Expand Down
12 changes: 6 additions & 6 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ mod nan;
mod pow;
#[cfg(feature = "random")]
mod random;
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
mod range;
#[cfg(all(feature = "rolling_window", feature = "moment"))]
mod rolling;
Expand Down Expand Up @@ -75,7 +75,7 @@ pub use self::boolean::BooleanFunction;
pub(crate) use self::cat::CategoricalFunction;
#[cfg(feature = "temporal")]
pub(super) use self::datetime::TemporalFunction;
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
pub(super) use self::range::RangeFunction;
#[cfg(feature = "strings")]
pub(crate) use self::strings::StringFunction;
Expand Down Expand Up @@ -103,7 +103,7 @@ pub enum FunctionExpr {
BinaryExpr(BinaryFunction),
#[cfg(feature = "temporal")]
TemporalExpr(TemporalFunction),
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
Range(RangeFunction),
#[cfg(feature = "date_offset")]
DateOffset(polars_time::Duration),
Expand Down Expand Up @@ -229,7 +229,7 @@ impl Display for FunctionExpr {
BinaryExpr(b) => return write!(f, "{b}"),
#[cfg(feature = "temporal")]
TemporalExpr(fun) => return write!(f, "{fun}"),
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
Range(func) => return write!(f, "{func}"),
#[cfg(feature = "date_offset")]
DateOffset(_) => "dt.offset_by",
Expand Down Expand Up @@ -415,7 +415,7 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
BinaryExpr(s) => s.into(),
#[cfg(feature = "temporal")]
TemporalExpr(func) => func.into(),
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
Range(func) => func.into(),

#[cfg(feature = "date_offset")]
Expand Down Expand Up @@ -698,7 +698,7 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
}
}

#[cfg(feature = "arange")]
#[cfg(feature = "range")]
impl From<RangeFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
fn from(func: RangeFunction) -> Self {
use RangeFunction::*;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ impl FunctionExpr {
mapper.with_dtype(dtype)
}

#[cfg(feature = "arange")]
#[cfg(feature = "range")]
Range(fun) => {
use RangeFunction::*;
let field = match fun {
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-lazy/polars-plan/src/dsl/functions/index.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#[cfg(any(feature = "arange", feature = "arg_where"))]
#[cfg(any(feature = "range", feature = "arg_where"))]
use super::*;

/// Find the indexes that would sort these series, using the series as sort keys
/// in order of appearance. That means that the first `Series` determines the
/// ordering until ties are found. Ties are then broken by the next `Series`,
/// and so on.
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
pub fn arg_sort_by<E: AsRef<[Expr]>>(by: E, descending: &[bool]) -> Expr {
let e = &by.as_ref()[0];
let name = expr_output_name(e).unwrap();
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-lazy/polars-plan/src/dsl/functions/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use super::*;
/// Create list entries that are range arrays
/// - if `start` and `end` are columns, every element will expand into an array in a list column.
/// - if `start` and `end` are literals the output will be of `Int64`.
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
pub fn arange(start: Expr, end: Expr, step: i64) -> Expr {
let input = vec![start, end];

Expand All @@ -17,7 +17,7 @@ pub fn arange(start: Expr, end: Expr, step: i64) -> Expr {
}
}

#[cfg(feature = "arange")]
#[cfg(feature = "range")]
/// Generate a range of integers.
pub fn int_range(start: Expr, end: Expr, step: i64) -> Expr {
let input = vec![start, end];
Expand All @@ -32,7 +32,7 @@ pub fn int_range(start: Expr, end: Expr, step: i64) -> Expr {
}
}

#[cfg(feature = "arange")]
#[cfg(feature = "range")]
/// Generate a range of integers for each row of the input columns.
pub fn int_ranges(start: Expr, end: Expr, step: i64) -> Expr {
let input = vec![start, end];
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-lazy/src/tests/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use polars_core::frame::explode::MeltArgs;
use polars_core::series::ops::NullBehavior;

use super::*;
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
use crate::dsl::arg_sort_by;

#[test]
Expand Down Expand Up @@ -1017,7 +1017,7 @@ fn test_groupby_cumsum() -> PolarsResult<()> {
}

#[test]
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
fn test_arg_sort_multiple() -> PolarsResult<()> {
let df = df![
"int" => [1, 2, 3, 1, 2],
Expand Down
2 changes: 1 addition & 1 deletion polars/tests/it/lazy/expressions/apply.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::*;

#[test]
#[cfg(feature = "arange")]
#[cfg(feature = "range")]
fn test_arange_agg() -> PolarsResult<()> {
let df = df![
"x" => [5, 5, 4, 4, 2, 2]
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ features = [
"rank",
"diff",
"moment",
"arange",
"range",
"true_div",
"dtype-categorical",
"diagonal_concat",
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/dataframe/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
It is better to implement this with an expression:
>>> df.filter(
... pl.arange(0, pl.count()).shuffle().over("color") < 2
... pl.int_range(0, pl.count()).shuffle().over("color") < 2
... ) # doctest: +IGNORE_RESULT
"""
Expand Down Expand Up @@ -931,7 +931,7 @@ def apply(
>>> (
... df.lazy()
... .filter(pl.arange(0, pl.count()).shuffle().over("color") < 2)
... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2)
... .collect()
... ) # doctest: +IGNORE_RESULT
Expand Down Expand Up @@ -1136,7 +1136,7 @@ def apply(
>>> (
... df.lazy()
... .filter(pl.arange(0, pl.count()).shuffle().over("color") < 2)
... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2)
... .collect()
... ) # doctest: +IGNORE_RESULT
Expand Down
40 changes: 18 additions & 22 deletions py-polars/polars/functions/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ def arange(

@overload
def arange(
start: int | Expr | Series,
end: int | Expr | Series,
start: int | IntoExpr,
end: int | IntoExpr,
step: int = ...,
*,
dtype: PolarsDataType | None = ...,
Expand All @@ -60,8 +60,8 @@ def arange(

@overload
def arange(
start: int | Expr | Series,
end: int | Expr | Series,
start: int | IntoExpr,
end: int | IntoExpr,
step: int = ...,
*,
dtype: PolarsDataType | None = ...,
Expand All @@ -72,8 +72,8 @@ def arange(

@deprecated_alias(low="start", high="end")
def arange(
start: int | Expr | Series,
end: int | Expr | Series,
start: int | IntoExpr,
end: int | IntoExpr,
step: int = 1,
*,
dtype: PolarsDataType | None = None,
Expand Down Expand Up @@ -101,8 +101,6 @@ def arange(
Examples
--------
Generate a single range.
>>> pl.arange(0, 3, eager=True)
shape: (3,)
Series: 'arange' [i64]
Expand All @@ -112,21 +110,19 @@ def arange(
2
]
Generate a range for each row of the input columns.
>>> df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
>>> df.select(pl.arange(pl.col("a"), pl.col("b")))
shape: (2, 1)
┌───────────┐
│ arange │
│ --- │
│ list[i64] │
╞═══════════╡
│ [1, 2] │
│ [2, 3] │
└───────────┘
"""
# This check is not watertight, but we cannot check for literal expressions here
if not (isinstance(start, int) and isinstance(end, int)):
warnings.warn(
" `arange` has been replaced by two new functions:"
" `int_range` for generating a single range,"
" and `int_ranges` for generating a list column with multiple ranges."
" `arange` will remain available as an alias for `int_range`, which means its behaviour will change."
" To silence this warning, use either of the new functions.",
DeprecationWarning,
stacklevel=find_stacklevel(),
)

start = parse_as_expression(start)
end = parse_as_expression(end)
result = wrap_expr(plr.arange(start, end, step))
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/lazyframe/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def apply(
>>> (
... df.lazy()
... .filter(pl.arange(0, pl.count()).shuffle().over("color") < 2)
... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2)
... .collect()
... ) # doctest: +IGNORE_RESULT
Expand Down
46 changes: 29 additions & 17 deletions py-polars/tests/unit/functions/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,32 +33,44 @@ def test_arange_decreasing() -> None:

def test_arange_expr() -> None:
df = pl.DataFrame({"a": ["foobar", "barfoo"]})
out = df.select([pl.arange(0, pl.col("a").count() * 10)])
out = df.select(pl.int_range(0, pl.col("a").count() * 10))
assert out.shape == (20, 1)
assert out.to_series(0)[-1] == 19

# eager arange
out2 = pl.arange(0, 10, 2, eager=True)
assert out2.to_list() == [0, 2, 4, 6, 8]

out3 = pl.arange(pl.Series([0, 19]), pl.Series([3, 39]), step=2, eager=True)
assert out3.dtype == pl.List
assert out3[0].to_list() == [0, 2]

df = pl.DataFrame({"start": [1, 2, 3, 5, 5, 5], "stop": [8, 3, 12, 8, 8, 8]})
def test_arange_deprecated() -> None:
df = pl.DataFrame(
{
"start": [1, 2, 3, 5, 5, 5],
"stop": [8, 3, 12, 8, 8, 8],
}
)

assert df.select(pl.arange(pl.lit(1), pl.col("stop") + 1).alias("test")).to_dict(
False
) == {
"test": [
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 2, 3],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 2, 3, 4, 5, 6, 7, 8],
]
}
with pytest.deprecated_call():
result = df.select(pl.arange(pl.lit(1), pl.col("stop") + 1).alias("test"))

expected = pl.DataFrame(
{
"test": [
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 2, 3],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 2, 3, 4, 5, 6, 7, 8],
]
}
)
assert_frame_equal(result, expected)

with pytest.deprecated_call():
result_s = pl.arange(pl.Series([0, 19]), pl.Series([3, 39]), step=2, eager=True)
assert result_s.dtype == pl.List
assert result_s[0].to_list() == [0, 2]


def test_arange_name() -> None:
Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/operations/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,13 +219,13 @@ def test_rolling_extrema() -> None:
df = (
pl.DataFrame(
{
"col1": pl.arange(0, 7, eager=True),
"col2": pl.arange(0, 7, eager=True).reverse(),
"col1": pl.int_range(0, 7, eager=True),
"col2": pl.int_range(0, 7, eager=True).reverse(),
}
)
).with_columns(
[
pl.when(pl.arange(0, pl.count(), eager=False) < 2)
pl.when(pl.int_range(0, pl.count(), eager=False) < 2)
.then(None)
.otherwise(pl.all())
.suffix("_nulls")
Expand Down
12 changes: 5 additions & 7 deletions py-polars/tests/unit/operations/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,20 +116,18 @@ def test_window_function_cache() -> None:
assert out["values_rev"].to_list() == [1, 0, 4, 3, 2]


def test_arange_no_rows() -> None:
def test_window_range_no_rows() -> None:
df = pl.DataFrame({"x": [5, 5, 4, 4, 2, 2]})
expr = pl.arange(0, pl.count()).over("x")
expr = pl.int_range(0, pl.count()).over("x")
out = df.with_columns(expr)
assert_frame_equal(
out, pl.DataFrame({"x": [5, 5, 4, 4, 2, 2], "arange": [0, 1, 0, 1, 0, 1]})
out, pl.DataFrame({"x": [5, 5, 4, 4, 2, 2], "int": [0, 1, 0, 1, 0, 1]})
)

df = pl.DataFrame({"x": []})
out = df.with_columns(expr)
print(out)
expected = pl.DataFrame(
{"x": [], "arange": []}, schema={"x": pl.Float32, "arange": pl.Int64}
)

expected = pl.DataFrame(schema={"x": pl.Float32, "int": pl.Int64})
assert_frame_equal(out, expected)


Expand Down

0 comments on commit e577509

Please sign in to comment.