Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(python): Switch over some of the custom Python date/time conversions to native PyO3 conversions #16203

Merged
merged 9 commits into from
May 14, 2024
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ ndarray = { workspace = true }
num-traits = { workspace = true }
numpy = { version = "0.21", default-features = false }
once_cell = { workspace = true }
pyo3 = { workspace = true, features = ["abi3-py38", "extension-module", "multiple-pymethods"] }
pyo3 = { workspace = true, features = ["abi3-py38", "chrono", "extension-module", "multiple-pymethods"] }
pyo3-built = { version = "0.5", optional = true }
recursive = { workspace = true }
serde_json = { workspace = true, optional = true }
Expand Down
99 changes: 47 additions & 52 deletions py-polars/src/conversion/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,17 @@ use std::borrow::Cow;
use polars::chunked_array::object::PolarsObjectSafe;
use polars::datatypes::{DataType, Field, OwnedObject, PlHashMap, TimeUnit};
use polars::prelude::{AnyValue, Series};
use polars_core::export::chrono::{NaiveDate, NaiveTime, TimeDelta, Timelike};
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
use polars_core::utils::arrow::temporal_conversions::date32_to_date;
use pyo3::exceptions::{PyOverflowError, PyTypeError};
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple};

use super::datetime::{
elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime,
};
use super::{decimal_to_digits, struct_dict, ObjectValue, Wrap};
use crate::error::PyPolarsErr;
use crate::py_modules::{SERIES, UTILS};
Expand Down Expand Up @@ -59,26 +64,32 @@ pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject {
s.into_py(py)
},
AnyValue::Date(v) => {
let convert = utils.getattr(intern!(py, "to_py_date")).unwrap();
convert.call1((v,)).unwrap().into_py(py)
let date = date32_to_date(v);
date.into_py(py)
},
AnyValue::Datetime(v, time_unit, time_zone) => {
let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap();
let time_unit = time_unit.to_ascii();
convert
.call1((v, time_unit, time_zone.as_ref().map(|s| s.as_str())))
.unwrap()
.into_py(py)
if let Some(time_zone) = time_zone {
// When https://github.com/pola-rs/polars/issues/16199 is
// implemented, we'll switch to something like:
//
// let tz: chrono_tz::Tz = time_zone.parse().unwrap();
// let datetime = tz.from_local_datetime(&naive_datetime).earliest().unwrap();
// datetime.into_py(py)
let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap();
let time_unit = time_unit.to_ascii();
convert
.call1((v, time_unit, time_zone.as_str()))
.unwrap()
.into_py(py)
} else {
timestamp_to_naive_datetime(v, time_unit).into_py(py)
}
},
AnyValue::Duration(v, time_unit) => {
let convert = utils.getattr(intern!(py, "to_py_timedelta")).unwrap();
let time_unit = time_unit.to_ascii();
convert.call1((v, time_unit)).unwrap().into_py(py)
},
AnyValue::Time(v) => {
let convert = utils.getattr(intern!(py, "to_py_time")).unwrap();
convert.call1((v,)).unwrap().into_py(py)
let time_delta = elapsed_offset_to_timedelta(v, time_unit);
time_delta.into_py(py)
},
AnyValue::Time(v) => nanos_since_midnight_to_naivetime(v).into_py(py),
AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(),
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(py, av._iter_struct_av(), flds),
AnyValue::StructOwned(payload) => struct_dict(py, payload.0.into_iter(), &payload.1),
Expand Down Expand Up @@ -176,19 +187,16 @@ pub(crate) fn py_object_to_any_value<'py>(
}

fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let date = UTILS
.bind(py)
.getattr(intern!(py, "date_to_int"))
.unwrap()
.call1((ob,))
.unwrap();
let v = date.extract::<i32>().unwrap();
Ok(AnyValue::Date(v))
})
// unwrap() isn't yet const safe.
const UNIX_EPOCH: Option<NaiveDate> = NaiveDate::from_ymd_opt(1970, 1, 1);
let date = ob.extract::<NaiveDate>()?;
let elapsed = date.signed_duration_since(UNIX_EPOCH.unwrap());
Ok(AnyValue::Date(elapsed.num_days() as i32))
}

fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
// Probably needs to wait for
// https://github.com/pola-rs/polars/issues/16199 to do it a faster way.
Python::with_gil(|py| {
let date = UTILS
.bind(py)
Expand All @@ -202,36 +210,23 @@ pub(crate) fn py_object_to_any_value<'py>(
}

fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let f = UTILS
.bind(py)
.getattr(intern!(py, "timedelta_to_int"))
.unwrap();
let py_int = f.call1((ob, intern!(py, "us"))).unwrap();

let av = if let Ok(v) = py_int.extract::<i64>() {
AnyValue::Duration(v, TimeUnit::Microseconds)
} else {
// This should be faster than calling `timedelta_to_int` again with `"ms"` input.
let v_us = py_int.extract::<i128>().unwrap();
let v = (v_us / 1000) as i64;
AnyValue::Duration(v, TimeUnit::Milliseconds)
};
Ok(av)
})
let timedelta = ob.extract::<TimeDelta>()?;
if let Some(micros) = timedelta.num_microseconds() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tried nanoseconds here, it broke the tests, so stuck to micro/milliseconds only.

Ok(AnyValue::Duration(micros, TimeUnit::Microseconds))
} else {
Ok(AnyValue::Duration(
timedelta.num_milliseconds(),
TimeUnit::Milliseconds,
))
}
}

fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let time = UTILS
.bind(py)
.getattr(intern!(py, "time_to_int"))
.unwrap()
.call1((ob,))
.unwrap();
let v = time.extract::<i64>().unwrap();
Ok(AnyValue::Time(v))
})
let time = ob.extract::<NaiveTime>()?;

Ok(AnyValue::Time(
(time.num_seconds_from_midnight() as i64) * 1_000_000_000 + time.nanosecond() as i64,
))
}

fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Expand Down
61 changes: 34 additions & 27 deletions py-polars/src/conversion/chunked_array.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
use polars_core::export::chrono::NaiveTime;
use polars_core::utils::arrow::temporal_conversions::date32_to_date;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyTuple};

use super::datetime::{
elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime,
};
use super::{decimal_to_digits, struct_dict};
use crate::prelude::*;
use crate::py_modules::UTILS;
Expand Down Expand Up @@ -43,56 +48,58 @@ impl ToPyObject for Wrap<&StructChunked> {

impl ToPyObject for Wrap<&DurationChunked> {
fn to_object(&self, py: Python) -> PyObject {
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_timedelta")).unwrap();
let time_unit = self.0.time_unit().to_ascii();
let time_unit = self.0.time_unit();
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit)).unwrap()));
.map(|opt_v| opt_v.map(|v| elapsed_offset_to_timedelta(v, time_unit)));
PyList::new_bound(py, iter).into_py(py)
}
}

impl ToPyObject for Wrap<&DatetimeChunked> {
fn to_object(&self, py: Python) -> PyObject {
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap();
let time_unit = self.0.time_unit().to_ascii();
let time_zone = self.0.time_zone().to_object(py);
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit, &time_zone)).unwrap()));
PyList::new_bound(py, iter).into_py(py)
let time_zone = self.0.time_zone();
if time_zone.is_some() {
// Switch to more efficient code path in
// https://github.com/pola-rs/polars/issues/16199
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap();
let time_unit = self.0.time_unit().to_ascii();
let time_zone = time_zone.to_object(py);
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit, &time_zone)).unwrap()));
PyList::new_bound(py, iter).into_py(py)
} else {
let time_unit = self.0.time_unit();
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| timestamp_to_naive_datetime(v, time_unit)));
PyList::new_bound(py, iter).into_py(py)
}
}
}

impl ToPyObject for Wrap<&TimeChunked> {
fn to_object(&self, py: Python) -> PyObject {
let iter = time_to_pyobject_iter(py, self.0);
let iter = time_to_pyobject_iter(self.0);
PyList::new_bound(py, iter).into_py(py)
}
}

pub(crate) fn time_to_pyobject_iter<'a>(
py: Python<'a>,
ca: &'a TimeChunked,
) -> impl ExactSizeIterator<Item = Option<Bound<'a, PyAny>>> {
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_time")).unwrap().clone();
pub(crate) fn time_to_pyobject_iter(
ca: &TimeChunked,
) -> impl '_ + ExactSizeIterator<Item = Option<NaiveTime>> {
ca.0.iter()
.map(move |opt_v| opt_v.map(|v| convert.call1((v,)).unwrap()))
.map(move |opt_v| opt_v.map(nanos_since_midnight_to_naivetime))
}

impl ToPyObject for Wrap<&DateChunked> {
fn to_object(&self, py: Python) -> PyObject {
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_date")).unwrap();
let iter = self
.0
.into_iter()
.map(|opt_v| opt_v.map(|v| convert.call1((v,)).unwrap()));
let iter = self.0.into_iter().map(|opt_v| opt_v.map(date32_to_date));
PyList::new_bound(py, iter).into_py(py)
}
}
Expand Down
31 changes: 31 additions & 0 deletions py-polars/src/conversion/datetime.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//! Utilities for converting dates, times, datetimes, and so on.

use polars::datatypes::TimeUnit;
use polars_core::export::chrono::{NaiveDateTime, NaiveTime, TimeDelta};

/// Convert a signed count of `time_unit` ticks into a chrono [`TimeDelta`].
///
/// The count is split into whole seconds plus a non-negative sub-second
/// nanosecond part, which is the form `TimeDelta::new` takes.
///
/// # Panics
///
/// Panics if `TimeDelta::new` returns `None` for the computed pair.
pub fn elapsed_offset_to_timedelta(elapsed: i64, time_unit: TimeUnit) -> TimeDelta {
    let (ticks_per_second, nanos_per_tick): (i64, i64) = match time_unit {
        TimeUnit::Nanoseconds => (1_000_000_000, 1),
        TimeUnit::Microseconds => (1_000_000, 1_000),
        TimeUnit::Milliseconds => (1_000, 1_000_000),
    };
    // TimeDelta expects nanos to always be positive. Euclidean division rounds
    // the seconds toward negative infinity and leaves a remainder in
    // `0..ticks_per_second`, so negative inputs need no separate fix-up.
    let whole_seconds = elapsed.div_euclid(ticks_per_second);
    let subsec_nanos = nanos_per_tick * elapsed.rem_euclid(ticks_per_second);
    TimeDelta::new(whole_seconds, subsec_nanos as u32).unwrap()
}

/// Turn a count of `time_unit` ticks since the Unix epoch into a
/// [`NaiveDateTime`].
///
/// Negative values of `since_epoch` produce instants before the epoch.
pub fn timestamp_to_naive_datetime(since_epoch: i64, time_unit: TimeUnit) -> NaiveDateTime {
    let offset_from_epoch = elapsed_offset_to_timedelta(since_epoch, time_unit);
    NaiveDateTime::UNIX_EPOCH + offset_from_epoch
}

/// Turn a nanoseconds-since-midnight count into a [`NaiveTime`].
///
/// The count is converted to a `TimeDelta` and added to 00:00:00.
pub fn nanos_since_midnight_to_naivetime(nanos_since_midnight: i64) -> NaiveTime {
    let midnight = NaiveTime::from_hms_opt(0, 0, 0).expect("00:00:00 is a valid time of day");
    let since_midnight = elapsed_offset_to_timedelta(nanos_since_midnight, TimeUnit::Nanoseconds);
    midnight + since_midnight
}
1 change: 1 addition & 0 deletions py-polars/src/conversion/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub(crate) mod any_value;
pub(crate) mod chunked_array;
mod datetime;
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};

Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/series/export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ fn series_to_numpy_with_copy(py: Python, s: &Series) -> PyResult<PyObject> {
},
Time => {
let ca = s.time().unwrap();
let values = time_to_pyobject_iter(py, ca).map(|v| v.into_py(py));
let values = time_to_pyobject_iter(ca).map(|v| v.into_py(py));
PyArray1::from_iter_bound(py, values).into_py(py)
},
String => {
Expand Down