Skip to content

Commit

Permalink
Create internal Python api (pola-rs#1823)
Browse files Browse the repository at this point in the history
  • Loading branch information
zundertj authored Nov 21, 2021
1 parent 7314dd2 commit 1b9f53b
Show file tree
Hide file tree
Showing 20 changed files with 552 additions and 545 deletions.
2 changes: 1 addition & 1 deletion py-polars/legacy/pypolars/functions.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from polars.functions import *

51 changes: 20 additions & 31 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,19 @@
# this is only useful for documentation
warnings.warn("polars binary missing!")

# mypy needs these imported explicitly
from polars.eager.frame import DataFrame, wrap_df
from polars.eager.series import Series, wrap_s
from polars.lazy.expr import Expr, wrap_expr
from polars.lazy.frame import LazyFrame, wrap_ldf
from polars.lazy.functions import (
from polars.internals.expr import Expr
from polars.internals.frame import DataFrame, wrap_df # TODO: remove need for wrap_df
from polars.internals.functions import (
arg_where,
concat,
date_range,
get_dummies,
repeat,
)
from polars.internals.lazy_frame import LazyFrame
from polars.internals.lazy_functions import _date as date
from polars.internals.lazy_functions import _datetime as datetime
from polars.internals.lazy_functions import (
all,
any,
apply,
Expand Down Expand Up @@ -48,35 +55,17 @@
std,
sum,
tail,
to_list,
var,
)
from polars.internals.lazy_functions import to_list
from polars.internals.lazy_functions import to_list as list
from polars.internals.lazy_functions import var
from polars.internals.series import Series, wrap_s # TODO: remove need for wrap_s
from polars.internals.whenthen import when

from . import cfg, convert, datatypes, eager, functions, io, lazy, string_cache, testing
from .cfg import *
# TODO: remove wildcard imports
from .convert import *
from .datatypes import *
from .eager import *
from .functions import *
from .io import *

# explicit imports make mypy happy
from .lazy import *
from .lazy import _date as date
from .lazy import _datetime as datetime
from .lazy import col, lit
from .lazy import to_list as list
from .string_cache import *

__all__ = (
convert.__all__
+ datatypes.__all__
+ eager.__all__
+ functions.__all__
+ io.__all__
+ lazy.__all__
+ string_cache.__all__
+ cfg.__all__
)
from .string_cache import StringCache

__version__ = version()
8 changes: 4 additions & 4 deletions py-polars/polars/_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from types import TracebackType
from typing import Dict, Iterable, Optional, Type

import polars as pl
from polars.datatypes import DTYPE_TO_FFINAME, Object


class Tag:
Expand Down Expand Up @@ -40,7 +40,7 @@ def __exit__(


class HTMLFormatter:
def __init__(self, df: "pl.DataFrame", max_cols: int = 75, max_rows: int = 40):
def __init__(self, df: "DataFrame", max_cols: int = 75, max_rows: int = 40): # type: ignore # noqa
self.df = df
self.elements: tp.List[str] = []
self.max_cols = max_cols
Expand Down Expand Up @@ -75,7 +75,7 @@ def write_header(self) -> None:
self.elements.append(col)
with Tag(self.elements, "tr"):
for dtype in self.df.dtypes:
ffi_name = pl.DTYPE_TO_FFINAME[dtype]
ffi_name = DTYPE_TO_FFINAME[dtype]
with Tag(self.elements, "td"):
self.elements.append(ffi_name)

Expand All @@ -94,7 +94,7 @@ def write_body(self) -> None:
self.elements.append("...")
else:
series = self.df[:, c]
if series.dtype == pl.Object:
if series.dtype == Object:
self.elements.append(f"{series[r]}")
else:
self.elements.append(f"{series._s.get_fmt(r)}")
Expand Down
8 changes: 4 additions & 4 deletions py-polars/polars/cfg.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import os
from typing import Type

import polars as pl

__all__ = [
"Config",
]

from polars.string_cache import toggle_string_cache


class Config:
"Configure polars"
Expand Down Expand Up @@ -73,13 +73,13 @@ def set_global_string_cache(cls) -> "Type[Config]":
"""
Turn on the global string cache
"""
pl.toggle_string_cache(True)
toggle_string_cache(True)
return cls

@classmethod
def unset_global_string_cache(cls) -> "Type[Config]":
"""
Turn off the global string cache
"""
pl.toggle_string_cache(False)
toggle_string_cache(False)
return cls
26 changes: 13 additions & 13 deletions py-polars/polars/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

import polars as pl
from polars.internals import DataFrame, Series

if TYPE_CHECKING:
import pandas as pd
Expand Down Expand Up @@ -30,7 +30,7 @@
def from_dict(
data: Dict[str, Sequence[Any]],
columns: Optional[Sequence[str]] = None,
) -> "pl.DataFrame":
) -> DataFrame:
"""
Construct a DataFrame from a dictionary of sequences.
Expand Down Expand Up @@ -64,14 +64,14 @@ def from_dict(
│ 2 ┆ 4 │
╰─────┴─────╯
"""
return pl.DataFrame._from_dict(data=data, columns=columns)
return DataFrame._from_dict(data=data, columns=columns)


def from_records(
data: Union[np.ndarray, Sequence[Sequence[Any]]],
columns: Optional[Sequence[str]] = None,
orient: Optional[str] = None,
) -> "pl.DataFrame":
) -> DataFrame:
"""
Construct a DataFrame from a numpy ndarray or sequence of sequences.
Expand Down Expand Up @@ -110,10 +110,10 @@ def from_records(
│ 3 ┆ 6 │
╰─────┴─────╯
"""
return pl.DataFrame._from_records(data, columns=columns, orient=orient)
return DataFrame._from_records(data, columns=columns, orient=orient)


def from_dicts(dicts: Sequence[Dict[str, Any]]) -> "pl.DataFrame":
def from_dicts(dicts: Sequence[Dict[str, Any]]) -> DataFrame:
"""
Construct a DataFrame from a sequence of dictionaries.
Expand Down Expand Up @@ -144,12 +144,12 @@ def from_dicts(dicts: Sequence[Dict[str, Any]]) -> "pl.DataFrame":
│ 3 ┆ 6 │
╰─────┴─────╯
"""
return pl.DataFrame._from_dicts(dicts)
return DataFrame._from_dicts(dicts)


def from_arrow(
a: Union["pa.Table", "pa.Array"], rechunk: bool = True
) -> Union["pl.DataFrame", "pl.Series"]:
) -> Union[DataFrame, Series]:
"""
Create a DataFrame or Series from an Arrow Table or Array.
Expand Down Expand Up @@ -203,9 +203,9 @@ def from_arrow(
if not _PYARROW_AVAILABLE:
raise ImportError("'pyarrow' is required when using from_arrow().")
if isinstance(a, pa.Table):
return pl.DataFrame._from_arrow(a, rechunk=rechunk)
return DataFrame._from_arrow(a, rechunk=rechunk)
elif isinstance(a, (pa.Array, pa.ChunkedArray)):
return pl.Series._from_arrow("", a)
return Series._from_arrow("", a)
else:
raise ValueError(f"Expected Arrow Table or Array, got {type(a)}.")

Expand All @@ -214,7 +214,7 @@ def from_pandas(
df: Union["pd.DataFrame", "pd.Series", "pd.DatetimeIndex"],
rechunk: bool = True,
nan_to_none: bool = True,
) -> Union["pl.Series", "pl.DataFrame"]:
) -> Union[DataFrame, Series]:
"""
Construct a Polars DataFrame or Series from a pandas DataFrame or Series.
Expand Down Expand Up @@ -270,8 +270,8 @@ def from_pandas(
raise ImportError("'pandas' is required when using from_pandas().") from e

if isinstance(df, (pd.Series, pd.DatetimeIndex)):
return pl.Series._from_pandas("", df, nan_to_none=nan_to_none)
return Series._from_pandas("", df, nan_to_none=nan_to_none)
elif isinstance(df, pd.DataFrame):
return pl.DataFrame._from_pandas(df, rechunk=rechunk, nan_to_none=nan_to_none)
return DataFrame._from_pandas(df, rechunk=rechunk, nan_to_none=nan_to_none)
else:
raise ValueError(f"Expected pandas DataFrame or Series, got {type(df)}.")
6 changes: 0 additions & 6 deletions py-polars/polars/eager/__init__.py

This file was deleted.

12 changes: 12 additions & 0 deletions py-polars/polars/internals/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# flake8: noqa
"""
The modules within `polars.internals` are interdependent. To prevent circular imports, they all import from each other
via this __init__ file using `import polars.internals as pli`. The names imported below are shared across the whole package.
"""
from .expr import Expr, _selection_to_pyexpr_list, expr_to_lit_or_expr, wrap_expr
from .frame import DataFrame, wrap_df
from .functions import arg_where, concat, date_range, get_dummies, repeat
from .lazy_frame import LazyFrame, wrap_ldf
from .lazy_functions import arange, argsort_by, col, concat_list, lit
from .series import Series, wrap_s
from .whenthen import when # used in expr.clip()
28 changes: 14 additions & 14 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np

import polars as pl
from polars import internals as pli
from polars.datatypes import (
DataType,
Date,
Expand Down Expand Up @@ -47,7 +47,7 @@

def series_to_pyseries(
name: str,
values: "pl.Series",
values: "pli.Series",
) -> "PySeries":
"""
Construct a PySeries from a Polars Series.
Expand Down Expand Up @@ -110,9 +110,9 @@ def sequence_to_pyseries(
constructor = polars_type_to_constructor(dtype)
pyseries = constructor(name, values, strict)
if dtype == Date:
pyseries = pyseries.cast(str(pl.Date), True)
pyseries = pyseries.cast(str(Date), True)
elif dtype == Datetime:
pyseries = pyseries.cast(str(pl.Datetime), True)
pyseries = pyseries.cast(str(Datetime), True)
return pyseries

else:
Expand All @@ -126,7 +126,7 @@ def sequence_to_pyseries(
)
return arrow_to_pyseries(name, pa.array(values))

elif dtype_ == list or dtype_ == tuple or dtype_ == pl.Series:
elif dtype_ == list or dtype_ == tuple or dtype_ == pli.Series:
nested_value = _get_first_non_none(value)
nested_dtype = type(nested_value) if value is not None else float

Expand Down Expand Up @@ -230,7 +230,7 @@ def _handle_columns_arg(
return data
else:
if not data:
return [pl.Series(c, None).inner() for c in columns]
return [pli.Series(c, None).inner() for c in columns]
elif len(data) == len(columns):
for i, c in enumerate(columns):
data[i].rename(c)
Expand All @@ -246,7 +246,7 @@ def dict_to_pydf(
"""
Construct a PyDataFrame from a dictionary of sequences.
"""
data_series = [pl.Series(name, values).inner() for name, values in data.items()]
data_series = [pli.Series(name, values).inner() for name, values in data.items()]
data_series = _handle_columns_arg(data_series, columns=columns)
return PyDataFrame(data_series)

Expand All @@ -265,7 +265,7 @@ def numpy_to_pydf(
data_series = []

elif len(shape) == 1:
s = pl.Series("column_0", data).inner()
s = pli.Series("column_0", data).inner()
data_series = [s]

elif len(shape) == 2:
Expand All @@ -285,11 +285,11 @@ def numpy_to_pydf(

if orient == "row":
data_series = [
pl.Series(f"column_{i}", data[:, i]).inner() for i in range(shape[1])
pli.Series(f"column_{i}", data[:, i]).inner() for i in range(shape[1])
]
else:
data_series = [
pl.Series(f"column_{i}", data[i]).inner() for i in range(shape[0])
pli.Series(f"column_{i}", data[i]).inner() for i in range(shape[0])
]
else:
raise ValueError("A numpy array should not have more than two dimensions.")
Expand All @@ -311,7 +311,7 @@ def sequence_to_pydf(
if len(data) == 0:
data_series = []

elif isinstance(data[0], pl.Series):
elif isinstance(data[0], pli.Series):
data_series = []
for i, s in enumerate(data):
if not s.name: # TODO: Replace by `if s.name is None` once allowed
Expand All @@ -336,11 +336,11 @@ def sequence_to_pydf(
return pydf
else:
data_series = [
pl.Series(f"column_{i}", data[i]).inner() for i in range(len(data))
pli.Series(f"column_{i}", data[i]).inner() for i in range(len(data))
]

else:
s = pl.Series("column_0", data).inner()
s = pli.Series("column_0", data).inner()
data_series = [s]

data_series = _handle_columns_arg(data_series, columns=columns)
Expand Down Expand Up @@ -384,7 +384,7 @@ def arrow_to_pydf(


def series_to_pydf(
data: "pl.Series",
data: "pli.Series",
columns: Optional[Sequence[str]] = None,
) -> "PyDataFrame":
"""
Expand Down
Loading

0 comments on commit 1b9f53b

Please sign in to comment.