Skip to content

Commit

Permalink
GH1089 Migrate frame/series tests to new framework
Browse files (browse the repository at this point in the history)
  • Loading branch information
loicdiridollou committed Jan 10, 2025
1 parent 3f42941 commit 514eefe
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 68 deletions.
2 changes: 1 addition & 1 deletion pandas-stubs/core/frame.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ if sys.version_info >= (3, 12):
@overload
def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload]
@overload
def __getitem__( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload]
def __getitem__( # pyright: ignore[reportOverlappingOverload]
self, key: Iterable[Hashable] | slice
) -> Self: ...
@overload
Expand Down
10 changes: 6 additions & 4 deletions tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2409,7 +2409,7 @@ def test_indexslice_getitem():
ind = pd.Index([2, 3])
check(
assert_type(
pd.IndexSlice[ind, :], tuple["pd.Index[int]", "slice[None, None, None]"]
pd.IndexSlice[ind, :], tuple["pd.Index[int]", "slice[None, None, None]"] # type: ignore[type-arg]
),
tuple,
)
Expand Down Expand Up @@ -2451,10 +2451,10 @@ def test_sum_get_add() -> None:


def test_getset_untyped() -> None:
result: int = 10
df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [10, 20, 30, 40, 50]})
# Tests that DataFrame.__getitem__ needs to return an untyped Series.
result = df["x"].max()
# TODO: this type check is actually bogus because the right-hand side is "Unknown"
result: pd.Series = df["x"].max()


def test_getmultiindex_columns() -> None:
Expand Down Expand Up @@ -2965,7 +2965,9 @@ def sum_mean(x: pd.DataFrame) -> float:
pd.Series,
)

lfunc: Callable[[pd.DataFrame], float] = lambda x: x.sum().mean()
def lfunc(x: pd.DataFrame) -> float:
return x.sum().mean()

with pytest_warns_bounded(
DeprecationWarning,
"DataFrameGroupBy.apply operated on the grouping columns.",
Expand Down
174 changes: 111 additions & 63 deletions tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,20 +145,20 @@ def test_types_all() -> None:

def test_types_csv() -> None:
s = pd.Series(data=[1, 2, 3])
csv_df: str = s.to_csv()
check(assert_type(s.to_csv(), str), str)

with ensure_clean() as path:
s.to_csv(path)
s2: pd.DataFrame = pd.read_csv(path)
check(assert_type(pd.read_csv(path), pd.DataFrame), pd.DataFrame)

with ensure_clean() as path:
s.to_csv(Path(path))
s3: pd.DataFrame = pd.read_csv(Path(path))
check(assert_type(pd.read_csv(Path(path)), pd.DataFrame), pd.DataFrame)

# The `errors` keyword was added in pandas 1.1.0: https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
with ensure_clean() as path:
s.to_csv(path, errors="replace")
s4: pd.DataFrame = pd.read_csv(path)
check(assert_type(pd.read_csv(path), pd.DataFrame), pd.DataFrame)


def test_types_copy() -> None:
Expand Down Expand Up @@ -229,11 +229,11 @@ def test_types_boolean_indexing() -> None:
def test_types_df_to_df_comparison() -> None:
s = pd.Series(data={"col1": [1, 2]})
s2 = pd.Series(data={"col1": [3, 2]})
res_gt: pd.Series = s > s2
res_ge: pd.Series = s >= s2
res_lt: pd.Series = s < s2
res_le: pd.Series = s <= s2
res_e: pd.Series = s == s2
check(assert_type(s > s2, "pd.Series[bool]"), pd.Series, np.bool_)
check(assert_type(s >= s2, "pd.Series[bool]"), pd.Series, np.bool_)
check(assert_type(s < s2, "pd.Series[bool]"), pd.Series, np.bool_)
check(assert_type(s <= s2, "pd.Series[bool]"), pd.Series, np.bool_)
check(assert_type(s == s2, "pd.Series[bool]"), pd.Series, np.bool_)


def test_types_head_tail() -> None:
Expand Down Expand Up @@ -309,7 +309,11 @@ def test_types_drop_multilevel() -> None:
codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
)
s = pd.Series(data=[1, 2, 3, 4, 5, 6], index=index)
res: pd.Series = s.drop(labels="first", level=1)
check(
assert_type(s.drop(labels="first", level=1), "pd.Series[int]"),
pd.Series,
np.int64,
)


def test_types_drop_duplicates() -> None:
Expand Down Expand Up @@ -382,7 +386,11 @@ def test_types_sort_index() -> None:
# The `key` argument of `sort_index` was added in pandas 1.1.0: https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
def test_types_sort_index_with_key() -> None:
s = pd.Series([1, 2, 3], index=["a", "B", "c"])
res: pd.Series = s.sort_index(key=lambda k: k.str.lower())
check(
assert_type(s.sort_index(key=lambda k: k.str.lower()), "pd.Series[int]"),
pd.Series,
np.int64,
)


def test_types_sort_values() -> None:
Expand Down Expand Up @@ -412,7 +420,11 @@ def test_types_sort_values() -> None:
# The `key` argument of `sort_values` was added in pandas 1.1.0: https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
def test_types_sort_values_with_key() -> None:
s = pd.Series([1, 2, 3], index=[2, 3, 1])
res: pd.Series = s.sort_values(key=lambda k: -k)
check(
assert_type(s.sort_values(key=lambda k: -k), "pd.Series[int]"),
pd.Series,
np.integer,
)


def test_types_shift() -> None:
Expand All @@ -435,18 +447,32 @@ def test_types_rank() -> None:

def test_types_mean() -> None:
s = pd.Series([1, 2, 3, np.nan])
f1: float = s.mean()
s1: pd.Series = s.groupby(level=0).mean()
f2: float = s.mean(skipna=False)
f3: float = s.mean(numeric_only=False)
check(assert_type(s.mean(), float), float)
check(
assert_type(
s.groupby(level=0).mean(), # pyright: ignore[reportAssertTypeFailure]
"pd.Series[float]",
),
pd.Series,
float,
)
check(assert_type(s.mean(skipna=False), float), float)
check(assert_type(s.mean(numeric_only=False), float), float)


def test_types_median() -> None:
s = pd.Series([1, 2, 3, np.nan])
f1: float = s.median()
s1: pd.Series = s.groupby(level=0).median()
f2: float = s.median(skipna=False)
f3: float = s.median(numeric_only=False)
check(assert_type(s.median(), float), float)
check(
assert_type(
s.groupby(level=0).median(), # pyright: ignore[reportAssertTypeFailure]
"pd.Series[float]",
),
pd.Series,
float,
)
check(assert_type(s.median(skipna=False), float), float)
check(assert_type(s.median(numeric_only=False), float), float)


def test_types_sum() -> None:
Expand Down Expand Up @@ -624,63 +650,79 @@ def test_types_element_wise_arithmetic() -> None:
s = pd.Series([0, 1, -10])
s2 = pd.Series([7, -5, 10])

res_add1: pd.Series = s + s2
res_add2: pd.Series = s.add(s2, fill_value=0)
check(assert_type(s + s2, "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s.add(s2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)

res_sub: pd.Series = s - s2
res_sub2: pd.Series = s.sub(s2, fill_value=0)
check(assert_type(s - s2, pd.Series), pd.Series, np.integer)
check(assert_type(s.sub(s2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)

res_mul: pd.Series = s * s2
res_mul2: pd.Series = s.mul(s2, fill_value=0)
check(assert_type(s * s2, pd.Series), pd.Series, np.integer)
check(assert_type(s.mul(s2, fill_value=0), pd.Series), pd.Series, np.integer)

res_div: pd.Series = s / s2
res_div2: pd.Series = s.div(s2, fill_value=0)
check(assert_type(s / s2, pd.Series), pd.Series, np.float64)
check(
assert_type(s.div(s2, fill_value=0), "pd.Series[float]"), pd.Series, np.float64
)

res_floordiv: pd.Series = s // s2
res_floordiv2: pd.Series = s.floordiv(s2, fill_value=0)
check(assert_type(s // s2, "pd.Series[int]"), pd.Series, np.integer)
check(
assert_type(s.floordiv(s2, fill_value=0), "pd.Series[int]"),
pd.Series,
np.integer,
)

res_mod: pd.Series = s % s2
res_mod2: pd.Series = s.mod(s2, fill_value=0)
check(assert_type(s % s2, "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s.mod(s2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)

res_pow: pd.Series = s ** s2.abs()
res_pow2: pd.Series = s.pow(s2.abs(), fill_value=0)
check(assert_type(s ** s2.abs(), "pd.Series[int]"), pd.Series, np.integer)
check(
assert_type(s.pow(s2.abs(), fill_value=0), "pd.Series[int]"),
pd.Series,
np.integer,
)

check(assert_type(divmod(s, s2), tuple["pd.Series[int]", "pd.Series[int]"]), tuple)


def test_types_scalar_arithmetic() -> None:
s = pd.Series([0, 1, -10])

res_add1: pd.Series = s + 1
res_add2: pd.Series = s.add(1, fill_value=0)
check(assert_type(s + 1, "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s.add(1, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)

res_sub: pd.Series = s - 1
res_sub2: pd.Series = s.sub(1, fill_value=0)
check(assert_type(s - 1, pd.Series), pd.Series, np.integer)
check(assert_type(s.sub(1, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)

res_mul: pd.Series = s * 2
res_mul2: pd.Series = s.mul(2, fill_value=0)
check(assert_type(s * 2, pd.Series), pd.Series, np.integer)
check(assert_type(s.mul(2, fill_value=0), pd.Series), pd.Series, np.integer)

res_div: pd.Series = s / 2
res_div2: pd.Series = s.div(2, fill_value=0)
check(assert_type(s / 2, pd.Series), pd.Series, np.float64)
check(
assert_type(s.div(2, fill_value=0), "pd.Series[float]"), pd.Series, np.float64
)

res_floordiv: pd.Series = s // 2
res_floordiv2: pd.Series = s.floordiv(2, fill_value=0)
check(assert_type(s // 2, "pd.Series[int]"), pd.Series, np.integer)
check(
assert_type(s.floordiv(2, fill_value=0), "pd.Series[int]"),
pd.Series,
np.integer,
)

res_mod: pd.Series = s % 2
res_mod2: pd.Series = s.mod(2, fill_value=0)
check(assert_type(s % 2, "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s.mod(2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)

res_pow: pd.Series = s**2
res_pow1: pd.Series = s**0
res_pow2: pd.Series = s**0.213
res_pow3: pd.Series = s.pow(0.5)
check(assert_type(s**2, "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s**0, "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s**0.213, "pd.Series[int]"), pd.Series, np.float64)
check(assert_type(s.pow(0.5), "pd.Series[int]"), pd.Series, np.float64)


# GH 103
def test_types_complex_arithmetic() -> None:
c = 1 + 1j
s = pd.Series([1.0, 2.0, 3.0])
x = s + c
y = s - c
check(assert_type(s + c, pd.Series), pd.Series)
check(assert_type(s - c, pd.Series), pd.Series)


def test_types_groupby() -> None:
Expand Down Expand Up @@ -1105,8 +1147,8 @@ def test_types_getitem() -> None:
s = pd.Series({"key": [0, 1, 2, 3]})
key: list[int] = s["key"]
s2 = pd.Series([0, 1, 2, 3])
value: int = s2[0]
s3: pd.Series = s[:2]
check(assert_type(s2[0], int), np.integer)
check(assert_type(s[:2], pd.Series), pd.Series)


def test_types_getitem_by_timestamp() -> None:
Expand All @@ -1117,9 +1159,9 @@ def test_types_getitem_by_timestamp() -> None:

def test_types_eq() -> None:
s1 = pd.Series([1, 2, 3])
res1: pd.Series = s1 == 1
check(assert_type(s1 == 1, "pd.Series[bool]"), pd.Series, np.bool_)
s2 = pd.Series([1, 2, 4])
res2: pd.Series = s1 == s2
check(assert_type(s1 == s2, "pd.Series[bool]"), pd.Series, np.bool_)


def test_types_rename_axis() -> None:
Expand Down Expand Up @@ -1317,7 +1359,7 @@ def test_series_multiindex_getitem() -> None:
s = pd.Series(
[1, 2, 3, 4], index=pd.MultiIndex.from_product([["a", "b"], ["x", "y"]])
)
s1: pd.Series = s["a", :]
check(assert_type(s["a", :], "pd.Series[int]"), pd.Series, np.integer)


def test_series_mul() -> None:
Expand Down Expand Up @@ -1408,13 +1450,19 @@ def test_cat_accessor() -> None:


def test_cat_ctor_values() -> None:
c1 = pd.Categorical(["a", "b", "a"])
check(assert_type(pd.Categorical(["a", "b", "a"]), pd.Categorical), pd.Categorical)
# GH 95
c2 = pd.Categorical(pd.Series(["a", "b", "a"]))
check(
assert_type(pd.Categorical(pd.Series(["a", "b", "a"])), pd.Categorical),
pd.Categorical,
)
s: Sequence = cast(Sequence, ["a", "b", "a"])
c3 = pd.Categorical(s)
check(assert_type(pd.Categorical(s), pd.Categorical), pd.Categorical)
# GH 107
c4 = pd.Categorical(np.array([1, 2, 3, 1, 1]))
check(
assert_type(pd.Categorical(np.array([1, 2, 3, 1, 1])), pd.Categorical),
pd.Categorical,
)


def test_iloc_getitem_ndarray() -> None:
Expand Down Expand Up @@ -2768,7 +2816,7 @@ def test_astype_other() -> None:

def test_all_astype_args_tested() -> None:
"""Check that all relevant numpy type aliases are tested."""
NUMPY_ALIASES: set[str] = {k for k in np.sctypeDict}
NUMPY_ALIASES: set[str | int] = {k for k in np.sctypeDict}
EXCLUDED_ALIASES = {
"datetime64",
"m",
Expand Down Expand Up @@ -2889,7 +2937,7 @@ def test_convert_dtypes_dtype_backend() -> None:
def test_apply_returns_none() -> None:
# GH 557
s = pd.Series([1, 2, 3])
check(assert_type(s.apply(lambda x: None), pd.Series), pd.Series)
check(assert_type(s.apply(lambda _: None), pd.Series), pd.Series)


def test_loc_callable() -> None:
Expand Down

0 comments on commit 514eefe

Please sign in to comment.