Skip to content

TST/REF: collect indexing tests by method #37638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 6, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 13 additions & 96 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
@@ -66,21 +66,6 @@ def test_getitem_dupe_cols(self):
with pytest.raises(KeyError, match=re.escape(msg)):
df[["baf"]]

@pytest.mark.parametrize("key_type", [iter, np.array, Series, Index])
def test_loc_iterable(self, float_frame, key_type):
idx = key_type(["A", "B", "C"])
result = float_frame.loc[:, idx]
expected = float_frame.loc[:, ["A", "B", "C"]]
tm.assert_frame_equal(result, expected)

def test_loc_timedelta_0seconds(self):
# GH#10583
df = DataFrame(np.random.normal(size=(10, 4)))
df.index = pd.timedelta_range(start="0s", periods=10, freq="s")
expected = df.loc[pd.Timedelta("0s") :, :]
result = df.loc["0s":, :]
tm.assert_frame_equal(expected, result)

@pytest.mark.parametrize(
"idx_type",
[
@@ -125,28 +110,20 @@ def test_getitem_listlike(self, idx_type, levels, float_frame):
with pytest.raises(KeyError, match="not in index"):
frame[idx]

@pytest.mark.parametrize(
"val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))]
)
def test_loc_uint64(self, val, expected):
# see gh-19399
df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63])
result = df.loc[val]

expected.name = val
tm.assert_series_equal(result, expected)

def test_getitem_callable(self, float_frame):
# GH 12533
result = float_frame[lambda x: "A"]
tm.assert_series_equal(result, float_frame.loc[:, "A"])
expected = float_frame.loc[:, "A"]
tm.assert_series_equal(result, expected)

result = float_frame[lambda x: ["A", "B"]]
expected = float_frame.loc[:, ["A", "B"]]
tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])

df = float_frame[:3]
result = df[lambda x: [True, False, True]]
tm.assert_frame_equal(result, float_frame.iloc[[0, 2], :])
expected = float_frame.iloc[[0, 2], :]
tm.assert_frame_equal(result, expected)

def test_setitem_list(self, float_frame):

@@ -181,11 +158,6 @@ def test_setitem_list(self, float_frame):
expected = Series(["1", "2"], df.columns, name=1)
tm.assert_series_equal(result, expected)

def test_setitem_list_not_dataframe(self, float_frame):
data = np.random.randn(len(float_frame), 2)
float_frame[["A", "B"]] = data
tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

def test_setitem_list_of_tuples(self, float_frame):
tuples = list(zip(float_frame["A"], float_frame["B"]))
float_frame["tuples"] = tuples
@@ -273,14 +245,6 @@ def test_setitem_multi_index(self):
df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])

def test_setitem_callable(self):
# GH 12533
df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]})
df[lambda x: "A"] = [11, 12, 13, 14]

exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]})
tm.assert_frame_equal(df, exp)

def test_setitem_other_callable(self):
# GH 13299
def inc(x):
@@ -518,18 +482,13 @@ def test_setitem(self, float_frame):
df.loc[0] = np.nan
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
def test_setitem_dtype(self, dtype, float_frame):
arr = np.random.randn(len(float_frame))

float_frame[dtype] = np.array(arr, dtype=dtype)
assert float_frame[dtype].dtype.name == dtype

def test_setitem_tuple(self, float_frame):
float_frame["A", "B"] = float_frame["A"]
tm.assert_series_equal(
float_frame["A", "B"], float_frame["A"], check_names=False
)
assert ("A", "B") in float_frame.columns

result = float_frame["A", "B"]
expected = float_frame["A"]
tm.assert_series_equal(result, expected, check_names=False)

def test_setitem_always_copy(self, float_frame):
s = float_frame["A"].copy()
@@ -588,25 +547,6 @@ def test_setitem_boolean(self, float_frame):
np.putmask(expected.values, mask.values, df.values * 2)
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize(
"mask_type",
[lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],
ids=["dataframe", "array"],
)
def test_setitem_boolean_mask(self, mask_type, float_frame):

# Test for issue #18582
df = float_frame.copy()
mask = mask_type(df)

# index with boolean mask
result = df.copy()
result[mask] = np.nan

expected = df.copy()
expected.values[np.array(mask)] = np.nan
tm.assert_frame_equal(result, expected)

def test_setitem_cast(self, float_frame):
float_frame["D"] = float_frame["D"].astype("i8")
assert float_frame["D"].dtype == np.int64
@@ -821,19 +761,6 @@ def test_getitem_empty_frame_with_boolean(self):
df2 = df[df > 0]
tm.assert_frame_equal(df, df2)

def test_slice_floats(self):
index = [52195.504153, 52196.303147, 52198.369883]
df = DataFrame(np.random.rand(3, 2), index=index)

s1 = df.loc[52195.1:52196.5]
assert len(s1) == 2

s1 = df.loc[52195.1:52196.6]
assert len(s1) == 2

s1 = df.loc[52195.1:52198.9]
assert len(s1) == 3

def test_getitem_fancy_slice_integers_step(self):
df = DataFrame(np.random.randn(10, 5))

@@ -883,15 +810,6 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame):

assert (float_frame["C"] == 4).all()

def test_setitem_slice_position(self):
# GH#31469
df = DataFrame(np.zeros((100, 1)))
df[-4:] = 1
arr = np.zeros((100, 1))
arr[-4:] = 1
expected = DataFrame(arr)
tm.assert_frame_equal(df, expected)

def test_getitem_setitem_non_ix_labels(self):
df = tm.makeTimeDataFrame()

@@ -1000,14 +918,13 @@ def test_getitem_fancy_ints(self, float_frame):
expected = float_frame.loc[:, float_frame.columns[[2, 0, 1]]]
tm.assert_frame_equal(result, expected)

def test_getitem_setitem_fancy_exceptions(self, float_frame):
ix = float_frame.iloc
def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
with pytest.raises(IndexingError, match="Too many indexers"):
ix[:, :, :]
float_frame.iloc[:, :, :]

with pytest.raises(IndexError, match="too many indices for array"):
# GH#32257 we let numpy do validation, get their exception
ix[:, :, :] = 1
float_frame.iloc[:, :, :] = 1

def test_getitem_setitem_boolean_misaligned(self, float_frame):
# boolean index misaligned labels
54 changes: 54 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
@@ -22,6 +22,18 @@


class TestDataFrameSetItem:
@pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
def test_setitem_dtype(self, dtype, float_frame):
    """Assigning a typed ndarray as a new column keeps that array's dtype."""
    values = np.random.randn(len(float_frame))

    # the dtype string doubles as the label of the new column
    float_frame[dtype] = np.array(values, dtype=dtype)
    assert float_frame[dtype].dtype.name == dtype

def test_setitem_list_not_dataframe(self, float_frame):
data = np.random.randn(len(float_frame), 2)
float_frame[["A", "B"]] = data
tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

def test_setitem_error_msmgs(self):

# GH 7432
@@ -285,3 +297,45 @@ def test_iloc_setitem_bool_indexer(self, klass):
df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
tm.assert_frame_equal(df, expected)


class TestDataFrameSetItemSlicing:
    """``DataFrame.__setitem__`` with slice keys."""

    def test_setitem_slice_position(self):
        """A negative integer slice assigns to the trailing rows (GH#31469)."""
        df = DataFrame(np.zeros((100, 1)))
        df[-4:] = 1

        # build the expectation by applying the same slice to a plain ndarray
        arr = np.zeros((100, 1))
        arr[-4:] = 1
        expected = DataFrame(arr)
        tm.assert_frame_equal(df, expected)


class TestDataFrameSetItemCallable:
    """``DataFrame.__setitem__`` with callable keys."""

    def test_setitem_callable(self):
        """A callable key selects the column that gets overwritten (GH#12533)."""
        df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]})
        df[lambda x: "A"] = [11, 12, 13, 14]

        exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]})
        tm.assert_frame_equal(df, exp)


class TestDataFrameSetItemBooleanMask:
    """``DataFrame.__setitem__`` with 2D boolean masks."""

    @pytest.mark.parametrize(
        "mask_type",
        [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],
        ids=["dataframe", "array"],
    )
    def test_setitem_boolean_mask(self, mask_type, float_frame):
        """Masked assignment works for DataFrame and ndarray masks (issue #18582)."""
        df = float_frame.copy()
        mask = mask_type(df)

        # index with boolean mask
        result = df.copy()
        result[mask] = np.nan

        # Build the expectation from an explicit ndarray copy instead of
        # writing through ``expected.values``, so the test does not rely on
        # ``.values`` handing back a writable view of the frame's data.
        values = df.to_numpy(copy=True)
        values[np.array(mask)] = np.nan
        expected = DataFrame(values, index=df.index, columns=df.columns)
        tm.assert_frame_equal(result, expected)
37 changes: 0 additions & 37 deletions pandas/tests/frame/indexing/test_sparse.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
from pandas.arrays import SparseArray
from pandas.core.arrays.sparse import SparseDtype


class TestSparseDataFrameIndexing:
@@ -23,34 +17,3 @@ def test_getitem_sparse_column(self):

result = df.loc[:, "A"]
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
@td.skip_if_no_scipy
def test_loc_getitem_from_spmatrix(self, spmatrix_t, dtype):
import scipy.sparse

spmatrix_t = getattr(scipy.sparse, spmatrix_t)

# The bug is triggered by a sparse matrix with purely sparse columns. So the
# recipe below generates a rectangular matrix of dimension (5, 7) where all the
# diagonal cells are ones, meaning the last two columns are purely sparse.
rows, cols = 5, 7
spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype)
df = pd.DataFrame.sparse.from_spmatrix(spmatrix)

# regression test for #34526
itr_idx = range(2, rows)
result = df.loc[itr_idx].values
expected = spmatrix.toarray()[itr_idx]
tm.assert_numpy_array_equal(result, expected)

# regression test for #34540
result = df.loc[itr_idx].dtypes.values
expected = np.full(cols, SparseDtype(dtype, fill_value=0))
tm.assert_numpy_array_equal(result, expected)

def test_all_sparse(self):
df = pd.DataFrame({"A": pd.array([0, 0], dtype=pd.SparseDtype("int64"))})
result = df.loc[[0, 1]]
tm.assert_frame_equal(result, df)
33 changes: 30 additions & 3 deletions pandas/tests/indexing/test_at.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,41 @@
from datetime import datetime, timezone

import pandas as pd
import numpy as np
import pytest

from pandas import DataFrame
import pandas._testing as tm


def test_at_timezone():
# https://github.com/pandas-dev/pandas/issues/33544
result = pd.DataFrame({"foo": [datetime(2000, 1, 1)]})
result = DataFrame({"foo": [datetime(2000, 1, 1)]})
result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc)
expected = pd.DataFrame(
expected = DataFrame(
{"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object
)
tm.assert_frame_equal(result, expected)


class TestAtWithDuplicates:
    """``.at`` on objects whose axes contain duplicate labels."""

    def test_at_with_duplicate_axes_requires_scalar_lookup(self):
        """Non-scalar keys must be rejected for both getting and setting.

        GH#33041 check that falling back to loc doesn't allow non-scalar
        args to slip in.
        """
        arr = np.random.randn(6).reshape(3, 2)
        df = DataFrame(arr, columns=["A", "A"])

        msg = "Invalid call for scalar access"

        # getting
        with pytest.raises(ValueError, match=msg):
            df.at[[1, 2]]
        with pytest.raises(ValueError, match=msg):
            df.at[1, ["A"]]
        with pytest.raises(ValueError, match=msg):
            df.at[:, "A"]

        # setting
        with pytest.raises(ValueError, match=msg):
            df.at[[1, 2]] = 1
        with pytest.raises(ValueError, match=msg):
            df.at[1, ["A"]] = 1
        with pytest.raises(ValueError, match=msg):
            df.at[:, "A"] = 1
10 changes: 0 additions & 10 deletions pandas/tests/indexing/test_categorical.py
Original file line number Diff line number Diff line change
@@ -73,16 +73,6 @@ def test_loc_scalar(self):
with pytest.raises(KeyError, match="^1$"):
df.loc[1]

def test_getitem_scalar(self):

cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

s = Series([1, 2], index=cats)

expected = s.iloc[0]
result = s[cats[0]]
assert result == expected

def test_slicing(self):
cat = Series(Categorical([1, 2, 3, 4]))
reversed = cat[::-1]
92 changes: 4 additions & 88 deletions pandas/tests/indexing/test_datetime.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from datetime import date, datetime, timedelta

from dateutil import tz
import numpy as np
import pytest

@@ -206,26 +203,6 @@ def test_partial_setting_with_datetimelike_dtype(self):
df.loc[mask, "C"] = df.loc[mask].index
tm.assert_frame_equal(df, expected)

def test_loc_setitem_datetime(self):

# GH 9516
dt1 = Timestamp("20130101 09:00:00")
dt2 = Timestamp("20130101 10:00:00")

for conv in [
lambda x: x,
lambda x: x.to_datetime64(),
lambda x: x.to_pydatetime(),
lambda x: np.datetime64(x),
]:

df = DataFrame()
df.loc[conv(dt1), "one"] = 100
df.loc[conv(dt2), "one"] = 200

expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2])
tm.assert_frame_equal(df, expected)

def test_series_partial_set_datetime(self):
# GH 11497

@@ -245,7 +222,8 @@ def test_series_partial_set_datetime(self):
exp = Series(
[0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name="idx"), name="s"
)
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
result = ser.loc[keys]
tm.assert_series_equal(result, exp, check_index_type=True)

keys = [
Timestamp("2011-01-03"),
@@ -273,7 +251,8 @@ def test_series_partial_set_period(self):
pd.Period("2011-01-01", freq="D"),
]
exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name="idx"), name="s")
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
result = ser.loc[keys]
tm.assert_series_equal(result, exp, check_index_type=True)

keys = [
pd.Period("2011-01-03", freq="D"),
@@ -297,33 +276,6 @@ def test_nanosecond_getitem_setitem_with_tz(self):
expected = DataFrame(-1, index=index, columns=["a"])
tm.assert_frame_equal(result, expected)

def test_loc_getitem_across_dst(self):
# GH 21846
idx = pd.date_range(
"2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min"
)
series2 = Series([0, 1, 2, 3, 4], index=idx)

t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min")
t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min")
result = series2.loc[t_1:t_2]
expected = Series([2, 3], index=idx[2:4])
tm.assert_series_equal(result, expected)

result = series2[t_1]
expected = 2
assert result == expected

def test_loc_incremental_setitem_with_dst(self):
# GH 20724
base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific"))
idxs = [base + timedelta(seconds=i * 900) for i in range(16)]
result = Series([0], index=[idxs[0]])
for ts in idxs:
result.loc[ts] = 1
expected = Series(1, index=idxs)
tm.assert_series_equal(result, expected)

def test_loc_setitem_with_existing_dst(self):
# GH 18308
start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid")
@@ -339,39 +291,3 @@ def test_loc_setitem_with_existing_dst(self):
dtype=object,
)
tm.assert_frame_equal(result, expected)

def test_loc_str_slicing(self):
ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
ser = ix.to_series()
result = ser.loc[:"2017-12"]
expected = ser.iloc[:-1]

tm.assert_series_equal(result, expected)

def test_loc_label_slicing(self):
ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
ser = ix.to_series()
result = ser.loc[: ix[-2]]
expected = ser.iloc[:-1]

tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
"slice_, positions",
[
[slice(date(2018, 1, 1), None), [0, 1, 2]],
[slice(date(2019, 1, 2), None), [2]],
[slice(date(2020, 1, 1), None), []],
[slice(None, date(2020, 1, 1)), [0, 1, 2]],
[slice(None, date(2019, 1, 1)), [0]],
],
)
def test_getitem_slice_date(self, slice_, positions):
# https://github.com/pandas-dev/pandas/issues/31501
s = Series(
[0, 1, 2],
pd.DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]),
)
result = s[slice_]
expected = s.take(positions)
tm.assert_series_equal(result, expected)
221 changes: 219 additions & 2 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
""" test label based indexing with loc """
from datetime import time
from datetime import datetime, time, timedelta
from io import StringIO
import re

from dateutil.tz import gettz
import numpy as np
import pytest

from pandas.compat.numpy import is_numpy_dev
import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
SparseDtype,
Timedelta,
Timestamp,
date_range,
timedelta_range,
to_datetime,
)
import pandas._testing as tm
from pandas.api.types import is_scalar
from pandas.tests.indexing.common import Base
@@ -1014,6 +1027,73 @@ def test_loc_getitem_time_object(self, frame_or_series):
expected.index = expected.index._with_freq(None)
tm.assert_equal(result, expected)

@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
@td.skip_if_no_scipy
def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
    """``.loc`` with a ``range`` key on a frame built from a scipy sparse matrix."""
    import scipy.sparse

    # resolve the matrix class by name; keep the parameter itself untouched
    spmatrix_cls = getattr(scipy.sparse, spmatrix_t)

    # The bug is triggered by a sparse matrix with purely sparse columns. So the
    # recipe below generates a rectangular matrix of dimension (5, 7) where all the
    # diagonal cells are ones, meaning the last two columns are purely sparse.
    rows, cols = 5, 7
    spmatrix = spmatrix_cls(np.eye(rows, cols, dtype=dtype), dtype=dtype)
    df = DataFrame.sparse.from_spmatrix(spmatrix)

    # regression test for GH#34526
    itr_idx = range(2, rows)
    result = df.loc[itr_idx].values
    expected = spmatrix.toarray()[itr_idx]
    tm.assert_numpy_array_equal(result, expected)

    # regression test for GH#34540
    result = df.loc[itr_idx].dtypes.values
    expected = np.full(cols, SparseDtype(dtype, fill_value=0))
    tm.assert_numpy_array_equal(result, expected)

def test_loc_getitem_listlike_all_retains_sparse(self):
df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))})
result = df.loc[[0, 1]]
tm.assert_frame_equal(result, df)

@pytest.mark.parametrize("key_type", [iter, np.array, Series, Index])
def test_loc_getitem_iterable(self, float_frame, key_type):
    """Column selection through ``.loc`` accepts several iterable key types."""
    idx = key_type(["A", "B", "C"])
    result = float_frame.loc[:, idx]
    expected = float_frame.loc[:, ["A", "B", "C"]]
    tm.assert_frame_equal(result, expected)

def test_loc_getitem_timedelta_0seconds(self):
# GH#10583
df = DataFrame(np.random.normal(size=(10, 4)))
df.index = timedelta_range(start="0s", periods=10, freq="s")
expected = df.loc[Timedelta("0s") :, :]
result = df.loc["0s":, :]
tm.assert_frame_equal(expected, result)

@pytest.mark.parametrize(
    "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))]
)
def test_loc_getitem_uint64_scalar(self, val, expected):
    """Scalar ``.loc`` lookup on integer labels around ``2 ** 63``.

    see GH#19399
    """
    df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63])
    result = df.loc[val]

    # ``rename`` returns a new Series, so the object held by the parametrize
    # list is not mutated by the test.
    expected = expected.rename(val)
    tm.assert_series_equal(result, expected)

def test_loc_setitem_int_label_with_float64index(self):
# note labels are floats
ser = Series(["a", "b", "c"], index=[0, 0.5, 1])
tmp = ser.copy()

ser.loc[1] = "zoo"
tmp.iloc[2] = "zoo"

tm.assert_series_equal(ser, tmp)


class TestLocWithMultiIndex:
@pytest.mark.parametrize(
@@ -1103,6 +1183,11 @@ def test_loc_setitem_multiindex_slice(self):

tm.assert_series_equal(result, expected)

def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self):
times = date_range("2000-01-01", freq="10min", periods=100000)
ser = Series(range(100000), times)
ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)]


class TestLocSetitemWithExpansion:
@pytest.mark.slow
@@ -1113,6 +1198,59 @@ def test_loc_setitem_with_expansion_large_dataframe(self):
expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64")
tm.assert_frame_equal(result, expected)

def test_loc_setitem_empty_series(self):
# GH#5226

# partially set with an empty object series
ser = Series(dtype=object)
ser.loc[1] = 1
tm.assert_series_equal(ser, Series([1], index=[1]))
ser.loc[3] = 3
tm.assert_series_equal(ser, Series([1, 3], index=[1, 3]))

ser = Series(dtype=object)
ser.loc[1] = 1.0
tm.assert_series_equal(ser, Series([1.0], index=[1]))
ser.loc[3] = 3.0
tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3]))

ser = Series(dtype=object)
ser.loc["foo"] = 1
tm.assert_series_equal(ser, Series([1], index=["foo"]))
ser.loc["bar"] = 3
tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"]))
ser.loc[3] = 4
tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3]))

def test_loc_setitem_incremental_with_dst(self):
    """Grow a Series one tz-aware label at a time with ``.loc`` (GH#20724)."""
    base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific"))
    # 16 labels spaced 900 seconds (15 minutes) apart
    idxs = [base + timedelta(seconds=i * 900) for i in range(16)]
    result = Series([0], index=[idxs[0]])
    for ts in idxs:
        # first pass overwrites the existing label, later passes append
        result.loc[ts] = 1
    expected = Series(1, index=idxs)
    tm.assert_series_equal(result, expected)

def test_loc_setitem_datetime_keys_cast(self):
# GH#9516
dt1 = Timestamp("20130101 09:00:00")
dt2 = Timestamp("20130101 10:00:00")

for conv in [
lambda x: x,
lambda x: x.to_datetime64(),
lambda x: x.to_pydatetime(),
lambda x: np.datetime64(x),
]:

df = DataFrame()
df.loc[conv(dt1), "one"] = 100
df.loc[conv(dt2), "one"] = 200

expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2])
tm.assert_frame_equal(df, expected)


class TestLocCallable:
def test_frame_loc_getitem_callable(self):
@@ -1280,6 +1418,85 @@ def test_frame_loc_setitem_callable(self):
tm.assert_frame_equal(res, exp)


class TestPartialStringSlicing:
    """``.loc`` with partial date/timedelta strings as keys."""

    def test_loc_getitem_partial_string_slicing_datetimeindex(self):
        """A year-month string selects matching rows of an unsorted index.

        GH#35509
        """
        df = DataFrame(
            {"col1": ["a", "b", "c"], "col2": [1, 2, 3]},
            index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]),
        )
        expected = DataFrame(
            {"col1": ["a", "c"], "col2": [1, 3]},
            index=to_datetime(["2020-08-01", "2020-08-05"]),
        )
        result = df.loc["2020-08"]
        tm.assert_frame_equal(result, expected)

    def test_loc_getitem_partial_string_slicing_with_periodindex(self):
        """A string upper bound on a monthly PeriodIndex drops only the last row."""
        pi = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
        ser = pi.to_series()
        result = ser.loc[:"2017-12"]
        expected = ser.iloc[:-1]

        tm.assert_series_equal(result, expected)

    def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self):
        """A string upper bound on an hourly TimedeltaIndex drops only the last row."""
        ix = timedelta_range(start="1 day", end="2 days", freq="1H")
        ser = ix.to_series()
        result = ser.loc[:"1 days"]
        expected = ser.iloc[:-1]

        tm.assert_series_equal(result, expected)


class TestLabelSlicing:
    """``.loc`` with label-valued slice bounds."""

    def test_loc_getitem_label_slice_across_dst(self):
        """Slice between two labels on either side of a DST transition.

        GH#21846
        """
        idx = date_range(
            "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min"
        )
        series2 = Series([0, 1, 2, 3, 4], index=idx)

        # The ``freq`` keyword of ``Timestamp`` plays no part in the lookup
        # and is deprecated, so the bounds are built without it.
        t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin")
        t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin")
        result = series2.loc[t_1:t_2]
        expected = Series([2, 3], index=idx[2:4])
        tm.assert_series_equal(result, expected)

        # scalar lookup of the first bound
        result = series2[t_1]
        expected = 2
        assert result == expected

    def test_loc_getitem_label_slice_period(self):
        """Slicing up to the second-to-last Period drops only the last row."""
        ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
        ser = ix.to_series()
        result = ser.loc[: ix[-2]]
        expected = ser.iloc[:-1]

        tm.assert_series_equal(result, expected)

    def test_loc_getitem_label_slice_timedelta64(self):
        """Slicing up to the second-to-last Timedelta drops only the last row."""
        ix = timedelta_range(start="1 day", end="2 days", freq="1H")
        ser = ix.to_series()
        result = ser.loc[: ix[-2]]
        expected = ser.iloc[:-1]

        tm.assert_series_equal(result, expected)

    def test_loc_getitem_slice_floats_inexact(self):
        """Float slice bounds need not be labels present in the index."""
        index = [52195.504153, 52196.303147, 52198.369883]
        df = DataFrame(np.random.rand(3, 2), index=index)

        s1 = df.loc[52195.1:52196.5]
        assert len(s1) == 2

        s1 = df.loc[52195.1:52196.6]
        assert len(s1) == 2

        s1 = df.loc[52195.1:52198.9]
        assert len(s1) == 3


def test_series_loc_getitem_label_list_missing_values():
# gh-11428
key = np.array(
68 changes: 10 additions & 58 deletions pandas/tests/indexing/test_partial.py
Original file line number Diff line number Diff line change
@@ -351,31 +351,6 @@ def test_partial_set_invalid(self):
tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
assert df.index.dtype == "object"

def test_partial_set_empty_series(self):

# GH5226

# partially set with an empty object series
s = Series(dtype=object)
s.loc[1] = 1
tm.assert_series_equal(s, Series([1], index=[1]))
s.loc[3] = 3
tm.assert_series_equal(s, Series([1, 3], index=[1, 3]))

s = Series(dtype=object)
s.loc[1] = 1.0
tm.assert_series_equal(s, Series([1.0], index=[1]))
s.loc[3] = 3.0
tm.assert_series_equal(s, Series([1.0, 3.0], index=[1, 3]))

s = Series(dtype=object)
s.loc["foo"] = 1
tm.assert_series_equal(s, Series([1], index=["foo"]))
s.loc["bar"] = 3
tm.assert_series_equal(s, Series([1, 3], index=["foo", "bar"]))
s.loc[3] = 4
tm.assert_series_equal(s, Series([1, 3, 4], index=["foo", "bar", 3]))

def test_partial_set_empty_frame(self):

# partially set with an empty object
@@ -504,10 +479,12 @@ def test_partial_set_empty_frame_set_series(self):
# GH 5756
# setting with empty Series
df = DataFrame(Series(dtype=object))
tm.assert_frame_equal(df, DataFrame({0: Series(dtype=object)}))
expected = DataFrame({0: Series(dtype=object)})
tm.assert_frame_equal(df, expected)

df = DataFrame(Series(name="foo", dtype=object))
tm.assert_frame_equal(df, DataFrame({"foo": Series(dtype=object)}))
expected = DataFrame({"foo": Series(dtype=object)})
tm.assert_frame_equal(df, expected)

def test_partial_set_empty_frame_empty_copy_assignment(self):
# GH 5932
@@ -565,19 +542,17 @@ def test_partial_set_empty_frame_empty_consistencies(self):
],
)
def test_loc_with_list_of_strings_representing_datetimes(
self, idx, labels, expected_idx
self, idx, labels, expected_idx, frame_or_series
):
# GH 11278
s = Series(range(20), index=idx)
df = DataFrame(range(20), index=idx)
obj = frame_or_series(range(20), index=idx)

expected_value = [3, 7, 11]
expected_s = Series(expected_value, expected_idx)
expected_df = DataFrame(expected_value, expected_idx)
expected = frame_or_series(expected_value, expected_idx)

tm.assert_series_equal(expected_s, s.loc[labels])
tm.assert_series_equal(expected_s, s[labels])
tm.assert_frame_equal(expected_df, df.loc[labels])
tm.assert_equal(expected, obj.loc[labels])
if frame_or_series is Series:
tm.assert_series_equal(expected, obj[labels])

@pytest.mark.parametrize(
"idx,labels",
@@ -651,16 +626,6 @@ def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
with pytest.raises(KeyError, match=msg):
df.loc[labels]

def test_indexing_timeseries_regression(self):
# Issue 34860
arr = date_range("1/1/2008", "1/1/2009")
result = arr.to_series()["2008"]

rng = date_range(start="2008-01-01", end="2008-12-31")
expected = Series(rng, index=rng)

tm.assert_series_equal(result, expected)

def test_index_name_empty(self):
# GH 31368
df = DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
@@ -689,16 +654,3 @@ def test_slice_irregular_datetime_index_with_nan(self):
expected = DataFrame(range(len(index[:3])), index=index[:3])
result = df["2012-01-01":"2012-01-04"]
tm.assert_frame_equal(result, expected)

def test_slice_datetime_index(self):
# GH35509
df = DataFrame(
{"col1": ["a", "b", "c"], "col2": [1, 2, 3]},
index=pd.to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]),
)
expected = DataFrame(
{"col1": ["a", "c"], "col2": [1, 3]},
index=pd.to_datetime(["2020-08-01", "2020-08-05"]),
)
result = df.loc["2020-08"]
tm.assert_frame_equal(result, expected)
22 changes: 0 additions & 22 deletions pandas/tests/indexing/test_scalar.py
Original file line number Diff line number Diff line change
@@ -146,28 +146,6 @@ def test_frame_at_with_duplicate_axes(self):
expected = Series([2.0, 2.0], index=["A", "A"], name=1)
tm.assert_series_equal(df.iloc[1], expected)

def test_frame_at_with_duplicate_axes_requires_scalar_lookup(self):
# GH#33041 check that falling back to loc doesn't allow non-scalar
# args to slip in

arr = np.random.randn(6).reshape(3, 2)
df = DataFrame(arr, columns=["A", "A"])

msg = "Invalid call for scalar access"
with pytest.raises(ValueError, match=msg):
df.at[[1, 2]]
with pytest.raises(ValueError, match=msg):
df.at[1, ["A"]]
with pytest.raises(ValueError, match=msg):
df.at[:, "A"]

with pytest.raises(ValueError, match=msg):
df.at[[1, 2]] = 1
with pytest.raises(ValueError, match=msg):
df.at[1, ["A"]] = 1
with pytest.raises(ValueError, match=msg):
df.at[:, "A"] = 1

def test_series_at_raises_type_error(self):
# at should not fallback
# GH 7814
16 changes: 0 additions & 16 deletions pandas/tests/indexing/test_timedelta.py
Original file line number Diff line number Diff line change
@@ -104,19 +104,3 @@ def test_roundtrip_thru_setitem(self):

assert expected == result
tm.assert_frame_equal(df, df_copy)

def test_loc_str_slicing(self):
ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H")
ser = ix.to_series()
result = ser.loc[:"1 days"]
expected = ser.iloc[:-1]

tm.assert_series_equal(result, expected)

def test_loc_slicing(self):
ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H")
ser = ix.to_series()
result = ser.loc[: ix[-2]]
expected = ser.iloc[:-1]

tm.assert_series_equal(result, expected)
134 changes: 46 additions & 88 deletions pandas/tests/series/indexing/test_datetime.py
Original file line number Diff line number Diff line change
@@ -4,14 +4,23 @@
from datetime import datetime, timedelta
import re

from dateutil.tz import gettz, tzutc
import numpy as np
import pytest
import pytz

from pandas._libs import iNaT
import pandas._libs.index as _index
from pandas._libs import iNaT, index as libindex

import pandas as pd
from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
from pandas import (
DataFrame,
DatetimeIndex,
NaT,
Series,
Timestamp,
date_range,
period_range,
)
import pandas._testing as tm


@@ -65,13 +74,6 @@ def test_dti_reset_index_round_trip():
assert df.reset_index()["Date"][0] == stamp


@pytest.mark.slow
def test_slice_locs_indexerror():
times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)]
s = Series(range(100000), times)
s.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)]


def test_slicing_datetimes():
# GH 7523

@@ -114,8 +116,6 @@ def test_slicing_datetimes():


def test_getitem_setitem_datetime_tz_pytz():
from pytz import timezone as tz

N = 50
# testing with timezone, GH #2785
rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
@@ -134,23 +134,20 @@ def test_getitem_setitem_datetime_tz_pytz():

# repeat with datetimes
result = ts.copy()
result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0
result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4]
result[datetime(1990, 1, 1, 9, tzinfo=pytz.timezone("UTC"))] = 0
result[datetime(1990, 1, 1, 9, tzinfo=pytz.timezone("UTC"))] = ts[4]
tm.assert_series_equal(result, ts)

result = ts.copy()

# comparison dates with datetime MUST be localized!
date = tz("US/Central").localize(datetime(1990, 1, 1, 3))
date = pytz.timezone("US/Central").localize(datetime(1990, 1, 1, 3))
result[date] = 0
result[date] = ts[4]
tm.assert_series_equal(result, ts)


def test_getitem_setitem_datetime_tz_dateutil():
from dateutil.tz import tzutc

from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

tz = (
lambda x: tzutc() if x == "UTC" else gettz(x)
@@ -295,7 +292,6 @@ def test_getitem_setitem_datetimeindex():


def test_getitem_setitem_periodindex():
from pandas import period_range

N = 50
rng = period_range("1/1/1990", periods=N, freq="H")
@@ -466,72 +462,50 @@ def test_duplicate_dates_indexing(dups):
assert ts[datetime(2000, 1, 6)] == 0


def test_range_slice():
idx = DatetimeIndex(["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"])

ts = Series(np.random.randn(len(idx)), index=idx)

result = ts["1/2/2000":]
expected = ts[1:]
tm.assert_series_equal(result, expected)

result = ts["1/2/2000":"1/3/2000"]
expected = ts[1:4]
tm.assert_series_equal(result, expected)


def test_groupby_average_dup_values(dups):
result = dups.groupby(level=0).mean()
expected = dups.groupby(dups.index).mean()
tm.assert_series_equal(result, expected)


def test_indexing_over_size_cutoff():
import datetime

def test_indexing_over_size_cutoff(monkeypatch):
# #1821

old_cutoff = _index._SIZE_CUTOFF
try:
_index._SIZE_CUTOFF = 1000

# create large list of non periodic datetime
dates = []
sec = datetime.timedelta(seconds=1)
half_sec = datetime.timedelta(microseconds=500000)
d = datetime.datetime(2011, 12, 5, 20, 30)
n = 1100
for i in range(n):
dates.append(d)
dates.append(d + sec)
dates.append(d + sec + half_sec)
dates.append(d + sec + sec + half_sec)
d += 3 * sec

# duplicate some values in the list
duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
for p in duplicate_positions:
dates[p + 1] = dates[p]

df = DataFrame(
np.random.randn(len(dates), 4), index=dates, columns=list("ABCD")
)

pos = n * 3
timestamp = df.index[pos]
assert timestamp in df.index

# it works!
df.loc[timestamp]
assert len(df.loc[[timestamp]]) > 0
finally:
_index._SIZE_CUTOFF = old_cutoff
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

# create large list of non periodic datetime
dates = []
sec = timedelta(seconds=1)
half_sec = timedelta(microseconds=500000)
d = datetime(2011, 12, 5, 20, 30)
n = 1100
for i in range(n):
dates.append(d)
dates.append(d + sec)
dates.append(d + sec + half_sec)
dates.append(d + sec + sec + half_sec)
d += 3 * sec

# duplicate some values in the list
duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
for p in duplicate_positions:
dates[p + 1] = dates[p]

df = DataFrame(np.random.randn(len(dates), 4), index=dates, columns=list("ABCD"))

pos = n * 3
timestamp = df.index[pos]
assert timestamp in df.index

# it works!
df.loc[timestamp]
assert len(df.loc[[timestamp]]) > 0


def test_indexing_over_size_cutoff_period_index(monkeypatch):
# GH 27136

monkeypatch.setattr(_index, "_SIZE_CUTOFF", 1000)
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

n = 1100
idx = pd.period_range("1/1/2000", freq="T", periods=n)
@@ -654,19 +628,3 @@ def test_indexing():
msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
with pytest.raises(KeyError, match=msg):
df[df.index[2]]


"""
test NaT support
"""


def test_setitem_tuple_with_datetimetz():
    """Set one element by tuple key on a Series holding tz-aware datetimes."""
    # GH 20441
    arr = date_range("2017", periods=4, tz="US/Eastern")
    index = [(0, 1), (0, 2), (0, 3), (0, 4)]
    result = Series(arr, index=index)
    expected = result.copy()

    # Assigning through the tuple label must touch the same single element as
    # assigning through the first position.
    result[(0, 1)] = np.nan
    expected.iloc[0] = np.nan
    tm.assert_series_equal(result, expected)
71 changes: 69 additions & 2 deletions pandas/tests/series/indexing/test_getitem.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
"""
Series.__getitem__ test classes are organized by the type of key passed.
"""
from datetime import datetime, time
from datetime import date, datetime, time

import numpy as np
import pytest

from pandas._libs.tslibs import conversion, timezones

import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, date_range, period_range
from pandas import (
Categorical,
DataFrame,
DatetimeIndex,
Index,
Series,
Timestamp,
date_range,
period_range,
)
import pandas._testing as tm
from pandas.core.indexing import IndexingError

@@ -93,8 +102,46 @@ def test_getitem_time_object(self):
result.index = result.index._with_freq(None)
tm.assert_series_equal(result, expected)

# ------------------------------------------------------------------
# Series with CategoricalIndex

def test_getitem_scalar_categorical_index(self):
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

ser = Series([1, 2], index=cats)

expected = ser.iloc[0]
result = ser[cats[0]]
assert result == expected


class TestSeriesGetitemSlices:
def test_getitem_partial_str_slice_with_datetimeindex(self):
# GH#34860
arr = date_range("1/1/2008", "1/1/2009")
ser = arr.to_series()
result = ser["2008"]

rng = date_range(start="2008-01-01", end="2008-12-31")
expected = Series(rng, index=rng)

tm.assert_series_equal(result, expected)

def test_getitem_slice_strings_with_datetimeindex(self):
idx = DatetimeIndex(
["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
)

ts = Series(np.random.randn(len(idx)), index=idx)

result = ts["1/2/2000":]
expected = ts[1:]
tm.assert_series_equal(result, expected)

result = ts["1/2/2000":"1/3/2000"]
expected = ts[1:4]
tm.assert_series_equal(result, expected)

def test_getitem_slice_2d(self, datetime_series):
# GH#30588 multi-dimensional indexing deprecated

@@ -119,6 +166,26 @@ def test_getitem_median_slice_bug(self):
expected = s[indexer[0]]
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "slc, positions",
    [
        [slice(date(2018, 1, 1), None), [0, 1, 2]],
        [slice(date(2019, 1, 2), None), [2]],
        [slice(date(2020, 1, 1), None), []],
        [slice(None, date(2020, 1, 1)), [0, 1, 2]],
        [slice(None, date(2019, 1, 1)), [0]],
    ],
)
def test_getitem_slice_date(self, slc, positions):
    """Slice a datetime-indexed Series with ``datetime.date`` endpoints.

    Parameters
    ----------
    slc : slice
        Slice whose start or stop is a ``datetime.date``.
    positions : list of int
        Positions of the rows the slice is expected to select.
    """
    # https://github.com/pandas-dev/pandas/issues/31501
    ser = Series(
        [0, 1, 2],
        DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]),
    )
    result = ser[slc]
    expected = ser.take(positions)
    tm.assert_series_equal(result, expected)


class TestSeriesGetitemListLike:
@pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series])
11 changes: 0 additions & 11 deletions pandas/tests/series/indexing/test_numeric.py
Original file line number Diff line number Diff line change
@@ -71,17 +71,6 @@ def test_getitem_setitem_slice_integers():
assert not (s[4:] == 0).any()


def test_setitem_float_labels():
    """Setting via ``.loc[1]`` on a float-labelled index targets the label 1."""
    # note labels are floats
    ser = Series(["a", "b", "c"], index=[0, 0.5, 1])
    expected = ser.copy()

    # Label-based assignment must hit the same element as position 2.
    ser.loc[1] = "zoo"
    expected.iloc[2] = "zoo"

    tm.assert_series_equal(ser, expected)


def test_slice_float_get_set(datetime_series):
msg = (
"cannot do slice indexing on DatetimeIndex with these indexers "
10 changes: 10 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
@@ -61,6 +61,16 @@ def test_setitem_with_different_tz_casts_to_object(self):
)
tm.assert_series_equal(ser, expected)

def test_setitem_tuple_with_datetimetz_values(self):
# GH#20441
arr = date_range("2017", periods=4, tz="US/Eastern")
index = [(0, 1), (0, 2), (0, 3), (0, 4)]
result = Series(arr, index=index)
expected = result.copy()
result[(0, 1)] = np.nan
expected.iloc[0] = np.nan
tm.assert_series_equal(result, expected)


class TestSetitemPeriodDtype:
@pytest.mark.parametrize("na_val", [None, np.nan])