-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
BUG: groupby.transform(name) validates name is an aggregation #27597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
784e621
4ad587e
b6216af
15597f5
0f77cbf
880b2d8
36a15f2
0c77a37
09703f8
8eeb01a
c0a71ce
2ce2bb7
d4bafef
57e2122
f942e55
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,8 +39,59 @@ Backwards incompatible API changes | |
|
||
.. _whatsnew_1000.api.other: | ||
|
||
- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`). | ||
- | ||
Groupby.transform(name) accepts only aggregation names | ||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
||
:meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` are used to either invoke | ||
This conversation was marked as resolved.
Show resolved
Hide resolved
|
||
a callable, or to convert an aggregation into a transformation by replicating the resulting aggregation | ||
value for each row across the whole group. In previous releases, these methods did not | ||
validate that the function name passed was actually the name of an aggregation. As a result, | ||
users might get a cryptic error or worse, erroneous results. Starting with this release, passing | ||
either of these methods the name of anything except a known aggregation function name will raise | ||
an exception. There is no change in the behavior associated with passing a callable. | ||
|
||
Users who relied on :meth:`DataFrameGroupBy.transform` or :meth:`SeriesGroupBy.transform` | ||
for transformations such as :meth:`DataFrameGroupBy.rank`, :meth:`DataFrameGroupBy.ffill`, | ||
etc., should instead call these methods directly | ||
(:issue:`27597`) (:issue:`14274`) (:issue:`19354`) (:issue:`22509`). | ||
|
||
.. ipython:: python | ||
|
||
df = pd.DataFrame([0, 1, 100, 99]) | ||
labels = [0, 0, 1, 1] | ||
g = df.groupby(labels) | ||
|
||
*Previous behavior*: | ||
|
||
.. code-block:: ipython | ||
|
||
In [1]: g.transform('ers >= Decepticons') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just pass it a name like 'foo' |
||
AttributeError: 'DataFrameGroupBy' object has no attribute 'ers >= Decepticons' | ||
|
||
g.transform('rank') | ||
Out[14]: | ||
0 | ||
0 1.0 | ||
1 1.0 | ||
2 2.0 | ||
3 2.0 | ||
|
||
g.rank() | ||
Out[15]: | ||
0 | ||
0 1.0 | ||
1 2.0 | ||
2 2.0 | ||
3 1.0 | ||
|
||
*New behavior*: | ||
|
||
.. ipython:: python | ||
:okexcept: | ||
|
||
g.transform('ers >= Decepticons') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use foo, & make this a code-block (so we don't have the long traceback) put the 'rank' in its own ipython block; I would also show .rank() or at least indicate that they are now the same. |
||
g.transform('rank') | ||
|
||
|
||
Other API changes | ||
^^^^^^^^^^^^^^^^^ | ||
|
@@ -78,6 +129,7 @@ Performance improvements | |
Bug fixes | ||
~~~~~~~~~ | ||
|
||
- | ||
|
||
Categorical | ||
^^^^^^^^^^^ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -581,7 +581,7 @@ def test_cython_transform_series(op, args, targop): | |
# print(data.head()) | ||
expected = data.groupby(labels).transform(targop) | ||
|
||
tm.assert_series_equal(expected, data.groupby(labels).transform(op, *args)) | ||
tm.assert_series_equal(expected, getattr(data.groupby(labels), op)(*args)) | ||
tm.assert_series_equal(expected, getattr(data.groupby(labels), op)(*args)) | ||
|
||
|
||
|
@@ -632,7 +632,7 @@ def test_cython_transform_series(op, args, targop): | |
) | ||
def test_groupby_cum_skipna(op, skipna, input, exp): | ||
df = pd.DataFrame(input) | ||
result = df.groupby("key")["value"].transform(op, skipna=skipna) | ||
result = getattr(df.groupby("key")["value"], op)(skipna=skipna) | ||
if isinstance(exp, dict): | ||
expected = exp[(op, skipna)] | ||
else: | ||
|
@@ -710,20 +710,17 @@ def test_cython_transform_frame(op, args, targop): | |
expected = gb.apply(targop) | ||
|
||
expected = expected.sort_index(axis=1) | ||
tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index(axis=1)) | ||
tm.assert_frame_equal(expected, getattr(gb, op)(*args).sort_index(axis=1)) | ||
# individual columns | ||
for c in df: | ||
if c not in ["float", "int", "float_missing"] and op != "shift": | ||
msg = "No numeric types to aggregate" | ||
with pytest.raises(DataError, match=msg): | ||
gb[c].transform(op) | ||
with pytest.raises(DataError, match=msg): | ||
getattr(gb[c], op)() | ||
else: | ||
expected = gb[c].apply(targop) | ||
expected.name = c | ||
tm.assert_series_equal(expected, gb[c].transform(op, *args)) | ||
tm.assert_series_equal(expected, getattr(gb[c], op)(*args)) | ||
tm.assert_series_equal(expected, getattr(gb[c], op)(*args)) | ||
|
||
|
||
|
@@ -765,7 +762,7 @@ def test_transform_with_non_scalar_group(): | |
), | ||
], | ||
) | ||
@pytest.mark.parametrize("agg_func", ["count", "rank", "size"]) | ||
@pytest.mark.parametrize("agg_func", ["count", "size"]) | ||
def test_transform_numeric_ret(cols, exp, comp_func, agg_func): | ||
if agg_func == "size" and isinstance(cols, list): | ||
pytest.xfail("'size' transformation not supported with NDFrameGroupy") | ||
|
@@ -1007,17 +1004,19 @@ def test_transform_invalid_name_raises(): | |
# GH#27486 | ||
df = DataFrame(dict(a=[0, 1, 1, 2])) | ||
g = df.groupby(["a", "b", "b", "c"]) | ||
with pytest.raises(ValueError, match="not a valid function name"): | ||
with pytest.raises(ValueError, match="exclusively"): | ||
g.transform("some_arbitrary_name") | ||
|
||
# method exists on the object, but is not a valid transformation/agg | ||
# make sure the error suggests using the method directly. | ||
assert hasattr(g, "aggregate") # make sure the method exists | ||
with pytest.raises(ValueError, match="not a valid function name"): | ||
with pytest.raises(ValueError, match="exclusively.+you should try"): | ||
g.transform("aggregate") | ||
|
||
# Test SeriesGroupBy | ||
g = df["a"].groupby(["a", "b", "b", "c"]) | ||
with pytest.raises(ValueError, match="not a valid function name"): | ||
ser = Series(range(4)) | ||
g = ser.groupby(["a", "b", "b", "c"]) | ||
with pytest.raises(ValueError, match="exclusively"): | ||
g.transform("some_arbitrary_name") | ||
|
||
|
||
|
@@ -1052,6 +1051,20 @@ def test_transform_agg_by_name(reduction_func, obj): | |
assert len(set(DataFrame(result).iloc[-3:, -1])) == 1 | ||
|
||
|
||
def test_transform_transformation_by_name(transformation_func): | ||
"""Make sure g.transform('name') raises a helpful error for non-agg | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you add the issue reference numbers as a comment |
||
""" | ||
func = transformation_func | ||
obj = DataFrame( | ||
dict(a=[0, 0, 0, 1, 1, 1], b=range(6)), index=["A", "B", "C", "D", "E", "F"] | ||
) | ||
g = obj.groupby(np.repeat([0, 1], 3)) | ||
|
||
match = "exclusively for.+you should try" | ||
with pytest.raises(ValueError, match=match): | ||
g.transform(func) | ||
|
||
|
||
def test_transform_lambda_with_datetimetz(): | ||
# GH 27496 | ||
df = DataFrame( | ||
|
Uh oh!
There was an error while loading. Please reload this page.