From 31f1799644cc2e1975651207bc97234c03be19f2 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 6 Mar 2018 12:43:01 -0800
Subject: [PATCH 1/7] Added GroupBy mad tests

---
 pandas/tests/groupby/test_groupby.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 0561b3a1d8592..f41d4671e4a43 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2141,6 +2141,30 @@ def test_groupby_bool_aggs(self, agg_func, skipna, vals):
         result = getattr(df.groupby('key'), agg_func)(skipna=skipna)
         assert_frame_equal(result, exp_df)
 
+    @pytest.mark.parametrize("dtype", ['int', 'float'])
+    def test_groupby_mad(self, dtype):
+        vals = np.array(range(10)).astype(dtype)
+        df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
+        exp_df = DataFrame({'val': [1.2, 1.2]}, index=pd.Index(['a', 'b'],
+                                                               name='key'))
+
+        result = df.groupby('key').mad()
+        tm.assert_frame_equal(result, exp_df)
+
+    @pytest.mark.parametrize("vals", [
+        ['foo'] * 10, [True] * 10])
+    def test_groupby_mad_raises(self, vals):
+        df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
+
+        with tm.assert_raises_regex(TypeError, "Invalid type for mad"):
+            df.groupby('key').mad()
+
+    def test_groupby_mad_skipna(self):
+        df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
+        with tm.assert_raises_regex(
+                NotImplementedError, "'skipna=False' not yet implemented"):
+            df.groupby('key').mad(skipna=False)
+
     def test_dont_clobber_name_column(self):
         df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'],
                         'name': ['foo', 'bar', 'baz'] * 2})

From 0c103697a00451853eba086fd15263f7add5c622 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 6 Mar 2018 14:38:14 -0800
Subject: [PATCH 2/7] Implemented Cythonized GroupBy mad; fixed tests

---
 pandas/core/groupby.py                 | 20 ++++++++++++++++
 pandas/tests/groupby/test_groupby.py   | 33 ++++++++++++++------------
 pandas/tests/groupby/test_whitelist.py |  2 +-
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 4a09d636ee320..587e8691314c0 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1353,6 +1353,26 @@ def var(self, ddof=1, *args, **kwargs):
             f = lambda x: x.var(ddof=ddof, **kwargs)
             return self._python_agg_general(f)
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
+    def mad(self, skipna=True):
+        if not skipna:
+            raise NotImplementedError("'skipna=False' not yet implemented")
+
+        if self.axis != 0:
+            return self.apply(lambda x: x.mad(axis=self.axis))
+
+        # Wrap in a try..except. Ideally this wouldn't be required here but
+        # rather be moved to the underlying function calls
+        try:
+            demeaned = np.abs(self.shift(0) - self.transform('mean'))
+            demeaned = demeaned.set_index(self.grouper.labels)
+            agg = demeaned.groupby(demeaned.index)
+        except TypeError:
+            raise DataError('No numeric types to aggregate')
+
+        return agg.mean().set_index(self.grouper.result_index)
+
     @Substitution(name='groupby')
     @Appender(_doc_template)
     def sem(self, ddof=1):
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index f41d4671e4a43..cedf4093c4066 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -9,6 +9,7 @@
 from pandas import (date_range, bdate_range, Timestamp,
                     Index, MultiIndex, DataFrame, Series,
                     concat, Panel, DatetimeIndex, read_csv)
+from pandas.core.base import DataError
 from pandas.core.dtypes.missing import isna
 from pandas.errors import UnsupportedFunctionCall, PerformanceWarning
 from pandas.util.testing import (assert_frame_equal, assert_index_equal,
@@ -1300,17 +1301,6 @@ def test_non_cython_api(self):
         g = df.groupby('A')
         gni = df.groupby('A', as_index=False)
 
-        # mad
-        expected = DataFrame([[0], [np.nan]], columns=['B'], index=[1, 3])
-        expected.index.name = 'A'
-        result = g.mad()
-        assert_frame_equal(result, expected)
-
-        expected = DataFrame([[0., 0.], [0, np.nan]], columns=['A', 'B'],
-                             index=[0, 1])
-        result = gni.mad()
-        assert_frame_equal(result, expected)
-
         # describe
         expected_index = pd.Index([1, 3], name='A')
         expected_col = pd.MultiIndex(levels=[['B'],
@@ -2141,14 +2131,26 @@ def test_groupby_bool_aggs(self, agg_func, skipna, vals):
         result = getattr(df.groupby('key'), agg_func)(skipna=skipna)
         assert_frame_equal(result, exp_df)
 
+    @pytest.mark.parametrize("test_mi", [True, False])
     @pytest.mark.parametrize("dtype", ['int', 'float'])
-    def test_groupby_mad(self, dtype):
+    def test_groupby_mad(self, test_mi, dtype):
         vals = np.array(range(10)).astype(dtype)
         df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
         exp_df = DataFrame({'val': [1.2, 1.2]}, index=pd.Index(['a', 'b'],
                                                                name='key'))
 
-        result = df.groupby('key').mad()
+        grping = ['key']
+        if test_mi:
+            df = df.append(df)  # Double the size of the frame
+            df['newcol'] = ['foo'] * 10 + ['bar'] * 10
+            grping.append('newcol')
+
+            mi = pd.MultiIndex.from_product((exp_df.index.values,
+                                             ['bar', 'foo']),
+                                            names=['key', 'newcol'])
+            exp_df = exp_df.append(exp_df).set_index(mi)
+
+        result = df.groupby(grping).mad()
         tm.assert_frame_equal(result, exp_df)
 
     @pytest.mark.parametrize("vals", [
@@ -2156,11 +2158,12 @@ def test_groupby_mad(self, dtype):
     def test_groupby_mad_raises(self, vals):
         df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
 
-        with tm.assert_raises_regex(TypeError, "Invalid type for mad"):
+        with tm.assert_raises_regex(DataError,
+                                    "No numeric types to aggregate"):
             df.groupby('key').mad()
 
     def test_groupby_mad_skipna(self):
-        df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
+        df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': range(10)})
         with tm.assert_raises_regex(
                 NotImplementedError, "'skipna=False' not yet implemented"):
             df.groupby('key').mad(skipna=False)
diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py
index 8d6e074881cbb..4bea747963dc8 100644
--- a/pandas/tests/groupby/test_whitelist.py
+++ b/pandas/tests/groupby/test_whitelist.py
@@ -12,7 +12,7 @@
 
 AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
                  'mad', 'std', 'var', 'sem']
-AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad']
+AGG_FUNCTIONS_WITH_SKIPNA = ['skew']
 
 df_whitelist = frozenset([
     'last',

From 192253fbd08d5eaa9158d2f9f0bb177220553a33 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 6 Mar 2018 15:33:15 -0800
Subject: [PATCH 3/7] Fixed support issue for series; added tests

---
 pandas/core/groupby.py               |  7 +++++--
 pandas/tests/groupby/test_groupby.py | 23 ++++++++++++++++-------
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 587e8691314c0..a6916557c8b77 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1366,12 +1366,15 @@ def mad(self, skipna=True):
         # rather be moved to the underlying function calls
         try:
             demeaned = np.abs(self.shift(0) - self.transform('mean'))
-            demeaned = demeaned.set_index(self.grouper.labels)
+            demeaned.index = self.grouper.labels
             agg = demeaned.groupby(demeaned.index)
         except TypeError:
             raise DataError('No numeric types to aggregate')
 
-        return agg.mean().set_index(self.grouper.result_index)
+        result = agg.mean()
+        result.index = self.grouper.result_index
+
+        return result
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index cedf4093c4066..acb3c2f98929c 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2131,27 +2131,36 @@ def test_groupby_bool_aggs(self, agg_func, skipna, vals):
         result = getattr(df.groupby('key'), agg_func)(skipna=skipna)
         assert_frame_equal(result, exp_df)
 
+    @pytest.mark.parametrize("obj", [Series, DataFrame])
     @pytest.mark.parametrize("test_mi", [True, False])
     @pytest.mark.parametrize("dtype", ['int', 'float'])
-    def test_groupby_mad(self, test_mi, dtype):
+    def test_groupby_mad(self, obj, test_mi, dtype):
         vals = np.array(range(10)).astype(dtype)
         df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
-        exp_df = DataFrame({'val': [1.2, 1.2]}, index=pd.Index(['a', 'b'],
-                                                               name='key'))
 
+        idx = pd.Index(['a', 'b'], name='key')
+        exp = obj([1.2, 1.2], index=idx)
         grping = ['key']
+
         if test_mi:
             df = df.append(df)  # Double the size of the frame
             df['newcol'] = ['foo'] * 10 + ['bar'] * 10
             grping.append('newcol')
 
-            mi = pd.MultiIndex.from_product((exp_df.index.values,
+            mi = pd.MultiIndex.from_product((exp.index.values,
                                              ['bar', 'foo']),
                                             names=['key', 'newcol'])
-            exp_df = exp_df.append(exp_df).set_index(mi)
+            exp = exp.append(exp)
+            exp.index = mi
 
-        result = df.groupby(grping).mad()
-        tm.assert_frame_equal(result, exp_df)
+        if obj is Series:
+            exp.name = 'val'
+            result = df.groupby(grping)['val'].mad()
+            tm.assert_series_equal(result, exp)
+        else:
+            exp = exp.rename(columns={0: 'val'})
+            result = df.groupby(grping).mad()
+            tm.assert_frame_equal(result, exp)
 
     @pytest.mark.parametrize("vals", [
         ['foo'] * 10, [True] * 10])

From 9d7f0ac90a0d0210d78b92b1cc4648e14c301e7b Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 6 Mar 2018 15:51:02 -0800
Subject: [PATCH 4/7] Code refactor / cleanup

---
 pandas/core/groupby.py               | 11 ++++-------
 pandas/tests/groupby/test_groupby.py |  8 ++++----
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index a6916557c8b77..bf05ed0785e52 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1362,18 +1362,15 @@ def mad(self, skipna=True):
         if self.axis != 0:
             return self.apply(lambda x: x.mad(axis=self.axis))
 
-        # Wrap in a try..except. Ideally this wouldn't be required here but
-        # rather be moved to the underlying function calls
+        # Wrap in a try..except to catch a TypeError with bool data
+        # Ideally this would be implemented in `mean` instead of here
         try:
             demeaned = np.abs(self.shift(0) - self.transform('mean'))
-            demeaned.index = self.grouper.labels
-            agg = demeaned.groupby(demeaned.index)
+            result = demeaned.groupby(self.grouper.labels).mean()
+            result.index = self.grouper.result_index
         except TypeError:
             raise DataError('No numeric types to aggregate')
 
-        result = agg.mean()
-        result.index = self.grouper.result_index
-
         return result
 
     @Substitution(name='groupby')
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index acb3c2f98929c..7e08a7e96d23c 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2131,15 +2131,15 @@ def test_groupby_bool_aggs(self, agg_func, skipna, vals):
         result = getattr(df.groupby('key'), agg_func)(skipna=skipna)
         assert_frame_equal(result, exp_df)
 
-    @pytest.mark.parametrize("obj", [Series, DataFrame])
+    @pytest.mark.parametrize("klass", [Series, DataFrame])
     @pytest.mark.parametrize("test_mi", [True, False])
     @pytest.mark.parametrize("dtype", ['int', 'float'])
-    def test_groupby_mad(self, obj, test_mi, dtype):
+    def test_groupby_mad(self, klass, test_mi, dtype):
         vals = np.array(range(10)).astype(dtype)
         df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
 
         idx = pd.Index(['a', 'b'], name='key')
-        exp = obj([1.2, 1.2], index=idx)
+        exp = klass([1.2, 1.2], index=idx)
         grping = ['key']
 
         if test_mi:
@@ -2153,7 +2153,7 @@ def test_groupby_mad(self, obj, test_mi, dtype):
             exp = exp.append(exp)
             exp.index = mi
 
-        if obj is Series:
+        if klass is Series:
             exp.name = 'val'
             result = df.groupby(grping)['val'].mad()
             tm.assert_series_equal(result, exp)

From 57152e6480f8f9622aaf44875e4148f109918085 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 6 Mar 2018 16:19:59 -0800
Subject: [PATCH 5/7] Updated whatsnew

---
 doc/source/whatsnew/v0.23.0.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 79d85513efa26..c332b977bd362 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -791,6 +791,7 @@ Performance Improvements
 - Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`)
 - Improved performance of :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` (:issue:`11296`)
 - Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`)
+- Improved performance of :func:`pandas.core.groupby.GroupBy.mad` (:issue:`19165`)
 
 .. _whatsnew_0230.docs:
 

From 5307ac3d9e7c1398e53f06a57d8e4dc40f23930f Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 6 Aug 2018 20:57:00 -0700
Subject: [PATCH 6/7] Test fixup

---
 pandas/tests/groupby/test_function.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 8cd95975fdb96..b337fa8e5f5fa 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -4,6 +4,7 @@
 import pandas as pd
 from pandas import (DataFrame, Index, compat, isna,
                     Series, MultiIndex, Timestamp, date_range)
+from pandas.core.base import DataError
 from pandas.errors import UnsupportedFunctionCall
 from pandas.util import testing as tm
 import pandas.core.nanops as nanops
@@ -472,7 +473,7 @@ def test_max_nan_bug():
 @pytest.mark.parametrize("klass", [Series, DataFrame])
 @pytest.mark.parametrize("test_mi", [True, False])
 @pytest.mark.parametrize("dtype", ['int', 'float'])
-def test_groupby_mad(self, klass, test_mi, dtype):
+def test_groupby_mad(klass, test_mi, dtype):
     vals = np.array(range(10)).astype(dtype)
     df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
 
@@ -503,7 +504,7 @@ def test_groupby_mad(self, klass, test_mi, dtype):
 
 @pytest.mark.parametrize("vals", [
     ['foo'] * 10, [True] * 10])
-def test_groupby_mad_raises(self, vals):
+def test_groupby_mad_raises(vals):
     df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': vals})
 
     with tm.assert_raises_regex(DataError,
@@ -511,7 +512,7 @@ def test_groupby_mad_raises(self, vals):
         df.groupby('key').mad()
 
 
-def test_groupby_mad_skipna(self):
+def test_groupby_mad_skipna():
     df = DataFrame({'key': ['a'] * 5 + ['b'] * 5, 'val': range(10)})
     with tm.assert_raises_regex(
             NotImplementedError, "'skipna=False' not yet implemented"):

From 5cab1eb6c10ab071d92e02946048f048cfe7158f Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 6 Aug 2018 20:57:35 -0700
Subject: [PATCH 7/7] LINT fixup

---
 pandas/tests/groupby/test_function.py | 1 +
 pandas/tests/groupby/test_groupby.py  | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index b337fa8e5f5fa..69b87b0689f23 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -470,6 +470,7 @@ def test_max_nan_bug():
     tm.assert_frame_equal(r, e)
     assert not r['File'].isna().any()
 
+
 @pytest.mark.parametrize("klass", [Series, DataFrame])
 @pytest.mark.parametrize("test_mi", [True, False])
 @pytest.mark.parametrize("dtype", ['int', 'float'])
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index ddf5ba3460d83..ca3ed354809a1 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -10,7 +10,6 @@
 from pandas import (date_range, Timestamp,
                     Index, MultiIndex, DataFrame, Series,
                     Panel, DatetimeIndex, read_csv)
-from pandas.core.base import DataError
 from pandas.errors import PerformanceWarning
 from pandas.util.testing import (assert_frame_equal,
                                  assert_series_equal, assert_almost_equal)