From 5f6cbf88d47e7e777b8be855bb950c4ed722cd53 Mon Sep 17 00:00:00 2001
From: Chris Reynolds <chris@britecore.com>
Date: Wed, 21 Jan 2015 22:26:31 -0600
Subject: [PATCH] BUG: Fixes GH9311 groupby on datetime64

datetime64 columns were changing at the nanosecond scale when applying a
groupby aggregator, because the values were being cast to float64, which
cannot represent nanosecond-resolution int64 timestamps exactly.
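
A minimal reproduction sketch (illustrative data, not the test added by this
patch); with the fix, first/last/nth and min/max stay on the int64 path, so
the nanosecond components survive the groupby:

    import pandas as pd

    df = pd.DataFrame({
        'A': ['a', 'a', 'b'],
        'B': pd.to_datetime(['2015-01-01 00:00:00.000000005',
                             '2015-01-01 00:00:00.000000011',
                             '2015-01-01 00:00:00.000000017']),
    })

    # the returned timestamps should round-trip exactly, nanoseconds included
    print(df.groupby('A')['B'].first())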
---
 doc/source/whatsnew/v0.16.0.txt |    2 +
 pandas/core/groupby.py          |   60 +-
 pandas/core/internals.py        |    5 +-
 pandas/src/generate_code.py     |  125 ++-
 pandas/src/generated.pyx        | 1871 +++++++++++++++++++------------
 pandas/tests/test_groupby.py    |   71 ++
 6 files changed, 1391 insertions(+), 743 deletions(-)
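
Note (standalone illustration, not part of the patch): float64 has a 53-bit
significand, so nanosecond-resolution int64 timestamps cannot round-trip
through it, which is why the aggregation paths now keep int64 data as int64:

    import numpy as np

    t = np.int64(1421884800000000123)   # an arbitrary datetime64[ns] value
    print(np.int64(np.float64(t)))      # 1421884800000000000 -- trailing ns lost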

diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
index 9e1546f5e50a9..cd7cdbb645686 100644
--- a/doc/source/whatsnew/v0.16.0.txt
+++ b/doc/source/whatsnew/v0.16.0.txt
@@ -187,6 +187,8 @@ Bug Fixes
 - Bug in the returned ``Series.dt.components`` index was reset to the default index (:issue:`9247`)
 - Bug in ``Categorical.__getitem__/__setitem__`` with listlike input getting incorrect results from indexer coercion (:issue:`9469`)
 - Bug in partial setting with a DatetimeIndex (:issue:`9478`)
+- Bug in groupby for integer and datetime64 columns when applying an aggregator that caused the values to be
+  changed when the number was sufficiently large (:issue:`9311`, :issue:`6620`)
 - Fixed bug in ``to_sql`` when mapping a ``Timestamp`` object column (datetime
   column with timezone info) to the according sqlalchemy type (:issue:`9085`).
 - Fixed bug in ``to_sql`` ``dtype`` argument not accepting an instantiated
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 440c0966ac066..9d5fde5600be3 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -24,7 +24,8 @@
 from pandas.core.common import(_possibly_downcast_to_dtype, isnull,
                                notnull, _DATELIKE_DTYPES, is_numeric_dtype,
                                is_timedelta64_dtype, is_datetime64_dtype,
-                               is_categorical_dtype, _values_from_object)
+                               is_categorical_dtype, _values_from_object,
+                               _is_datetime_or_timedelta_dtype, is_bool_dtype)
 from pandas.core.config import option_context
 import pandas.lib as lib
 from pandas.lib import Timestamp
@@ -1444,7 +1445,9 @@ def get_func(fname):
                 f = getattr(_algos, "%s_%s" % (fname, dtype_str), None)
                 if f is not None:
                     return f
-            return getattr(_algos, fname, None)
+
+            if dtype_str == 'float64':
+                return getattr(_algos, fname, None)
 
         ftype = self._cython_functions[how]
 
@@ -1471,7 +1474,6 @@ def wrapper(*args, **kwargs):
         return func, dtype_str
 
     def aggregate(self, values, how, axis=0):
-
         arity = self._cython_arity.get(how, 1)
 
         vdim = values.ndim
@@ -1487,27 +1489,44 @@ def aggregate(self, values, how, axis=0):
                 raise NotImplementedError
             out_shape = (self.ngroups,) + values.shape[1:]
 
-        if is_numeric_dtype(values.dtype):
-            values = com.ensure_float(values)
-            is_numeric = True
-            out_dtype = 'f%d' % values.dtype.itemsize
+        is_numeric = is_numeric_dtype(values.dtype)
+
+        if _is_datetime_or_timedelta_dtype(values.dtype):
+            values = values.view('int64')
+        elif is_bool_dtype(values.dtype):
+            values = _algos.ensure_float64(values)
+        elif com.is_integer_dtype(values):
+            values = values.astype('int64', copy=False)
+        elif is_numeric:
+            values = _algos.ensure_float64(values)
         else:
-            is_numeric = issubclass(values.dtype.type, (np.datetime64,
-                                                        np.timedelta64))
+            values = values.astype(object)
+
+        try:
+            agg_func, dtype_str = self._get_aggregate_function(how, values)
+        except NotImplementedError:
             if is_numeric:
-                out_dtype = 'float64'
-                values = values.view('int64')
+                values = _algos.ensure_float64(values)
+                agg_func, dtype_str = self._get_aggregate_function(how, values)
             else:
-                out_dtype = 'object'
-                values = values.astype(object)
+                raise
+
+        if is_numeric:
+            out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize)
+        else:
+            out_dtype = 'object'
 
         # will be filled in Cython function
         result = np.empty(out_shape, dtype=out_dtype)
-
         result.fill(np.nan)
         counts = np.zeros(self.ngroups, dtype=np.int64)
 
-        result = self._aggregate(result, counts, values, how, is_numeric)
+        result = self._aggregate(result, counts, values, agg_func, is_numeric)
+
+        if com.is_integer_dtype(result):
+            if len(result[result == tslib.iNaT]) > 0:
+                result = result.astype('float64')
+                result[result == tslib.iNaT] = np.nan
 
         if self._filter_empty_groups and not counts.all():
             if result.ndim == 2:
@@ -1535,9 +1554,7 @@ def aggregate(self, values, how, axis=0):
 
         return result, names
 
-    def _aggregate(self, result, counts, values, how, is_numeric):
-        agg_func, dtype = self._get_aggregate_function(how, values)
-
+    def _aggregate(self, result, counts, values, agg_func, is_numeric):
         comp_ids, _, ngroups = self.group_info
         if values.ndim > 3:
             # punting for now
@@ -1796,9 +1813,7 @@ def size(self):
         'ohlc': lambda *args: ['open', 'high', 'low', 'close']
     }
 
-    def _aggregate(self, result, counts, values, how, is_numeric=True):
-
-        agg_func, dtype = self._get_aggregate_function(how, values)
+    def _aggregate(self, result, counts, values, agg_func, is_numeric=True):
 
         if values.ndim > 3:
             # punting for now
@@ -2535,9 +2550,6 @@ def _cython_agg_blocks(self, how, numeric_only=True):
 
             values = block._try_operate(block.values)
 
-            if block.is_numeric:
-                values = _algos.ensure_float64(values)
-
             result, _ = self.grouper.aggregate(values, how, axis=agg_axis)
 
             # see if we can cast the block back to the original dtype
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index f4abe05097cff..6cf7fa5888539 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1811,10 +1811,7 @@ def _try_coerce_args(self, values, other):
     def _try_coerce_result(self, result):
         """ reverse of try_coerce_args """
         if isinstance(result, np.ndarray):
-            if result.dtype == 'i8':
-                result = tslib.array_to_datetime(
-                    result.astype(object).ravel()).reshape(result.shape)
-            elif result.dtype.kind in ['i', 'f', 'O']:
+            if result.dtype.kind in ['i', 'f', 'O']:
                 result = result.astype('M8[ns]')
         elif isinstance(result, (np.integer, np.datetime64)):
             result = lib.Timestamp(result)
diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py
index d04f55bb19fff..575fcf386f570 100644
--- a/pandas/src/generate_code.py
+++ b/pandas/src/generate_code.py
@@ -3,6 +3,9 @@
 # don't introduce a pandas/pandas.compat import
 # or we get a bootstrapping problem
 from StringIO import StringIO
+import numpy as np
+
+_int64_max = np.iinfo(np.int64).max
 
 header = """
 cimport numpy as np
@@ -680,7 +683,7 @@ def group_last_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     for i in range(len(counts)):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = %(nan_val)s
             else:
                 out[i, j] = resx[i, j]
 """
@@ -726,7 +729,7 @@ def group_last_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = %(nan_val)s
             else:
                 out[i, j] = resx[i, j]
 """
@@ -773,7 +776,7 @@ def group_nth_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = %(nan_val)s
             else:
                 out[i, j] = resx[i, j]
 """
@@ -819,7 +822,7 @@ def group_nth_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     for i in range(len(counts)):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = %(nan_val)s
             else:
                 out[i, j] = resx[i, j]
 """
@@ -1278,7 +1281,7 @@ def group_min_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     nobs = np.zeros_like(out)
 
     minx = np.empty_like(out)
-    minx.fill(np.inf)
+    minx.fill(%(inf_val)s)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
@@ -1319,7 +1322,7 @@ def group_min_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = %(nan_val)s
             else:
                 out[i, j] = minx[i, j]
 """
@@ -1344,7 +1347,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     nobs = np.zeros_like(out)
 
     maxx = np.empty_like(out)
-    maxx.fill(-np.inf)
+    maxx.fill(-%(inf_val)s)
 
     N, K = (<object> values).shape
 
@@ -1381,7 +1384,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     for i in range(len(counts)):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = %(nan_val)s
             else:
                 out[i, j] = maxx[i, j]
 """
@@ -1402,7 +1405,7 @@ def group_max_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
 
     nobs = np.zeros_like(out)
     maxx = np.empty_like(out)
-    maxx.fill(-np.inf)
+    maxx.fill(-%(inf_val)s)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
@@ -1443,7 +1446,7 @@ def group_max_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = %(nan_val)s
             else:
                 out[i, j] = maxx[i, j]
 """
@@ -1469,7 +1472,7 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     nobs = np.zeros_like(out)
 
     minx = np.empty_like(out)
-    minx.fill(np.inf)
+    minx.fill(%(inf_val)s)
 
     N, K = (<object> values).shape
 
@@ -1506,7 +1509,7 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
     for i in range(len(counts)):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = %(nan_val)s
             else:
                 out[i, j] = minx[i, j]
 """
@@ -2286,6 +2289,70 @@ def generate_put_template(template, use_ints=True, use_floats=True,
         output.write(func)
     return output.getvalue()
 
+def generate_put_min_max_template(template, use_ints=True, use_floats=True,
+                                  use_objects=False, use_datelikes=False):
+    floats_list = [
+        ('float64', 'float64_t', 'nan', 'np.inf'),
+        ('float32', 'float32_t', 'nan', 'np.inf'),
+    ]
+    ints_list = [
+        ('int64', 'int64_t', 'iNaT', _int64_max),
+    ]
+    date_like_list = [
+        ('int64', 'int64_t', 'iNaT', _int64_max),
+    ]
+    object_list = [('object', 'object', 'nan', 'np.inf')]
+    function_list = []
+    if use_floats:
+        function_list.extend(floats_list)
+    if use_ints:
+        function_list.extend(ints_list)
+    if use_objects:
+        function_list.extend(object_list)
+    if use_datelikes:
+        function_list.extend(date_like_list)
+
+    output = StringIO()
+    for name, dest_type, nan_val, inf_val in function_list:
+        func = template % {'name': name,
+                           'dest_type2': dest_type,
+                           'nan_val': nan_val,
+                           'inf_val': inf_val}
+        output.write(func)
+    return output.getvalue()
+
+def generate_put_selection_template(template, use_ints=True, use_floats=True,
+                                    use_objects=False, use_datelikes=False):
+    floats_list = [
+        ('float64', 'float64_t', 'float64_t', 'nan'),
+        ('float32', 'float32_t', 'float32_t', 'nan'),
+    ]
+    ints_list = [
+        ('int64', 'int64_t', 'int64_t', 'iNaT'),
+    ]
+    date_like_list = [
+        ('int64', 'int64_t', 'int64_t', 'iNaT'),
+    ]
+    object_list = [('object', 'object', 'object', 'nan')]
+    function_list = []
+    if use_floats:
+        function_list.extend(floats_list)
+    if use_ints:
+        function_list.extend(ints_list)
+    if use_objects:
+        function_list.extend(object_list)
+    if use_datelikes:
+        function_list.extend(date_like_list)
+
+    output = StringIO()
+    for name, c_type, dest_type, nan_val in function_list:
+        func = template % {'name': name,
+                           'c_type': c_type,
+                           'dest_type2': dest_type,
+                           'nan_val': nan_val}
+        output.write(func)
+    return output.getvalue()
+
 def generate_take_template(template, exclude=None):
     # name, dest, ctypein, ctypeout, preval, postval, cancopy
     function_list = [
@@ -2347,11 +2414,8 @@ def generate_from_template(template, exclude=None):
     return output.getvalue()
 
 put_2d = [diff_2d_template]
-groupbys = [group_last_template,
-            group_last_bin_template,
-            group_nth_template,
-            group_nth_bin_template,
-            group_add_template,
+
+groupbys = [group_add_template,
             group_add_bin_template,
             group_prod_template,
             group_prod_bin_template,
@@ -2359,12 +2423,18 @@ def generate_from_template(template, exclude=None):
             group_var_bin_template,
             group_mean_template,
             group_mean_bin_template,
-            group_min_template,
-            group_min_bin_template,
-            group_max_template,
-            group_max_bin_template,
             group_ohlc_template]
 
+groupby_selection = [group_last_template,
+                     group_last_bin_template,
+                     group_nth_template,
+                     group_nth_bin_template]
+
+groupby_min_max = [group_min_template,
+                   group_min_bin_template,
+                   group_max_template,
+                   group_max_bin_template]
+
 groupby_count = [group_count_template, group_count_bin_template]
 
 templates_1d = [map_indices_template,
@@ -2407,9 +2477,18 @@ def generate_take_cython_file(path='generated.pyx'):
         for template in groupbys:
             print(generate_put_template(template, use_ints=False), file=f)
 
+        for template in groupby_selection:
+            print(generate_put_selection_template(template, use_ints=True),
+                  file=f)
+
+        for template in groupby_min_max:
+            print(generate_put_min_max_template(template, use_ints=True),
+                  file=f)
+
         for template in groupby_count:
-            print(generate_put_template(template, use_ints=False,
-                                        use_datelikes=True, use_objects=True),
+            print(generate_put_selection_template(template, use_ints=True,
+                                                  use_datelikes=True,
+                                                  use_objects=True),
                   file=f)
 
         # for template in templates_1d_datetime:
diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx
index 01c80518ca21a..cab3a84f6ffe8 100644
--- a/pandas/src/generated.pyx
+++ b/pandas/src/generated.pyx
@@ -4845,391 +4845,487 @@ def diff_2d_int64(ndarray[int64_t, ndim=2] arr,
                 for j in range(start, stop):
                     out[i, j] = arr[i, j] - arr[i, j - periods]
 
+@cython.boundscheck(False)
 @cython.wraparound(False)
-@cython.wraparound(False)
-def group_last_float64(ndarray[float64_t, ndim=2] out,
-               ndarray[int64_t] counts,
-               ndarray[float64_t, ndim=2] values,
-               ndarray[int64_t] labels):
+def group_add_float64(ndarray[float64_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[float64_t, ndim=2] values,
+              ndarray[int64_t] labels):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, lab
         float64_t val, count
-        ndarray[float64_t, ndim=2] resx
-        ndarray[int64_t, ndim=2] nobs
+        ndarray[float64_t, ndim=2] sumx, nobs
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
-    nobs = np.zeros((<object> out).shape, dtype=np.int64)
-    resx = np.empty_like(out)
+    nobs = np.zeros_like(out)
+    sumx = np.zeros_like(out)
 
     N, K = (<object> values).shape
 
-    for i in range(N):
-        lab = labels[i]
-        if lab < 0:
-            continue
+    if K > 1:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
 
-        counts[lab] += 1
-        for j in range(K):
-            val = values[i, j]
+            counts[lab] += 1
+            for j in range(K):
+                val = values[i, j]
+
+                # not nan
+                if val == val:
+                    nobs[lab, j] += 1
+                    sumx[lab, j] += val
+    else:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+
+            counts[lab] += 1
+            val = values[i, 0]
 
             # not nan
             if val == val:
-                nobs[lab, j] += 1
-                resx[lab, j] = val
+                nobs[lab, 0] += 1
+                sumx[lab, 0] += val
 
     for i in range(len(counts)):
         for j in range(K):
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = resx[i, j]
-@cython.wraparound(False)
+                out[i, j] = sumx[i, j]
+@cython.boundscheck(False)
 @cython.wraparound(False)
-def group_last_float32(ndarray[float32_t, ndim=2] out,
-               ndarray[int64_t] counts,
-               ndarray[float32_t, ndim=2] values,
-               ndarray[int64_t] labels):
+def group_add_float32(ndarray[float32_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[float32_t, ndim=2] values,
+              ndarray[int64_t] labels):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, lab
         float32_t val, count
-        ndarray[float32_t, ndim=2] resx
-        ndarray[int64_t, ndim=2] nobs
+        ndarray[float32_t, ndim=2] sumx, nobs
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
-    nobs = np.zeros((<object> out).shape, dtype=np.int64)
-    resx = np.empty_like(out)
+    nobs = np.zeros_like(out)
+    sumx = np.zeros_like(out)
 
     N, K = (<object> values).shape
 
-    for i in range(N):
-        lab = labels[i]
-        if lab < 0:
-            continue
+    if K > 1:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
 
-        counts[lab] += 1
-        for j in range(K):
-            val = values[i, j]
+            counts[lab] += 1
+            for j in range(K):
+                val = values[i, j]
+
+                # not nan
+                if val == val:
+                    nobs[lab, j] += 1
+                    sumx[lab, j] += val
+    else:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+
+            counts[lab] += 1
+            val = values[i, 0]
 
             # not nan
             if val == val:
-                nobs[lab, j] += 1
-                resx[lab, j] = val
+                nobs[lab, 0] += 1
+                sumx[lab, 0] += val
 
     for i in range(len(counts)):
         for j in range(K):
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = resx[i, j]
+                out[i, j] = sumx[i, j]
 
+@cython.boundscheck(False)
 @cython.wraparound(False)
-@cython.wraparound(False)
-def group_last_bin_float64(ndarray[float64_t, ndim=2] out,
-                   ndarray[int64_t] counts,
-                   ndarray[float64_t, ndim=2] values,
-                   ndarray[int64_t] bins):
+def group_add_bin_float64(ndarray[float64_t, ndim=2] out,
+                  ndarray[int64_t] counts,
+                  ndarray[float64_t, ndim=2] values,
+                  ndarray[int64_t] bins):
     '''
     Only aggregates on axis=0
     '''
     cdef:
-        Py_ssize_t i, j, N, K, ngroups, b
+        Py_ssize_t i, j, N, K, ngroups, b, nbins
         float64_t val, count
-        ndarray[float64_t, ndim=2] resx, nobs
+        ndarray[float64_t, ndim=2] sumx, nobs
 
     nobs = np.zeros_like(out)
-    resx = np.empty_like(out)
+    sumx = np.zeros_like(out)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
     else:
         ngroups = len(bins) + 1
-
     N, K = (<object> values).shape
 
     b = 0
-    for i in range(N):
-        while b < ngroups - 1 and i >= bins[b]:
-            b += 1
+    if K > 1:
+        for i in range(N):
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
 
-        counts[b] += 1
-        for j in range(K):
-            val = values[i, j]
+            counts[b] += 1
+            for j in range(K):
+                val = values[i, j]
+
+                # not nan
+                if val == val:
+                    nobs[b, j] += 1
+                    sumx[b, j] += val
+    else:
+        for i in range(N):
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
+
+            counts[b] += 1
+            val = values[i, 0]
 
             # not nan
             if val == val:
-                nobs[b, j] += 1
-                resx[b, j] = val
+                nobs[b, 0] += 1
+                sumx[b, 0] += val
 
     for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = resx[i, j]
-@cython.wraparound(False)
+                out[i, j] = sumx[i, j]
+@cython.boundscheck(False)
 @cython.wraparound(False)
-def group_last_bin_float32(ndarray[float32_t, ndim=2] out,
-                   ndarray[int64_t] counts,
-                   ndarray[float32_t, ndim=2] values,
-                   ndarray[int64_t] bins):
+def group_add_bin_float32(ndarray[float32_t, ndim=2] out,
+                  ndarray[int64_t] counts,
+                  ndarray[float32_t, ndim=2] values,
+                  ndarray[int64_t] bins):
     '''
     Only aggregates on axis=0
     '''
     cdef:
-        Py_ssize_t i, j, N, K, ngroups, b
+        Py_ssize_t i, j, N, K, ngroups, b, nbins
         float32_t val, count
-        ndarray[float32_t, ndim=2] resx, nobs
+        ndarray[float32_t, ndim=2] sumx, nobs
 
     nobs = np.zeros_like(out)
-    resx = np.empty_like(out)
+    sumx = np.zeros_like(out)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
     else:
         ngroups = len(bins) + 1
-
     N, K = (<object> values).shape
 
     b = 0
-    for i in range(N):
-        while b < ngroups - 1 and i >= bins[b]:
-            b += 1
+    if K > 1:
+        for i in range(N):
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
 
-        counts[b] += 1
-        for j in range(K):
-            val = values[i, j]
+            counts[b] += 1
+            for j in range(K):
+                val = values[i, j]
+
+                # not nan
+                if val == val:
+                    nobs[b, j] += 1
+                    sumx[b, j] += val
+    else:
+        for i in range(N):
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
+
+            counts[b] += 1
+            val = values[i, 0]
 
             # not nan
             if val == val:
-                nobs[b, j] += 1
-                resx[b, j] = val
+                nobs[b, 0] += 1
+                sumx[b, 0] += val
 
     for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = resx[i, j]
+                out[i, j] = sumx[i, j]
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_nth_float64(ndarray[float64_t, ndim=2] out,
-              ndarray[int64_t] counts,
-              ndarray[float64_t, ndim=2] values,
-              ndarray[int64_t] labels, int64_t rank):
+def group_prod_float64(ndarray[float64_t, ndim=2] out,
+               ndarray[int64_t] counts,
+               ndarray[float64_t, ndim=2] values,
+               ndarray[int64_t] labels):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, lab
         float64_t val, count
-        ndarray[float64_t, ndim=2] resx
-        ndarray[int64_t, ndim=2] nobs
+        ndarray[float64_t, ndim=2] prodx, nobs
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
-    nobs = np.zeros((<object> out).shape, dtype=np.int64)
-    resx = np.empty_like(out)
+    nobs = np.zeros_like(out)
+    prodx = np.ones_like(out)
 
     N, K = (<object> values).shape
 
-    for i in range(N):
-        lab = labels[i]
-        if lab < 0:
-            continue
+    if K > 1:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
 
-        counts[lab] += 1
-        for j in range(K):
-            val = values[i, j]
+            counts[lab] += 1
+            for j in range(K):
+                val = values[i, j]
+
+                # not nan
+                if val == val:
+                    nobs[lab, j] += 1
+                    prodx[lab, j] *= val
+    else:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+
+            counts[lab] += 1
+            val = values[i, 0]
 
             # not nan
             if val == val:
-                nobs[lab, j] += 1
-                if nobs[lab, j] == rank:
-                    resx[lab, j] = val
+                nobs[lab, 0] += 1
+                prodx[lab, 0] *= val
 
     for i in range(len(counts)):
         for j in range(K):
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = resx[i, j]
+                out[i, j] = prodx[i, j]
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_nth_float32(ndarray[float32_t, ndim=2] out,
-              ndarray[int64_t] counts,
-              ndarray[float32_t, ndim=2] values,
-              ndarray[int64_t] labels, int64_t rank):
+def group_prod_float32(ndarray[float32_t, ndim=2] out,
+               ndarray[int64_t] counts,
+               ndarray[float32_t, ndim=2] values,
+               ndarray[int64_t] labels):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, lab
         float32_t val, count
-        ndarray[float32_t, ndim=2] resx
-        ndarray[int64_t, ndim=2] nobs
+        ndarray[float32_t, ndim=2] prodx, nobs
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
-    nobs = np.zeros((<object> out).shape, dtype=np.int64)
-    resx = np.empty_like(out)
+    nobs = np.zeros_like(out)
+    prodx = np.ones_like(out)
 
     N, K = (<object> values).shape
 
-    for i in range(N):
-        lab = labels[i]
-        if lab < 0:
-            continue
+    if K > 1:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
 
-        counts[lab] += 1
-        for j in range(K):
-            val = values[i, j]
+            counts[lab] += 1
+            for j in range(K):
+                val = values[i, j]
+
+                # not nan
+                if val == val:
+                    nobs[lab, j] += 1
+                    prodx[lab, j] *= val
+    else:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+
+            counts[lab] += 1
+            val = values[i, 0]
 
             # not nan
             if val == val:
-                nobs[lab, j] += 1
-                if nobs[lab, j] == rank:
-                    resx[lab, j] = val
+                nobs[lab, 0] += 1
+                prodx[lab, 0] *= val
 
     for i in range(len(counts)):
         for j in range(K):
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = resx[i, j]
+                out[i, j] = prodx[i, j]
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_nth_bin_float64(ndarray[float64_t, ndim=2] out,
+def group_prod_bin_float64(ndarray[float64_t, ndim=2] out,
                   ndarray[int64_t] counts,
                   ndarray[float64_t, ndim=2] values,
-                  ndarray[int64_t] bins, int64_t rank):
+                  ndarray[int64_t] bins):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, ngroups, b
         float64_t val, count
-        ndarray[float64_t, ndim=2] resx, nobs
+        ndarray[float64_t, ndim=2] prodx, nobs
 
     nobs = np.zeros_like(out)
-    resx = np.empty_like(out)
+    prodx = np.ones_like(out)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
     else:
         ngroups = len(bins) + 1
-
     N, K = (<object> values).shape
 
     b = 0
-    for i in range(N):
-        while b < ngroups - 1 and i >= bins[b]:
-            b += 1
+    if K > 1:
+        for i in range(N):
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
 
-        counts[b] += 1
-        for j in range(K):
-            val = values[i, j]
+            counts[b] += 1
+            for j in range(K):
+                val = values[i, j]
+
+                # not nan
+                if val == val:
+                    nobs[b, j] += 1
+                    prodx[b, j] *= val
+    else:
+        for i in range(N):
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
+
+            counts[b] += 1
+            val = values[i, 0]
 
             # not nan
             if val == val:
-                nobs[b, j] += 1
-                if nobs[b, j] == rank:
-                    resx[b, j] = val
+                nobs[b, 0] += 1
+                prodx[b, 0] *= val
 
     for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = resx[i, j]
+                out[i, j] = prodx[i, j]
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_nth_bin_float32(ndarray[float32_t, ndim=2] out,
+def group_prod_bin_float32(ndarray[float32_t, ndim=2] out,
                   ndarray[int64_t] counts,
                   ndarray[float32_t, ndim=2] values,
-                  ndarray[int64_t] bins, int64_t rank):
+                  ndarray[int64_t] bins):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, ngroups, b
         float32_t val, count
-        ndarray[float32_t, ndim=2] resx, nobs
+        ndarray[float32_t, ndim=2] prodx, nobs
 
     nobs = np.zeros_like(out)
-    resx = np.empty_like(out)
+    prodx = np.ones_like(out)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
     else:
         ngroups = len(bins) + 1
-
     N, K = (<object> values).shape
 
     b = 0
-    for i in range(N):
-        while b < ngroups - 1 and i >= bins[b]:
-            b += 1
+    if K > 1:
+        for i in range(N):
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
 
-        counts[b] += 1
-        for j in range(K):
-            val = values[i, j]
+            counts[b] += 1
+            for j in range(K):
+                val = values[i, j]
+
+                # not nan
+                if val == val:
+                    nobs[b, j] += 1
+                    prodx[b, j] *= val
+    else:
+        for i in range(N):
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
+
+            counts[b] += 1
+            val = values[i, 0]
 
             # not nan
             if val == val:
-                nobs[b, j] += 1
-                if nobs[b, j] == rank:
-                    resx[b, j] = val
+                nobs[b, 0] += 1
+                prodx[b, 0] *= val
 
     for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = resx[i, j]
+                out[i, j] = prodx[i, j]
 
-@cython.boundscheck(False)
 @cython.wraparound(False)
-def group_add_float64(ndarray[float64_t, ndim=2] out,
+@cython.boundscheck(False)
+def group_var_float64(ndarray[float64_t, ndim=2] out,
               ndarray[int64_t] counts,
               ndarray[float64_t, ndim=2] values,
               ndarray[int64_t] labels):
-    '''
-    Only aggregates on axis=0
-    '''
     cdef:
         Py_ssize_t i, j, N, K, lab
-        float64_t val, count
-        ndarray[float64_t, ndim=2] sumx, nobs
+        float64_t val, ct
+        ndarray[float64_t, ndim=2] nobs, sumx, sumxx
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
     nobs = np.zeros_like(out)
     sumx = np.zeros_like(out)
+    sumxx = np.zeros_like(out)
 
     N, K = (<object> values).shape
 
     if K > 1:
         for i in range(N):
+
             lab = labels[i]
             if lab < 0:
                 continue
 
             counts[lab] += 1
+
             for j in range(K):
                 val = values[i, j]
 
@@ -5237,55 +5333,60 @@ def group_add_float64(ndarray[float64_t, ndim=2] out,
                 if val == val:
                     nobs[lab, j] += 1
                     sumx[lab, j] += val
+                    sumxx[lab, j] += val * val
     else:
         for i in range(N):
+
             lab = labels[i]
             if lab < 0:
                 continue
 
             counts[lab] += 1
             val = values[i, 0]
-
             # not nan
             if val == val:
                 nobs[lab, 0] += 1
                 sumx[lab, 0] += val
+                sumxx[lab, 0] += val * val
+
 
     for i in range(len(counts)):
         for j in range(K):
-            if nobs[i, j] == 0:
+            ct = nobs[i, j]
+            if ct < 2:
                 out[i, j] = nan
             else:
-                out[i, j] = sumx[i, j]
-@cython.boundscheck(False)
+                out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
+                             (ct * ct - ct))
 @cython.wraparound(False)
-def group_add_float32(ndarray[float32_t, ndim=2] out,
+@cython.boundscheck(False)
+def group_var_float32(ndarray[float32_t, ndim=2] out,
               ndarray[int64_t] counts,
               ndarray[float32_t, ndim=2] values,
               ndarray[int64_t] labels):
-    '''
-    Only aggregates on axis=0
-    '''
     cdef:
         Py_ssize_t i, j, N, K, lab
-        float32_t val, count
-        ndarray[float32_t, ndim=2] sumx, nobs
+        float32_t val, ct
+        ndarray[float32_t, ndim=2] nobs, sumx, sumxx
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
     nobs = np.zeros_like(out)
     sumx = np.zeros_like(out)
+    sumxx = np.zeros_like(out)
 
     N, K = (<object> values).shape
 
     if K > 1:
         for i in range(N):
+
             lab = labels[i]
             if lab < 0:
                 continue
 
             counts[lab] += 1
+
             for j in range(K):
                 val = values[i, j]
 
@@ -5293,48 +5394,53 @@ def group_add_float32(ndarray[float32_t, ndim=2] out,
                 if val == val:
                     nobs[lab, j] += 1
                     sumx[lab, j] += val
+                    sumxx[lab, j] += val * val
     else:
         for i in range(N):
+
             lab = labels[i]
             if lab < 0:
                 continue
 
             counts[lab] += 1
             val = values[i, 0]
-
             # not nan
             if val == val:
                 nobs[lab, 0] += 1
                 sumx[lab, 0] += val
+                sumxx[lab, 0] += val * val
+
 
     for i in range(len(counts)):
         for j in range(K):
-            if nobs[i, j] == 0:
+            ct = nobs[i, j]
+            if ct < 2:
                 out[i, j] = nan
             else:
-                out[i, j] = sumx[i, j]
+                out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
+                             (ct * ct - ct))
 
-@cython.boundscheck(False)
 @cython.wraparound(False)
-def group_add_bin_float64(ndarray[float64_t, ndim=2] out,
+@cython.boundscheck(False)
+def group_var_bin_float64(ndarray[float64_t, ndim=2] out,
                   ndarray[int64_t] counts,
                   ndarray[float64_t, ndim=2] values,
                   ndarray[int64_t] bins):
-    '''
-    Only aggregates on axis=0
-    '''
+
     cdef:
-        Py_ssize_t i, j, N, K, ngroups, b, nbins
-        float64_t val, count
-        ndarray[float64_t, ndim=2] sumx, nobs
+        Py_ssize_t i, j, N, K, ngroups, b
+        float64_t val, ct
+        ndarray[float64_t, ndim=2] nobs, sumx, sumxx
 
     nobs = np.zeros_like(out)
     sumx = np.zeros_like(out)
+    sumxx = np.zeros_like(out)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
     else:
         ngroups = len(bins) + 1
+
     N, K = (<object> values).shape
 
     b = 0
@@ -5344,6 +5450,7 @@ def group_add_bin_float64(ndarray[float64_t, ndim=2] out,
                 b += 1
 
             counts[b] += 1
+
             for j in range(K):
                 val = values[i, j]
 
@@ -5351,6 +5458,7 @@ def group_add_bin_float64(ndarray[float64_t, ndim=2] out,
                 if val == val:
                     nobs[b, j] += 1
                     sumx[b, j] += val
+                    sumxx[b, j] += val * val
     else:
         for i in range(N):
             while b < ngroups - 1 and i >= bins[b]:
@@ -5363,34 +5471,37 @@ def group_add_bin_float64(ndarray[float64_t, ndim=2] out,
             if val == val:
                 nobs[b, 0] += 1
                 sumx[b, 0] += val
+                sumxx[b, 0] += val * val
 
     for i in range(ngroups):
         for j in range(K):
-            if nobs[i, j] == 0:
+            ct = nobs[i, j]
+            if ct < 2:
                 out[i, j] = nan
             else:
-                out[i, j] = sumx[i, j]
-@cython.boundscheck(False)
+                out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
+                             (ct * ct - ct))
 @cython.wraparound(False)
-def group_add_bin_float32(ndarray[float32_t, ndim=2] out,
+@cython.boundscheck(False)
+def group_var_bin_float32(ndarray[float32_t, ndim=2] out,
                   ndarray[int64_t] counts,
                   ndarray[float32_t, ndim=2] values,
                   ndarray[int64_t] bins):
-    '''
-    Only aggregates on axis=0
-    '''
+
     cdef:
-        Py_ssize_t i, j, N, K, ngroups, b, nbins
-        float32_t val, count
-        ndarray[float32_t, ndim=2] sumx, nobs
+        Py_ssize_t i, j, N, K, ngroups, b
+        float32_t val, ct
+        ndarray[float32_t, ndim=2] nobs, sumx, sumxx
 
     nobs = np.zeros_like(out)
     sumx = np.zeros_like(out)
+    sumxx = np.zeros_like(out)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
     else:
         ngroups = len(bins) + 1
+
     N, K = (<object> values).shape
 
     b = 0
@@ -5400,6 +5511,7 @@ def group_add_bin_float32(ndarray[float32_t, ndim=2] out,
                 b += 1
 
             counts[b] += 1
+
             for j in range(K):
                 val = values[i, j]
 
@@ -5407,6 +5519,7 @@ def group_add_bin_float32(ndarray[float32_t, ndim=2] out,
                 if val == val:
                     nobs[b, j] += 1
                     sumx[b, j] += val
+                    sumxx[b, j] += val * val
     else:
         for i in range(N):
             while b < ngroups - 1 and i >= bins[b]:
@@ -5419,33 +5532,33 @@ def group_add_bin_float32(ndarray[float32_t, ndim=2] out,
             if val == val:
                 nobs[b, 0] += 1
                 sumx[b, 0] += val
+                sumxx[b, 0] += val * val
 
     for i in range(ngroups):
         for j in range(K):
-            if nobs[i, j] == 0:
+            ct = nobs[i, j]
+            if ct < 2:
                 out[i, j] = nan
             else:
-                out[i, j] = sumx[i, j]
+                out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
+                             (ct * ct - ct))
 
-@cython.boundscheck(False)
 @cython.wraparound(False)
-def group_prod_float64(ndarray[float64_t, ndim=2] out,
+@cython.boundscheck(False)
+def group_mean_float64(ndarray[float64_t, ndim=2] out,
                ndarray[int64_t] counts,
                ndarray[float64_t, ndim=2] values,
                ndarray[int64_t] labels):
-    '''
-    Only aggregates on axis=0
-    '''
     cdef:
         Py_ssize_t i, j, N, K, lab
         float64_t val, count
-        ndarray[float64_t, ndim=2] prodx, nobs
+        ndarray[float64_t, ndim=2] sumx, nobs
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
     nobs = np.zeros_like(out)
-    prodx = np.ones_like(out)
+    sumx = np.zeros_like(out)
 
     N, K = (<object> values).shape
 
@@ -5458,11 +5571,10 @@ def group_prod_float64(ndarray[float64_t, ndim=2] out,
             counts[lab] += 1
             for j in range(K):
                 val = values[i, j]
-
                 # not nan
                 if val == val:
                     nobs[lab, j] += 1
-                    prodx[lab, j] *= val
+                    sumx[lab, j] += val
     else:
         for i in range(N):
             lab = labels[i]
@@ -5471,37 +5583,34 @@ def group_prod_float64(ndarray[float64_t, ndim=2] out,
 
             counts[lab] += 1
             val = values[i, 0]
-
             # not nan
             if val == val:
                 nobs[lab, 0] += 1
-                prodx[lab, 0] *= val
+                sumx[lab, 0] += val
 
     for i in range(len(counts)):
         for j in range(K):
+            count = nobs[i, j]
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = prodx[i, j]
-@cython.boundscheck(False)
+                out[i, j] = sumx[i, j] / count
 @cython.wraparound(False)
-def group_prod_float32(ndarray[float32_t, ndim=2] out,
+@cython.boundscheck(False)
+def group_mean_float32(ndarray[float32_t, ndim=2] out,
                ndarray[int64_t] counts,
                ndarray[float32_t, ndim=2] values,
                ndarray[int64_t] labels):
-    '''
-    Only aggregates on axis=0
-    '''
     cdef:
         Py_ssize_t i, j, N, K, lab
         float32_t val, count
-        ndarray[float32_t, ndim=2] prodx, nobs
+        ndarray[float32_t, ndim=2] sumx, nobs
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
     nobs = np.zeros_like(out)
-    prodx = np.ones_like(out)
+    sumx = np.zeros_like(out)
 
     N, K = (<object> values).shape
 
@@ -5514,11 +5623,10 @@ def group_prod_float32(ndarray[float32_t, ndim=2] out,
             counts[lab] += 1
             for j in range(K):
                 val = values[i, j]
-
                 # not nan
                 if val == val:
                     nobs[lab, j] += 1
-                    prodx[lab, j] *= val
+                    sumx[lab, j] += val
     else:
         for i in range(N):
             lab = labels[i]
@@ -5527,41 +5635,37 @@ def group_prod_float32(ndarray[float32_t, ndim=2] out,
 
             counts[lab] += 1
             val = values[i, 0]
-
             # not nan
             if val == val:
                 nobs[lab, 0] += 1
-                prodx[lab, 0] *= val
+                sumx[lab, 0] += val
 
     for i in range(len(counts)):
         for j in range(K):
+            count = nobs[i, j]
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = prodx[i, j]
+                out[i, j] = sumx[i, j] / count
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def group_prod_bin_float64(ndarray[float64_t, ndim=2] out,
-                  ndarray[int64_t] counts,
-                  ndarray[float64_t, ndim=2] values,
-                  ndarray[int64_t] bins):
-    '''
-    Only aggregates on axis=0
-    '''
+
+def group_mean_bin_float64(ndarray[float64_t, ndim=2] out,
+                   ndarray[int64_t] counts,
+                   ndarray[float64_t, ndim=2] values,
+                   ndarray[int64_t] bins):
     cdef:
         Py_ssize_t i, j, N, K, ngroups, b
         float64_t val, count
-        ndarray[float64_t, ndim=2] prodx, nobs
+        ndarray[float64_t, ndim=2] sumx, nobs
 
     nobs = np.zeros_like(out)
-    prodx = np.ones_like(out)
+    sumx = np.zeros_like(out)
 
+    N, K = (<object> values).shape
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
     else:
         ngroups = len(bins) + 1
-    N, K = (<object> values).shape
 
     b = 0
     if K > 1:
@@ -5576,7 +5680,7 @@ def group_prod_bin_float64(ndarray[float64_t, ndim=2] out,
                 # not nan
                 if val == val:
                     nobs[b, j] += 1
-                    prodx[b, j] *= val
+                    sumx[b, j] += val
     else:
         for i in range(N):
             while b < ngroups - 1 and i >= bins[b]:
@@ -5588,36 +5692,33 @@ def group_prod_bin_float64(ndarray[float64_t, ndim=2] out,
             # not nan
             if val == val:
                 nobs[b, 0] += 1
-                prodx[b, 0] *= val
+                sumx[b, 0] += val
 
     for i in range(ngroups):
         for j in range(K):
-            if nobs[i, j] == 0:
+            count = nobs[i, j]
+            if count == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = prodx[i, j]
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def group_prod_bin_float32(ndarray[float32_t, ndim=2] out,
-                  ndarray[int64_t] counts,
-                  ndarray[float32_t, ndim=2] values,
-                  ndarray[int64_t] bins):
-    '''
-    Only aggregates on axis=0
-    '''
+                out[i, j] = sumx[i, j] / count
+
+def group_mean_bin_float32(ndarray[float32_t, ndim=2] out,
+                   ndarray[int64_t] counts,
+                   ndarray[float32_t, ndim=2] values,
+                   ndarray[int64_t] bins):
     cdef:
         Py_ssize_t i, j, N, K, ngroups, b
         float32_t val, count
-        ndarray[float32_t, ndim=2] prodx, nobs
+        ndarray[float32_t, ndim=2] sumx, nobs
 
     nobs = np.zeros_like(out)
-    prodx = np.ones_like(out)
+    sumx = np.zeros_like(out)
 
+    N, K = (<object> values).shape
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
     else:
         ngroups = len(bins) + 1
-    N, K = (<object> values).shape
 
     b = 0
     if K > 1:
@@ -5632,7 +5733,7 @@ def group_prod_bin_float32(ndarray[float32_t, ndim=2] out,
                 # not nan
                 if val == val:
                     nobs[b, j] += 1
-                    prodx[b, j] *= val
+                    sumx[b, j] += val
     else:
         for i in range(N):
             while b < ngroups - 1 and i >= bins[b]:
@@ -5644,153 +5745,103 @@ def group_prod_bin_float32(ndarray[float32_t, ndim=2] out,
             # not nan
             if val == val:
                 nobs[b, 0] += 1
-                prodx[b, 0] *= val
+                sumx[b, 0] += val
 
     for i in range(ngroups):
         for j in range(K):
-            if nobs[i, j] == 0:
+            count = nobs[i, j]
+            if count == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = prodx[i, j]
+                out[i, j] = sumx[i, j] / count
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_var_float64(ndarray[float64_t, ndim=2] out,
-              ndarray[int64_t] counts,
-              ndarray[float64_t, ndim=2] values,
-              ndarray[int64_t] labels):
+def group_ohlc_float64(ndarray[float64_t, ndim=2] out,
+                  ndarray[int64_t] counts,
+                  ndarray[float64_t, ndim=2] values,
+                  ndarray[int64_t] bins):
+    '''
+    Only aggregates on axis=0
+    '''
     cdef:
-        Py_ssize_t i, j, N, K, lab
-        float64_t val, ct
-        ndarray[float64_t, ndim=2] nobs, sumx, sumxx
-
-    if not len(values) == len(labels):
-       raise AssertionError("len(index) != len(labels)")
-
-    nobs = np.zeros_like(out)
-    sumx = np.zeros_like(out)
-    sumxx = np.zeros_like(out)
-
-    N, K = (<object> values).shape
-
-    if K > 1:
-        for i in range(N):
-
-            lab = labels[i]
-            if lab < 0:
-                continue
-
-            counts[lab] += 1
-
-            for j in range(K):
-                val = values[i, j]
+        Py_ssize_t i, j, N, K, ngroups, b
+        float64_t val, count
+        float64_t vopen, vhigh, vlow, vclose, NA
+        bint got_first = 0
 
-                # not nan
-                if val == val:
-                    nobs[lab, j] += 1
-                    sumx[lab, j] += val
-                    sumxx[lab, j] += val * val
+    if bins[len(bins) - 1] == len(values):
+        ngroups = len(bins)
     else:
-        for i in range(N):
-
-            lab = labels[i]
-            if lab < 0:
-                continue
-
-            counts[lab] += 1
-            val = values[i, 0]
-            # not nan
-            if val == val:
-                nobs[lab, 0] += 1
-                sumx[lab, 0] += val
-                sumxx[lab, 0] += val * val
-
-
-    for i in range(len(counts)):
-        for j in range(K):
-            ct = nobs[i, j]
-            if ct < 2:
-                out[i, j] = nan
-            else:
-                out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
-                             (ct * ct - ct))
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def group_var_float32(ndarray[float32_t, ndim=2] out,
-              ndarray[int64_t] counts,
-              ndarray[float32_t, ndim=2] values,
-              ndarray[int64_t] labels):
-    cdef:
-        Py_ssize_t i, j, N, K, lab
-        float32_t val, ct
-        ndarray[float32_t, ndim=2] nobs, sumx, sumxx
-
-    if not len(values) == len(labels):
-       raise AssertionError("len(index) != len(labels)")
-
-    nobs = np.zeros_like(out)
-    sumx = np.zeros_like(out)
-    sumxx = np.zeros_like(out)
+        ngroups = len(bins) + 1
 
     N, K = (<object> values).shape
 
-    if K > 1:
-        for i in range(N):
-
-            lab = labels[i]
-            if lab < 0:
-                continue
-
-            counts[lab] += 1
+    if out.shape[1] != 4:
+        raise ValueError('Output array must have 4 columns')
 
-            for j in range(K):
-                val = values[i, j]
+    NA = np.nan
 
-                # not nan
-                if val == val:
-                    nobs[lab, j] += 1
-                    sumx[lab, j] += val
-                    sumxx[lab, j] += val * val
+    b = 0
+    if K > 1:
+        raise NotImplementedError
     else:
         for i in range(N):
+            while b < ngroups - 1 and i >= bins[b]:
+                if not got_first:
+                    out[b, 0] = NA
+                    out[b, 1] = NA
+                    out[b, 2] = NA
+                    out[b, 3] = NA
+                else:
+                    out[b, 0] = vopen
+                    out[b, 1] = vhigh
+                    out[b, 2] = vlow
+                    out[b, 3] = vclose
+                b += 1
+                got_first = 0
 
-            lab = labels[i]
-            if lab < 0:
-                continue
-
-            counts[lab] += 1
+            counts[b] += 1
             val = values[i, 0]
+
             # not nan
             if val == val:
-                nobs[lab, 0] += 1
-                sumx[lab, 0] += val
-                sumxx[lab, 0] += val * val
-
-
-    for i in range(len(counts)):
-        for j in range(K):
-            ct = nobs[i, j]
-            if ct < 2:
-                out[i, j] = nan
-            else:
-                out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
-                             (ct * ct - ct))
+                if not got_first:
+                    got_first = 1
+                    vopen = val
+                    vlow = val
+                    vhigh = val
+                else:
+                    if val < vlow:
+                        vlow = val
+                    if val > vhigh:
+                        vhigh = val
+                vclose = val
 
+        if not got_first:
+            out[b, 0] = NA
+            out[b, 1] = NA
+            out[b, 2] = NA
+            out[b, 3] = NA
+        else:
+            out[b, 0] = vopen
+            out[b, 1] = vhigh
+            out[b, 2] = vlow
+            out[b, 3] = vclose
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_var_bin_float64(ndarray[float64_t, ndim=2] out,
+def group_ohlc_float32(ndarray[float32_t, ndim=2] out,
                   ndarray[int64_t] counts,
-                  ndarray[float64_t, ndim=2] values,
+                  ndarray[float32_t, ndim=2] values,
                   ndarray[int64_t] bins):
-
+    '''
+    Only aggregates on axis=0
+    '''
     cdef:
         Py_ssize_t i, j, N, K, ngroups, b
-        float64_t val, ct
-        ndarray[float64_t, ndim=2] nobs, sumx, sumxx
-
-    nobs = np.zeros_like(out)
-    sumx = np.zeros_like(out)
-    sumxx = np.zeros_like(out)
+        float32_t val, count
+        float32_t vopen, vhigh, vlow, vclose, NA
+        bint got_first = 0
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
@@ -5799,59 +5850,515 @@ def group_var_bin_float64(ndarray[float64_t, ndim=2] out,
 
     N, K = (<object> values).shape
 
+    if out.shape[1] != 4:
+        raise ValueError('Output array must have 4 columns')
+
+    NA = np.nan
+
     b = 0
     if K > 1:
+        raise NotImplementedError
+    else:
         for i in range(N):
             while b < ngroups - 1 and i >= bins[b]:
-                b += 1
-
-            counts[b] += 1
-
-            for j in range(K):
-                val = values[i, j]
+                if not got_first:
+                    out[b, 0] = NA
+                    out[b, 1] = NA
+                    out[b, 2] = NA
+                    out[b, 3] = NA
+                else:
+                    out[b, 0] = vopen
+                    out[b, 1] = vhigh
+                    out[b, 2] = vlow
+                    out[b, 3] = vclose
+                b += 1
+                got_first = 0
+
+            counts[b] += 1
+            val = values[i, 0]
+
+            # not nan
+            if val == val:
+                if not got_first:
+                    got_first = 1
+                    vopen = val
+                    vlow = val
+                    vhigh = val
+                else:
+                    if val < vlow:
+                        vlow = val
+                    if val > vhigh:
+                        vhigh = val
+                vclose = val
+
+        if not got_first:
+            out[b, 0] = NA
+            out[b, 1] = NA
+            out[b, 2] = NA
+            out[b, 3] = NA
+        else:
+            out[b, 0] = vopen
+            out[b, 1] = vhigh
+            out[b, 2] = vlow
+            out[b, 3] = vclose
+
+@cython.wraparound(False)
+@cython.wraparound(False)
+def group_last_float64(ndarray[float64_t, ndim=2] out,
+               ndarray[int64_t] counts,
+               ndarray[float64_t, ndim=2] values,
+               ndarray[int64_t] labels):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, lab
+        float64_t val, count
+        ndarray[float64_t, ndim=2] resx
+        ndarray[int64_t, ndim=2] nobs
+
+    if not len(values) == len(labels):
+       raise AssertionError("len(index) != len(labels)")
+
+    nobs = np.zeros((<object> out).shape, dtype=np.int64)
+    resx = np.empty_like(out)
+
+    N, K = (<object> values).shape
+
+    for i in range(N):
+        lab = labels[i]
+        if lab < 0:
+            continue
+
+        counts[lab] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not nan
+            if val == val:
+                nobs[lab, j] += 1
+                resx[lab, j] = val
+
+    for i in range(len(counts)):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = nan
+            else:
+                out[i, j] = resx[i, j]
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_last_float32(ndarray[float32_t, ndim=2] out,
+               ndarray[int64_t] counts,
+               ndarray[float32_t, ndim=2] values,
+               ndarray[int64_t] labels):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, lab
+        float32_t val, count
+        ndarray[float32_t, ndim=2] resx
+        ndarray[int64_t, ndim=2] nobs
+
+    if not len(values) == len(labels):
+       raise AssertionError("len(index) != len(labels)")
+
+    nobs = np.zeros((<object> out).shape, dtype=np.int64)
+    resx = np.empty_like(out)
+
+    N, K = (<object> values).shape
+
+    for i in range(N):
+        lab = labels[i]
+        if lab < 0:
+            continue
+
+        counts[lab] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not nan
+            if val == val:
+                nobs[lab, j] += 1
+                resx[lab, j] = val
+
+    for i in range(len(counts)):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = nan
+            else:
+                out[i, j] = resx[i, j]
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_last_int64(ndarray[int64_t, ndim=2] out,
+               ndarray[int64_t] counts,
+               ndarray[int64_t, ndim=2] values,
+               ndarray[int64_t] labels):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, lab
+        int64_t val, count
+        ndarray[int64_t, ndim=2] resx
+        ndarray[int64_t, ndim=2] nobs
+
+    if not len(values) == len(labels):
+       raise AssertionError("len(index) != len(labels)")
+
+    nobs = np.zeros((<object> out).shape, dtype=np.int64)
+    resx = np.empty_like(out)
+
+    N, K = (<object> values).shape
+
+    for i in range(N):
+        lab = labels[i]
+        if lab < 0:
+            continue
+
+        counts[lab] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not nan
+            if val == val:
+                nobs[lab, j] += 1
+                resx[lab, j] = val
+
+    for i in range(len(counts)):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = iNaT
+            else:
+                out[i, j] = resx[i, j]
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_last_bin_float64(ndarray[float64_t, ndim=2] out,
+                   ndarray[int64_t] counts,
+                   ndarray[float64_t, ndim=2] values,
+                   ndarray[int64_t] bins):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, ngroups, b
+        float64_t val, count
+        ndarray[float64_t, ndim=2] resx, nobs
+
+    nobs = np.zeros_like(out)
+    resx = np.empty_like(out)
+
+    if bins[len(bins) - 1] == len(values):
+        ngroups = len(bins)
+    else:
+        ngroups = len(bins) + 1
+
+    N, K = (<object> values).shape
+
+    b = 0
+    for i in range(N):
+        while b < ngroups - 1 and i >= bins[b]:
+            b += 1
+
+        counts[b] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not nan
+            if val == val:
+                nobs[b, j] += 1
+                resx[b, j] = val
+
+    for i in range(ngroups):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = nan
+            else:
+                out[i, j] = resx[i, j]
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_last_bin_float32(ndarray[float32_t, ndim=2] out,
+                   ndarray[int64_t] counts,
+                   ndarray[float32_t, ndim=2] values,
+                   ndarray[int64_t] bins):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, ngroups, b
+        float32_t val, count
+        ndarray[float32_t, ndim=2] resx, nobs
+
+    nobs = np.zeros_like(out)
+    resx = np.empty_like(out)
+
+    if bins[len(bins) - 1] == len(values):
+        ngroups = len(bins)
+    else:
+        ngroups = len(bins) + 1
+
+    N, K = (<object> values).shape
+
+    b = 0
+    for i in range(N):
+        while b < ngroups - 1 and i >= bins[b]:
+            b += 1
+
+        counts[b] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not nan
+            if val == val:
+                nobs[b, j] += 1
+                resx[b, j] = val
+
+    for i in range(ngroups):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = nan
+            else:
+                out[i, j] = resx[i, j]
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_last_bin_int64(ndarray[int64_t, ndim=2] out,
+                   ndarray[int64_t] counts,
+                   ndarray[int64_t, ndim=2] values,
+                   ndarray[int64_t] bins):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, ngroups, b
+        int64_t val, count
+        ndarray[int64_t, ndim=2] resx, nobs
+
+    nobs = np.zeros_like(out)
+    resx = np.empty_like(out)
+
+    if bins[len(bins) - 1] == len(values):
+        ngroups = len(bins)
+    else:
+        ngroups = len(bins) + 1
+
+    N, K = (<object> values).shape
+
+    b = 0
+    for i in range(N):
+        while b < ngroups - 1 and i >= bins[b]:
+            b += 1
+
+        counts[b] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not nan
+            if val == val:
+                nobs[b, j] += 1
+                resx[b, j] = val
 
-                # not nan
-                if val == val:
-                    nobs[b, j] += 1
-                    sumx[b, j] += val
-                    sumxx[b, j] += val * val
+    for i in range(ngroups):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = iNaT
+            else:
+                out[i, j] = resx[i, j]
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_nth_float64(ndarray[float64_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[float64_t, ndim=2] values,
+              ndarray[int64_t] labels, int64_t rank):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, lab
+        float64_t val, count
+        ndarray[float64_t, ndim=2] resx
+        ndarray[int64_t, ndim=2] nobs
+
+    if not len(values) == len(labels):
+       raise AssertionError("len(index) != len(labels)")
+
+    nobs = np.zeros((<object> out).shape, dtype=np.int64)
+    resx = np.empty_like(out)
+
+    N, K = (<object> values).shape
+
+    for i in range(N):
+        lab = labels[i]
+        if lab < 0:
+            continue
+
+        counts[lab] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not nan
+            if val == val:
+                nobs[lab, j] += 1
+                if nobs[lab, j] == rank:
+                    resx[lab, j] = val
+
+    for i in range(len(counts)):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = nan
+            else:
+                out[i, j] = resx[i, j]
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_nth_float32(ndarray[float32_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[float32_t, ndim=2] values,
+              ndarray[int64_t] labels, int64_t rank):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, lab
+        float32_t val, count
+        ndarray[float32_t, ndim=2] resx
+        ndarray[int64_t, ndim=2] nobs
+
+    if not len(values) == len(labels):
+       raise AssertionError("len(index) != len(labels)")
+
+    nobs = np.zeros((<object> out).shape, dtype=np.int64)
+    resx = np.empty_like(out)
+
+    N, K = (<object> values).shape
+
+    for i in range(N):
+        lab = labels[i]
+        if lab < 0:
+            continue
+
+        counts[lab] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not nan
+            if val == val:
+                nobs[lab, j] += 1
+                if nobs[lab, j] == rank:
+                    resx[lab, j] = val
+
+    for i in range(len(counts)):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = nan
+            else:
+                out[i, j] = resx[i, j]
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_nth_int64(ndarray[int64_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[int64_t, ndim=2] values,
+              ndarray[int64_t] labels, int64_t rank):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, lab
+        int64_t val, count
+        ndarray[int64_t, ndim=2] resx
+        ndarray[int64_t, ndim=2] nobs
+
+    if not len(values) == len(labels):
+       raise AssertionError("len(index) != len(labels)")
+
+    nobs = np.zeros((<object> out).shape, dtype=np.int64)
+    resx = np.empty_like(out)
+
+    N, K = (<object> values).shape
+
+    for i in range(N):
+        lab = labels[i]
+        if lab < 0:
+            continue
+
+        counts[lab] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not nan
+            if val == val:
+                nobs[lab, j] += 1
+                if nobs[lab, j] == rank:
+                    resx[lab, j] = val
+
+    for i in range(len(counts)):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = iNaT
+            else:
+                out[i, j] = resx[i, j]
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_nth_bin_float64(ndarray[float64_t, ndim=2] out,
+                  ndarray[int64_t] counts,
+                  ndarray[float64_t, ndim=2] values,
+                  ndarray[int64_t] bins, int64_t rank):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, ngroups, b
+        float64_t val, count
+        ndarray[float64_t, ndim=2] resx, nobs
+
+    nobs = np.zeros_like(out)
+    resx = np.empty_like(out)
+
+    if bins[len(bins) - 1] == len(values):
+        ngroups = len(bins)
     else:
-        for i in range(N):
-            while b < ngroups - 1 and i >= bins[b]:
-                b += 1
+        ngroups = len(bins) + 1
 
-            counts[b] += 1
-            val = values[i, 0]
+    N, K = (<object> values).shape
+
+    b = 0
+    for i in range(N):
+        while b < ngroups - 1 and i >= bins[b]:
+            b += 1
+
+        counts[b] += 1
+        for j in range(K):
+            val = values[i, j]
 
             # not nan
             if val == val:
-                nobs[b, 0] += 1
-                sumx[b, 0] += val
-                sumxx[b, 0] += val * val
+                nobs[b, j] += 1
+                if nobs[b, j] == rank:
+                    resx[b, j] = val
 
     for i in range(ngroups):
         for j in range(K):
-            ct = nobs[i, j]
-            if ct < 2:
+            if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
-                             (ct * ct - ct))
-@cython.wraparound(False)
+                out[i, j] = resx[i, j]
 @cython.boundscheck(False)
-def group_var_bin_float32(ndarray[float32_t, ndim=2] out,
+@cython.wraparound(False)
+def group_nth_bin_float32(ndarray[float32_t, ndim=2] out,
                   ndarray[int64_t] counts,
                   ndarray[float32_t, ndim=2] values,
-                  ndarray[int64_t] bins):
-
+                  ndarray[int64_t] bins, int64_t rank):
+    '''
+    Only aggregates on axis=0
+    '''
     cdef:
         Py_ssize_t i, j, N, K, ngroups, b
-        float32_t val, ct
-        ndarray[float32_t, ndim=2] nobs, sumx, sumxx
+        float32_t val, count
+        ndarray[float32_t, ndim=2] resx, nobs
 
     nobs = np.zeros_like(out)
-    sumx = np.zeros_like(out)
-    sumxx = np.zeros_like(out)
+    resx = np.empty_like(out)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
@@ -5861,60 +6368,93 @@ def group_var_bin_float32(ndarray[float32_t, ndim=2] out,
     N, K = (<object> values).shape
 
     b = 0
-    if K > 1:
-        for i in range(N):
-            while b < ngroups - 1 and i >= bins[b]:
-                b += 1
+    for i in range(N):
+        while b < ngroups - 1 and i >= bins[b]:
+            b += 1
 
-            counts[b] += 1
+        counts[b] += 1
+        for j in range(K):
+            val = values[i, j]
 
-            for j in range(K):
-                val = values[i, j]
+            # not nan
+            if val == val:
+                nobs[b, j] += 1
+                if nobs[b, j] == rank:
+                    resx[b, j] = val
 
-                # not nan
-                if val == val:
-                    nobs[b, j] += 1
-                    sumx[b, j] += val
-                    sumxx[b, j] += val * val
+    for i in range(ngroups):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = nan
+            else:
+                out[i, j] = resx[i, j]
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_nth_bin_int64(ndarray[int64_t, ndim=2] out,
+                  ndarray[int64_t] counts,
+                  ndarray[int64_t, ndim=2] values,
+                  ndarray[int64_t] bins, int64_t rank):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, ngroups, b
+        int64_t val, count
+        ndarray[int64_t, ndim=2] resx, nobs
+
+    nobs = np.zeros_like(out)
+    resx = np.empty_like(out)
+
+    if bins[len(bins) - 1] == len(values):
+        ngroups = len(bins)
     else:
-        for i in range(N):
-            while b < ngroups - 1 and i >= bins[b]:
-                b += 1
+        ngroups = len(bins) + 1
+
+    N, K = (<object> values).shape
 
-            counts[b] += 1
-            val = values[i, 0]
+    b = 0
+    for i in range(N):
+        while b < ngroups - 1 and i >= bins[b]:
+            b += 1
+
+        counts[b] += 1
+        for j in range(K):
+            val = values[i, j]
 
             # not nan
             if val == val:
-                nobs[b, 0] += 1
-                sumx[b, 0] += val
-                sumxx[b, 0] += val * val
+                nobs[b, j] += 1
+                if nobs[b, j] == rank:
+                    resx[b, j] = val
 
     for i in range(ngroups):
         for j in range(K):
-            ct = nobs[i, j]
-            if ct < 2:
-                out[i, j] = nan
+            if nobs[i, j] == 0:
+                out[i, j] = iNaT
             else:
-                out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
-                             (ct * ct - ct))
+                out[i, j] = resx[i, j]
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_mean_float64(ndarray[float64_t, ndim=2] out,
-               ndarray[int64_t] counts,
-               ndarray[float64_t, ndim=2] values,
-               ndarray[int64_t] labels):
+def group_min_float64(ndarray[float64_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[float64_t, ndim=2] values,
+              ndarray[int64_t] labels):
+    '''
+    Only aggregates on axis=0
+    '''
     cdef:
         Py_ssize_t i, j, N, K, lab
         float64_t val, count
-        ndarray[float64_t, ndim=2] sumx, nobs
+        ndarray[float64_t, ndim=2] minx, nobs
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
     nobs = np.zeros_like(out)
-    sumx = np.zeros_like(out)
+
+    minx = np.empty_like(out)
+    minx.fill(np.inf)
 
     N, K = (<object> values).shape
 
@@ -5927,10 +6467,12 @@ def group_mean_float64(ndarray[float64_t, ndim=2] out,
             counts[lab] += 1
             for j in range(K):
                 val = values[i, j]
+
                 # not nan
                 if val == val:
                     nobs[lab, j] += 1
-                    sumx[lab, j] += val
+                    if val < minx[lab, j]:
+                        minx[lab, j] = val
     else:
         for i in range(N):
             lab = labels[i]
@@ -5939,34 +6481,40 @@ def group_mean_float64(ndarray[float64_t, ndim=2] out,
 
             counts[lab] += 1
             val = values[i, 0]
+
             # not nan
             if val == val:
                 nobs[lab, 0] += 1
-                sumx[lab, 0] += val
+                if val < minx[lab, 0]:
+                    minx[lab, 0] = val
 
     for i in range(len(counts)):
         for j in range(K):
-            count = nobs[i, j]
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = sumx[i, j] / count
+                out[i, j] = minx[i, j]
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_mean_float32(ndarray[float32_t, ndim=2] out,
-               ndarray[int64_t] counts,
-               ndarray[float32_t, ndim=2] values,
-               ndarray[int64_t] labels):
+def group_min_float32(ndarray[float32_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[float32_t, ndim=2] values,
+              ndarray[int64_t] labels):
+    '''
+    Only aggregates on axis=0
+    '''
     cdef:
         Py_ssize_t i, j, N, K, lab
         float32_t val, count
-        ndarray[float32_t, ndim=2] sumx, nobs
+        ndarray[float32_t, ndim=2] minx, nobs
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
 
     nobs = np.zeros_like(out)
-    sumx = np.zeros_like(out)
+
+    minx = np.empty_like(out)
+    minx.fill(np.inf)
 
     N, K = (<object> values).shape
 
@@ -5979,10 +6527,12 @@ def group_mean_float32(ndarray[float32_t, ndim=2] out,
             counts[lab] += 1
             for j in range(K):
                 val = values[i, j]
+
                 # not nan
                 if val == val:
                     nobs[lab, j] += 1
-                    sumx[lab, j] += val
+                    if val < minx[lab, j]:
+                        minx[lab, j] = val
     else:
         for i in range(N):
             lab = labels[i]
@@ -5991,139 +6541,32 @@ def group_mean_float32(ndarray[float32_t, ndim=2] out,
 
             counts[lab] += 1
             val = values[i, 0]
+
             # not nan
             if val == val:
                 nobs[lab, 0] += 1
-                sumx[lab, 0] += val
+                if val < minx[lab, 0]:
+                    minx[lab, 0] = val
 
     for i in range(len(counts)):
         for j in range(K):
-            count = nobs[i, j]
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
-                out[i, j] = sumx[i, j] / count
-
-
-def group_mean_bin_float64(ndarray[float64_t, ndim=2] out,
-                   ndarray[int64_t] counts,
-                   ndarray[float64_t, ndim=2] values,
-                   ndarray[int64_t] bins):
-    cdef:
-        Py_ssize_t i, j, N, K, ngroups, b
-        float64_t val, count
-        ndarray[float64_t, ndim=2] sumx, nobs
-
-    nobs = np.zeros_like(out)
-    sumx = np.zeros_like(out)
-
-    N, K = (<object> values).shape
-    if bins[len(bins) - 1] == len(values):
-        ngroups = len(bins)
-    else:
-        ngroups = len(bins) + 1
-
-    b = 0
-    if K > 1:
-        for i in range(N):
-            while b < ngroups - 1 and i >= bins[b]:
-                b += 1
-
-            counts[b] += 1
-            for j in range(K):
-                val = values[i, j]
-
-                # not nan
-                if val == val:
-                    nobs[b, j] += 1
-                    sumx[b, j] += val
-    else:
-        for i in range(N):
-            while b < ngroups - 1 and i >= bins[b]:
-                b += 1
-
-            counts[b] += 1
-            val = values[i, 0]
-
-            # not nan
-            if val == val:
-                nobs[b, 0] += 1
-                sumx[b, 0] += val
-
-    for i in range(ngroups):
-        for j in range(K):
-            count = nobs[i, j]
-            if count == 0:
-                out[i, j] = nan
-            else:
-                out[i, j] = sumx[i, j] / count
-
-def group_mean_bin_float32(ndarray[float32_t, ndim=2] out,
-                   ndarray[int64_t] counts,
-                   ndarray[float32_t, ndim=2] values,
-                   ndarray[int64_t] bins):
-    cdef:
-        Py_ssize_t i, j, N, K, ngroups, b
-        float32_t val, count
-        ndarray[float32_t, ndim=2] sumx, nobs
-
-    nobs = np.zeros_like(out)
-    sumx = np.zeros_like(out)
-
-    N, K = (<object> values).shape
-    if bins[len(bins) - 1] == len(values):
-        ngroups = len(bins)
-    else:
-        ngroups = len(bins) + 1
-
-    b = 0
-    if K > 1:
-        for i in range(N):
-            while b < ngroups - 1 and i >= bins[b]:
-                b += 1
-
-            counts[b] += 1
-            for j in range(K):
-                val = values[i, j]
-
-                # not nan
-                if val == val:
-                    nobs[b, j] += 1
-                    sumx[b, j] += val
-    else:
-        for i in range(N):
-            while b < ngroups - 1 and i >= bins[b]:
-                b += 1
-
-            counts[b] += 1
-            val = values[i, 0]
-
-            # not nan
-            if val == val:
-                nobs[b, 0] += 1
-                sumx[b, 0] += val
-
-    for i in range(ngroups):
-        for j in range(K):
-            count = nobs[i, j]
-            if count == 0:
-                out[i, j] = nan
-            else:
-                out[i, j] = sumx[i, j] / count
-
+                out[i, j] = minx[i, j]
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_min_float64(ndarray[float64_t, ndim=2] out,
+def group_min_int64(ndarray[int64_t, ndim=2] out,
               ndarray[int64_t] counts,
-              ndarray[float64_t, ndim=2] values,
+              ndarray[int64_t, ndim=2] values,
               ndarray[int64_t] labels):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, lab
-        float64_t val, count
-        ndarray[float64_t, ndim=2] minx, nobs
+        int64_t val, count
+        ndarray[int64_t, ndim=2] minx, nobs
 
     if not len(values) == len(labels):
        raise AssertionError("len(index) != len(labels)")
@@ -6131,7 +6574,7 @@ def group_min_float64(ndarray[float64_t, ndim=2] out,
     nobs = np.zeros_like(out)
 
     minx = np.empty_like(out)
-    minx.fill(np.inf)
+    minx.fill(9223372036854775807)
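+    # int64 counterpart of the float np.inf fill: np.iinfo(np.int64).max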
 
     N, K = (<object> values).shape
 
@@ -6168,83 +6611,84 @@ def group_min_float64(ndarray[float64_t, ndim=2] out,
     for i in range(len(counts)):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = iNaT
             else:
                 out[i, j] = minx[i, j]
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_min_float32(ndarray[float32_t, ndim=2] out,
-              ndarray[int64_t] counts,
-              ndarray[float32_t, ndim=2] values,
-              ndarray[int64_t] labels):
+def group_min_bin_float64(ndarray[float64_t, ndim=2] out,
+                   ndarray[int64_t] counts,
+                   ndarray[float64_t, ndim=2] values,
+                   ndarray[int64_t] bins):
     '''
     Only aggregates on axis=0
     '''
     cdef:
-        Py_ssize_t i, j, N, K, lab
-        float32_t val, count
-        ndarray[float32_t, ndim=2] minx, nobs
-
-    if not len(values) == len(labels):
-       raise AssertionError("len(index) != len(labels)")
+        Py_ssize_t i, j, N, K, ngroups, b
+        float64_t val, count
+        ndarray[float64_t, ndim=2] minx, nobs
 
     nobs = np.zeros_like(out)
 
     minx = np.empty_like(out)
     minx.fill(np.inf)
 
+    if bins[len(bins) - 1] == len(values):
+        ngroups = len(bins)
+    else:
+        ngroups = len(bins) + 1
+
     N, K = (<object> values).shape
 
+    b = 0
     if K > 1:
         for i in range(N):
-            lab = labels[i]
-            if lab < 0:
-                continue
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
 
-            counts[lab] += 1
+            counts[b] += 1
             for j in range(K):
                 val = values[i, j]
 
                 # not nan
                 if val == val:
-                    nobs[lab, j] += 1
-                    if val < minx[lab, j]:
-                        minx[lab, j] = val
+                    nobs[b, j] += 1
+                    if val < minx[b, j]:
+                        minx[b, j] = val
     else:
         for i in range(N):
-            lab = labels[i]
-            if lab < 0:
-                continue
+            while b < ngroups - 1 and i >= bins[b]:
+                b += 1
 
-            counts[lab] += 1
+            counts[b] += 1
             val = values[i, 0]
 
             # not nan
             if val == val:
-                nobs[lab, 0] += 1
-                if val < minx[lab, 0]:
-                    minx[lab, 0] = val
+                nobs[b, 0] += 1
+                if val < minx[b, 0]:
+                    minx[b, 0] = val
 
-    for i in range(len(counts)):
+    for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
                 out[i, j] = nan
             else:
                 out[i, j] = minx[i, j]
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_min_bin_float64(ndarray[float64_t, ndim=2] out,
+def group_min_bin_float32(ndarray[float32_t, ndim=2] out,
                    ndarray[int64_t] counts,
-                   ndarray[float64_t, ndim=2] values,
+                   ndarray[float32_t, ndim=2] values,
                    ndarray[int64_t] bins):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, ngroups, b
-        float64_t val, count
-        ndarray[float64_t, ndim=2] minx, nobs
+        float32_t val, count
+        ndarray[float32_t, ndim=2] minx, nobs
 
     nobs = np.zeros_like(out)
 
@@ -6295,22 +6739,22 @@ def group_min_bin_float64(ndarray[float64_t, ndim=2] out,
                 out[i, j] = minx[i, j]
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_min_bin_float32(ndarray[float32_t, ndim=2] out,
+def group_min_bin_int64(ndarray[int64_t, ndim=2] out,
                    ndarray[int64_t] counts,
-                   ndarray[float32_t, ndim=2] values,
+                   ndarray[int64_t, ndim=2] values,
                    ndarray[int64_t] bins):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, ngroups, b
-        float32_t val, count
-        ndarray[float32_t, ndim=2] minx, nobs
+        int64_t val, count
+        ndarray[int64_t, ndim=2] minx, nobs
 
     nobs = np.zeros_like(out)
 
     minx = np.empty_like(out)
-    minx.fill(np.inf)
+    minx.fill(9223372036854775807)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
@@ -6351,7 +6795,7 @@ def group_min_bin_float32(ndarray[float32_t, ndim=2] out,
     for i in range(ngroups):
         for j in range(K):
             if nobs[i, j] == 0:
-                out[i, j] = nan
+                out[i, j] = iNaT
             else:
                 out[i, j] = minx[i, j]
 
@@ -6475,6 +6919,66 @@ def group_max_float32(ndarray[float32_t, ndim=2] out,
                 out[i, j] = nan
             else:
                 out[i, j] = maxx[i, j]
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_max_int64(ndarray[int64_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[int64_t, ndim=2] values,
+              ndarray[int64_t] labels):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, N, K, lab
+        int64_t val, count
+        ndarray[int64_t, ndim=2] maxx, nobs
+
+    if not len(values) == len(labels):
+       raise AssertionError("len(index) != len(labels)")
+
+    nobs = np.zeros_like(out)
+
+    maxx = np.empty_like(out)
+    maxx.fill(-9223372036854775807)
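+    # int64 counterpart of a -np.inf fill: np.iinfo(np.int64).min + 1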
+
+    N, K = (<object> values).shape
+
+    if K > 1:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+
+            counts[lab] += 1
+            for j in range(K):
+                val = values[i, j]
+
+                # not nan
+                if val == val:
+                    nobs[lab, j] += 1
+                    if val > maxx[lab, j]:
+                        maxx[lab, j] = val
+    else:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+
+            counts[lab] += 1
+            val = values[i, 0]
+
+            # not nan
+            if val == val:
+                nobs[lab, 0] += 1
+                if val > maxx[lab, 0]:
+                    maxx[lab, 0] = val
+
+    for i in range(len(counts)):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = iNaT
+            else:
+                out[i, j] = maxx[i, j]
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
@@ -6596,21 +7100,23 @@ def group_max_bin_float32(ndarray[float32_t, ndim=2] out,
                 out[i, j] = nan
             else:
                 out[i, j] = maxx[i, j]
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_ohlc_float64(ndarray[float64_t, ndim=2] out,
+def group_max_bin_int64(ndarray[int64_t, ndim=2] out,
                   ndarray[int64_t] counts,
-                  ndarray[float64_t, ndim=2] values,
+                  ndarray[int64_t, ndim=2] values,
                   ndarray[int64_t] bins):
     '''
     Only aggregates on axis=0
     '''
     cdef:
         Py_ssize_t i, j, N, K, ngroups, b
-        float64_t val, count
-        float64_t vopen, vhigh, vlow, vclose, NA
-        bint got_first = 0
+        int64_t val, count
+        ndarray[int64_t, ndim=2] maxx, nobs
+
+    nobs = np.zeros_like(out)
+    maxx = np.empty_like(out)
+    maxx.fill(-9223372036854775807)
 
     if bins[len(bins) - 1] == len(values):
         ngroups = len(bins)
@@ -6619,130 +7125,41 @@ def group_ohlc_float64(ndarray[float64_t, ndim=2] out,
 
     N, K = (<object> values).shape
 
-    if out.shape[1] != 4:
-        raise ValueError('Output array must have 4 columns')
-
-    NA = np.nan
-
     b = 0
     if K > 1:
-        raise NotImplementedError
-    else:
         for i in range(N):
             while b < ngroups - 1 and i >= bins[b]:
-                if not got_first:
-                    out[b, 0] = NA
-                    out[b, 1] = NA
-                    out[b, 2] = NA
-                    out[b, 3] = NA
-                else:
-                    out[b, 0] = vopen
-                    out[b, 1] = vhigh
-                    out[b, 2] = vlow
-                    out[b, 3] = vclose
                 b += 1
-                got_first = 0
 
             counts[b] += 1
-            val = values[i, 0]
-
-            # not nan
-            if val == val:
-                if not got_first:
-                    got_first = 1
-                    vopen = val
-                    vlow = val
-                    vhigh = val
-                else:
-                    if val < vlow:
-                        vlow = val
-                    if val > vhigh:
-                        vhigh = val
-                vclose = val
-
-        if not got_first:
-            out[b, 0] = NA
-            out[b, 1] = NA
-            out[b, 2] = NA
-            out[b, 3] = NA
-        else:
-            out[b, 0] = vopen
-            out[b, 1] = vhigh
-            out[b, 2] = vlow
-            out[b, 3] = vclose
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def group_ohlc_float32(ndarray[float32_t, ndim=2] out,
-                  ndarray[int64_t] counts,
-                  ndarray[float32_t, ndim=2] values,
-                  ndarray[int64_t] bins):
-    '''
-    Only aggregates on axis=0
-    '''
-    cdef:
-        Py_ssize_t i, j, N, K, ngroups, b
-        float32_t val, count
-        float32_t vopen, vhigh, vlow, vclose, NA
-        bint got_first = 0
-
-    if bins[len(bins) - 1] == len(values):
-        ngroups = len(bins)
-    else:
-        ngroups = len(bins) + 1
-
-    N, K = (<object> values).shape
-
-    if out.shape[1] != 4:
-        raise ValueError('Output array must have 4 columns')
-
-    NA = np.nan
+            for j in range(K):
+                val = values[i, j]
 
-    b = 0
-    if K > 1:
-        raise NotImplementedError
+                # not nan
+                if val == val:
+                    nobs[b, j] += 1
+                    if val > maxx[b, j]:
+                        maxx[b, j] = val
     else:
         for i in range(N):
             while b < ngroups - 1 and i >= bins[b]:
-                if not got_first:
-                    out[b, 0] = NA
-                    out[b, 1] = NA
-                    out[b, 2] = NA
-                    out[b, 3] = NA
-                else:
-                    out[b, 0] = vopen
-                    out[b, 1] = vhigh
-                    out[b, 2] = vlow
-                    out[b, 3] = vclose
                 b += 1
-                got_first = 0
 
             counts[b] += 1
             val = values[i, 0]
 
             # not nan
             if val == val:
-                if not got_first:
-                    got_first = 1
-                    vopen = val
-                    vlow = val
-                    vhigh = val
-                else:
-                    if val < vlow:
-                        vlow = val
-                    if val > vhigh:
-                        vhigh = val
-                vclose = val
+                nobs[b, 0] += 1
+                if val > maxx[b, 0]:
+                    maxx[b, 0] = val
 
-        if not got_first:
-            out[b, 0] = NA
-            out[b, 1] = NA
-            out[b, 2] = NA
-            out[b, 3] = NA
-        else:
-            out[b, 0] = vopen
-            out[b, 1] = vhigh
-            out[b, 2] = vlow
-            out[b, 3] = vclose
+    for i in range(ngroups):
+        for j in range(K):
+            if nobs[i, j] == 0:
+                out[i, j] = iNaT
+            else:
+                out[i, j] = maxx[i, j]
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
@@ -6816,6 +7233,42 @@ def group_count_float32(ndarray[float32_t, ndim=2] out,
             out[i, j] = nobs[i, j]
 
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_count_int64(ndarray[int64_t, ndim=2] out,
+                         ndarray[int64_t] counts,
+                         ndarray[int64_t, ndim=2] values,
+                         ndarray[int64_t] labels):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, lab
+        Py_ssize_t N = values.shape[0], K = values.shape[1]
+        int64_t val
+        ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]),
+                                                 dtype=np.int64)
+
+    if len(values) != len(labels):
+       raise AssertionError("len(index) != len(labels)")
+
+    for i in range(N):
+        lab = labels[i]
+        if lab < 0:
+            continue
+
+        counts[lab] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not missing: val == val is always true for ints, so only iNaT marks NaT
+            nobs[lab, j] += val == val and val != iNaT
+
+    for i in range(len(counts)):
+        for j in range(K):
+            out[i, j] = nobs[i, j]
+
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_count_object(ndarray[object, ndim=2] out,
@@ -6854,7 +7307,7 @@ def group_count_object(ndarray[object, ndim=2] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_count_int64(ndarray[float64_t, ndim=2] out,
+def group_count_int64(ndarray[int64_t, ndim=2] out,
                          ndarray[int64_t] counts,
                          ndarray[int64_t, ndim=2] values,
                          ndarray[int64_t] labels):
@@ -6957,6 +7410,40 @@ def group_count_bin_float32(ndarray[float32_t, ndim=2] out,
             out[i, j] = nobs[i, j]
 
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_count_bin_int64(ndarray[int64_t, ndim=2] out,
+                             ndarray[int64_t] counts,
+                             ndarray[int64_t, ndim=2] values,
+                             ndarray[int64_t] bins):
+    '''
+    Only aggregates on axis=0
+    '''
+    cdef:
+        Py_ssize_t i, j, ngroups
+        Py_ssize_t N = values.shape[0], K = values.shape[1], b = 0
+        int64_t val
+        ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]),
+                                                 dtype=np.int64)
+
+    ngroups = len(bins) + (bins[len(bins) - 1] != N)
+
+    for i in range(N):
+        while b < ngroups - 1 and i >= bins[b]:
+            b += 1
+
+        counts[b] += 1
+        for j in range(K):
+            val = values[i, j]
+
+            # not missing: val == val is always true for ints, so only iNaT marks NaT
+            nobs[b, j] += val == val and val != iNaT
+
+    for i in range(ngroups):
+        for j in range(K):
+            out[i, j] = nobs[i, j]
+
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_count_bin_object(ndarray[object, ndim=2] out,
@@ -6993,7 +7480,7 @@ def group_count_bin_object(ndarray[object, ndim=2] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_count_bin_int64(ndarray[float64_t, ndim=2] out,
+def group_count_bin_int64(ndarray[int64_t, ndim=2] out,
                              ndarray[int64_t] counts,
                              ndarray[int64_t, ndim=2] values,
                              ndarray[int64_t] bins):
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index d4fcaaec9eb6e..f2ea17db44211 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -3483,6 +3483,77 @@ def test_groupby_categorical_no_compress(self):
         exp = np.array([1,2,4,np.nan])
         self.assert_numpy_array_equivalent(result, exp)
 
+    def test_groupby_non_arithmetic_agg_types(self):
+        # GH9311, GH6620
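+        # first/last/min/max/nth/count must not coerce the column through
+        # float64: the aggregated values and the dtype of 'b' should both
+        # round-trip unchanged (count is the exception and yields int64)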
+        df = pd.DataFrame([{'a': 1, 'b': 1},
+                           {'a': 1, 'b': 2},
+                           {'a': 2, 'b': 3},
+                           {'a': 2, 'b': 4}])
+
+        dtypes = ['int8', 'int16', 'int32', 'int64',
+                  'float32', 'float64']
+
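+        # per-method expectations: 'df' is the expected result (indexed by
+        # 'a' below), 'args' are positional arguments for the groupby call,
+        # and 'out_type', when present, overrides the expected dtype of 'b'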
+        grp_exp = {'first': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]},
+                   'last': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]},
+                   'min': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]},
+                   'max': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]},
+                   'nth': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}],
+                           'args': [1]},
+                   'count': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}],
+                             'out_type': 'int64'}}
+
+        for dtype in dtypes:
+            df_in = df.copy()
+            df_in['b'] = df_in.b.astype(dtype)
+
+            for method, data in compat.iteritems(grp_exp):
+                if 'args' not in data:
+                    data['args'] = []
+
+                if 'out_type' in data:
+                    out_type = data['out_type']
+                else:
+                    out_type = dtype
+
+                exp = data['df']
+                df_out = pd.DataFrame(exp)
+
+                df_out['b'] = df_out.b.astype(out_type)
+                df_out.set_index('a', inplace=True)
+
+                grpd = df_in.groupby('a')
+                t = getattr(grpd, method)(*data['args'])
+                assert_frame_equal(t, df_out)
+
+    def test_groupby_non_arithmetic_agg_intlike_precision(self):
+        # GH9311, GH6620
+        c = 24650000000000000
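+        # c exceeds 2**53, so c + 1 and c + 2 have no exact float64
+        # representation (int(float(c + 1)) == c); the Timestamps below are
+        # likewise above 2**53 when viewed as int64 nanoseconds, so any
+        # float64 round-trip in the groupby path would silently alter them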
+
+        inputs = ((Timestamp('2011-01-15 12:50:28.502376'),
+                   Timestamp('2011-01-20 12:50:28.593448')),
+                  (1 + c, 2 + c))
+
+        for i in inputs:
+            df = pd.DataFrame([{'a': 1,
+                                'b': i[0]},
+                               {'a': 1,
+                                'b': i[1]}])
+
+            grp_exp = {'first': {'expected': i[0]},
+                       'last': {'expected': i[1]},
+                       'min': {'expected': i[0]},
+                       'max': {'expected': i[1]},
+                       'nth': {'expected': i[1], 'args': [1]},
+                       'count': {'expected': 2}}
+
+            for method, data in compat.iteritems(grp_exp):
+                if 'args' not in data:
+                    data['args'] = []
+
+                grpd = df.groupby('a')
+                res = getattr(grpd, method)(*data['args'])
+                self.assertEqual(res.iloc[0].b, data['expected'])
+
     def test_groupby_first_datetime64(self):
         df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)])
         df[1] = df[1].view('M8[ns]')