From cf56ff1229827bc7c7a70472b1bf1dde60a8efb4 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 2 Nov 2014 13:01:37 -0500
Subject: [PATCH] BUG: concat of series of dtype category converting to object
 dtype (GH8641)

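Concatenating Series of category dtype previously coerced the result to
object dtype.  Series concat now dispatches on the values through a
dtype-aware _concat_compat, preserving the category dtype when all inputs
share the same categories and raising otherwise.  A minimal sketch of the
intended behavior (mirroring the tests added in this patch):

    >>> import pandas as pd
    >>> s = pd.Series(list('abc'), dtype='category')
    >>> pd.concat([s, s]).dtype       # was object; now category
    >>> s2 = pd.Series(list('abd'), dtype='category')
    >>> pd.concat([s, s2])            # now raises ValueError
    ...                               # ("incompatible categories in categorical concat")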
---
 doc/source/whatsnew/v0.15.2.txt    |   1 +
 pandas/core/categorical.py         | 118 +++++++++++++++++++++--------
 pandas/core/common.py              | 118 +++++++++++++++++++++--------
 pandas/core/generic.py             |   5 +-
 pandas/core/internals.py           |  79 +++++--------------
 pandas/sparse/array.py             |  43 +++++++++++
 pandas/sparse/tests/test_sparse.py |   5 +-
 pandas/tests/test_categorical.py   |  17 +++++
 pandas/tests/test_series.py        |  86 ++++++++++++++++++++-
 pandas/tools/merge.py              |  10 ++-
 pandas/tools/tests/test_merge.py   |   1 +
 pandas/tseries/common.py           |  58 +++++++++++++-
 12 files changed, 407 insertions(+), 134 deletions(-)

diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt
index 66b839ed01a29..8ea79089f95e3 100644
--- a/doc/source/whatsnew/v0.15.2.txt
+++ b/doc/source/whatsnew/v0.15.2.txt
@@ -20,6 +20,7 @@ users upgrade to this version.
 API changes
 ~~~~~~~~~~~
 
+- Bug in concat of Series with ``category`` dtype which was coercing the result to ``object`` dtype (:issue:`8641`)
 
 .. _whatsnew_0152.enhancements:
 
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index dd23897a3f7e9..414c4a8315e6d 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -15,7 +15,12 @@
 import pandas.core.common as com
 from pandas.util.decorators import cache_readonly
 
-from pandas.core.common import isnull
+from pandas.core.common import (CategoricalDtype, ABCSeries, isnull, notnull,
+                                is_categorical_dtype, is_integer_dtype, is_object_dtype,
+                                _possibly_infer_to_datetimelike, get_dtype_kinds,
+                                is_list_like, _is_sequence,
+                                _ensure_platform_int, _ensure_object, _ensure_int64,
+                                _coerce_indexer_dtype, _values_from_object, take_1d)
 from pandas.util.terminal import get_terminal_size
 from pandas.core.config import get_option
 from pandas.core import format as fmt
@@ -69,11 +74,11 @@ def f(self, other):
 
 def _is_categorical(array):
     """ return if we are a categorical possibility """
-    return isinstance(array, Categorical) or isinstance(array.dtype, com.CategoricalDtype)
+    return isinstance(array, Categorical) or isinstance(array.dtype, CategoricalDtype)
 
 def _maybe_to_categorical(array):
     """ coerce to a categorical if a series is given """
-    if isinstance(array, com.ABCSeries):
+    if isinstance(array, ABCSeries):
         return array.values
     return array
 
@@ -175,7 +180,7 @@ class Categorical(PandasObject):
     >>> a.min()
     'c'
     """
-    dtype = com.CategoricalDtype()
+    dtype = CategoricalDtype()
     """The dtype (always "category")"""
 
     ordered = None
@@ -203,7 +208,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
 
         if fastpath:
             # fast path
-            self._codes = com._coerce_indexer_dtype(values, categories)
+            self._codes = _coerce_indexer_dtype(values, categories)
             self.name = name
             self.categories = categories
             self.ordered = ordered
@@ -223,11 +228,11 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
                                  "use only 'categories'")
 
         # sanitize input
-        if com.is_categorical_dtype(values):
+        if is_categorical_dtype(values):
 
             # we are either a Series or a Categorical
             cat = values
-            if isinstance(values, com.ABCSeries):
+            if isinstance(values, ABCSeries):
                 cat = values.values
             if categories is None:
                 categories = cat.categories
@@ -244,7 +249,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
             # which is fine, but since factorize does this correctly no need here
             # this is an issue because _sanitize_array also coerces np.nan to a string
             # under certain versions of numpy as well
-            values = com._possibly_infer_to_datetimelike(values, convert_dates=True)
+            values = _possibly_infer_to_datetimelike(values, convert_dates=True)
             if not isinstance(values, np.ndarray):
                 values = _convert_to_list_like(values)
                 from pandas.core.series import _sanitize_array
@@ -286,11 +291,11 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
             codes = _get_codes_for_values(values, categories)
 
             # TODO: check for old style usage. These warnings should be removes after 0.18/ in 2016
-            if com.is_integer_dtype(values) and not com.is_integer_dtype(categories):
+            if is_integer_dtype(values) and not is_integer_dtype(categories):
                 warn("Values and categories have different dtypes. Did you mean to use\n"
                      "'Categorical.from_codes(codes, categories)'?", RuntimeWarning)
 
-            if com.is_integer_dtype(values) and (codes == -1).all():
+            if is_integer_dtype(values) and (codes == -1).all():
                 warn("None of the categories were found in values. Did you mean to use\n"
                      "'Categorical.from_codes(codes, categories)'?", RuntimeWarning)
 
@@ -302,7 +307,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
         self.ordered = False if ordered is None else ordered
         self.categories = categories
         self.name = name
-        self._codes = com._coerce_indexer_dtype(codes, categories)
+        self._codes = _coerce_indexer_dtype(codes, categories)
 
     def copy(self):
         """ Copy constructor. """
@@ -409,7 +414,7 @@ def _validate_categories(cls, categories):
                 # on categories with NaNs, int values would be converted to float.
                 # Use "object" dtype to prevent this.
                 if isnull(categories).any():
-                    without_na = np.array([x for x in categories if com.notnull(x)])
+                    without_na = np.array([x for x in categories if notnull(x)])
                     with_na = np.array(categories)
                     if with_na.dtype != without_na.dtype:
                         dtype = "object"
@@ -617,7 +622,7 @@ def add_categories(self, new_categories, inplace=False):
         remove_unused_categories
         set_categories
         """
-        if not com.is_list_like(new_categories):
+        if not is_list_like(new_categories):
             new_categories = [new_categories]
         already_included = set(new_categories) & set(self._categories)
         if len(already_included) != 0:
@@ -627,7 +632,7 @@ def add_categories(self, new_categories, inplace=False):
         new_categories = self._validate_categories(new_categories)
         cat = self if inplace else self.copy()
         cat._categories = new_categories
-        cat._codes = com._coerce_indexer_dtype(cat._codes, new_categories)
+        cat._codes = _coerce_indexer_dtype(cat._codes, new_categories)
         if not inplace:
             return cat
 
@@ -662,7 +667,7 @@ def remove_categories(self, removals, inplace=False):
         remove_unused_categories
         set_categories
         """
-        if not com.is_list_like(removals):
+        if not is_list_like(removals):
             removals = [removals]
         removals = set(list(removals))
         not_included = removals - set(self._categories)
@@ -696,7 +701,7 @@ def remove_unused_categories(self, inplace=False):
         """
         cat = self if inplace else self.copy()
         _used = sorted(np.unique(cat._codes))
-        new_categories = cat.categories.take(com._ensure_platform_int(_used))
+        new_categories = cat.categories.take(_ensure_platform_int(_used))
         new_categories = _ensure_index(new_categories)
         cat._codes = _get_codes_for_values(cat.__array__(), new_categories)
         cat._categories = new_categories
@@ -734,7 +739,7 @@ def __array__(self, dtype=None):
             A numpy array of either the specified dtype or, if dtype==None (default), the same
             dtype as categorical.categories.dtype
         """
-        ret = com.take_1d(self.categories.values, self._codes)
+        ret = take_1d(self.categories.values, self._codes)
         if dtype and dtype != self.categories.dtype:
             return np.asarray(ret, dtype)
         return ret
@@ -822,8 +827,8 @@ def get_values(self):
 
         # if we are a period index, return a string repr
         if isinstance(self.categories, PeriodIndex):
-            return com.take_1d(np.array(self.categories.to_native_types(), dtype=object),
-                               self._codes)
+            return take_1d(np.array(self.categories.to_native_types(), dtype=object),
+                           self._codes)
 
         return np.array(self)
 
@@ -1010,7 +1015,7 @@ def fillna(self, fill_value=None, method=None, limit=None, **kwargs):
 
         else:
 
-            if not com.isnull(fill_value) and fill_value not in self.categories:
+            if not isnull(fill_value) and fill_value not in self.categories:
                 raise ValueError("fill value must be in categories")
 
             mask = values==-1
@@ -1031,7 +1036,7 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None):
         # but is passed thru internally
         assert isnull(fill_value)
 
-        codes = com.take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
+        codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
         result = Categorical(codes, categories=self.categories, ordered=self.ordered,
                              name=self.name, fastpath=True)
         return result
@@ -1178,7 +1183,7 @@ def __setitem__(self, key, value):
                 raise ValueError("Cannot set a Categorical with another, without identical "
                                  "categories")
 
-        rvalue = value if com.is_list_like(value) else [value]
+        rvalue = value if is_list_like(value) else [value]
         to_add = Index(rvalue).difference(self.categories)
         # no assignments of values not in categories, but it's always ok to set something to np.nan
         if len(to_add) and not isnull(to_add).all():
@@ -1221,7 +1226,7 @@ def __setitem__(self, key, value):
         # float categories do currently return -1 for np.nan, even if np.nan is included in the
         # index -> "repair" this here
         if isnull(rvalue).any() and isnull(self.categories).any():
-            nan_pos = np.where(com.isnull(self.categories))[0]
+            nan_pos = np.where(isnull(self.categories))[0]
             lindexer[lindexer == -1] = nan_pos
 
         key = self._maybe_coerce_indexer(key)
@@ -1304,7 +1309,7 @@ def mode(self):
 
         import pandas.hashtable as htable
         good = self._codes != -1
-        result = Categorical(sorted(htable.mode_int64(com._ensure_int64(self._codes[good]))),
+        result = Categorical(sorted(htable.mode_int64(_ensure_int64(self._codes[good]))),
                              categories=self.categories,ordered=self.ordered, name=self.name,
                              fastpath=True)
         return result
@@ -1373,9 +1378,9 @@ def describe(self):
             categories = np.arange(0,len(self.categories)+1 ,dtype=object)
             categories[:-1] = self.categories
             categories[-1] = np.nan
-            result.index = categories.take(com._ensure_platform_int(result.index))
+            result.index = categories.take(_ensure_platform_int(result.index))
         else:
-            result.index = self.categories.take(com._ensure_platform_int(result.index))
+            result.index = self.categories.take(_ensure_platform_int(result.index))
             result = result.reindex(self.categories)
         result.index.name = 'categories'
 
@@ -1447,23 +1452,72 @@ def _get_codes_for_values(values, categories):
 
     from pandas.core.algorithms import _get_data_algo, _hashtables
     if values.dtype != categories.dtype:
-        values = com._ensure_object(values)
-        categories = com._ensure_object(categories)
+        values = _ensure_object(values)
+        categories = _ensure_object(categories)
     (hash_klass, vec_klass), vals = _get_data_algo(values, _hashtables)
     t = hash_klass(len(categories))
-    t.map_locations(com._values_from_object(categories))
-    return com._coerce_indexer_dtype(t.lookup(values), categories)
+    t.map_locations(_values_from_object(categories))
+    return _coerce_indexer_dtype(t.lookup(values), categories)
 
 def _convert_to_list_like(list_like):
     if hasattr(list_like, "dtype"):
         return list_like
     if isinstance(list_like, list):
         return list_like
-    if (com._is_sequence(list_like) or isinstance(list_like, tuple)
-                                    or isinstance(list_like, types.GeneratorType)):
+    if (_is_sequence(list_like) or isinstance(list_like, tuple)
+        or isinstance(list_like, types.GeneratorType)):
         return list(list_like)
     elif np.isscalar(list_like):
         return [list_like]
     else:
         # is this reached?
         return [list_like]
+
+def _concat_compat(to_concat, axis=0):
+    """
+    provide concatenation of an object/categorical array of arrays each of which is a single dtype
+
+    Parameters
+    ----------
+    to_concat : array of arrays
+    axis : axis to provide concatenation
+
+    Returns
+    -------
+    a single array, preserving the combined dtypes
+    """
+
+    def convert_categorical(x):
+        # coerce to object dtype
+        if is_categorical_dtype(x.dtype):
+            return x.get_values()
+        return x.ravel()
+
+    typs = get_dtype_kinds(to_concat)
+    if not len(typs-set(['object','category'])):
+
+        # we can only deal with object & category types here
+        pass
+
+    else:
+
+        # convert to object type and perform a regular concat
+        from pandas.core.common import _concat_compat
+        return _concat_compat([ np.array(x,copy=False).astype('object') for x in to_concat ],axis=axis)
+
+    # we could have object arrays and categoricals here
+    # if the categoricals all share the same categories then combine everything
+    # otherwise the categoricals are incompatible and we raise below
+    categoricals = [ x for x in to_concat if is_categorical_dtype(x.dtype) ]
+    objects = [ x for x in to_concat if is_object_dtype(x.dtype) ]
+
+    # validate the categories
+    categories = None
+    for x in categoricals:
+        if categories is None:
+            categories = x.categories
+        if not categories.equals(x.categories):
+            raise ValueError("incompatible categories in categorical concat")
+
+    # concat them
+    return Categorical(np.concatenate([ convert_categorical(x) for x in to_concat ],axis=axis), categories=categories)
diff --git a/pandas/core/common.py b/pandas/core/common.py
index f5de6c7da8914..759f5f1dfaf7a 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -2768,9 +2768,62 @@ def _check_as_is(x):
             self.queue.truncate(0)
 
 
+def get_dtype_kinds(l):
+    """
+    Parameters
+    ----------
+    l : list of arrays
+
+    Returns
+    -------
+    a set of kinds that exist in this list of arrays
+    """
+
+    typs = set()
+    for arr in l:
+
+        dtype = arr.dtype
+        if is_categorical_dtype(dtype):
+            typ = 'category'
+        elif isinstance(arr, ABCSparseArray):
+            typ = 'sparse'
+        elif is_datetime64_dtype(dtype):
+            typ = 'datetime'
+        elif is_timedelta64_dtype(dtype):
+            typ = 'timedelta'
+        elif is_object_dtype(dtype):
+            typ = 'object'
+        elif is_bool_dtype(dtype):
+            typ = 'bool'
+        else:
+            typ = dtype.kind
+        typs.add(typ)
+    return typs
+
 def _concat_compat(to_concat, axis=0):
+    """
+    provide concatenation of an array of arrays each of which is a single
+    'normalized' dtype (in that, for example, an object dtype array holds non-datetimelike values);
+    provide a combined dtype for the resulting array that preserves the overall dtype if possible
+
+    Parameters
+    ----------
+    to_concat : array of arrays
+    axis : axis to provide concatenation
+
+    Returns
+    -------
+    a single array, preserving the combined dtypes
+    """
+
     # filter empty arrays
-    nonempty = [x for x in to_concat if x.shape[axis] > 0]
+    # 1-d dtypes are always included here
+    def is_nonempty(x):
+        try:
+            return x.shape[axis] > 0
+        except Exception:
+            return True
+    nonempty = [x for x in to_concat if is_nonempty(x)]
 
     # If all arrays are empty, there's nothing to convert, just short-cut to
     # the concatenation, #3121.
@@ -2778,38 +2831,37 @@ def _concat_compat(to_concat, axis=0):
     # Creating an empty array directly is tempting, but the winnings would be
     # marginal given that it would still require shape & dtype calculation and
     # np.concatenate which has them both implemented is compiled.
-    if nonempty:
-
-        is_datetime64 = [x.dtype == _NS_DTYPE for x in nonempty]
-        is_timedelta64 = [x.dtype == _TD_DTYPE for x in nonempty]
-
-        if all(is_datetime64):
-            new_values = np.concatenate([x.view(np.int64) for x in nonempty],
-                                        axis=axis)
-            return new_values.view(_NS_DTYPE)
-        elif all(is_timedelta64):
-            new_values = np.concatenate([x.view(np.int64) for x in nonempty],
-                                        axis=axis)
-            return new_values.view(_TD_DTYPE)
-        elif any(is_datetime64) or any(is_timedelta64):
-            to_concat = [_to_pydatetime(x) for x in nonempty]
-
-    return np.concatenate(to_concat, axis=axis)
-
-
-def _to_pydatetime(x):
-    # coerce to an object dtyped
-
-    if x.dtype == _NS_DTYPE:
-        shape = x.shape
-        x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
-        x = x.reshape(shape)
-    elif x.dtype == _TD_DTYPE:
-        shape = x.shape
-        x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel())
-        x = x.reshape(shape)
-
-    return x
+
+    typs = get_dtype_kinds(to_concat)
+
+    # these are mandated to handle empties as well
+    if 'datetime' in typs or 'timedelta' in typs:
+        from pandas.tseries.common import _concat_compat
+        return _concat_compat(to_concat, axis=axis)
+
+    elif 'sparse' in typs:
+        from pandas.sparse.array import _concat_compat
+        return _concat_compat(to_concat, axis=axis)
+
+    elif 'category' in typs:
+        from pandas.core.categorical import _concat_compat
+        return _concat_compat(to_concat, axis=axis)
+
+    if not nonempty:
+
+        # we have all empties, but may need to coerce the result dtype to object if we
+        # have non-numeric type operands (numpy would otherwise cast this to float)
+        typs = get_dtype_kinds(to_concat)
+        if len(typs) != 1:
+
+            if not len(typs-set(['i','u','f'])) or not len(typs-set(['bool','i','u'])):
+                # let numpy coerce
+                pass
+            else:
+                # coerce to object
+                to_concat = [ x.astype('object') for x in to_concat ]
+
+    return np.concatenate(to_concat,axis=axis)
 
 def _where_compat(mask, arr1, arr2):
     if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index bccc0e7b6be14..89178ba2d9dcc 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -271,14 +271,15 @@ def _construct_axes_from_arguments(self, args, kwargs, require_all=False):
         return axes, kwargs
 
     @classmethod
-    def _from_axes(cls, data, axes):
+    def _from_axes(cls, data, axes, **kwargs):
         # for construction from BlockManager
         if isinstance(data, BlockManager):
-            return cls(data)
+            return cls(data, **kwargs)
         else:
             if cls._AXIS_REVERSED:
                 axes = axes[::-1]
             d = cls._construct_axes_dict_from(cls, axes, copy=False)
+            d.update(kwargs)
             return cls(data, **d)
 
     def _get_axis_number(self, axis):
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index bb81258efe4c5..7ab3e4d8d9482 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -493,18 +493,6 @@ def to_native_types(self, slicer=None, na_rep='', **kwargs):
         values[mask] = na_rep
         return values.tolist()
 
-    def _concat_blocks(self, blocks, values):
-        """ return the block concatenation """
-
-        # dispatch to a categorical to handle the concat
-        if self._holder is None:
-
-            for b in blocks:
-                if b.is_categorical:
-                    return b._concat_blocks(blocks,values)
-
-        return self._holder(values[0])
-
     # block actions ####
     def copy(self, deep=True):
         values = self.values
@@ -1759,34 +1747,6 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
                           ndim=self.ndim,
                           placement=self.mgr_locs)
 
-    def _concat_blocks(self, blocks, values):
-        """
-        validate that we can merge these blocks
-
-        return the block concatenation
-        """
-
-        # we could have object blocks and categorical's here
-        # if we only have a single cateogoricals then combine everything
-        # else its a non-compat categorical
-
-        categoricals = [ b for b in blocks if b.is_categorical ]
-        objects = [ b for b in blocks if not b.is_categorical and b.is_object ]
-
-        # convert everything to object and call it a day
-        if len(objects) + len(categoricals) != len(blocks):
-            raise ValueError("try to combine non-object blocks and categoricals")
-
-        # validate the categories
-        categories = None
-        for b in categoricals:
-            if categories is None:
-                categories = b.values.categories
-            if not categories.equals(b.values.categories):
-                raise ValueError("incompatible categories in categorical block merge")
-
-        return self._holder(values[0], categories=categories)
-
     def to_native_types(self, slicer=None, na_rep='', **kwargs):
         """ convert to our native types format, slicing if desired """
 
@@ -4102,22 +4062,15 @@ def get_empty_dtype_and_na(join_units):
         blk = join_units[0].block
         if blk is None:
             return np.float64, np.nan
-        else:
-            return blk.dtype, None
 
     has_none_blocks = False
     dtypes = [None] * len(join_units)
-
     for i, unit in enumerate(join_units):
         if unit.block is None:
             has_none_blocks = True
         else:
             dtypes[i] = unit.dtype
 
-    if not has_none_blocks and len(set(dtypes)) == 1:
-        # Unanimous decision, nothing to upcast.
-        return dtypes[0], None
-
     # dtypes = set()
     upcast_classes = set()
     null_upcast_classes = set()
@@ -4127,7 +4080,9 @@ def get_empty_dtype_and_na(join_units):
 
         if com.is_categorical_dtype(dtype):
             upcast_cls = 'category'
-        elif issubclass(dtype.type, (np.object_, np.bool_)):
+        elif issubclass(dtype.type, np.bool_):
+            upcast_cls = 'bool'
+        elif issubclass(dtype.type, np.object_):
             upcast_cls = 'object'
         elif is_datetime64_dtype(dtype):
             upcast_cls = 'datetime'
@@ -4150,6 +4105,11 @@ def get_empty_dtype_and_na(join_units):
     # create the result
     if 'object' in upcast_classes:
         return np.dtype(np.object_), np.nan
+    elif 'bool' in upcast_classes:
+        if has_none_blocks:
+            return np.dtype(np.object_), np.nan
+        else:
+            return np.dtype(np.bool_), None
     elif 'category' in upcast_classes:
         return com.CategoricalDtype(), np.nan
     elif 'float' in upcast_classes:
@@ -4184,14 +4144,7 @@ def concatenate_join_units(join_units, concat_axis, copy):
     else:
         concat_values = com._concat_compat(to_concat, axis=concat_axis)
 
-    if any(unit.needs_block_conversion for unit in join_units):
-
-        # need to ask the join unit block to convert to the underlying repr for us
-        blocks = [ unit.block for unit in join_units if unit.block is not None ]
-        return blocks[0]._concat_blocks(blocks, concat_values)
-    else:
-        return concat_values
-
+    return concat_values
 
 def get_mgr_concatenation_plan(mgr, indexers):
     """
@@ -4231,6 +4184,7 @@ def get_mgr_concatenation_plan(mgr, indexers):
     plan = []
     for blkno, placements in _get_blkno_placements(blknos, len(mgr.blocks),
                                                    group=False):
+
         assert placements.is_slice_like
 
         join_unit_indexers = indexers.copy()
@@ -4442,6 +4396,14 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
                     missing_arr.fill(fill_value)
                 return missing_arr
 
+            if not self.indexers:
+                if self.block.is_categorical:
+                    # preserve the categoricals for validation in _concat_compat
+                    return self.block.values
+                elif self.block.is_sparse:
+                    # preserve the sparse array for validation in _concat_compat
+                    return self.block.values
+
             if self.block.is_bool:
                 # External code requested filling/upcasting, bool values must
                 # be upcasted to object to avoid being upcasted to numeric.
@@ -4455,13 +4417,14 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
             # If there's no indexing to be done, we want to signal outside
             # code that this array must be copied explicitly.  This is done
             # by returning a view and checking `retval.base`.
-            return values.view()
+            values = values.view()
+
         else:
             for ax, indexer in self.indexers.items():
                 values = com.take_nd(values, indexer, axis=ax,
                                      fill_value=fill_value)
 
-            return values
+        return values
 
 
 def _fast_count_smallints(arr):
diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py
index 38a5688ed96e8..b765fdb8d67be 100644
--- a/pandas/sparse/array.py
+++ b/pandas/sparse/array.py
@@ -529,3 +529,46 @@ def make_sparse(arr, kind='block', fill_value=nan):
 ops.add_special_arithmetic_methods(SparseArray,
                                    arith_method=_arith_method,
                                    use_numexpr=False)
+
+
+
+def _concat_compat(to_concat, axis=0):
+    """
+    provide concatenation of a sparse/dense array of arrays each of which is a single dtype
+
+    Parameters
+    ----------
+    to_concat : array of arrays
+    axis : axis to provide concatenation
+
+    Returns
+    -------
+    a single array, preserving the combined dtypes
+    """
+
+    def convert_sparse(x, axis):
+        # coerce to native type
+        if isinstance(x, SparseArray):
+            x = x.get_values()
+        x = x.ravel()
+        if axis > 0:
+            x = np.atleast_2d(x)
+        return x
+
+    typs = com.get_dtype_kinds(to_concat)
+
+    # densify and do a regular concat; we re-sparsify below if possible
+    to_concat = [ convert_sparse(x, axis) for x in to_concat ]
+    result = np.concatenate(to_concat,axis=axis)
+
+    if not len(typs-set(['sparse','f','i'])):
+
+        # we can remain sparse
+        result = SparseArray(result.ravel())
+
+    else:
+
+        # coerce to object if needed
+        result = result.astype('object')
+
+    return result
diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py
index 105f661f08b10..9197a4fc22b9c 100644
--- a/pandas/sparse/tests/test_sparse.py
+++ b/pandas/sparse/tests/test_sparse.py
@@ -168,6 +168,9 @@ def test_construct_DataFrame_with_sp_series(self):
 
         assert_sp_series_equal(df['col'], self.bseries)
 
+        result = df.iloc[:,0]
+        assert_sp_series_equal(result, self.bseries)
+
         # blocking
         expected = Series({'col': 'float64:sparse'})
         result = df.ftypes
@@ -909,8 +912,8 @@ def test_dtypes(self):
     def test_str(self):
         df = DataFrame(np.random.randn(10000, 4))
         df.ix[:9998] = np.nan
-        sdf = df.to_sparse()
 
+        sdf = df.to_sparse()
         str(sdf)
 
     def test_array_interface(self):
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 624c6cf9688d6..dc82abfb40e02 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -2246,6 +2246,23 @@ def f():
         dfx['grade'].cat.categories
         self.assert_numpy_array_equal(df['grade'].cat.categories, dfx['grade'].cat.categories)
 
+        # GH 8641
+        # series concat not preserving category dtype
+        s = Series(list('abc'),dtype='category')
+        s2 = Series(list('abd'),dtype='category')
+
+        def f():
+            pd.concat([s,s2])
+        self.assertRaises(ValueError, f)
+
+        result = pd.concat([s,s],ignore_index=True)
+        expected = Series(list('abcabc')).astype('category')
+        tm.assert_series_equal(result, expected)
+
+        result = pd.concat([s,s])
+        expected = Series(list('abcabc'),index=[0,1,2,0,1,2]).astype('category')
+        tm.assert_series_equal(result, expected)
+
     def test_append(self):
         cat = pd.Categorical(["a","b"], categories=["a","b"])
         vals = [1,2]
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 938d171506461..9ecdcd2b12d75 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -6198,13 +6198,93 @@ def test_numpy_unique(self):
         # it works!
         result = np.unique(self.ts)
 
+    def test_concat_empty_series_dtypes_roundtrips(self):
+
+        # round-tripping with self & like self
+        dtypes = list(map(np.dtype,['float64','int8','uint8','bool','m8[ns]','M8[ns]']))  # materialize; iterated more than once below
+
+        for dtype in dtypes:
+            self.assertEqual(pd.concat([Series(dtype=dtype)]).dtype, dtype)
+            self.assertEqual(pd.concat([Series(dtype=dtype),
+                                        Series(dtype=dtype)]).dtype, dtype)
+
+        def int_result_type(dtype, dtype2):
+            typs = set([dtype.kind,dtype2.kind])
+            if not len(typs-set(['i','u','b'])) and (dtype.kind == 'i' or dtype2.kind == 'i'):
+                return 'i'
+            elif not len(typs-set(['u','b'])) and (dtype.kind == 'u' or dtype2.kind == 'u'):
+                return 'u'
+            return None
+
+        def float_result_type(dtype, dtype2):
+            typs = set([dtype.kind,dtype2.kind])
+            if not len(typs-set(['f','i','u'])) and (dtype.kind == 'f' or dtype2.kind == 'f'):
+                return 'f'
+            return None
+
+        def get_result_type(dtype, dtype2):
+            result = float_result_type(dtype, dtype2)
+            if result is not None:
+                return result
+            result = int_result_type(dtype, dtype2)
+            if result is not None:
+                return result
+            return 'O'
+
+        for dtype in dtypes:
+            for dtype2 in dtypes:
+                if dtype == dtype2:
+                    continue
+
+                expected = get_result_type(dtype, dtype2)
+                result = pd.concat([Series(dtype=dtype),
+                                    Series(dtype=dtype2)]).dtype
+                self.assertEqual(result.kind, expected)
+
     def test_concat_empty_series_dtypes(self):
-        self.assertEqual(pd.concat([Series(dtype=np.float64)]).dtype, np.float64)
-        self.assertEqual(pd.concat([Series(dtype=np.int8)]).dtype, np.int8)
-        self.assertEqual(pd.concat([Series(dtype=np.bool_)]).dtype, np.bool_)
 
+        # bools
         self.assertEqual(pd.concat([Series(dtype=np.bool_),
                                     Series(dtype=np.int32)]).dtype, np.int32)
+        self.assertEqual(pd.concat([Series(dtype=np.bool_),
+                                    Series(dtype=np.float32)]).dtype, np.object_)
+
+        # datetimelike
+        self.assertEqual(pd.concat([Series(dtype='m8[ns]'),
+                                    Series(dtype=np.bool)]).dtype, np.object_)
+        self.assertEqual(pd.concat([Series(dtype='m8[ns]'),
+                                    Series(dtype=np.int64)]).dtype, np.object_)
+        self.assertEqual(pd.concat([Series(dtype='M8[ns]'),
+                                    Series(dtype=np.bool)]).dtype, np.object_)
+        self.assertEqual(pd.concat([Series(dtype='M8[ns]'),
+                                    Series(dtype=np.int64)]).dtype, np.object_)
+        self.assertEqual(pd.concat([Series(dtype='M8[ns]'),
+                                    Series(dtype=np.bool_),
+                                    Series(dtype=np.int64)]).dtype, np.object_)
+
+        # categorical
+        self.assertEqual(pd.concat([Series(dtype='category'),
+                                    Series(dtype='category')]).dtype, 'category')
+        self.assertEqual(pd.concat([Series(dtype='category'),
+                                    Series(dtype='float64')]).dtype, np.object_)
+        self.assertEqual(pd.concat([Series(dtype='category'),
+                                    Series(dtype='object')]).dtype, 'category')
+
+        # sparse
+        result = pd.concat([Series(dtype='float64').to_sparse(),
+                            Series(dtype='float64').to_sparse()])
+        self.assertEqual(result.dtype,np.float64)
+        self.assertEqual(result.ftype,'float64:sparse')
+
+        result = pd.concat([Series(dtype='float64').to_sparse(),
+                            Series(dtype='float64')])
+        self.assertEqual(result.dtype,np.float64)
+        self.assertEqual(result.ftype,'float64:sparse')
+
+        result = pd.concat([Series(dtype='float64').to_sparse(),
+                            Series(dtype='object')])
+        self.assertEqual(result.dtype,np.object_)
+        self.assertEqual(result.ftype,'object:dense')
 
     def test_searchsorted_numeric_dtypes_scalar(self):
         s = Series([1, 2, 90, 1000, 3e9])
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index 7a89c317a69c6..2f0920b6d4e98 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -854,11 +854,17 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
         self.new_axes = self._get_new_axes()
 
     def get_result(self):
+
+        # series only
         if self._is_series:
+
+            # stack blocks
             if self.axis == 0:
-                new_data = com._concat_compat([x.get_values() for x in self.objs])
+                new_data = com._concat_compat([x.values for x in self.objs])
                 name = com._consensus_name_attr(self.objs)
                 return Series(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat')
+
+            # combine as columns in a frame
             else:
                 data = dict(zip(range(len(self.objs)), self.objs))
                 index, columns = self.new_axes
@@ -866,6 +872,8 @@ def get_result(self):
                 if columns is not None:
                     tmpdf.columns = columns
                 return tmpdf.__finalize__(self, method='concat')
+
+        # combine block managers
         else:
             mgrs_indexers = []
             for obj in self.objs:
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index 8f375ca168edd..c942998d430f4 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -2056,6 +2056,7 @@ def test_panel4d_concat_mixed_type(self):
         tm.assert_panel4d_equal(result, expected)
 
     def test_concat_series(self):
+
         ts = tm.makeTimeSeries()
         ts.name = 'foo'
 
diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py
index 227af42f07411..f12e0263bcf0c 100644
--- a/pandas/tseries/common.py
+++ b/pandas/tseries/common.py
@@ -5,6 +5,9 @@
 from pandas.core import common as com
 from pandas import Series, DatetimeIndex, PeriodIndex, TimedeltaIndex
 from pandas import lib, tslib
+from pandas.core.common import (_NS_DTYPE, _TD_DTYPE, is_period_arraylike,
+                                is_datetime_arraylike, is_integer_dtype, is_list_like,
+                                get_dtype_kinds)
 
 def is_datetimelike(data):
     """ return a boolean if we can be successfully converted to a datetimelike """
@@ -42,9 +45,9 @@ def maybe_to_datetimelike(data, copy=False):
     elif issubclass(data.dtype.type, np.timedelta64):
         return TimedeltaProperties(TimedeltaIndex(data, copy=copy, freq='infer'), index)
     else:
-        if com.is_period_arraylike(data):
+        if is_period_arraylike(data):
             return PeriodProperties(PeriodIndex(data, copy=copy), index)
-        if com.is_datetime_arraylike(data):
+        if is_datetime_arraylike(data):
             return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index)
 
     raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
@@ -60,9 +63,9 @@ def _delegate_property_get(self, name):
 
         # maybe need to upcast (ints)
         if isinstance(result, np.ndarray):
-            if com.is_integer_dtype(result):
+            if is_integer_dtype(result):
                 result = result.astype('int64')
-        elif not com.is_list_like(result):
+        elif not is_list_like(result):
             return result
 
         # return the result as a Series, which is by definition a copy
@@ -162,3 +165,50 @@ class PeriodProperties(Properties):
 PeriodProperties._add_delegate_accessors(delegate=PeriodIndex,
                                          accessors=PeriodIndex._datetimelike_ops,
                                          typ='property')
+
+def _concat_compat(to_concat, axis=0):
+    """
+    provide concatenation of a datetimelike array of arrays each of which is a single
+    M8[ns] or m8[ns] dtype
+
+    Parameters
+    ----------
+    to_concat : array of arrays
+    axis : axis to provide concatenation
+
+    Returns
+    -------
+    a single array, preserving the combined dtypes
+    """
+
+    def convert_to_pydatetime(x, axis):
+        # coerce to an object dtype
+        if x.dtype == _NS_DTYPE:
+            shape = x.shape
+            x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
+            x = x.reshape(shape)
+        elif x.dtype == _TD_DTYPE:
+            shape = x.shape
+            x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel())
+            x = x.reshape(shape)
+        return x
+
+    typs = get_dtype_kinds(to_concat)
+
+    # single dtype
+    if len(typs) == 1:
+
+        if not len(typs-set(['datetime'])):
+            new_values = np.concatenate([x.view(np.int64) for x in to_concat],
+                                        axis=axis)
+            return new_values.view(_NS_DTYPE)
+
+        elif not len(typs-set(['timedelta'])):
+            new_values = np.concatenate([x.view(np.int64) for x in to_concat],
+                                        axis=axis)
+            return new_values.view(_TD_DTYPE)
+
+    # need to coerce to object
+    to_concat = [convert_to_pydatetime(x, axis) for x in to_concat]
+
+    return np.concatenate(to_concat,axis=axis)