From 2ef52169767c50682e2e9ee7a5fda2163b80754e Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 15 Jan 2018 10:06:34 -0600
Subject: [PATCH 01/40] REF: Define extension base classes

---
 pandas/core/arrays/__init__.py                |   1 +
 pandas/core/arrays/base.py                    | 201 ++++++++++++++
 pandas/core/arrays/categorical.py             |  18 +-
 pandas/core/dtypes/base.py                    |  92 +++++++
 pandas/core/dtypes/common.py                  |  32 +++
 pandas/core/dtypes/dtypes.py                  |  14 +-
 pandas/core/internals.py                      | 248 +++++++++++++-----
 pandas/tests/dtypes/test_dtypes.py            |  36 ++-
 pandas/tests/internals/test_external_block.py |   4 +-
 9 files changed, 566 insertions(+), 80 deletions(-)
 create mode 100644 pandas/core/arrays/base.py
 create mode 100644 pandas/core/dtypes/base.py

diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
index ee32b12f0e712..f8adcf520c15b 100644
--- a/pandas/core/arrays/__init__.py
+++ b/pandas/core/arrays/__init__.py
@@ -1 +1,2 @@
+from .base import ExtensionArray  # noqa
 from .categorical import Categorical  # noqa
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
new file mode 100644
index 0000000000000..ad29edde34ce6
--- /dev/null
+++ b/pandas/core/arrays/base.py
@@ -0,0 +1,201 @@
+"""An interface for extending pandas with custom arrays."""
+import abc
+
+import numpy as np
+
+from pandas.compat import add_metaclass
+
+
+_not_implemented_message = "{} does not implement {}."
+
+
+@add_metaclass(abc.ABCMeta)
+class ExtensionArray(object):
+    """Abstract base class for custom array types
+
+    pandas will recognize instances of this class as proper arrays
+    with a custom type and will not attempt to coerce them to objects.
+
+    Subclasses are expected to implement the following methods.
+    """
+    # ------------------------------------------------------------------------
+    # Must be a Sequence
+    # ------------------------------------------------------------------------
+    @abc.abstractmethod
+    def __getitem__(self, item):
+        """Select a subset of self
+
+        Notes
+        -----
+        As a sequence, __getitem__ should expect integer or slice ``key``.
+
+        For slice ``key``, you should return an instance of yourself, even
+        if the slice is length 0 or 1.
+
+        For scalar ``key``, you may return a scalar suitable for your type.
+        The scalar need not be an instance or subclass of your array type.
+        """
+        # type (Any) -> Any
+
+    def __setitem__(self, key, value):
+        # type: (Any, Any) -> None
+        raise NotImplementedError(_not_implemented_message.format(
+            type(self), '__setitem__')
+        )
+
+    @abc.abstractmethod
+    def __iter__(self):
+        # type: () -> Iterator
+        pass
+
+    @abc.abstractmethod
+    def __len__(self):
+        # type: () -> int
+        pass
+
+    # ------------------------------------------------------------------------
+    # Required attributes
+    # ------------------------------------------------------------------------
+    @property
+    def base(self):
+        """The base array I am a view of. None by default."""
+
+    @property
+    @abc.abstractmethod
+    def dtype(self):
+        """An instance of 'ExtensionDtype'."""
+        # type: () -> ExtensionDtype
+        pass
+
+    @property
+    def shape(self):
+        # type: () -> Tuple[int, ...]
+        return (len(self),)
+
+    @property
+    def ndim(self):
+        # type: () -> int
+        """Extension Arrays are only allowed to be 1-dimensional."""
+        return 1
+
+    @property
+    @abc.abstractmethod
+    def nbytes(self):
+        """The number of bytes needed to store this object in memory."""
+        # type: () -> int
+        pass
+
+    # ------------------------------------------------------------------------
+    # Additional Methods
+    # ------------------------------------------------------------------------
+    @abc.abstractmethod
+    def isna(self):
+        """Boolean NumPy array indicating if each value is missing."""
+        # type: () -> np.ndarray
+        pass
+
+    # ------------------------------------------------------------------------
+    # Indexing methods
+    # ------------------------------------------------------------------------
+    @abc.abstractmethod
+    def take(self, indexer, allow_fill=True, fill_value=None):
+        # type: (Sequence, bool, Optional[Any]) -> ExtensionArray
+        """For slicing"""
+
+    def take_nd(self, indexer, allow_fill=True, fill_value=None):
+        """For slicing"""
+        # TODO: this isn't really nescessary for 1-D
+        return self.take(indexer, allow_fill=allow_fill,
+                         fill_value=fill_value)
+
+    @abc.abstractmethod
+    def copy(self, deep=False):
+        # type: (bool) -> ExtensionArray
+        """Return a copy of the array."""
+
+    # ------------------------------------------------------------------------
+    # Block-related methods
+    # ------------------------------------------------------------------------
+    @property
+    def _fill_value(self):
+        """The missing value for this type, e.g. np.nan"""
+        # type: () -> Any
+        return None
+
+    @abc.abstractmethod
+    def _formatting_values(self):
+        # type: () -> np.ndarray
+        # At the moment, this has to be an array since we use result.dtype
+        """An array of values to be printed in, e.g. the Series repr"""
+
+    @classmethod
+    @abc.abstractmethod
+    def _concat_same_type(cls, to_concat):
+        # type: (Sequence[ExtensionArray]) -> ExtensionArray
+        """Concatenate multiple arrays
+
+        Parameters
+        ----------
+        to_concat : sequence of this type
+
+        Returns
+        -------
+        ExtensionArray
+        """
+
+    @abc.abstractmethod
+    def get_values(self):
+        # type: () -> np.ndarray
+        """Get the underlying values backing your data
+        """
+        pass
+
+    def _can_hold_na(self):
+        """Whether your array can hold missing values. True by default.
+
+        Notes
+        -----
+        Setting this to false will optimize some operations like fillna.
+        """
+        # type: () -> bool
+        return True
+
+    @property
+    def is_sparse(self):
+        """Whether your array is sparse. True by default."""
+        # type: () -> bool
+        return False
+
+    def _slice(self, slicer):
+        # type: (Union[tuple, Sequence, int]) -> 'ExtensionArray'
+        """Return a new array sliced by `slicer`.
+
+        Parameters
+        ----------
+        slicer : slice or np.ndarray
+            If an array, it should just be a boolean mask
+
+        Returns
+        -------
+        array : ExtensionArray
+            Should return an ExtensionArray, even if ``self[slicer]``
+            would return a scalar.
+        """
+        return type(self)(self[slicer])
+
+    def value_counts(self, dropna=True):
+        """Optional method for computing the histogram of the counts.
+
+        Parameters
+        ----------
+        dropna : bool, default True
+            whether to exclude missing values from the computation
+
+        Returns
+        -------
+        counts : Series
+        """
+        from pandas.core.algorithms import value_counts
+        mask = ~np.asarray(self.isna())
+        values = self[mask]  # XXX: this imposes boolean indexing
+        return value_counts(np.asarray(values), dropna=dropna)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 708f903cd73cb..f0ec046e00e65 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -44,6 +44,8 @@
 from pandas.util._validators import validate_bool_kwarg
 from pandas.core.config import get_option
 
+from .base import ExtensionArray
+
 
 def _cat_compare_op(op):
     def f(self, other):
@@ -149,7 +151,7 @@ def _maybe_to_categorical(array):
 """
 
 
-class Categorical(PandasObject):
+class Categorical(ExtensionArray, PandasObject):
     """
     Represents a categorical variable in classic R / S-plus fashion
 
@@ -2131,6 +2133,20 @@ def repeat(self, repeats, *args, **kwargs):
         return self._constructor(values=codes, categories=self.categories,
                                  ordered=self.ordered, fastpath=True)
 
+    # Interface things
+    # can_hold_na, concat_same_type, formatting_values
+    @property
+    def _can_hold_na(self):
+        return True
+
+    @classmethod
+    def _concat_same_type(self, to_concat):
+        from pandas.types.concat import union_categoricals
+        return union_categoricals(to_concat)
+
+    def _formatting_values(self):
+        return self
+
 # The Series.cat accessor
 
 
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
new file mode 100644
index 0000000000000..348b4f077673a
--- /dev/null
+++ b/pandas/core/dtypes/base.py
@@ -0,0 +1,92 @@
+"""Extend pandas with custom array types"""
+import abc
+
+from pandas.compat import add_metaclass
+
+
+@add_metaclass(abc.ABCMeta)
+class ExtensionDtype(object):
+    """A custom data type for your array.
+    """
+    @property
+    def type(self):
+        """Typically a metaclass inheriting from 'type' with no methods."""
+        return type(self.name, (), {})
+
+    @property
+    def kind(self):
+        """A character code (one of 'biufcmMOSUV'), default 'O'
+
+        See Also
+        --------
+        numpy.dtype.kind
+        """
+        return 'O'
+
+    @property
+    @abc.abstractmethod
+    def name(self):
+        """A string identifying the data type.
+
+        Will be used in, e.g. ``Series.dtype``
+        """
+
+    @property
+    def names(self):
+        """Ordered list of field names, or None if there are no fields"""
+        return None
+
+    @classmethod
+    def construct_from_string(cls, string):
+        """Attempt to construct this type from a string.
+
+        Parameters
+        ----------
+        string : str
+
+        Returns
+        -------
+        self : instance of 'cls'
+
+        Raises
+        ------
+        TypeError
+
+        Notes
+        -----
+        The default implementation checks if 'string' matches your
+        type's name. If so, it calls your class with no arguments.
+        """
+        if string == cls.name:
+            return cls()
+        else:
+            raise TypeError("Cannot construct a '{}' from "
+                            "'{}'".format(cls, string))
+
+    @classmethod
+    def is_dtype(cls, dtype):
+        """Check if we match 'dtype'
+
+        Parameters
+        ----------
+        dtype : str or dtype
+
+        Returns
+        -------
+        is_dtype : bool
+
+        Notes
+        -----
+        The default implementation is True if
+
+        1. 'dtype' is a string that returns true for
+           ``cls.construct_from_string``
+        2. 'dtype' is ``cls`` or a subclass of ``cls``.
+        """
+        if isinstance(dtype, str):
+            try:
+                return isinstance(cls.construct_from_string(dtype), cls)
+            except TypeError:
+                return False
+        else:
+            return issubclass(dtype, cls)
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index dca9a5fde0d74..2e4d0d884bf95 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1685,6 +1685,38 @@ def is_extension_type(arr):
     return False
 
 
+def is_extension_array_dtype(arr_or_dtype):
+    """Check if an object is a pandas extension array type
+
+    Parameters
+    ----------
+    arr_or_dtype : object
+
+    Returns
+    -------
+    bool
+
+    Notes
+    -----
+    This checks whether an object implements the pandas extension
+    array interface. In pandas, this includes:
+
+    * Categorical
+    * PeriodArray
+    * IntervalArray
+    * SparseArray
+
+    Third-party libraries may implement arrays or types satisfying
+    this interface as well.
+    """
+    from pandas.core.arrays import ExtensionArray
+
+    # we want to unpack series, anything else?
+    if isinstance(arr_or_dtype, ABCSeries):
+        arr_or_dtype = arr_or_dtype.values
+    return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray))
+
+
 def is_complex_dtype(arr_or_dtype):
     """
     Check whether the provided array or dtype is of a complex dtype.
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 1eb87aa99fd1e..df7b0dc9ea60e 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -5,15 +5,15 @@
 from pandas import compat
 from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex
 
+from .base import ExtensionDtype
 
-class ExtensionDtype(object):
+
+class PandasExtensionDtype(ExtensionDtype):
     """
     A np.dtype duck-typed class, suitable for holding a custom dtype.
 
     THIS IS NOT A REAL NUMPY DTYPE
     """
-    name = None
-    names = None
     type = None
     subdtype = None
     kind = None
@@ -108,7 +108,7 @@ class CategoricalDtypeType(type):
     pass
 
 
-class CategoricalDtype(ExtensionDtype):
+class CategoricalDtype(PandasExtensionDtype):
     """
     Type for categorical data with the categories and orderedness
 
@@ -387,7 +387,7 @@ class DatetimeTZDtypeType(type):
     pass
 
 
-class DatetimeTZDtype(ExtensionDtype):
+class DatetimeTZDtype(PandasExtensionDtype):
 
     """
     A np.dtype duck-typed class, suitable for holding a custom datetime with tz
@@ -501,7 +501,7 @@ class PeriodDtypeType(type):
     pass
 
 
-class PeriodDtype(ExtensionDtype):
+class PeriodDtype(PandasExtensionDtype):
     __metaclass__ = PeriodDtypeType
     """
     A Period duck-typed class, suitable for holding a period with freq dtype.
@@ -619,7 +619,7 @@ class IntervalDtypeType(type):
     pass
 
 
-class IntervalDtype(ExtensionDtype):
+class IntervalDtype(PandasExtensionDtype):
     __metaclass__ = IntervalDtypeType
     """
     A Interval duck-typed class, suitable for holding an interval
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 764e06c19e76c..fb52a60c4cdd5 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -31,6 +31,7 @@
     is_datetimelike_v_numeric,
     is_float_dtype, is_numeric_dtype,
     is_numeric_v_string_like, is_extension_type,
+    is_extension_array_dtype,
     is_list_like,
     is_re,
     is_re_compilable,
@@ -59,7 +60,7 @@
 
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import maybe_convert_indices, length_of_indexer
-from pandas.core.arrays.categorical import Categorical, _maybe_to_categorical
+from pandas.core.arrays import Categorical
 from pandas.core.indexes.datetimes import DatetimeIndex
 from pandas.io.formats.printing import pprint_thing
 
@@ -95,6 +96,7 @@ class Block(PandasObject):
     is_object = False
     is_categorical = False
     is_sparse = False
+    is_extension = False
     _box_to_block_values = True
     _can_hold_na = False
     _can_consolidate = True
@@ -107,14 +109,15 @@ class Block(PandasObject):
     def __init__(self, values, placement, ndim=None, fastpath=False):
         if ndim is None:
             ndim = values.ndim
-        elif values.ndim != ndim:
+        elif self._validate_ndim and values.ndim != ndim:
             raise ValueError('Wrong number of dimensions')
         self.ndim = ndim
 
         self.mgr_locs = placement
         self.values = values
 
-        if ndim and len(self.mgr_locs) != len(self.values):
+        if (self._validate_ndim and ndim and
+                len(self.mgr_locs) != len(self.values)):
             raise ValueError(
                 'Wrong number of items passed {val}, placement implies '
                 '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
@@ -273,7 +276,6 @@ def reshape_nd(self, labels, shape, ref_items, mgr=None):
 
         return a new block that is transformed to a nd block
         """
-
         return _block2d_to_blocknd(values=self.get_values().T,
                                    placement=self.mgr_locs, shape=shape,
                                    labels=labels, ref_items=ref_items)
@@ -548,15 +550,20 @@ def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
 
     def _astype(self, dtype, copy=False, errors='raise', values=None,
                 klass=None, mgr=None, **kwargs):
-        """
-        Coerce to the new type
+        """Coerce to the new type
 
+        Parameters
+        ----------
         dtype : str, dtype convertible
         copy : boolean, default False
             copy if indicated
         errors : str, {'raise', 'ignore'}, default 'ignore'
             - ``raise`` : allow exceptions to be raised
             - ``ignore`` : suppress exceptions. On error return original object
+
+        Returns
+        -------
+        Block
         """
         errors_legal_values = ('raise', 'ignore')
 
@@ -1695,24 +1702,20 @@ class NonConsolidatableMixIn(object):
     _holder = None
 
     def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs):
+        # Placement must be converted to BlockPlacement so that we can check
+        # its length
+        if not isinstance(placement, BlockPlacement):
+            placement = BlockPlacement(placement)
 
-        # Placement must be converted to BlockPlacement via property setter
-        # before ndim logic, because placement may be a slice which doesn't
-        # have a length.
-        self.mgr_locs = placement
-
-        # kludgetastic
+        # Maybe infer ndim from placement
         if ndim is None:
-            if len(self.mgr_locs) != 1:
+            if len(placement) != 1:
                 ndim = 1
             else:
                 ndim = 2
-        self.ndim = ndim
-
-        if not isinstance(values, self._holder):
-            raise TypeError("values must be {0}".format(self._holder.__name__))
-
-        self.values = values
+        super(NonConsolidatableMixIn, self).__init__(values, placement,
+                                                     ndim=ndim,
+                                                     fastpath=fastpath)
 
     @property
     def shape(self):
@@ -1763,7 +1766,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
 
         Returns
         -------
-        a new block(s), the result of the putmask
+        a new block, the result of the putmask
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')
 
@@ -1821,6 +1824,130 @@ def _unstack(self, unstacker_func, new_columns):
         return blocks, mask
 
 
+class ExtensionBlock(NonConsolidatableMixIn, Block):
+    """Block for holding extension types.
+
+    Notes
+    -----
+    This holds all 3rd-party extension types. It's also the immediate
+    parent class for our internal extension types' blocks, CategoricalBlock.
+
+    All extension arrays *must* be 1-D, which simplifies things a bit.
+    """
+    # Some questions / notes as comments, will be removed.
+    #
+    # Currently inherited from NCB. We'll keep it around until SparseBlock
+    # and DatetimeTZBlock are refactored.
+    # - set
+    # - iget
+    # - should_store
+    # - putmask
+    # - _slice
+    # - _try_cast_result
+    # - unstack
+
+    # Think about overriding these methods from Block
+    # - _maybe_downcast: (never downcast)
+
+    # Methods we can (probably) ignore and just use Block's:
+
+    # * replace / replace_single
+    #   Categorical got Object, but was hopefully unnecessary.
+    #   DatetimeTZ, Sparse got Block
+    # * is_view
+    #   Categorical overrides to say that it is not.
+    #   DatetimeTZ, Sparse inherits Base anyway
+
+    is_extension = True
+
+    # XXX
+    # is_bool is a change for CategoricalBlock. Used to inherit
+    # from Object to infer from values. If this matters, we should
+    # override it directly in CategoricalBlock so that we infer from
+    # the categories, not the codes.
+    is_bool = False
+
+    def __init__(self, values, placement, ndim=None, fastpath=False):
+        self._holder = type(values)
+        super(ExtensionBlock, self).__init__(values, placement, ndim=ndim,
+                                             fastpath=fastpath)
+
+    def get_values(self, dtype=None):
+        # ExtensionArrays must be iterable, so this works.
+        values = np.asarray(self.values)
+        if values.ndim == self.ndim - 1:
+            values = values.reshape((1,) + values.shape)
+        return values
+
+    def to_dense(self):
+        return self.values.to_dense().view()
+
+    def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
+        """
+        Take values according to indexer and return them as a block.
+        """
+        if fill_tuple is None:
+            fill_value = None
+        else:
+            fill_value = fill_tuple[0]
+
+        # axis doesn't matter; we are really a single-dim object
+        # but are passed the axis depending on the calling routine
+        # if it's REALLY axis 0, then this will be a reindex and not a take
+        new_values = self.values.take_nd(indexer, fill_value=fill_value)
+
+        # if we are a 1-dim object, then always place at 0
+        if self.ndim == 1:
+            new_mgr_locs = [0]
+        else:
+            if new_mgr_locs is None:
+                new_mgr_locs = self.mgr_locs
+
+        return self.make_block_same_class(new_values, new_mgr_locs)
+
+    def _can_hold_element(self, element):
+        # XXX:
+        # Not defined on NCM.
+        # Categorical got True from ObjectBlock
+        # DatetimeTZ gets DatetimeBlock
+        # Sparse gets Block
+        # Let's just assume yes for now, but we can maybe push
+        # this onto the array.
+        return True
+
+    def convert(self, copy=True, **kwargs):
+        # We're dedicated to a type, we don't convert.
+        # Taken from CategoricalBlock / Block.
+        return self.copy() if copy else self
+
+    def _slice(self, slicer):
+        """ return a slice of my values """
+
+        # slice the category
+        # return same dims as we currently have
+
+        if isinstance(slicer, tuple) and len(slicer) == 2:
+            if not is_null_slice(slicer[0]):
+                raise AssertionError("invalid slicing for a 1-ndim "
+                                     "categorical")
+            slicer = slicer[1]
+
+        return self.values._slice(slicer)
+
+    def formatting_values(self):
+        return self.values._formatting_values()
+
+    def concat_same_type(self, to_concat, placement=None):
+        """
+        Concatenate list of single blocks of the same type.
+        """
+        values = self._holder._concat_same_type(
+            [blk.values for blk in to_concat])
+        placement = placement or slice(0, len(values), 1)
+        return self.make_block_same_class(values, ndim=self.ndim,
+                                          placement=placement)
+
+
 class NumericBlock(Block):
     __slots__ = ()
     is_numeric = True
@@ -2334,7 +2461,7 @@ def re_replacer(s):
         return block
 
 
-class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock):
+class CategoricalBlock(ExtensionBlock):
     __slots__ = ()
     is_categorical = True
     _verify_integrity = True
@@ -2343,6 +2470,7 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock):
     _concatenator = staticmethod(_concat._concat_categorical)
 
     def __init__(self, values, placement, fastpath=False, **kwargs):
+        from pandas.core.arrays.categorical import _maybe_to_categorical
 
         # coerce to categorical if we can
         super(CategoricalBlock, self).__init__(_maybe_to_categorical(values),
@@ -2354,12 +2482,6 @@ def is_view(self):
         """ I am never a view """
         return False
 
-    def to_dense(self):
-        return self.values.to_dense().view()
-
-    def convert(self, copy=True, **kwargs):
-        return self.copy() if copy else self
-
     @property
     def array_dtype(self):
         """ the dtype to return if I want to construct this block as an
@@ -2367,13 +2489,6 @@ def array_dtype(self):
         """
         return np.object_
 
-    def _slice(self, slicer):
-        """ return a slice of my values """
-
-        # slice the category
-        # return same dims as we currently have
-        return self.values._slice(slicer)
-
     def _try_coerce_result(self, result):
         """ reverse of try_coerce_args """
 
@@ -2410,29 +2525,6 @@ def shift(self, periods, axis=0, mgr=None):
         return self.make_block_same_class(values=self.values.shift(periods),
                                           placement=self.mgr_locs)
 
-    def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
-        """
-        Take values according to indexer and return them as a block.bb
-        """
-        if fill_tuple is None:
-            fill_value = None
-        else:
-            fill_value = fill_tuple[0]
-
-        # axis doesn't matter; we are really a single-dim object
-        # but are passed the axis depending on the calling routing
-        # if its REALLY axis 0, then this will be a reindex and not a take
-        new_values = self.values.take_nd(indexer, fill_value=fill_value)
-
-        # if we are a 1-dim object, then always place at 0
-        if self.ndim == 1:
-            new_mgr_locs = [0]
-        else:
-            if new_mgr_locs is None:
-                new_mgr_locs = self.mgr_locs
-
-        return self.make_block_same_class(new_values, new_mgr_locs)
-
     def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
         """ convert to our native types format, slicing if desired """
 
@@ -2447,17 +2539,6 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
         # we are expected to return a 2-d ndarray
         return values.reshape(1, len(values))
 
-    def concat_same_type(self, to_concat, placement=None):
-        """
-        Concatenate list of single blocks of the same type.
-        """
-        values = self._concatenator([blk.values for blk in to_concat],
-                                    axis=self.ndim - 1)
-        # not using self.make_block_same_class as values can be object dtype
-        return make_block(
-            values, placement=placement or slice(0, len(values), 1),
-            ndim=self.ndim)
-
 
 class DatetimeBlock(DatetimeLikeBlockMixin, Block):
     __slots__ = ()
@@ -2465,7 +2546,8 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block):
     _can_hold_na = True
 
     def __init__(self, values, placement, fastpath=False, **kwargs):
-        if values.dtype != _NS_DTYPE:
+        if values.dtype != _NS_DTYPE and values.dtype.base != _NS_DTYPE:
+            # not datetime64 or datetime64tz
             values = conversion.ensure_datetime64ns(values)
 
         super(DatetimeBlock, self).__init__(values, fastpath=True,
@@ -2954,6 +3036,8 @@ def get_block_type(values, dtype=None):
         cls = BoolBlock
     elif is_categorical(values):
         cls = CategoricalBlock
+    elif is_extension_array_dtype(values):
+        cls = ExtensionBlock
     else:
         cls = ObjectBlock
     return cls
@@ -4681,6 +4765,7 @@ def form_blocks(arrays, names, axes):
     # generalize?
     items_dict = defaultdict(list)
     extra_locs = []
+    external_items = []
 
     names_idx = Index(names)
     if names_idx.equals(axes[0]):
@@ -4748,6 +4833,31 @@ def form_blocks(arrays, names, axes):
                       for i, _, array in items_dict['CategoricalBlock']]
         blocks.extend(cat_blocks)
 
+    if len(items_dict['ExtensionBlock']):
+
+        external_blocks = []
+        for i, _, array in items_dict['ExtensionBlock']:
+            if isinstance(array, ABCSeries):
+                array = array.values
+            # Allow our internal arrays to choose their block type.
+            block_type = getattr(array, '_block_type', ExtensionBlock)
+            external_blocks.append(
+                make_block(array, klass=block_type,
+                           fastpath=True, placement=[i]))
+        blocks.extend(external_blocks)
+
+    if len(external_items):
+        external_blocks = []
+        for i, _, array in external_items:
+            if isinstance(array, ABCSeries):
+                array = array.values
+            # Allow our internal arrays to choose their block type.
+            block_type = getattr(array, '_block_type', ExtensionBlock)
+            external_blocks.append(
+                make_block(array, klass=block_type,
+                           fastpath=True, placement=[i]))
+        blocks.extend(external_blocks)
+
     if len(extra_locs):
         shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])
 
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index d800a7b92b559..3423e22a4c64e 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -10,12 +10,14 @@
     Series, Categorical, CategoricalIndex, IntervalIndex, date_range)
 
 from pandas.compat import string_types
+from pandas.core.arrays import ExtensionArray
 from pandas.core.dtypes.dtypes import (
     DatetimeTZDtype, PeriodDtype,
-    IntervalDtype, CategoricalDtype)
+    IntervalDtype, CategoricalDtype, ExtensionDtype)
 from pandas.core.dtypes.common import (
     is_categorical_dtype, is_categorical,
     is_datetime64tz_dtype, is_datetimetz,
+    is_extension_array_dtype,
     is_period_dtype, is_period,
     is_dtype_equal, is_datetime64_ns_dtype,
     is_datetime64_dtype, is_interval_dtype,
@@ -742,3 +744,35 @@ def test_categorical_categories(self):
         tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
         c1 = CategoricalDtype(CategoricalIndex(['a', 'b']))
         tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
+
+
+class DummyArray(object):
+    pass
+
+
+class DummyDtype(object):
+    pass
+
+
+ExtensionArray.register(DummyArray)
+ExtensionDtype.register(DummyDtype)
+
+
+class TestExtensionArrayDtype(object):
+
+    @pytest.mark.parametrize('values', [
+        pd.Categorical([]),
+        pd.Categorical([]).dtype,
+        pd.Series(pd.Categorical([])),
+        DummyDtype(),
+        DummyArray(),
+    ])
+    def test_is_extension_array_dtype(self, values):
+        assert is_extension_array_dtype(values)
+
+    @pytest.mark.parametrize('values', [
+        np.array([]),
+        pd.Series(np.array([])),
+    ])
+    def test_is_not_extension_array_dtype(self, values):
+        assert not is_extension_array_dtype(values)
diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/internals/test_external_block.py
index 729ee0093b6dc..2487363df8f99 100644
--- a/pandas/tests/internals/test_external_block.py
+++ b/pandas/tests/internals/test_external_block.py
@@ -5,12 +5,12 @@
 
 import pandas as pd
 from pandas.core.internals import (
-    Block, BlockManager, SingleBlockManager, NonConsolidatableMixIn)
+    BlockManager, SingleBlockManager, ExtensionBlock)
 
 import pytest
 
 
-class CustomBlock(NonConsolidatableMixIn, Block):
+class CustomBlock(ExtensionBlock):
 
     _holder = np.ndarray
 

From 57e8b0fb81b8bfaeeae366e84f94ae1b20f55b35 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 18 Jan 2018 14:12:52 -0600
Subject: [PATCH 02/40] Updated for comments

* removed take_nd
* Changed to_dense to return get_values
* Fixed docstrings, types
* Removed is_sparse
---
 pandas/core/arrays/base.py | 41 ++++++++++++++++----------------------
 pandas/core/dtypes/base.py | 19 +++++++++++++-----
 pandas/core/internals.py   |  2 +-
 3 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index ad29edde34ce6..82c3b9f53e498 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -13,16 +13,26 @@
 class ExtensionArray(object):
     """Abstract base class for custom array types
 
+    Notes
+    -----
     pandas will recognize instances of this class as proper arrays
     with a custom type and will not attempt to coerce them to objects.
 
-    Subclasses are expected to implement the following methods.
+    **Restrictions on your class constructor**
+
+        * Your class should be able to be constructed with no arguments,
+          i.e. ``ExtensionArray()`` returns an instance.
+          TODO: See comment in ``ExtensionDtype.construct_from_string``
+        * Your class should be able to be constructed with instances of
+          our class, i.e. ``ExtensionArray(extension_array)`` should returns
+          an instance.
     """
     # ------------------------------------------------------------------------
     # Must be a Sequence
     # ------------------------------------------------------------------------
     @abc.abstractmethod
     def __getitem__(self, item):
+        # type (Any) -> Any
         """Select a subset of self
 
         Notes
@@ -35,7 +45,6 @@ def __getitem__(self, item):
         For scalar ``key``, you may return a scalar suitable for your type.
         The scalar need not be an instance or subclass of your array type.
         """
-        # type (Any) -> Any
 
     def __setitem__(self, key, value):
         # type: (Any, Any) -> None
@@ -63,9 +72,8 @@ def base(self):
     @property
     @abc.abstractmethod
     def dtype(self):
-        """An instance of 'ExtensionDtype'."""
         # type: () -> ExtensionDtype
-        pass
+        """An instance of 'ExtensionDtype'."""
 
     @property
     def shape(self):
@@ -81,18 +89,16 @@ def ndim(self):
     @property
     @abc.abstractmethod
     def nbytes(self):
-        """The number of bytes needed to store this object in memory."""
         # type: () -> int
-        pass
+        """The number of bytes needed to store this object in memory."""
 
     # ------------------------------------------------------------------------
     # Additional Methods
     # ------------------------------------------------------------------------
     @abc.abstractmethod
     def isna(self):
-        """Boolean NumPy array indicating if each value is missing."""
         # type: () -> np.ndarray
-        pass
+        """Boolean NumPy array indicating if each value is missing."""
 
     # ------------------------------------------------------------------------
     # Indexing methods
@@ -102,12 +108,6 @@ def take(self, indexer, allow_fill=True, fill_value=None):
         # type: (Sequence, bool, Optional[Any]) -> ExtensionArray
         """For slicing"""
 
-    def take_nd(self, indexer, allow_fill=True, fill_value=None):
-        """For slicing"""
-        # TODO: this isn't really nescessary for 1-D
-        return self.take(indexer, allow_fill=allow_fill,
-                         fill_value=fill_value)
-
     @abc.abstractmethod
     def copy(self, deep=False):
         # type: (bool) -> ExtensionArray
@@ -118,8 +118,8 @@ def copy(self, deep=False):
     # ------------------------------------------------------------------------
     @property
     def _fill_value(self):
-        """The missing value for this type, e.g. np.nan"""
         # type: () -> Any
+        """The missing value for this type, e.g. np.nan"""
         return None
 
     @abc.abstractmethod
@@ -146,26 +146,19 @@ def _concat_same_type(cls, to_concat):
     @abc.abstractmethod
     def get_values(self):
         # type: () -> np.ndarray
-        """Get the underlying values backing your data
+        """A NumPy array representing your data.
         """
-        pass
 
     def _can_hold_na(self):
+        # type: () -> bool
         """Whether your array can hold missing values. True by default.
 
         Notes
         -----
         Setting this to false will optimize some operations like fillna.
         """
-        # type: () -> bool
         return True
 
-    @property
-    def is_sparse(self):
-        """Whether your array is sparse. True by default."""
-        # type: () -> bool
-        return False
-
     def _slice(self, slicer):
         # type: (Union[tuple, Sequence, int]) -> 'ExtensionArray'
         """Return a new array sliced by `slicer`.
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 348b4f077673a..a8ef8b6b209b0 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -9,14 +9,19 @@ class ExtensionDtype(object):
     """A custom data type for your array.
     """
     @property
+    @abc.abstractmethod
     def type(self):
-        """Typically a metaclass inheriting from 'type' with no methods."""
-        return type(self.name, (), {})
+        # type: () -> type
+        """The scalar type for your array, e.g. ``int`` or ``object``."""
 
     @property
     def kind(self):
+        # type: () -> str
         """A character code (one of 'biufcmMOSUV'), default 'O'
 
+        This should match the NumPy dtype used when your array is
+        converted to an ndarray, which is probably 'O' for object.
+
         See Also
         --------
         numpy.dtype.kind
@@ -26,14 +31,16 @@ def kind(self):
     @property
     @abc.abstractmethod
     def name(self):
-        """An string identifying the data type.
+        # type: () -> str
+        """A string identifying the data type.
 
-        Will be used in, e.g. ``Series.dtype``
+        Will be used for display in, e.g. ``Series.dtype``
         """
 
     @property
     def names(self):
-        """Ordered list of field names, or None if there are no fields"""
+        # type: () -> Optional[List[str]]
+        """Ordered list of field names, or None if there are no fields."""
         return None
 
     @classmethod
@@ -58,6 +65,8 @@ def construct_from_string(cls, string):
         type's name. If so, it calls your class with no arguments.
         """
         if string == cls.name:
+            # XXX: Better to mandate a ``.from_empty`` classmethod
+            # rather than imposing this on the constructor?
             return cls()
         else:
             raise TypeError("Cannot construct a '{}' from "
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index fb52a60c4cdd5..dc64b471138bd 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1880,7 +1880,7 @@ def get_values(self, dtype=None):
         return values
 
     def to_dense(self):
-        return self.values.to_dense().view()
+        return self.values.get_values()
 
     def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
         """

From 01bd42fde9ac38491ac3098c0f8865a56f5358a5 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 18 Jan 2018 14:36:12 -0600
Subject: [PATCH 03/40] Remove metaclasses from PeriodDtype and IntervalDtype

---
 pandas/core/dtypes/dtypes.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index df7b0dc9ea60e..d8d3a96992757 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -502,7 +502,6 @@ class PeriodDtypeType(type):
 
 
 class PeriodDtype(PandasExtensionDtype):
-    __metaclass__ = PeriodDtypeType
     """
     A Period duck-typed class, suitable for holding a period with freq dtype.
 
@@ -620,7 +619,6 @@ class IntervalDtypeType(type):
 
 
 class IntervalDtype(PandasExtensionDtype):
-    __metaclass__ = IntervalDtypeType
     """
     A Interval duck-typed class, suitable for holding an interval
 

From ce81706b8997fa5b7d84b470807bbbf058e66176 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 18 Jan 2018 14:43:18 -0600
Subject: [PATCH 04/40] Fixup form_blocks rebase

---
 pandas/core/internals.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index dc64b471138bd..dfe4d8100a2fb 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -4765,7 +4765,6 @@ def form_blocks(arrays, names, axes):
     # generalize?
     items_dict = defaultdict(list)
     extra_locs = []
-    external_items = []
 
     names_idx = Index(names)
     if names_idx.equals(axes[0]):
@@ -4846,18 +4845,6 @@ def form_blocks(arrays, names, axes):
                            fastpath=True, placement=[i]))
         blocks.extend(external_blocks)
 
-    if len(external_items):
-        external_blocks = []
-        for i, _, array in external_items:
-            if isinstance(array, ABCSeries):
-                array = array.values
-            # Allow our internal arrays to chose their block type.
-            block_type = getattr(array, '_block_type', ExtensionBlock)
-            external_blocks.append(
-                make_block(array, klass=block_type,
-                           fastpath=True, placement=[i]))
-        blocks.extend(external_blocks)
-
     if len(extra_locs):
         shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])
 

From 87a70e3958d603868a0a5dff13b04f8779290965 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 18 Jan 2018 16:00:42 -0600
Subject: [PATCH 05/40] Restore concat casting cat -> object

---
 pandas/core/arrays/categorical.py |  5 +++--
 pandas/core/internals.py          | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index f0ec046e00e65..87fee8e8fd6ab 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2141,8 +2141,9 @@ def _can_hold_na(self):
 
     @classmethod
     def _concat_same_type(self, to_concat):
-        from pandas.types.concat import union_categoricals
-        return union_categoricals(to_concat)
+        from pandas.core.dtypes.concat import _concat_categorical
+
+        return _concat_categorical(to_concat)
 
     def _formatting_values(self):
         return self
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index dfe4d8100a2fb..2a3a1cee33484 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -2539,6 +2539,26 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
         # we are expected to return a 2-d ndarray
         return values.reshape(1, len(values))
 
+    def concat_same_type(self, to_concat, placement=None):
+        """
+        Concatenate list of single blocks of the same type.
+
+        Note that this CategoricalBlock._concat_same_type *may* not
+        return a CategoricalBlock. When the categories in `to_concat`
+        differ, this will return an object ndarray.
+
+        If / when we decide we don't like that behavior:
+
+        1. Change Categorical._concat_same_type to use union_categoricals
+        2. Delete this method.
+        """
+        values = self._concatenator([blk.values for blk in to_concat],
+                                    axis=self.ndim - 1)
+        # not using self.make_block_same_class as values can be object dtype
+        return make_block(
+            values, placement=placement or slice(0, len(values), 1),
+            ndim=self.ndim)
+
 
 class DatetimeBlock(DatetimeLikeBlockMixin, Block):
     __slots__ = ()

From 8c61886011392cabeb1354c893015e84cb8049d8 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jan 2018 10:20:45 -0600
Subject: [PATCH 06/40] Remove _slice, clarify semantics around __getitem__

---
 pandas/core/arrays/base.py | 36 +++++++++++++-----------------------
 pandas/core/internals.py   |  2 +-
 2 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 82c3b9f53e498..94f0b6c4b0e6b 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -33,17 +33,24 @@ class ExtensionArray(object):
     @abc.abstractmethod
     def __getitem__(self, item):
         # type (Any) -> Any
-        """Select a subset of self
+        """Select a subset of self.
 
         Notes
         -----
-        As a sequence, __getitem__ should expect integer or slice ``key``.
+        ``item`` may be one of
 
-        For slice ``key``, you should return an instance of yourself, even
+            * A scalar integer position
+            * A slice object
+            * A boolean mask the same length as 'self'
+
+        For scalar ``item``, return a scalar value suitable for the array's
+        type. This should be an instance of ``self.dtype.type``.
+
+        For slice ``item``, return an instance of ``ExtensionArray``, even
         if the slice is length 0 or 1.
 
-        For scalar ``key``, you may return a scalar suitable for your type.
-        The scalar need not be an instance or subclass of your array type.
+        For a boolean mask, return an instance of ``ExtensionArray``, filtered
+        to the values where ``item`` is True.
         """
 
     def __setitem__(self, key, value):
@@ -159,23 +166,6 @@ def _can_hold_na(self):
         """
         return True
 
-    def _slice(self, slicer):
-        # type: (Union[tuple, Sequence, int]) -> 'ExtensionArray'
-        """Return a new array sliced by `slicer`.
-
-        Parameters
-        ----------
-        slicer : slice or np.ndarray
-            If an array, it should just be a boolean mask
-
-        Returns
-        -------
-        array : ExtensionArray
-            Should return an ExtensionArray, even if ``self[slicer]``
-            would return a scalar.
-        """
-        return type(self)(self[slicer])
-
     def value_counts(self, dropna=True):
         """Optional method for computing the histogram of the counts.
 
@@ -190,5 +180,5 @@ def value_counts(self, dropna=True):
         """
         from pandas.core.algorithms import value_counts
         mask = ~np.asarray(self.isna())
-        values = self[mask]  # XXX: this imposes boolean indexing
+        values = self[mask]
         return value_counts(np.asarray(values), dropna=dropna)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 2a3a1cee33484..7620a3797b265 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1932,7 +1932,7 @@ def _slice(self, slicer):
                                      "categorical")
             slicer = slicer[1]
 
-        return self.values._slice(slicer)
+        return self.values[slicer]
 
     def formatting_values(self):
         return self.values._formatting_values()

From cb41803fe0b7abb2995b6ec9220b2874472677c5 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jan 2018 11:16:58 -0600
Subject: [PATCH 07/40] Document and use take.

---
 pandas/core/arrays/base.py | 24 ++++++++++++++++++++++--
 pandas/core/internals.py   |  2 +-
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 94f0b6c4b0e6b..7b70ccf908564 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -112,8 +112,28 @@ def isna(self):
     # ------------------------------------------------------------------------
     @abc.abstractmethod
     def take(self, indexer, allow_fill=True, fill_value=None):
-        # type: (Sequence, bool, Optional[Any]) -> ExtensionArray
-        """For slicing"""
+        # type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray
+        """Take elements from an array
+
+        Parameters
+        ----------
+        indexer : sequence of integers
+            indices to be taken. -1 is used to indicate values
+            that are missing.
+        allow_fill : bool, default True
+            If False, indexer is assumed to contain no -1 values so no filling
+            will be done. This short-circuits computation of a mask. Result is
+            undefined if allow_fill == False and -1 is present in indexer.
+        fill_value : any, default None
+            Fill value to replace -1 values with
+
+        Notes
+        -----
+        This should follow pandas' semantics where -1 indicates missing values.
+
+        This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the
+        indexer is a sequence of values.
+        """
 
     @abc.abstractmethod
     def copy(self, deep=False):
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 7620a3797b265..1bd4e10a1f5f9 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1894,7 +1894,7 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
         # axis doesn't matter; we are really a single-dim object
         # but are passed the axis depending on the calling routing
         # if its REALLY axis 0, then this will be a reindex and not a take
-        new_values = self.values.take_nd(indexer, fill_value=fill_value)
+        new_values = self.values.take(indexer, fill_value=fill_value)
 
         # if we are a 1-dim object, then always place at 0
         if self.ndim == 1:

From 65d5a61852c93d81eae986896f0ae68ffda5a675 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jan 2018 11:24:01 -0600
Subject: [PATCH 08/40] Clarify type, kind, init

---
 pandas/core/arrays/base.py |  3 ---
 pandas/core/dtypes/base.py | 10 ++++++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 7b70ccf908564..090da1fbdbe87 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -20,9 +20,6 @@ class ExtensionArray(object):
 
     **Restrictions on your class constructor**
 
-        * Your class should be able to be constructed with no arguments,
-          i.e. ``ExtensionArray()`` returns an instance.
-          TODO: See comment in ``ExtensionDtype.construct_from_string``
         * Your class should be able to be constructed with instances of
           our class, i.e. ``ExtensionArray(extension_array)`` should returns
           an instance.
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index a8ef8b6b209b0..c6e465999622d 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -12,7 +12,11 @@ class ExtensionDtype(object):
     @abc.abstractmethod
     def type(self):
         # type: () -> type
-        """The scalar type for your array, e.g. ``int`` or ``object``."""
+        """The scalar type for your array, e.g. ``int``
+
+        It's expected ``ExtensionArray[item]`` returns an instance
+        of ``ExtensionDtype.type`` for scalar ``item``.
+        """
 
     @property
     def kind(self):
@@ -20,7 +24,9 @@ def kind(self):
         """A character code (one of 'biufcmMOSUV'), default 'O'
 
         This should match the NumPy dtype used when your array is
-        converted to an ndarray, which is probably 'O' for object.
+        converted to an ndarray, which is probably 'O' for object if
+        your extension type cannot be represented as a built-in NumPy
+        type.
 
         See Also
         --------

From 57c749bd15a0ed28be1ad0c6012d2ba3fe650687 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jan 2018 11:34:38 -0600
Subject: [PATCH 09/40] Remove base

---
 pandas/core/arrays/base.py | 18 ++++++++++--------
 pandas/core/dtypes/base.py |  4 ++--
 pandas/core/internals.py   |  2 +-
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 090da1fbdbe87..dd4db74ba3cc7 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -20,9 +20,9 @@ class ExtensionArray(object):
 
     **Restrictions on your class constructor**
 
-        * Your class should be able to be constructed with instances of
-          our class, i.e. ``ExtensionArray(extension_array)`` should returns
-          an instance.
+        * Extension arrays should be able to be constructed with instances of
+          the class, i.e. ``ExtensionArray(extension_array)`` should return
+          an instance, not error.
     """
     # ------------------------------------------------------------------------
     # Must be a Sequence
@@ -69,10 +69,6 @@ def __len__(self):
     # ------------------------------------------------------------------------
     # Required attributes
     # ------------------------------------------------------------------------
-    @property
-    def base(self):
-        """The base array I am a view of. None by default."""
-
     @property
     @abc.abstractmethod
     def dtype(self):
@@ -94,7 +90,11 @@ def ndim(self):
     @abc.abstractmethod
     def nbytes(self):
         # type: () -> int
-        """The number of bytes needed to store this object in memory."""
+        """The number of bytes needed to store this object in memory.
+
+        If this is expensive to compute, return an approximate lower bound
+        on the number of bytes needed.
+        """
 
     # ------------------------------------------------------------------------
     # Additional Methods
@@ -127,6 +127,8 @@ def take(self, indexer, allow_fill=True, fill_value=None):
         Notes
         -----
         This should follow pandas' semantics where -1 indicates missing values.
+        Positions where indexer is ``-1`` should be filled with the missing
+        value for this type.
 
         This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the
         indexer is a sequence of values.
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index c6e465999622d..57e96b83a28c4 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -94,8 +94,8 @@ def is_dtype(cls, dtype):
         -----
         The default implementation is True if
 
-        1. 'dtype' is a string that returns true for
-           ``cls.construct_from_string``
+        1. ``cls.construct_from_string(dtype)`` is an instance
+           of ``cls``.
         2. 'dtype' is ``cls`` or a subclass of ``cls``.
         """
         if isinstance(dtype, str):
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 1bd4e10a1f5f9..17e588a85a6db 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -563,7 +563,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
 
         Returns
         -------
-        IntervalArray
+        Block
         """
         errors_legal_values = ('raise', 'ignore')
 

From 6736b0ff59a33929cb59639564cb9bf38fac0ff9 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 21 Jan 2018 08:15:07 -0600
Subject: [PATCH 10/40] API: Remove unused __iter__ and get_values

---
 pandas/core/arrays/base.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index dd4db74ba3cc7..54e8030df2640 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -56,11 +56,6 @@ def __setitem__(self, key, value):
             type(self), '__setitem__')
         )
 
-    @abc.abstractmethod
-    def __iter__(self):
-        # type: () -> Iterator
-        pass
-
     @abc.abstractmethod
     def __len__(self):
         # type: () -> int
@@ -169,12 +164,6 @@ def _concat_same_type(cls, to_concat):
         ExtensionArray
         """
 
-    @abc.abstractmethod
-    def get_values(self):
-        # type: () -> np.ndarray
-        """A NumPy array representing your data.
-        """
-
     def _can_hold_na(self):
         # type: () -> bool
         """Whether your array can hold missing values. True by default.

From e4acb598dc9ed1ef342b9898e02cd9f69e577273 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 21 Jan 2018 08:15:21 -0600
Subject: [PATCH 11/40] API: Implement repr and str

---
 pandas/core/dtypes/base.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 57e96b83a28c4..1bc46641ee6ef 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -8,6 +8,13 @@
 class ExtensionDtype(object):
     """A custom data type for your array.
     """
+
+    def __repr__(self):
+        return str(self)
+
+    def __str__(self):
+        return self.name
+
     @property
     @abc.abstractmethod
     def type(self):

From df68f3bbf33edbe47d3b7beda99b24d187a2ed7b Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 26 Jan 2018 18:46:19 +0100
Subject: [PATCH 12/40] Remove default value_counts for now

---
 pandas/core/arrays/base.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 54e8030df2640..68783b86dbe68 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -173,20 +173,3 @@ def _can_hold_na(self):
         Setting this to false will optimize some operations like fillna.
         """
         return True
-
-    def value_counts(self, dropna=True):
-        """Optional method for computing the histogram of the counts.
-
-        Parameters
-        ----------
-        dropna : bool, default True
-            whether to exclude missing values from the computation
-
-        Returns
-        -------
-        counts : Series
-        """
-        from pandas.core.algorithms import value_counts
-        mask = ~np.asarray(self.isna())
-        values = self[mask]
-        return value_counts(np.asarray(values), dropna=dropna)

From 2746a433ada4510aef44d784d2590da53954993e Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 27 Jan 2018 11:11:24 +0100
Subject: [PATCH 13/40] Fixed merge conflicts

---
 pandas/core/internals.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 9b3f827b97af3..27f99469538bd 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1688,8 +1688,7 @@ def __init__(self, values, placement, ndim=None):
             else:
                 ndim = 2
         super(NonConsolidatableMixIn, self).__init__(values, placement,
-                                                     ndim=ndim,
-                                                     fastpath=fastpath)
+                                                     ndim=ndim)
 
     @property
     def shape(self):
@@ -1901,7 +1900,7 @@ def _slice(self, slicer):
         # return same dims as we currently have
 
         if isinstance(slicer, tuple) and len(slicer) == 2:
-            if not is_null_slice(slicer[0]):
+            if not com.is_null_slice(slicer[0]):
                 raise AssertionError("invalid slicing for a 1-ndim "
                                      "categorical")
             slicer = slicer[1]
@@ -2447,7 +2446,7 @@ def __init__(self, values, placement):
 
         # coerce to categorical if we can
         super(CategoricalBlock, self).__init__(_maybe_to_categorical(values),
-                                               placement=placement, ndim=ndim)
+                                               placement=placement)
 
     @property
     def is_view(self):

From 34d2b99f20a1a487fa16cc2da0adf7f131544435 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 27 Jan 2018 11:20:40 +0100
Subject: [PATCH 14/40] Remove implementation of construct_from_string

---
 pandas/core/dtypes/base.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 1bc46641ee6ef..27e3f736d211d 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -57,6 +57,7 @@ def names(self):
         return None
 
     @classmethod
+    @abc.abstractmethod
     def construct_from_string(cls, string):
         """Attempt to construct this type from a string.
 
@@ -71,19 +72,26 @@ def construct_from_string(cls, string):
         Raises
         ------
         TypeError
+            If a class cannot be constructed from this 'string'.
 
         Notes
         -----
         The default implementation checks if 'string' matches your
         type's name. If so, it calls your class with no arguments.
+
+        Examples
+        --------
+        If the extension dtype can be constructed without any arguments,
+        the following may be an adequate implementation.
+
+        >>> @classmethod
+        ... def construct_from_string(cls, string):
+        ...     if string == cls.name:
+        ...         return cls()
+        ...     else:
+        ...         raise TypeError("Cannot construct a '{}' from "
+        ...                         "'{}'".format(cls, string))
         """
-        if string == cls.name:
-            # XXX: Better to mandate a ``.from_empty`` classmethod
-            # rather than imposing this on the constructor?
-            return cls()
-        else:
-            raise TypeError("Cannot construct a '{}' from "
-                            "'{}'".format(cls, string))
 
     @classmethod
     def is_dtype(cls, dtype):

From a484d615fce72483f5ef82c60b90042b917671b3 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 27 Jan 2018 11:21:04 +0100
Subject: [PATCH 15/40] Example implementation of take

---
 pandas/core/arrays/base.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 68783b86dbe68..402e65180c322 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -37,8 +37,9 @@ def __getitem__(self, item):
         ``item`` may be one of
 
             * A scalar integer position
-            * A slice object
-            * A boolean mask the same length as 'self'
+            * A slice object, where 'start', 'stop', and 'step' are
+              integers or None
+            * A 1-d boolean NumPy ndarray the same length as 'self'
 
         For scalar ``item``, return a scalar value suitable for the array's
         type. This should be an instance of ``self.dtype.type``.
@@ -105,7 +106,7 @@ def isna(self):
     @abc.abstractmethod
     def take(self, indexer, allow_fill=True, fill_value=None):
         # type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray
-        """Take elements from an array
+        """Take elements from an array.
 
         Parameters
         ----------
@@ -117,7 +118,8 @@ def take(self, indexer, allow_fill=True, fill_value=None):
             will be done. This short-circuits computation of a mask. Result is
             undefined if allow_fill == False and -1 is present in indexer.
         fill_value : any, default None
-            Fill value to replace -1 values with
+            Fill value to replace -1 values with. By default, this uses
+            the missing value sentinel for this type, ``self._fill_value``.
 
         Notes
         -----
@@ -127,6 +129,20 @@ def take(self, indexer, allow_fill=True, fill_value=None):
 
         This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the
         indexer is a sequence of values.
+
+        Examples
+        --------
+        Suppose the extension array is actually a NumPy structured array with
+        two fields, and that the underlying structured array is stored as
+        ``self.data``. ``take`` may be written as
+
+        >>> def take(self, indexer, allow_fill=True, fill_value=None):
+        ...     mask = indexer == -1
+        ...     result = self.data.take(indexer)
+        ...     result[mask] = self._fill_value
+        ...     return type(self)(result)
+
+        We ignore the 'allow_fill' and 'fill_value' arguments.
         """
 
     @abc.abstractmethod

From 04b2e723281fa5c4b1b2a8ad6b3ef3a98839880b Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 27 Jan 2018 17:46:28 +0100
Subject: [PATCH 16/40] Cleanup ExtensionBlock

---
 pandas/core/internals.py | 41 +++++-----------------------------------
 1 file changed, 5 insertions(+), 36 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 27f99469538bd..5000f7336eb02 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1805,45 +1805,14 @@ class ExtensionBlock(NonConsolidatableMixIn, Block):
     This is the holds all 3rd-party extension types. It's also the immediate
     parent class for our internal extension types' blocks, CategoricalBlock.
 
-    All extension arrays *must* be 1-D, which simplifies things a bit.
+    ExtensionArrays are limited to 1-D.
     """
-    # Some questions / notes as comments, will be removed.
-    #
-    # Currently inherited from NCB. We'll keep it around until SparseBlock
-    # and DatetimeTZBlock are refactored.
-    # - set
-    # - iget
-    # - should_store
-    # - putmask
-    # - _slice
-    # - _try_cast_result
-    # - unstack
-
-    # Think about overriding these methods from Block
-    # - _maybe_downcast: (never downcast)
-
-    # Methods we can (probably) ignore and just use Block's:
-
-    # * replace / replace_single
-    #   Categorical got Object, but was hopefully unnescessary.
-    #   DatetimeTZ, Sparse got Block
-    # * is_view
-    #   Categorical overrides to say that it is not.
-    #   DatetimeTZ, Sparse inherits Base anyway
-
     is_extension = True
-
-    # XXX
-    # is_bool is is a change for CategoricalBlock. Used to inherit
-    # from Object to infer from values. If this matters, we should
-    # override it directly in CategoricalBlock so that we infer from
-    # the categories, not the codes.
     is_bool = False
 
-    def __init__(self, values, placement, ndim=None, fastpath=False):
+    def __init__(self, values, placement, ndim=None):
         self._holder = type(values)
-        super(ExtensionBlock, self).__init__(values, placement, ndim=ndim,
-                                             fastpath=fastpath)
+        super(ExtensionBlock, self).__init__(values, placement, ndim=ndim)
 
     def get_values(self, dtype=None):
         # ExtensionArrays must be iterable, so this works.
@@ -1857,7 +1826,7 @@ def to_dense(self):
 
     def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
         """
-        Take values according to indexer and return them as a block.bb
+        Take values according to indexer and return them as a block.
         """
         if fill_tuple is None:
             fill_value = None
@@ -2441,7 +2410,7 @@ class CategoricalBlock(ExtensionBlock):
     _holder = Categorical
     _concatenator = staticmethod(_concat._concat_categorical)
 
-    def __init__(self, values, placement):
+    def __init__(self, values, placement, ndim=None):
         from pandas.core.arrays.categorical import _maybe_to_categorical
 
         # coerce to categorical if we can

From e77805318b3a031d79e83ed33eccd7bfec6e82be Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 27 Jan 2018 18:00:04 +0100
Subject: [PATCH 17/40] Pass through ndim

---
 pandas/core/internals.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 5000f7336eb02..259696c23cf58 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -2415,7 +2415,8 @@ def __init__(self, values, placement, ndim=None):
 
         # coerce to categorical if we can
         super(CategoricalBlock, self).__init__(_maybe_to_categorical(values),
-                                               placement=placement)
+                                               placement=placement,
+                                               ndim=ndim)
 
     @property
     def is_view(self):

From d15a7227078539941442bbaccd1a34bac2466057 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 27 Jan 2018 19:08:37 +0100
Subject: [PATCH 18/40] Use series._values

---
 pandas/core/dtypes/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 2e4d0d884bf95..bae9a2f866904 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1713,7 +1713,7 @@ def is_extension_array_dtype(arr_or_dtype):
 
     # we want to unpack series, anything else?
     if isinstance(arr_or_dtype, ABCSeries):
-        arr_or_dtype = arr_or_dtype.values
+        arr_or_dtype = arr_or_dtype._values
     return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray))
 
 

From b5f736da3181f2f3004cad167f8101517977bacd Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 27 Jan 2018 19:35:24 +0100
Subject: [PATCH 19/40] Removed repr, updated take doc

---
 pandas/core/arrays/base.py | 18 +++++++++---------
 pandas/core/dtypes/base.py |  3 ---
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 402e65180c322..7381fc004d2f5 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -132,17 +132,17 @@ def take(self, indexer, allow_fill=True, fill_value=None):
 
         Examples
         --------
-        Suppose the extension array is actually a NumPy structured array with
-        two fields, and that the underlying structured array is stored as
-        ``self.data``. ``take`` may be written as
+        Suppose the extension array is backed by a NumPy structured array
+        and that the underlying structured array is stored as ``self.data``.
+        Then ``take`` may be written as
 
-        >>> def take(self, indexer, allow_fill=True, fill_value=None):
-        ...     mask = indexer == -1
-        ...     result = self.data.take(indexer)
-        ...     result[mask] = self._fill_value
-        ...     return type(self)(result)
+        .. code-block:: python
 
-        We ignore the 'allow_fill' and 'fill_value' arguments.
+           def take(self, indexer, allow_fill=True, fill_value=None):
+               mask = indexer == -1
+               result = self.data.take(indexer)
+               result[mask] = self._fill_value
+               return type(self)(result)
         """
 
     @abc.abstractmethod
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 27e3f736d211d..ab0cde5431214 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -9,9 +9,6 @@ class ExtensionDtype(object):
     """A custom data type for your array.
     """
 
-    def __repr__(self):
-        return str(self)
-
     def __str__(self):
         return self.name
 

From 240e8f6f1b44fd401217961486efd079137616cb Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 28 Jan 2018 22:03:43 +0100
Subject: [PATCH 20/40] Various cleanups

---
 pandas/core/arrays/base.py        |  7 ++++---
 pandas/core/arrays/categorical.py |  3 +--
 pandas/core/dtypes/base.py        | 13 ++++---------
 pandas/core/dtypes/common.py      |  5 +----
 pandas/core/internals.py          |  9 ++-------
 5 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 7381fc004d2f5..a3bed7af63220 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1,8 +1,6 @@
 """An interface for extending pandas with custom arrays."""
 import abc
 
-import numpy as np
-
 from pandas.compat import add_metaclass
 
 
@@ -11,7 +9,7 @@
 
 @add_metaclass(abc.ABCMeta)
 class ExtensionArray(object):
-    """Abstract base class for custom array types
+    """Abstract base class for custom array types.
 
     Notes
     -----
@@ -23,6 +21,9 @@ class ExtensionArray(object):
         * Extension arrays should be able to be constructed with instances of
           the class, i.e. ``ExtensionArray(extension_array)`` should return
           an instance, not error.
+
+    Additionally, certain methods and interfaces are required for proper
+    this array to be properly stored inside a ``DataFrame`` or ``Series``.
     """
     # ------------------------------------------------------------------------
     # Must be a Sequence
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 43829d0917d81..40987cfe0f484 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2132,8 +2132,7 @@ def repeat(self, repeats, *args, **kwargs):
         return self._constructor(values=codes, categories=self.categories,
                                  ordered=self.ordered, fastpath=True)
 
-    # Interface things
-    # can_hold_na, concat_same_type, formatting_values
+    # ExtensionArray Interface things
     @property
     def _can_hold_na(self):
         return True
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index ab0cde5431214..b5257c444e6d3 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -6,7 +6,7 @@
 
 @add_metaclass(abc.ABCMeta)
 class ExtensionDtype(object):
-    """A custom data type for your array.
+    """A custom data type, to be paired with an ExtensionArray.
     """
 
     def __str__(self):
@@ -16,7 +16,7 @@ def __str__(self):
     @abc.abstractmethod
     def type(self):
         # type: () -> type
-        """The scalar type for your array, e.g. ``int``
+        """The scalar type for the array, e.g. ``int``
 
         It's expected ``ExtensionArray[item]`` returns an instance
         of ``ExtensionDtype.type`` for scalar ``item``.
@@ -27,9 +27,9 @@ def kind(self):
         # type () -> str
         """A character code (one of 'biufcmMOSUV'), default 'O'
 
-        This should match the NumPy dtype used when your array is
+        This should match the NumPy dtype used when the array is
         converted to an ndarray, which is probably 'O' for object if
-        your extension type cannot be represented as a built-in NumPy
+        the extension type cannot be represented as a built-in NumPy
         type.
 
         See Also
@@ -71,11 +71,6 @@ def construct_from_string(cls, string):
         TypeError
             If a class cannot be constructed from this 'string'.
 
-        Notes
-        -----
-        The default implementation checks if 'string' matches your
-        type's name. If so, it calls your class with no arguments.
-
         Examples
         --------
         If the extension dtype can be constructed without any arguments,
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index bae9a2f866904..c66e7fcfc6978 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1686,7 +1686,7 @@ def is_extension_type(arr):
 
 
 def is_extension_array_dtype(arr_or_dtype):
-    """Check if an object is a pandas extension array type
+    """Check if an object is a pandas extension array type.
 
     Parameters
     ----------
@@ -1702,9 +1702,6 @@ def is_extension_array_dtype(arr_or_dtype):
     array interface. In pandas, this includes:
 
     * Categorical
-    * PeriodArray
-    * IntervalArray
-    * SparseArray
 
     Third-party libraries may implement arrays or types satisfying
     this interface as well.
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 259696c23cf58..9afb412f93781 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1848,13 +1848,8 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
         return self.make_block_same_class(new_values, new_mgr_locs)
 
     def _can_hold_element(self, element):
-        # XXX:
-        # Not defined on NCM.
-        # Categorical got True from ObjectBlock
-        # DatetimeTZ gets DatetimeBlock
-        # Sparse gets Block
-        # Let's just assume yes for now, but we can maybe push
-        # this onto the array.
+        # XXX: We may need to think about pushing this onto the array.
+        # We're doing the same as CategoricalBlock here.
         return True
 
     def convert(self, copy=True, **kwargs):

From f9b0b49b20fcb8096049fe44de40309c6a03f758 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 29 Jan 2018 00:56:51 +0100
Subject: [PATCH 21/40] Handle get_values, to_dense, is_view

---
 pandas/core/internals.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 9afb412f93781..1f7d7f47f83eb 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1814,6 +1814,11 @@ def __init__(self, values, placement, ndim=None):
         self._holder = type(values)
         super(ExtensionBlock, self).__init__(values, placement, ndim=ndim)
 
+    @property
+    def is_view(self):
+        """Extension arrays are never treated as views."""
+        return False
+
     def get_values(self, dtype=None):
         # ExtensionArrays must be iterable, so this works.
         values = np.asarray(self.values)
@@ -1822,7 +1827,7 @@ def get_values(self, dtype=None):
         return values
 
     def to_dense(self):
-        return self.values.get_values()
+        return np.asarray(self.values)
 
     def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
         """
@@ -2412,12 +2417,6 @@ def __init__(self, values, placement, ndim=None):
         super(CategoricalBlock, self).__init__(_maybe_to_categorical(values),
                                                placement=placement,
                                                ndim=ndim)
-
-    @property
-    def is_view(self):
-        """ I am never a view """
-        return False
-
     @property
     def array_dtype(self):
         """ the dtype to return if I want to construct this block as an
@@ -2461,6 +2460,12 @@ def shift(self, periods, axis=0, mgr=None):
         return self.make_block_same_class(values=self.values.shift(periods),
                                           placement=self.mgr_locs)
 
+    def to_dense(self):
+        # Categorical.get_values returns a DatetimeIndex for datetime
+        # categories, so we can't simply use `np.asarray(self.values)` like
+        # other types.
+        return self.values.get_values()
+
     def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
         """ convert to our native types format, slicing if desired """
 

From 79131861d17c40ab843ee3e00a4afed6e0c39b01 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 30 Jan 2018 09:07:59 -0600
Subject: [PATCH 22/40] Docs

---
 pandas/core/arrays/base.py | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index a3bed7af63220..f6796eb1cedcf 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -33,15 +33,22 @@ def __getitem__(self, item):
         # type (Any) -> Any
         """Select a subset of self.
 
-        Notes
-        -----
-        ``item`` may be one of
+        Parameters
+        ----------
+        item : int, slice, or ndarray
+            * int: The position in 'self' to get.
 
-            * A scalar integer position
-            * A slice object, where 'start', 'stop', and 'step' are
+            * slice: A slice object, where 'start', 'stop', and 'step' are
               integers or None
-            * A 1-d boolean NumPy ndarray the same length as 'self'
 
+            * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
+
+        Returns
+        -------
+        item : scalar or ExtensionArray
+
+        Notes
+        -----
         For scalar ``item``, return a scalar value suitable for the array's
         type. This should be an instance of ``self.dtype.type``.
 
@@ -60,6 +67,12 @@ def __setitem__(self, key, value):
 
     @abc.abstractmethod
     def __len__(self):
+        """Length of this array
+
+        Returns
+        -------
+        length : int
+        """
         # type: () -> int
         pass
 
@@ -149,7 +162,17 @@ def take(self, indexer, allow_fill=True, fill_value=None):
     @abc.abstractmethod
     def copy(self, deep=False):
         # type: (bool) -> ExtensionArray
-        """Return a copy of the array."""
+        """Return a copy of the array.
+
+        Parameters
+        ----------
+        deep : bool, default False
+            Also copy the underlying data backing this array.
+
+        Returns
+        -------
+        ExtensionArray
+        """
 
     # ------------------------------------------------------------------------
     # Block-related methods

From df18c3b95de1ab75dfa9a4cc56a647a2a4c7f6ca Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 30 Jan 2018 09:08:05 -0600
Subject: [PATCH 23/40] Remove is_extension, is_bool

Remove inherited convert
---
 pandas/core/internals.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 1f7d7f47f83eb..c725fa6103c45 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -98,7 +98,6 @@ class Block(PandasObject):
     is_object = False
     is_categorical = False
     is_sparse = False
-    is_extension = False
     _box_to_block_values = True
     _can_hold_na = False
     _can_consolidate = True
@@ -1807,9 +1806,6 @@ class ExtensionBlock(NonConsolidatableMixIn, Block):
 
     ExtensionArrays are limited to 1-D.
     """
-    is_extension = True
-    is_bool = False
-
     def __init__(self, values, placement, ndim=None):
         self._holder = type(values)
         super(ExtensionBlock, self).__init__(values, placement, ndim=ndim)
@@ -1857,11 +1853,6 @@ def _can_hold_element(self, element):
         # We're doing the same as CategoricalBlock here.
         return True
 
-    def convert(self, copy=True, **kwargs):
-        # We're dedicated to a type, we don't convert.
-        # Taken from CategoricalBlock / Block.
-        return self.copy() if copy else self
-
     def _slice(self, slicer):
         """ return a slice of my values """
 
@@ -2417,6 +2408,7 @@ def __init__(self, values, placement, ndim=None):
         super(CategoricalBlock, self).__init__(_maybe_to_categorical(values),
                                                placement=placement,
                                                ndim=ndim)
+
     @property
     def array_dtype(self):
         """ the dtype to return if I want to construct this block as an

From ab2f0457839fece3b3ef067f29994b42908bd037 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 30 Jan 2018 10:02:49 -0600
Subject: [PATCH 24/40] Sparse formatter

---
 pandas/io/formats/format.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 2293032ebb8a1..c5805fa3b6c46 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -21,6 +21,7 @@
     is_integer,
     is_float,
     is_scalar,
+    is_sparse,
     is_numeric_dtype,
     is_datetime64_dtype,
     is_timedelta64_dtype,
@@ -1803,6 +1804,8 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
         fmt_klass = CategoricalArrayFormatter
     elif is_interval_dtype(values):
         fmt_klass = IntervalArrayFormatter
+    elif is_sparse(values):
+        fmt_klass = SparseArrayFormatter
     elif is_float_dtype(values.dtype):
         fmt_klass = FloatArrayFormatter
     elif is_period_arraylike(values):
@@ -2115,6 +2118,15 @@ def _format_strings(self):
         return fmt_values
 
 
+class SparseArrayFormatter(GenericArrayFormatter):
+
+    def _format_strings(self):
+        return format_array(self.values.get_values(), self.formatter,
+                            float_format=self.float_format,
+                            na_rep=self.na_rep, digits=self.digits,
+                            space=self.space, justify=self.justify)
+
+
 def format_percentiles(percentiles):
     """
     Outputs rounded and formatted percentiles.

From 520876f73650232c30e8ececd869d24e9ec28a60 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 30 Jan 2018 12:47:57 -0600
Subject: [PATCH 25/40] Revert "Sparse formatter"

This reverts commit ab2f0457839fece3b3ef067f29994b42908bd037.
---
 pandas/io/formats/format.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index c5805fa3b6c46..2293032ebb8a1 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -21,7 +21,6 @@
     is_integer,
     is_float,
     is_scalar,
-    is_sparse,
     is_numeric_dtype,
     is_datetime64_dtype,
     is_timedelta64_dtype,
@@ -1804,8 +1803,6 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
         fmt_klass = CategoricalArrayFormatter
     elif is_interval_dtype(values):
         fmt_klass = IntervalArrayFormatter
-    elif is_sparse(values):
-        fmt_klass = SparseArrayFormatter
     elif is_float_dtype(values.dtype):
         fmt_klass = FloatArrayFormatter
     elif is_period_arraylike(values):
@@ -2118,15 +2115,6 @@ def _format_strings(self):
         return fmt_values
 
 
-class SparseArrayFormatter(GenericArrayFormatter):
-
-    def _format_strings(self):
-        return format_array(self.values.get_values(), self.formatter,
-                            float_format=self.float_format,
-                            na_rep=self.na_rep, digits=self.digits,
-                            space=self.space, justify=self.justify)
-
-
 def format_percentiles(percentiles):
     """
     Outputs rounded and formatted percentiles.

From 4dfa39ca239d409b8bbb02b253dced775098ca9a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 30 Jan 2018 12:49:12 -0600
Subject: [PATCH 26/40] Unbox SparseSeries

---
 pandas/core/internals.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index c725fa6103c45..09a5877005dc3 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -2792,6 +2792,13 @@ class SparseBlock(NonConsolidatableMixIn, Block):
     _holder = SparseArray
     _concatenator = staticmethod(_concat._concat_sparse)
 
+    def __init__(self, values, placement, ndim=None):
+        # Ensure that we have the underlying SparseArray here...
+        if isinstance(values, ABCSeries):
+            values = values.values
+        assert isinstance(values, SparseArray)
+        super(SparseBlock, self).__init__(values, placement, ndim=ndim)
+
     @property
     def shape(self):
         return (len(self.mgr_locs), self.sp_index.length)

From e252103266b5e303ba54b092777b0e07f83baee6 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 30 Jan 2018 13:03:47 -0600
Subject: [PATCH 27/40] Added test for sparse consolidation

---
 pandas/tests/sparse/frame/test_frame.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
index 2b589ebd4735e..4d49b82e67946 100644
--- a/pandas/tests/sparse/frame/test_frame.py
+++ b/pandas/tests/sparse/frame/test_frame.py
@@ -574,6 +574,15 @@ def test_setitem_array(self):
                                   self.frame['F'].reindex(index),
                                   check_names=False)
 
+    def test_setitem_chained_no_consolidate(self):
+        # https://github.com/pandas-dev/pandas/pull/19268
+        # issuecomment-361696418
+        # chained setitem used to cause consolidation
+        sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
+        with pd.option_context('mode.chained_assignment', None):
+            sdf[0][1] = 2
+        assert len(sdf._data.blocks) == 2
+
     def test_delitem(self):
         A = self.frame['A']
         C = self.frame['C']

From 7110b2a78d759174e6df811ae64f93806120805d Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 30 Jan 2018 13:31:18 -0600
Subject: [PATCH 28/40] Docs

---
 pandas/core/arrays/base.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index f6796eb1cedcf..e219652334cd5 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -9,18 +9,27 @@
 
 @add_metaclass(abc.ABCMeta)
 class ExtensionArray(object):
-    """Abstract base class for custom array types.
+    """Abstract base class for custom 1-D array types.
 
     Notes
     -----
     pandas will recognize instances of this class as proper arrays
     with a custom type and will not attempt to coerce them to objects.
 
-    **Restrictions on your class constructor**
+    ExtensionArrays are limited to 1 dimension.
 
-        * Extension arrays should be able to be constructed with instances of
-          the class, i.e. ``ExtensionArray(extension_array)`` should return
-          an instance, not error.
+    They may be backed by none, one, or many NumPy arrays. For example,
+    ``pandas.Categorical`` is an extension array backed by two arrays,
+    one for codes and one for categories. An array of IPv6 addresses may
+    be backed by a NumPy structured array with two fields, one for the
+    lower 64 bits and one for the upper 64 bits. Or they may be backed
+    by some other storage type, like Python lists. Pandas makes no
+    assumptions on how the data are stored, just that it can be converted
+    to a NumPy array.
+
+    Extension arrays should be able to be constructed with instances of
+    the class, i.e. ``ExtensionArray(extension_array)`` should return
+    an instance, not error.
 
     Additionally, certain methods and interfaces are required for proper
     this array to be properly stored inside a ``DataFrame`` or ``Series``.

From fc688a56e20e62e6b7b806aedc4a4a760af079b1 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 31 Jan 2018 11:45:24 -0600
Subject: [PATCH 29/40] Moved to errors

---
 pandas/core/common.py     | 16 ++--------------
 pandas/errors/__init__.py | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index e606be3cc2a23..6748db825acf0 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -25,7 +25,8 @@
 
 # compat
 from pandas.errors import (  # noqa
-    PerformanceWarning, UnsupportedFunctionCall, UnsortedIndexError)
+    PerformanceWarning, UnsupportedFunctionCall, UnsortedIndexError,
+    AbstractMethodError)
 
 # back-compat of public API
 # deprecate these functions
@@ -88,19 +89,6 @@ class SettingWithCopyWarning(Warning):
     pass
 
 
-class AbstractMethodError(NotImplementedError):
-    """Raise this error instead of NotImplementedError for abstract methods
-    while keeping compatibility with Python 2 and Python 3.
-    """
-
-    def __init__(self, class_instance):
-        self.class_instance = class_instance
-
-    def __str__(self):
-        msg = "This method must be defined in the concrete class of {name}"
-        return (msg.format(name=self.class_instance.__class__.__name__))
-
-
 def flatten(l):
     """Flatten an arbitrarily nested sequence.
 
diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
index 22b6d33be9d38..cfdcada801b9d 100644
--- a/pandas/errors/__init__.py
+++ b/pandas/errors/__init__.py
@@ -77,3 +77,17 @@ class NullFrequencyError(ValueError):
 
 class AccessorRegistrationWarning(Warning):
     """Warning for attribute conflicts in accessor registration."""
+
+
+class AbstractMethodError(NotImplementedError):
+    """Raise this error instead of NotImplementedError for abstract methods
+    while keeping compatibility with Python 2 and Python 3.
+    """
+
+    def __init__(self, class_instance):
+        self.class_instance = class_instance
+
+    def __str__(self):
+        msg = "This method must be defined in the concrete class of {name}"
+        return (msg.format(name=self.class_instance.__class__.__name__))
+

From fbc846644be3e97314b203dd3eab9a36ef50a274 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 31 Jan 2018 11:50:36 -0600
Subject: [PATCH 30/40] Handle classmethods, properties

---
 pandas/errors/__init__.py   | 16 +++++++++++++---
 pandas/tests/test_errors.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
index cfdcada801b9d..c1e855732f915 100644
--- a/pandas/errors/__init__.py
+++ b/pandas/errors/__init__.py
@@ -84,10 +84,20 @@ class AbstractMethodError(NotImplementedError):
     while keeping compatibility with Python 2 and Python 3.
     """
 
-    def __init__(self, class_instance):
+    def __init__(self, class_instance, methodtype='method'):
+        types = {'method', 'classmethod', 'staticmethod', 'property'}
+        if methodtype not in types:
+            msg = 'methodtype must be one of {}, got {} instead.'.format(
+                types, methodtype)  # allowed set first, then the bad value
+            raise ValueError(msg)
+        self.methodtype = methodtype
         self.class_instance = class_instance
 
     def __str__(self):
-        msg = "This method must be defined in the concrete class of {name}"
-        return (msg.format(name=self.class_instance.__class__.__name__))
+        if self.methodtype == 'classmethod':
+            name = self.class_instance.__name__
+        else:
+            name = self.class_instance.__class__.__name__
+        msg = "This {methodtype} must be defined in the concrete class {name}"
+        return (msg.format(methodtype=self.methodtype, name=name))
 
diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py
index babf88ef1df8d..e2a142366a89e 100644
--- a/pandas/tests/test_errors.py
+++ b/pandas/tests/test_errors.py
@@ -4,6 +4,8 @@
 from warnings import catch_warnings
 import pandas  # noqa
 import pandas as pd
+from pandas.errors import AbstractMethodError
+import pandas.util.testing as tm
 
 
 @pytest.mark.parametrize(
@@ -50,3 +52,30 @@ def test_error_rename():
             raise ParserError()
         except pd.parser.CParserError:
             pass
+
+
+class Foo:
+    @classmethod
+    def classmethod(cls):
+        raise AbstractMethodError(cls, methodtype='classmethod')
+
+    @property
+    def property(self):
+        raise AbstractMethodError(self, methodtype='property')
+
+    def method(self):
+        raise AbstractMethodError(self)
+
+
+def test_AbstractMethodError_classmethod():
+    xpr = "This classmethod must be defined in the concrete class Foo"
+    with tm.assert_raises_regex(AbstractMethodError, xpr):
+        Foo.classmethod()
+
+    xpr = "This property must be defined in the concrete class Foo"
+    with tm.assert_raises_regex(AbstractMethodError, xpr):
+        Foo().property
+
+    xpr = "This method must be defined in the concrete class Foo"
+    with tm.assert_raises_regex(AbstractMethodError, xpr):
+        Foo().method()

From 030bb194e523b24442db77c8cbabe16315880c73 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 31 Jan 2018 11:50:45 -0600
Subject: [PATCH 31/40] Use our AbstractMethodError

---
 pandas/core/arrays/base.py         | 48 +++++++++++++++++++-----------
 pandas/core/dtypes/base.py         | 25 +++++++++++-----
 pandas/tests/dtypes/test_dtypes.py |  8 ++---
 3 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index e219652334cd5..62d7b685163b7 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1,20 +1,35 @@
 """An interface for extending pandas with custom arrays."""
-import abc
-
-from pandas.compat import add_metaclass
-
+from pandas.errors import AbstractMethodError
 
 _not_implemented_message = "{} does not implement {}."
 
 
-@add_metaclass(abc.ABCMeta)
 class ExtensionArray(object):
     """Abstract base class for custom 1-D array types.
 
+    pandas will recognize instances of this class as proper arrays
+    with a custom type and will not attempt to coerce them to objects. They
+    may be stored directly inside a :class:`DataFrame` or :class:`Series`.
+
     Notes
     -----
-    pandas will recognize instances of this class as proper arrays
-    with a custom type and will not attempt to coerce them to objects.
+    The interface includes the following abstract methods that must be
+    implemented by subclasses:
+
+    * __getitem__
+    * __len__
+    * dtype
+    * nbytes
+    * isna
+    * take
+    * copy
+    * _formatting_values
+    * _concat_same_type
+
+    This class does not inherit from 'abc.ABCMeta' for performance reasons.
+    Methods and properties required by the interface raise
+    ``pandas.errors.AbstractMethodError`` and no ``register`` method is
+    provided for registering virtual subclasses.
 
     ExtensionArrays are limited to 1 dimension.
 
@@ -37,7 +52,6 @@ class ExtensionArray(object):
     # ------------------------------------------------------------------------
     # Must be a Sequence
     # ------------------------------------------------------------------------
-    @abc.abstractmethod
     def __getitem__(self, item):
         # type (Any) -> Any
         """Select a subset of self.
@@ -67,6 +81,7 @@ def __getitem__(self, item):
         For a boolean mask, return an instance of ``ExtensionArray``, filtered
         to the values where ``item`` is True.
         """
+        raise AbstractMethodError(self)
 
     def __setitem__(self, key, value):
         # type: (Any, Any) -> None
@@ -74,7 +89,6 @@ def __setitem__(self, key, value):
             type(self), '__setitem__')
         )
 
-    @abc.abstractmethod
     def __len__(self):
         """Length of this array
 
@@ -83,16 +97,16 @@ def __len__(self):
         length : int
         """
         # type: () -> int
-        pass
+        raise AbstractMethodError(self)
 
     # ------------------------------------------------------------------------
     # Required attributes
     # ------------------------------------------------------------------------
     @property
-    @abc.abstractmethod
     def dtype(self):
         # type: () -> ExtensionDtype
         """An instance of 'ExtensionDtype'."""
+        raise AbstractMethodError(self)
 
     @property
     def shape(self):
@@ -106,7 +120,6 @@ def ndim(self):
         return 1
 
     @property
-    @abc.abstractmethod
     def nbytes(self):
         # type: () -> int
         """The number of bytes needed to store this object in memory.
@@ -114,19 +127,19 @@ def nbytes(self):
         If this is expensive to compute, return an approximate lower bound
         on the number of bytes needed.
         """
+        raise AbstractMethodError(self)
 
     # ------------------------------------------------------------------------
     # Additional Methods
     # ------------------------------------------------------------------------
-    @abc.abstractmethod
     def isna(self):
         # type: () -> np.ndarray
         """Boolean NumPy array indicating if each value is missing."""
+        raise AbstractMethodError(self)
 
     # ------------------------------------------------------------------------
     # Indexing methods
     # ------------------------------------------------------------------------
-    @abc.abstractmethod
     def take(self, indexer, allow_fill=True, fill_value=None):
         # type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray
         """Take elements from an array.
@@ -167,8 +180,8 @@ def take(self, indexer, allow_fill=True, fill_value=None):
                result[mask] = self._fill_value
                return type(self)(result)
         """
+        raise AbstractMethodError(self)
 
-    @abc.abstractmethod
     def copy(self, deep=False):
         # type: (bool) -> ExtensionArray
         """Return a copy of the array.
@@ -182,6 +195,7 @@ def copy(self, deep=False):
         -------
         ExtensionArray
         """
+        raise AbstractMethodError(self)
 
     # ------------------------------------------------------------------------
     # Block-related methods
@@ -192,14 +206,13 @@ def _fill_value(self):
         """The missing value for this type, e.g. np.nan"""
         return None
 
-    @abc.abstractmethod
     def _formatting_values(self):
         # type: () -> np.ndarray
         # At the moment, this has to be an array since we use result.dtype
         """An array of values to be printed in, e.g. the Series repr"""
+        raise AbstractMethodError(self)
 
     @classmethod
-    @abc.abstractmethod
     def _concat_same_type(cls, to_concat):
         # type: (Sequence[ExtensionArray]) -> ExtensionArray
         """Concatenate multiple array
@@ -212,6 +225,7 @@ def _concat_same_type(cls, to_concat):
         -------
         ExtensionArray
         """
+        raise AbstractMethodError(cls)
 
     def _can_hold_na(self):
         # type: () -> bool
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index b5257c444e6d3..17171e3bcb25a 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -1,19 +1,29 @@
 """Extend pandas with custom array types"""
-import abc
+from pandas.errors import AbstractMethodError
 
-from pandas.compat import add_metaclass
 
-
-@add_metaclass(abc.ABCMeta)
 class ExtensionDtype(object):
     """A custom data type, to be paired with an ExtensionArray.
+
+    Notes
+    -----
+    The interface includes the following abstract methods that must
+    be implemented by subclasses:
+
+    * type
+    * name
+    * construct_from_string
+
+    This class does not inherit from 'abc.ABCMeta' for performance reasons.
+    Methods and properties required by the interface raise
+    ``pandas.errors.AbstractMethodError`` and no ``register`` method is
+    provided for registering virtual subclasses.
     """
 
     def __str__(self):
         return self.name
 
     @property
-    @abc.abstractmethod
     def type(self):
         # type: () -> type
         """The scalar type for the array, e.g. ``int``
@@ -21,6 +31,7 @@ def type(self):
         It's expected ``ExtensionArray[item]`` returns an instance
         of ``ExtensionDtype.type`` for scalar ``item``.
         """
+        raise AbstractMethodError(self)
 
     @property
     def kind(self):
@@ -39,13 +50,13 @@ def kind(self):
         return 'O'
 
     @property
-    @abc.abstractmethod
     def name(self):
         # type: () -> str
         """A string identifying the data type.
 
         Will be used for display in, e.g. ``Series.dtype``
         """
+        raise AbstractMethodError(self)
 
     @property
     def names(self):
@@ -54,7 +65,6 @@ def names(self):
         return None
 
     @classmethod
-    @abc.abstractmethod
     def construct_from_string(cls, string):
         """Attempt to construct this type from a string.
 
@@ -84,6 +94,7 @@ def construct_from_string(cls, string):
         ...         raise TypeError("Cannot construct a '{}' from "
         ...                         "'{}'".format(cls, string))
         """
+        raise AbstractMethodError(cls)
 
     @classmethod
     def is_dtype(cls, dtype):
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index 3423e22a4c64e..eca4dd4cf2106 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -746,18 +746,14 @@ def test_categorical_categories(self):
         tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
 
 
-class DummyArray(object):
+class DummyArray(ExtensionArray):
     pass
 
 
-class DummyDtype(object):
+class DummyDtype(ExtensionDtype):
     pass
 
 
-ExtensionArray.register(DummyArray)
-ExtensionDtype.register(DummyDtype)
-
-
 class TestExtensionArrayDtype(object):
 
     @pytest.mark.parametrize('values', [

From 0f4c2d797b7e7a791f0c96b1320bc8fe425a9a72 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 31 Jan 2018 13:22:17 -0600
Subject: [PATCH 32/40] Lint

---
 pandas/errors/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
index c1e855732f915..af4e83f506257 100644
--- a/pandas/errors/__init__.py
+++ b/pandas/errors/__init__.py
@@ -100,4 +100,3 @@ def __str__(self):
             name = self.class_instance.__class__.__name__
         msg = "This {methodtype} must be defined in the concrete class {name}"
         return (msg.format(methodtype=self.methodtype, name=name))
-

From f9316e0b7d8521746015811ae8d8d55a0266da57 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 31 Jan 2018 21:06:53 -0600
Subject: [PATCH 33/40] Cleanup

---
 pandas/core/arrays/base.py        | 11 ++++++++++-
 pandas/core/arrays/categorical.py |  2 +-
 pandas/core/dtypes/base.py        |  6 +++++-
 pandas/core/internals.py          |  9 ++++++++-
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 62d7b685163b7..1556b653819a6 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -26,6 +26,12 @@ class ExtensionArray(object):
     * _formatting_values
     * _concat_same_type
 
+    Some additional methods are required to satisfy pandas' internal, private
+    block API.
+
+    * _concat_same_type
+    * _can_hold_na
+
     This class does not inherit from 'abc.ABCMeta' for performance reasons.
     Methods and properties required by the interface raise
     ``pandas.errors.AbstractMethodError`` and no ``register`` method is
@@ -134,7 +140,10 @@ def nbytes(self):
     # ------------------------------------------------------------------------
     def isna(self):
         # type: () -> np.ndarray
-        """Boolean NumPy array indicating if each value is missing."""
+        """Boolean NumPy array indicating if each value is missing.
+
+        This should return a 1-D array the same length as 'self'.
+        """
         raise AbstractMethodError(self)
 
     # ------------------------------------------------------------------------
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 40987cfe0f484..62c6a6b16cbe9 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2132,7 +2132,7 @@ def repeat(self, repeats, *args, **kwargs):
         return self._constructor(values=codes, categories=self.categories,
                                  ordered=self.ordered, fastpath=True)
 
-    # ExtensionArray Interface things
+    # Implement the ExtensionArray interface
     @property
     def _can_hold_na(self):
         return True
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 17171e3bcb25a..c7c5378801f02 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -61,7 +61,11 @@ def name(self):
     @property
     def names(self):
         # type: () -> Optional[List[str]]
-        """Ordered list of field names, or None if there are no fields."""
+        """Ordered list of field names, or None if there are no fields.
+
+        This is for compatibility with NumPy arrays, and may be removed in the
+        future.
+        """
         return None
 
     @classmethod
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index bb5057bc412ad..9e2bd21c665f7 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1680,6 +1680,13 @@ class NonConsolidatableMixIn(object):
     _holder = None
 
     def __init__(self, values, placement, ndim=None):
+        """Initialize a non-consolidatable block.
+
+        'ndim' may be inferred from 'placement'.
+
+        This will continue to call __init__ for the other base
+        classes mixed in with this Mixin.
+        """
         # Placement must be converted to BlockPlacement so that we can check
         # its length
         if not isinstance(placement, BlockPlacement):
@@ -1806,7 +1813,7 @@ class ExtensionBlock(NonConsolidatableMixIn, Block):
 
     Notes
     -----
-    This is the holds all 3rd-party extension types. It's also the immediate
+    This holds all 3rd-party extension array types. It's also the immediate
     parent class for our internal extension types' blocks, CategoricalBlock.
 
     ExtensionArrays are limited to 1-D.

From 9c06b13d2c8ecf53d678a0ec613acea927bb3955 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 31 Jan 2018 21:07:04 -0600
Subject: [PATCH 34/40] Move ndim validation to a method.

---
 pandas/core/internals.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 9e2bd21c665f7..fdaebf57a7aa2 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -110,10 +110,10 @@ class Block(PandasObject):
     def __init__(self, values, placement, ndim=None):
         if ndim is None:
             ndim = values.ndim
-        elif self._validate_ndim and values.ndim != ndim:
-            raise ValueError('Wrong number of dimensions')
-        self.ndim = ndim
 
+        self._maybe_validate_ndim(values, ndim)
+
+        self.ndim = ndim
         self.mgr_locs = placement
         self.values = values
 
@@ -123,6 +123,18 @@ def __init__(self, values, placement, ndim=None):
                 'Wrong number of items passed {val}, placement implies '
                 '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
 
+    def _maybe_validate_ndim(self, values, ndim):
+        """Maybe check that ``values.ndim`` matches ``ndim``.
+
+        This is not checked if ``self._validate_ndim`` is False.
+
+        Raises
+        ------
+        ValueError : the number of dimensions do not match
+        """
+        if self._validate_ndim and values.ndim != ndim:
+            raise ValueError('Wrong number of dimensions')
+
     @property
     def _consolidate_key(self):
         return (self._can_consolidate, self.dtype.name)

From 7d2cf9cdfdfbe736c949a05eb7a81bfe15db25a1 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 1 Feb 2018 07:12:08 -0600
Subject: [PATCH 35/40] Try this

---
 pandas/core/internals.py | 53 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 6 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index fdaebf57a7aa2..1841501c1b601 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -2523,13 +2523,29 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block):
     _can_hold_na = True
 
     def __init__(self, values, placement, ndim=None):
-        if values.dtype != _NS_DTYPE and values.dtype.base != _NS_DTYPE:
-            # not datetime64 or datetime64tz
-            values = conversion.ensure_datetime64ns(values)
-
+        values = self._maybe_coerce_values(values)
         super(DatetimeBlock, self).__init__(values,
                                             placement=placement, ndim=ndim)
 
+    def _maybe_coerce_values(self, values):
+        """Input validation for values passed to __init__. Ensure that
+        we have datetime64ns, coercing if necessary.
+
+        Parameters
+        ----------
+        values : array-like
+            Must be convertible to datetime64
+
+        Returns
+        -------
+        values : ndarray[datetime64ns]
+
+        Overridden by DatetimeTZBlock.
+        """
+        if values.dtype != _NS_DTYPE:
+            values = conversion.ensure_datetime64ns(values)
+        return values
+
     def _astype(self, dtype, mgr=None, **kwargs):
         """
         these automatically copy, so copy=True has no effect
@@ -2660,7 +2676,33 @@ class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock):
     is_datetimetz = True
 
     def __init__(self, values, placement, ndim=2, dtype=None):
+        # XXX: This will end up calling _maybe_coerce_values twice
+        # when dtype is not None. It's relatively cheap (just an isinstance)
+        # but it'd be nice to avoid.
+        #
+        # If we can remove dtype from __init__, and push that conversion
+        # onto the callers, then we can remove this entire __init__
+        # and just use DatetimeBlock's.
+        if dtype is not None:
+            values = self._maybe_coerce_values(values, dtype=dtype)
+        super(DatetimeTZBlock, self).__init__(values, placement=placement,
+                                              ndim=ndim)
+
+    def _maybe_coerce_values(self, values, dtype=None):
+        """Input validation for values passed to __init__. Ensure that
+        we have datetime64TZ, coercing if necessary.
+
+        Parameters
+        ----------
+        values : array-like
+            Must be convertible to datetime64
+        dtype : string or DatetimeTZDtype, optional
+            Does a shallow copy to this tz
 
+        Returns
+        -------
+        values : DatetimeIndex
+        """
         if not isinstance(values, self._holder):
             values = self._holder(values)
 
@@ -2672,8 +2714,7 @@ def __init__(self, values, placement, ndim=2, dtype=None):
         if values.tz is None:
             raise ValueError("cannot create a DatetimeTZBlock without a tz")
 
-        super(DatetimeTZBlock, self).__init__(values, placement=placement,
-                                              ndim=ndim)
+        return values
 
     def copy(self, deep=True, mgr=None):
         """ copy constructor """

From afae8ae9563142ee1c3b29158269f8f38e3f9e1c Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 1 Feb 2018 08:06:06 -0600
Subject: [PATCH 36/40] Make ExtensionBlock._holder a property

Removed ExtensionBlock.__init__
---
 pandas/core/internals.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 1841501c1b601..7e66c6e04a010 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1830,9 +1830,11 @@ class ExtensionBlock(NonConsolidatableMixIn, Block):
 
     ExtensionArrays are limited to 1-D.
     """
-    def __init__(self, values, placement, ndim=None):
-        self._holder = type(values)
-        super(ExtensionBlock, self).__init__(values, placement, ndim=ndim)
+    @property
+    def _holder(self):
+        # For extension blocks, the holder is values-dependent so we
+        # use a property.
+        return type(self.values)
 
     @property
     def is_view(self):

From cd0997e354121bc0414ef1675cb5b8241944a9b0 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 1 Feb 2018 08:27:53 -0600
Subject: [PATCH 37/40] Make _holder a property for all

---
 pandas/core/internals.py                 | 35 +++++++++++++++++++-----
 pandas/tests/internals/test_internals.py | 14 +++++++++-
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 7e66c6e04a010..767c890d7a63b 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -64,6 +64,7 @@
 from pandas.core.indexing import maybe_convert_indices, length_of_indexer
 from pandas.core.arrays import Categorical
 from pandas.core.indexes.datetimes import DatetimeIndex
+from pandas.core.indexes.timedeltas import TimedeltaIndex
 from pandas.io.formats.printing import pprint_thing
 
 import pandas.core.missing as missing
@@ -104,7 +105,6 @@ class Block(PandasObject):
     _verify_integrity = True
     _validate_ndim = True
     _ftype = 'dense'
-    _holder = None
     _concatenator = staticmethod(np.concatenate)
 
     def __init__(self, values, placement, ndim=None):
@@ -135,6 +135,15 @@ def _maybe_validate_ndim(self, values, ndim):
         if self._validate_ndim and values.ndim != ndim:
             raise ValueError('Wrong number of dimensions')
 
+    @property
+    def _holder(self):
+        """The array-like that can hold the underlying values.
+
+        None for 'Block', overridden by subclasses that don't
+        use an ndarray.
+        """
+        return None
+
     @property
     def _consolidate_key(self):
         return (self._can_consolidate, self.dtype.name)
@@ -1689,7 +1698,6 @@ class NonConsolidatableMixIn(object):
     _can_consolidate = False
     _verify_integrity = False
     _validate_ndim = False
-    _holder = None
 
     def __init__(self, values, placement, ndim=None):
         """Initialize a non-consolidatable block.
@@ -1832,8 +1840,7 @@ class ExtensionBlock(NonConsolidatableMixIn, Block):
     """
     @property
     def _holder(self):
-        # For extension blocks, the holder is values-dependent so we
-        # use a property.
+        # For extension blocks, the holder is values-dependent.
         return type(self.values)
 
     @property
@@ -2012,6 +2019,11 @@ def should_store(self, value):
 
 
 class DatetimeLikeBlockMixin(object):
+    """Mixin class for DatetimeBlock and DatetimeTZBlock."""
+
+    @property
+    def _holder(self):
+        return DatetimeIndex
 
     @property
     def _na_value(self):
@@ -2044,6 +2056,10 @@ def __init__(self, values, placement, ndim=None):
         super(TimeDeltaBlock, self).__init__(values,
                                              placement=placement, ndim=ndim)
 
+    @property
+    def _holder(self):
+        return TimedeltaIndex
+
     @property
     def _box_func(self):
         return lambda x: tslib.Timedelta(x, unit='ns')
@@ -2424,7 +2440,6 @@ class CategoricalBlock(ExtensionBlock):
     is_categorical = True
     _verify_integrity = True
     _can_hold_na = True
-    _holder = Categorical
     _concatenator = staticmethod(_concat._concat_categorical)
 
     def __init__(self, values, placement, ndim=None):
@@ -2435,6 +2450,10 @@ def __init__(self, values, placement, ndim=None):
                                                placement=placement,
                                                ndim=ndim)
 
+    @property
+    def _holder(self):
+        return Categorical
+
     @property
     def array_dtype(self):
         """ the dtype to return if I want to construct this block as an
@@ -2673,7 +2692,6 @@ def set(self, locs, values, check=False):
 class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock):
     """ implement a datetime64 block with a tz attribute """
     __slots__ = ()
-    _holder = DatetimeIndex
     _concatenator = staticmethod(_concat._concat_datetime)
     is_datetimetz = True
 
@@ -2856,7 +2874,6 @@ class SparseBlock(NonConsolidatableMixIn, Block):
     _box_to_block_values = False
     _can_hold_na = True
     _ftype = 'sparse'
-    _holder = SparseArray
     _concatenator = staticmethod(_concat._concat_sparse)
 
     def __init__(self, values, placement, ndim=None):
@@ -2866,6 +2883,10 @@ def __init__(self, values, placement, ndim=None):
         assert isinstance(values, SparseArray)
         super(SparseBlock, self).__init__(values, placement, ndim=ndim)
 
+    @property
+    def _holder(self):
+        return SparseArray
+
     @property
     def shape(self):
         return (len(self.mgr_locs), self.sp_index.length)
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index e3490f465b24a..c8ccf23ebcf66 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -11,7 +11,7 @@
 from distutils.version import LooseVersion
 import itertools
 from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex,
-                    Series, Categorical)
+                    Series, Categorical, TimedeltaIndex, SparseArray)
 from pandas.compat import OrderedDict, lrange
 from pandas.core.sparse.array import SparseArray
 from pandas.core.internals import (BlockPlacement, SingleBlockManager,
@@ -1263,6 +1263,18 @@ def test_binop_other(self, op, value, dtype):
         assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize('typestr, holder', [
+    ('category', Categorical),
+    ('M8[ns]', DatetimeIndex),
+    ('M8[ns, US/Central]', DatetimeIndex),
+    ('m8[ns]', TimedeltaIndex),
+    ('sparse', SparseArray),
+])
+def test_holder(typestr, holder):
+    blk = create_block(typestr, [1])
+    assert blk._holder is holder
+
+
 def test_deprecated_fastpath():
     # GH#19265
     values = np.random.rand(3, 3)

From 1d6eb049d0ad7546b1193c718cea773d8799e7cd Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 1 Feb 2018 10:47:45 -0600
Subject: [PATCH 38/40] Refactored validate_ndim

---
 pandas/core/internals.py                 | 33 +++++++++++++++++-------
 pandas/tests/internals/test_internals.py |  9 +++++++
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 767c890d7a63b..a271114274fc6 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -108,12 +108,7 @@ class Block(PandasObject):
     _concatenator = staticmethod(np.concatenate)
 
     def __init__(self, values, placement, ndim=None):
-        if ndim is None:
-            ndim = values.ndim
-
-        self._maybe_validate_ndim(values, ndim)
-
-        self.ndim = ndim
+        self.ndim = self._check_ndim(values, ndim)
         self.mgr_locs = placement
         self.values = values
 
@@ -123,17 +118,35 @@ def __init__(self, values, placement, ndim=None):
                 'Wrong number of items passed {val}, placement implies '
                 '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
 
-    def _maybe_validate_ndim(self, values, ndim):
-        """Maybe check that ``values.ndim`` matches ``ndim``.
+    def _check_ndim(self, values, ndim):
+        """ndim inference and validation.
 
-        This is not checked if ``self._validate_ndim`` is False.
+        Infers ndim from 'values' if not provided to __init__.
+        Validates that values.ndim and ndim are consistent if and only if
+        the class variable '_validate_ndim' is True.
+
+        Parameters
+        ----------
+        values : array-like
+        ndim : int or None
+
+        Returns
+        -------
+        ndim : int
 
         Raises
         ------
         ValueError : the number of dimensions do not match
         """
+        if ndim is None:
+            ndim = values.ndim
+
         if self._validate_ndim and values.ndim != ndim:
-            raise ValueError('Wrong number of dimensions')
+            msg = ("Wrong number of dimensions. values.ndim != ndim "
+                   "[{} != {}]")
+            raise ValueError(msg.format(values.ndim, ndim))
+
+        return ndim
 
     @property
     def _holder(self):
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index c8ccf23ebcf66..a45b5cb48d914 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -1281,3 +1281,12 @@ def test_deprecated_fastpath():
     with tm.assert_produces_warning(DeprecationWarning,
                                     check_stacklevel=False):
         make_block(values, placement=np.arange(3), fastpath=True)
+
+
+def test_validate_ndim():
+    values = np.array([1.0, 2.0])
+    placement = slice(2)
+    # Raw string: the regex escapes \[ and \] are not valid str escapes.
+    msg = r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]"
+    with tm.assert_raises_regex(ValueError, msg):
+        make_block(values, placement, ndim=2)

From 92aed49f4c49b89ae09045ee563f6b0de5f2e6bd Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 1 Feb 2018 11:21:16 -0600
Subject: [PATCH 39/40] fixup! Refactored validate_ndim

---
 pandas/core/internals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index a271114274fc6..cef5b776eff66 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -112,7 +112,7 @@ def __init__(self, values, placement, ndim=None):
         self.mgr_locs = placement
         self.values = values
 
-        if (self._validate_ndim and ndim and
+        if (self._validate_ndim and self.ndim and
                 len(self.mgr_locs) != len(self.values)):
             raise ValueError(
                 'Wrong number of items passed {val}, placement implies '

From 34134f2f9633cca26b4efd382475927c1eb3fe5a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 1 Feb 2018 14:54:53 -0600
Subject: [PATCH 40/40] lint

---
 pandas/tests/internals/test_internals.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index a45b5cb48d914..9338aba90d7cb 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -13,7 +13,6 @@
 from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex,
                     Series, Categorical, TimedeltaIndex, SparseArray)
 from pandas.compat import OrderedDict, lrange
-from pandas.core.sparse.array import SparseArray
 from pandas.core.internals import (BlockPlacement, SingleBlockManager,
                                    make_block, BlockManager)
 import pandas.core.algorithms as algos