pandas-dev · h-vetinari · Nov 15, 2018 · Oct 16, 2018 · Nov 15, 2018 · Nov 8, 2018
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -281,6 +281,8 @@ Other Enhancements
   all instances of ``set`` will not be considered "list-like" anymore (:issue:`23061`)
 - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
 - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
+- :meth:`Series.update` now supports the same keywords and functionality as :meth:`DataFrame.update`.
+  In particular, it has gained the keywords ``overwrite``, ``filter_func`` and ``errors`` (:issue:`22358`, :issue:`23585`)
 - Compatibility with Matplotlib 3.0 (:issue:`22790`).
 - Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
 - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`)
@@ -297,6 +299,7 @@ Backwards incompatible API changes
 - A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`)
 - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
 - Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
+- :meth:`DataFrame.update` will now try to preserve the dtype of the caller as much as possible (:issue:`23606`)
 
 .. _whatsnew_0240.api_breaking.deps:
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5203,157 +5203,13 @@ def combiner(x, y):
 
         return self.combine(other, combiner, overwrite=False)
 
+    @Appender(NDFrame.update.__doc__)
     @deprecate_kwarg(old_arg_name='raise_conflict', new_arg_name='errors',
                      mapping={False: 'ignore', True: 'raise'})
     def update(self, other, join='left', overwrite=True, filter_func=None,
                errors='ignore'):
-        """
-        Modify in place using non-NA values from another DataFrame.
-
-        Aligns on indices. There is no return value.
-
-        Parameters
-        ----------
-        other : DataFrame, or object coercible into a DataFrame
-            Should have at least one matching index/column label
-            with the original DataFrame. If a Series is passed,
-            its name attribute must be set, and that will be
-            used as the column name to align with the original DataFrame.
-        join : {'left'}, default 'left'
-            Only left join is implemented, keeping the index and columns of the
-            original object.
-        overwrite : bool, default True
-            How to handle non-NA values for overlapping keys:
-
-            * True: overwrite original DataFrame's values
-              with values from `other`.
-            * False: only update values that are NA in
-              the original DataFrame.
-
-        filter_func : callable(1d-array) -> bool 1d-array, optional
-            Can choose to replace values other than NA. Return True for values
-            that should be updated.
-        errors : {'raise', 'ignore'}, default 'ignore'
-            If 'raise', will raise a ValueError if the DataFrame and `other`
-            both contain non-NA data in the same place.
-
-            .. versionchanged :: 0.24.0
-               Changed from `raise_conflict=False|True`
-               to `errors='ignore'|'raise'`.
-
-        Returns
-        -------
-        None : method directly changes calling object
-
-        Raises
-        ------
-        ValueError
-            * When `errors='raise'` and there's overlapping non-NA data.
-            * When `errors` is not either `'ignore'` or `'raise'`
-        NotImplementedError
-            * If `join != 'left'`
-
-        See Also
-        --------
-        dict.update : Similar method for dictionaries.
-        DataFrame.merge : For column(s)-on-columns(s) operations.
-
-        Examples
-        --------
-        >>> df = pd.DataFrame({'A': [1, 2, 3],
-        ...                    'B': [400, 500, 600]})
-        >>> new_df = pd.DataFrame({'B': [4, 5, 6],
-        ...                        'C': [7, 8, 9]})
-        >>> df.update(new_df)
-        >>> df
-           A  B
-        0  1  4
-        1  2  5
-        2  3  6
-
-        The DataFrame's length does not increase as a result of the update,
-        only values at matching index/column labels are updated.
-
-        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
-        ...                    'B': ['x', 'y', 'z']})
-        >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
-        >>> df.update(new_df)
-        >>> df
-           A  B
-        0  a  d
-        1  b  e
-        2  c  f
-
-        For Series, it's name attribute must be set.
-
-        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
-        ...                    'B': ['x', 'y', 'z']})
-        >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])
-        >>> df.update(new_column)
-        >>> df
-           A  B
-        0  a  d
-        1  b  y
-        2  c  e
-        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
-        ...                    'B': ['x', 'y', 'z']})
-        >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])
-        >>> df.update(new_df)
-        >>> df
-           A  B
-        0  a  x
-        1  b  d
-        2  c  e
-
-        If `other` contains NaNs the corresponding values are not updated
-        in the original dataframe.
-
-        >>> df = pd.DataFrame({'A': [1, 2, 3],
-        ...                    'B': [400, 500, 600]})
-        >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
-        >>> df.update(new_df)
-        >>> df
-           A      B
-        0  1    4.0
-        1  2  500.0
-        2  3    6.0
-        """
-        import pandas.core.computation.expressions as expressions
-        # TODO: Support other joins
-        if join != 'left':  # pragma: no cover
-            raise NotImplementedError("Only left join is supported")
-        if errors not in ['ignore', 'raise']:
-            raise ValueError("The parameter errors must be either "
-                             "'ignore' or 'raise'")
-
-        if not isinstance(other, DataFrame):
-            other = DataFrame(other)
-
-        other = other.reindex_like(self)
-
-        for col in self.columns:
-            this = self[col].values
-            that = other[col].values
-            if filter_func is not None:
-                with np.errstate(all='ignore'):
-                    mask = ~filter_func(this) | isna(that)
-            else:
-                if errors == 'raise':
-                    mask_this = notna(that)
-                    mask_that = notna(this)
-                    if any(mask_this & mask_that):
-                        raise ValueError("Data overlaps.")
-
-                if overwrite:
-                    mask = isna(that)
-                else:
-                    mask = notna(this)
-
-            # don't overwrite columns unecessarily
-            if mask.all():
-                continue
-
-            self[col] = expressions.where(mask, this, that)
+        super(DataFrame, self).update(other, join=join, overwrite=overwrite,
+                                      filter_func=filter_func, errors=errors)
 
     # ----------------------------------------------------------------------
     # Data reshaping

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -4173,6 +4173,181 @@ def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
 
         return self._constructor(new_data).__finalize__(self)
 
+    def update(self, other, join='left', overwrite=True, filter_func=None,
+               errors='ignore'):
+        """
+        Modify in place using non-NA values from another DataFrame.
+
+        Series/DataFrame will be aligned on indexes, and whenever possible,
+        the dtype of the individual Series of the caller will be preserved.
+
+        There is no return value.
+
+        Parameters
+        ----------
+        other : DataFrame, or object coercible into a DataFrame
+            Should have at least one matching index/column label
+            with the original DataFrame. If a Series is passed,
+            its name attribute must be set, and that will be
+            used as the column name to align with the original DataFrame.
+        join : {'left'}, default 'left'
+            Only left join is implemented, keeping the index and columns of the
+            original object.
+        overwrite : bool, default True
+            How to handle non-NA values for overlapping keys:
+
+            * True: overwrite original DataFrame's values
+              with values from `other`.
+            * False: only update values that are NA in
+              the original DataFrame.
+
+        filter_func : callable(1d-array) -> bool 1d-array, optional
+            Can choose to replace values other than NA. Return True for values
+            that should be updated.
+        errors : {'raise', 'ignore'}, default 'ignore'
+            If 'raise', will raise a ValueError if the DataFrame and `other`
+            both contain non-NA data in the same place.
+
+            .. versionchanged :: 0.24.0
+               Changed from `raise_conflict=False|True`
+               to `errors='ignore'|'raise'`.
+
+        Returns
+        -------
+        None : method directly changes calling object
+
+        Raises
+        ------
+        ValueError
+            * When `errors='raise'` and there's overlapping non-NA data.
+            * When `errors` is not either `'ignore'` or `'raise'`
+        NotImplementedError
+            * If `join != 'left'`
+
+        See Also
+        --------
+        Series.update : Similar method for `Series`.
+        DataFrame.merge : For column(s)-on-columns(s) operations.
+        dict.update : Similar method for `dict`.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({'A': [1, 2, 3],
+        ...                    'B': [400, 500, 600]})
+        >>> new_df = pd.DataFrame({'B': [4, 5, 6],
+        ...                        'C': [7, 8, 9]})
+        >>> df.update(new_df)
+        >>> df
+           A  B
+        0  1  4
+        1  2  5
+        2  3  6
+
+        The DataFrame's length does not increase as a result of the update,
+        only values at matching index/column labels are updated.
+
+        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
+        ...                    'B': ['x', 'y', 'z']})
+        >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
+        >>> df.update(new_df)
+        >>> df
+           A  B
+        0  a  d
+        1  b  e
+        2  c  f
+
+        For Series, its name attribute must be set.
+
+        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
+        ...                    'B': ['x', 'y', 'z']})
+        >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])
+        >>> df.update(new_column)
+        >>> df
+           A  B
+        0  a  d
+        1  b  y
+        2  c  e
+        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
+        ...                    'B': ['x', 'y', 'z']})
+        >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])
+        >>> df.update(new_df)
+        >>> df
+           A  B
+        0  a  x
+        1  b  d
+        2  c  e
+
+        If `other` contains NaNs the corresponding values are not updated
+        in the original dataframe.
+
+        >>> df = pd.DataFrame({'A': [1, 2, 3],
+        ...                    'B': [400, 500, 600]})
+        >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
+        >>> df.update(new_df)
+        >>> df
+           A    B
+        0  1    4
+        1  2  500
+        2  3    6
+        """
+        from pandas import Series, DataFrame
+        # TODO: Support other joins
+        if join != 'left':  # pragma: no cover
+            raise NotImplementedError("Only left join is supported")
+        if errors not in ['ignore', 'raise']:
+            raise ValueError("The parameter errors must be either "
+                             "'ignore' or 'raise'")
+
+        if isinstance(self, ABCSeries):
+            if not isinstance(other, ABCSeries):
+                other = Series(other)
+            other = other.reindex_like(self)
+            this = self.values
+            that = other.values
+
+            # _update_array returns None when "this" needs no change
+            updated_array = missing._update_array(this, that,
+                                                  overwrite=overwrite,
+                                                  filter_func=filter_func,
+                                                  errors=errors)
+            # only write back when something actually changed
+            if updated_array is not None:
+                # restore caller's dtype if possible (alignment may upcast)
+                try:
+                    updated = Series(updated_array, index=self.index,
+                                     dtype=this.dtype)
+                except ValueError:  # cast to the original dtype failed
+                    updated = Series(updated_array, index=self.index)
+                self._update_inplace(updated)
+        else:  # DataFrame
+            if not isinstance(other, ABCDataFrame):
+                other = DataFrame(other)
+
+            other = other.reindex_like(self)
+
+            for col in self.columns:
+                this = self[col].values
+                that = other[col].values
+
+                # _update_array returns None when "this" needs no change
+                updated_array = missing._update_array(this, that,
+                                                      overwrite=overwrite,
+                                                      filter_func=filter_func,
+                                                      errors=errors)
+                # only write back when something actually changed
+                if updated_array is not None:
+                    # a plain array can be assigned to a column directly
+                    updated = updated_array
+
+                    if updated_array.dtype != this.dtype:
+                        # restore column dtype if possible (alignment upcasts)
+                        try:
+                            updated = Series(updated_array, index=self.index,
+                                             dtype=this.dtype)
+                        except ValueError:
+                            pass  # cast failed: keep the array as computed
+                    self[col] = updated
+
     def filter(self, items=None, like=None, regex=None, axis=None):
         """
         Subset rows or columns of dataframe according to labels in

diff --git a/pandas/core/missing.py b/pandas/core/missing.py
@@ -21,7 +21,7 @@
     ensure_float64)
 
 from pandas.core.dtypes.cast import infer_dtype_from_array
-from pandas.core.dtypes.missing import isna
+from pandas.core.dtypes.missing import isna, notna
 
 
 def mask_missing(arr, values_to_mask):
@@ -75,6 +75,80 @@ def mask_missing(arr, values_to_mask):
     return mask
 
 
+def update_array(this, that, overwrite=True, filter_func=None,
+                 errors='ignore'):
+    """
+    Fill one array with the non-NA values of another array.
+
+    Parameters
+    ----------
+    this : np.ndarray (one-dimensional)
+        Array that receives the new values.
+    that : np.ndarray (one-dimensional)
+        Array that supplies the new values.
+    overwrite : bool, default True
+        Treatment of positions where `this` already holds non-NA data:
+
+        * True: replace them with the values from `that`.
+        * False: keep them; only NA positions of `this` are filled.
+
+    filter_func : callable(1d-array) -> boolean 1d-array, optional
+        Selects values other than NA for replacement; it must return True
+        for the values that should be updated.
+    errors : {'raise', 'ignore'}, default 'ignore'
+        With 'raise', a ValueError is raised if `this` and `that` both hold
+        non-NA data at the same position.
+
+    Returns
+    -------
+    updated : np.ndarray (one-dimensional)
+        The updated array; `this` itself when nothing had to change.
+
+    Raises
+    ------
+    ValueError
+        When `errors='raise'` and there's overlapping non-NA data.
+
+    See Also
+    --------
+    Series.update : Similar method for `Series`.
+    DataFrame.update : Similar method for `DataFrame`.
+    dict.update : Similar method for `dict`.
+    """
+    result = _update_array(this, that, overwrite=overwrite,
+                           filter_func=filter_func, errors=errors)
+    return result if result is not None else this
+
+
+def _update_array(this, that, overwrite=True, filter_func=None,
+                  errors='ignore'):
+    """
+    Same as update_array, except we return None if `this` is not updated.
+
+    In the boolean mask built below, True marks positions that keep the
+    value of `this`; every other position takes the value of `that`.
+    """
+    import pandas.core.computation.expressions as expressions
+
+    if filter_func is not None:
+        # ignore numpy floating-point errors raised inside filter_func
+        with np.errstate(all='ignore'):
+            keep = ~filter_func(this) | isna(that)
+    else:
+        # vectorised .any() instead of the builtin, which loops in Python
+        if errors == 'raise' and (notna(this) & notna(that)).any():
+            raise ValueError("Data overlaps.")
+
+        # overwrite: keep only where `that` is NA; else keep all non-NA
+        keep = isna(that) if overwrite else notna(this)
+
+    # nothing would change, so signal that with None
+    if keep.all():
+        return None
+
+    return expressions.where(keep, this, that)
+
+
 def clean_fill_method(method, allow_nearest=False):
     # asfreq is compat for resampling
     if method in [None, 'asfreq']:

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2385,14 +2385,59 @@ def combine_first(self, other):
 
         return this.where(notna(this), other)
 
-    def update(self, other):
+    def update(self, other, join='left', overwrite=True, filter_func=None,
+               errors='ignore'):
         """
-        Modify Series in place using non-NA values from passed
-        Series. Aligns on index
+        Modify Series in place using non-NA values from passed Series.
+
+        Series will be aligned on indexes, and whenever possible, the dtype of
+        the caller will be preserved.
+
+        There is no return value.
 
         Parameters
         ----------
-        other : Series
+        other : Series, or object coercible into a Series
+            Should have at least one matching index label with the calling
+            Series.
+        join : {'left'}, default 'left'
+            Only left join is implemented, keeping the index of the
+            original object.
+
+            .. versionadded:: 0.24.0
+        overwrite : bool, default True
+            How to handle non-NA values for overlapping keys:
+
+            * True: overwrite original Series' values
+              with values from `other`.
+            * False: only update values that are NA in
+              the original Series.
+
+            .. versionadded:: 0.24.0
+        filter_func : callable(1d-array) -> bool 1d-array, optional
+            Can choose to replace values other than NA. Return True for values
+            that should be updated.
+
+            .. versionadded:: 0.24.0
+        errors : {'raise', 'ignore'}, default 'ignore'
+            If 'raise', will raise a ValueError if the Series and `other`
+            both contain non-NA data in the same place.
+
+            .. versionadded:: 0.24.0
+
+        Raises
+        ------
+        ValueError
+            When `errors='raise'` and there's overlapping non-NA data.
+
+        Returns
+        -------
+        Nothing, the Series is modified inplace.
+
+        See Also
+        --------
+        DataFrame.update : Similar method for `DataFrame`.
+        dict.update : Similar method for `dict`.
 
         Examples
         --------
@@ -2431,11 +2476,9 @@ def update(self, other):
         2    6
         dtype: int64
         """
-        other = other.reindex_like(self)
-        mask = notna(other)
-
-        self._data = self._data.putmask(mask=mask, new=other, inplace=True)
-        self._maybe_update_cacher()
+        super(Series, self).update(other, join=join, overwrite=overwrite,
+                                   filter_func=filter_func,
+                                   errors=errors)
 
     # ----------------------------------------------------------------------
     # Reindexing, sorting

diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py
@@ -279,6 +279,25 @@ def test_update_dtypes(self):
                              columns=['A', 'B', 'bool1', 'bool2'])
         assert_frame_equal(df, expected)
 
+        df = DataFrame([[10, 100], [11, 101], [12, 102]], columns=['A', 'B'])
+        other = DataFrame([[61, 601], [63, 603]], columns=['A', 'B'],
+                          index=[1, 3])
+        df.update(other)
+
+        expected = DataFrame([[10, 100], [61, 601], [12, 102]],
+                             columns=['A', 'B'])
+        assert_frame_equal(df, expected)
+
+        # we always try to keep original dtype, even if other has different one
+        df.update(other.astype(float))
+        assert_frame_equal(df, expected)
+
+        # if keeping the dtype is not possible, we allow upcasting
+        df.update(other + 0.1)
+        expected = DataFrame([[10., 100.], [61.1, 601.1], [12., 102.]],
+                             columns=['A', 'B'])
+        assert_frame_equal(df, expected)
+
     def test_update_nooverwrite(self):
         df = DataFrame([[1.5, nan, 3.],
                         [1.5, nan, 3.],

diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
@@ -4,13 +4,12 @@
 from datetime import datetime
 
 import numpy as np
-from numpy import nan
 import pytest
 
 import pandas as pd
 from pandas import DataFrame, DatetimeIndex, Series, compat, date_range
 import pandas.util.testing as tm
-from pandas.util.testing import assert_series_equal
+from pandas.util.testing import assert_frame_equal, assert_series_equal
 
 
 class TestSeriesCombine():
@@ -105,8 +104,8 @@ def test_combine_first(self):
         assert_series_equal(s, result)
 
     def test_update(self):
-        s = Series([1.5, nan, 3., 4., nan])
-        s2 = Series([nan, 3.5, nan, 5.])
+        s = Series([1.5, np.nan, 3., 4., np.nan])
+        s2 = Series([np.nan, 3.5, np.nan, 5.])
         s.update(s2)
 
         expected = Series([1.5, 3.5, 3., 5., np.nan])
@@ -116,8 +115,62 @@ def test_update(self):
         df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
         df['c'] = np.nan
 
-        # this will fail as long as series is a sub-class of ndarray
-        # df['c'].update(Series(['foo'],index=[0])) #####
+        df['c'].update(Series(['foo'], index=[0]))
+        expected = DataFrame([[1, np.nan, 'foo'], [3, 2., np.nan]],
+                             columns=['a', 'b', 'c'])
+        assert_frame_equal(df, expected)
+
+    def test_update_dtypes(self):
+        s = Series([1., 2., False, True])
+
+        other = Series([45])
+        s.update(other)
+
+        expected = Series([45., 2., False, True])
+        assert_series_equal(s, expected)
+
+        s = Series([10, 11, 12])
+        other = Series([61, 63], index=[1, 3])
+        s.update(other)
+
+        expected = Series([10, 61, 12])
+        assert_series_equal(s, expected)
+
+        # we always try to keep original dtype, even if other has different one
+        s.update(other.astype(float))
+        assert_series_equal(s, expected)
+
+        # if keeping the dtype is not possible, we allow upcasting
+        s.update(other + 0.1)
+        expected = Series([10., 61.1, 12.])
+        assert_series_equal(s, expected)
+
+    def test_update_nooverwrite(self):
+        s = Series([0, 1, 2, np.nan, np.nan, 5, 6, np.nan])
+        other = Series([1, 3, np.nan, 7, 9], index=[1, 3, 5, 7, 9])
+
+        s.update(other, overwrite=False)
+
+        expected = Series([0, 1, 2, 3, np.nan, 5, 6, 7])
+        assert_series_equal(s, expected)
+
+    def test_update_filtered(self):
+        # np.arange may default to int32 depending on the platform,
+        # but pandas default (e.g. for "expected" below) is int64
+        s = Series(np.arange(8), dtype='int64')
+        other = Series(np.arange(8), dtype='int64') + 10
+
+        s.update(other, filter_func=lambda x: x % 2 == 1)
+
+        expected = Series([0, 11, 2, 13, 4, 15, 6, 17])
+        assert_series_equal(s, expected)
+
+    def test_update_raise(self):
+        s = Series([0, 1, 2, np.nan, np.nan, 5, 6, np.nan])
+        other = Series([1, 3, np.nan, 7, 9], index=[1, 3, 5, 7, 9])
+
+        with pytest.raises(ValueError, match="Data overlaps"):
+            s.update(other, errors='raise')
 
     def test_concat_empty_series_dtypes_roundtrips(self):