3 changes: 1 addition & 2 deletions pandas/_libs/groupby.pyx
@@ -753,8 +753,7 @@ def group_quantile(ndarray[float64_t] out,
assert values.shape[0] == N

if not (0 <= q <= 1):
raise ValueError("'q' must be between 0 and 1. Got"
" '{}' instead".format(q))
raise ValueError(f"'q' must be between 0 and 1. Got '{q}' instead")

inter_methods = {
'linear': INTERPOLATION_LINEAR,
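
The groupby.pyx hunk is the simplest shape of this conversion: a two-line str.format() call collapses into a single f-string with no change in the rendered message. A minimal standalone sketch (the value of q is made up for illustration):

q = 1.5
old = "'q' must be between 0 and 1. Got '{}' instead".format(q)
new = f"'q' must be between 0 and 1. Got '{q}' instead"
# Both render the same text: 'q' must be between 0 and 1. Got '1.5' instead
assert old == new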
10 changes: 5 additions & 5 deletions pandas/_libs/hashing.pyx
@@ -47,8 +47,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
k = <bytes>key.encode(encoding)
kb = <uint8_t *>k
if len(k) != 16:
raise ValueError("key should be a 16-byte string encoded, "
"got {key} (len {klen})".format(key=k, klen=len(k)))
raise ValueError(f"key should be a 16-byte string encoded, "
f"got {k} (len {len(k)})")

n = len(arr)

@@ -67,9 +67,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
data = <bytes>str(val).encode(encoding)

else:
raise TypeError("{val} of type {typ} is not a valid type "
"for hashing, must be string or null"
.format(val=val, typ=type(val)))
raise TypeError(f"{val} of type {type(val)} is not a valid type "
f"for hashing, must be string or null"
)

l = len(data)
lens[i] = l
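
Both hashing.pyx replacements span two adjacent string literals, which Python concatenates before the placeholders are rendered into the final message; only the fragments that actually contain braces need the f prefix, so the prefix on the placeholder-free first fragment is harmless but redundant. A minimal sketch with a deliberately wrong-length key (the value is illustrative, not from the diff):

k = b"0123456789abcdef0"  # 17 bytes on purpose
msg = (f"key should be a 16-byte string encoded, "
       f"got {k} (len {len(k)})")
# Adjacent literals join into one string, so this is equivalent to a
# single f-string spanning both lines.
print(msg)  # key should be a 16-byte string encoded, got b'0123456789abcdef0' (len 17)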
14 changes: 7 additions & 7 deletions pandas/_libs/index.pyx
@@ -109,7 +109,7 @@ cdef class IndexEngine:
Py_ssize_t loc

if is_definitely_invalid_key(val):
raise TypeError("'{val}' is an invalid key".format(val=val))
raise TypeError(f"'{val}' is an invalid key")

if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
@@ -556,8 +556,8 @@ cpdef convert_scalar(ndarray arr, object value):
pass
elif value is None or value != value:
return np.datetime64("NaT", "ns")
raise ValueError("cannot set a Timestamp with a non-timestamp {typ}"
.format(typ=type(value).__name__))
raise ValueError(f"cannot set a Timestamp with a non-timestamp "
f"{type(value).__name__}")

elif arr.descr.type_num == NPY_TIMEDELTA:
if util.is_array(value):
@@ -573,8 +573,8 @@ cpdef convert_scalar(ndarray arr, object value):
pass
elif value is None or value != value:
return np.timedelta64("NaT", "ns")
raise ValueError("cannot set a Timedelta with a non-timedelta {typ}"
.format(typ=type(value).__name__))
raise ValueError(f"cannot set a Timedelta with a non-timedelta "
f"{type(value).__name__}")

if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and
not issubclass(arr.dtype.type, np.bool_)):
@@ -677,7 +677,7 @@ cdef class BaseMultiIndexCodesEngine:
# Index._get_fill_indexer), sort (integer representations of) keys:
order = np.argsort(lab_ints)
lab_ints = lab_ints[order]
indexer = (getattr(self._base, 'get_{}_indexer'.format(method))
indexer = (getattr(self._base, f'get_{method}_indexer')
(self, lab_ints, limit=limit))
indexer = indexer[order]
else:
@@ -687,7 +687,7 @@ cdef class BaseMultiIndexCodesEngine:

def get_loc(self, object key):
if is_definitely_invalid_key(key):
raise TypeError("'{key}' is an invalid key".format(key=key))
raise TypeError(f"'{key}' is an invalid key")
if not isinstance(key, tuple):
raise KeyError(key)
try:
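
The BaseMultiIndexCodesEngine hunk uses an f-string to build an attribute name for getattr, e.g. get_pad_indexer or get_backfill_indexer depending on method. A minimal sketch of that dispatch pattern; the Dummy class and its methods are hypothetical stand-ins, not pandas API:

class Dummy:
    def get_pad_indexer(self, keys):
        return [0] * len(keys)

    def get_backfill_indexer(self, keys):
        return [1] * len(keys)

method = "pad"
# The f-string forms the method name; getattr looks it up on the object.
indexer = getattr(Dummy(), f"get_{method}_indexer")([10, 20])
assert indexer == [0, 0]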
2 changes: 1 addition & 1 deletion pandas/_libs/internals.pyx
@@ -61,7 +61,7 @@ cdef class BlockPlacement:
else:
v = self._as_array

return '%s(%r)' % (self.__class__.__name__, v)
return f'{self.__class__.__name__}({v})'

def __repr__(self) -> str:
return str(self)
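
This is the one hunk where the conversion is not purely mechanical: the old %-format used %r, which goes through repr(), while a bare {v} placeholder goes through str(). For slices the two coincide, but for an ndarray they differ; {v!r} is the f-string spelling that preserves the old behavior. A small sketch of the distinction (not a claim about what BlockPlacement actually prints):

import numpy as np

v = slice(0, 5, 1)
# For a slice, str() and repr() agree, so all three forms match.
assert '%s(%r)' % ('BlockPlacement', v) == f'BlockPlacement({v!r})' == f'BlockPlacement({v})'

arr = np.arange(3)
# For an ndarray they differ: str -> '[0 1 2]', repr -> 'array([0, 1, 2])'.
assert f'{arr}' != f'{arr!r}'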
30 changes: 13 additions & 17 deletions pandas/_libs/interval.pyx
@@ -179,8 +179,8 @@ cdef class IntervalMixin:
When `other` is not closed exactly the same as self.
"""
if self.closed != other.closed:
msg = "'{}.closed' is '{}', expected '{}'."
raise ValueError(msg.format(name, other.closed, self.closed))
msg = f"'{name}.closed' is '{other.closed}', expected '{self.closed}'."
raise ValueError(msg)


cdef _interval_like(other):
@@ -308,17 +308,16 @@ cdef class Interval(IntervalMixin):
self._validate_endpoint(right)

if closed not in _VALID_CLOSED:
msg = "invalid option for 'closed': {closed}".format(closed=closed)
msg = f"invalid option for 'closed': {closed}"
raise ValueError(msg)
if not left <= right:
raise ValueError('left side of interval must be <= right side')
if (isinstance(left, Timestamp) and
not tz_compare(left.tzinfo, right.tzinfo)):
# GH 18538
msg = ("left and right must have the same time zone, got "
"'{left_tz}' and '{right_tz}'")
raise ValueError(msg.format(left_tz=left.tzinfo,
right_tz=right.tzinfo))
msg = (f"left and right must have the same time zone, got "
f"'{left.tzinfo}' and '{right.tzinfo}'")
raise ValueError(msg)
self.left = left
self.right = right
self.closed = closed
@@ -359,8 +358,7 @@ cdef class Interval(IntervalMixin):
name = type(self).__name__
other = type(other).__name__
op_str = {Py_LT: '<', Py_LE: '<=', Py_GT: '>', Py_GE: '>='}[op]
raise TypeError('unorderable types: {name}() {op} {other}()'
.format(name=name, op=op_str, other=other))
raise TypeError(f'unorderable types: {name}() {op_str} {other}()')

def __reduce__(self):
args = (self.left, self.right, self.closed)
@@ -381,17 +379,15 @@ cdef class Interval(IntervalMixin):

left, right = self._repr_base()
name = type(self).__name__
repr_str = '{name}({left!r}, {right!r}, closed={closed!r})'.format(
name=name, left=left, right=right, closed=self.closed)
repr_str = f'{name}({left!r}, {right!r}, closed={self.closed!r})'
return repr_str

def __str__(self) -> str:

left, right = self._repr_base()
start_symbol = '[' if self.closed_left else '('
end_symbol = ']' if self.closed_right else ')'
return '{start}{left}, {right}{end}'.format(
start=start_symbol, left=left, right=right, end=end_symbol)
return f'{start_symbol}{left}, {right}{end_symbol}'

def __add__(self, y):
if isinstance(y, numbers.Number):
@@ -477,8 +473,8 @@ cdef class Interval(IntervalMixin):
False
"""
if not isinstance(other, Interval):
msg = '`other` must be an Interval, got {other}'
raise TypeError(msg.format(other=type(other).__name__))
msg = f'`other` must be an Interval, got {type(other).__name__}'
raise TypeError(msg)

# equality is okay if both endpoints are closed (overlap at a point)
op1 = le if (self.closed_left and other.closed_right) else lt
@@ -529,8 +525,8 @@ def intervals_to_interval_bounds(ndarray intervals,
continue

if not isinstance(interval, Interval):
raise TypeError("type {typ} with value {iv} is not an interval"
.format(typ=type(interval), iv=interval))
raise TypeError(f"type {type(interval)} with value "
f"{interval} is not an interval")

left[i] = interval.left
right[i] = interval.right
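
The Interval.__repr__ hunk keeps the !r conversions, which are the direct f-string counterpart of {...!r} in str.format(): the placeholder is filled with repr() of the value. A minimal sketch with made-up endpoint values:

name, left, right, closed = 'Interval', 0.0, 1.5, 'right'
old = '{name}({left!r}, {right!r}, closed={closed!r})'.format(
    name=name, left=left, right=right, closed=closed)
new = f'{name}({left!r}, {right!r}, closed={closed!r})'
assert old == new == "Interval(0.0, 1.5, closed='right')"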
15 changes: 6 additions & 9 deletions pandas/_libs/lib.pyx
@@ -1219,8 +1219,7 @@ def infer_dtype(value: object, skipna: object=None) -> str:
return value

# its ndarray like but we can't handle
raise ValueError("cannot infer type for {typ}"
.format(typ=type(value)))
raise ValueError(f"cannot infer type for {type(value)}")

else:
if not isinstance(value, list):
@@ -1497,9 +1496,8 @@ cdef class Validator:
return self.is_valid(value) or self.is_valid_null(value)

cdef bint is_value_typed(self, object value) except -1:
raise NotImplementedError(
'{typ} child class must define is_value_typed'
.format(typ=type(self).__name__))
raise NotImplementedError(f'{type(self).__name__} child class '
f'must define is_value_typed')

cdef bint is_valid_null(self, object value) except -1:
return value is None or util.is_nan(value)
@@ -1635,9 +1633,8 @@ cdef class TemporalValidator(Validator):
return self.is_value_typed(value) or self.is_valid_null(value)

cdef bint is_valid_null(self, object value) except -1:
raise NotImplementedError(
'{typ} child class must define is_valid_null'
.format(typ=type(self).__name__))
raise NotImplementedError(f'{type(self).__name__} child class '
f'must define is_valid_null')

cdef inline bint is_valid_skipna(self, object value) except -1:
cdef:
@@ -1926,7 +1923,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
seen.float_ = True
except (TypeError, ValueError) as e:
if not seen.coerce_numeric:
raise type(e)(str(e) + " at position {pos}".format(pos=i))
raise type(e)(str(e) + f" at position {i}")
elif "uint64" in str(e): # Exception from check functions.
raise
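
The maybe_convert_numeric hunk re-raises whatever was caught, as the same exception type, with the element's position appended to the message. A minimal sketch of that pattern outside Cython (the helper and input values are made up):

def convert_all(values):
    for i, val in enumerate(values):
        try:
            float(val)
        except (TypeError, ValueError) as e:
            # Rebuild the same exception type with positional context appended.
            raise type(e)(str(e) + f" at position {i}")

try:
    convert_all(["1.0", "oops"])
except ValueError as exc:
    print(exc)  # could not convert string to float: 'oops' at position 1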

6 changes: 2 additions & 4 deletions pandas/_libs/ops.pyx
@@ -123,8 +123,7 @@ def vec_compare(object[:] left, object[:] right, object op):
int flag

if n != <Py_ssize_t>len(right):
raise ValueError('Arrays were different lengths: {n} vs {nright}'
.format(n=n, nright=len(right)))
raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')

if op is operator.lt:
flag = Py_LT
@@ -224,8 +223,7 @@ def vec_binop(object[:] left, object[:] right, object op):
object[:] result

if n != <Py_ssize_t>len(right):
raise ValueError('Arrays were different lengths: {n} vs {nright}'
.format(n=n, nright=len(right)))
raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')

result = np.empty(n, dtype=object)

28 changes: 14 additions & 14 deletions pandas/_libs/parsers.pyx
@@ -637,19 +637,19 @@ cdef class TextReader:
source = zip_file.open(file_name)

elif len(zip_names) == 0:
raise ValueError('Zero files found in compressed '
'zip file %s', source)
raise ValueError(f'Zero files found in compressed '
f'zip file {source}')
else:
raise ValueError('Multiple files found in compressed '
'zip file %s', str(zip_names))
raise ValueError(f'Multiple files found in compressed '
f'zip file {zip_names}')
elif self.compression == 'xz':
if isinstance(source, str):
source = _get_lzma_file(lzma)(source, 'rb')
else:
source = _get_lzma_file(lzma)(filename=source)
else:
raise ValueError('Unrecognized compression type: %s' %
self.compression)
raise ValueError(f'Unrecognized compression type: '
f'{self.compression}')

if b'utf-16' in (self.encoding or b''):
# we need to read utf-16 through UTF8Recoder.
@@ -703,8 +703,8 @@ cdef class TextReader:
self.parser.cb_io = &buffer_rd_bytes
self.parser.cb_cleanup = &del_rd_source
else:
raise IOError('Expected file path name or file-like object,'
' got %s type' % type(source))
raise IOError(f'Expected file path name or file-like object, '
f'got {type(source)} type')

cdef _get_header(self):
# header is now a list of lists, so field_count should use header[0]
@@ -744,8 +744,8 @@ cdef class TextReader:
msg = "[%s], len of %d," % (
','.join(str(m) for m in msg), len(msg))
raise ParserError(
'Passed header=%s but only %d lines in file'
% (msg, self.parser.lines))
f'Passed header={msg} but only '
f'{self.parser.lines} lines in file')

else:
field_count = self.parser.line_fields[hr]
@@ -779,7 +779,7 @@ cdef class TextReader:
if not self.has_mi_columns and self.mangle_dupe_cols:
while count > 0:
counts[name] = count + 1
name = '%s.%d' % (name, count)
name = f'{name}.{count}'
count = counts.get(name, 0)

if old_name == '':
@@ -1662,7 +1662,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
char *data
ndarray result

result = np.empty(line_end - line_start, dtype='|S%d' % width)
result = np.empty(line_end - line_start, dtype=f'|S{width}')
data = <char*>result.data

with nogil:
@@ -2176,8 +2176,8 @@ def _concatenate_chunks(list chunks):
if warning_columns:
warning_names = ','.join(warning_columns)
warning_message = " ".join([
"Columns (%s) have mixed types." % warning_names,
"Specify dtype option on import or set low_memory=False."
f"Columns ({warning_names}) have mixed types."
f"Specify dtype option on import or set low_memory=False."
])
warnings.warn(warning_message, DtypeWarning, stacklevel=8)
return result
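
The zero-files branch in parsers.pyx is worth a second look: the old code passed source after a comma instead of %-formatting it, so ValueError received two arguments and the %s was never substituted; the f-string version actually interpolates the value. A small sketch with a made-up path:

source = "data.zip"  # illustrative path, not from the diff

old = ValueError('Zero files found in compressed '
                 'zip file %s', source)
new = ValueError(f'Zero files found in compressed '
                 f'zip file {source}')

print(old)  # ('Zero files found in compressed zip file %s', 'data.zip')
print(new)  # Zero files found in compressed zip file data.zip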
3 changes: 1 addition & 2 deletions pandas/_libs/reduction.pyx
@@ -92,8 +92,7 @@ cdef class Reducer:
if dummy.dtype != self.arr.dtype:
raise ValueError('Dummy array must be same dtype')
if len(dummy) != self.chunksize:
raise ValueError('Dummy array must be length {length}'
.format(length=self.chunksize))
raise ValueError(f'Dummy array must be length {self.chunksize}')

return dummy, typ, index, ityp

18 changes: 8 additions & 10 deletions pandas/_libs/sparse.pyx
@@ -53,7 +53,7 @@ cdef class IntIndex(SparseIndex):

def __repr__(self) -> str:
output = 'IntIndex\n'
output += 'Indices: %s\n' % repr(self.indices)
output += f'Indices: {repr(self.indices)}\n'
return output

@property
@@ -72,9 +72,8 @@ cdef class IntIndex(SparseIndex):
"""

if self.npoints > self.length:
msg = ("Too many indices. Expected "
"{exp} but found {act}").format(
exp=self.length, act=self.npoints)
msg = (f"Too many indices. Expected "
f"{self.length} but found {self.npoints}")
raise ValueError(msg)

# Indices are vacuously ordered and non-negative
@@ -343,8 +342,8 @@ cdef class BlockIndex(SparseIndex):

def __repr__(self) -> str:
output = 'BlockIndex\n'
output += 'Block locations: %s\n' % repr(self.blocs)
output += 'Block lengths: %s' % repr(self.blengths)
output += f'Block locations: {repr(self.blocs)}\n'
output += f'Block lengths: {repr(self.blengths)}'

return output

@@ -380,15 +379,14 @@ cdef class BlockIndex(SparseIndex):

if i < self.nblocks - 1:
if blocs[i] + blengths[i] > blocs[i + 1]:
raise ValueError('Block {idx} overlaps'.format(idx=i))
raise ValueError(f'Block {i} overlaps')
else:
if blocs[i] + blengths[i] > self.length:
raise ValueError('Block {idx} extends beyond end'
.format(idx=i))
raise ValueError(f'Block {i} extends beyond end')

# no zero-length blocks
if blengths[i] == 0:
raise ValueError('Zero-length block {idx}'.format(idx=i))
raise ValueError(f'Zero-length block {i}')

def equals(self, other):
if not isinstance(other, BlockIndex):
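
The sparse.pyx __repr__ hunks keep an explicit repr() call inside the placeholder; {x!r} is the shorter, equivalent spelling. A minimal sketch with a made-up index array:

import numpy as np

indices = np.array([0, 3, 5], dtype=np.int32)
assert f'Indices: {repr(indices)}\n' == f'Indices: {indices!r}\n'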