|
1 | 1 | # pylint: disable=E1103 |
2 | 2 |
|
3 | | -import pytest |
4 | | -from datetime import datetime, date |
5 | | -from numpy.random import randn |
6 | | -from numpy import nan |
7 | | -import numpy as np |
8 | 3 | import random |
9 | 4 | import re |
| 5 | +from collections import OrderedDict |
| 6 | +from datetime import date, datetime |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import pytest |
| 10 | +from numpy import nan |
| 11 | +from numpy.random import randn |
10 | 12 |
|
11 | 13 | import pandas as pd |
| 14 | +import pandas.util.testing as tm |
| 15 | +from pandas import (Categorical, CategoricalIndex, DataFrame, DatetimeIndex, |
| 16 | + Float64Index, Index, Int64Index, MultiIndex, RangeIndex, |
| 17 | + Series, UInt64Index) |
| 18 | +from pandas.api.types import CategoricalDtype as CDT |
12 | 19 | from pandas.compat import lrange, lzip |
| 20 | +from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype |
| 21 | +from pandas.core.dtypes.dtypes import CategoricalDtype |
13 | 22 | from pandas.core.reshape.concat import concat |
14 | | -from pandas.core.reshape.merge import merge, MergeError |
| 23 | +from pandas.core.reshape.merge import MergeError, merge |
15 | 24 | from pandas.util.testing import assert_frame_equal, assert_series_equal |
16 | | -from pandas.core.dtypes.dtypes import CategoricalDtype |
17 | | -from pandas.core.dtypes.common import ( |
18 | | - is_categorical_dtype, |
19 | | - is_object_dtype, |
20 | | -) |
21 | | -from pandas import DataFrame, Index, MultiIndex, Series, Categorical |
22 | | -import pandas.util.testing as tm |
23 | | -from pandas.api.types import CategoricalDtype as CDT |
24 | 25 |
|
25 | 26 | N = 50 |
26 | 27 | NGROUPS = 8 |
@@ -813,7 +814,7 @@ def test_validation(self): |
813 | 814 |
|
814 | 815 | # Dups on right |
815 | 816 | right_w_dups = right.append(pd.DataFrame({'a': ['e'], 'c': ['moo']}, |
816 | | - index=[4])) |
| 817 | + index=[4])) |
817 | 818 | merge(left, right_w_dups, left_index=True, right_index=True, |
818 | 819 | validate='one_to_many') |
819 | 820 |
|
@@ -1388,17 +1389,24 @@ def test_merge_datetime_index(self, klass): |
1388 | 1389 | if klass is not None: |
1389 | 1390 | on_vector = klass(on_vector) |
1390 | 1391 |
|
1391 | | - expected = DataFrame({"a": [1, 2, 3]}) |
1392 | | - |
1393 | | - if klass == np.asarray: |
1394 | | - # The join key is added for ndarray. |
1395 | | - expected["key_1"] = [2016, 2017, 2018] |
| 1392 | + expected = DataFrame( |
| 1393 | + OrderedDict([ |
| 1394 | + ("a", [1, 2, 3]), |
| 1395 | + ("key_1", [2016, 2017, 2018]), |
| 1396 | + ]) |
| 1397 | + ) |
1396 | 1398 |
|
1397 | 1399 | result = df.merge(df, on=["a", on_vector], how="inner") |
1398 | 1400 | tm.assert_frame_equal(result, expected) |
1399 | 1401 |
|
1400 | | - expected = DataFrame({"a_x": [1, 2, 3], |
1401 | | - "a_y": [1, 2, 3]}) |
| 1402 | + expected = DataFrame( |
| 1403 | + OrderedDict([ |
| 1404 | + ("key_0", [2016, 2017, 2018]), |
| 1405 | + ("a_x", [1, 2, 3]), |
| 1406 | + ("a_y", [1, 2, 3]), |
| 1407 | + ]) |
| 1408 | + ) |
| 1409 | + |
1402 | 1410 | result = df.merge(df, on=[df.index.year], how="inner") |
1403 | 1411 | tm.assert_frame_equal(result, expected) |
1404 | 1412 |
|
@@ -1427,7 +1435,7 @@ def test_different(self, right_vals): |
1427 | 1435 | # We allow merging on object and categorical cols and cast |
1428 | 1436 | # categorical cols to object |
1429 | 1437 | if (is_categorical_dtype(right['A'].dtype) or |
1430 | | - is_object_dtype(right['A'].dtype)): |
| 1438 | + is_object_dtype(right['A'].dtype)): |
1431 | 1439 | result = pd.merge(left, right, on='A') |
1432 | 1440 | assert is_object_dtype(result.A.dtype) |
1433 | 1441 |
|
@@ -1826,3 +1834,26 @@ def test_merge_on_indexes(self, left_df, right_df, how, sort, expected): |
1826 | 1834 | how=how, |
1827 | 1835 | sort=sort) |
1828 | 1836 | tm.assert_frame_equal(result, expected) |
| 1837 | + |
| 1838 | + |
| 1839 | +@pytest.mark.parametrize( |
| 1840 | + 'index', [ |
| 1841 | + CategoricalIndex(['A', 'B'], categories=['A', 'B'], name='index_col'), |
| 1842 | + Float64Index([1.0, 2.0], name='index_col'), |
| 1843 | + Int64Index([1, 2], name='index_col'), |
| 1844 | + UInt64Index([1, 2], name='index_col'), |
| 1845 | + RangeIndex(start=0, stop=2, name='index_col'), |
| 1846 | + DatetimeIndex(["2018-01-01", "2018-01-02"], name='index_col'), |
| 1847 | + ], ids=lambda x: type(x).__name__) |
| 1848 | +def test_merge_index_types(index): |
| 1849 | + # gh-20777: merge keyed on the index *name* ('index_col'), not a column |
| 1850 | + # must succeed and keep that index in the result for every index type |
| 1851 | + left = DataFrame({"left_data": [1, 2]}, index=index) |
| 1852 | + right = DataFrame({"right_data": [1.0, 2.0]}, index=index) |
| 1853 | + |
| 1854 | + result = left.merge(right, on=['index_col']) |
| 1855 | + |
| 1856 | + expected = DataFrame( |
| 1857 | + OrderedDict([('left_data', [1, 2]), ('right_data', [1.0, 2.0])]), |
| 1858 | + index=index) |
| 1859 | + assert_frame_equal(result, expected) |
0 commit comments