Skip to content

TST: dtype related test failures on 32bit system #36579

Closed
@fangchenli

Description

@fangchenli
Member
  1. tests/test_algos.py:TestHashTable:test_lookup_nan — TST: 32bit dtype compat #36579 (#36584)
  2. tests/test_algos.py:TestHashTable:test_lookup_overflow — TST: 32bit dtype compat #36579 (#36584)
  3. tests/indexes/test_base.py:test_get_indexer_non_unique_wrong_dtype — TST: 32bit dtype compat #36579 (#36584)
  4. tests/indexes/period/test_indexing.py:TestGetIndexer:test_get_indexer_non_unique — TST: 32bit dtype compat #36579 (#36584)
E       AssertionError: numpy array are different
E       
E       Attribute "dtype" are different
E       [left]:  int32
E       [right]: int64

Activity

added
Needs Triage (Issue that has not been reviewed by a pandas team member)
on Sep 23, 2020
added a commit that references this issue on Sep 24, 2020
added
32bit (32-bit systems)
and removed
Needs Triage (Issue that has not been reviewed by a pandas team member)
on Sep 30, 2020
fangchenli

fangchenli commented on Oct 25, 2020

@fangchenli
Member · Author

There are several new failures reported by #35898.

________________________ test_stat_method[var-kwargs0] _________________________

pandasmethname = 'var', kwargs = {'ddof': 0}

    @pytest.mark.parametrize(
        "pandasmethname, kwargs",
        [
            ("var", {"ddof": 0}),
            ("var", {"ddof": 1}),
            ("kurtosis", {}),
            ("skew", {}),
            ("sem", {}),
        ],
    )
    def test_stat_method(pandasmethname, kwargs):
        s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, np.nan, np.nan], dtype="Float64")
        pandasmeth = getattr(s, pandasmethname)
        result = pandasmeth(**kwargs)
        s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64")
        pandasmeth = getattr(s2, pandasmethname)
        expected = pandasmeth(**kwargs)
>       assert expected == result
E       assert 0.029166666666666664 == 0.02916666666666666

pandas/tests/arrays/floating/test_function.py:91: AssertionError
________________________ test_stat_method[var-kwargs1] _________________________

pandasmethname = 'var', kwargs = {'ddof': 1}

    @pytest.mark.parametrize(
        "pandasmethname, kwargs",
        [
            ("var", {"ddof": 0}),
            ("var", {"ddof": 1}),
            ("kurtosis", {}),
            ("skew", {}),
            ("sem", {}),
        ],
    )
    def test_stat_method(pandasmethname, kwargs):
        s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, np.nan, np.nan], dtype="Float64")
        pandasmeth = getattr(s, pandasmethname)
        result = pandasmeth(**kwargs)
        s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64")
        pandasmeth = getattr(s2, pandasmethname)
        expected = pandasmeth(**kwargs)
>       assert expected == result
E       assert 0.034999999999999996 == 0.03499999999999999

pandas/tests/arrays/floating/test_function.py:91: AssertionError
______________________ test_stat_method[kurtosis-kwargs2] ______________________

pandasmethname = 'kurtosis', kwargs = {}

    @pytest.mark.parametrize(
        "pandasmethname, kwargs",
        [
            ("var", {"ddof": 0}),
            ("var", {"ddof": 1}),
            ("kurtosis", {}),
            ("skew", {}),
            ("sem", {}),
        ],
    )
    def test_stat_method(pandasmethname, kwargs):
        s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, np.nan, np.nan], dtype="Float64")
        pandasmeth = getattr(s, pandasmethname)
        result = pandasmeth(**kwargs)
        s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64")
        pandasmeth = getattr(s2, pandasmethname)
        expected = pandasmeth(**kwargs)
>       assert expected == result
E       assert -1.2000000000000002 == -1.1999999999999993

pandas/tests/arrays/floating/test_function.py:91: AssertionError
________________________ test_stat_method[sem-kwargs4] _________________________

pandasmethname = 'sem', kwargs = {}

    @pytest.mark.parametrize(
        "pandasmethname, kwargs",
        [
            ("var", {"ddof": 0}),
            ("var", {"ddof": 1}),
            ("kurtosis", {}),
            ("skew", {}),
            ("sem", {}),
        ],
    )
    def test_stat_method(pandasmethname, kwargs):
        s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, np.nan, np.nan], dtype="Float64")
        pandasmeth = getattr(s, pandasmethname)
        result = pandasmeth(**kwargs)
        s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64")
        pandasmeth = getattr(s2, pandasmethname)
        expected = pandasmeth(**kwargs)
>       assert expected == result
E       assert 0.07637626158259733 == 0.07637626158259732

pandas/tests/arrays/floating/test_function.py:91: AssertionError
__________________ test_memory_usage[series-with-empty-index] __________________

index_or_series_obj = Series([], Name: a, dtype: float64)

    @pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
    def test_memory_usage(index_or_series_obj):
        obj = index_or_series_obj
    
        res = obj.memory_usage()
        res_deep = obj.memory_usage(deep=True)
    
        is_object = is_object_dtype(obj) or (
            isinstance(obj, Series) and is_object_dtype(obj.index)
        )
        is_categorical = is_categorical_dtype(obj.dtype) or (
            isinstance(obj, Series) and is_categorical_dtype(obj.index.dtype)
        )
    
        if len(obj) == 0:
            expected = 0 if isinstance(obj, Index) else 80
>           assert res_deep == res == expected
E           assert 48 == 80

pandas/tests/base/test_misc.py:132: AssertionError
___________________________ test_groupby_nat_exclude ___________________________

    def test_groupby_nat_exclude():
        # GH 6992
        df = DataFrame(
            {
                "values": np.random.randn(8),
                "dt": [
                    np.nan,
                    pd.Timestamp("2013-01-01"),
                    np.nan,
                    pd.Timestamp("2013-02-01"),
                    np.nan,
                    pd.Timestamp("2013-02-01"),
                    np.nan,
                    pd.Timestamp("2013-01-01"),
                ],
                "str": [np.nan, "a", np.nan, "a", np.nan, "a", np.nan, "b"],
            }
        )
        grouped = df.groupby("dt")
    
        expected = [Index([1, 7]), Index([3, 5])]
        keys = sorted(grouped.groups.keys())
        assert len(keys) == 2
        for k, e in zip(keys, expected):
            # grouped.groups keys are np.datetime64 with system tz
            # not to be affected by tz, only compare values
            tm.assert_index_equal(grouped.groups[k], e)
    
        # confirm obj is not filtered
        tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df)
        assert grouped.ngroups == 2
    
        expected = {
            Timestamp("2013-01-01 00:00:00"): np.array([1, 7], dtype=np.int64),
            Timestamp("2013-02-01 00:00:00"): np.array([3, 5], dtype=np.int64),
        }
    
        for k in grouped.indices:
>           tm.assert_numpy_array_equal(grouped.indices[k], expected[k])
E           AssertionError: numpy array are different
E           
E           Attribute "dtype" are different
E           [left]:  int32
E           [right]: int64

pandas/tests/groupby/test_groupby.py:1276: AssertionError
____________________________ test_info_int_columns _____________________________

    def test_info_int_columns():
        # GH#37245
        df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"])
        buf = StringIO()
        df.info(null_counts=True, buf=buf)
        result = buf.getvalue()
        expected = textwrap.dedent(
            """\
            <class 'pandas.core.frame.DataFrame'>
            Index: 2 entries, A to B
            Data columns (total 2 columns):
             #   Column  Non-Null Count  Dtype
            ---  ------  --------------  -----
             0   1       2 non-null      int64
             1   2       2 non-null      int64
            dtypes: int64(2)
            memory usage: 48.0+ bytes
            """
        )
>       assert result == expected
E       assert "<class 'pand...40.0+ bytes\n" == "<class 'pand...48.0+ bytes\n"
E         Skipping 257 identical leading characters in diff, use -v to show
E         - y usage: 48.0+ bytes
E         ?           ^
E         + y usage: 40.0+ bytes
E         ?           ^

pandas/tests/io/formats/test_info.py:497: AssertionError
_____________________ test_float_precision_options[c_high] _____________________

c_parser_only = <pandas.tests.io.parser.conftest.CParserHighMemory object at 0xf15966ec>

    def test_float_precision_options(c_parser_only):
        # GH 17154, 36228
        parser = c_parser_only
        s = "foo\n243.164\n"
        df = parser.read_csv(StringIO(s))
        df2 = parser.read_csv(StringIO(s), float_precision="high")
    
        tm.assert_frame_equal(df, df2)
    
        df3 = parser.read_csv(StringIO(s), float_precision="legacy")
    
>       assert not df.iloc[0, 0] == df3.iloc[0, 0]
E       assert not 243.164 == 243.164

pandas/tests/io/parser/test_c_parser_only.py:720: AssertionError
_____________________ test_float_precision_options[c_low] ______________________

c_parser_only = <pandas.tests.io.parser.conftest.CParserLowMemory object at 0xf159614c>

    def test_float_precision_options(c_parser_only):
        # GH 17154, 36228
        parser = c_parser_only
        s = "foo\n243.164\n"
        df = parser.read_csv(StringIO(s))
        df2 = parser.read_csv(StringIO(s), float_precision="high")
    
        tm.assert_frame_equal(df, df2)
    
        df3 = parser.read_csv(StringIO(s), float_precision="legacy")
    
>       assert not df.iloc[0, 0] == df3.iloc[0, 0]
E       assert not 243.164 == 243.164

pandas/tests/io/parser/test_c_parser_only.py:720: AssertionError
__________________________ TestPivot.test_pivot_empty __________________________

self = <pandas.tests.reshape.test_pivot.TestPivot object at 0xbe10860c>

    def test_pivot_empty(self):
        df = DataFrame(columns=["a", "b", "c"])
>       result = df.pivot("a", "b", "c")

pandas/tests/reshape/test_pivot.py:2109: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
pandas/core/frame.py:6808: in pivot
    return pivot(self, index=index, columns=columns, values=values)
pandas/core/reshape/pivot.py:477: in pivot
    return indexed.unstack(columns)
pandas/core/series.py:3886: in unstack
    return unstack(self, level, fill_value)
pandas/core/reshape/reshape.py:423: in unstack
    obj.index, level=level, constructor=obj._constructor_expanddim
pandas/core/reshape/reshape.py:118: in __init__
    self._make_selectors()
pandas/core/reshape/reshape.py:152: in _make_selectors
    remaining_labels = self.sorted_labels[:-1]
pandas/_libs/properties.pyx:33: in pandas._libs.properties.CachedProperty.__get__
    val = self.func(obj)
pandas/core/reshape/reshape.py:139: in sorted_labels
    indexer, to_sort = self._indexer_and_to_sort
pandas/_libs/properties.pyx:33: in pandas._libs.properties.CachedProperty.__get__
    val = self.func(obj)
pandas/core/reshape/reshape.py:132: in _indexer_and_to_sort
    indexer = libalgos.groupsort_indexer(comp_index, ngroups)[0]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
E   ValueError: Buffer dtype mismatch, expected 'const int64_t' but got 'int'

pandas/_libs/algos.pyx:177: ValueError

fangchenli

fangchenli commented on Oct 25, 2020

@fangchenli
Member · Author

Comment on the xfailed test: on a 32-bit system, the output dtype is also int64. Is this a bug?

____________________________ test_info_int_columns _____________________________

    def test_info_int_columns():
        # GH#37245
        df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"])
        buf = StringIO()
        df.info(null_counts=True, buf=buf)
        result = buf.getvalue()
        expected = textwrap.dedent(
            """\
            <class 'pandas.core.frame.DataFrame'>
            Index: 2 entries, A to B
            Data columns (total 2 columns):
             #   Column  Non-Null Count  Dtype
            ---  ------  --------------  -----
             0   1       2 non-null      int64
             1   2       2 non-null      int64
            dtypes: int64(2)
            memory usage: 48.0+ bytes
            """
        )
>       assert result == expected
E       assert "<class 'pand...40.0+ bytes\n" == "<class 'pand...48.0+ bytes\n"
E         Skipping 257 identical leading characters in diff, use -v to show
E         - y usage: 48.0+ bytes
E         ?           ^
E         + y usage: 40.0+ bytes
E         ?           ^

pandas/tests/io/formats/test_info.py:497: AssertionError
added a commit that references this issue on Oct 28, 2020
ivanovmg

ivanovmg commented on Nov 4, 2020

@ivanovmg
Member

One more failure (being addressed in #37623).

def test_groupby_nan_included():
        # GH 35646
        data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]}
        df = pd.DataFrame(data)
        grouped = df.groupby("group", dropna=False)
        result = grouped.indices
        dtype = "int64"
        expected = {
            "g1": np.array([0, 2], dtype=dtype),
            "g2": np.array([3], dtype=dtype),
            np.nan: np.array([1, 4], dtype=dtype),
        }
        for result_values, expected_values in zip(result.values(), expected.values()):
>           tm.assert_numpy_array_equal(result_values, expected_values)
E           AssertionError: numpy array are different
E           
E           Attribute "dtype" are different
E           [left]:  int32
E           [right]: int64
added
Testing (pandas testing functions or related to the test suite)
on Aug 13, 2021
mroeschke

mroeschke commented on Nov 22, 2023

@mroeschke
Member

Looks like the tests in the OP have all been marked now, so closing.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Metadata

Assignees

No one assigned

    Labels

    32bit (32-bit systems), Testing (pandas testing functions or related to the test suite)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

      Development

      No branches or pull requests

        Participants

        @fangchenli, @mroeschke, @ivanovmg

        Issue actions

          TST: dtype related test failures on 32bit system · Issue #36579 · pandas-dev/pandas