diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 8695e196c4f38..1fb543de46652 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -83,6 +83,7 @@ Other enhancements
 - Improved deprecation message for offset aliases (:issue:`60820`)
 - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
+- Support :class:`DataFrame`, :class:`Series` and :class:`Index` plugin accessors via entry points (:issue:`29076`)
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 7d6dd7b7c1a88..ecb69548445d5 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -346,3 +346,8 @@
     "unique",
     "wide_to_long",
 ]
+
+from .core.accessor import accessor_entry_point_loader
+
+accessor_entry_point_loader()
+del accessor_entry_point_loader
diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py
index 0331c26c805b6..3894aedab23c4 100644
--- a/pandas/core/accessor.py
+++ b/pandas/core/accessor.py
@@ -25,6 +25,11 @@
     from pandas import Index
     from pandas.core.generic import NDFrame
 
+from importlib.metadata import (
+    EntryPoints,
+    entry_points,
+)
+
 
 class DirNamesMixin:
     _accessors: set[str] = set()
@@ -393,3 +398,111 @@ def register_index_accessor(name: str) -> Callable[[TypeT], TypeT]:
     from pandas import Index
 
     return _register_accessor(name, Index)
+
+
+def accessor_entry_point_loader() -> None:
+    """
+    Load and register pandas accessors declared via entry points.
+
+    This function scans the 'pandas.<pd_obj>.accessor' entry point group for
+    accessors registered by third-party packages. Each entry point is expected
+    to follow the format:
+
+        # setup.py
+        entry_points={
+            'pandas.DataFrame.accessor': [ <name> = <module>:<AccessorClass>, ... ],
+            'pandas.Series.accessor': [ <name> = <module>:<AccessorClass>, ... ],
+            'pandas.Index.accessor': [ <name> = <module>:<AccessorClass>, ... ],
+        }
+
+    OR for pyproject.toml file:
+
+        # pyproject.toml
+        [project.entry-points."pandas.DataFrame.accessor"]
+        <name> = "<module>:<AccessorClass>"
+
+        [project.entry-points."pandas.Series.accessor"]
+        <name> = "<module>:<AccessorClass>"
+
+        [project.entry-points."pandas.Index.accessor"]
+        <name> = "<module>:<AccessorClass>"
+
+    For more information about entrypoints:
+    https://packaging.python.org/en/latest/guides/creating-and-discovering-plugins/#plugin-entry-points
+
+
+    For each valid entry point:
+    - A callable is registered under the entry point name using the
+      appropriate registration decorator function
+      (e.g. register_dataframe_accessor); the accessor class itself is only
+      imported, via ``load()``, when that callable is invoked.
+    - If two packages declare the same accessor name, a warning is issued,
+      and the one loaded last replaces the earlier one.
+
+    Notes
+    -----
+    - This function is only intended to be called at pandas startup.
+    - For more information about accessors read their documentation.
+
+    Warns
+    -----
+    UserWarning
+        If two entry points in the same group share an accessor name. The
+        accessor loaded last is kept and the earlier one is overridden.
+
+    Examples
+    --------
+    # setup.py
+    entry_points={
+        'pandas.DataFrame.accessor': [
+            'myplugin = myplugin.accessor:MyPluginAccessor',
+        ],
+    }
+    # END setup.py
+
+    - That entrypoint would allow the following code:
+
+    import pandas as pd
+
+    df = pd.DataFrame({"A": [1, 2, 3]})
+    df.myplugin.do_something()  # Calls MyPluginAccessor.do_something()
+    """
+
+    ACCESSOR_REGISTRY_FUNCTIONS: dict[str, Callable] = {
+        "pandas.DataFrame.accessor": register_dataframe_accessor,
+        "pandas.Series.accessor": register_series_accessor,
+        "pandas.Index.accessor": register_index_accessor,
+    }
+
+    def make_accessor(ep):
+        # ``ep.load()`` only runs when the returned callable is invoked, so
+        # registering an entry point does not import the plugin module.
+        return lambda self, ep=ep: ep.load()(self)
+
+    for pd_obj_entrypoint, register_fn in ACCESSOR_REGISTRY_FUNCTIONS.items():
+        accessors: EntryPoints = entry_points(group=pd_obj_entrypoint)
+        # accessor name -> package of the latest entry point seen in this group
+        accessor_package_dict: dict[str, str] = {}
+
+        for new_accessor in accessors:
+            dist = getattr(new_accessor, "dist", None)
+            new_pkg_name = getattr(dist, "name", "Unknown") if dist else "Unknown"
+
+            # Verifies duplicated accessor names
+            if new_accessor.name in accessor_package_dict:
+                loaded_pkg_name = accessor_package_dict[new_accessor.name]
+
+                warnings.warn(
+                    "Warning: you have two accessors with the same name:"
+                    f" '{new_accessor.name}' was already registered"
+                    f" by the package '{loaded_pkg_name}' and is now overridden by "
+                    f"the package '{new_pkg_name}'. The '{new_accessor.name}' "
+                    f"provided by the package '{loaded_pkg_name}' is not being used. "
+                    "Uninstall the package you don't want "
+                    "to use if you want to get rid of this warning.\n",
+                    UserWarning,
+                    stacklevel=2,
+                )
+
+            accessor_package_dict[new_accessor.name] = new_pkg_name
+            register_fn(new_accessor.name)(make_accessor(new_accessor))
diff --git a/pandas/tests/test_plugins_entrypoint_loader.py b/pandas/tests/test_plugins_entrypoint_loader.py
new file mode 100644
index 0000000000000..5699cf773b20b
--- /dev/null
+++ b/pandas/tests/test_plugins_entrypoint_loader.py
@@ -0,0 +1,438 @@
+from typing import Any
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.accessor import accessor_entry_point_loader
+
+PD_OBJECTS_ENTRYPOINTS = [
+    "pandas.DataFrame.accessor",
+    "pandas.Series.accessor",
+    "pandas.Index.accessor",
+]
+
+
+def create_mock_entry_points(entry_points: dict[str, list[tuple[str, Any, str]]]):
+    """
+    Auxiliary function to create mock entry points for testing accessor loading.
+
+    Parameters:
+    -----------
+    entry_points : dict of str to list of tuple
+        Maps an entry point group to (name, accessor_class, dist_name) tuples:
+        - name: str, the name of the accessor
+        - accessor_class: class, the accessor class to be returned by load()
+        - dist_name: str, the name of the distribution (package)
+
+    Returns:
+    --------
+    function
+        A mock_entry_points function that returns the mocked entry points.
+    """
+
+    class MockDistribution:
+        def __init__(self, name):
+            self.name = name
+
+    class MockEntryPoint:
+        def __init__(self, name, accessor_class, dist_name):
+            self.name = name
+            self._accessor_class = accessor_class
+            self.dist = MockDistribution(dist_name)
+
+        def load(self):
+            return self._accessor_class
+
+    # Create a dictionary of MockEntryPoint instances
+    group_map: dict[str, list[MockEntryPoint]] = {g: [] for g in PD_OBJECTS_ENTRYPOINTS}
+
+    for ep_group, ep_properties in entry_points.items():
+        for name, accessor_class, dist_name in ep_properties:
+            group_map[ep_group].append(MockEntryPoint(name, accessor_class, dist_name))
+
+    def mock_entry_points(*, group):
+        return group_map.get(group, [])
+
+    return mock_entry_points
+
+
+def test_no_accessors(monkeypatch):
+    # No entry points
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [],
+            "pandas.Series.accessor": [],
+            "pandas.Index.accessor": [],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+
+def test_load_dataframe_accessors(monkeypatch):
+    class TestAccessor:
+        def __init__(self, df):
+            self._df = df
+
+        def test_method(self):
+            return "success"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                (
+                    "test_accessor",
+                    TestAccessor,
+                    "TestPackage",
+                )
+            ],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+    # Create DataFrame and verify that the accessor was registered
+    df = pd.DataFrame({"a": [1, 2, 3]})
+    assert hasattr(df, "test_accessor")
+    assert df.test_accessor.test_method() == "success"
+
+
+def test_load_series_accessors(monkeypatch):
+    class TestAccessor:
+        def __init__(self, ser):
+            self._ser = ser
+
+        def test_method(self):
+            return "success"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.Series.accessor": [("test_accessor", TestAccessor, "TestPackage")],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+    # Create Series and verify that the accessor was registered
+    s = pd.Series([1, 2, 3])
+    assert hasattr(s, "test_accessor")
+    assert s.test_accessor.test_method() == "success"
+
+
+def test_load_index_accessors(monkeypatch):
+    class TestAccessor:
+        def __init__(self, idx):
+            self._idx = idx
+
+        def test_method(self):
+            return "success"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.Index.accessor": [("test_accessor", TestAccessor, "TestPackage")],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+    # Create Index and verify that the accessor was registered
+    idx = pd.Index([1, 2, 3])
+    assert hasattr(idx, "test_accessor")
+    assert idx.test_accessor.test_method() == "success"
+
+
+def test_duplicate_dataframe_accessor_names(monkeypatch):
+    class Accessor1:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("duplicate_accessor", Accessor1, "TestPackage1"),
+                ("duplicate_accessor", Accessor2, "TestPackage2"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that the UserWarning is raised
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+    assert any("you have two accessors with the same name:" in msg for msg in messages)
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "duplicate_accessor")
+    assert df.duplicate_accessor.which() == "Accessor2"  # Last registered accessor
+
+
+def test_duplicate_series_accessor_names(monkeypatch):
+    class Accessor1:
+        def __init__(self, series):
+            self._series = series
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, series):
+            self._series = series
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.Series.accessor": [
+                ("duplicate_accessor", Accessor1, "TestPackage1"),
+                ("duplicate_accessor", Accessor2, "TestPackage2"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that the UserWarning is raised
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+    assert any("you have two accessors with the same name:" in msg for msg in messages)
+
+    s = pd.Series([1, 2, 3])
+    assert hasattr(s, "duplicate_accessor")
+    assert s.duplicate_accessor.which() == "Accessor2"  # Last registered accessor
+
+
+def test_duplicate_index_accessor_names(monkeypatch):
+    class Accessor1:
+        def __init__(self, idx):
+            self._idx = idx
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, idx):
+            self._idx = idx
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.Index.accessor": [
+                ("duplicate_accessor", Accessor1, "TestPackage1"),
+                ("duplicate_accessor", Accessor2, "TestPackage2"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that the UserWarning is raised
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+    assert any("you have two accessors with the same name:" in msg for msg in messages)
+
+    idx = pd.Index([1, 2, 3])
+    assert hasattr(idx, "duplicate_accessor")
+    assert idx.duplicate_accessor.which() == "Accessor2"  # Last registered accessor
+
+
+def test_wrong_obj_accessor(monkeypatch):
+    class Accessor1:
+        def __init__(self, obj):
+            self._obj = obj
+
+        def which(self):
+            return "Accessor1"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("accessor", Accessor1, "TestPackage1"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+    # Check that the accessor is not registered for Index
+    idx = pd.Index([1, 2, 3])
+    assert not hasattr(idx, "accessor"), "Accessor should not be registered for Index"
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "accessor")
+    assert df.accessor.which() == "Accessor1"
+
+
+def test_unique_accessor_names(monkeypatch):
+    class Accessor1:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("accessor1", Accessor1, "Package1"),
+                ("accessor2", Accessor2, "Package2"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that no UserWarning is raised
+    with tm.assert_produces_warning(None, check_stacklevel=False):
+        accessor_entry_point_loader()
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "accessor1"), "Accessor1 not registered"
+    assert hasattr(df, "accessor2"), "Accessor2 not registered"
+
+    assert df.accessor1.which() == "Accessor1", "Accessor1 method incorrect"
+    assert df.accessor2.which() == "Accessor2", "Accessor2 method incorrect"
+
+
+def test_duplicate_and_unique_accessor_names(monkeypatch):
+    class Accessor1:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor2"
+
+    class Accessor3:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor3"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("duplicate_accessor", Accessor1, "Package1"),
+                ("duplicate_accessor", Accessor2, "Package2"),
+                ("unique_accessor", Accessor3, "Package3"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Capture warnings
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+
+    # Filter warnings for the specific message about duplicate accessors
+    duplicate_package_warnings = [
+        msg
+        for msg in messages
+        if "you have two accessors with the same name: 'duplicate_accessor'" in msg
+    ]
+
+    # Assert one warning about duplicate accessors
+    assert len(duplicate_package_warnings) == 1, (
+        f"Expected exactly one warning about duplicate accessors, "
+        f"got {len(duplicate_package_warnings)}: {duplicate_package_warnings}"
+    )
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "duplicate_accessor"), "duplicate_accessor not registered"
+    assert hasattr(df, "unique_accessor"), "unique_accessor not registered"
+
+    assert df.duplicate_accessor.which() == "Accessor2", (
+        "duplicate_accessor should use Accessor2"
+    )
+    assert df.unique_accessor.which() == "Accessor3", "unique_accessor method incorrect"
+
+
+def test_duplicate_names_different_pandas_objs(monkeypatch):
+    class Accessor1:
+        def __init__(self, obj):
+            self._obj = obj
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, obj):
+            self._obj = obj
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("acc1", Accessor1, "Package1"),
+                ("acc2", Accessor2, "Package2"),
+            ],
+            "pandas.Series.accessor": [
+                ("acc1", Accessor1, "Package1"),
+                ("acc2", Accessor2, "Package2"),
+            ],
+            "pandas.Index.accessor": [
+                ("acc1", Accessor1, "Package1"),
+                ("acc2", Accessor2, "Package2"),
+            ],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that no UserWarning is raised
+    with tm.assert_produces_warning(None, check_stacklevel=False):
+        accessor_entry_point_loader()
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "acc1")
+    assert df.acc1.which() == "Accessor1"
+    assert hasattr(df, "acc2")
+    assert df.acc2.which() == "Accessor2"
+
+    s = pd.Series([1, 2, 3])
+    assert hasattr(s, "acc1")
+    assert s.acc1.which() == "Accessor1"
+    assert hasattr(s, "acc2")
+    assert s.acc2.which() == "Accessor2"
+
+    idx = pd.Index([1, 2, 3])
+    assert hasattr(idx, "acc1")
+    assert idx.acc1.which() == "Accessor1"
+    assert hasattr(idx, "acc2")
+    assert idx.acc2.which() == "Accessor2"