
Commit 37b2ca5

vivekmig authored and facebook-github-bot committed
Migrate to register_full_backward_hook (#837)
Summary: Modifies all module backward hooks to use the new register_full_backward_hook API documented here. This new API resolves many issues we previously encountered with backward module hooks. Since this API is available only in torch 1.8 and later, a fall-back to the original backward hook approach is kept for earlier versions. Due to issues described [here](pytorch/pytorch#57157), we are also deprecating attribution with respect to neuron outputs for NeuronDeepLift, NeuronGuidedBackprop, and NeuronDeconvolution; these methods require attributing with respect to neuron input (which is typically equivalent to attributing with respect to the previous layer's output). Additionally, in-place modules are not supported by full backward hooks, so they are no longer supported for DeepLift, LRP, GuidedBackprop / Deconvolution, and their corresponding variants. Documentation has been updated accordingly.

Pull Request resolved: #837

Reviewed By: NarineK

Differential Revision: D34380993

Pulled By: vivekmig

fbshipit-source-id: 8568d5f3783d6c05f76cfbf9f43f5276c7b30930
1 parent dad55b0 commit 37b2ca5

17 files changed: +153 −103 lines
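
Note: the practical, user-facing consequence of this change is that models passed to DeepLift, LRP, GuidedBackprop, and Deconvolution (and their layer/neuron variants) should avoid in-place nonlinearities. A minimal sketch of the supported usage pattern follows; the toy model, tensor shapes, and baselines are illustrative assumptions, not part of this commit.

import torch
import torch.nn as nn
from captum.attr import DeepLift

# Hypothetical toy model. Note inplace=False on the ReLU: full backward
# hooks (torch >= 1.8) do not support in-place modules, so inplace=True
# would not be supported with DeepLift after this change.
model = nn.Sequential(
    nn.Linear(3, 3),
    nn.ReLU(inplace=False),
    nn.Linear(3, 1),
)

inputs = torch.randn(2, 3)
baselines = torch.zeros(2, 3)

dl = DeepLift(model)
attributions = dl.attribute(inputs, baselines=baselines)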

captum/_utils/gradient.py

Lines changed: 21 additions & 0 deletions
@@ -84,6 +84,27 @@ def undo_gradient_requirements(
         input.requires_grad_(False)
 
 
+def register_backward_hook(
+    module: Module, hook: Callable, attr_obj: Any
+) -> torch.utils.hooks.RemovableHandle:
+    # Special case for supporting output attributions for neuron methods
+    # This can be removed after deprecation of neuron output attributions
+    # for NeuronDeepLift, NeuronDeconvolution, and NeuronGuidedBackprop
+    # in v0.6.0
+    if (
+        hasattr(attr_obj, "skip_new_hook_layer")
+        and attr_obj.skip_new_hook_layer == module
+    ):
+        return module.register_backward_hook(hook)
+
+    try:
+        # Only supported for torch >= 1.8
+        return module.register_full_backward_hook(hook)
+    except AttributeError:
+        # Fallback for previous versions of PyTorch
+        return module.register_backward_hook(hook)
+
+
 def compute_gradients(
     forward_fn: Callable,
     inputs: Union[Tensor, Tuple[Tensor, ...]],
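
A rough usage sketch of the new helper (the module, hook, and attribution stand-in below are illustrative assumptions): on torch >= 1.8 it registers a full backward hook, on older versions it silently falls back to register_backward_hook, and an attribution object whose skip_new_hook_layer matches the module forces the legacy hook for that module.

import torch
import torch.nn as nn
from captum._utils.gradient import register_backward_hook

relu = nn.ReLU()  # hypothetical module to hook

def print_grad_shapes(module, grad_input, grad_output):
    # Example hook body: inspect gradients flowing through the module.
    print([g.shape for g in grad_output if g is not None])

class DummyAttr:
    # Illustrative stand-in for an attribution object; setting
    # skip_new_hook_layer to the hooked module would force the legacy
    # register_backward_hook path for that module.
    skip_new_hook_layer = None

handle = register_backward_hook(relu, print_grad_shapes, DummyAttr())
out = relu(torch.randn(2, 3, requires_grad=True))
out.sum().backward()  # triggers the hook
handle.remove()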

captum/attr/_core/deep_lift.py

Lines changed: 9 additions & 3 deletions
@@ -21,6 +21,7 @@
 )
 from captum._utils.gradient import (
     apply_gradient_requirements,
+    register_backward_hook,
     undo_gradient_requirements,
 )
 from captum._utils.typing import (
@@ -112,7 +113,10 @@ def __init__(
         r"""
         Args:
 
-            model (nn.Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place nonlinear submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API
+                        starting from PyTorch v1.8.
             multiply_by_inputs (bool, optional): Indicates whether to factor
                         model inputs' multiplier in the final attribution scores.
                         In the literature this is also known as local vs global
@@ -542,7 +546,7 @@ def _register_hooks(
         # adds forward hook to leaf nodes that are non-linear
         forward_handle = module.register_forward_hook(self._forward_hook)
         pre_forward_handle = module.register_forward_pre_hook(self._forward_pre_hook)
-        backward_handle = module.register_backward_hook(self._backward_hook)
+        backward_handle = register_backward_hook(module, self._backward_hook, self)
         self.forward_handles.append(forward_handle)
         self.forward_handles.append(pre_forward_handle)
         self.backward_handles.append(backward_handle)
@@ -622,7 +626,9 @@ def __init__(self, model: Module, multiply_by_inputs: bool = True) -> None:
         r"""
         Args:
 
-            model (nn.Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place nonlinear submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API.
             multiply_by_inputs (bool, optional): Indicates whether to factor
                         model inputs' multiplier in the final attribution scores.
                         In the literature this is also known as local vs global

captum/attr/_core/guided_backprop_deconvnet.py

Lines changed: 8 additions & 3 deletions
@@ -7,6 +7,7 @@
 from captum._utils.common import _format_input, _format_output, _is_tuple
 from captum._utils.gradient import (
     apply_gradient_requirements,
+    register_backward_hook,
     undo_gradient_requirements,
 )
 from captum._utils.typing import TargetType, TensorOrTupleOfTensorsGeneric
@@ -74,7 +75,7 @@ def attribute(
 
     def _register_hooks(self, module: Module):
         if isinstance(module, torch.nn.ReLU):
-            hook = module.register_backward_hook(self._backward_hook)
+            hook = register_backward_hook(module, self._backward_hook, self)
             self.backward_hooks.append(hook)
 
     def _backward_hook(
@@ -116,7 +117,9 @@ def __init__(self, model: Module) -> None:
         r"""
         Args:
 
-            model (nn.Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place ReLU submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API.
         """
         ModifiedReluGradientAttribution.__init__(
             self, model, use_relu_grad_output=False
@@ -227,7 +230,9 @@ def __init__(self, model: Module) -> None:
         r"""
         Args:
 
-            model (nn.Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place ReLU submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API.
         """
         ModifiedReluGradientAttribution.__init__(self, model, use_relu_grad_output=True)
 

captum/attr/_core/guided_grad_cam.py

Lines changed: 5 additions & 1 deletion
@@ -51,7 +51,10 @@ def __init__(
         r"""
         Args:
 
-            model (nn.Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place ReLU submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API
+                        starting from PyTorch v1.8.
             layer (torch.nn.Module): Layer for which GradCAM attributions are computed.
                         Currently, only layers with a single tensor output are
                         supported.
@@ -194,6 +197,7 @@ def attribute(
                     "outputs is not supported."
                 )
             grad_cam_attr = grad_cam_attr[0]
+
         guided_backprop_attr = self.guided_backprop.attribute.__wrapped__(
             self.guided_backprop,  # self
             inputs=inputs,

captum/attr/_core/layer/layer_deep_lift.py

Lines changed: 8 additions & 2 deletions
@@ -69,7 +69,10 @@ def __init__(
         r"""
         Args:
 
-            model (torch.nn.Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place nonlinear submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API
+                        starting from PyTorch v1.8.
             layer (torch.nn.Module): Layer for which attributions are computed.
                         The size and dimensionality of the attributions
                         corresponds to the size and dimensionality of the layer's
@@ -397,7 +400,10 @@ def __init__(
         r"""
         Args:
 
-            model (torch.nn.Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place nonlinear submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API
+                        starting from PyTorch v1.8.
             layer (torch.nn.Module): Layer for which attributions are computed.
                         The size and dimensionality of the attributions
                         corresponds to the size and dimensionality of the layer's

captum/attr/_core/layer/layer_lrp.py

Lines changed: 5 additions & 0 deletions
@@ -42,6 +42,11 @@ def __init__(self, model: Module, layer: ModuleOrModuleList) -> None:
                     any modification of it. Custom rules for a given layer need to
                     be defined as attribute
                     `module.rule` and need to be of type PropagationRule.
+                    Model cannot contain any in-place nonlinear submodules;
+                    these are not supported by the register_full_backward_hook
+                    PyTorch API starting from PyTorch v1.8.
+
+
             layer (torch.nn.Module or list(torch.nn.Module)): Layer or layers
                     for which attributions are computed.
                     The size and dimensionality of the attributions

captum/attr/_core/lrp.py

Lines changed: 7 additions & 3 deletions
@@ -8,6 +8,7 @@
 from captum._utils.common import _format_input, _format_output, _is_tuple, _run_forward
 from captum._utils.gradient import (
     apply_gradient_requirements,
+    register_backward_hook,
     undo_gradient_requirements,
 )
 from captum._utils.typing import Literal, TargetType, TensorOrTupleOfTensorsGeneric
@@ -43,7 +44,10 @@ def __init__(self, model: Module) -> None:
                 it. Custom rules for a given layer need to be defined as attribute
                 `module.rule` and need to be of type PropagationRule. If no rule is
                 specified for a layer, a pre-defined default rule for the module type
-                is used.
+                is used. Model cannot contain any in-place nonlinear submodules;
+                these are not supported by the register_full_backward_hook
+                PyTorch API starting from PyTorch v1.8.
+
         """
         GradientAttribution.__init__(self, model)
         self.model = model
@@ -305,8 +309,8 @@ def _check_rules(self) -> None:
     def _register_forward_hooks(self) -> None:
         for layer in self.layers:
             if type(layer) in SUPPORTED_NON_LINEAR_LAYERS:
-                backward_handle = layer.register_backward_hook(
-                    PropagationRule.backward_hook_activation
+                backward_handle = register_backward_hook(
+                    layer, PropagationRule.backward_hook_activation, self
                 )
                 self.backward_handles.append(backward_handle)
             else:

captum/attr/_core/neuron/neuron_deep_lift.py

Lines changed: 32 additions & 2 deletions
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+import warnings
 from typing import Any, Callable, Tuple, Union, cast
 
 from captum._utils.gradient import construct_neuron_grad_fn
@@ -45,7 +46,10 @@ def __init__(
         r"""
         Args:
 
-            model (torch.nn.Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place nonlinear submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API
+                        starting from PyTorch v1.8.
             layer (torch.nn.Module): Layer for which neuron attributions are computed.
                         Attributions for a particular neuron for the input or output
                         of this layer are computed using the argument neuron_selector
@@ -227,6 +231,17 @@ def attribute(
         >>> attribution = dl.attribute(input, (4,1,2))
         """
         dl = DeepLift(cast(Module, self.forward_func), self.multiplies_by_inputs)
+        if not attribute_to_neuron_input:
+            warnings.warn(
+                "Attribution to neuron output is no longer supported for"
+                " NeuronDeepLift and will be deprecated in Captum"
+                " 0.6.0 due to changes in PyTorch's full backward hook"
+                " behavior. To obtain attributions for a neuron's"
+                " output, please attribute with respect to the next layer's input"
+            )
+            dl.skip_new_hook_layer = self.layer  # type: ignore
+        else:
+            dl.skip_new_hook_layer = None  # type: ignore
         dl.gradient_func = construct_neuron_grad_fn(
             self.layer,
             neuron_selector,
@@ -274,7 +289,10 @@ def __init__(
         r"""
         Args:
 
-            model (torch.nn.Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place nonlinear submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API
+                        starting from PyTorch v1.8.
             layer (torch.nn.Module): Layer for which neuron attributions are computed.
                         Attributions for a particular neuron for the input or output
                         of this layer are computed using the argument neuron_selector
@@ -448,7 +466,19 @@ def attribute(
         >>> # index (4,1,2).
         >>> attribution = dl.attribute(input, (4,1,2))
         """
+
         dl = DeepLiftShap(cast(Module, self.forward_func), self.multiplies_by_inputs)
+        if not attribute_to_neuron_input:
+            warnings.warn(
+                "Attribution to neuron output is no longer supported for"
+                " NeuronDeepLiftShap and will be deprecated in Captum"
+                " 0.6.0 due to changes in PyTorch's full backward hook"
+                " behavior. To obtain attributions for a neuron's"
+                " output, please attribute with respect to the next layer's input"
+            )
+            dl.skip_new_hook_layer = self.layer  # type: ignore
+        else:
+            dl.skip_new_hook_layer = None  # type: ignore
         dl.gradient_func = construct_neuron_grad_fn(
             self.layer,
             neuron_selector,
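
For callers hitting the new NeuronDeepLift / NeuronDeepLiftShap warning, the migration path is to attribute with respect to the input of the following layer rather than the output of the layer of interest. A hedged sketch, where the two-layer model and the chosen neuron index are illustrative assumptions:

import torch
import torch.nn as nn
from captum.attr import NeuronDeepLift

class TinyNet(nn.Module):
    # Hypothetical model; fc1's output is exactly relu's input.
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3, 4)
        self.relu = nn.ReLU(inplace=False)
        self.fc2 = nn.Linear(4, 1)

    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

model = TinyNet()
inputs = torch.randn(2, 3)

# Instead of attributing to the *output* of fc1 (deprecated path),
# attribute to the *input* of the next layer, relu, which is the same tensor.
neuron_dl = NeuronDeepLift(model, model.relu)
attr = neuron_dl.attribute(inputs, neuron_selector=2, attribute_to_neuron_input=True)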

captum/attr/_core/neuron/neuron_guided_backprop_deconvnet.py

Lines changed: 33 additions & 2 deletions
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+import warnings
 from typing import Any, Callable, List, Tuple, Union
 
 from captum._utils.gradient import construct_neuron_grad_fn
@@ -34,7 +35,10 @@ def __init__(
         r"""
         Args:
 
-            model (Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place ReLU submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API
+                        starting from PyTorch v1.8.
             layer (Module): Layer for which attributions are computed.
                         Output size of attribute matches this layer's input or
                         output dimensions, depending on whether we attribute to
@@ -159,6 +163,18 @@ def attribute(
         >>> # index (4,1,2).
         >>> attribution = neuron_deconv.attribute(input, (4,1,2))
         """
+        if not attribute_to_neuron_input:
+            warnings.warn(
+                "Attribution to neuron output is no longer supported for"
+                " NeuronDeconvolution and will be deprecated in Captum"
+                " 0.6.0 due to changes in PyTorch's full backward hook"
+                " behavior. To obtain attributions for a neuron's"
+                " output, please attribute with respect to the next layer's input"
+            )
+            self.deconv.skip_new_hook_layer = self.layer  # type: ignore
+        else:
+            self.deconv.skip_new_hook_layer = None  # type: ignore
+
         self.deconv.gradient_func = construct_neuron_grad_fn(
             self.layer, neuron_selector, self.device_ids, attribute_to_neuron_input
         )
@@ -191,7 +207,10 @@ def __init__(
         r"""
         Args:
 
-            model (Module): The reference to PyTorch model instance.
+            model (nn.Module): The reference to PyTorch model instance. Model cannot
+                        contain any in-place ReLU submodules; these are not
+                        supported by the register_full_backward_hook PyTorch API
+                        starting from PyTorch v1.8.
             layer (Module): Layer for which neuron attributions are computed.
                         Attributions for a particular neuron in the output of
                         this layer are computed using the argument neuron_selector
@@ -313,6 +332,18 @@ def attribute(
         >>> # index (4,1,2).
         >>> attribution = neuron_gb.attribute(input, (4,1,2))
         """
+        if not attribute_to_neuron_input:
+            warnings.warn(
+                "Attribution to neuron output is no longer supported for"
+                " NeuronGuidedBackprop and will be deprecated in Captum"
+                " 0.6.0 due to changes in PyTorch's full backward hook"
+                " behavior. To obtain attributions for a neuron's"
+                " output, please attribute with respect to the next layer's input"
+            )
+            self.guided_backprop.skip_new_hook_layer = self.layer  # type: ignore
+        else:
+            self.guided_backprop.skip_new_hook_layer = None  # type: ignore
+
         self.guided_backprop.gradient_func = construct_neuron_grad_fn(
             self.layer, neuron_selector, self.device_ids, attribute_to_neuron_input
         )
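
The same migration applies to NeuronDeconvolution and NeuronGuidedBackprop: pass the next layer and set attribute_to_neuron_input=True. A brief sketch with an illustrative model (the layer sizes and neuron index are assumptions):

import torch
import torch.nn as nn
from captum.attr import NeuronGuidedBackprop

# Illustrative model; the ReLU must not be in-place.
model = nn.Sequential(nn.Conv2d(1, 2, 3), nn.ReLU(inplace=False), nn.Conv2d(2, 2, 3))
inputs = torch.randn(1, 1, 8, 8)

# Attribute to the input of the ReLU (supported) rather than to the output
# of the preceding conv layer (the deprecated path that triggers the warning).
neuron_gbp = NeuronGuidedBackprop(model, model[1])
attr = neuron_gbp.attribute(inputs, neuron_selector=(0, 2, 2), attribute_to_neuron_input=True)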

tests/attr/layer/test_layer_deeplift.py

Lines changed: 6 additions & 6 deletions
@@ -25,7 +25,7 @@
 
 class TestDeepLift(BaseTest):
     def test_relu_layer_deeplift(self) -> None:
-        model = ReLULinearModel(inplace=True)
+        model = ReLULinearModel(inplace=False)
         inputs, baselines = _create_inps_and_base_for_deeplift_neuron_layer_testing()
 
         layer_dl = LayerDeepLift(model, model.relu)
@@ -39,7 +39,7 @@ def test_relu_layer_deeplift(self) -> None:
         assert_delta(self, delta)
 
     def test_relu_layer_deeplift_wo_mutliplying_by_inputs(self) -> None:
-        model = ReLULinearModel(inplace=True)
+        model = ReLULinearModel(inplace=False)
         inputs, baselines = _create_inps_and_base_for_deeplift_neuron_layer_testing()
 
         layer_dl = LayerDeepLift(model, model.relu, multiply_by_inputs=False)
@@ -83,7 +83,7 @@ def test_relu_layer_deeplift_add_args(self) -> None:
         assert_delta(self, delta)
 
     def test_linear_layer_deeplift(self) -> None:
-        model = ReLULinearModel(inplace=True)
+        model = ReLULinearModel(inplace=False)
         inputs, baselines = _create_inps_and_base_for_deeplift_neuron_layer_testing()
 
         layer_dl = LayerDeepLift(model, model.l3)
@@ -103,7 +103,7 @@ def test_relu_deeplift_with_custom_attr_func(self) -> None:
         self._relu_custom_attr_func_assert(attr_method, inputs, baselines, [[2.0]])
 
     def test_inplace_maxpool_relu_with_custom_attr_func(self) -> None:
-        model = BasicModel_MaxPool_ReLU(inplace=True)
+        model = BasicModel_MaxPool_ReLU(inplace=False)
         inp = torch.tensor([[[1.0, 2.0, -4.0], [-3.0, -2.0, -1.0]]])
         dl = LayerDeepLift(model, model.maxpool)
 
@@ -116,7 +116,7 @@ def custom_att_func(mult, inp, baseline):
         dl.attribute(inp, custom_attribution_func=custom_att_func)
 
     def test_linear_layer_deeplift_batch(self) -> None:
-        model = ReLULinearModel(inplace=True)
+        model = ReLULinearModel(inplace=False)
         _, baselines = _create_inps_and_base_for_deeplift_neuron_layer_testing()
         x1 = torch.tensor(
             [[-10.0, 1.0, -5.0], [-10.0, 1.0, -5.0], [-10.0, 1.0, -5.0]],
@@ -197,7 +197,7 @@ def test_relu_layer_deepliftshap_multiple_output(self) -> None:
         assert_delta(self, delta)
 
     def test_linear_layer_deepliftshap(self) -> None:
-        model = ReLULinearModel(inplace=True)
+        model = ReLULinearModel(inplace=False)
         (
             inputs,
             baselines,
