Allow truncation of hurdle distributions

ricardoV94 · ricardoV94 · commit ac73b55cacc4 · 2025-06-04T16:28:46.000+02:00
diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py
@@ -39,7 +39,6 @@
 )
 from pymc.distributions.shape_utils import _change_dist_size, change_dist_size, rv_size_is_none
 from pymc.distributions.transforms import _default_transform
-from pymc.distributions.truncated import Truncated
 from pymc.logprob.abstract import _logcdf, _logcdf_helper, _logprob
 from pymc.logprob.basic import logp
 from pymc.logprob.transforms import IntervalTransform
@@ -831,6 +830,8 @@ def _create(cls, *, name, nonzero_p, nonzero_dist, max_n_steps=10_000, **kwargs)
 
         Note: this is invalid for discrete nonzero distributions with mass below 0, as we simply truncate[lower=1].
         """
+        from pymc.distributions.truncated import Truncated
+
         dtype = nonzero_dist.dtype
 
         if dtype.startswith("int"):
diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py
@@ -35,6 +35,7 @@
     _support_point,
     support_point,
 )
+from pymc.distributions.mixture import _HurdleRV
 from pymc.distributions.shape_utils import (
     _change_dist_size,
     change_dist_size,
@@ -79,7 +80,9 @@ def rv_op(cls, dist, lower, upper, max_n_steps, *, size=None):
 
         # Try to use specialized Op
         try:
-            return _truncated(dist.owner.op, lower, upper, size, *dist.owner.inputs)
+            return _truncated(
+                dist.owner.op, lower, upper, size, *dist.owner.inputs, max_n_steps=max_n_steps
+            )
         except NotImplementedError:
             pass
 
@@ -222,7 +225,7 @@ def update(self, node: Apply):
 
 
 @singledispatch
-def _truncated(op: Op, lower, upper, size, *params):
+def _truncated(op: Op, lower, upper, size, *params, max_n_steps: int):
     """Return the truncated equivalent of another `RandomVariable`."""
     raise NotImplementedError(f"{op} does not have an equivalent truncated version implemented")
 
@@ -307,13 +310,14 @@ def dist(cls, dist, lower=None, upper=None, max_n_steps: int = 10_000, **kwargs)
                 f"Truncation dist must be a distribution created via the `.dist()` API, got {type(dist)}"
             )
 
-        if (
-            isinstance(dist.owner.op, SymbolicRandomVariable)
-            and "[size]" not in dist.owner.op.extended_signature
+        if isinstance(dist.owner.op, SymbolicRandomVariable) and not (
+            "[size]" in dist.owner.op.extended_signature
+            # If there's a specific _truncated dispatch for this RV, that's also fine
+            or _truncated.dispatch(type(dist.owner.op)) is not _truncated.dispatch(object)
         ):
             # Truncation needs to wrap the underlying dist, but not all SymbolicRandomVariables encapsulate the whole
             # random graph and as such we don't know where the actual inputs begin. This happens mostly for
-            # distribution factories like `Censored` and `Mixture` which would have a very complex signature if they
+            # distribution factories like `Censored` which would have a very complex signature if they
             # encapsulated the random components instead of taking them as inputs like they do now.
             # SymbolicRandomVariables that encapsulate the whole random graph can be identified for having a size parameter.
             raise NotImplementedError(f"Truncation not implemented for {dist.owner.op}")
@@ -462,7 +466,7 @@ def truncated_logcdf(op: TruncatedRV, value, *inputs, **kwargs):
 
 
 @_truncated.register(NormalRV)
-def _truncated_normal(op, lower, upper, size, rng, old_size, mu, sigma):
+def _truncated_normal(op, lower, upper, size, rng, old_size, mu, sigma, *, max_n_steps):
     return TruncatedNormal.dist(
         mu=mu,
         sigma=sigma,
@@ -472,3 +476,32 @@ def _truncated_normal(op, lower, upper, size, rng, old_size, mu, sigma):
         size=size,
         dtype=op.dtype,
     )
+
+
+@_truncated.register(_HurdleRV)
+def _truncated_hurdle(
+    op: _HurdleRV, lower, upper, size, rng, weights, zero_dist, dist, *, max_n_steps
+):
+    """Return the truncated equivalent of a hurdle RV, itself expressed as a hurdle RV.
+
+    The nonzero component is truncated to the bounds, and the point mass at zero is
+    dropped (its weight set to zero) whenever zero lies outside ``[lower, upper]``.
+    """
+    # Last axis of `weights` holds (zero weight, nonzero weight), matching the stack below
+    psi = weights[..., 1]
+
+    # True only while 0 stays inside the bounds; a `None` bound constrains nothing
+    checks = np.array(True)
+    if lower is not None:
+        checks &= lower <= 0
+    if upper is not None:
+        checks &= 0 <= upper
+
+    # psi == 1 means the zero component is never selected
+    adjusted_psi = pt.where(checks, psi, 1)
+    adjusted_weights = pt.stack([1 - adjusted_psi, adjusted_psi], axis=-1)
+
+    # A hurdle with the adjusted weights and the truncated nonzero distribution
+    # is equivalent to truncating the original hurdle distribution
+    truncated_dist = Truncated.dist(dist, lower=lower, upper=upper, max_n_steps=max_n_steps)
+    return op.rv_op(adjusted_weights, zero_dist, truncated_dist, size=size)
diff --git a/tests/distributions/test_mixture.py b/tests/distributions/test_mixture.py
@@ -49,6 +49,7 @@
     Poisson,
     StickBreakingWeights,
     Triangular,
+    Truncated,
     Uniform,
     ZeroInflatedBinomial,
     ZeroInflatedNegativeBinomial,
@@ -1710,3 +1711,30 @@ def logp_fn(value, psi, mu, sigma):
                 return np.log(psi) + st.lognorm.logpdf(value, sigma, 0, np.exp(mu))
 
         check_logp(HurdleLogNormal, Rplus, {"psi": Unit, "mu": R, "sigma": Rplusbig}, logp_fn)
+
+    @pytest.mark.parametrize("lower", (-np.inf, 0, None, 1))
+    def test_truncated_hurdle_lognormal(self, lower):
+        """Check draws and logp of a truncated HurdleLogNormal for several lower bounds."""
+        psi = 0.7
+        # Only `None` means unbounded; `lower or -np.inf` would also discard lower=0
+        lower_bound = -np.inf if lower is None else lower
+        x = HurdleLogNormal.dist(psi=psi, mu=3, sigma=1)
+        x_trunc = Truncated.dist(x, lower=lower, upper=30, size=(1000,))
+
+        # Forward draws must respect both bounds
+        x_trunc_draws = draw(x_trunc)
+        assert ((x_trunc_draws >= lower_bound) & (x_trunc_draws <= 30)).all()
+
+        x_trunc = Truncated.dist(x, lower=lower, upper=30, size=(4,))
+        x_trunc_logp = logp(x_trunc, [0, 5.5, 30.0, 30.1]).eval()
+        # The point mass at 0 is dropped (psi becomes 1) once 0 falls below the lower bound
+        effective_psi = psi if lower_bound <= 0 else 1
+        trunc_lognormal = Truncated.dist(LogNormal.dist(mu=3, sigma=1), lower=lower, upper=30)
+        np.testing.assert_allclose(
+            x_trunc_logp,
+            [
+                np.log(1 - effective_psi),  # mass at 0; -inf when 0 is truncated away
+                *(np.log(effective_psi) + logp(trunc_lognormal, [5.5, 30.0])).eval(),
+                -np.inf,  # 30.1 is outside the upper bound
+            ],
+        )