facebookresearch · lbrabson · Sep 1, 2025 · Oct 4, 2025 · Nov 11, 2025 · Nov 17, 2025
diff --git a/src/fairchem/core/components/benchmark/deformation_reducer.py b/src/fairchem/core/components/benchmark/deformation_reducer.py
@@ -0,0 +1,79 @@
+"""
+Copyright (c) Meta Platforms, Inc. and affiliates.
+
+This source code is licensed under the MIT license found in the
+LICENSE file in the root directory of this source tree.
+"""
+
+from __future__ import annotations
+
+from typing import TypeVar
+
+import numpy as np
+import pandas as pd
+
+from fairchem.core.components.benchmark.benchmark_reducer import JsonDFReducer
+
+R = TypeVar("R")
+M = TypeVar("M")
+
+
+class DeformationReducer(JsonDFReducer):
+    """Reducer for deformation benchmark results."""
+
+    def __init__(
+        self,
+        benchmark_name: str,
+        target_ads_key: str | None = None,
+        target_int_key: str | None = None,
+        target_mof_deform_key: str | None = None,
+        index_name: str | None = None,
+    ):
+        """
+        Initialize DeformationReducer.
+
+        Args:
+            benchmark_name: Name of the benchmark, used for file naming
+            target_ads_key: Column name for target adsorption energies
+            target_int_key: Column name for target interaction energies
+            target_mof_deform_key: Column name for target MOF deformation energies
+            index_name: Optional index column name for output DataFrame
+        """
+        self.index_name = index_name
+        self.benchmark_name = benchmark_name
+        self.target_ads_key = target_ads_key
+        self.target_int_key = target_int_key
+        self.target_mof_deform_key = target_mof_deform_key
+
+    def compute_metrics(self, results: pd.DataFrame, run_name: str) -> pd.DataFrame:
+        """
+        Compute mean absolute error metrics for E_ads, E_int, and E_mof_deform.
+
+        Args:
+            results: DataFrame containing prediction results and target columns
+            run_name: Name of the current run, used as index in the metrics DataFrame
+
+        Returns:
+            DataFrame containing computed metrics with run_name as index
+        """
+        ads_pred_col = results["E_ads"].tolist()
+        int_pred_col = results["E_int"].tolist()
+        mof_deform_pred_col = results["E_mof_deform"].tolist()
+
+        ads_target = results[self.target_ads_key].tolist()
+        int_target = results[self.target_int_key].tolist()
+        mof_deform_target = results[self.target_mof_deform_key].tolist()
+
+        metrics = {
+            "E_ads,mae": np.mean(
+                [abs(x - y) for x, y in zip(ads_pred_col, ads_target)]
+            ),
+            "E_int,mae": np.mean(
+                [abs(x - y) for x, y in zip(int_pred_col, int_target)]
+            ),
+            "E_mof_deform,mae": np.mean(
+                [abs(x - y) for x, y in zip(mof_deform_pred_col, mof_deform_target)]
+            ),
+        }
+
+        return pd.DataFrame([metrics], index=[run_name])
diff --git a/src/fairchem/core/components/calculate/_single/deform_relax_functions.py b/src/fairchem/core/components/calculate/_single/deform_relax_functions.py
@@ -0,0 +1,59 @@
+"""
+Copyright (c) Meta Platforms, Inc. and affiliates.
+
+This source code is licensed under the MIT license found in the
+LICENSE file in the root directory of this source tree.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from ase.constraints import FixSymmetry
+from ase.optimize import FIRE
+
+if TYPE_CHECKING:
+    from ase import Atoms
+    from ase.filters import Filter
+    from ase.optimize import Optimizer
+
+
+def relax_atoms_w_maxstep(
+    atoms: Atoms,
+    steps: int = 500,
+    fmax: float = 0.02,
+    maxstep: float | None = None,
+    optimizer_cls: type[Optimizer] | None = None,
+    fix_symmetry: bool = False,
+    cell_filter_cls: type[Filter] | None = None,
+) -> Atoms:
+    """Run a relaxation on ASE atoms and return the relaxed structure.
+
+    Args:
+        atoms: ASE Atoms object with a calculator
+        steps: Maximum number of relaxation steps
+        fmax: Force convergence threshold
+        maxstep: Maximum atomic displacement per iteration
+        optimizer_cls: ASE optimizer class. Defaults to FIRE
+        fix_symmetry: Whether to fix structure symmetry during relaxation
+        cell_filter_cls: Optional ASE filter to modify the atoms before optimization
+
+    Returns:
+        Atoms: Relaxed ASE atoms object
+    """
+
+    if fix_symmetry:
+        atoms.set_constraint(FixSymmetry(atoms))
+
+    if cell_filter_cls is not None:
+        _atoms = cell_filter_cls(atoms)
+    else:
+        _atoms = atoms
+
+    optimizer_cls = FIRE if optimizer_cls is None else optimizer_cls
+    opt = optimizer_cls(_atoms, maxstep=maxstep, logfile=None)
+    opt.run(fmax=fmax, steps=steps)
+
+    # Update atoms.info with optimization metadata
+    atoms.info |= {"opt_nsteps": opt.nsteps, "opt_converged": opt.converged()}
+    return atoms
diff --git a/src/fairchem/core/components/calculate/_single/deformation_runner.py b/src/fairchem/core/components/calculate/_single/deformation_runner.py
@@ -0,0 +1,243 @@
+"""
+Copyright (c) Meta Platforms, Inc. and affiliates.
+
+This source code is licensed under the MIT license found in the
+LICENSE file in the root directory of this source tree.
+"""
+
+from __future__ import annotations
+
+import os
+import traceback
+from typing import TYPE_CHECKING, Any, ClassVar
+
+import numpy as np
+import pandas as pd
+from ase.filters import FrechetCellFilter
+from ase.optimize import BFGS
+from pymatgen.io.ase import MSONAtoms
+from tqdm import tqdm
+
+from adjust_adsorbate import get_large_adsorbate, shift_adsorbate
+from deform_relax_functions import relax_atoms_w_maxstep
+from fairchem.core.components.calculate import CalculateRunner
+from fairchem.core.components.calculate.recipes.utils import (
+    get_property_dict_from_atoms,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from ase.calculators.calculator import Calculator
+    from fairchem.core.datasets import AseDBDataset
+
+
+class DeformationRunner(CalculateRunner):
+    """Relax a sequence of several structures/molecules.
+
+    This class handles the relaxation of atomic structures using a specified calculator,
+    processes the input data in chunks, and saves the results.
+    """
+
+    result_glob_pattern: ClassVar[str] = "relaxation_*-*.json.gz"
+
+    def __init__(
+        self,
+        calculator: Calculator,
+        input_data: AseDBDataset,
+        calculate_properties: Sequence[str],
+        relax_empty_cell: bool = True,
+        save_relaxed_atoms: bool = True,
+        normalize_properties_by: dict[str, str] | None = None,
+        save_target_properties: Sequence[str] | None = None,
+        **relax_kwargs,
+    ):
+        """Initialize the DeformationRunner.
+
+        Args:
+            calculator: ASE calculator to use for energy and force calculations
+            input_data: Dataset containing atomic structures to process
+            relax_empty_cell: Whether to relax the unit cell as part of the empty MOF relaxation.
+                Disable for calculators which do not implement stresses
+            calculate_properties: Sequence of properties to calculate after relaxation
+            save_relaxed_atoms: Whether to save the relaxed structures in the results
+            normalize_properties_by: Dictionary mapping property names to natoms or a key in
+                atoms.info to normalize by
+            save_target_properties: Sequence of target property names to save in the results file.
+                These properties need to be available using atoms.get_properties or present in the
+                atoms.info dictionary. This is useful if running a benchmark where errors will be
+                computed after running relaxations
+            relax_kwargs: Keyword arguments passed to relax. Defaults for deformation benchmark:
+                atoms: ASE atoms with a calculator
+                steps: max number of relaxation steps --> default: 1000
+                fmax: force convergence threshold --> default: 0.05
+                optimizer_cls: ASE optimizer --> default: BFGS
+                fix_symmetry: fix structure symmetry in relaxation --> default: False
+                cell_filter_cls: filter for relaxing empty MOF unit cell --> default: FrechetCellFilter
+        """
+        filter_cls = FrechetCellFilter if relax_empty_cell else None
+
+        DEFAULT_RELAX_KWARGS = {
+            "steps": 1000,  # max number of relaxation steps
+            "fmax": 0.05,  # force convergence threshold
+            "maxstep": 0.05,  # max atomic displacement per iteration
+            "optimizer_cls": BFGS,  # ASE optimizer class
+            "fix_symmetry": False,  # keep symmetry
+            "cell_filter_cls": filter_cls,  # default filter
+        }
+
+        self._calculate_properties = calculate_properties
+        self._save_relaxed_atoms = save_relaxed_atoms
+        self._normalize_properties_by = normalize_properties_by or {}
+        self._save_target_properties = save_target_properties or ()
+        self._relax_kwargs = DEFAULT_RELAX_KWARGS.copy()
+        self._relax_kwargs.update(relax_kwargs)
+
+        super().__init__(calculator=calculator, input_data=input_data)
+
+    def calculate(self, job_num: int = 0, num_jobs: int = 1) -> list[dict[str, Any]]:
+        """Perform relaxation calculations on a subset of structures.
+
+        Splits the input data into chunks and processes the chunk corresponding to job_num.
+
+        Args:
+            job_num: Current job number in array job. Defaults to 0.
+            num_jobs: Total number of jobs in array. Defaults to 1.
+
+        Returns:
+            List of dictionaries containing calculation results
+        """
+        all_results = []
+        chunk_indices = np.array_split(range(len(self.input_data)), num_jobs)[job_num]
+
+        for i in tqdm(chunk_indices, desc="Running relaxations"):
+            atoms = self.input_data.get_atoms(i)
+
+            results = {
+                "sid": atoms.info.get("sid", i),
+                "natoms": len(atoms),
+            }
+
+            # Add target properties if requested
+            target_properties = get_property_dict_from_atoms(
+                self._save_target_properties, atoms, self._normalize_properties_by
+            )
+            results.update(
+                {f"{key}_target": target_properties[key] for key in target_properties}
+            )
+            if self._save_relaxed_atoms:
+                results["atoms_initial"] = MSONAtoms(atoms).as_dict()
+
+            try:
+                # Separate MOF from adsorbate
+                frame = atoms.copy()
+                adsorbate = atoms.copy()
+                tags = atoms.get_tags()
+                for j in reversed(range(len(tags))):
+                    if tags[j] != 0:  # adsorbate atom found
+                        del frame[j]
+                    else:
+                        del adsorbate[j]
+
+                # Relax empty MOF
+                frame.calc = self.calculator
+                frame = relax_atoms_w_maxstep(frame, **self._relax_kwargs)
+                E_empty = frame.get_potential_energy()
+                self._relax_kwargs["cell_filter_cls"] = None  # remove cell filter
+
+                # Update adsorbate positions and combine
+                adsorbate = shift_adsorbate(adsorbate, frame.cell)
+                combo = frame + adsorbate
+
+                # Relax combined structure
+                combo.calc = self.calculator
+                combo = relax_atoms_w_maxstep(combo, **self._relax_kwargs)
+                E_combo = combo.get_potential_energy()
+
+                # Single-point frame
+                frame = combo[:-3]
+                frame.calc = self.calculator
+                E_frame = frame.get_potential_energy()
+
+                # Frame re-relax
+                frame = relax_atoms_w_maxstep(frame, **self._relax_kwargs)
+                E_frame_relax = frame.get_potential_energy()
+
+                # Single-point adsorbate
+                adsorbate = combo[-3:]
+                adsorbate.calc = self.calculator
+                E_adsorbate = adsorbate.get_potential_energy()
+
+                # Gas-phase adsorbate
+                adsorbate_big = get_large_adsorbate(adsorbate)
+                adsorbate_big.calc = self.calculator
+                adsorbate_big = relax_atoms_w_maxstep(
+                    adsorbate_big, **self._relax_kwargs
+                )
+                E_adsorbate_gas = adsorbate_big.get_potential_energy()
+
+                # Compute energies
+                E_ref = min(E_empty, E_frame_relax)
+                E_ads = E_combo - E_ref - E_adsorbate_gas
+                E_int = E_combo - E_frame - E_adsorbate
+                E_mof_deform = E_frame - E_ref
+
+                results.update(
+                    get_property_dict_from_atoms(
+                        self._calculate_properties, atoms, self._normalize_properties_by
+                    )
+                )
+                results.update(
+                    {
+                        "E_ads": E_ads,
+                        "E_int": E_int,
+                        "E_mof_deform": E_mof_deform,
+                        "E_ads_dft": atoms.info["E_ads_dft"],
+                        "E_int_dft": atoms.info["E_int_dft"],
+                        "E_mof_deform_dft": atoms.info["E_mof_deform_dft"],
+                        "errors": "",
+                        "traceback": "",
+                    }
+                )
+
+            except Exception as ex:  # TODO: narrow exception type
+                results.update(dict.fromkeys(self._calculate_properties, np.nan))
+                results.update(
+                    {
+                        "errors": f"{ex!r}",
+                        "traceback": traceback.format_exc(),
+                        "opt_nsteps": np.nan,
+                        "opt_converged": np.nan,
+                    }
+                )
+
+            if self._save_relaxed_atoms:
+                results["atoms"] = MSONAtoms(combo).as_dict()
+
+            all_results.append(results)
+
+        return all_results
+
+    def write_results(
+        self,
+        results: list[dict[str, Any]],
+        results_dir: str,
+        job_num: int = 0,
+        num_jobs: int = 1,
+    ) -> None:
+        """Write calculation results to a compressed JSON file.
+
+        Args:
+            results: List of dictionaries containing elastic properties
+            results_dir: Directory path where results will be saved
+            job_num: Index of the current job
+            num_jobs: Total number of jobs
+        """
+        results_df = pd.DataFrame(results)
+        results_df.to_json(
+            os.path.join(results_dir, f"relaxation_{num_jobs}-{job_num}.json.gz")
+        )
+
+    def save_state(self, checkpoint_location: str, is_preemption: bool = False) -> bool:
+        """Stub for checkpoint saving (not implemented)."""
+        return True
diff --git a/src/fairchem/data/odac/deform_dataset.db b/src/fairchem/data/odac/deform_dataset.db