Add configs and adapt exporter for RSL-RL distillation (isaac-sim#2182)

ClemensSchwarke · Mayankm96 · web-flow · commit 9fbe0b810229 · 2025-04-10T12:11:37.000+02:00
# Description This PR adds configuration classes for Student-Teacher Distillation and adapts the policy exporters to be able to export student policies. ## Type of change - Non-breaking change ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there --------- Signed-off-by: Mayank Mittal <12863862+Mayankm96@users.noreply.github.com> Co-authored-by: Mayank Mittal <mittalma@leggedrobotics.com>
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
@@ -46,6 +46,7 @@ Guidelines for modifications:
 * Calvin Yu
 * Cheng-Rong Lai
 * Chenyu Yang
+* Clemens Schwarke
 * CY (Chien-Ying) Chen
 * David Yang
 * Dorsa Rohani
diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py
@@ -5,21 +5,6 @@
 
 """Script to play a checkpoint if an RL agent from RSL-RL."""
 
-import platform
-from importlib.metadata import version
-
-if version("rsl-rl-lib") != "2.3.0":
-    if platform.system() == "Windows":
-        cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
-    else:
-        cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
-    print(
-        f"Please install the correct version of RSL-RL.\nExisting version is: '{version('rsl-rl-lib')}'"
-        " and required version is: '2.3.0'.\nTo install the correct version, run:"
-        f"\n\n\t{' '.join(cmd)}\n"
-    )
-    exit(1)
-
 """Launch Isaac Sim Simulator first."""
 
 import argparse
@@ -133,11 +118,20 @@ def main():
     # obtain the trained policy for inference
     policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)
 
+    # extract the neural network module
+    # we do this in a try-except to maintain backwards compatibility.
+    try:
+        # version 2.3 onwards
+        policy_nn = ppo_runner.alg.policy
+    except AttributeError:
+        # version 2.2 and below
+        policy_nn = ppo_runner.alg.actor_critic
+
     # export policy to onnx/jit
     export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
-    export_policy_as_jit(ppo_runner.alg.policy, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt")
+    export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt")
     export_policy_as_onnx(
-        ppo_runner.alg.policy, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
+        policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
     )
 
     dt = env.unwrapped.step_dt
diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py
@@ -5,21 +5,6 @@
 
 """Script to train RL agent with RSL-RL."""
 
-import platform
-from importlib.metadata import version
-
-if version("rsl-rl-lib") != "2.3.0":
-    if platform.system() == "Windows":
-        cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
-    else:
-        cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
-    print(
-        f"Please install the correct version of RSL-RL.\nExisting version is: '{version('rsl-rl-lib')}'"
-        " and required version is: '2.3.0'.\nTo install the correct version, run:"
-        f"\n\n\t{' '.join(cmd)}\n"
-    )
-    exit(1)
-
 """Launch Isaac Sim Simulator first."""
 
 import argparse
@@ -60,6 +45,28 @@
 app_launcher = AppLauncher(args_cli)
 simulation_app = app_launcher.app
 
+"""Check for minimum supported RSL-RL version."""
+
+import importlib.metadata as metadata
+import platform
+
+from packaging import version
+
+# for distributed training, check minimum supported rsl-rl version
+RSL_RL_VERSION = "2.3.1"
+installed_version = metadata.version("rsl-rl-lib")
+if args_cli.distributed and version.parse(installed_version) < version.parse(RSL_RL_VERSION):
+    if platform.system() == "Windows":
+        cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
+    else:
+        cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
+    print(
+        f"Please install the correct version of RSL-RL.\nExisting version is: '{installed_version}'"
+        f" and required version is: '{RSL_RL_VERSION}'.\nTo install the correct version, run:"
+        f"\n\n\t{' '.join(cmd)}\n"
+    )
+    exit(1)
+
 """Rest everything follows."""
 
 import gymnasium as gym
@@ -138,7 +145,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         env = multi_agent_to_single_agent(env)
 
     # save resume path before creating a new log_dir
-    if agent_cfg.resume:
+    if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation":
         resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
 
     # wrap for video recording
@@ -161,7 +168,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # write git state to logs
     runner.add_git_repo_to_log(__file__)
     # load the checkpoint
-    if agent_cfg.resume:
+    if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation":
         print(f"[INFO]: Loading model checkpoint from: {resume_path}")
         # load previously trained model
         runner.load(resume_path)
diff --git a/source/isaaclab_rl/config/extension.toml b/source/isaaclab_rl/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.1.3"
+version = "0.1.4"
 
 # Description
 title = "Isaac Lab RL"
diff --git a/source/isaaclab_rl/docs/CHANGELOG.rst b/source/isaaclab_rl/docs/CHANGELOG.rst
@@ -1,6 +1,16 @@
 Changelog
 ---------
 
+0.1.4 (2025-04-10)
+~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added configurations for distillation implementation in RSL-RL.
+* Added configuration for recurrent actor-critic in RSL-RL.
+
+
 0.1.3 (2025-03-31)
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/__init__.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/__init__.py
@@ -15,8 +15,9 @@
 
 """
 
+from .distillation_cfg import *
 from .exporter import export_policy_as_jit, export_policy_as_onnx
-from .rl_cfg import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
+from .rl_cfg import *
 from .rnd_cfg import RslRlRndCfg
 from .symmetry_cfg import RslRlSymmetryCfg
 from .vecenv_wrapper import RslRlVecEnvWrapper
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+from dataclasses import MISSING
+from typing import Literal
+
+from isaaclab.utils import configclass
+
+#########################
+# Policy configurations #
+#########################
+
+
+@configclass
+class RslRlDistillationStudentTeacherCfg:
+    """Configuration for the distillation student-teacher networks."""
+
+    class_name: str = "StudentTeacher"
+    """The policy class name. Default is StudentTeacher."""
+
+    init_noise_std: float = MISSING
+    """The initial noise standard deviation for the student policy."""
+
+    noise_std_type: Literal["scalar", "log"] = "scalar"
+    """The type of noise standard deviation for the policy. Default is scalar."""
+
+    student_hidden_dims: list[int] = MISSING
+    """The hidden dimensions of the student network."""
+
+    teacher_hidden_dims: list[int] = MISSING
+    """The hidden dimensions of the teacher network."""
+
+    activation: str = MISSING
+    """The activation function for the student and teacher networks."""
+
+
+@configclass
+class RslRlDistillationStudentTeacherRecurrentCfg(RslRlDistillationStudentTeacherCfg):
+    """Configuration for the distillation student-teacher recurrent networks."""
+
+    class_name: str = "StudentTeacherRecurrent"
+    """The policy class name. Default is StudentTeacherRecurrent."""
+
+    rnn_type: str = MISSING
+    """The type of the RNN network. Either "lstm" or "gru"."""
+
+    rnn_hidden_dim: int = MISSING
+    """The hidden dimension of the RNN network."""
+
+    rnn_num_layers: int = MISSING
+    """The number of layers of the RNN network."""
+
+    teacher_recurrent: bool = MISSING
+    """Whether the teacher network is recurrent too."""
+
+
+############################
+# Algorithm configurations #
+############################
+
+
+@configclass
+class RslRlDistillationAlgorithmCfg:
+    """Configuration for the distillation algorithm."""
+
+    class_name: str = "Distillation"
+    """The algorithm class name. Default is Distillation."""
+
+    num_learning_epochs: int = MISSING
+    """The number of updates performed with each sample."""
+
+    learning_rate: float = MISSING
+    """The learning rate for the student policy."""
+
+    gradient_length: int = MISSING
+    """The number of environment steps the gradient flows back."""
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/exporter.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/exporter.py
@@ -8,34 +8,34 @@
 import torch
 
 
-def export_policy_as_jit(actor_critic: object, normalizer: object | None, path: str, filename="policy.pt"):
+def export_policy_as_jit(policy: object, normalizer: object | None, path: str, filename="policy.pt"):
     """Export policy into a Torch JIT file.
 
     Args:
-        actor_critic: The actor-critic torch module.
+        policy: The policy torch module.
         normalizer: The empirical normalizer module. If None, Identity is used.
         path: The path to the saving directory.
         filename: The name of exported JIT file. Defaults to "policy.pt".
     """
-    policy_exporter = _TorchPolicyExporter(actor_critic, normalizer)
+    policy_exporter = _TorchPolicyExporter(policy, normalizer)
     policy_exporter.export(path, filename)
 
 
 def export_policy_as_onnx(
-    actor_critic: object, path: str, normalizer: object | None = None, filename="policy.onnx", verbose=False
+    policy: object, path: str, normalizer: object | None = None, filename="policy.onnx", verbose=False
 ):
     """Export policy into a Torch ONNX file.
 
     Args:
-        actor_critic: The actor-critic torch module.
+        policy: The policy torch module.
         normalizer: The empirical normalizer module. If None, Identity is used.
         path: The path to the saving directory.
         filename: The name of exported ONNX file. Defaults to "policy.onnx".
         verbose: Whether to print the model summary. Defaults to False.
     """
     if not os.path.exists(path):
         os.makedirs(path, exist_ok=True)
-    policy_exporter = _OnnxPolicyExporter(actor_critic, normalizer, verbose)
+    policy_exporter = _OnnxPolicyExporter(policy, normalizer, verbose)
     policy_exporter.export(path, filename)
 
 
@@ -47,12 +47,22 @@ def export_policy_as_onnx(
 class _TorchPolicyExporter(torch.nn.Module):
     """Exporter of actor-critic into JIT file."""
 
-    def __init__(self, actor_critic, normalizer=None):
+    def __init__(self, policy, normalizer=None):
         super().__init__()
-        self.actor = copy.deepcopy(actor_critic.actor)
-        self.is_recurrent = actor_critic.is_recurrent
+        self.is_recurrent = policy.is_recurrent
+        # copy policy parameters
+        if hasattr(policy, "actor"):
+            self.actor = copy.deepcopy(policy.actor)
+            if self.is_recurrent:
+                self.rnn = copy.deepcopy(policy.memory_a.rnn)
+        elif hasattr(policy, "student"):
+            self.actor = copy.deepcopy(policy.student)
+            if self.is_recurrent:
+                self.rnn = copy.deepcopy(policy.memory_s.rnn)
+        else:
+            raise ValueError("Policy does not have an actor/student module.")
+        # set up recurrent network
         if self.is_recurrent:
-            self.rnn = copy.deepcopy(actor_critic.memory_a.rnn)
             self.rnn.cpu()
             self.register_buffer("hidden_state", torch.zeros(self.rnn.num_layers, 1, self.rnn.hidden_size))
             self.register_buffer("cell_state", torch.zeros(self.rnn.num_layers, 1, self.rnn.hidden_size))
@@ -94,13 +104,23 @@ def export(self, path, filename):
 class _OnnxPolicyExporter(torch.nn.Module):
     """Exporter of actor-critic into ONNX file."""
 
-    def __init__(self, actor_critic, normalizer=None, verbose=False):
+    def __init__(self, policy, normalizer=None, verbose=False):
         super().__init__()
         self.verbose = verbose
-        self.actor = copy.deepcopy(actor_critic.actor)
-        self.is_recurrent = actor_critic.is_recurrent
+        self.is_recurrent = policy.is_recurrent
+        # copy policy parameters
+        if hasattr(policy, "actor"):
+            self.actor = copy.deepcopy(policy.actor)
+            if self.is_recurrent:
+                self.rnn = copy.deepcopy(policy.memory_a.rnn)
+        elif hasattr(policy, "student"):
+            self.actor = copy.deepcopy(policy.student)
+            if self.is_recurrent:
+                self.rnn = copy.deepcopy(policy.memory_s.rnn)
+        else:
+            raise ValueError("Policy does not have an actor/student module.")
+        # set up recurrent network
         if self.is_recurrent:
-            self.rnn = copy.deepcopy(actor_critic.memory_a.rnn)
             self.rnn.cpu()
             self.forward = self.forward_lstm
         # copy normalizer if exists
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py