diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 84a522f73e9..41c7708975d 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -46,6 +46,7 @@ Guidelines for modifications:
 * Calvin Yu
 * Cheng-Rong Lai
 * Chenyu Yang
+* Clemens Schwarke
 * CY (Chien-Ying) Chen
 * David Yang
 * Dorsa Rohani
diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py
index 08db310d8e7..e3c0121e311 100644
--- a/scripts/reinforcement_learning/rsl_rl/play.py
+++ b/scripts/reinforcement_learning/rsl_rl/play.py
@@ -5,21 +5,6 @@

 """Script to play a checkpoint if an RL agent from RSL-RL."""

-import platform
-from importlib.metadata import version
-
-if version("rsl-rl-lib") != "2.3.0":
-    if platform.system() == "Windows":
-        cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
-    else:
-        cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
-    print(
-        f"Please install the correct version of RSL-RL.\nExisting version is: '{version('rsl-rl-lib')}'"
-        " and required version is: '2.3.0'.\nTo install the correct version, run:"
-        f"\n\n\t{' '.join(cmd)}\n"
-    )
-    exit(1)
-
 """Launch Isaac Sim Simulator first."""

 import argparse
@@ -133,11 +118,20 @@ def main():
     # obtain the trained policy for inference
     policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)

+    # extract the neural network module
+    # we do this in a try-except to maintain backwards compatibility.
+    try:
+        # version 2.3 onwards
+        policy_nn = ppo_runner.alg.policy
+    except AttributeError:
+        # version 2.2 and below
+        policy_nn = ppo_runner.alg.actor_critic
+
     # export policy to onnx/jit
     export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
-    export_policy_as_jit(ppo_runner.alg.policy, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt")
+    export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt")
     export_policy_as_onnx(
-        ppo_runner.alg.policy, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
+        policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
     )

     dt = env.unwrapped.step_dt
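For reference, a minimal sketch (not part of the patch) of consuming the policy.pt file written by export_policy_as_jit above. The observation dimension of 48 is a placeholder for the task at hand; observation normalization, when enabled, is baked into the exported module:

import torch

# load the exported policy; it runs standalone, without any Isaac Lab dependencies
policy = torch.jit.load("exported/policy.pt", map_location="cpu")
obs = torch.zeros(1, 48)  # placeholder observation vector, purely for illustration
with torch.inference_mode():
    actions = policy(obs)
print(actions.shape)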
diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py
index 2f5f4437f5f..a4925d075e6 100644
--- a/scripts/reinforcement_learning/rsl_rl/train.py
+++ b/scripts/reinforcement_learning/rsl_rl/train.py
@@ -5,21 +5,6 @@

 """Script to train RL agent with RSL-RL."""

-import platform
-from importlib.metadata import version
-
-if version("rsl-rl-lib") != "2.3.0":
-    if platform.system() == "Windows":
-        cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
-    else:
-        cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
-    print(
-        f"Please install the correct version of RSL-RL.\nExisting version is: '{version('rsl-rl-lib')}'"
-        " and required version is: '2.3.0'.\nTo install the correct version, run:"
-        f"\n\n\t{' '.join(cmd)}\n"
-    )
-    exit(1)
-
 """Launch Isaac Sim Simulator first."""

 import argparse
@@ -60,6 +45,28 @@
 app_launcher = AppLauncher(args_cli)
 simulation_app = app_launcher.app

+"""Check for minimum supported RSL-RL version."""
+
+import importlib.metadata as metadata
+import platform
+
+from packaging import version
+
+# for distributed training, check minimum supported rsl-rl version
+RSL_RL_VERSION = "2.3.1"
+installed_version = metadata.version("rsl-rl-lib")
+if args_cli.distributed and version.parse(installed_version) < version.parse(RSL_RL_VERSION):
+    if platform.system() == "Windows":
+        cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
+    else:
+        cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
+    print(
+        f"Please install the correct version of RSL-RL.\nExisting version is: '{installed_version}'"
+        f" and required version is: '{RSL_RL_VERSION}'.\nTo install the correct version, run:"
+        f"\n\n\t{' '.join(cmd)}\n"
+    )
+    exit(1)
+
 """Rest everything follows."""

 import gymnasium as gym
@@ -138,7 +145,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         env = multi_agent_to_single_agent(env)

     # save resume path before creating a new log_dir
-    if agent_cfg.resume:
+    if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation":
         resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)

     # wrap for video recording
@@ -161,7 +168,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # write git state to logs
     runner.add_git_repo_to_log(__file__)
     # load the checkpoint
-    if agent_cfg.resume:
+    if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation":
         print(f"[INFO]: Loading model checkpoint from: {resume_path}")
         # load previously trained model
         runner.load(resume_path)
diff --git a/source/isaaclab_rl/config/extension.toml b/source/isaaclab_rl/config/extension.toml
index 79e0efc943c..6eeca9c1a97 100644
--- a/source/isaaclab_rl/config/extension.toml
+++ b/source/isaaclab_rl/config/extension.toml
@@ -1,7 +1,7 @@
 [package]

 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.1.3"
+version = "0.1.4"

 # Description
 title = "Isaac Lab RL"
diff --git a/source/isaaclab_rl/docs/CHANGELOG.rst b/source/isaaclab_rl/docs/CHANGELOG.rst
index 282c59364b1..9ade85682f0 100644
--- a/source/isaaclab_rl/docs/CHANGELOG.rst
+++ b/source/isaaclab_rl/docs/CHANGELOG.rst
@@ -1,6 +1,16 @@
 Changelog
 ---------

+0.1.4 (2025-04-10)
+~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added configurations for distillation implementation in RSL-RL.
+* Added configuration for recurrent actor-critic in RSL-RL.
+
+
 0.1.3 (2025-03-31)
 ~~~~~~~~~~~~~~~~~~

diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/__init__.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/__init__.py
index cff78064852..ca0daa51cf0 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/__init__.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/__init__.py
@@ -15,8 +15,9 @@

 """

+from .distillation_cfg import *
 from .exporter import export_policy_as_jit, export_policy_as_onnx
-from .rl_cfg import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
+from .rl_cfg import *
 from .rnd_cfg import RslRlRndCfg
 from .symmetry_cfg import RslRlSymmetryCfg
 from .vecenv_wrapper import RslRlVecEnvWrapper
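A small self-contained sketch (not part of the patch) of why the guard above uses packaging's version parsing rather than the old exact string match: parsed versions compare semantically, so any release at or above the minimum passes instead of only the single pinned version:

from packaging import version

required = version.parse("2.3.1")
for installed in ["2.3.0", "2.3.1", "2.3.10"]:
    ok = version.parse(installed) >= required
    print(f"rsl-rl-lib {installed}: {'OK' if ok else 'needs upgrade'}")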
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py
new file mode 100644
index 00000000000..de4db2d2efb
--- /dev/null
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+from dataclasses import MISSING
+from typing import Literal
+
+from isaaclab.utils import configclass
+
+#########################
+# Policy configurations #
+#########################
+
+
+@configclass
+class RslRlDistillationStudentTeacherCfg:
+    """Configuration for the distillation student-teacher networks."""
+
+    class_name: str = "StudentTeacher"
+    """The policy class name. Default is StudentTeacher."""
+
+    init_noise_std: float = MISSING
+    """The initial noise standard deviation for the student policy."""
+
+    noise_std_type: Literal["scalar", "log"] = "scalar"
+    """The type of noise standard deviation for the policy. Default is scalar."""
+
+    student_hidden_dims: list[int] = MISSING
+    """The hidden dimensions of the student network."""
+
+    teacher_hidden_dims: list[int] = MISSING
+    """The hidden dimensions of the teacher network."""
+
+    activation: str = MISSING
+    """The activation function for the student and teacher networks."""
+
+
+@configclass
+class RslRlDistillationStudentTeacherRecurrentCfg(RslRlDistillationStudentTeacherCfg):
+    """Configuration for the distillation student-teacher recurrent networks."""
+
+    class_name: str = "StudentTeacherRecurrent"
+    """The policy class name. Default is StudentTeacherRecurrent."""
+
+    rnn_type: str = MISSING
+    """The type of the RNN network. Either "lstm" or "gru"."""
+
+    rnn_hidden_dim: int = MISSING
+    """The hidden dimension of the RNN network."""
+
+    rnn_num_layers: int = MISSING
+    """The number of layers of the RNN network."""
+
+    teacher_recurrent: bool = MISSING
+    """Whether the teacher network is recurrent too."""
+
+
+############################
+# Algorithm configurations #
+############################
+
+
+@configclass
+class RslRlDistillationAlgorithmCfg:
+    """Configuration for the distillation algorithm."""
+
+    class_name: str = "Distillation"
+    """The algorithm class name. Default is Distillation."""
+
+    num_learning_epochs: int = MISSING
+    """The number of updates performed with each sample."""
+
+    learning_rate: float = MISSING
+    """The learning rate for the student policy."""
+
+    gradient_length: int = MISSING
+    """The number of environment steps the gradient flows back."""
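To make the intended wiring concrete, a hypothetical agent configuration sketch (the class name and all numbers are illustrative, not defaults shipped by this patch) showing how the new distillation classes plug into the existing RslRlOnPolicyRunnerCfg from rl_cfg.py. With algorithm.class_name set to "Distillation", train.py resolves a checkpoint from agent_cfg.load_run / agent_cfg.load_checkpoint and loads it before training, which is how the teacher weights are expected to be provided:

from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl import (
    RslRlDistillationAlgorithmCfg,
    RslRlDistillationStudentTeacherCfg,
    RslRlOnPolicyRunnerCfg,
)


@configclass
class ExampleDistillationRunnerCfg(RslRlOnPolicyRunnerCfg):
    # runner settings (illustrative values)
    num_steps_per_env = 24
    max_iterations = 1000
    save_interval = 50
    experiment_name = "example_distillation"
    empirical_normalization = False
    # student and teacher networks
    policy = RslRlDistillationStudentTeacherCfg(
        init_noise_std=0.1,
        student_hidden_dims=[256, 128, 128],
        teacher_hidden_dims=[512, 256, 128],
        activation="elu",
    )
    # distillation algorithm settings
    algorithm = RslRlDistillationAlgorithmCfg(
        num_learning_epochs=1,
        learning_rate=1.0e-3,
        gradient_length=15,
    )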
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/exporter.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/exporter.py
index 94144888719..36768b6454d 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/exporter.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/exporter.py
@@ -8,26 +8,26 @@
 import torch


-def export_policy_as_jit(actor_critic: object, normalizer: object | None, path: str, filename="policy.pt"):
+def export_policy_as_jit(policy: object, normalizer: object | None, path: str, filename="policy.pt"):
     """Export policy into a Torch JIT file.

     Args:
-        actor_critic: The actor-critic torch module.
+        policy: The policy torch module.
         normalizer: The empirical normalizer module. If None, Identity is used.
         path: The path to the saving directory.
         filename: The name of exported JIT file. Defaults to "policy.pt".
     """
-    policy_exporter = _TorchPolicyExporter(actor_critic, normalizer)
+    policy_exporter = _TorchPolicyExporter(policy, normalizer)
     policy_exporter.export(path, filename)


 def export_policy_as_onnx(
-    actor_critic: object, path: str, normalizer: object | None = None, filename="policy.onnx", verbose=False
+    policy: object, path: str, normalizer: object | None = None, filename="policy.onnx", verbose=False
 ):
     """Export policy into a Torch ONNX file.

     Args:
-        actor_critic: The actor-critic torch module.
+        policy: The policy torch module.
         normalizer: The empirical normalizer module. If None, Identity is used.
         path: The path to the saving directory.
         filename: The name of exported ONNX file. Defaults to "policy.onnx".
@@ -35,7 +35,7 @@ def export_policy_as_onnx(
     """
     if not os.path.exists(path):
         os.makedirs(path, exist_ok=True)
-    policy_exporter = _OnnxPolicyExporter(actor_critic, normalizer, verbose)
+    policy_exporter = _OnnxPolicyExporter(policy, normalizer, verbose)
     policy_exporter.export(path, filename)


@@ -47,12 +47,22 @@
 class _TorchPolicyExporter(torch.nn.Module):
     """Exporter of actor-critic into JIT file."""

-    def __init__(self, actor_critic, normalizer=None):
+    def __init__(self, policy, normalizer=None):
         super().__init__()
-        self.actor = copy.deepcopy(actor_critic.actor)
-        self.is_recurrent = actor_critic.is_recurrent
+        self.is_recurrent = policy.is_recurrent
+        # copy policy parameters
+        if hasattr(policy, "actor"):
+            self.actor = copy.deepcopy(policy.actor)
+            if self.is_recurrent:
+                self.rnn = copy.deepcopy(policy.memory_a.rnn)
+        elif hasattr(policy, "student"):
+            self.actor = copy.deepcopy(policy.student)
+            if self.is_recurrent:
+                self.rnn = copy.deepcopy(policy.memory_s.rnn)
+        else:
+            raise ValueError("Policy does not have an actor/student module.")
+        # set up recurrent network
         if self.is_recurrent:
-            self.rnn = copy.deepcopy(actor_critic.memory_a.rnn)
             self.rnn.cpu()
             self.register_buffer("hidden_state", torch.zeros(self.rnn.num_layers, 1, self.rnn.hidden_size))
             self.register_buffer("cell_state", torch.zeros(self.rnn.num_layers, 1, self.rnn.hidden_size))
@@ -94,13 +104,23 @@ def export(self, path, filename):
 class _OnnxPolicyExporter(torch.nn.Module):
     """Exporter of actor-critic into ONNX file."""

-    def __init__(self, actor_critic, normalizer=None, verbose=False):
+    def __init__(self, policy, normalizer=None, verbose=False):
         super().__init__()
         self.verbose = verbose
-        self.actor = copy.deepcopy(actor_critic.actor)
-        self.is_recurrent = actor_critic.is_recurrent
+        self.is_recurrent = policy.is_recurrent
+        # copy policy parameters
+        if hasattr(policy, "actor"):
+            self.actor = copy.deepcopy(policy.actor)
+            if self.is_recurrent:
+                self.rnn = copy.deepcopy(policy.memory_a.rnn)
+        elif hasattr(policy, "student"):
+            self.actor = copy.deepcopy(policy.student)
+            if self.is_recurrent:
+                self.rnn = copy.deepcopy(policy.memory_s.rnn)
+        else:
+            raise ValueError("Policy does not have an actor/student module.")
+        # set up recurrent network
         if self.is_recurrent:
-            self.rnn = copy.deepcopy(actor_critic.memory_a.rnn)
             self.rnn.cpu()
             self.forward = self.forward_lstm
         # copy normalizer if exists
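For completeness, a minimal sketch (not part of the patch) of running the exported policy.onnx with onnxruntime, assuming the non-recurrent export path. The observation size of 48 is a placeholder, and the input tensor name is queried from the session rather than assumed:

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("exported/policy.onnx")
input_name = session.get_inputs()[0].name  # query instead of hard-coding the tensor name
obs = np.zeros((1, 48), dtype=np.float32)  # placeholder observation, purely for illustration
actions = session.run(None, {input_name: obs})[0]
print(actions.shape)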
"""Configuration for the PPO algorithm.""" +class RslRlPpoActorCriticRecurrentCfg(RslRlPpoActorCriticCfg): + """Configuration for the PPO actor-critic networks with recurrent layers.""" - class_name: str = "PPO" - """The algorithm class name. Default is PPO.""" + class_name: str = "ActorCriticRecurrent" + """The policy class name. Default is ActorCriticRecurrent.""" - value_loss_coef: float = MISSING - """The coefficient for the value loss.""" + rnn_type: str = MISSING + """The type of RNN to use. Either "lstm" or "gru".""" - use_clipped_value_loss: bool = MISSING - """Whether to use clipped value loss.""" + rnn_hidden_dim: int = MISSING + """The dimension of the RNN layers.""" - clip_param: float = MISSING - """The clipping parameter for the policy.""" + rnn_num_layers: int = MISSING + """The number of RNN layers.""" - entropy_coef: float = MISSING - """The coefficient for the entropy loss.""" + +############################ +# Algorithm configurations # +############################ + + +@configclass +class RslRlPpoAlgorithmCfg: + """Configuration for the PPO algorithm.""" + + class_name: str = "PPO" + """The algorithm class name. Default is PPO.""" num_learning_epochs: int = MISSING """The number of learning epochs per update.""" @@ -72,12 +89,24 @@ class RslRlPpoAlgorithmCfg: lam: float = MISSING """The lambda parameter for Generalized Advantage Estimation (GAE).""" + entropy_coef: float = MISSING + """The coefficient for the entropy loss.""" + desired_kl: float = MISSING """The desired KL divergence.""" max_grad_norm: float = MISSING """The maximum gradient norm.""" + value_loss_coef: float = MISSING + """The coefficient for the value loss.""" + + use_clipped_value_loss: bool = MISSING + """Whether to use clipped value loss.""" + + clip_param: float = MISSING + """The clipping parameter for the policy.""" + normalize_advantage_per_mini_batch: bool = False """Whether to normalize the advantage per mini-batch. Default is False. @@ -94,6 +123,11 @@ class RslRlPpoAlgorithmCfg: """ +######################### +# Runner configurations # +######################### + + @configclass class RslRlOnPolicyRunnerCfg: """Configuration of the runner for on-policy algorithms.""" @@ -113,10 +147,10 @@ class RslRlOnPolicyRunnerCfg: empirical_normalization: bool = MISSING """Whether to use empirical normalization.""" - policy: RslRlPpoActorCriticCfg = MISSING + policy: RslRlPpoActorCriticCfg | RslRlDistillationStudentTeacherCfg = MISSING """The policy configuration.""" - algorithm: RslRlPpoAlgorithmCfg = MISSING + algorithm: RslRlPpoAlgorithmCfg | RslRlDistillationAlgorithmCfg = MISSING """The algorithm configuration.""" clip_actions: float | None = None @@ -126,10 +160,6 @@ class RslRlOnPolicyRunnerCfg: This clipping is performed inside the :class:`RslRlVecEnvWrapper` wrapper. """ - ## - # Checkpointing parameters - ## - save_interval: int = MISSING """The number of iterations between saves.""" @@ -144,10 +174,6 @@ class RslRlOnPolicyRunnerCfg: ``{time-stamp}_{run_name}``. """ - ## - # Logging parameters - ## - logger: Literal["tensorboard", "neptune", "wandb"] = "tensorboard" """The logger to use. Default is tensorboard.""" @@ -157,10 +183,6 @@ class RslRlOnPolicyRunnerCfg: wandb_project: str = "isaaclab" """The wandb project name. Default is "isaaclab".""" - ## - # Loading parameters - ## - resume: bool = False """Whether to resume. 
Default is False.""" diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index 96c5b39e478..ada55f47d5e 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -44,7 +44,7 @@ "sb3": ["stable-baselines3>=2.1"], "skrl": ["skrl>=1.4.2"], "rl-games": ["rl-games==1.6.1", "gym"], # rl-games still needs gym :( - "rsl-rl": ["rsl-rl-lib==2.3.0"], + "rsl-rl": ["rsl-rl-lib==2.3.1"], } # Add the names with hyphens as aliases for convenience EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"]