From ed14c9c1a0798e5b14c5d0d7215b7affcd40cd5a Mon Sep 17 00:00:00 2001 From: KaydenKnapik Date: Wed, 22 Oct 2025 06:28:23 +0200 Subject: [PATCH] Updated training for rough env and custom reward functions --- .../bdxr_locomotion/agents/rsl_rl_ppo_cfg.py | 2 +- .../tasks/bdxr_locomotion/bdxr_env_cfg.py | 137 ++++++++++------ .../BDXR/tasks/bdxr_locomotion/mdp/rewards.py | 154 +++++++++++++++++- 3 files changed, 238 insertions(+), 55 deletions(-) diff --git a/source/BDXR/BDXR/tasks/bdxr_locomotion/agents/rsl_rl_ppo_cfg.py b/source/BDXR/BDXR/tasks/bdxr_locomotion/agents/rsl_rl_ppo_cfg.py index e6cf067..eeb078f 100644 --- a/source/BDXR/BDXR/tasks/bdxr_locomotion/agents/rsl_rl_ppo_cfg.py +++ b/source/BDXR/BDXR/tasks/bdxr_locomotion/agents/rsl_rl_ppo_cfg.py @@ -11,7 +11,7 @@ @configclass class PPORunnerCfg(RslRlOnPolicyRunnerCfg): num_steps_per_env = 24 - max_iterations = 30_000 + max_iterations = 100000 save_interval = 50 experiment_name = "bdxr_rough" empirical_normalization = False diff --git a/source/BDXR/BDXR/tasks/bdxr_locomotion/bdxr_env_cfg.py b/source/BDXR/BDXR/tasks/bdxr_locomotion/bdxr_env_cfg.py index 1fcbb85..75376df 100644 --- a/source/BDXR/BDXR/tasks/bdxr_locomotion/bdxr_env_cfg.py +++ b/source/BDXR/BDXR/tasks/bdxr_locomotion/bdxr_env_cfg.py @@ -43,7 +43,7 @@ import BDXR.tasks.bdxr_locomotion.mdp as mdp # isort:skip - +# Old Version COBBLESTONE_ROAD_CFG = terrain_gen.TerrainGeneratorCfg( size=(8.0, 8.0), border_width=20.0, @@ -62,6 +62,29 @@ }, ) +#COBBLESTONE_ROAD_CFG = terrain_gen.TerrainGeneratorCfg( +# size=(8.0, 8.0), +# border_width=20.0, +# num_rows=9, +# num_cols=21, +# horizontal_scale=0.1, +# vertical_scale=0.002, # This is already quite low, which is good for an easier terrain +# slope_threshold=0.75, +# difficulty_range=(0.0, 1.0), +# use_cache=False, +# sub_terrains={ +# # Increase the proportion of flat ground +# "flat": terrain_gen.MeshPlaneTerrainCfg(proportion=0.8), +# # Decrease the proportion of rough ground +# "random_rough": terrain_gen.HfRandomUniformTerrainCfg( +# proportion=0.2, +# noise_range=(0.02, 0.05), +# noise_step=0.02, +# border_width=0.25 +# ), +# }, +#) + # TODO: Are all critics necessary (i don't think so) # TODO: In critic we should remove all noise # TODO: Add some scales to avoid sim instabilities @@ -198,25 +221,35 @@ class BDXRRewards(RewardsCfg): #track_ang_vel_z_exp = RewTerm( # func=mdp.track_ang_vel_z_world_exp, weight=2.5, params={"command_name": "base_velocity", "std": 0.5} #) - base_angular_velocity = RewTerm( - func=mdp.base_angular_velocity_reward, - weight=5.0, - params={"std": 2.0, "asset_cfg": SceneEntityCfg("robot")}, - ) base_linear_velocity = RewTerm( func=mdp.base_linear_velocity_reward, weight=5.0, params={"std": 1.0, "ramp_rate": 0.5, "ramp_at_vel": 1.0, "asset_cfg": SceneEntityCfg("robot")}, ) + base_angular_velocity = RewTerm( + func=mdp.base_angular_velocity_reward, + weight=2.5, + params={"std": 2.0, "asset_cfg": SceneEntityCfg("robot")}, + ) feet_air_time = RewTerm( func=mdp.feet_air_time_positive_biped, - weight=5.0, + weight=0.0, params={ "command_name": "base_velocity", "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_Foot"), "threshold": 0.2, }, ) + air_time = RewTerm( + func=mdp.bipedal_air_time_reward, + weight=3.0, + params={ + "mode_time": 0.5, + "velocity_threshold": 0.1, + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_Foot"), + }, + ) feet_slide = RewTerm( func=mdp.feet_slide, weight=-0.2, @@ -225,6 +258,15 @@ class BDXRRewards(RewardsCfg): "asset_cfg": SceneEntityCfg("robot", body_names=".*_Foot"), }, ) + joint_pos = RewTerm( + func=mdp.joint_position_penalty, + weight=1.0, # Or -5.0 if you use the original function + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), + "stand_still_scale": 20.0, # <-- Increase this value substantially + "velocity_threshold": 0.1, + }, + ) # Penalize ankle joint limits dof_pos_limits = RewTerm( @@ -235,14 +277,35 @@ class BDXRRewards(RewardsCfg): # Penalize deviation from default of the joints that are not essential for locomotion joint_deviation_hip = RewTerm( func=mdp.joint_deviation_l1, - weight=-2, + weight=-1, params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_Hip_Yaw", ".*_Hip_Roll"])}, ) + joint_deviation_ankle = RewTerm( + func=mdp.joint_deviation_l1, + weight=-0.5, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_Ankle"])}, + ) #base_height_l2 = RewTerm( # func=mdp.base_height_l2, # weight=-1.5, # Negative weight to create a penalty # params={"target_height": 0.3} # IMPORTANT: Set this to your robot's desired base height in meters #) + foot_clearance = RewTerm( + func=mdp.foot_clearance_reward, + weight=5, + params={ + "std": 0.05, + "tanh_mult": 2.0, + "target_height": 0.2, + "asset_cfg": SceneEntityCfg("robot", body_names=".*_Foot"), + }, + ) + base_motion = RewTerm( + func=mdp.base_motion_penalty, weight=-2.0, params={"asset_cfg": SceneEntityCfg("robot")} + ) + base_orientation = RewTerm( + func=mdp.base_orientation_penalty, weight=-3.0, params={"asset_cfg": SceneEntityCfg("robot")} + ) # TODO: With the way it's implemented, it is not used, should we remove it or use itS @configclass @@ -251,7 +314,7 @@ class EventCfg: # startup physics_material = EventTerm( func=mdp.randomize_rigid_body_material, - mode="reset", + mode="startup", params={ "asset_cfg": SceneEntityCfg("robot", body_names=".*"), "static_friction_range": (0.8, 0.8), @@ -263,23 +326,13 @@ class EventCfg: add_base_mass = EventTerm( func=mdp.randomize_rigid_body_mass, - mode="reset", + mode="startup", params={ "asset_cfg": SceneEntityCfg("robot", body_names="base"), - "mass_distribution_params": (-1.0, 1.0), + "mass_distribution_params": (-5.0, 5.0), "operation": "add", }, ) - randomize_com_positions = EventTerm( - func=mdp.randomize_rigid_body_com, - mode="startup", - params={ - "asset_cfg": SceneEntityCfg("robot", body_names=".*"), - "com_range": {"x": (-0.05, 0.05), "y": (-0.05, 0.05), "z": (-0.05, 0.05)}, - }, - ) - - # reset base_external_force_torque = EventTerm( func=mdp.apply_external_force_torque, mode="reset", @@ -289,43 +342,22 @@ class EventCfg: "torque_range": (-0.0, 0.0), }, ) - randomize_actuator_gains = EventTerm( - func=mdp.randomize_actuator_gains, - mode="reset", - params={ - "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), - "operation": "scale", - "stiffness_distribution_params": (0.6, 1.4), - "damping_distribution_params": (0.6, 1.4), - }, - ) - randomize_joint_parameters = EventTerm( - func=mdp.randomize_joint_parameters, - mode="reset", - params={ - "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), - "operation": "scale", - "friction_distribution_params": (0.8, 1.2), - "armature_distribution_params": (0.8, 1.2), - }, - ) - reset_base = EventTerm( func=mdp.reset_root_state_uniform, mode="reset", params={ "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, "velocity_range": { - "x": (-0.5, 0.5), - "y": (-0.5, 0.5), - "z": (-0.5, 0.5), - "roll": (-0.5, 0.5), - "pitch": (-0.5, 0.5), - "yaw": (-0.5, 0.5), + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), }, }, ) - +# reset_robot_joints = EventTerm( func=mdp.reset_joints_by_scale, mode="reset", @@ -334,8 +366,6 @@ class EventCfg: "velocity_range": (0.0, 0.0), }, ) - - # interval push_robot = EventTerm( func=mdp.push_by_setting_velocity, mode="interval", @@ -371,7 +401,7 @@ def __post_init__(self): # events self.events.push_robot.params["velocity_range"] = {"x": (-0.1, 0.1), "y": (-0.1, 0.1)} - # self.events.push_robot = None + #self.events.push_robot = None self.events.add_base_mass.params["asset_cfg"].body_names = ["base_link"] self.events.add_base_mass.params["mass_distribution_params"] = (-0.5, 0.5) self.events.reset_robot_joints.params["position_range"] = (0.8, 1.2) @@ -464,4 +494,7 @@ def __post_init__(self) -> None: # disable randomization for play self.observations.policy.enable_corruption = False + self.commands.base_velocity.ranges.lin_vel_x = (0.3, 0.3) + self.commands.base_velocity.ranges.lin_vel_y = (0.0, 0.0) + self.commands.base_velocity.ranges.ang_vel_z = (0.0, 0.0) # remove random pushing event \ No newline at end of file diff --git a/source/BDXR/BDXR/tasks/bdxr_locomotion/mdp/rewards.py b/source/BDXR/BDXR/tasks/bdxr_locomotion/mdp/rewards.py index 9619583..b8a8b22 100644 --- a/source/BDXR/BDXR/tasks/bdxr_locomotion/mdp/rewards.py +++ b/source/BDXR/BDXR/tasks/bdxr_locomotion/mdp/rewards.py @@ -8,9 +8,10 @@ import torch from typing import TYPE_CHECKING -from isaaclab.assets import Articulation +from isaaclab.assets import Articulation, RigidObject from isaaclab.managers import SceneEntityCfg from isaaclab.utils.math import wrap_to_pi +from isaaclab.sensors import ContactSensor if TYPE_CHECKING: from isaaclab.envs import ManagerBasedRLEnv @@ -47,4 +48,153 @@ def base_linear_velocity_reward( # fixed 1.0 multiple for tracking below the ramp_at_vel value, then scale by the rate above vel_cmd_magnitude = torch.linalg.norm(target, dim=1) velocity_scaling_multiple = torch.clamp(1.0 + ramp_rate * (vel_cmd_magnitude - ramp_at_vel), min=1.0) - return torch.exp(-lin_vel_error / std) * velocity_scaling_multiple \ No newline at end of file + return torch.exp(-lin_vel_error / std) * velocity_scaling_multiple + +def bipedal_air_time_reward( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg, + sensor_cfg: SceneEntityCfg, + mode_time: float, + velocity_threshold: float, +) -> torch.Tensor: + """Reward longer feet air and contact time for a bipedal robot.""" + # extract the used quantities (to enable type-hinting) + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] + asset: Articulation = env.scene[asset_cfg.name] + if not contact_sensor.cfg.track_air_time: + raise RuntimeError("Activate ContactSensor's track_air_time!") + + # Ensure you are tracking the correct body IDs for the two feet. + # This will depend on your robot's URDF file. + # For example, if your foot body IDs are 3 and 6: + # sensor_cfg.body_ids = [3, 6] + + # compute the reward + current_air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids] + current_contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids] + + t_max = torch.max(current_air_time, current_contact_time) + t_min = torch.clip(t_max, max=mode_time) + stance_cmd_reward = torch.clip(current_contact_time - current_air_time, -mode_time, mode_time) + + # MODIFIED: Expand command to 2 dimensions for the two feet + cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1).unsqueeze(dim=1).expand(-1, 2) + + # MODIFIED: Expand body velocity to 2 dimensions for the two feet + body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1).unsqueeze(dim=1).expand(-1, 2) + + reward = torch.where( + torch.logical_or(cmd > 0.0, body_vel > velocity_threshold), + torch.where(t_max < mode_time, t_min, 0), + stance_cmd_reward, + ) + return torch.sum(reward, dim=1) + +def foot_clearance_reward( + env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, target_height: float, std: float, tanh_mult: float +) -> torch.Tensor: + """Reward the swinging feet for clearing a specified height off the ground.""" + asset: RigidObject = env.scene[asset_cfg.name] + foot_heights = asset.data.body_pos_w[:, asset_cfg.body_ids, 2] + foot_velocities_norm = torch.norm(asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2], dim=2) + + # -- Start: Debugging Print Statement (Re-added) -- + # To avoid flooding the console, we print the heights for only the first environment (env 0). + # We use .cpu().numpy() to make the tensor printable in a standard format. + if env.common_step_counter % 10 == 0: # Optional: print only every 10 steps + print(f"Current foot heights for env 0: {foot_heights[0].cpu().numpy()}") + # -- End: Debugging Print Statement -- + + # --- Original Calculation --- + foot_z_target_error = torch.square(foot_heights - target_height) + foot_velocity_tanh = torch.tanh(tanh_mult * foot_velocities_norm) + reward_error = torch.sum(foot_z_target_error * foot_velocity_tanh, dim=1) + clearance_reward = torch.exp(-reward_error / std) + + # --- THE FIX: Add a "Walking Gate" --- + # This gate ensures that the reward is only applied if at least one foot is swinging. + max_foot_vel = torch.max(foot_velocities_norm, dim=1)[0] + # The gate is a value from 0 to 1. It's 0 if the robot is still, and 1 if it's walking. + walking_gate = torch.tanh(5.0 * max_foot_vel) + + # The final reward is the clearance reward scaled by the walking gate. + return clearance_reward * walking_gate + + + +def bipedal_air_time_reward( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg, + sensor_cfg: SceneEntityCfg, + mode_time: float, + velocity_threshold: float, +) -> torch.Tensor: + """Reward longer feet air and contact time for a bipedal robot.""" + # extract the used quantities (to enable type-hinting) + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] + asset: Articulation = env.scene[asset_cfg.name] + if not contact_sensor.cfg.track_air_time: + raise RuntimeError("Activate ContactSensor's track_air_time!") + + # Ensure you are tracking the correct body IDs for the two feet. + # This will depend on your robot's URDF file. + # For example, if your foot body IDs are 3 and 6: + # sensor_cfg.body_ids = [3, 6] + + # compute the reward + current_air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids] + current_contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids] + + t_max = torch.max(current_air_time, current_contact_time) + t_min = torch.clip(t_max, max=mode_time) + stance_cmd_reward = torch.clip(current_contact_time - current_air_time, -mode_time, mode_time) + + # MODIFIED: Expand command to 2 dimensions for the two feet + cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1).unsqueeze(dim=1).expand(-1, 2) + + # MODIFIED: Expand body velocity to 2 dimensions for the two feet + body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1).unsqueeze(dim=1).expand(-1, 2) + + reward = torch.where( + torch.logical_or(cmd > 0.0, body_vel > velocity_threshold), + torch.where(t_max < mode_time, t_min, 0), + stance_cmd_reward, + ) + return torch.sum(reward, dim=1) + +def joint_position_penalty( + env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, stand_still_scale: float, velocity_threshold: float +) -> torch.Tensor: + """Penalize joint position error from default on the articulation.""" + asset: Articulation = env.scene[asset_cfg.name] + cmd_norm = torch.linalg.norm(env.command_manager.get_command("base_velocity"), dim=1) + + # Calculate the penalty (deviation from default pose) + joint_pos_error = torch.linalg.norm((asset.data.joint_pos - asset.data.default_joint_pos), dim=1) + + # When the command is to stand still, apply a much larger penalty + # The penalty is negative because we want to discourage this behavior. + penalty = -joint_pos_error + scaled_penalty = -stand_still_scale * joint_pos_error + + return torch.where(cmd_norm < velocity_threshold, scaled_penalty, penalty) + + +# ! look into simplifying the kernel here; it's a little oddly complex +def base_motion_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor: + """Penalize base vertical and roll/pitch velocity""" + # extract the used quantities (to enable type-hinting) + asset: RigidObject = env.scene[asset_cfg.name] + return 0.8 * torch.square(asset.data.root_lin_vel_b[:, 2]) + 0.2 * torch.sum( + torch.abs(asset.data.root_ang_vel_b[:, :2]), dim=1 + ) + + +def base_orientation_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor: + """Penalize non-flat base orientation + + This is computed by penalizing the xy-components of the projected gravity vector. + """ + # extract the used quantities (to enable type-hinting) + asset: RigidObject = env.scene[asset_cfg.name] + return torch.linalg.norm((asset.data.projected_gravity_b[:, :2]), dim=1)