From ed14c9c1a0798e5b14c5d0d7215b7affcd40cd5a Mon Sep 17 00:00:00 2001
From: KaydenKnapik <kaydenknapik@gmail.com>
Date: Wed, 22 Oct 2025 06:28:23 +0200
Subject: [PATCH] Updated training for rough env and custom reward functions

---
 .../bdxr_locomotion/agents/rsl_rl_ppo_cfg.py  |   2 +-
 .../tasks/bdxr_locomotion/bdxr_env_cfg.py     | 137 ++++++++++------
 .../BDXR/tasks/bdxr_locomotion/mdp/rewards.py | 154 +++++++++++++++++-
 3 files changed, 238 insertions(+), 55 deletions(-)

diff --git a/source/BDXR/BDXR/tasks/bdxr_locomotion/agents/rsl_rl_ppo_cfg.py b/source/BDXR/BDXR/tasks/bdxr_locomotion/agents/rsl_rl_ppo_cfg.py
index e6cf067..eeb078f 100644
--- a/source/BDXR/BDXR/tasks/bdxr_locomotion/agents/rsl_rl_ppo_cfg.py
+++ b/source/BDXR/BDXR/tasks/bdxr_locomotion/agents/rsl_rl_ppo_cfg.py
@@ -11,7 +11,7 @@
 @configclass
 class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
     num_steps_per_env = 24
-    max_iterations = 30_000
+    max_iterations = 100000
     save_interval = 50
     experiment_name = "bdxr_rough"
     empirical_normalization = False
diff --git a/source/BDXR/BDXR/tasks/bdxr_locomotion/bdxr_env_cfg.py b/source/BDXR/BDXR/tasks/bdxr_locomotion/bdxr_env_cfg.py
index 1fcbb85..75376df 100644
--- a/source/BDXR/BDXR/tasks/bdxr_locomotion/bdxr_env_cfg.py
+++ b/source/BDXR/BDXR/tasks/bdxr_locomotion/bdxr_env_cfg.py
@@ -43,7 +43,7 @@
 
 import BDXR.tasks.bdxr_locomotion.mdp as mdp  # isort:skip
 
-
+# Old Version
 COBBLESTONE_ROAD_CFG = terrain_gen.TerrainGeneratorCfg(
     size=(8.0, 8.0),
     border_width=20.0,
@@ -62,6 +62,29 @@
     },
 )
 
+#COBBLESTONE_ROAD_CFG = terrain_gen.TerrainGeneratorCfg(
+#    size=(8.0, 8.0),
+#    border_width=20.0,
+#    num_rows=9,
+#    num_cols=21,
+#    horizontal_scale=0.1,
+#    vertical_scale=0.002,  # This is already quite low, which is good for an easier terrain
+#    slope_threshold=0.75,
+#    difficulty_range=(0.0, 1.0),
+#    use_cache=False,
+#    sub_terrains={
+#        # Increase the proportion of flat ground
+#        "flat": terrain_gen.MeshPlaneTerrainCfg(proportion=0.8),
+#        # Decrease the proportion of rough ground
+#        "random_rough": terrain_gen.HfRandomUniformTerrainCfg(
+#            proportion=0.2,
+#            noise_range=(0.02, 0.05),
+#            noise_step=0.02,
+#            border_width=0.25
+#        ),
+#    },
+#)
+
 # TODO: Are all critics necessary (i don't think so) 
 # TODO: In critic we should remove all noise
 # TODO: Add some scales to avoid sim instabilities
@@ -198,25 +221,35 @@ class BDXRRewards(RewardsCfg):
     #track_ang_vel_z_exp = RewTerm(
     #    func=mdp.track_ang_vel_z_world_exp, weight=2.5, params={"command_name": "base_velocity", "std": 0.5}
     #)
-    base_angular_velocity = RewTerm(
-        func=mdp.base_angular_velocity_reward,
-        weight=5.0,
-        params={"std": 2.0, "asset_cfg": SceneEntityCfg("robot")},
-    )
     base_linear_velocity = RewTerm(
         func=mdp.base_linear_velocity_reward,
         weight=5.0,
         params={"std": 1.0, "ramp_rate": 0.5, "ramp_at_vel": 1.0, "asset_cfg": SceneEntityCfg("robot")},
     )
+    base_angular_velocity = RewTerm(
+        func=mdp.base_angular_velocity_reward,
+        weight=2.5,
+        params={"std": 2.0, "asset_cfg": SceneEntityCfg("robot")},
+    )
     feet_air_time = RewTerm(
         func=mdp.feet_air_time_positive_biped,
-        weight=5.0,
+        weight=0.0,
         params={
             "command_name": "base_velocity",
             "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_Foot"),
             "threshold": 0.2,
         },
     )
+    air_time = RewTerm(
+        func=mdp.bipedal_air_time_reward,
+        weight=3.0,
+        params={
+            "mode_time": 0.5,
+            "velocity_threshold": 0.1,
+            "asset_cfg": SceneEntityCfg("robot"),
+            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_Foot"),
+        },
+    )
     feet_slide = RewTerm(
         func=mdp.feet_slide,
         weight=-0.2,
@@ -225,6 +258,15 @@ class BDXRRewards(RewardsCfg):
             "asset_cfg": SceneEntityCfg("robot", body_names=".*_Foot"),
         },
     )
+    joint_pos = RewTerm(
+        func=mdp.joint_position_penalty,
+        weight=1.0, # Or -5.0 if you use the original function
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=".*"),
+            "stand_still_scale": 20.0,  # <-- Increase this value substantially
+            "velocity_threshold": 0.1,
+        },
+    )
 
     # Penalize ankle joint limits
     dof_pos_limits = RewTerm(
@@ -235,14 +277,35 @@ class BDXRRewards(RewardsCfg):
     # Penalize deviation from default of the joints that are not essential for locomotion
     joint_deviation_hip = RewTerm(
         func=mdp.joint_deviation_l1,
-        weight=-2,
+        weight=-1,
         params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_Hip_Yaw", ".*_Hip_Roll"])},
     )
+    joint_deviation_ankle = RewTerm(
+        func=mdp.joint_deviation_l1,
+        weight=-0.5,
+        params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_Ankle"])},
+    )
     #base_height_l2 = RewTerm(
     #    func=mdp.base_height_l2,
     #    weight=-1.5,  # Negative weight to create a penalty
     #    params={"target_height": 0.3}  # IMPORTANT: Set this to your robot's desired base height in meters
     #)
+    foot_clearance = RewTerm(
+        func=mdp.foot_clearance_reward,
+        weight=5,
+        params={
+            "std": 0.05,
+            "tanh_mult": 2.0,
+            "target_height": 0.2,
+            "asset_cfg": SceneEntityCfg("robot", body_names=".*_Foot"),
+        },
+    )
+    base_motion = RewTerm(
+        func=mdp.base_motion_penalty, weight=-2.0, params={"asset_cfg": SceneEntityCfg("robot")}
+    )
+    base_orientation = RewTerm(
+        func=mdp.base_orientation_penalty, weight=-3.0, params={"asset_cfg": SceneEntityCfg("robot")}
+    )
 
 # TODO: With the way it's implemented, it is not used, should we remove it or use itS
 @configclass
@@ -251,7 +314,7 @@ class EventCfg:
     # startup
     physics_material = EventTerm(
         func=mdp.randomize_rigid_body_material,
-        mode="reset",
+        mode="startup",
         params={
             "asset_cfg": SceneEntityCfg("robot", body_names=".*"),
             "static_friction_range": (0.8, 0.8),
@@ -263,23 +326,13 @@ class EventCfg:
 
     add_base_mass = EventTerm(
         func=mdp.randomize_rigid_body_mass,
-        mode="reset",
+        mode="startup",
         params={
             "asset_cfg": SceneEntityCfg("robot", body_names="base"),
-            "mass_distribution_params": (-1.0, 1.0),
+            "mass_distribution_params": (-5.0, 5.0),
             "operation": "add",
         },
     )
-    randomize_com_positions = EventTerm(
-        func=mdp.randomize_rigid_body_com,
-        mode="startup",
-        params={
-            "asset_cfg": SceneEntityCfg("robot", body_names=".*"),
-            "com_range": {"x": (-0.05, 0.05), "y": (-0.05, 0.05), "z": (-0.05, 0.05)},
-        },
-    )
-
-    # reset
     base_external_force_torque = EventTerm(
         func=mdp.apply_external_force_torque,
         mode="reset",
@@ -289,43 +342,22 @@ class EventCfg:
             "torque_range": (-0.0, 0.0),
         },
     )
-    randomize_actuator_gains = EventTerm(
-        func=mdp.randomize_actuator_gains,
-        mode="reset",
-        params={
-            "asset_cfg": SceneEntityCfg("robot", joint_names=".*"),
-            "operation": "scale",
-            "stiffness_distribution_params": (0.6, 1.4),
-            "damping_distribution_params": (0.6, 1.4),
-        },
-    )
-    randomize_joint_parameters = EventTerm(
-        func=mdp.randomize_joint_parameters,
-        mode="reset",
-        params={
-            "asset_cfg": SceneEntityCfg("robot", joint_names=".*"),
-            "operation": "scale",
-            "friction_distribution_params": (0.8, 1.2),
-            "armature_distribution_params": (0.8, 1.2),
-        },
-    )
-
     reset_base = EventTerm(
         func=mdp.reset_root_state_uniform,
         mode="reset",
         params={
             "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},
             "velocity_range": {
-                "x": (-0.5, 0.5),
-                "y": (-0.5, 0.5),
-                "z": (-0.5, 0.5),
-                "roll": (-0.5, 0.5),
-                "pitch": (-0.5, 0.5),
-                "yaw": (-0.5, 0.5),
+                "x": (0.0, 0.0),
+                "y": (0.0, 0.0),
+                "z": (0.0, 0.0),
+                "roll": (0.0, 0.0),
+                "pitch": (0.0, 0.0),
+                "yaw": (0.0, 0.0),
             },
         },
     )
-
+#
     reset_robot_joints = EventTerm(
         func=mdp.reset_joints_by_scale,
         mode="reset",
@@ -334,8 +366,6 @@ class EventCfg:
             "velocity_range": (0.0, 0.0),
         },
     )
-
-    # interval
     push_robot = EventTerm(
         func=mdp.push_by_setting_velocity,
         mode="interval",
@@ -371,7 +401,7 @@ def __post_init__(self):
 
         # events
         self.events.push_robot.params["velocity_range"] = {"x": (-0.1, 0.1), "y": (-0.1, 0.1)}
-        # self.events.push_robot = None
+        #self.events.push_robot = None
         self.events.add_base_mass.params["asset_cfg"].body_names = ["base_link"]
         self.events.add_base_mass.params["mass_distribution_params"] = (-0.5, 0.5)
         self.events.reset_robot_joints.params["position_range"] = (0.8, 1.2)
@@ -464,4 +494,7 @@ def __post_init__(self) -> None:
 
         # disable randomization for play
         self.observations.policy.enable_corruption = False
+        self.commands.base_velocity.ranges.lin_vel_x = (0.3, 0.3)
+        self.commands.base_velocity.ranges.lin_vel_y = (0.0, 0.0)
+        self.commands.base_velocity.ranges.ang_vel_z = (0.0, 0.0)
         # remove random pushing event
\ No newline at end of file
diff --git a/source/BDXR/BDXR/tasks/bdxr_locomotion/mdp/rewards.py b/source/BDXR/BDXR/tasks/bdxr_locomotion/mdp/rewards.py
index 9619583..b8a8b22 100644
--- a/source/BDXR/BDXR/tasks/bdxr_locomotion/mdp/rewards.py
+++ b/source/BDXR/BDXR/tasks/bdxr_locomotion/mdp/rewards.py
@@ -8,9 +8,10 @@
 import torch
 from typing import TYPE_CHECKING
 
-from isaaclab.assets import Articulation
+from isaaclab.assets import Articulation, RigidObject
 from isaaclab.managers import SceneEntityCfg
 from isaaclab.utils.math import wrap_to_pi
+from isaaclab.sensors import ContactSensor
 
 if TYPE_CHECKING:
     from isaaclab.envs import ManagerBasedRLEnv
@@ -47,4 +48,153 @@ def base_linear_velocity_reward(
     # fixed 1.0 multiple for tracking below the ramp_at_vel value, then scale by the rate above
     vel_cmd_magnitude = torch.linalg.norm(target, dim=1)
     velocity_scaling_multiple = torch.clamp(1.0 + ramp_rate * (vel_cmd_magnitude - ramp_at_vel), min=1.0)
-    return torch.exp(-lin_vel_error / std) * velocity_scaling_multiple
\ No newline at end of file
+    return torch.exp(-lin_vel_error / std) * velocity_scaling_multiple
+
+def bipedal_air_time_reward(
+    env: ManagerBasedRLEnv,
+    asset_cfg: SceneEntityCfg,
+    sensor_cfg: SceneEntityCfg,
+    mode_time: float,
+    velocity_threshold: float,
+) -> torch.Tensor:
+    """Reward longer feet air and contact time for a bipedal robot."""
+    # extract the used quantities (to enable type-hinting)
+    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
+    asset: Articulation = env.scene[asset_cfg.name]
+    if not contact_sensor.cfg.track_air_time:
+        raise RuntimeError("Activate ContactSensor's track_air_time!")
+
+    # Ensure you are tracking the correct body IDs for the two feet.
+    # This will depend on your robot's URDF file.
+    # For example, if your foot body IDs are 3 and 6:
+    # sensor_cfg.body_ids = [3, 6]
+
+    # compute the reward
+    current_air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
+    current_contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
+
+    t_max = torch.max(current_air_time, current_contact_time)
+    t_min = torch.clip(t_max, max=mode_time)
+    stance_cmd_reward = torch.clip(current_contact_time - current_air_time, -mode_time, mode_time)
+
+    # MODIFIED: Expand command to 2 dimensions for the two feet
+    cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1).unsqueeze(dim=1).expand(-1, 2)
+
+    # MODIFIED: Expand body velocity to 2 dimensions for the two feet
+    body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1).unsqueeze(dim=1).expand(-1, 2)
+
+    reward = torch.where(
+        torch.logical_or(cmd > 0.0, body_vel > velocity_threshold),
+        torch.where(t_max < mode_time, t_min, 0),
+        stance_cmd_reward,
+    )
+    return torch.sum(reward, dim=1)
+
+def foot_clearance_reward(
+    env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, target_height: float, std: float, tanh_mult: float
+) -> torch.Tensor:
+    """Reward the swinging feet for clearing a specified height off the ground."""
+    asset: RigidObject = env.scene[asset_cfg.name]
+    foot_heights = asset.data.body_pos_w[:, asset_cfg.body_ids, 2]
+    foot_velocities_norm = torch.norm(asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2], dim=2)
+
+    # -- Start: Debugging Print Statement (Re-added) --
+    # To avoid flooding the console, we print the heights for only the first environment (env 0).
+    # We use .cpu().numpy() to make the tensor printable in a standard format.
+    if env.common_step_counter % 10 == 0: # Optional: print only every 10 steps
+        print(f"Current foot heights for env 0: {foot_heights[0].cpu().numpy()}")
+    # -- End: Debugging Print Statement --
+
+    # --- Original Calculation ---
+    foot_z_target_error = torch.square(foot_heights - target_height)
+    foot_velocity_tanh = torch.tanh(tanh_mult * foot_velocities_norm)
+    reward_error = torch.sum(foot_z_target_error * foot_velocity_tanh, dim=1)
+    clearance_reward = torch.exp(-reward_error / std)
+
+    # --- THE FIX: Add a "Walking Gate" ---
+    # This gate ensures that the reward is only applied if at least one foot is swinging.
+    max_foot_vel = torch.max(foot_velocities_norm, dim=1)[0]
+    # The gate is a value from 0 to 1. It's 0 if the robot is still, and 1 if it's walking.
+    walking_gate = torch.tanh(5.0 * max_foot_vel)
+
+    # The final reward is the clearance reward scaled by the walking gate.
+    return clearance_reward * walking_gate
+
+
+
+def bipedal_air_time_reward(
+    env: ManagerBasedRLEnv,
+    asset_cfg: SceneEntityCfg,
+    sensor_cfg: SceneEntityCfg,
+    mode_time: float,
+    velocity_threshold: float,
+) -> torch.Tensor:
+    """Reward longer feet air and contact time for a bipedal robot."""
+    # extract the used quantities (to enable type-hinting)
+    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
+    asset: Articulation = env.scene[asset_cfg.name]
+    if not contact_sensor.cfg.track_air_time:
+        raise RuntimeError("Activate ContactSensor's track_air_time!")
+
+    # Ensure you are tracking the correct body IDs for the two feet.
+    # This will depend on your robot's URDF file.
+    # For example, if your foot body IDs are 3 and 6:
+    # sensor_cfg.body_ids = [3, 6]
+
+    # compute the reward
+    current_air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
+    current_contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
+
+    t_max = torch.max(current_air_time, current_contact_time)
+    t_min = torch.clip(t_max, max=mode_time)
+    stance_cmd_reward = torch.clip(current_contact_time - current_air_time, -mode_time, mode_time)
+
+    # MODIFIED: Expand command to 2 dimensions for the two feet
+    cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1).unsqueeze(dim=1).expand(-1, 2)
+
+    # MODIFIED: Expand body velocity to 2 dimensions for the two feet
+    body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1).unsqueeze(dim=1).expand(-1, 2)
+
+    reward = torch.where(
+        torch.logical_or(cmd > 0.0, body_vel > velocity_threshold),
+        torch.where(t_max < mode_time, t_min, 0),
+        stance_cmd_reward,
+    )
+    return torch.sum(reward, dim=1)
+
+def joint_position_penalty(
+    env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, stand_still_scale: float, velocity_threshold: float
+) -> torch.Tensor:
+    """Penalize joint position error from default on the articulation."""
+    asset: Articulation = env.scene[asset_cfg.name]
+    cmd_norm = torch.linalg.norm(env.command_manager.get_command("base_velocity"), dim=1)
+
+    # Calculate the penalty (deviation from default pose)
+    joint_pos_error = torch.linalg.norm((asset.data.joint_pos - asset.data.default_joint_pos), dim=1)
+
+    # When the command is to stand still, apply a much larger penalty
+    # The penalty is negative because we want to discourage this behavior.
+    penalty = -joint_pos_error
+    scaled_penalty = -stand_still_scale * joint_pos_error
+
+    return torch.where(cmd_norm < velocity_threshold, scaled_penalty, penalty)
+
+
+# ! look into simplifying the kernel here; it's a little oddly complex
+def base_motion_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
+    """Penalize base vertical and roll/pitch velocity"""
+    # extract the used quantities (to enable type-hinting)
+    asset: RigidObject = env.scene[asset_cfg.name]
+    return 0.8 * torch.square(asset.data.root_lin_vel_b[:, 2]) + 0.2 * torch.sum(
+        torch.abs(asset.data.root_ang_vel_b[:, :2]), dim=1
+    )
+
+
+def base_orientation_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
+    """Penalize non-flat base orientation
+
+    This is computed by penalizing the xy-components of the projected gravity vector.
+    """
+    # extract the used quantities (to enable type-hinting)
+    asset: RigidObject = env.scene[asset_cfg.name]
+    return torch.linalg.norm((asset.data.projected_gravity_b[:, :2]), dim=1)