diff --git a/.gitignore b/.gitignore index c78e6696..a7793db4 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,9 @@ gym/wandb *.npz user/wandb_config.json *trajectories/ +*.png +scaling_analysis/ +recovery* # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index f9d8b788..ece06842 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -15,6 +15,9 @@ "MiniCheetah": ".mini_cheetah.mini_cheetah", "MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref", "MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc", + "Horse": ".horse.horse", + "HorseOsc": ".horse.horse_osc", + "HorseOscBelay": ".horse.horse_osc_belay", "MIT_Humanoid": ".mit_humanoid.mit_humanoid", "Anymal": ".anymal_c.anymal", "A1": ".a1.a1", @@ -28,6 +31,9 @@ "MiniCheetahRefCfg": ".mini_cheetah.mini_cheetah_ref_config", "MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACCfg": ".mini_cheetah.mini_cheetah_SAC_config", + "HorseCfg": ".horse.horse_config", + "HorseOscCfg": ".horse.horse_osc_config", + "HorseOscBelayCfg": ".horse.horse_osc_belay_config", "MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config", "A1Cfg": ".a1.a1_config", "AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -42,6 +48,9 @@ "MiniCheetahRefRunnerCfg": ".mini_cheetah.mini_cheetah_ref_config", "MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACRunnerCfg": ".mini_cheetah.mini_cheetah_SAC_config", + "HorseRunnerCfg": ".horse.horse_config", + "HorseOscRunnerCfg": ".horse.horse_osc_config", + "HorseOscBelayRunnerCfg": ".horse.horse_osc_belay_config", "MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config", "A1RunnerCfg": ".a1.a1_config", "AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -68,6 +77,17 @@ "MiniCheetahSACCfg", "MiniCheetahSACRunnerCfg" ], + "horse": ["Horse", "HorseCfg", "HorseRunnerCfg"], + "horse_osc": [ + "HorseOsc", + "HorseOscCfg", + "HorseOscRunnerCfg", + ], + "horse_osc_belay": [ + "HorseOscBelay", + "HorseOscBelayCfg", + "HorseOscBelayRunnerCfg", + ], "humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"], "humanoid_running": [ "HumanoidRunning", diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index 404c62e4..2344e05f 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -517,8 +517,9 @@ def _init_buffers(self): dtype=torch.float, device=self.device, ) + # add height as the 4th command self.commands = torch.zeros( - self.num_envs, 3, dtype=torch.float, device=self.device + self.num_envs, 4, dtype=torch.float, device=self.device ) self.base_lin_vel = quat_rotate_inverse( self.base_quat, self.root_states[:, 7:10] @@ -986,6 +987,31 @@ def _sqrdexp(self, x, scale=1.0): -torch.square(x / scale) / self.cfg.reward_settings.tracking_sigma ) + def _process_rigid_body_props(self, props, env_id): + if env_id == 0: + # * init buffers for the domain rand changes + self.mass = torch.zeros(self.num_envs, 1, device=self.device) + self.com = torch.zeros(self.num_envs, 3, device=self.device) + + # * randomize mass + if self.cfg.domain_rand.randomize_base_mass: + lower = self.cfg.domain_rand.lower_mass_offset + upper = self.cfg.domain_rand.upper_mass_offset + # self.mass_ + props[0].mass += np.random.uniform(lower, upper) + self.mass[env_id] = props[0].mass + # * randomize com position + lower = self.cfg.domain_rand.lower_z_offset + upper = self.cfg.domain_rand.upper_z_offset + props[0].com.z += np.random.uniform(lower, upper) + self.com[env_id, 2] = props[0].com.z + + lower = self.cfg.domain_rand.lower_x_offset + upper = self.cfg.domain_rand.upper_x_offset + props[0].com.x += np.random.uniform(lower, upper) + self.com[env_id, 0] = props[0].com.x + return props + # ------------ reward functions---------------- def _reward_lin_vel_z(self): diff --git a/gym/envs/horse/horse.py b/gym/envs/horse/horse.py new file mode 100644 index 00000000..bb2e545e --- /dev/null +++ b/gym/envs/horse/horse.py @@ -0,0 +1,78 @@ +import torch + +from isaacgym.torch_utils import torch_rand_float +from gym.envs.base.legged_robot import LeggedRobot + + +class Horse(LeggedRobot): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _reward_lin_vel_z(self): + """Penalize z axis base linear velocity with squared exp""" + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"]) + + def _reward_ang_vel_xy(self): + """Penalize xy axes base angular velocity""" + error = self._sqrdexp(self.base_ang_vel[:, :2] / self.scales["base_ang_vel"]) + return torch.sum(error, dim=1) + + def _reward_orientation(self): + """Penalize non-flat base orientation""" + error = ( + torch.square(self.projected_gravity[:, :2]) + / self.cfg.reward_settings.tracking_sigma + ) + return torch.sum(torch.exp(-error), dim=1) + + def _reward_min_base_height(self): + """Squared exponential saturating at base_height target""" + error = self.base_height - self.cfg.reward_settings.base_height_target + error /= self.scales["base_height"] + error = torch.clamp(error, max=0, min=None).flatten() + return self._sqrdexp(error) + + def _reward_tracking_lin_vel(self): + """Tracking of linear velocity commands (xy axes)""" + # just use lin_vel? + error = self.commands[:, :2] - self.base_lin_vel[:, :2] + # * scale by (1+|cmd|): if cmd=0, no scaling. + error *= 1.0 / (1.0 + torch.abs(self.commands[:, :2])) + error = torch.sum(torch.square(error), dim=1) + return torch.exp(-error / self.cfg.reward_settings.tracking_sigma) + + def _reward_tracking_ang_vel(self): + """Tracking of angular velocity commands (yaw)""" + ang_vel_error = torch.square( + (self.commands[:, 2] - self.base_ang_vel[:, 2]) / 5.0 + ) + return self._sqrdexp(ang_vel_error) + + def _reward_dof_vel(self): + """Penalize dof velocities""" + return torch.sum(self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1) + + def _reward_dof_near_home(self): + return torch.sum( + self._sqrdexp( + (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos_obs"] + ), + dim=1, + ) + + def _resample_commands(self, env_ids): + super()._resample_commands(env_ids) + + # resample height + height_range = self.command_ranges["height"] + self.commands[env_ids, 3] = torch_rand_float( + height_range[0], height_range[1], (len(env_ids), 1), device=self.device + ).squeeze(1) + + def _reward_tracking_height(self): + """Reward for base height.""" + # error between current and commanded height + error = self.base_height.flatten() - self.commands[:, 3].flatten() + error /= self.scales["base_height"] + + return self._sqrdexp(error) diff --git a/gym/envs/horse/horse_config.py b/gym/envs/horse/horse_config.py new file mode 100644 index 00000000..699aff5f --- /dev/null +++ b/gym/envs/horse/horse_config.py @@ -0,0 +1,262 @@ +from gym.envs.base.legged_robot_config import ( + LeggedRobotCfg, + LeggedRobotRunnerCfg, +) + +BASE_HEIGHT_REF = 1.3 + + +class HorseCfg(LeggedRobotCfg): + class env(LeggedRobotCfg.env): + num_envs = 2**12 + num_actuators = 1 + 4 * 5 + episode_length_s = 10 + + class terrain(LeggedRobotCfg.terrain): + mesh_type = "plane" + + class init_state(LeggedRobotCfg.init_state): + default_joint_angles = { + "haa": 0.0, + "hfe": 0.0, + "kfe": 0.0, + "pfe": 0.0, + "pastern_to_foot": 0.0, + "base_joint": 0.0, + } + + # * reset setup chooses how the initial conditions are chosen. + # * "reset_to_basic" = a single position + # * "reset_to_range" = uniformly random from a range defined below + reset_mode = "reset_to_range" + + # * default COM for basic initialization + pos = [0.0, 0.0, 1.4] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + + # * initialization for random range setup + # these are the physical limits in the URDF as of 17 Nov 2025 + # dof_pos_range = { + # "haa": [-0.2, 0.2], + # "hfe": [-0.7, 0.6], + # "kfe": [-1.3, 0.1], + # "pfe": [-0.3, 2.2], + # "pastern_to_foot": [-0.3, 1.8], + # "base_joint": [-0.2, 0.2], + # } + dof_pos_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.7, 0.6], + "kfe": [-1.3, 0.1], + "pfe": [-0.3, 2.2], + "pastern_to_foot": [-0.3, 1.8], + "base_joint": [-0.2, 0.2], + } + dof_vel_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.2, 0.2], + "kfe": [-0.2, 0.2], + "pfe": [-0.2, 0.2], + "pastern_to_foot": [-0.2, 0.2], + "base_joint": [-0.2, 0.2], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [1.3, 1.3], # z + [-0.2, 0.2], # roll + [-0.2, 0.2], # pitch + [-0.2, 0.2], # yaw + ] + root_vel_range = [ + [-0.5, 5.0], # x + [0.0, 0.0], # y + [-0.05, 0.05], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(LeggedRobotCfg.control): + # * PD Drive parameters: + stiffness = { + "haa": 4000, + "hfe": 4000, + "kfe": 4000, + "pfe": 4000, + "pastern_to_foot": 4000, + "base_joint": 50, + } + damping = { + "haa": 250, + "hfe": 250, + "kfe": 250, + "pfe": 250, + "pastern_to_foot": 250, + "base_joint": 10, + } + ctrl_frequency = 250 # how often the PDF controller/action updates run + desired_sim_frequency = 500 # how often the physics is calculated + + class commands: + # * time before command are changed[s] + resampling_time = 3.0 + + class ranges: + lin_vel_x = [-1.0, 8.0] # min max [m/s] + lin_vel_y = 1.0 # max [m/s] + yaw_vel = 6 # max [rad/s] + height = [0.61, 1.30] # m + + class push_robots: + toggle = False + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.5, 5.0] + randomize_base_mass = False + added_mass_range = [-1.0, 1.0] + + class asset(LeggedRobotCfg.asset): + file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" + foot_name = "foot" + penalize_contacts_on = ["shank"] + terminate_after_contacts_on = ["base"] + end_effector_names = ["foot"] + collapse_fixed_joints = False + self_collisions = 1 + flip_visual_attachments = False + disable_gravity = False + disable_motors = False + joint_damping = 0.3 + fix_base_link = False + rotor_inertia = [0.002268] + 4 * ( + [0.002268, 0.002268, 0.005484, 0.005484, 0.005484] + ) + + class reward_settings(LeggedRobotCfg.reward_settings): + soft_dof_pos_limit = 0.9 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 600.0 + base_height_target = BASE_HEIGHT_REF + tracking_sigma = 0.25 + + class scaling(LeggedRobotCfg.scaling): + base_ang_vel = 0.3 + base_lin_vel = BASE_HEIGHT_REF + dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + (4 * [0.4, 1.3, 1.4, 2.5, 2.1]) + dof_pos_obs = dof_pos + dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used + commands = [3, 1, 3, 1] # add height as a command + + +class HorseRunnerCfg(LeggedRobotRunnerCfg): + seed = -1 + runner_class_name = "OnPolicyRunner" + + class actor(LeggedRobotRunnerCfg.actor): + hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + normalize_obs = True + actions = ["dof_pos_target"] + add_noise = True + disable_actions = False + + class noise: + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 + dof_vel = 0.005 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 + + class critic(LeggedRobotRunnerCfg.critic): + hidden_dims = [128, 64] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + normalize_obs = True + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.01 + orientation = 1.0 + torques = 5.0e-7 + dof_vel = 0.0 + min_base_height = 1.5 + action_rate = 1e-4 + action_rate2 = 1e-5 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + tracking_height = 1.5 + + class termination_weight: + termination = 0.01 + + class algorithm(LeggedRobotRunnerCfg.algorithm): + class algorithm: + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-4, 1e-2] + lr_ratio = 1.3 + + class runner(LeggedRobotRunnerCfg.runner): + run_name = "" + experiment_name = "horse" + max_iterations = 1000 + algorithm_class_name = "PPO2" + num_steps_per_env = 32 diff --git a/gym/envs/horse/horse_osc.py b/gym/envs/horse/horse_osc.py new file mode 100644 index 00000000..99a6f1bc --- /dev/null +++ b/gym/envs/horse/horse_osc.py @@ -0,0 +1,563 @@ +import torch +from isaacgym.torch_utils import torch_rand_float + +from gym.envs.horse.horse import Horse + +from isaacgym import gymtorch + +HORSE_WEIGHT = 536.38 * 9.81 # Weight of horse in Newtons +BASE_HEIGHT_REF = 1.0 + + +class HorseOsc(Horse): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _init_buffers(self): + super()._init_buffers() + + BASE = 0 + + RH = dict(haa=1, hfe=2, kfe=3, pfe=4, pastern=5) + LH = dict(haa=6, hfe=7, kfe=8, pfe=9, pastern=10) + RF = dict(haa=11, hfe=12, kfe=13, pfe=14, pastern=15) + LF = dict(haa=16, hfe=17, kfe=18, pfe=19, pastern=20) + + # tensors for vectorized ops + self.idx = { + "base": torch.tensor([BASE], device=self.device), + # all legs by joint type (RH, LH, RF, LF order) + "haa": torch.tensor( + [RH["haa"], LH["haa"], RF["haa"], LF["haa"]], device=self.device + ), + "hfe": torch.tensor( + [RH["hfe"], LH["hfe"], RF["hfe"], LF["hfe"]], device=self.device + ), + "kfe": torch.tensor( + [RH["kfe"], LH["kfe"], RF["kfe"], LF["kfe"]], device=self.device + ), + "pfe": torch.tensor( + [RH["pfe"], LH["pfe"], RF["pfe"], LF["pfe"]], device=self.device + ), + "pastern": torch.tensor( + [RH["pastern"], LH["pastern"], RF["pastern"], LF["pastern"]], + device=self.device, + ), + # hind vs front splits (HIND = RH, LH / FRONT = RF, LF) + "hind_haa": torch.tensor([RH["haa"], LH["haa"]], device=self.device), + "hind_hfe": torch.tensor([RH["hfe"], LH["hfe"]], device=self.device), + "hind_kfe": torch.tensor([RH["kfe"], LH["kfe"]], device=self.device), + "hind_pfe": torch.tensor([RH["pfe"], LH["pfe"]], device=self.device), + "hind_pastern": torch.tensor( + [RH["pastern"], LH["pastern"]], device=self.device + ), + "front_haa": torch.tensor([RF["haa"], LF["haa"]], device=self.device), + "front_hfe": torch.tensor([RF["hfe"], LF["hfe"]], device=self.device), + "front_kfe": torch.tensor([RF["kfe"], LF["kfe"]], device=self.device), + "front_pfe": torch.tensor([RF["pfe"], LF["pfe"]], device=self.device), + "front_pastern": torch.tensor( + [RF["pastern"], LF["pastern"]], device=self.device + ), + # legs (all joints within each leg) + "rh_leg": torch.tensor( + [RH["haa"], RH["hfe"], RH["kfe"], RH["pfe"], RH["pastern"]], + device=self.device, + ), + "lh_leg": torch.tensor( + [LH["haa"], LH["hfe"], LH["kfe"], LH["pfe"], LH["pastern"]], + device=self.device, + ), + "rf_leg": torch.tensor( + [RF["haa"], RF["hfe"], RF["kfe"], RF["pfe"], RF["pastern"]], + device=self.device, + ), + "lf_leg": torch.tensor( + [LF["haa"], LF["hfe"], LF["kfe"], LF["pfe"], LF["pastern"]], + device=self.device, + ), + # group by front or hind legs + "hind_legs": torch.tensor( + [ + RH["haa"], + RH["hfe"], + RH["kfe"], + RH["pfe"], + RH["pastern"], + LH["haa"], + LH["hfe"], + LH["kfe"], + LH["pfe"], + LH["pastern"], + ], + device=self.device, + ), + "front_legs": torch.tensor( + [ + RF["haa"], + RF["hfe"], + RF["kfe"], + RF["pfe"], + RF["pastern"], + LF["haa"], + LF["hfe"], + LF["kfe"], + LF["pfe"], + LF["pastern"], + ], + device=self.device, + ), + } + + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) + self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) + + self.oscillators_vel = torch.zeros_like(self.oscillators) + self.grf = torch.zeros(self.num_envs, 4, device=self.device) + self.osc_omega = self.cfg.osc.omega * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_coupling = self.cfg.osc.coupling * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_offset = self.cfg.osc.offset * torch.ones( + self.num_envs, 1, device=self.device + ) + + self.commands[:, 3] = 1.0 + + def _reset_oscillators(self, env_ids): + if len(env_ids) == 0: + return + # * random + if self.cfg.osc.init_to == "random": + self.oscillators[env_ids] = torch_rand_float( + 0, + 2 * torch.pi, + shape=self.oscillators[env_ids].shape, + device=self.device, + ) + elif self.cfg.osc.init_to == "standing": + self.oscillators[env_ids] = 3 * torch.pi / 2 + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + + def _reset_system(self, env_ids): + if len(env_ids) == 0: + return + self._reset_oscillators(env_ids) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + # * keep some robots in the same starting state as they ended + timed_out_subset = (self.timed_out & ~self.terminated) * ( + torch.rand(self.num_envs, device=self.device) + < self.cfg.init_state.timeout_reset_ratio + ) + + env_ids = (self.terminated | timed_out_subset).nonzero().flatten() + if len(env_ids) == 0: + return + super()._reset_system(env_ids) + self.commands[env_ids, 3] = 1.0 + + def _pre_decimation_step(self): + super()._pre_decimation_step() + # self.grf = self._compute_grf() + self.compute_osc_slope() + + def compute_osc_slope(self): + cmd_x = torch.abs(self.commands[:, 0:1]) - self.cfg.osc.stop_threshold + stop = cmd_x < 0 + + self.osc_offset = stop * self.cfg.osc.offset + self.osc_omega = stop * self.cfg.osc.omega_stop + self.osc_coupling = stop * self.cfg.osc.coupling_stop + + self.osc_omega += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.omega_slope + self.cfg.osc.omega_step, + min=0.0, + max=self.cfg.osc.omega_max, + ) + self.osc_coupling += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.coupling_slope + self.cfg.osc.coupling_step, + min=0.0, + max=self.cfg.osc.coupling_max, + ) + + self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) + self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) + + def _post_decimation_step(self): + """Update all states that are not handled in PhysX""" + super()._post_decimation_step() + self.grf = self._compute_grf() + # self._step_oscillators() + + def _post_physx_step(self): + super()._post_physx_step() + self._step_oscillators(self.dt / self.cfg.control.decimation) + return None + + def _step_oscillators(self, dt=None): + if dt is None: + dt = self.dt + + local_feedback = self.osc_coupling * ( + torch.cos(self.oscillators) + self.osc_offset + ) + grf = self._compute_grf() + self.oscillators_vel = self.osc_omega - grf * local_feedback + self.oscillators_vel += ( + torch.randn(self.oscillators_vel.shape, device=self.device) + * self.cfg.osc.process_noise_std + ) + + self.oscillators_vel *= 2 * torch.pi + self.oscillators += self.oscillators_vel * dt + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + def _resample_commands(self, env_ids): + """Randommly select commands of some environments + + Args: + env_ids (List[int]): Environments ids for which new commands are needed + """ + if len(env_ids) == 0: + return + super()._resample_commands(env_ids) + possible_commands = torch.tensor( + self.command_ranges["lin_vel_x"], device=self.device + ) + self.commands[env_ids, 0:1] = possible_commands[ + torch.randint( + 0, len(possible_commands), (len(env_ids), 1), device=self.device + ) + ] + # add some gaussian noise to the commands + self.commands[env_ids, 0:1] += ( + torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var + ) + + if 0 in self.cfg.commands.ranges.lin_vel_x: + # * with 20% chance, reset to 0 commands except for forward + self.commands[env_ids, 1:] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 20% chance, reset to 0 commands except for rotation + self.commands[env_ids, :2] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 10% chance, reset to 0 + self.commands[env_ids, :] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.9 + ).unsqueeze(1) + + if self.cfg.osc.randomize_osc_params: + self._resample_osc_params(env_ids) + + height_min, height_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = 1.0 + + def _resample_osc_params(self, env_ids): + if len(env_ids) > 0: + self.osc_omega[env_ids, 0] = torch_rand_float( + self.cfg.osc.omega_range[0], + self.cfg.osc.omega_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_coupling[env_ids, 0] = torch_rand_float( + self.cfg.osc.coupling_range[0], + self.cfg.osc.coupling_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_offset[env_ids, 0] = torch_rand_float( + self.cfg.osc.offset_range[0], + self.cfg.osc.offset_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + + def perturb_base_velocity(self, velocity_delta, env_ids=None): + if env_ids is None: + env_ids = [range(self.num_envs)] + self.root_states[env_ids, 7:10] += velocity_delta + self.gym.set_actor_root_state_tensor( + self.sim, gymtorch.unwrap_tensor(self.root_states) + ) + + def _compute_grf(self, grf_norm=True): + grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) + if grf_norm: + return torch.clamp_max(grf / HORSE_WEIGHT, 1.0) + else: + return grf + + def _switch(self): + c_vel = torch.linalg.norm(self.commands, dim=1) + return torch.exp( + -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) + / self.cfg.reward_settings.switch_scale + ) + + def _switch_height(self): + h_cmd = self.commands[:, 3] + h_err = torch.abs(h_cmd - BASE_HEIGHT_REF) # (num_envs,) + x = torch.clamp(h_err - 0.02, min=0.0) # 2 cm deadzone (tune) + return torch.exp(-(x * x) / self.cfg.reward_settings.switch_scale_height) + + def _reward_lin_vel_z(self): + """Penalize z axis base linear velocity with squared exp""" + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"][0]) + + def _reward_ang_vel_xy(self): + """Penalize xy axes base angular velocity""" + error = self._sqrdexp(self.base_ang_vel[:, :2] / self.scales["base_ang_vel"][0]) + return torch.sum(error, dim=1) + + # modified this method so joint index list uses horse 5 DOF legs, not 3 DOF + def _reward_cursorial(self): + """ + Encourage legs under body, joints stay neutral, avoid inverted/folded poses + Penalize key joints drifting from home. For a horse with 5 joints per leg, + we penalize (haa, hfe, kfe) for each of the 4 legs. + Distal joint (pfe, pastern_to_foot) naturally flex a lot, shock absorbers + """ + legs = torch.tensor(list(range(1, 21)), device=self.device) + return ( + -torch.mean(torch.square(self.dof_pos[:, legs]), dim=1) + * self._switch_height() + ) + + def _reward_swing_grf(self): + # Reward non-zero grf during swing (0 to pi) + rew = self.get_swing_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool) + return -torch.sum(rew, dim=1) * self._switch_height() + + def _reward_stance_grf(self): + # Reward non-zero grf during stance (pi to 2pi) + rew = self.get_stance_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool) + return torch.sum(rew, dim=1) * self._switch_height() + + def get_swing_grf(self, osc_bool=False, contact_bool=False): + if osc_bool: + phase = torch.lt(self.oscillators, torch.pi).int() + else: + phase = torch.maximum( + torch.zeros_like(self.oscillators), torch.sin(self.oscillators) + ) + if contact_bool: + return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold) + else: + return phase * self._compute_grf() + + def get_stance_grf(self, osc_bool=False, contact_bool=False): + if osc_bool: + phase = torch.gt(self.oscillators, torch.pi).int() + else: + phase = torch.maximum( + torch.zeros_like(self.oscillators), -torch.sin(self.oscillators) + ) + if contact_bool: + return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold) + else: + return phase * self._compute_grf() + + def _reward_coupled_grf(self): + """ + Multiply rewards for stance/swing grf, discount when undesirable + behavior (grf during swing, no grf during stance) + """ + swing_rew = self.get_swing_grf() + stance_rew = self.get_stance_grf() + combined_rew = self._sqrdexp(swing_rew * 2) + stance_rew + prod = torch.prod(torch.clip(combined_rew, 0, 1), dim=1) + return prod - torch.ones_like(prod) + + def _reward_dof_vel(self): + """Penalize dof velocities""" + return super()._reward_dof_vel() * self._switch() + + def _reward_dof_near_home(self): + return super()._reward_dof_near_home() * self._switch() * self._switch_height() + + def _reward_stand_still(self): + """Penalize any movement when at zero commands.""" + # normalize angles so we care about being within ~5 degrees + rew_pos = torch.mean( + self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1 + ) + rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) + rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) + rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) + return ( + (rew_vel + rew_pos - rew_base_vel) * self._switch() * self._switch_height() + ) + + def _reward_standing_torques(self): + """Penalize torques at zero commands""" + return super()._reward_torques() * self._switch() * self._switch_height() + + # * gait similarity scores + def angle_difference(self, theta1, theta2): + diff = torch.abs(theta1 - theta2) % (2 * torch.pi) + return torch.min(diff, 2 * torch.pi - diff) + + def _reward_tracking_height(self): + """Reward for base height.""" + # error between current and commanded height + error = self.base_height.flatten() - self.commands[:, 3].flatten() + return -(error**2) + + def _lerp_clamped(self, x, x0, x1, y0, y1): + """Linear interpolation y(x) between (x0,y0)->(x1,y1), with clamping.""" + denom = x1 - x0 + denom = torch.where(torch.abs(denom) < 1e-8, torch.ones_like(denom), denom) + t = (x - x0) / denom + t = torch.clamp(t, 0.0, 1.0) + return y0 + t * (y1 - y0) + + def _piecewise_2seg(self, x, mid, x_lo, x_hi, y_lo, y_mid, y_hi): + """ + Piecewise linear passing through: + (x=mid, y=y_mid) + and reaching: + (x=x_lo, y=y_lo) for x <= mid + (x=x_hi, y=y_hi) for x >= mid + """ + y_left = self._lerp_clamped(x, mid, x_lo, y_mid, y_lo) + y_right = self._lerp_clamped(x, mid, x_hi, y_mid, y_hi) + return torch.where(x <= mid, y_left, y_right) + + def _reward_hind_kfe_tendon(self): + # hind hfe: 0 -> -1.5 => kfe: 0 -> +1.0 + hfe_hind = self.dof_pos[:, self.idx["hind_hfe"]] # (N,2) + kfe_hind = self.dof_pos[:, self.idx["hind_kfe"]] # (N,2) + + kfe_hind_des = self._piecewise_2seg( + hfe_hind, + mid=torch.zeros_like(hfe_hind), + x_lo=-1.5 * torch.ones_like(hfe_hind), + x_hi=+0.5 * torch.ones_like(hfe_hind), + y_lo=+1.0 * torch.ones_like(hfe_hind), + y_mid=0.0 * torch.ones_like(hfe_hind), + y_hi=-0.2 * torch.ones_like(hfe_hind), + ) + + err = (kfe_hind - kfe_hind_des) ** 2 + return -err.mean(dim=1) + + def _reward_hind_pfe_tendon(self): + # hind hfe: 0 -> -1.5 => pfe: 0 -> -1.2 + hfe_hind = self.dof_pos[:, self.idx["hind_hfe"]] # (N,2) + pfe_hind = self.dof_pos[:, self.idx["hind_pfe"]] # (N,2) + + pfe_hind_des = self._piecewise_2seg( + hfe_hind, + mid=torch.zeros_like(hfe_hind), + x_lo=-1.5 * torch.ones_like(hfe_hind), + x_hi=+0.5 * torch.ones_like(hfe_hind), + y_lo=-1.2 * torch.ones_like(hfe_hind), + y_mid=0.0 * torch.ones_like(hfe_hind), + y_hi=-0.5 * torch.ones_like(hfe_hind), + ) + + err = (pfe_hind - pfe_hind_des) ** 2 + return -err.mean(dim=1) + + def _reward_front_kfe_tendon(self): + # front hfe: 0 -> +0.6 => kfe: 0 -> 0, pfe: 0 -> 0 + # front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +1.5 + # front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +3.0 + hfe_front = self.dof_pos[:, self.idx["front_hfe"]] # (N,2) + kfe_front = self.dof_pos[:, self.idx["front_kfe"]] # (N,2) + + zeros = torch.zeros_like(hfe_front) + ones = torch.ones_like(hfe_front) + + kfe_front_des = self._piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=-1.5 * ones, + y_mid=0.0 * ones, + y_hi=0.0 * ones, + ) + + err = (kfe_front - kfe_front_des) ** 2 + return -err.mean(dim=1) + + def _reward_front_pfe_tendon(self): + # front hfe: 0 -> +0.6 => kfe: 0 -> 0, pfe: 0 -> 0 + # front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +1.5 + # front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +3.0 + hfe_front = self.dof_pos[:, self.idx["front_hfe"]] # (N,2) + pfe_front = self.dof_pos[:, self.idx["front_pfe"]] # (N,2) + + zeros = torch.zeros_like(hfe_front) + ones = torch.ones_like(hfe_front) + + pfe_front_des_15 = self._piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=+1.5 * ones, + y_mid=0.0 * ones, + y_hi=0.0 * ones, + ) + + pfe_front_des_30 = self._piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=+3.0 * ones, + y_mid=0.0 * ones, + y_hi=0.0 * ones, + ) + + err15 = (pfe_front - pfe_front_des_15) ** 2 + err30 = (pfe_front - pfe_front_des_30) ** 2 + err = torch.minimum(err15, err30) + + return -err.mean(dim=1) + + def _reward_feet_contact_count(self): + grf = self._compute_grf(grf_norm=True) + contact = (grf > self.cfg.osc.grf_threshold).float() + return contact.mean(dim=1) + + def _reward_feet_support_upright(self): + grf = self._compute_grf(grf_norm=True) + support = torch.min(grf, dim=1).values + return support * self._switch_height() + + def _reward_hfe_upright(self): + # commanded height + h_cmd = self.commands[:, 3] + + # gate: only active near standing height + h_err = torch.abs(h_cmd - BASE_HEIGHT_REF) + gate = torch.exp(-(h_err**2) / self.cfg.reward_settings.switch_scale_height) + + # HFE joints (both legs) + hfe = self.dof_pos[:, self.idx["hind_hfe"]] # (N, 2) just the hind legs for now + + # want HFE -> 0 + err = hfe**2 # squared error to 0 + + rew = torch.exp(-err / 0.5) + + # average across legs + rew = rew.mean(dim=1) + + return gate * rew diff --git a/gym/envs/horse/horse_osc_belay.py b/gym/envs/horse/horse_osc_belay.py new file mode 100644 index 00000000..0f3bfb46 --- /dev/null +++ b/gym/envs/horse/horse_osc_belay.py @@ -0,0 +1,356 @@ +import torch +from isaacgym import gymtorch, gymapi + +from gym.envs.horse.horse_osc import HorseOsc + + +class HorseOscBelay(HorseOsc): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _init_buffers(self): + super()._init_buffers() + self._init_perturbation_buffers() + self._init_belay_buffers() + + def _init_belay_buffers(self): + # per-env belay state buffers + self.belay_enabled = torch.full( + (self.num_envs,), + self.cfg.belay.start_enabled, + device=self.device, + dtype=torch.bool, + ) + + # belay force per env + self.belay_force = torch.full( + (self.num_envs,), + self.cfg.belay.mass_kg * 9.81, + device=self.device, + dtype=torch.float, + ) + + # normalized command in [0, 1] + self.belay_force_scale = torch.ones( + self.num_envs, + device=self.device, + dtype=torch.float, + ) + + # per-env belay force vector [Fx, Fy, Fz] + self.belay_force_vec = torch.zeros( + (self.num_envs, 3), + device=self.device, + dtype=torch.float, + ) + + # rigid-body force tensors + self.num_sim_bodies = self.gym.get_sim_rigid_body_count(self.sim) + + self.rb_forces = torch.zeros( + (self.num_sim_bodies, 3), + device=self.device, + dtype=torch.float, + ) + self.rb_torques = torch.zeros_like(self.rb_forces) + + # determine rigid body to pull on + self.belay_body_name = "front_base" + + self.front_base_local_id = self.gym.find_actor_rigid_body_handle( + self.envs[0], + self.actor_handles[0], + self.belay_body_name, + ) + + print( + f"[BELAY] local body index for '{self.belay_body_name}' = " + f"{self.front_base_local_id}" + ) + + self.base_body_ids = torch.zeros( + self.num_envs, + device=self.device, + dtype=torch.long, + ) + + for i in range(self.num_envs): + sim_body_id = self.gym.get_actor_rigid_body_index( + self.envs[i], + self.actor_handles[i], + self.front_base_local_id, + gymapi.DOMAIN_SIM, + ) + self.base_body_ids[i] = sim_body_id + + print("[BELAY] sim body ids:", self.base_body_ids[:5]) + + # rigid body state tensor: position is columns 0:3 + rb_state_tensor = self.gym.acquire_rigid_body_state_tensor(self.sim) + self.rb_states = gymtorch.wrap_tensor(rb_state_tensor).view(-1, 13) + + # make sure rb_states are current before setting anchor + self.gym.refresh_rigid_body_state_tensor(self.sim) + + # fixed overhead anchor point for each env + # starts above the initial front_base position + self.belay_anchor_pos = torch.zeros( + (self.num_envs, 3), + device=self.device, + dtype=torch.float, + ) + + initial_body_pos = self.rb_states[self.base_body_ids, 0:3] + self.belay_anchor_pos[:] = initial_body_pos + + # vertical offset above the initial body position + self.belay_anchor_pos[:, 2] += self.cfg.belay.anchor_height + + print("[BELAY] anchor pos:", self.belay_anchor_pos[:5]) + + self.pending_belay_anchor_reset = torch.zeros( + self.num_envs, device=self.device, dtype=torch.bool + ) + + max_latency = self.cfg.perturbations.latency_steps + + self.action_delay_buffer = torch.zeros( + self.num_envs, + max_latency + 1, + self.num_dof, + device=self.device, + dtype=torch.float, + ) + + def _reset_system(self, env_ids): + super()._reset_system(env_ids) + + self._reset_belay_anchor(env_ids) + self.pending_belay_anchor_reset[env_ids] = True + + def _post_physx_step(self): + super()._post_physx_step() + + pending_ids = torch.nonzero(self.pending_belay_anchor_reset).flatten() + if len(pending_ids) > 0: + self.gym.refresh_rigid_body_state_tensor(self.sim) + self._reset_belay_anchor(pending_ids) + self.pending_belay_anchor_reset[pending_ids] = False + + self._apply_belay_force() + return None + + def _reset_belay_anchor(self, env_ids): + self.gym.refresh_rigid_body_state_tensor(self.sim) + + front_base_pos = self.rb_states[self.base_body_ids[env_ids], 0:3] + + self.belay_anchor_pos[env_ids, 0] = front_base_pos[:, 0] + self.belay_anchor_pos[env_ids, 1] = front_base_pos[:, 1] + self.belay_anchor_pos[env_ids, 2] = ( + front_base_pos[:, 2] + self.cfg.belay.anchor_height + ) + + def _update_belay_force_buffer(self): + self.belay_force_vec.zero_() + + active = self.belay_enabled.float() + force_mag = active * self.belay_force_scale * self.belay_force # (N,) + + if self.cfg.belay.mode == "vertical": + unit_direction = torch.zeros((self.num_envs, 3), device=self.device) + unit_direction[:, 2] = 1.0 + + elif self.cfg.belay.mode == "horizontal": + body_pos = self.rb_states[self.base_body_ids, 0:3] + direction = self.belay_anchor_pos - body_pos + direction[:, 2] = 0.0 + norm = torch.norm(direction, dim=1, keepdim=True).clamp(min=1e-6) + unit_direction = direction / norm + + elif self.cfg.belay.mode == "anchor": + body_pos = self.rb_states[self.base_body_ids, 0:3] + direction = self.belay_anchor_pos - body_pos + norm = torch.norm(direction, dim=1, keepdim=True).clamp(min=1e-6) + unit_direction = direction / norm + + else: + raise ValueError(f"Unknown belay mode: {self.cfg.belay.mode}") + + self.belay_force_vec[:] = unit_direction * force_mag.unsqueeze(1) + + def _apply_belay_force(self): + # map per-env belay force vectors into rigid-body force tensor and apply them + self.gym.refresh_rigid_body_state_tensor(self.sim) + + self.rb_forces.zero_() + self.rb_torques.zero_() + + self._update_belay_force_buffer() + + # per-env body force into global sim rigid body tensor + self.rb_forces[self.base_body_ids] = self.belay_force_vec + + if torch.any(self.belay_enabled): + self.gym.apply_rigid_body_force_tensors( + self.sim, + gymtorch.unwrap_tensor(self.rb_forces), + gymtorch.unwrap_tensor(self.rb_torques), + gymapi.ENV_SPACE, + ) + + def toggle_belay(self, env_ids=None): + # toggle belay on/off. + if env_ids is None: + self.belay_enabled = ~self.belay_enabled + print( + f"all envs toggled | any_on = {bool(torch.any(self.belay_enabled))} | " + f"force = {self.cfg.belay.mass_kg * 9.81:.1f} N" + ) + else: + self.belay_enabled[env_ids] = ~self.belay_enabled[env_ids] + print( + f"[BELAY] toggled env_ids={env_ids.tolist()} | " + f"force = {self.cfg.belay.mass_kg * 9.81:.1f} N" + ) + + def set_belay(self, enabled: bool, env_ids=None): + # set belay state + if env_ids is None: + self.belay_enabled[:] = enabled + else: + self.belay_enabled[env_ids] = enabled + + def debug_print_belay(self, env_id=0): + print( + f"env={env_id} | enabled={bool(self.belay_enabled[env_id])} | " + f"body_id={self.base_body_ids[env_id].item()} | " + f"force_vec={self.belay_force_vec[env_id].detach().cpu().numpy()}" + ) + + def draw_belay_debug(self, force_scale=0.01): + if self.viewer is None: + return + + self.gym.clear_lines(self.viewer) + self.gym.refresh_rigid_body_state_tensor(self.sim) + + body_pos = self.rb_states[self.base_body_ids, 0:3] + + active = self.belay_enabled.float() + force_mag = active * self.belay_force_scale * self.belay_force + + mode = self.cfg.belay.mode + + if mode == "vertical": + unit_direction = torch.zeros((self.num_envs, 3), device=self.device) + unit_direction[:, 2] = 1.0 + + elif mode == "horizontal": + direction = self.belay_anchor_pos - body_pos + direction[:, 2] = 0.0 + norm = torch.norm(direction, dim=1, keepdim=True).clamp(min=1e-6) + unit_direction = direction / norm + + elif mode == "anchor": + direction = self.belay_anchor_pos - body_pos + norm = torch.norm(direction, dim=1, keepdim=True).clamp(min=1e-6) + unit_direction = direction / norm + + else: + raise ValueError(f"Unknown belay mode: {mode}") + + # Red: intended belay direction scaled by force magnitude + red_end = body_pos + force_scale * force_mag.unsqueeze(1) * unit_direction + + # Green: actual applied force vector + green_end = body_pos + force_scale * self.belay_force_vec + + red = [1.0, 0.0, 0.0] + green = [0.0, 1.0, 0.0] + + for i in range(self.num_envs): + self.gym.add_lines( + self.viewer, + self.envs[i], + 1, + [ + body_pos[i, 0].item(), + body_pos[i, 1].item(), + body_pos[i, 2].item(), + red_end[i, 0].item(), + red_end[i, 1].item(), + red_end[i, 2].item(), + ], + red, + ) + + self.gym.add_lines( + self.viewer, + self.envs[i], + 1, + [ + body_pos[i, 0].item(), + body_pos[i, 1].item(), + body_pos[i, 2].item(), + green_end[i, 0].item(), + green_end[i, 1].item(), + green_end[i, 2].item(), + ], + green, + ) + + def _compute_torques(self): + torques = super()._compute_torques() + + if ( + self.cfg.perturbations.enabled + and self.cfg.perturbations.reduced_torque_enabled + ): + torques = torques * self.cfg.perturbations.torque_scale + + return torques + + def _apply_target_latency(self): + if not ( + self.cfg.perturbations.enabled and self.cfg.perturbations.latency_enabled + ): + return + + delay = self.cfg.perturbations.latency_steps + + # shift history buffer back + self.dof_pos_target_delay_buffer[:, 1:] = self.dof_pos_target_delay_buffer[ + :, :-1 + ].clone() + + # newest target goes in front + self.dof_pos_target_delay_buffer[:, 0] = self.dof_pos_target.clone() + + # apply delayed target + self.dof_pos_target = self.dof_pos_target_delay_buffer[:, delay] + + def _pre_compute_torques(self): + super()._pre_compute_torques() + + if self.cfg.perturbations.enabled: + if self.cfg.perturbations.latency_enabled: + self._apply_target_latency() + + def _init_perturbation_buffers(self): + max_latency = self.cfg.perturbations.max_latency_steps + + self.dof_pos_target_delay_buffer = torch.zeros( + self.num_envs, + max_latency + 1, + self.num_dof, + device=self.device, + dtype=torch.float, + ) + + def _pre_decimation_step(self): + super()._pre_decimation_step() + + if self.cfg.perturbations.enabled: + if self.cfg.perturbations.latency_enabled: + self._apply_target_latency() diff --git a/gym/envs/horse/horse_osc_belay_config.py b/gym/envs/horse/horse_osc_belay_config.py new file mode 100644 index 00000000..bfeb2d09 --- /dev/null +++ b/gym/envs/horse/horse_osc_belay_config.py @@ -0,0 +1,35 @@ +from gym.envs.horse.horse_osc_config import ( + HorseOscCfg, + HorseOscRunnerCfg, +) + + +class HorseOscBelayCfg(HorseOscCfg): + class belay: + body_name = "front_base" + mass_kg = 20.0 + force_n = mass_kg * 9.81 + keyboard_toggle = True + toggle_all_envs = True + start_enabled = True + debug_print = True + anchor_height = 2.0 + mode = "vertical" # "anchor", "vertical", "horizontal" + + class perturbations: + enabled = False + + reduced_torque_enabled = False + torque_scale = 1.0 # 1.0 normal, 0.5 = 50% strength + + latency_enabled = False + latency_steps = 0 # number of control steps delay + max_latency_steps = 10 + + motor_noise_enabled = False + motor_noise_std = 0.0 + + +class HorseOscBelayRunnerCfg(HorseOscRunnerCfg): + class runner(HorseOscRunnerCfg.runner): + experiment_name = "horse_osc_belay" diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py new file mode 100644 index 00000000..936417c2 --- /dev/null +++ b/gym/envs/horse/horse_osc_config.py @@ -0,0 +1,330 @@ +from gym.envs.horse.horse_config import ( + HorseCfg, + HorseRunnerCfg, +) + +BASE_HEIGHT_REF = 1.0 + + +class HorseOscCfg(HorseCfg): + class env(HorseCfg.env): + num_envs = 2**12 + num_actuators = 1 + 4 * 5 + episode_length_s = 10 + + class terrain(HorseCfg.terrain): + mesh_type = "plane" + # mesh_type = 'trimesh' # none, plane, heightfield or trimesh + + class init_state(HorseCfg.init_state): + timeout_reset_ratio = 0.75 + reset_mode = "reset_to_range" + # * default COM for basic initialization + pos = [0.0, 0.0, 0.6] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + default_joint_angles = { + "haa": 0.0, + "hfe": 0.0, + "kfe": 0.0, + "pfe": 0.0, + "pastern_to_foot": 0.0, + "base_joint": 0.0, + } + + # * initialization for random range setup + # these are the physical limits in the URDF as of 11 Dec 2025 + # dof_pos_range = { + # haa": [-0.2, 0.2], + # f_hfe": [-1.0, 0.6], + # h_hfe": [-1.5, 0.5], + # f_kfe": [-1.5, 0.1], + # h_kfe": [-0.2, 1.0], + # f_pfe": [-0.3, 3.0], + # h_pfe": [-1.2, 2.5], + # f_pastern_to_foot": [-0.3, 1.8], + # h_pastern_to_foot": [-0.3, 1.8], + # base_joint": [-0.2, 0.2], + # } + # dof_pos_range = { + # "haa": [-0.2, 0.2], + # "hfe": [-1.0, 0.5], + # "kfe": [-0.2, 0.1], + # "pfe": [-0.3, 2.5], + # "pastern_to_foot": [-0.3, 1.8], + # "base_joint": [-0.0, 0.0], + # } + dof_pos_range = { + "haa": [-0.2, 0.2], + "h_hfe": [-1.5, -1.5], + "f_hfe": [-1.0, -1.0], + "h_kfe": [1.0, 1.0], + "f_kfe": [-1.5, -1.5], + "h_pfe": [-1.2, -1.2], + "f_pfe": [1.5, 1.5], + "pastern_to_foot": [-0.3, 1.8], + "base_joint": [-0.0, 0.0], + } + dof_vel_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.2, 0.2], + "kfe": [-0.2, 0.2], + "pfe": [-0.2, 0.2], + "pastern_to_foot": [-0.2, 0.2], + "base_joint": [-0.2, 0.2], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [0.6, 0.6], # z + [-0.0, 0.0], # roll + [-0.0, 0.0], # pitch + [-0.2, 0.2], # yaw + ] + root_vel_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [0.0, 0.0], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(HorseCfg.control): + # * PD Drive parameters: + # HorseOscCfg.control + stiffness = { + "haa": 75, + "hfe": 40, + "kfe": 38, + "pfe": 10, + "pastern_to_foot": 5, + "base_joint": 50, # still needs modifying + } + damping = { + "haa": 0.001, + "hfe": 0.001, + "kfe": 0.001, + "pfe": 0.5, + "pastern_to_foot": 0.5, + "base_joint": 10, + } + ctrl_frequency = 250 # how often the PDF controller/action updates run + desired_sim_frequency = 1000 # how often the physics is calculated + + class osc: # <-------------------most likely needs tuning + process_noise_std = 0.25 + grf_threshold = 0.1 # 20. # Normalized to body weight + # oscillator parameters + omega = 3 # gets overwritten + coupling = 1 # gets overwritten + osc_bool = False # not used in paper + grf_bool = False # not used in paper + randomize_osc_params = False + omega_range = [1.0, 4.0] # [0.0, 10.] + coupling_range = [0.0, 1.0] + offset_range = [0.0, 0.0] + stop_threshold = 0.5 + omega_stop = 1.0 + omega_step = 2.0 + omega_slope = 1.0 + omega_max = 4.0 + # coupling_step = 0. + # coupling_stop = 0. + coupling_stop = 4.0 + coupling_step = 1.0 + coupling_slope = 0.0 + coupling_max = 1.0 + offset = 1.0 + + init_to = "random" + + class commands: + resampling_time = 3.0 # * time before command are changed[s] + var = 1.0 + + class ranges: + lin_vel_x = [0.0, 0.0] # min max [m/s] + lin_vel_y = 0.0 # max [m/s] + yaw_vel = 0 # max [rad/s] + height = [0.6, 1.0] # m + + class push_robots: + toggle = False + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.4, 1.0] + randomize_base_mass = False + lower_mass_offset = -0.5 # kg + upper_mass_offset = 2.0 + lower_z_offset = 0.0 # m + upper_z_offset = 0.2 + lower_x_offset = 0.0 + upper_x_offset = 0.0 + + class asset(HorseCfg.asset): + shank_length_diff = 0 # Units in cm + file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" + foot_name = "foot" + penalize_contacts_on = ["thigh"] # "thigh", "shank", "pastern" + terminate_after_contacts_on = ["top"] + collapse_fixed_joints = False + fix_base_link = False + self_collisions = 1 + flip_visual_attachments = False + disable_gravity = False + disable_motors = False + joint_damping = 0.3 + + class reward_settings(HorseCfg.reward_settings): + soft_dof_pos_limit = 0.9 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 3000.0 # testing, this was too low for a horse weight + base_height_target = 1.0 + 0.03 + tracking_sigma = 0.25 + switch_scale = 0.5 + switch_scale_height = 0.05 # drops to near 0 when cmd_h is 0.6 + + class scaling(HorseCfg.scaling): + base_ang_vel = [0.3, 0.3, 0.1] + base_lin_vel = BASE_HEIGHT_REF + dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + ( + 4 * [0.4, 1.3, 1.4, 2.5, 2.1] + ) # reducing this to be alot smaller 2.1 or 2.5 + dof_pos_obs = dof_pos + # dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + dof_pos_target = [0.4] + (4 * [0.8, 2.6, 2.8, 5.0, 4.2]) + tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used + commands = [3, 1, 3, 1] # add height as a command + + +class HorseOscRunnerCfg(HorseRunnerCfg): + seed = -1 + runner_class_name = "OnPolicyRunner" + + class actor(HorseRunnerCfg.actor): + hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "dof_pos_target", + # "osc_omega", + # "osc_coupling" + # "oscillators_vel", + # "grf", + ] + normalize_obs = True + actions = ["dof_pos_target"] + add_noise = True + disable_actions = False + + class noise: + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 + dof_vel = 0.005 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 + + class critic(HorseRunnerCfg.critic): + hidden_dims = [128, 64] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "oscillators_vel", + "dof_pos_target", + ] + normalize_obs = True + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.01 + orientation = 4.0 + torques = 5.0e-10 + dof_vel = 0.0 + min_base_height = 0.0 + action_rate = 1e-5 + action_rate2 = 1e-6 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + swing_grf = 5.0 + stance_grf = 5.0 + swing_velocity = 0.0 + stance_velocity = 0.0 + coupled_grf = 1.0 # penalize for grf during swing, no grf during stance + enc_pace = 0.0 + cursorial = 1.0 # enourage legs to stay under body, don't splay out + standing_torques = 0.0 # 1.e-5 + tracking_height = 10.0 + hind_kfe_tendon = 2.0 + hind_pfe_tendon = 2.0 + front_kfe_tendon = 3 + front_pfe_tendon = 3 + feet_contact_count = 3 + feet_support_upright = 10 + hfe_upright = 10 + + class termination_weight: + termination = 50.0 / 100.0 + + class algorithm: + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-4, 1e-2] + lr_ratio = 1.3 + + class runner(HorseRunnerCfg.runner): + run_name = "" + experiment_name = "horse_osc" + max_iterations = 1000 + algorithm_class_name = "PPO2" + num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 3dc1507f..7608e080 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -119,7 +119,7 @@ class scaling(LeggedRobotCfg.scaling): dof_pos_obs = dof_pos dof_pos_target = 4 * [0.2, 0.3, 0.3] tau_ff = 4 * [18, 18, 28] - commands = [3, 1, 3] + commands = [3, 1, 3, 1] class MiniCheetahRunnerCfg(LeggedRobotRunnerCfg): diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc.py b/gym/envs/mini_cheetah/mini_cheetah_osc.py index 77866ca3..f2b64409 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc.py @@ -1,5 +1,4 @@ import torch -import numpy as np from isaacgym.torch_utils import torch_rand_float from gym.envs.mini_cheetah.mini_cheetah import MiniCheetah @@ -129,31 +128,6 @@ def compute_osc_slope(self): self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) - def _process_rigid_body_props(self, props, env_id): - if env_id == 0: - # * init buffers for the domain rand changes - self.mass = torch.zeros(self.num_envs, 1, device=self.device) - self.com = torch.zeros(self.num_envs, 3, device=self.device) - - # * randomize mass - if self.cfg.domain_rand.randomize_base_mass: - lower = self.cfg.domain_rand.lower_mass_offset - upper = self.cfg.domain_rand.upper_mass_offset - # self.mass_ - props[0].mass += np.random.uniform(lower, upper) - self.mass[env_id] = props[0].mass - # * randomize com position - lower = self.cfg.domain_rand.lower_z_offset - upper = self.cfg.domain_rand.upper_z_offset - props[0].com.z += np.random.uniform(lower, upper) - self.com[env_id, 2] = props[0].com.z - - lower = self.cfg.domain_rand.lower_x_offset - upper = self.cfg.domain_rand.upper_x_offset - props[0].com.x += np.random.uniform(lower, upper) - self.com[env_id, 0] = props[0].com.x - return props - def _post_decimation_step(self): """Update all states that are not handled in PhysX""" super()._post_decimation_step() @@ -174,19 +148,13 @@ def _step_oscillators(self, dt=None): ) grf = self._compute_grf() self.oscillators_vel = self.osc_omega - grf * local_feedback - # self.oscillators_vel *= torch_rand_float(0.9, - # 1.1, - # self.oscillators_vel.shape, - # self.device) self.oscillators_vel += ( torch.randn(self.oscillators_vel.shape, device=self.device) * self.cfg.osc.process_noise_std ) self.oscillators_vel *= 2 * torch.pi - self.oscillators += ( - self.oscillators_vel * dt - ) # torch.clamp(self.oscillators_vel * dt, min=0) + self.oscillators += self.oscillators_vel * dt self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) self.oscillator_obs = torch.cat( (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 @@ -214,17 +182,6 @@ def _resample_commands(self, env_ids): torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var ) - # possible_commands = torch.tensor(self.command_ranges["lin_vel_y"], - # device=self.device) - # self.commands[env_ids, 1:2] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - # possible_commands = torch.tensor(self.command_ranges["yaw_vel"], - # device=self.device) - # self.commands[env_ids, 0:1] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - if 0 in self.cfg.commands.ranges.lin_vel_x: # * with 20% chance, reset to 0 commands except for forward self.commands[env_ids, 1:] *= ( diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py index 2c32f06b..e8796147 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py @@ -163,7 +163,7 @@ class scaling(MiniCheetahCfg.scaling): dof_pos_target = 4 * [0.2, 0.3, 0.3] tau_ff = 4 * [18, 18, 28] # hip-abad, hip-pitch, knee # commands = [base_lin_vel, base_lin_vel, base_ang_vel] - commands = [3, 1, 3] # [base_lin_vel, base_lin_vel, base_ang_vel] + commands = [3, 1, 3, 1] # [base_lin_vel, base_lin_vel, base_ang_vel] class MiniCheetahOscRunnerCfg(MiniCheetahRunnerCfg): diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref.py b/gym/envs/mini_cheetah/mini_cheetah_ref.py index 57e1f0e7..a1a3ca13 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref.py @@ -59,8 +59,7 @@ def _switch(self): def _reward_swing_grf(self): """Reward non-zero grf during swing (0 to pi)""" in_contact = torch.gt( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), - 50.0, + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), 50.0 ) ph_off = torch.lt(self.phase, torch.pi) rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) @@ -69,8 +68,7 @@ def _reward_swing_grf(self): def _reward_stance_grf(self): """Reward non-zero grf during stance (pi to 2pi)""" in_contact = torch.gt( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), - 50.0, + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), 50.0 ) ph_off = torch.gt(self.phase, torch.pi) # should this be in swing? rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) diff --git a/gym/utils/helpers.py b/gym/utils/helpers.py index dd431810..da5ec146 100644 --- a/gym/utils/helpers.py +++ b/gym/utils/helpers.py @@ -300,6 +300,30 @@ def get_args(custom_parameters=None): "default": False, "help": "Use original config file for loaded policy.", }, + { + "name": "--torque_scale", + "type": float, + "default": 1.0, + "help": "Torque scale for reduced strength perturbation.", + }, + { + "name": "--latency_steps", + "type": int, + "default": 0, + "help": "Number of delayed target steps for latency perturbation.", + }, + { + "name": "--max_steps", + "type": int, + "default": 2500, + "help": "Number of play steps before automatically saving and exiting.", + }, + { + "name": "--results_dir", + "type": str, + "default": "recovery_sweep", + "help": "Directory to save recovery CSV results.", + }, ] # * parse arguments args = gymutil.parse_arguments( diff --git a/gym/utils/interfaces/KeyboardInterface.py b/gym/utils/interfaces/KeyboardInterface.py index d2e3ce99..c5f200b1 100644 --- a/gym/utils/interfaces/KeyboardInterface.py +++ b/gym/utils/interfaces/KeyboardInterface.py @@ -15,6 +15,13 @@ def __init__(self, env): env.gym.subscribe_viewer_keyboard_event( env.viewer, gymapi.KEY_SPACE, "space_shoot" ) + env.gym.subscribe_viewer_keyboard_event(env.viewer, gymapi.KEY_UP, "height_up") + env.gym.subscribe_viewer_keyboard_event( + env.viewer, gymapi.KEY_DOWN, "height_down" + ) + env.gym.subscribe_viewer_keyboard_event( + env.viewer, gymapi.KEY_B, "toggle_belay" + ) env.gym.subscribe_viewer_mouse_event( env.viewer, gymapi.MOUSE_LEFT_BUTTON, "mouse_shoot" ) @@ -24,10 +31,12 @@ def __init__(self, env): print("WASD: forward, strafe left, backward, strafe right") print("QE: yaw left/right") print("R: reset environments") + print("Arrow Up/Down: stand up/lay down") print("ESC: quit") print("______________________________________________________________") env.commands[:] = 0.0 + env.commands[:, 3] = 0.6 env.cfg.commands.resampling_time = env.max_episode_length_s + 1 self.max_vel_backward = -1.0 self.max_vel_forward = 4.0 @@ -39,6 +48,10 @@ def __init__(self, env): self.max_vel_yaw = 2.0 self.increment_yaw = self.max_vel_yaw * 0.2 + self.min_height = -1000 + self.max_height = 1.0 + self.increment_height = 0.1 + def update(self, env): for evt in env.gym.query_viewer_action_events(env.viewer): if evt.value == 0: @@ -73,6 +86,16 @@ def update(self, env): env.commands[:, 2] + self.increment_yaw, max=self.max_vel_yaw, ) + elif evt.action == "height_up": + env.commands[:, 3] = torch.clamp( + env.commands[:, 3] + self.increment_height, + max=self.max_height, + ) + elif evt.action == "height_down": + env.commands[:, 3] = torch.clamp( + env.commands[:, 3] - self.increment_height, + min=self.min_height, + ) elif evt.action == "QUIT": env.exit = True elif evt.action == "RESET": @@ -82,3 +105,7 @@ def update(self, env): evt.action == "space_shoot" or evt.action == "mouse_shoot" ) and evt.value > 0: env.shoot() + + elif evt.action == "toggle_belay" and evt.value > 0: + if hasattr(env, "toggle_belay"): + env.toggle_belay() diff --git a/plot_recovery_belay_comparison.py b/plot_recovery_belay_comparison.py new file mode 100644 index 00000000..740bec68 --- /dev/null +++ b/plot_recovery_belay_comparison.py @@ -0,0 +1,169 @@ +# plot_recovery_belay_comparison.py + +import glob +import os +import re +import pandas as pd +import matplotlib.pyplot as plt + + +TORQUE_DIR = "recovery_torque" +LATENCY_DIR = "recovery_latency" + +TORQUE_OUTPUT = "success_vs_torque_by_belay.png" +LATENCY_OUTPUT = "success_vs_latency_by_belay.png" + + +def parse_belay_strength(path): + """ + Expects folder names like: + recovery_torque/belay_0/recovery_results_torque_0.8_latency_0.csv + recovery_torque/belay_20/recovery_results_torque_0.8_latency_0.csv + """ + parts = os.path.normpath(path).split(os.sep) + + for part in parts: + match = re.match(r"belay_(.+)", part) + if match: + value = match.group(1) + try: + return float(value) + except ValueError: + return value + + return "unknown" + + +def load_nested_results(base_dir): + csv_files = glob.glob(os.path.join(base_dir, "belay_*", "*.csv")) + + if not csv_files: + raise FileNotFoundError(f"No CSV files found under {base_dir}/belay_*/") + + dfs = [] + + for path in csv_files: + df = pd.read_csv(path) + df["source_file"] = os.path.basename(path) + df["belay_strength"] = parse_belay_strength(path) + dfs.append(df) + + return pd.concat(dfs, ignore_index=True) + + +def format_belay_label(value): + if isinstance(value, str): + return f"belay={value}" + + if float(value).is_integer(): + return f"belay={int(value)}" + + return f"belay={value}" + + +def plot_success_vs_torque(): + df = load_nested_results(TORQUE_DIR) + + required_cols = {"torque_scale", "success", "belay_strength"} + missing = required_cols - set(df.columns) + if missing: + raise ValueError(f"Missing required columns in torque data: {missing}") + + summary = ( + df.groupby(["belay_strength", "torque_scale"])["success"] + .agg(["mean", "count"]) + .reset_index() + ) + + summary["success_percent"] = summary["mean"] * 100.0 + + print("\nTorque summary:") + print( + summary[ + ["belay_strength", "torque_scale", "success_percent", "count"] + ].sort_values(["belay_strength", "torque_scale"]) + ) + + plt.figure(figsize=(9, 6)) + + for belay_strength, group in summary.groupby("belay_strength"): + group = group.sort_values("torque_scale", ascending=False) + + plt.plot( + group["torque_scale"], + group["success_percent"], + marker="o", + label=format_belay_label(belay_strength), + ) + + plt.xlabel("Torque scale") + plt.ylabel("% of envs that successfully stand") + plt.title("Recovery Success vs Reduced Torque by Belay Strength") + plt.ylim(-5, 105) + plt.grid(True) + plt.legend(title="Belay strength") + + # Show torque reducing from left to right + plt.gca().invert_xaxis() + + plt.savefig(TORQUE_OUTPUT, dpi=200, bbox_inches="tight") + plt.show() + + print(f"Saved torque plot to {TORQUE_OUTPUT}") + + +def plot_success_vs_latency(): + df = load_nested_results(LATENCY_DIR) + + required_cols = {"latency_steps", "success", "belay_strength"} + missing = required_cols - set(df.columns) + if missing: + raise ValueError(f"Missing required columns in latency data: {missing}") + + summary = ( + df.groupby(["belay_strength", "latency_steps"])["success"] + .agg(["mean", "count"]) + .reset_index() + ) + + summary["success_percent"] = summary["mean"] * 100.0 + + print("\nLatency summary:") + print( + summary[ + ["belay_strength", "latency_steps", "success_percent", "count"] + ].sort_values(["belay_strength", "latency_steps"]) + ) + + plt.figure(figsize=(9, 6)) + + for belay_strength, group in summary.groupby("belay_strength"): + group = group.sort_values("latency_steps") + + plt.plot( + group["latency_steps"], + group["success_percent"], + marker="o", + label=format_belay_label(belay_strength), + ) + + plt.xlabel("Latency steps") + plt.ylabel("% of envs that successfully stand") + plt.title("Recovery Success vs Control Latency by Belay Strength") + plt.ylim(-5, 105) + plt.grid(True) + plt.legend(title="Belay strength") + + plt.savefig(LATENCY_OUTPUT, dpi=200, bbox_inches="tight") + plt.show() + + print(f"Saved latency plot to {LATENCY_OUTPUT}") + + +def main(): + plot_success_vs_torque() + plot_success_vs_latency() + + +if __name__ == "__main__": + main() diff --git a/resources/robots/horse/urdf/horse.urdf b/resources/robots/horse/urdf/horse.urdf new file mode 100644 index 00000000..85d7184f --- /dev/null +++ b/resources/robots/horse/urdf/horse.urdf @@ -0,0 +1,605 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/scripts/analyze_joint_scaling.py b/scripts/analyze_joint_scaling.py new file mode 100644 index 00000000..1de90d03 --- /dev/null +++ b/scripts/analyze_joint_scaling.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import numpy as np +import os +import matplotlib.pyplot as plt +import csv + +OUT_DIR = "scaling_analysis" +os.makedirs(OUT_DIR, exist_ok=True) + +LOG_FILE = "joint_logs.npz" + +# load data +d = np.load(LOG_FILE, allow_pickle=True) + +actual = d["actual_pos"] # [envs, steps, joints] +target = d["target_pos"] +joint_names = d["joint_name"] + +num_envs, total_steps, num_joints = actual.shape + +# episode length +if "step" in d: + step_arr = d["step"] + valid = np.where(step_arr != 0)[0] + actual_steps = valid[-1] + 1 if len(valid) > 0 else 1 +else: + tmp = target[0] + nz = np.where(np.any(tmp != 0, axis=1))[0] + actual_steps = nz[-1] + 1 + +print(f"Trimming timestep dimension from {total_steps} -> {actual_steps}") + +# trim arrays +actual = actual[:, :actual_steps, :] +target = target[:, :actual_steps, :] + +# raw error +error = target - actual # [envs, steps, joints] +abs_error = np.abs(error) + +# shape: (num_envs * steps, num_joints) +flat_err = abs_error.reshape(-1, num_joints) + +# mean absolute error for each joint +mae = np.mean(flat_err, axis=0) + +# standard deviation of absolute error for each joint +std_err = np.std(flat_err, axis=0) + +# csv stats +csv_path = os.path.join(OUT_DIR, "error_summary.csv") +with open(csv_path, "w", newline="") as f: + w = csv.writer(f) + w.writerow(["joint_idx", "joint_name", "MAE_all_envs", "STD_all_envs"]) + for j in range(num_joints): + w.writerow( + [ + j, + joint_names[j], + float(mae[j]), + float(std_err[j]), + ] + ) + +print(f"Saved raw error summary (all envs): {csv_path}") + +# histogram with all env +all_errors = error.reshape(-1, num_joints) + +for j in range(num_joints): + name = joint_names[j] + plt.figure(figsize=(10, 4)) + plt.hist(all_errors[:, j], bins=80) + plt.title(f"{name} – Raw Error Histogram (All Envs Combined)") + plt.xlabel("target_pos - actual_pos") + plt.ylabel("count") + plt.grid(True, alpha=0.3) + + out = os.path.join(OUT_DIR, f"{j:02d}_{name}_hist.png") + plt.savefig(out, dpi=200) + plt.close() + +print(f"Saved histograms in: {OUT_DIR}") diff --git a/scripts/play.py b/scripts/play.py index 833e6641..dfe589d4 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -5,6 +5,203 @@ # torch needs to be imported after isaacgym imports in local source import torch +import numpy as np +import os +import csv + +BASE_HEIGHT_REF = 1.0 + + +# stability / success tracking +class RecoveryTracker: + def __init__(self, env, output_file="recovery_results.csv"): # noqa: F811 + self.env = env + self.output_file = output_file + + self.num_envs = env.num_envs + self.dt = env.dt + + # success criteria settings + self.height_threshold = 0.8 * BASE_HEIGHT_REF + self.orientation_threshold = 0.6 + self.required_stable_time = 0.4 # seconds + self.required_stable_steps = int(self.required_stable_time / self.dt) + + # per-env tracking + self.stable_counter = torch.zeros( + self.num_envs, device=env.device, dtype=torch.long + ) + + self.success = torch.zeros(self.num_envs, device=env.device, dtype=torch.bool) + + self.time_to_stand = torch.full( + (self.num_envs,), + -1.0, + device=env.device, + dtype=torch.float, + ) + + self.max_tilt = torch.zeros( + self.num_envs, + device=env.device, + dtype=torch.float, + ) + + self.fall = torch.zeros( + self.num_envs, + device=env.device, + dtype=torch.bool, + ) + + self.elapsed_time = 0.0 + + self._init_csv() + + def _init_csv(self): + file_exists = os.path.isfile(self.output_file) + + if not file_exists: + with open(self.output_file, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow( + [ + "torque_scale", + "latency_steps", + "env_id", + "success", + "time_to_stand", + "max_tilt", + "final_height", + "fall", + ] + ) + + def update(self): + # call once every control step during play loop + + env = self.env + + height = env.base_height.flatten() + + tilt = torch.norm(env.projected_gravity[:, :2], dim=1) + + grf = env._compute_grf(grf_norm=True) + contact = grf > env.cfg.osc.grf_threshold + feet_contact_count = contact.float().sum(dim=1) + + # stable standing criteria + height_ok = height > self.height_threshold + orientation_ok = tilt < self.orientation_threshold + feet_ok = feet_contact_count >= 3 + + stable_now = height_ok & orientation_ok & feet_ok + + # consecutive stable steps + self.stable_counter[stable_now] += 1 + self.stable_counter[~stable_now] = 0 + + newly_successful = (self.stable_counter >= self.required_stable_steps) & ( + ~self.success + ) + + self.success[newly_successful] = True + self.time_to_stand[newly_successful] = self.elapsed_time + + # max tilt + self.max_tilt = torch.maximum(self.max_tilt, tilt) + + # fall detection + if hasattr(env, "to_be_reset"): + self.fall |= env.to_be_reset.clone() + + self.elapsed_time += self.dt + + def save_results(self): + # call once at end of rollout + env = self.env + + torque_scale = ( + env.cfg.perturbations.torque_scale + if env.cfg.perturbations.reduced_torque_enabled + else 1.0 + ) + + latency_steps = ( + env.cfg.perturbations.latency_steps + if env.cfg.perturbations.latency_enabled + else 0 + ) + + final_height = env.base_height.flatten().detach().cpu() + success = self.success.detach().cpu() + time_to_stand = self.time_to_stand.detach().cpu() + max_tilt = self.max_tilt.detach().cpu() + fall = self.fall.detach().cpu() + + with open(self.output_file, "a", newline="") as f: + writer = csv.writer(f) + + for i in range(self.num_envs): + writer.writerow( + [ + float(torque_scale), + int(latency_steps), + int(i), + int(success[i].item()), + float(time_to_stand[i].item()), + float(max_tilt[i].item()), + float(final_height[i].item()), + int(fall[i].item()), + ] + ) + + print(f"Saved results to: {self.output_file}") + + +def make_results_filename(env): + torque_scale = ( + env.cfg.perturbations.torque_scale + if env.cfg.perturbations.reduced_torque_enabled + else 1.0 + ) + + latency_steps = ( + env.cfg.perturbations.latency_steps + if env.cfg.perturbations.latency_enabled + else 0 + ) + + # 1.0 -> 1 and 0.5 stays 0.5 + if float(torque_scale).is_integer(): + torque_str = str(int(torque_scale)) + else: + torque_str = str(torque_scale) + + filename = ( + f"recovery_results_" f"torque_{torque_str}_" f"latency_{int(latency_steps)}.csv" + ) + + return filename + + +def get_reward_fns(env): + requested = [ + "tracking_height", + "tendon_constraints", + "tracking_lin_vel", + "swing_grf", + "stance_grf", + "cursorial", + ] + + out = {} + for name in requested: + fn_name = f"_reward_{name}" + if hasattr(env, fn_name): + out[name] = getattr(env, fn_name) + else: + print(f"reward not found (skipping): {fn_name}") + return out def setup(args): @@ -23,13 +220,128 @@ def setup(args): train_cfg.logging.enable_local_saving = False runner = task_registry.make_alg_runner(env, train_cfg) # runner.actor_cfg["disable_actions"] = True - + env.set_belay(True) # * switch to evaluation mode (dropout for example) runner.switch_to_eval() + + # enable perturbations + env.cfg.perturbations.enabled = True + + env.cfg.perturbations.reduced_torque_enabled = True + env.cfg.perturbations.torque_scale = args.torque_scale + + env.cfg.perturbations.latency_enabled = True + env.cfg.perturbations.latency_steps = args.latency_steps + return env, runner, train_cfg +def create_obs_logging_dict(env, obs_vars, num_steps): + """ + Create a dictionary to log raw and scaled observation data. + + Returns: + obs_log: dict with keys: + {var}_raw -> [num_envs, num_steps, ...] + {var}_scaled -> [num_envs, num_steps, ...] + """ + num_envs = env.num_envs + obs_log = {} + + for var in obs_vars: + val = getattr(env, var) + shape = (num_envs, num_steps) + val.shape[1:] # preserve timestep array shape + + # allocate tensors + obs_log[f"{var}_raw"] = torch.zeros(shape, device=val.device) + obs_log[f"{var}_scaled"] = torch.zeros_like(obs_log[f"{var}_raw"]) + + return obs_log + + +def log_obs_step(env, obs_log, obs_vars, step_idx): + for var in obs_vars: + obs_log[f"{var}_raw"][:, step_idx, ...] = getattr(env, var) + obs_log[f"{var}_scaled"][:, step_idx, ...] = env.get_state(var) + + +def create_logging_dict(env, num_steps): + # creates a dictionary of tensors to store joint data for each timestep. + num_envs = env.num_envs + num_dofs = env.num_dof + + log_dict = { + "step": torch.zeros((num_steps,), dtype=torch.int32, device=env.device), + "target_pos": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + "actual_pos": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + "torque": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + } + + # Get joint names + joint_names = ( + env.dof_names + if hasattr(env, "dof_names") + else [f"joint_{i}" for i in range(num_dofs)] + ) + log_dict["joint_name"] = joint_names + + return log_dict + + def play(env, runner, train_cfg): + num_steps = int(env.max_episode_length) + log_data = create_logging_dict(env, num_steps) + + reward_fns = get_reward_fns(env) + reward_names = list(reward_fns.keys()) + + reward_log = { + "reward_names": np.array(reward_names, dtype=object), + "height_command": np.zeros((num_steps,), dtype=np.float32), + "height_actual": np.zeros((num_steps,), dtype=np.float32), + "switch_height": np.zeros((num_steps,), dtype=np.float32), + "total_reward": np.zeros((num_steps,), dtype=np.float32), + } + + for name in reward_names: + reward_log[name] = np.zeros((num_steps,), dtype=np.float32) + + os.makedirs(args.results_dir, exist_ok=True) + + results_file = os.path.join( + args.results_dir, + f"recovery_results_torque_{args.torque_scale:g}_latency_{args.latency_steps}.csv", + ) + + tracker = RecoveryTracker(env, output_file=results_file) + + obs_vars = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + + obs_log = create_obs_logging_dict(env, obs_vars, num_steps) + + # height reward logging + height_rew = np.zeros((num_steps,), dtype=np.float32) + height_actual = np.zeros((num_steps,), dtype=np.float32) + height_target = np.zeros((num_steps,), dtype=np.float32) + + # record init pos/height + OUT_DIR = "play_logs" + os.makedirs(OUT_DIR, exist_ok=True) + + # track actual number of simulation steps + actual_steps = 0 + # track and print commanded height changes + last_height_cmd = None + # * set up recording if env.cfg.viewer.record: recorder = VisualizationRecorder( @@ -41,18 +353,129 @@ def play(env, runner, train_cfg): if COMMANDS_INTERFACE: # interface = GamepadInterface(env) interface = KeyboardInterface(env) - for i in range(10 * int(env.max_episode_length)): - if COMMANDS_INTERFACE: - interface.update(env) + env.commands[:, 3] = BASE_HEIGHT_REF + + max_play_steps = getattr(args, "max_steps", 10 * int(env.max_episode_length)) + headless = getattr(args, "headless", False) + + try: + for i in range(max_play_steps): + if COMMANDS_INTERFACE and not headless: + interface.update(env) + + if env.cfg.viewer.record and not headless: + recorder.update(i) + + target_height = env.commands[0, 3].item() + if (last_height_cmd is None) or ( + abs(target_height - last_height_cmd) > 1e-6 + ): + actual_height = env.base_height[0].item() + print( + f"[HEIGHT] actual = {actual_height:.3f} m | " + f"target = {target_height:.3f} m | " + f"error = {actual_height - target_height:.3f} | " + f"descend mode = {(env.commands[:, 3][0]) < 1.0}" + ) + last_height_cmd = target_height + + runner.set_actions( + runner.actor_cfg["actions"], + runner.get_inference_actions(), + runner.actor_cfg["disable_actions"], + ) + + env.step() + tracker.update() + + if not headless and hasattr(env, "draw_belay_debug"): + env.draw_belay_debug() + + if actual_steps < num_steps: + log_data["step"][actual_steps] = actual_steps + log_data["target_pos"][:, actual_steps, :] = env.dof_pos_target + log_data["actual_pos"][:, actual_steps, :] = env.dof_pos + log_data["torque"][:, actual_steps, :] = env.torques + + log_obs_step(env, obs_log, obs_vars, actual_steps) + + r = env._reward_tracking_height() + height_rew[actual_steps] = r[0].item() + height_actual[actual_steps] = env.base_height[0].item() + height_target[actual_steps] = env.commands[0, 3].item() + + reward_log["height_actual"][actual_steps] = env.base_height[0].item() + reward_log["height_command"][actual_steps] = env.commands[0, 3].item() + reward_log["switch_height"][actual_steps] = env._switch_height()[ + 0 + ].item() + + total = 0.0 + for name, fn in reward_fns.items(): + r = fn() + r0 = r[0].item() + reward_log[name][actual_steps] = r0 + total += r0 + + reward_log["total_reward"][actual_steps] = total + actual_steps += 1 + + if not headless: + env.check_exit() + + print(f"\n[INFO] Finished fixed play run: {max_play_steps} steps") + + except KeyboardInterrupt: + print("\n[INFO] Interrupted by user, saving logs...") + + except SystemExit: + print("\n[INFO] Viewer closed, saving logs...") + + finally: if env.cfg.viewer.record: - recorder.update(i) - runner.set_actions( - runner.actor_cfg["actions"], - runner.get_inference_actions(), - runner.actor_cfg["disable_actions"], - ) - env.step() - env.check_exit() + if hasattr(recorder, "close"): + recorder.close() + print("[RECORD] recorder.close() called") + elif hasattr(recorder, "save"): + recorder.save() + print("[RECORD] recorder.save() called") + elif hasattr(recorder, "finish"): + recorder.finish() + print("[RECORD] recorder.finish() called") + # slice to actual steps before saving + """ + log_data_cpu = { + k: (v.detach().cpu().numpy() if torch.is_tensor(v) else v) + for k, v in log_data.items() + } + for key in ["step", "target_pos", "actual_pos", "torque"]: + log_data_cpu[key] = log_data_cpu[key][:actual_steps] + + np.savez_compressed("joint_logs.npz", **log_data_cpu) + print(f"\nSaved joint log to joint_logs.npz ({actual_steps} steps)") + + obs_log_cpu = { + k: (v.detach().cpu().numpy() if torch.is_tensor(v) else v) + for k, v in obs_log.items() + } + for key in obs_log_cpu.keys(): + obs_log_cpu[key] = obs_log_cpu[key][:, :actual_steps, ...] + + np.savez_compressed("obs_logs.npz", **obs_log_cpu) + print(f"\nSaved obs log to obs_logs.npz ({actual_steps} steps)") + + # save rewards + reward_log_cpu = {} + for k, v in reward_log.items(): + if isinstance(v, np.ndarray) and v.shape[0] == num_steps: + reward_log_cpu[k] = v[:actual_steps] + else: + reward_log_cpu[k] = v + np.savez_compressed("reward_logs.npz", **reward_log_cpu) + """ + + # save recovery results + tracker.save_results() if __name__ == "__main__": diff --git a/scripts/plot_logs_by_joint.py b/scripts/plot_logs_by_joint.py new file mode 100644 index 00000000..a2952bf2 --- /dev/null +++ b/scripts/plot_logs_by_joint.py @@ -0,0 +1,244 @@ +import numpy as np +import matplotlib.pyplot as plt +import os +import re + +# config +LOG_FILE = "joint_logs.npz" +ENV_ID = 0 +SAVE_DIR = "plots_by_joint" + +# output folder +os.makedirs(SAVE_DIR, exist_ok=True) + +# joint limits +JOINT_LIMITS = { + r".*haa": [-0.2, 0.2], + r".*f_hfe": [-1.0, 0.6], + r".*h_hfe": [-1.5, 0.5], + r".*f_kfe": [-1.5, 0.1], + r".*h_kfe": [-0.2, 1.0], + r".*f_pfe": [-0.3, 3.0], + r".*h_pfe": [-1.2, 2.5], + r".*f_pastern_to_foot": [-0.3, 1.8], + r".*h_pastern_to_foot": [-0.3, 1.8], + r".*base_joint": [-0.2, 0.2], +} + + +def find_joint_limits(joint_name: str): + for pattern, limits in JOINT_LIMITS.items(): + if re.match(pattern, joint_name): + return limits + return None + + +data = np.load(LOG_FILE, allow_pickle=True) +steps = data["step"] +target_pos = data["target_pos"] +actual_pos = data["actual_pos"] +torques = data["torque"] +joint_names = data["joint_name"] + +num_envs, num_steps, num_joints = actual_pos.shape +print(f"Loaded log for {num_joints} joints, {num_steps} steps.") + +pattern = re.compile(r".*(rh|lh|rf|lf)_(haa|hfe|kfe|pfe|pastern_to_foot)$") + +joint_map = {} +base_joints = {} + +for idx, name in enumerate(joint_names): + match = pattern.match(name) + if match: + leg, joint_type = match.groups() + leg, joint_type = leg.lower(), joint_type.lower() + joint_map.setdefault(joint_type, {})[leg] = idx + else: + # any joint name not matching the joint pattern will get its own plot + base_joints[name] = idx + + +def plot_joint_type(joint_type, leg_to_idx): + # 4 rows (LF/RF pos+torque, LH/RH pos+torque) × 2 columns + fig, axs = plt.subplots(4, 2, figsize=(12, 12), constrained_layout=True) + axs = np.array(axs) + + leg_pos = { + "lf": (0, 0), + "rf": (0, 1), + "lh": (2, 0), + "rh": (2, 1), + } + + for leg, (pos_row, col) in leg_pos.items(): + torque_row = pos_row + 1 + if leg not in leg_to_idx: + axs[pos_row, col].axis("off") + axs[torque_row, col].axis("off") + continue + + j = leg_to_idx[leg] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + last_valid = nonzero_idx[-1] + 1 if len(nonzero_idx) > 0 else len(steps) + + # position plot + pos_ax = axs[pos_row, col] + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + linestyle="--", + linewidth=1.3, + label="Target Pos", + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + linewidth=1.3, + label="Actual Pos", + ) + + # add joint limit lines + limits = find_joint_limits(joint_names[j]) + if limits is not None: + lo, hi = limits + pos_ax.axhline( + lo, + color="blue", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Lower Limit ({lo:.2f})", + ) + pos_ax.axhline( + hi, + color="red", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Upper Limit ({hi:.2f})", + ) + + ymin, ymax = pos_ax.get_ylim() + ymin = min(ymin, lo - 0.05) + ymax = max(ymax, hi + 0.05) + pos_ax.set_ylim([ymin, ymax]) + + pos_ax.set_title(f"{leg.upper()} - {joint_type.upper()} Position") + pos_ax.set_ylabel("Position (rad)") + pos_ax.grid(True, linestyle="--", alpha=0.4) + pos_ax.legend(fontsize=7) + + # torque plot + torque_ax = axs[torque_row, col] + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_title(f"{leg.upper()} - {joint_type.upper()} Torque") + torque_ax.set_ylabel("Torque (Nm)") + torque_ax.set_xlabel("Step") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + plt.suptitle(f"{joint_type.upper()} Joints (All Legs)", fontsize=16) + save_path = os.path.join(SAVE_DIR, f"{joint_type}.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {save_path}") + + +# plot function for base +def plot_base_joints(base_joints): + fig, axs = plt.subplots( + len(base_joints) * 2, + 1, + figsize=(10, 4 * len(base_joints)), + constrained_layout=True, + ) + + if len(base_joints) == 1: + axs = np.array([axs[0], axs[1]]) + + for i, (name, j) in enumerate(base_joints.items()): + pos_ax = axs[2 * i] + torque_ax = axs[2 * i + 1] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + last_valid = nonzero_idx[-1] + 1 if len(nonzero_idx) > 0 else len(steps) + + # position + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + linestyle="--", + linewidth=1.3, + label="Target Pos", + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + linewidth=1.3, + label="Actual Pos", + ) + + # add joint limits + limits = find_joint_limits(name) + if limits is not None: + lo, hi = limits + pos_ax.axhline( + lo, + color="blue", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Lower Limit ({lo:.2f})", + ) + pos_ax.axhline( + hi, + color="red", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Upper Limit ({hi:.2f})", + ) + + ymin, ymax = pos_ax.get_ylim() + ymin = min(ymin, lo - 0.05) + ymax = max(ymax, hi + 0.05) + pos_ax.set_ylim([ymin, ymax]) + + pos_ax.set_title(f"{name} Position") + pos_ax.set_ylabel("Position (rad)") + pos_ax.legend(fontsize=7) + pos_ax.grid(True, linestyle="--", alpha=0.4) + + # torque + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_title(f"{name} Torque") + torque_ax.set_ylabel("Torque (Nm)") + torque_ax.set_xlabel("Step") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + plt.suptitle("Base Joints", fontsize=16) + save_path = os.path.join(SAVE_DIR, "base_joints.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {save_path}") + + +# generate plots +for joint_type, leg_to_idx in joint_map.items(): + plot_joint_type(joint_type, leg_to_idx) + +if base_joints: + plot_base_joints(base_joints) diff --git a/scripts/plot_logs_by_leg.py b/scripts/plot_logs_by_leg.py new file mode 100644 index 00000000..a12351cc --- /dev/null +++ b/scripts/plot_logs_by_leg.py @@ -0,0 +1,97 @@ +import numpy as np +import matplotlib.pyplot as plt +import os + +# config +LOG_FILE = "joint_logs.npz" +ENV_ID = 0 +SAVE_DIR = "plots_by_leg" + +# output folder +os.makedirs(SAVE_DIR, exist_ok=True) + +data = np.load(LOG_FILE, allow_pickle=True) +steps = data["step"] +target_pos = data["target_pos"] +actual_pos = data["actual_pos"] +torques = data["torque"] +joint_names = data["joint_name"] + +num_envs, num_steps, num_joints = actual_pos.shape +print(f"Loaded log for {num_joints} joints, {num_steps} steps.") + +# define grouping +groups = { + "base": [0], + "right_hind": list(range(1, 6)), + "left_hind": list(range(6, 11)), + "right_front": list(range(11, 16)), + "left_front": list(range(16, 21)), +} + + +def plot_joint_group(group_name, joint_indices): + num_joints_group = len(joint_indices) + fig, axs = plt.subplots( + nrows=num_joints_group * 2, + ncols=1, + figsize=(10, 3 * num_joints_group * 2), + constrained_layout=True, + ) + + if num_joints_group == 1: + axs = np.array([axs[0], axs[1]]) # ensure consistent 2D array shape + + for idx, j in enumerate(joint_indices): + pos_ax = axs[2 * idx] + torque_ax = axs[2 * idx + 1] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + if len(nonzero_idx) > 0: + last_valid = nonzero_idx[-1] + 1 + else: + last_valid = len(steps) + + # position + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + label="Target Pos", + linestyle="--", + linewidth=1.5, + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + label="Actual Pos", + linewidth=1.5, + ) + pos_ax.set_ylabel(f"{joint_names[j]} Pos (rad)") + pos_ax.legend(loc="upper right", fontsize=8) + pos_ax.grid(True, linestyle="--", alpha=0.4) + + # torque + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_ylabel(f"{joint_names[j]} Torque (Nm)") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + axs[-1].set_xlabel("Step") + plt.suptitle(f"{group_name.capitalize()} Joint Logs", fontsize=14) + save_path = os.path.join(SAVE_DIR, f"{group_name}.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {group_name}.png") + + +# generate plots +for group_name, joint_indices in groups.items(): + if max(joint_indices) < num_joints: + plot_joint_group(group_name, joint_indices) + else: + print(f"Skipping {group_name}: exceeds joint index range.") diff --git a/scripts/plot_recovery_3d.py b/scripts/plot_recovery_3d.py new file mode 100644 index 00000000..2e23d209 --- /dev/null +++ b/scripts/plot_recovery_3d.py @@ -0,0 +1,79 @@ +# plot_recovery_3d_surface.py + +import glob +import os +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +RESULTS_DIR = "recovery_sweep" +OUTPUT_PNG = "recovery_success_3d_surface.png" + +DT = 0.004 # seconds per step + +csv_files = glob.glob(os.path.join(RESULTS_DIR, "*.csv")) +if not csv_files: + raise FileNotFoundError(f"No CSV files found in {RESULTS_DIR}/") + +data = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True) + +summary = ( + data.groupby(["torque_scale", "latency_steps"])["success"].mean().reset_index() +) + +summary["success_percent"] = summary["success"] * 100.0 + +# convert torque to percentage +summary["torque_percent"] = summary["torque_scale"] * 100.0 + +# convert latency to seconds +summary["latency_seconds"] = summary["latency_steps"] * DT + +pivot = summary.pivot( + index="latency_seconds", + columns="torque_percent", + values="success_percent", +) + +# preserve exact values (no interpolation) +torque_vals = pivot.columns.values +latency_vals = pivot.index.values + +X, Y = np.meshgrid(torque_vals, latency_vals) +Z = pivot.values + +fig = plt.figure(figsize=(10, 7)) +ax = fig.add_subplot(111, projection="3d") + +surf = ax.plot_surface( + X, + Y, + Z, + cmap="viridis", + vmin=0, + vmax=100, + edgecolor="k", + linewidth=0.5, +) + +ax.set_xlabel("Torque (%)") +ax.set_ylabel("Latency (seconds)") +ax.set_zlabel("Success rate (%)") +ax.set_title("Recovery Success Surface") + +ax.set_zlim(0, 100) +ax.invert_xaxis() + +ax.set_xticks(torque_vals) +ax.set_xticklabels([f"{int(v)}" for v in torque_vals]) + +ax.set_yticks(latency_vals) +ax.set_yticklabels([f"{v:.3f}" for v in latency_vals]) + +cbar = fig.colorbar(surf, ax=ax, pad=0.1) +cbar.set_label("Success rate (%)") + +plt.savefig(OUTPUT_PNG, dpi=200, bbox_inches="tight") + +print(f"Saved plot to {OUTPUT_PNG}") diff --git a/scripts/plot_recovery_heatmap.py b/scripts/plot_recovery_heatmap.py new file mode 100644 index 00000000..0f871e72 --- /dev/null +++ b/scripts/plot_recovery_heatmap.py @@ -0,0 +1,66 @@ +# plot_recovery_heatmap.py + +import glob +import os +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +RESULTS_DIR = "recovery_sweep" +OUTPUT_PNG = "recovery_success_heatmap.png" + +DT = 0.004 # seconds per step + +csv_files = glob.glob(os.path.join(RESULTS_DIR, "*.csv")) +if not csv_files: + raise FileNotFoundError(f"No CSV files found in {RESULTS_DIR}/") + +data = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True) + +summary = ( + data.groupby(["torque_scale", "latency_steps"])["success"].mean().reset_index() +) + +summary["success_percent"] = summary["success"] * 100.0 +summary["torque_percent"] = summary["torque_scale"] * 100.0 +summary["latency_seconds"] = summary["latency_steps"] * DT + +pivot = summary.pivot( + index="latency_seconds", + columns="torque_percent", + values="success_percent", +) + +pivot = pivot.sort_index(ascending=True) +pivot = pivot.reindex(sorted(pivot.columns, reverse=True), axis=1) + +plt.figure(figsize=(9, 6)) + +im = plt.imshow( + pivot.values, + aspect="auto", + origin="lower", + vmin=0, + vmax=100, +) + +plt.colorbar(im, label="Success rate (%)") + +plt.xticks( + ticks=np.arange(len(pivot.columns)), + labels=[f"{int(v)}" for v in pivot.columns], +) + +plt.yticks( + ticks=np.arange(len(pivot.index)), + labels=[f"{v:.3f}" for v in pivot.index], +) + +plt.xlabel("Torque (%)") +plt.ylabel("Latency (seconds)") +plt.title("Recovery Success Rate Heatmap") + +plt.savefig(OUTPUT_PNG, dpi=200, bbox_inches="tight") + +print(f"Saved heatmap to {OUTPUT_PNG}") diff --git a/scripts/plot_recovery_latency.py b/scripts/plot_recovery_latency.py new file mode 100644 index 00000000..525b90de --- /dev/null +++ b/scripts/plot_recovery_latency.py @@ -0,0 +1,57 @@ +# plot_recovery_latency.py + +import glob +import os +import pandas as pd +import matplotlib.pyplot as plt + + +RESULTS_DIR = "recovery_latency" # folder with latency sweep CSVs +OUTPUT_PNG = "success_vs_latency.png" + +csv_files = glob.glob(os.path.join(RESULTS_DIR, "*.csv")) + +if not csv_files: + raise FileNotFoundError(f"No CSV files found in {RESULTS_DIR}/") + +dfs = [] +for path in csv_files: + df = pd.read_csv(path) + df["source_file"] = os.path.basename(path) + dfs.append(df) + +all_data = pd.concat(dfs, ignore_index=True) + +required_cols = {"latency_steps", "success"} +missing = required_cols - set(all_data.columns) +if missing: + raise ValueError(f"Missing required columns: {missing}") + +# success rate per latency value +summary = ( + all_data.groupby("latency_steps")["success"] + .agg(["mean", "count"]) + .reset_index() + .sort_values("latency_steps") +) + +summary["success_percent"] = summary["mean"] * 100.0 + +print(summary[["latency_steps", "success_percent", "count"]]) + +plt.figure(figsize=(8, 5)) +plt.plot( + summary["latency_steps"], + summary["success_percent"], + marker="o", +) + +plt.xlabel("Latency steps") +plt.ylabel("% of envs that successfully stand") +plt.title("Recovery Success vs Control Latency") +plt.ylim(-5, 105) +plt.grid(True) + +plt.savefig(OUTPUT_PNG, dpi=200, bbox_inches="tight") + +print(f"Saved plot to {OUTPUT_PNG}") diff --git a/scripts/plot_recovery_torques.py b/scripts/plot_recovery_torques.py new file mode 100644 index 00000000..0361c7f9 --- /dev/null +++ b/scripts/plot_recovery_torques.py @@ -0,0 +1,65 @@ +# plot_recovery_torque.py + +import glob +import os +import pandas as pd +import matplotlib.pyplot as plt + + +RESULTS_DIR = "recovery_torque" +OUTPUT_PNG = "success_vs_torque.png" + + +def main(): + csv_files = glob.glob(os.path.join(RESULTS_DIR, "*.csv")) + + if not csv_files: + raise FileNotFoundError(f"No CSV files found in {RESULTS_DIR}/") + + dfs = [] + for path in csv_files: + df = pd.read_csv(path) + df["source_file"] = os.path.basename(path) + dfs.append(df) + + all_data = pd.concat(dfs, ignore_index=True) + + required_cols = {"torque_scale", "success"} + missing = required_cols - set(all_data.columns) + if missing: + raise ValueError(f"Missing required columns: {missing}") + + summary = ( + all_data.groupby("torque_scale")["success"] + .agg(["mean", "count"]) + .reset_index() + .sort_values("torque_scale", ascending=False) + ) + + summary["success_percent"] = summary["mean"] * 100.0 + + print(summary[["torque_scale", "success_percent", "count"]]) + + plt.figure(figsize=(8, 5)) + plt.plot( + summary["torque_scale"], + summary["success_percent"], + marker="o", + ) + + plt.xlabel("Torque scale") + plt.ylabel("% of envs that successfully stand") + plt.title("Recovery Success vs Reduced Torque") + plt.ylim(-5, 105) + plt.grid(True) + + # reverse x-axis so impairment increases left-to-right + plt.gca().invert_xaxis() + + plt.savefig(OUTPUT_PNG, dpi=200, bbox_inches="tight") + + print(f"Saved plot to {OUTPUT_PNG}") + + +if __name__ == "__main__": + main() diff --git a/scripts/plot_rewards.py b/scripts/plot_rewards.py new file mode 100644 index 00000000..07f5a76f --- /dev/null +++ b/scripts/plot_rewards.py @@ -0,0 +1,62 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("reward_logs.npz", allow_pickle=True) + +reward_names = data["reward_names"] +total_reward = data["total_reward"] + +height_command = data["height_command"] if "height_command" in data else None +switch_height = data["switch_height"] if "switch_height" in data else None + +steps = np.arange(len(total_reward)) + +fig, ax1 = plt.subplots(figsize=(14, 8)) + +# reward plots +ax1.plot(steps, total_reward, label="total_reward", linewidth=2) + +for name in reward_names: + name = str(name) + if name in data: + ax1.plot(steps, data[name], label=name, alpha=0.9) + +ax1.set_xlabel("Step") +ax1.set_ylabel("Reward") +ax1.set_title("Rewards, Height Command, and Switch Height") +ax1.grid(True) + +# second axis for height signals +ax2 = ax1.twinx() + +if height_command is not None: + ax2.plot( + steps, + height_command, + linestyle="--", + linewidth=2, + label="height_command", + ) + +if switch_height is not None: + ax2.plot( + steps, + switch_height, + linestyle=":", + linewidth=2, + label="switch_height", + ) + +ax2.set_ylabel("Height / Switch") + +# combined legend +lines1, labels1 = ax1.get_legend_handles_labels() +lines2, labels2 = ax2.get_legend_handles_labels() +ax1.legend(lines1 + lines2, labels1 + labels2, loc="best") + +plt.tight_layout() + +# save figure +plt.savefig("reward_plot.png", dpi=300) + +print("Saved reward_plot.png") diff --git a/scripts/run_recovery_sweep.py b/scripts/run_recovery_sweep.py new file mode 100644 index 00000000..95cf1e64 --- /dev/null +++ b/scripts/run_recovery_sweep.py @@ -0,0 +1,34 @@ +# run_recovery_sweep.py + +import subprocess +import os + +RESULTS_DIR = "recovery_sweep" +os.makedirs(RESULTS_DIR, exist_ok=True) + +torque_values = [1.0, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6] +latency_values = [0, 1, 2, 3, 4, 5] + +for torque in torque_values: + for latency in latency_values: + print(f"\nRunning torque={torque}, latency={latency}") + + cmd = [ + "python", + "scripts/play.py", + "--task", + "horse_osc_belay", + "--torque_scale", + str(torque), + "--latency_steps", + str(latency), + "--max_steps", + "2500", + "--results_dir", + RESULTS_DIR, + "--headless", + ] + + subprocess.run(cmd, check=True) + +print("Sweep complete.") diff --git a/scripts/sweep_configs/sweep_horse_osc_belay_config.json b/scripts/sweep_configs/sweep_horse_osc_belay_config.json new file mode 100644 index 00000000..180f721f --- /dev/null +++ b/scripts/sweep_configs/sweep_horse_osc_belay_config.json @@ -0,0 +1,19 @@ +{ + "method": "grid", + "metric": { + "name": "rewards/total_rewards", + "goal": "maximize" + }, + "run_cap": 50, + "parameters": { + "train_cfg.runner.max_iterations": { + "values": [1000] + }, + "env_cfg.belay.mass_kg": { + "values": [0, 20, 50, 100, 150, 200, 300, 400, 500] + }, + "train_cfg.seed": { + "values": [1, 2, 3] + } + } +} \ No newline at end of file diff --git a/scripts/sweep_configs/sweep_horse_osc_config.json b/scripts/sweep_configs/sweep_horse_osc_config.json new file mode 100644 index 00000000..df27bc5a --- /dev/null +++ b/scripts/sweep_configs/sweep_horse_osc_config.json @@ -0,0 +1,58 @@ +{ + "method": "random", + "metric": { + "name": "rewards/total_rewards", + "goal": "maximize" + }, + "run_cap": 50, + "parameters": { + "train_cfg.runner.max_iterations": { + "values": [500] + }, + "env_cfg.scaling.dof_pos": { + "values": [ + [0.2, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1], + [0.24, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52], + [0.288, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024], + [0.3456, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288], + [0.41472, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536], + [0.497664, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472] + ] + }, + + "env_cfg.scaling.dof_pos_target": { + "values": [ + [0.4, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2], + [0.48, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04], + [0.576, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048], + [0.6912, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576], + [0.82944, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008], + [0.995328, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976] + ] + }, + + "env_cfg.control.stiffness": { + "values": [ + {"haa": 4000, "hfe": 4000, "kfe": 4000, "pfe": 4000, "pastern_to_foot": 4000, "base_joint": 4000}, + {"haa": 3000, "hfe": 3000, "kfe": 3000, "pfe": 3000, "pastern_to_foot": 3000, "base_joint": 3000}, + {"haa": 2000, "hfe": 2000, "kfe": 2000, "pfe": 2000, "pastern_to_foot": 2000, "base_joint": 2000}, + {"haa": 1000, "hfe": 1000, "kfe": 1000, "pfe": 1000, "pastern_to_foot": 1000, "base_joint": 1000}, + {"haa": 500, "hfe": 500, "kfe": 500, "pfe": 500, "pastern_to_foot": 500, "base_joint": 500}, + {"haa": 250, "hfe": 250, "kfe": 250, "pfe": 250, "pastern_to_foot": 250, "base_joint": 250}, + {"haa": 100, "hfe": 100, "kfe": 100, "pfe": 100, "pastern_to_foot": 100, "base_joint": 100}, + {"haa": 50, "hfe": 50, "kfe": 50, "pfe": 50, "pastern_to_foot": 50, "base_joint": 50} + ] + }, + + "env_cfg.control.damping": { + "values": [ + {"haa": 400, "hfe": 400, "kfe": 400, "pfe": 400, "pastern_to_foot": 400, "base_joint": 400}, + {"haa": 300, "hfe": 300, "kfe": 300, "pfe": 300, "pastern_to_foot": 300, "base_joint": 300}, + {"haa": 250, "hfe": 250, "kfe": 250, "pfe": 250, "pastern_to_foot": 250, "base_joint": 250}, + {"haa": 200, "hfe": 200, "kfe": 200, "pfe": 200, "pastern_to_foot": 200, "base_joint": 200}, + {"haa": 100, "hfe": 100, "kfe": 100, "pfe": 100, "pastern_to_foot": 100, "base_joint": 100}, + {"haa": 50, "hfe": 50, "kfe": 50, "pfe": 50, "pastern_to_foot": 50, "base_joint": 50} + ] + } + } +}