From 9c6c8d075bdbc16b3cd8fa08eb4e7ad526ecd42a Mon Sep 17 00:00:00 2001 From: Jade Choghari Date: Thu, 20 Nov 2025 08:47:22 +0100 Subject: [PATCH] update libero --- src/lerobot/envs/libero.py | 124 ++++++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 51 deletions(-) diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py index 58f4e051a..35bc58e07 100644 --- a/src/lerobot/envs/libero.py +++ b/src/lerobot/envs/libero.py @@ -28,9 +28,6 @@ import torch from gymnasium import spaces from libero.libero import benchmark, get_libero_path from libero.libero.envs import OffScreenRenderEnv -from robosuite.utils.transform_utils import quat2axisangle - -from lerobot.policies.xvla.utils import mat_to_rotate6d def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]: @@ -83,14 +80,14 @@ def get_libero_dummy_action(): return [0, 0, 0, 0, 0, 0, -1] -OBS_STATE_DIM = 20 +OBS_STATE_DIM = 8 ACTION_DIM = 7 AGENT_POS_LOW = -1000.0 AGENT_POS_HIGH = 1000.0 ACTION_LOW = -1.0 ACTION_HIGH = 1.0 TASK_SUITE_MAX_STEPS: dict[str, int] = { - "libero_spatial": 800, # longest training demo has 193 steps + "libero_spatial": 280, # longest training demo has 193 steps "libero_object": 280, # longest training demo has 254 steps "libero_goal": 300, # longest training demo has 270 steps "libero_10": 520, # longest training demo has 505 steps @@ -117,7 +114,6 @@ class LiberoEnv(gym.Env): episode_index: int = 0, camera_name_mapping: dict[str, str] | None = None, num_steps_wait: int = 10, - action_type: str = "rel", ): super().__init__() self.task_id = task_id @@ -148,9 +144,11 @@ class LiberoEnv(gym.Env): # Load once and keep self._init_states = get_task_init_states(task_suite, self.task_id) if self.init_states else None self._init_state_id = self.episode_index # tie each sub-env to a fixed init state + self._env = self._make_envs_task(task_suite, self.task_id) default_steps = 500 self._max_episode_steps = TASK_SUITE_MAX_STEPS.get(task_suite_name, default_steps) + images = {} for cam in self.camera_name: images[self.camera_name_mapping[cam]] = spaces.Box( @@ -176,11 +174,36 @@ class LiberoEnv(gym.Env): self.observation_space = spaces.Dict( { "pixels": spaces.Dict(images), - "agent_pos": spaces.Box( - low=AGENT_POS_LOW, - high=AGENT_POS_HIGH, - shape=(OBS_STATE_DIM,), - dtype=np.float64, + "robot_state": spaces.Dict( + { + "eef": spaces.Dict( + { + "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64), + "quat": spaces.Box( + low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64 + ), + "mat": spaces.Box( + low=-np.inf, high=np.inf, shape=(3, 3), dtype=np.float64 + ), + } + ), + "gripper": spaces.Dict( + { + "qpos": spaces.Box( + low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64 + ), + "qvel": spaces.Box( + low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64 + ), + } + ), + "joints": spaces.Dict( + { + "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64), + "vel": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64), + } + ), + } ), } ) @@ -188,11 +211,11 @@ class LiberoEnv(gym.Env): self.action_space = spaces.Box( low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32 ) - self.action_type = action_type def render(self): raw_obs = self._env.env._get_observations() image = self._format_raw_obs(raw_obs)["pixels"]["image"] + image = image[::-1, ::-1] # flip both H and W for visualization return image def _make_envs_task(self, task_suite: Any, task_id: int = 0): @@ -214,38 +237,48 @@ class LiberoEnv(gym.Env): images = {} for camera_name in self.camera_name: image = raw_obs[camera_name] - if camera_name == "agentview_image": - image = image[::-1, ::-1] # rotate 180 degrees images[self.camera_name_mapping[camera_name]] = image - if self.action_type == "rel": - state = np.concatenate( - ( - raw_obs["robot0_eef_pos"], - quat2axisangle(raw_obs["robot0_eef_quat"]), - raw_obs["robot0_gripper_qpos"], - ) - ) - # TODO: jadechoghari, this is an ugly quick workaround for XVLA states. - # we will open a new PR to handle this in a preprocessor. - elif self.action_type == "abs": - robo_ori = mat_to_rotate6d(self._env.robots[0].controller.ee_ori_mat) - robo_pos = self._env.robots[0].controller.ee_pos - proprio = np.concatenate([robo_pos, robo_ori, np.array([0.0])], axis=-1) - state = np.concatenate([proprio, np.zeros_like(proprio)], axis=-1) - else: - raise NotImplementedError( - f"The action type '{self.action_type}' is not supported in LiberoEnv. " - "Please switch to an action type (e.g. 'rel', 'abs')." - ) - agent_pos = state + eef_pos = raw_obs.get("robot0_eef_pos") + eef_quat = raw_obs.get("robot0_eef_quat") + + # rotation matrix from controller + eef_mat = self._env.robots[0].controller.ee_ori_mat if eef_pos is not None else None + gripper_qpos = raw_obs.get("robot0_gripper_qpos") + gripper_qvel = raw_obs.get("robot0_gripper_qvel") + joint_pos = raw_obs.get("robot0_joint_pos") + joint_vel = raw_obs.get("robot0_joint_vel") + obs = { + "pixels": images, + "robot_state": { + "eef": { + "pos": eef_pos, # (3,) + "quat": eef_quat, # (4,) + "mat": eef_mat, # (3, 3) + }, + "gripper": { + "qpos": gripper_qpos, # (2,) + "qvel": gripper_qvel, # (2,) + }, + "joints": { + "pos": joint_pos, # (7,) + "vel": joint_vel, # (7,) + }, + }, + } if self.obs_type == "pixels": return {"pixels": images.copy()} + if self.obs_type == "pixels_agent_pos": - return { - "pixels": images.copy(), - "agent_pos": agent_pos, - } + # Validate required fields are present + if eef_pos is None or eef_quat is None or gripper_qpos is None: + raise ValueError( + f"Missing required robot state fields in raw observation. " + f"Got eef_pos={eef_pos is not None}, eef_quat={eef_quat is not None}, " + f"gripper_qpos={gripper_qpos is not None}" + ) + return obs + raise NotImplementedError( f"The observation type '{self.obs_type}' is not supported in LiberoEnv. " "Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')." @@ -256,19 +289,14 @@ class LiberoEnv(gym.Env): self._env.seed(seed) if self.init_states and self._init_states is not None: self._env.set_init_state(self._init_states[self._init_state_id]) - raw_obs = self._env.reset() # After reset, objects may be unstable (slightly floating, intersecting, etc.). # Step the simulator with a no-op action for a few frames so everything settles. # Increasing this value can improve determinism and reproducibility across resets. for _ in range(self.num_steps_wait): - action = np.array(get_libero_dummy_action()) - raw_obs, _, _, _ = self._env.step(action) - + raw_obs, _, _, _ = self._env.step(get_libero_dummy_action()) observation = self._format_raw_obs(raw_obs) - for robot in self._env.robots: - robot.controller.use_delta = False info = {"is_success": False} return observation, info @@ -315,7 +343,6 @@ def _make_env_fns( camera_names: list[str], init_states: bool, gym_kwargs: Mapping[str, Any], - action_type: str, ) -> list[Callable[[], LiberoEnv]]: """Build n_envs factory callables for a single (suite, task_id).""" @@ -328,7 +355,6 @@ def _make_env_fns( camera_name=camera_names, init_states=init_states, episode_index=episode_index, - action_type=action_type, **local_kwargs, ) @@ -348,7 +374,6 @@ def create_libero_envs( camera_name: str | Sequence[str] = "agentview_image,robot0_eye_in_hand_image", init_states: bool = True, env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None, - action_type: str = "rel", ) -> dict[str, dict[int, Any]]: """ Create vectorized LIBERO environments with a consistent return shape. @@ -380,12 +405,10 @@ def create_libero_envs( print(f"Restricting to task_ids={task_ids_filter}") out: dict[str, dict[int, Any]] = defaultdict(dict) - for suite_name in suite_names: suite = _get_suite(suite_name) total = len(suite.tasks) selected = _select_task_ids(total, task_ids_filter) - if not selected: raise ValueError(f"No tasks selected for suite '{suite_name}' (available: {total}).") @@ -398,7 +421,6 @@ def create_libero_envs( camera_names=camera_names, init_states=init_states, gym_kwargs=gym_kwargs, - action_type=action_type, ) out[suite_name][tid] = env_cls(fns) print(f"Built vec env | suite={suite_name} | task_id={tid} | n_envs={n_envs}")