diff --git a/src/lerobot/envs/robotwin.py b/src/lerobot/envs/robotwin.py index 07f9f6202..e18aea7dd 100644 --- a/src/lerobot/envs/robotwin.py +++ b/src/lerobot/envs/robotwin.py @@ -206,12 +206,18 @@ class RoboTwinEnv(gym.Env): Observations ------------ The ``pixels`` dict uses the raw RoboTwin camera names as keys (e.g. - ``"head_camera"``, ``"left_wrist"``). ``preprocess_observation`` in + ``"head_camera"``, ``"left_camera"``). ``preprocess_observation`` in ``envs/utils.py`` then converts these to ``observation.images.``. Actions ------- 14-dim float32 array in ``[-1, 1]`` (joint-space, 7 DOF per arm). + + Autograd + -------- + ``setup_demo`` and ``take_action`` drive CuRobo's Newton trajectory + optimizer, which calls ``cost.backward()`` internally. lerobot_eval wraps + the rollout in ``torch.no_grad()``, so both call sites re-enable grad. """ metadata = {"render_modes": ["rgb_array"], "render_fps": 25} @@ -241,6 +247,7 @@ class RoboTwinEnv(gym.Env): self._env: Any | None = None # deferred — created on first reset() inside worker self._step_count: int = 0 + self._black_frame = np.zeros((observation_height, observation_width, 3), dtype=np.uint8) image_spaces = { cam: spaces.Box( @@ -276,36 +283,31 @@ class RoboTwinEnv(gym.Env): def _get_obs(self) -> RobotObservation: assert self._env is not None, "_get_obs called before _ensure_env()" raw = self._env.get_obs() + cameras_raw = raw.get("observation", {}) - # RoboTwin's get_obs returns a nested dict: - # {"observation": {cam_name: {"rgb": img, "intrinsic_matrix": ...}, ...}, - # "joint_action": {"left_arm": [...], "left_gripper": ..., "vector": np.ndarray}, - # "endpose": {...}} - cameras_raw = raw.get("observation", {}) if isinstance(raw, dict) else {} images: dict[str, np.ndarray] = {} for cam in self.camera_names: cam_data = cameras_raw.get(cam) - img = cam_data.get("rgb") if isinstance(cam_data, dict) else None - if img is not None: - img = np.asarray(img, dtype=np.uint8) - if img.ndim == 2: - img = np.stack([img, img, img], axis=-1) - elif img.shape[-1] != 3: - img = img[..., :3] - images[cam] = img - else: - # Camera not exposed by this task — return a black frame so the - # observation shape stays consistent across all tasks. - images[cam] = np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8) + img = cam_data.get("rgb") if cam_data else None + if img is None: + images[cam] = self._black_frame + continue + img = np.asarray(img, dtype=np.uint8) + if img.ndim == 2: + img = np.stack([img, img, img], axis=-1) + elif img.shape[-1] != 3: + img = img[..., :3] + images[cam] = img - # Joint state: RoboTwin exposes it as raw["joint_action"]["vector"] - # (flat array of left_arm+left_gripper+right_arm+right_gripper). - joint_state = np.zeros(ACTION_DIM, dtype=np.float64) - ja = raw.get("joint_action") if isinstance(raw, dict) else None - if isinstance(ja, dict) and "vector" in ja: - arr = np.asarray(ja["vector"], dtype=np.float64).flatten() - if arr.size >= ACTION_DIM: - joint_state = arr[:ACTION_DIM] + ja = raw.get("joint_action") or {} + vec = ja.get("vector") + if vec is not None: + arr = np.asarray(vec, dtype=np.float64).ravel() + joint_state = ( + arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float64) + ) + else: + joint_state = np.zeros(ACTION_DIM, dtype=np.float64) return {"pixels": images, "agent_pos": joint_state} @@ -317,9 +319,6 @@ class RoboTwinEnv(gym.Env): actual_seed = self.episode_index if seed is None else seed setup_kwargs = _load_robotwin_setup_kwargs(self.task_name) setup_kwargs.update(seed=actual_seed, is_test=True) - # setup_demo → load_robot → CuroboPlanner.warmup() runs Newton's-method - # trajectory optimization, which requires autograd. lerobot_eval wraps - # the whole rollout in torch.no_grad(), so re-enable grad here. with torch.enable_grad(): self._env.setup_demo(**setup_kwargs) self.episode_index += self._reset_stride @@ -333,9 +332,6 @@ class RoboTwinEnv(gym.Env): if action.ndim != 1 or action.shape[0] != ACTION_DIM: raise ValueError(f"Expected 1-D action of shape ({ACTION_DIM},), got {action.shape}") - # RoboTwin 2.0 uses take_action(); fall back to step() for older forks. - # take_action() invokes the CuRobo planner, which needs autograd — - # lerobot_eval wraps the rollout in torch.no_grad(). with torch.enable_grad(): if hasattr(self._env, "take_action"): self._env.take_action(action)