tests/envs/test_robotwin.py

#!/usr/bin/env python

# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for the RoboTwin 2.0 Gymnasium wrapper.

These tests mock out the SAPIEN-based RoboTwin runtime (task modules +
YAML config loader) so they run without the full RoboTwin installation
(SAPIEN, CuRobo, mplib, asset downloads, etc.).
"""

from __future__ import annotations

from contextlib import contextmanager
from unittest.mock import MagicMock, patch

import gymnasium as gym
import numpy as np
import pytest

from lerobot.envs.robotwin import (
    ACTION_DIM,
    ROBOTWIN_CAMERA_NAMES,
    ROBOTWIN_TASKS,
    RoboTwinEnv,
    create_robotwin_envs,
)

# ---------------------------------------------------------------------------
# Fixtures / helpers
# ---------------------------------------------------------------------------


def _make_mock_task_env(
    height: int = 240,
    width: int = 320,
    cameras: tuple[str, ...] = ROBOTWIN_CAMERA_NAMES,
) -> MagicMock:
    """Build a ``MagicMock`` standing in for a RoboTwin task instance.

    RoboTwin's real get_obs returns
        {"observation": {cam: {"rgb": img}}, "joint_action": {"vector": np.ndarray}, ...}
    and the mock's ``get_obs()`` reproduces that nesting: one all-zero
    ``uint8`` frame of shape ``(height, width, 3)`` per name in ``cameras``,
    an all-zero ``float32`` joint vector of length ``ACTION_DIM`` and an
    empty ``"endpose"`` dict.

    ``setup_demo``, ``take_action`` and ``close_env`` return ``None``;
    ``eval_success`` is ``False`` and ``check_success()`` returns ``False``.
    """
    # One independent zero image per requested camera.
    frames = {}
    for cam in cameras:
        frames[cam] = {"rgb": np.zeros((height, width, 3), dtype=np.uint8)}
    joint_vector = np.zeros(ACTION_DIM, dtype=np.float32)

    task = MagicMock()
    task.get_obs.return_value = {
        "observation": frames,
        "joint_action": {"vector": joint_vector},
        "endpose": {},
    }
    # These calls carry no meaningful return value for the tests.
    for hook in ("setup_demo", "take_action", "close_env"):
        getattr(task, hook).return_value = None
    # "Not successful" is exposed both as an attribute and as a method result.
    task.eval_success = False
    task.check_success.return_value = False
    return task


@contextmanager
def _patch_runtime(mock_task_instance: MagicMock):
    """Replace the task-class loader and the YAML config loader with fakes so
    the env can construct + reset without a real RoboTwin install.

    While the context is active, ``_load_robotwin_task`` returns a mock class
    whose instantiation yields ``mock_task_instance``, and
    ``_load_robotwin_setup_kwargs`` returns a fixed setup dict.
    """
    fake_setup = {
        "head_camera_h": 240,
        "head_camera_w": 320,
        "left_embodiment_config": {},
        "right_embodiment_config": {},
        "left_robot_file": "",
        "right_robot_file": "",
        "dual_arm_embodied": True,
        "render_freq": 0,
        "task_name": "beat_block_hammer",
        "task_config": "demo_clean",
    }
    # Calling the "class" hands back the caller-supplied task mock.
    task_factory = MagicMock(return_value=mock_task_instance)
    task_loader_patch = patch("lerobot.envs.robotwin._load_robotwin_task", return_value=task_factory)
    setup_loader_patch = patch("lerobot.envs.robotwin._load_robotwin_setup_kwargs", return_value=fake_setup)
    with task_loader_patch, setup_loader_patch:
        yield


# ---------------------------------------------------------------------------
# RoboTwinEnv unit tests
# ---------------------------------------------------------------------------


class TestRoboTwinEnv:
    """Checks for ``RoboTwinEnv`` driven by a mocked RoboTwin task."""

    def test_observation_space_shape(self):
        """observation_space should have the configured h×w×3 for every camera."""
        height, width = 240, 320
        env = RoboTwinEnv(
            task_name="beat_block_hammer",
            observation_height=height,
            observation_width=width,
            camera_names=["head_camera", "left_camera"],
        )
        pixels = env.observation_space["pixels"]
        for cam in ("head_camera", "left_camera"):
            assert pixels[cam].shape == (height, width, 3)
        # A camera that was not requested must not get a space.
        assert "right_camera" not in pixels

    def test_action_space(self):
        """The action space is a float32 vector of length ACTION_DIM."""
        env = RoboTwinEnv(task_name="beat_block_hammer")
        assert env.action_space.shape == (ACTION_DIM,)
        assert env.action_space.dtype == np.float32

    def test_reset_returns_correct_obs_keys(self):
        """reset() yields pixels for every default camera plus agent_pos."""
        task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        with _patch_runtime(task):
            obs, info = env.reset()

        assert "pixels" in obs
        for cam in ROBOTWIN_CAMERA_NAMES:
            assert cam in obs["pixels"], f"Missing camera '{cam}' in obs"
        assert "agent_pos" in obs
        assert obs["agent_pos"].shape == (ACTION_DIM,)
        assert info["is_success"] is False

    def test_reset_calls_setup_demo(self):
        """reset(seed=...) calls setup_demo exactly once with seed and is_test."""
        task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        with _patch_runtime(task):
            env.reset(seed=42)
        # setup_demo receives the full YAML-derived kwargs plus seed + is_test;
        # we only assert the caller-provided bits.
        setup_demo = task.setup_demo
        assert setup_demo.call_count == 1
        passed = setup_demo.call_args.kwargs
        assert passed["seed"] == 42
        assert passed["is_test"] is True

    def test_step_returns_correct_types(self):
        """step() returns the Gymnasium 5-tuple with plain Python types."""
        task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        zero_action = np.zeros(ACTION_DIM, dtype=np.float32)
        with _patch_runtime(task):
            env.reset()
            obs, reward, terminated, truncated, info = env.step(zero_action)

        expectations = (
            (obs, dict),
            (reward, float),
            (terminated, bool),
            (truncated, bool),
            (info, dict),
        )
        for value, expected_type in expectations:
            assert isinstance(value, expected_type)

    def test_step_wrong_action_shape_raises(self):
        """An action of the wrong length is rejected with ValueError."""
        task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        wrong_size_action = np.zeros(7, dtype=np.float32)  # wrong dim
        with _patch_runtime(task):
            env.reset()
            with pytest.raises(ValueError, match="Expected 1-D action"):
                env.step(wrong_size_action)

    def test_success_terminates_episode(self):
        """When check_success() is True, the step terminates and reports success."""
        task = _make_mock_task_env()
        task.check_success.return_value = True
        env = RoboTwinEnv(task_name="beat_block_hammer")
        zero_action = np.zeros(ACTION_DIM, dtype=np.float32)
        with _patch_runtime(task):
            env.reset()
            step_result = env.step(zero_action)
        _, _, terminated, _, info = step_result
        assert terminated is True
        assert info["is_success"] is True

    def test_truncation_after_episode_length(self):
        """With episode_length=2, the second step is flagged as truncated."""
        task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer", episode_length=2)
        zero_action = np.zeros(ACTION_DIM, dtype=np.float32)
        with _patch_runtime(task):
            env.reset()
            env.step(zero_action)  # step 1
            second = env.step(zero_action)  # step 2 → truncated
        _, _, _, truncated, _ = second
        assert truncated is True

    def test_close_calls_close_env(self):
        """close() forwards to the task's close_env() exactly once."""
        task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        with _patch_runtime(task):
            env.reset()
            env.close()
        task.close_env.assert_called_once()

    def test_black_frame_for_missing_camera(self):
        """If a camera key is absent from get_obs(), a black frame is returned."""
        # Mock exposes only head_camera; we ask for both head_camera + left_camera.
        task = _make_mock_task_env(height=10, width=10, cameras=("head_camera",))
        env = RoboTwinEnv(
            task_name="beat_block_hammer",
            camera_names=["head_camera", "left_camera"],
            observation_height=10,
            observation_width=10,
        )
        with _patch_runtime(task):
            obs, _ = env.reset()
        fallback_frame = obs["pixels"]["left_camera"]
        assert fallback_frame.shape == (10, 10, 3)
        assert fallback_frame.sum() == 0

    def test_task_and_task_description_attributes(self):
        """The env exposes the task name and a string task description."""
        env = RoboTwinEnv(task_name="beat_block_hammer")
        assert env.task == "beat_block_hammer"
        assert isinstance(env.task_description, str)

    def test_deferred_init_env_is_none_before_reset(self):
        """Constructing the env alone leaves the underlying task unset."""
        env = RoboTwinEnv(task_name="beat_block_hammer")
        assert env._env is None  # noqa: SLF001  (testing internal state)


# ---------------------------------------------------------------------------
# create_robotwin_envs tests
# ---------------------------------------------------------------------------


class TestCreateRoboTwinEnvs:
    """Checks for the ``create_robotwin_envs`` factory."""

    def test_returns_correct_structure(self):
        """The result maps task name -> index 0 -> a SyncVectorEnv."""
        task = _make_mock_task_env()
        with _patch_runtime(task):
            envs = create_robotwin_envs(
                task="beat_block_hammer",
                n_envs=1,
                env_cls=gym.vector.SyncVectorEnv,
            )
        assert "beat_block_hammer" in envs
        per_task = envs["beat_block_hammer"]
        assert 0 in per_task
        assert isinstance(per_task[0], gym.vector.SyncVectorEnv)

    def test_multi_task(self):
        """A comma-separated task string produces one entry per task."""
        task = _make_mock_task_env()
        with _patch_runtime(task):
            envs = create_robotwin_envs(
                task="beat_block_hammer,click_bell",
                n_envs=1,
                env_cls=gym.vector.SyncVectorEnv,
            )
        expected_tasks = {"beat_block_hammer", "click_bell"}
        assert set(envs.keys()) == expected_tasks

    def test_unknown_task_raises(self):
        """A task name outside the known list is rejected with ValueError."""
        request = {
            "task": "not_a_real_task",
            "n_envs": 1,
            "env_cls": gym.vector.SyncVectorEnv,
        }
        with pytest.raises(ValueError, match="Unknown RoboTwin tasks"):
            create_robotwin_envs(**request)

    def test_invalid_n_envs_raises(self):
        """n_envs=0 is rejected with ValueError."""
        request = {
            "task": "beat_block_hammer",
            "n_envs": 0,
            "env_cls": gym.vector.SyncVectorEnv,
        }
        with pytest.raises(ValueError, match="n_envs must be a positive int"):
            create_robotwin_envs(**request)


# ---------------------------------------------------------------------------
# ROBOTWIN_TASKS list
# ---------------------------------------------------------------------------


def test_task_list_not_empty():
    """ROBOTWIN_TASKS must list at least 50 tasks."""
    task_count = len(ROBOTWIN_TASKS)
    assert task_count >= 50


def test_all_tasks_are_strings():
    """Every entry of ROBOTWIN_TASKS is a non-empty string."""
    for task_name in ROBOTWIN_TASKS:
        assert isinstance(task_name, str) and task_name


def test_no_duplicate_tasks():
    """ROBOTWIN_TASKS contains no repeated task name."""
    unique_tasks = set(ROBOTWIN_TASKS)
    assert len(unique_tasks) == len(ROBOTWIN_TASKS)
-												feat(envs): add RoboTwin 2.0 benchmark (#3315)

* feat(envs): add RoboTwin 2.0 benchmark integration

- RoboTwinEnvConfig with 4-camera setup (head/front/left_wrist/right_wrist)
- Docker image with SAPIEN, mplib, CuRobo, pytorch3d (Python 3.12)
- CI workflow: 1-episode smoke eval with pepijn223/smolvla_robotwin
- RoboTwinProcessorStep for state float32 casting
- Camera rename_map: head_camera/front_camera/left_wrist -> camera1/2/3

* fix(robotwin): re-enable autograd for CuRobo planner warmup and take_action

lerobot_eval wraps the full rollout in torch.no_grad() (lerobot_eval.py:566),
but RoboTwin's setup_demo → load_robot → CuroboPlanner(...) runs
motion_gen.warmup(), which invokes Newton's-method trajectory optimization.
That optimizer calls cost.backward() internally, which raises

    RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

when autograd is disabled. take_action() hits the same planner path at every
step. Wrap both setup_demo and take_action in torch.enable_grad() so CuRobo's
optimizer can build its computation graph. Policy inference is unaffected —
rollout()'s inner torch.inference_mode() block around select_action() is
untouched, so we still don't allocate grad buffers during policy forward.

* fix(robotwin): read nested get_obs() output and use aloha-agilex camera names

RoboTwin's base_task.get_obs() returns a nested dict:

    {"observation": {cam: {"rgb": ..., "intrinsic_matrix": ...}},
     "joint_action": {"left_arm": ..., "left_gripper": ...,
                      "right_arm": ..., "right_gripper": ...,
                      "vector": np.ndarray},
     "endpose": {...}}

Our _get_obs was reading raw["{cam}_rgb"] / raw["{cam}"] and raw["joint_action"]
as if they were flat, so np.asarray(raw["joint_action"], dtype=float64) tripped
on a dict and raised

    TypeError: float() argument must be a string or a real number, not 'dict'

Fix:
- Pull images from raw["observation"][cam]["rgb"]
- Pull joint state from raw["joint_action"]["vector"] (the flat array)
- Update the default camera tuple to (head_camera, left_camera, right_camera)
  to match RoboTwin's actual wrist-camera names (envs/camera/camera.py:135-151)

* refactor(robotwin): drop defensive dict guards, cache black fallback frame

_get_obs was guarding every dict access with isinstance(..., dict) in case
RoboTwin's get_obs returned something else — but the API contract
(envs/_base_task.py:437) always returns a dict, so the guards were silently
masking real failures behind plausible-looking zero observations. Drop them.

Also:
- Cache a single black fallback frame in __init__ instead of allocating
  a fresh np.zeros((H, W, 3), uint8) for every missing camera on every
  step — the "camera not exposed" set is static per env.
- Only allocate the zero joint_state on the fallback path (not unconditionally
  before the real value overwrites it).
- Replace .flatten() with .ravel() (no copy when already 1-D).
- Fold the nested-dict schema comment and two identical torch.enable_grad()
  rationales into a single Autograd section in the class docstring.
- Fix stale `left_wrist` camera name in the observation docstring.

* fix(robotwin): align observation_space dims with D435 camera output

lerobot_eval crashed in gym.vector's SyncVectorEnv.reset with:

    ValueError: Output array is the wrong shape

because RoboTwinEnvConfig declared observation_space = (480, 640, 3) but
task_config/demo_clean.yml specifies head_camera_type=D435, which renders
(240, 320, 3). gym.vector.concatenate pre-allocates a buffer from the
declared space, so the first np.stack raises on shape mismatch.

Changes:
- Config defaults now 240×320 (the D435 dims in _camera_config.yml), with
  a comment pointing at the source of truth.
- RoboTwinEnv.__init__ accepts observation_height/width as Optional and
  falls back to setup_kwargs["head_camera_h/w"] so the env is self-consistent
  even if the config is not in sync.
- Config camera_names / features_map use the actual aloha-agilex camera
  names (head_camera, left_camera, right_camera). Drops the stale
  "front_camera" and "left_wrist"/"right_wrist" entries that never matched
  anything RoboTwin exposes.
- CI workflow's rename_map updated to match the new camera names.

* fix(robotwin): expose _max_episode_steps for lerobot_eval.rollout

rollout() does `env.call("_max_episode_steps")` (lerobot_eval.py:157) to
know when to stop stepping. LiberoEnv and MetaworldEnv set this attribute;
RoboTwinEnv was tracking the limit under `episode_length` only, so the call
raised AttributeError once CuRobo finished warming up.

* fix(robotwin): install av-dep so lerobot_eval can write rollout MP4s

write_video (utils/io_utils.py:53) lazily imports PyAV via require_package
and raises silently inside the video-writing thread when the extra is not
installed — so the eval itself succeeds with pc_success=100 but no MP4
ever lands in videos/, and the artifact upload reports "No files were
found". Add av-dep to the install line (same pattern as the RoboMME image).

* feat(robotwin): eval 5 diverse tasks per CI run with NL descriptions

Widen the smoke eval from a single task (beat_block_hammer) to five:
click_bell, handover_block, open_laptop, stack_blocks_two on top of the
original. Each gets its own rollout video in videos/<task>_0/ so the
dashboard can surface visually distinct behaviours.

extract_task_descriptions.py now has a RoboTwin branch that reads
`description/task_instruction/<task>.json` (already shipped in the clone
at /opt/robotwin) and pulls the `full_description` field. CI cds into
the clone before invoking the script so the relative path resolves.

parse_eval_metrics.py is invoked with the same 5-task list so the
metrics.json embeds one entry per task.

* ci: point benchmark eval checkpoints at the lerobot/ org mirrors

pepijn223/smolvla_* → lerobot/smolvla_* across every benchmark job in
this branch (libero, metaworld, and the per-branch benchmark). The
checkpoints were mirrored into the lerobot/ org and that's the canonical
location going forward.

* refactor(robotwin): rebase docker image on huggingface/lerobot-gpu

Mirror the libero/metaworld/libero_plus/robomme pattern: start from the
nightly GPU image (apt deps, python, uv, venv, lerobot[all] already
there) and layer on only what RoboTwin 2.0 uniquely needs —
cuda-nvcc + cuda-cudart-dev (CuRobo builds from source), Vulkan libs +
NVIDIA ICD (SAPIEN renderer), sapien/mplib/open3d/pytorch3d/curobo
installs, the mplib + sapien upstream patches, and the TianxingChen
asset download.

Drops ~90 lines of duplicated base setup (CUDA FROM, apt python, uv
install, user creation, venv init, base lerobot install). 199 → 110.

Also repoint the docs + env docstring dataset link from
hxma/RoboTwin-LeRobot-v3.0 to the canonical lerobot/robotwin_unified.

* docs(robotwin): add robotwin to _toctree.yml under Benchmarks

doc-builder's TOC integrity check was rejecting the branch because
docs/source/robotwin.mdx existed but wasn't listed in _toctree.yml.


* fix(robotwin): defer YAML lookup and realign tests with current API

__init__ was eagerly calling _load_robotwin_setup_kwargs just to read
head_camera_h/w from the YAML. That import (`from envs import CONFIGS_PATH`)
required a real RoboTwin install, so constructing the env — and thus every
test in tests/envs/test_robotwin.py — blew up with ModuleNotFoundError
on fast-tests where RoboTwin isn't installed.

Replace the eager lookup with DEFAULT_CAMERA_H/W constants (240×320, the
D435 dims baked into task_config/demo_clean.yml). reset() still resolves
the full setup_kwargs lazily — that's fine because reset() is only
called inside the benchmark Docker image where RoboTwin is present.

Also resync the test file with the current env API:
  - mock get_obs() as the real nested {"observation": {cam: {"rgb": …}},
    "joint_action": {"vector": …}} shape
  - patch both _load_robotwin_task and _load_robotwin_setup_kwargs
    (_patch_load → _patch_runtime)
  - drop `front_camera` / `left_wrist` from assertions — aloha-agilex
    exposes head_camera + left_camera + right_camera, not those
  - black-frame test now uses left_camera as the missing camera
  - setup_demo call check loosened to the caller-provided seed/is_test
    bits (full kwargs include the YAML-derived blob)

* fix: integrate PR #3315 review feedback

- ci: add Docker Hub login step, add HF_USER_TOKEN guard on eval step
- docker: tie patches to pinned versions with removal guidance, remove
  unnecessary HF_TOKEN for public dataset, fix hadolint warnings
- docs: fix paper link to arxiv, add teaser image, fix camera names
  (4→3 cameras), fix observation dims (480x640→240x320)


* fix(docs): correct RoboTwin 2.0 paper arxiv link


* fix(docs): use correct RoboTwin 2.0 teaser image URL


* fix(docs): use plain markdown image to fix MDX build

* ci(robotwin): smoke-eval 10 tasks instead of 5

Broader coverage on the RoboTwin 2.0 benchmark CI job: bump the smoke
eval from 5 tasks to 10 (one episode each). Added tasks are all drawn
from ROBOTWIN_TASKS and mirror the shape/complexity of the existing
set (simple single-object or single-fixture manipulations).

Tasks now run: beat_block_hammer, click_bell, handover_block,
open_laptop, stack_blocks_two, click_alarmclock, close_laptop,
close_microwave, open_microwave, place_block.

`parse_eval_metrics.py` reads `overall` for multi-task runs so no
parser change is needed. Bumped the step name and the metrics label
to reflect the 10-task layout.


* fix(ci): swap 4 broken RoboTwin tasks in smoke eval

The smoke eval hit two upstream issues:
- `open_laptop`: bug in OpenMOSS/RoboTwin main — `check_success()` uses
  `self.arm_tag`, but that attribute is only set inside `play_once()`
  (the scripted-expert path). During eval `take_action()` calls
  `check_success()` directly, hitting `AttributeError: 'open_laptop'
  object has no attribute 'arm_tag'`.
- `close_laptop`, `close_microwave`, `place_block`: not present in
  upstream RoboTwin `envs/` at all — our ROBOTWIN_TASKS tuple drifted
  from upstream and these names leaked into CI.

Replace the four broken tasks with upstream-confirmed equivalents
that exist both in ROBOTWIN_TASKS and in RoboTwin's `envs/`:
`adjust_bottle`, `lift_pot`, `stamp_seal`, `turn_switch`.

New 10-task smoke set: beat_block_hammer, click_bell, handover_block,
stack_blocks_two, click_alarmclock, open_microwave, adjust_bottle,
lift_pot, stamp_seal, turn_switch.


* fix(robotwin): sync ROBOTWIN_TASKS + doc with upstream (50 tasks)

The local ROBOTWIN_TASKS tuple drifted from upstream
RoboTwin-Platform/RoboTwin. Users passing names like `close_laptop`,
`close_microwave`, `dump_bin`, `place_block`, `pour_water`,
`fold_cloth`, etc. got past our validator (the names were in the
tuple) but then crashed inside the RoboTwin runtime with a confusing error,
because those tasks don't exist in upstream `envs/`.

- Replace ROBOTWIN_TASKS with a verbatim mirror of upstream's
  `envs/` directory: 50 tasks as of main (was 60 with many
  stale entries). Added a `gh api`-based one-liner comment so
  future bumps are mechanical.
- Update the `60 tasks` claims in robotwin.mdx and
  RoboTwinEnvConfig's docstring to `50`.
- Replace the stale example-task table in robotwin.mdx with ten
  upstream-confirmed examples, and flag `open_laptop` as
  temporarily broken (its `check_success()` uses `self.arm_tag`
  which is only set inside `play_once()`; eval-mode callers hit
  AttributeError).
- Rebuild the "Full benchmark" command with the actual 50-task
  list, omitting `open_laptop`.


* test(robotwin): lower task-count floor from 60 to 50

ROBOTWIN_TASKS was trimmed to 50 tasks (see comment in
`src/lerobot/envs/robotwin.py:48`), but the assertion still
required ≥60, causing CI failures. Align the test with the
current upstream task count.


* fix(envs): preserve AsyncVectorEnv metadata/unwrapped in lazy eval envs

Port of #3416 onto this branch.

* ci: gate Docker Hub login on secret availability


* fix: integrate PR #3315 review feedback

- envs(robotwin): default `observation_height/width` in
  `create_robotwin_envs` to `DEFAULT_CAMERA_H/W` (240/320) so they
  match the D435 dims baked into `task_config/demo_clean.yml`.
- envs(robotwin): resolve `task_config/demo_clean.yml` via
  `CONFIGS_PATH` instead of a cwd-relative path; works regardless
  of where `lerobot-eval` is invoked.
- envs(robotwin): replace `print()` calls in `create_robotwin_envs`
  with `logger.info(...)` (module-level `logger = logging.getLogger`).
- envs(robotwin): use `_LazyAsyncVectorEnv` for the async path so
  async workers start lazily (matches LIBERO / RoboCasa / VLABench).
- envs(robotwin): cast `agent_pos` space + joint-state output to
  float32 end-to-end (was mixed float64/float32).
- envs(configs): use the existing `_make_vec_env_cls(use_async,
  n_envs)` helper in `RoboTwinEnvConfig.create_envs`; drop the
  `get_env_processors` override so RoboTwin uses the identity
  processor inherited from `EnvConfig`.
- processor: delete `RoboTwinProcessorStep` — the float32 cast now
  happens in the wrapper itself, so the processor is redundant.
- tests: drop the `TestRoboTwinProcessorStep` suite; update the
  mock obs fixture to use float32 `joint_action.vector`.
- ci: hoist `ROBOTWIN_POLICY` and `ROBOTWIN_TASKS` to job-level
  env vars so the task list and policy aren't duplicated across
  eval / extract / parse steps.
- docker: pin RoboTwin + CuRobo upstream clones to commit SHAs
  (`RoboTwin@0aeea2d6`, `curobo@ca941586`) for reproducibility.
											
										
										
											2026-04-20 17:46:39 +02:00
+								#!/usr/bin/env python
 								# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 								#
 								# Licensed under the Apache License, Version 2.0 (the "License");
 								# you may not use this file except in compliance with the License.
 								# You may obtain a copy of the License at
 								#
 								#     http://www.apache.org/licenses/LICENSE-2.0
 								#
 								# Unless required by applicable law or agreed to in writing, software
 								# distributed under the License is distributed on an "AS IS" BASIS,
 								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								# See the License for the specific language governing permissions and
 								# limitations under the License.
 								"""Unit tests for the RoboTwin 2.0 Gymnasium wrapper.
 								These tests mock out the SAPIEN-based RoboTwin runtime (task modules +
 								YAML config loader) so they run without the full RoboTwin installation
 								(SAPIEN, CuRobo, mplib, asset downloads, etc.).
 								"""
 								from __future__ import annotations
 								from contextlib import contextmanager
 								from unittest.mock import MagicMock, patch
 								import gymnasium as gym
 								import numpy as np
 								import pytest
 								from lerobot.envs.robotwin import (
 								    ACTION_DIM,
 								    ROBOTWIN_CAMERA_NAMES,
 								    ROBOTWIN_TASKS,
 								    RoboTwinEnv,
 								    create_robotwin_envs,
 								)
 								# ---------------------------------------------------------------------------
 								# Fixtures / helpers
 								# ---------------------------------------------------------------------------
 								def _make_mock_task_env(
 								    height: int = 240,
 								    width: int = 320,
 								    cameras: tuple[str, ...] = ROBOTWIN_CAMERA_NAMES,
 								) -> MagicMock:
 								    """Return a mock that mimics the RoboTwin task class API.
 								    RoboTwin's real get_obs returns
 								        {"observation": {cam: {"rgb": img}}, "joint_action": {"vector": np.ndarray}, ...}
 								    so the mock follows the same nested shape.
 								    """
 								    obs_dict = {
 								        "observation": {cam: {"rgb": np.zeros((height, width, 3), dtype=np.uint8)} for cam in cameras},
 								        "joint_action": {"vector": np.zeros(ACTION_DIM, dtype=np.float32)},
 								        "endpose": {},
 								    }
 								    mock = MagicMock()
 								    mock.get_obs.return_value = obs_dict
 								    mock.setup_demo.return_value = None
 								    mock.take_action.return_value = None
 								    mock.eval_success = False
 								    mock.check_success.return_value = False
 								    mock.close_env.return_value = None
 								    return mock
 								@contextmanager
 								def _patch_runtime(mock_task_instance: MagicMock):
 								    """Patch both the task-class loader and the YAML config loader so the
 								    env can construct + reset without a real RoboTwin install."""
 								    task_cls = MagicMock(return_value=mock_task_instance)
 								    fake_setup = {
 								        "head_camera_h": 240,
 								        "head_camera_w": 320,
 								        "left_embodiment_config": {},
 								        "right_embodiment_config": {},
 								        "left_robot_file": "",
 								        "right_robot_file": "",
 								        "dual_arm_embodied": True,
 								        "render_freq": 0,
 								        "task_name": "beat_block_hammer",
 								        "task_config": "demo_clean",
 								    }
 								    with (
 								        patch("lerobot.envs.robotwin._load_robotwin_task", return_value=task_cls),
 								        patch("lerobot.envs.robotwin._load_robotwin_setup_kwargs", return_value=fake_setup),
 								    ):
 								        yield
 								# ---------------------------------------------------------------------------
 								# RoboTwinEnv unit tests
 								# ---------------------------------------------------------------------------
 								class TestRoboTwinEnv:
 								    def test_observation_space_shape(self):
 								        """observation_space should have the configured h×w×3 for every camera."""
 								        h, w = 240, 320
 								        env = RoboTwinEnv(
 								            task_name="beat_block_hammer",
 								            observation_height=h,
 								            observation_width=w,
 								            camera_names=["head_camera", "left_camera"],
 								        )
 								        pixels_space = env.observation_space["pixels"]
 								        assert pixels_space["head_camera"].shape == (h, w, 3)
 								        assert pixels_space["left_camera"].shape == (h, w, 3)
 								        assert "right_camera" not in pixels_space
 								    def test_action_space(self):
 								        env = RoboTwinEnv(task_name="beat_block_hammer")
 								        assert env.action_space.shape == (ACTION_DIM,)
 								        assert env.action_space.dtype == np.float32
 								    def test_reset_returns_correct_obs_keys(self):
 								        mock_task = _make_mock_task_env()
 								        env = RoboTwinEnv(task_name="beat_block_hammer")
 								        with _patch_runtime(mock_task):
 								            obs, info = env.reset()
 								        assert "pixels" in obs
 								        for cam in ROBOTWIN_CAMERA_NAMES:
 								            assert cam in obs["pixels"], f"Missing camera '{cam}' in obs"
 								        assert "agent_pos" in obs
 								        assert obs["agent_pos"].shape == (ACTION_DIM,)
 								        assert info["is_success"] is False
 								    def test_reset_calls_setup_demo(self):
 								        mock_task = _make_mock_task_env()
 								        env = RoboTwinEnv(task_name="beat_block_hammer")
 								        with _patch_runtime(mock_task):
 								            env.reset(seed=42)
 								        # setup_demo receives the full YAML-derived kwargs plus seed + is_test;
 								        # we only assert the caller-provided bits.
 								        assert mock_task.setup_demo.call_count == 1
 								        call_kwargs = mock_task.setup_demo.call_args.kwargs
 								        assert call_kwargs["seed"] == 42
 								        assert call_kwargs["is_test"] is True
 								    def test_step_returns_correct_types(self):
 								        mock_task = _make_mock_task_env()
 								        env = RoboTwinEnv(task_name="beat_block_hammer")
 								        action = np.zeros(ACTION_DIM, dtype=np.float32)
 								        with _patch_runtime(mock_task):
 								            env.reset()
 								            obs, reward, terminated, truncated, info = env.step(action)
 								        assert isinstance(obs, dict)
 								        assert isinstance(reward, float)
 								        assert isinstance(terminated, bool)
 								        assert isinstance(truncated, bool)
 								        assert isinstance(info, dict)
 								    def test_step_wrong_action_shape_raises(self):
 								        mock_task = _make_mock_task_env()
 								        env = RoboTwinEnv(task_name="beat_block_hammer")
 								        bad_action = np.zeros(7, dtype=np.float32)  # wrong dim
 								        with _patch_runtime(mock_task):
 								            env.reset()
 								            with pytest.raises(ValueError, match="Expected 1-D action"):
 								                env.step(bad_action)
 								    def test_success_terminates_episode(self):
 								        mock_task = _make_mock_task_env()
 								        mock_task.check_success.return_value = True
 								        env = RoboTwinEnv(task_name="beat_block_hammer")
 								        action = np.zeros(ACTION_DIM, dtype=np.float32)
 								        with _patch_runtime(mock_task):
 								            env.reset()
 								            _, _, terminated, _, info = env.step(action)
 								        assert terminated is True
 								        assert info["is_success"] is True
 								    def test_truncation_after_episode_length(self):
 								        """With ``episode_length=2`` the second step must report ``truncated`` as True."""
 								        step_budget = 2
 								        zero_action = np.zeros(ACTION_DIM, dtype=np.float32)
 								        task_stub = _make_mock_task_env()
 								        env = RoboTwinEnv(task_name="beat_block_hammer", episode_length=step_budget)
 								        with _patch_runtime(task_stub):
 								            env.reset()
 								            # Step exactly ``step_budget`` times; only the final result is inspected.
 								            for _ in range(step_budget):
 								                last_result = env.step(zero_action)
 								        _obs, _reward, _terminated, truncated, _info = last_result
 								        assert truncated is True
 								    def test_close_calls_close_env(self):
 								        """Closing the wrapper after a reset must call the task's ``close_env`` exactly once."""
 								        env = RoboTwinEnv(task_name="beat_block_hammer")
 								        task_stub = _make_mock_task_env()
 								        with _patch_runtime(task_stub):
 								            env.reset()
 								            env.close()
 								        assert task_stub.close_env.call_count == 1
 								    def test_black_frame_for_missing_camera(self):
 								        """A requested camera that get_obs() does not provide must come back as an all-zero frame."""
 								        side = 10
 								        requested_cameras = ["head_camera", "left_camera"]
 								        # The mocked task only knows about head_camera, so left_camera is missing.
 								        task_stub = _make_mock_task_env(height=side, width=side, cameras=("head_camera",))
 								        env = RoboTwinEnv(
 								            task_name="beat_block_hammer",
 								            camera_names=requested_cameras,
 								            observation_height=side,
 								            observation_width=side,
 								        )
 								        with _patch_runtime(task_stub):
 								            obs, _ = env.reset()
 								        fallback_frame = obs["pixels"]["left_camera"]
 								        assert fallback_frame.shape == (side, side, 3)
 								        assert fallback_frame.sum() == 0
 								    def test_task_and_task_description_attributes(self):
 								        """``task`` echoes the requested task name and ``task_description`` is a string."""
 								        requested_task = "beat_block_hammer"
 								        env = RoboTwinEnv(task_name=requested_task)
 								        assert env.task == requested_task
 								        description = env.task_description
 								        assert isinstance(description, str)
 								    def test_deferred_init_env_is_none_before_reset(self):
 								        """Right after construction, before any reset, the wrapped ``_env`` must still be None."""
 								        unreset_env = RoboTwinEnv(task_name="beat_block_hammer")
 								        inner_env = unreset_env._env  # noqa: SLF001  (testing internal state)
 								        assert inner_env is None
 								# ---------------------------------------------------------------------------
 								# create_robotwin_envs tests
 								# ---------------------------------------------------------------------------
 								class TestCreateRoboTwinEnvs:
 								    """Tests for the ``create_robotwin_envs`` factory function."""
 								    def test_returns_correct_structure(self):
 								        """The result maps task name -> index 0 -> an instance of the requested vector env class."""
 								        task_name = "beat_block_hammer"
 								        vector_cls = gym.vector.SyncVectorEnv
 								        with _patch_runtime(_make_mock_task_env()):
 								            envs = create_robotwin_envs(task=task_name, n_envs=1, env_cls=vector_cls)
 								        assert task_name in envs
 								        assert 0 in envs[task_name]
 								        assert isinstance(envs[task_name][0], vector_cls)
 								    def test_multi_task(self):
 								        """A comma-separated task string produces one top-level entry per task."""
 								        expected_tasks = {"beat_block_hammer", "click_bell"}
 								        with _patch_runtime(_make_mock_task_env()):
 								            envs = create_robotwin_envs(
 								                task="beat_block_hammer,click_bell",
 								                n_envs=1,
 								                env_cls=gym.vector.SyncVectorEnv,
 								            )
 								        created_tasks = set(envs.keys())
 								        assert created_tasks == expected_tasks
 								    def test_unknown_task_raises(self):
 								        """A task name that is not recognised must raise ``ValueError``."""
 								        factory_kwargs = {
 								            "task": "not_a_real_task",
 								            "n_envs": 1,
 								            "env_cls": gym.vector.SyncVectorEnv,
 								        }
 								        with pytest.raises(ValueError, match="Unknown RoboTwin tasks"):
 								            create_robotwin_envs(**factory_kwargs)
 								    def test_invalid_n_envs_raises(self):
 								        """``n_envs=0`` must raise ``ValueError``."""
 								        factory_kwargs = {
 								            "task": "beat_block_hammer",
 								            "n_envs": 0,
 								            "env_cls": gym.vector.SyncVectorEnv,
 								        }
 								        with pytest.raises(ValueError, match="n_envs must be a positive int"):
 								            create_robotwin_envs(**factory_kwargs)
 								# ---------------------------------------------------------------------------
 								# ROBOTWIN_TASKS list
 								# ---------------------------------------------------------------------------
 								def test_task_list_not_empty():
 								    """``ROBOTWIN_TASKS`` must list at least 50 tasks."""
 								    minimum_task_count = 50
 								    task_count = len(ROBOTWIN_TASKS)
 								    assert task_count >= minimum_task_count
 								def test_all_tasks_are_strings():
 								    """Every entry of ``ROBOTWIN_TASKS`` must be a non-empty ``str``."""
 								    # Gather the entries that are not a string or are empty; none may exist.
 								    offenders = [entry for entry in ROBOTWIN_TASKS if not (isinstance(entry, str) and entry)]
 								    assert not offenders
 								def test_no_duplicate_tasks():
 								    """``ROBOTWIN_TASKS`` must not contain the same task name twice."""
 								    distinct_tasks = set(ROBOTWIN_TASKS)
 								    assert len(distinct_tasks) == len(ROBOTWIN_TASKS)