tests/test_robomme_env.py

# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for the RoboMME env wrapper and config.

RoboMME requires Linux + ManiSkill (Vulkan/SAPIEN), so tests that touch the
env wrapper mock the ``robomme`` package. Tests that only exercise the
dataclass config run without any mocking.
"""

from __future__ import annotations

import sys
from types import ModuleType
from unittest.mock import MagicMock

import numpy as np


def _install_robomme_stub():
    """Place a fake ``robomme`` package and its ``env_record_wrapper`` submodule on ``sys.modules``.

    The fake ``BenchmarkEnvBuilder`` ignores its constructor arguments and hands
    out ``MagicMock`` envs whose ``reset``/``step`` return canned all-zero
    observations. Existing ``sys.modules`` entries under those two names are
    overwritten.
    """

    class FakeBuilder:
        def __init__(self, **kwargs):
            # Keyword arguments are accepted and discarded.
            pass

        def make_env_for_episode(self, episode_idx: int, max_steps: int):
            frame_shape = (256, 256, 3)
            # One observation dict per env; reset() and step() share the same object.
            canned_obs = {
                "front_rgb_list": [np.zeros(frame_shape, dtype=np.uint8)],
                "wrist_rgb_list": [np.zeros(frame_shape, dtype=np.uint8)],
                "joint_state_list": [np.zeros(7, dtype=np.float32)],
                "gripper_state_list": [np.zeros(2, dtype=np.float32)],
            }
            reset_info = {"status": "ongoing", "task_goal": "pick the cube"}
            step_info = {"status": "ongoing", "task_goal": ""}

            fake_env = MagicMock()
            fake_env.reset.return_value = (canned_obs, reset_info)
            fake_env.step.return_value = (canned_obs, 0.0, False, False, step_info)
            return fake_env

    package = ModuleType("robomme")
    submodule = ModuleType("robomme.env_record_wrapper")
    submodule.BenchmarkEnvBuilder = FakeBuilder
    package.env_record_wrapper = submodule

    # Register each module under its own dotted name.
    for module in (package, submodule):
        sys.modules[module.__name__] = module


def _uninstall_robomme_stub():
    """Drop the ``robomme`` entries from ``sys.modules``; absent entries are ignored."""
    for module_name in ("robomme", "robomme.env_record_wrapper"):
        sys.modules.pop(module_name, None)


# ---------------------------------------------------------------------------
# Config tests (no sim required)
# ---------------------------------------------------------------------------


def test_robomme_env_config_defaults():
    """A default-constructed ``RoboMMEEnv`` carries the expected field values."""
    from lerobot.envs.configs import RoboMMEEnv

    defaults = RoboMMEEnv()
    expected_values = {
        "task": "PickXtimes",
        "fps": 10,
        "episode_length": 300,
        "action_space": "joint_angle",
        "dataset_split": "test",
    }
    for field_name, expected in expected_values.items():
        assert getattr(defaults, field_name) == expected, field_name
    assert defaults.task_ids is None


def test_robomme_env_config_type():
    """The config's ``type`` attribute is ``"robomme"``."""
    from lerobot.envs.configs import RoboMMEEnv

    assert RoboMMEEnv().type == "robomme"


def test_robomme_features_map():
    """``features_map`` sends the action, both camera keys and ``agent_pos`` to the expected names."""
    from lerobot.envs.configs import RoboMMEEnv
    from lerobot.utils.constants import ACTION, OBS_IMAGES, OBS_STATE

    mapping = RoboMMEEnv().features_map
    expected_pairs = (
        (ACTION, ACTION),
        ("pixels/image", f"{OBS_IMAGES}.image"),
        ("pixels/wrist_image", f"{OBS_IMAGES}.wrist_image"),
        ("agent_pos", OBS_STATE),
    )
    for env_key, feature_name in expected_pairs:
        assert mapping[env_key] == feature_name, env_key


def test_robomme_features_action_dim_joint_angle():
    """With ``action_space="joint_angle"`` the action feature has shape ``(8,)``."""
    from lerobot.envs.configs import RoboMMEEnv
    from lerobot.utils.constants import ACTION

    action_feature = RoboMMEEnv(action_space="joint_angle").features[ACTION]
    assert action_feature.shape == (8,)


def test_robomme_features_action_dim_ee_pose():
    """With ``action_space="ee_pose"`` the action feature has shape ``(7,)``.

    The shape is expected to be set by the config's ``__post_init__``.
    """
    from lerobot.envs.configs import RoboMMEEnv
    from lerobot.utils.constants import ACTION

    action_feature = RoboMMEEnv(action_space="ee_pose").features[ACTION]
    assert action_feature.shape == (7,)


# ---------------------------------------------------------------------------
# Obs conversion (pure Python, no sim)
# ---------------------------------------------------------------------------


def test_convert_obs_list_format():
    """List-valued obs fields: ``_convert_obs`` keeps only the final entry of each.

    The output is a nested ``pixels`` dict (``image``, ``wrist_image``) plus an
    8-element ``agent_pos`` whose first seven values are the joints and whose
    last value is the first gripper reading.

    The nested layout is required so ``preprocess_observation()`` in
    ``envs/utils.py`` maps each camera to ``observation.images.<cam>``.
    """
    _install_robomme_stub()
    try:
        from lerobot.envs.robomme import RoboMMEGymEnv

        # Create the instance without running __init__; only _convert_obs is exercised.
        wrapper = RoboMMEGymEnv.__new__(RoboMMEGymEnv)

        frame_shape = (256, 256, 3)
        latest_front = np.full(frame_shape, 42, dtype=np.uint8)
        latest_wrist = np.full(frame_shape, 7, dtype=np.uint8)
        latest_joints = np.arange(7, dtype=np.float32)
        latest_gripper = np.array([0.5, 0.5], dtype=np.float32)

        # Every field is a two-item history: an all-zero entry, then the latest reading.
        latest_by_key = {
            "front_rgb_list": latest_front,
            "wrist_rgb_list": latest_wrist,
            "joint_state_list": latest_joints,
            "gripper_state_list": latest_gripper,
        }
        obs_raw = {key: [np.zeros_like(latest), latest] for key, latest in latest_by_key.items()}

        converted = wrapper._convert_obs(obs_raw)

        np.testing.assert_array_equal(converted["pixels"]["image"], latest_front)
        np.testing.assert_array_equal(converted["pixels"]["wrist_image"], latest_wrist)

        state = converted["agent_pos"]
        assert state.shape == (8,)
        np.testing.assert_array_almost_equal(state[:7], latest_joints)
        assert state[7] == latest_gripper[0]
    finally:
        _uninstall_robomme_stub()


def test_convert_obs_array_format():
    """Bare-array obs fields (no surrounding list) are also accepted by ``_convert_obs``."""
    _install_robomme_stub()
    try:
        from lerobot.envs.robomme import RoboMMEGymEnv

        # Create the instance without running __init__; only _convert_obs is exercised.
        wrapper = RoboMMEGymEnv.__new__(RoboMMEGymEnv)

        blank_frame = np.zeros((256, 256, 3), dtype=np.uint8)
        converted = wrapper._convert_obs(
            {
                "front_rgb_list": blank_frame,
                "wrist_rgb_list": blank_frame,
                "joint_state_list": np.zeros(7, dtype=np.float32),
                "gripper_state_list": np.zeros(2, dtype=np.float32),
            }
        )

        for camera in ("image", "wrist_image"):
            assert converted["pixels"][camera].shape == (256, 256, 3)
        assert converted["agent_pos"].shape == (8,)
    finally:
        _uninstall_robomme_stub()


# ---------------------------------------------------------------------------
# create_robomme_envs (mocked sim)
# ---------------------------------------------------------------------------


def test_create_robomme_envs_returns_correct_structure():
    """One task with two task ids -> ``{task_name: {task_id: env}}`` and two ``env_cls`` calls."""
    _install_robomme_stub()
    try:
        from lerobot.envs.robomme import create_robomme_envs

        # The factory is a mock so the number of constructions can be counted.
        fake_env_cls = MagicMock(return_value=MagicMock())
        suites = create_robomme_envs(task="PickXtimes", n_envs=1, task_ids=[0, 1], env_cls=fake_env_cls)

        assert "PickXtimes" in suites
        for task_id in (0, 1):
            assert task_id in suites["PickXtimes"]
        assert fake_env_cls.call_count == 2
    finally:
        _uninstall_robomme_stub()


def test_create_robomme_envs_multi_task():
    """A comma-separated ``task`` string yields exactly one top-level key per task name."""
    _install_robomme_stub()
    try:
        from lerobot.envs.robomme import create_robomme_envs

        task_names = ("PickXtimes", "BinFill", "StopCube")
        fake_env_cls = MagicMock(return_value=MagicMock())
        suites = create_robomme_envs(task="PickXtimes,BinFill,StopCube", n_envs=1, env_cls=fake_env_cls)

        assert set(suites.keys()) == set(task_names)
    finally:
        _uninstall_robomme_stub()


def test_create_robomme_envs_raises_on_invalid_env_cls():
    """Passing ``env_cls=None`` raises ``ValueError`` mentioning that ``env_cls`` must be a callable."""
    import pytest

    _install_robomme_stub()
    try:
        from lerobot.envs.robomme import create_robomme_envs

        expectation = pytest.raises(ValueError, match="env_cls must be a callable")
        with expectation:
            create_robomme_envs(task="PickXtimes", n_envs=1, env_cls=None)
    finally:
        _uninstall_robomme_stub()
feat(envs): add RoboMME benchmark (#3311) * feat(envs): add RoboMME benchmark integration - RoboMME env wrapper with image/wrist_image/state observations - Docker image with Vulkan, SAPIEN, mani-skill deps - CI workflow: 1-episode smoke eval with pepijn223/smolvla_robomme - preprocess_observation: handle image/wrist_image/state keys - pyproject.toml: robomme extra Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * refactor(docker): rebase RoboMME image on huggingface/lerobot-gpu Mirror the libero/metaworld pattern: start from the nightly GPU image (which already has apt deps, uv, venv, and lerobot[all] preinstalled) and only layer on what RoboMME uniquely needs — the Vulkan libs ManiSkill/SAPIEN requires, plus the robomme extra with the gymnasium/numpy overrides. Drops 48 lines of duplicated base setup (CUDA FROM, python install, user creation, venv init, base apt deps) that the nightly image already provides. Net: 102 → 54 lines. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * docs(robomme): drop prototype-branch note and move dataset to lerobot/robomme - Remove the "Related work" block referencing the prototype branch feat/robomme-integration; the PR stands on its own. - Point all dataset references at lerobot/robomme (docs, env module docstring, RoboMMEEnvConfig docstring) — this is the canonical HF location once the dataset is mirrored. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(robomme): make docs build + fast tests green 1. Docs: add robomme to _toctree.yml under Benchmarks so doc-builder's TOC integrity check stops rejecting the new page. 2. Fast tests: robomme's mani-skill transitively pins numpy<2 which is unsatisfiable against the project's numpy>=2 base pin, so `uv sync` couldn't resolve a universal lockfile. Drop robomme as a pyproject extra entirely — it truly cannot coexist with the rest of the dep tree. 
The Dockerfile installs robomme directly from its git URL via `uv pip install --override`, which was already the runtime path. pyproject, docs, env docstrings, and the CI job comment all now point to the docker-only install. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test(robomme): realign unit tests with current env API The tests were written against an earlier env layout and never updated when the wrapper was refactored, so CI's fast-test job was failing with: - KeyError: 'front_rgb' / 'wrist_rgb' — these were renamed to the lerobot-canonical 'image' / 'wrist_image' keys (matching the dataset columns and preprocess_observation's built-in fallbacks). - AssertionError: 'robomme' not in result — create_robomme_envs now returns {task_name: {task_id: env}}, not {'robomme': {...}}, so comma-separated task lists work. - ModuleNotFoundError: lerobot.envs.lazy_vec_env — LazyVectorEnv was removed; create_robomme_envs is straightforward synchronous now. Rewrite the 7 failing cases against the current API, drop the three LazyVectorEnv tests, and add a multi-task test so the new comma-separated task parsing is covered. Stub install/teardown is moved into helpers (`_install_robomme_stub` / `_uninstall_robomme_stub`) so individual tests stop repeating six boilerplate lines. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * ci: point benchmark eval checkpoints at the lerobot/ org mirrors pepijn223/smolvla_* → lerobot/smolvla_* across every benchmark job in this branch (libero, metaworld, and the per-branch benchmark). The checkpoints were mirrored into the lerobot/ org and that's the canonical location going forward. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: integrate PR #3311 review feedback - envs: rename obs keys to pixels/image, pixels/wrist_image, agent_pos - envs: add __post_init__ for dynamic action_dim in RoboMMEEnv config - envs: remove special-case obs conversion in utils.py (no longer needed) - ci: add Docker Hub login, HF_USER_TOKEN guard, --env.task_ids=[0] - scripts: extract_task_descriptions supports multiple task_ids - docs: title to # RoboMME, add image, restructure eval section - tests: update all key assertions to match new obs naming Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(docs): use correct RoboMME teaser image URL Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * ci(robomme): smoke-eval 10 tasks instead of 5 Broader coverage on the RoboMME benchmark CI job: bump the smoke eval from 5 tasks to 10 (one episode each), all drawn from ROBOMME_TASKS. Tasks now run: PickXtimes, BinFill, StopCube, MoveCube, InsertPeg, SwingXtimes, VideoUnmask, ButtonUnmask, PickHighlight, PatternLock. Updated the parse_eval_metrics.py `--task` label from the single `PickXtimes` stub to the full comma list so the metrics artifact reflects what was actually run. `parse_eval_metrics.py` already reads `overall` for multi-task runs, so no parser change is needed. Made-with: Cursor * fix(robomme): nest `pixels` as a dict so preprocess_observation picks it up `_convert_obs` was returning flat keys (`pixels/image`, `pixels/wrist_image`). `preprocess_observation()` in envs/utils.py keys off the top-level `"pixels"` entry and, not finding it, silently dropped every image from the batch. The policy then saw zero image features and raised ValueError: All image features are missing from the batch. Match the LIBERO layout: return `{"pixels": {"image": ..., "wrist_image": ...}, "agent_pos": ...}` and declare the same shape in `observation_space`. 
Made-with: Cursor * fix(robomme): align docs and tests with nested pixels obs layout Addresses PR #3311 review feedback: - Docs: correct observation keys to `pixels/image` / `pixels/wrist_image` (mapped to `observation.images.image` / `observation.images.wrist_image`) and drop the now-obsolete column-rename snippet. - Tests: assert `result["pixels"]["image"]` instead of flat `pixels/image`, matching the nested layout required by `preprocess_observation()`. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * fix(envs): preserve AsyncVectorEnv metadata/unwrapped in lazy eval envs Port of #3416 onto this branch. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * ci: gate Docker Hub login on secret availability Fork PRs cannot access `secrets.DOCKERHUB_LEROBOT_{USERNAME,PASSWORD}`, which made every benchmark job fail at the login step. Gate the login on the env-var expansion of the username so the step is skipped (not failed) when secrets are absent. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * fix(robomme): address review feedback --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-04-20 20:21:27 +02:00			`# Copyright 2026 The HuggingFace Inc. team. All rights reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`"""Unit tests for the RoboMME env wrapper and config.`

			`RoboMME requires Linux + ManiSkill (Vulkan/SAPIEN), so tests that touch the`
			env wrapper mock the ``robomme`` package. Tests that only exercise the
			`dataclass config run without any mocking.`
			`"""`

			`from __future__ import annotations`

			`import sys`
			`from types import ModuleType`
			`from unittest.mock import MagicMock`

			`import numpy as np`


			`def _install_robomme_stub():`
			"""Register a minimal stub for the ``robomme`` package on sys.modules."""
			`stub = ModuleType("robomme")`
			`wrapper_stub = ModuleType("robomme.env_record_wrapper")`

			`class FakeBuilder:`
			`def __init__(self, **kwargs):`
			`pass`

			`def make_env_for_episode(self, episode_idx: int, max_steps: int):`
			`env = MagicMock()`
			`obs = {`
			`"front_rgb_list": [np.zeros((256, 256, 3), dtype=np.uint8)],`
			`"wrist_rgb_list": [np.zeros((256, 256, 3), dtype=np.uint8)],`
			`"joint_state_list": [np.zeros(7, dtype=np.float32)],`
			`"gripper_state_list": [np.zeros(2, dtype=np.float32)],`
			`}`
			`env.reset.return_value = (obs, {"status": "ongoing", "task_goal": "pick the cube"})`
			`env.step.return_value = (obs, 0.0, False, False, {"status": "ongoing", "task_goal": ""})`
			`return env`

			`wrapper_stub.BenchmarkEnvBuilder = FakeBuilder`
			`stub.env_record_wrapper = wrapper_stub`
			`sys.modules["robomme"] = stub`
			`sys.modules["robomme.env_record_wrapper"] = wrapper_stub`


			`def _uninstall_robomme_stub():`
			`sys.modules.pop("robomme", None)`
			`sys.modules.pop("robomme.env_record_wrapper", None)`


			`# ---------------------------------------------------------------------------`
			`# Config tests (no sim required)`
			`# ---------------------------------------------------------------------------`


			`def test_robomme_env_config_defaults():`
			`from lerobot.envs.configs import RoboMMEEnv`

			`cfg = RoboMMEEnv()`
			`assert cfg.task == "PickXtimes"`
			`assert cfg.fps == 10`
			`assert cfg.episode_length == 300`
			`assert cfg.action_space == "joint_angle"`
			`assert cfg.dataset_split == "test"`
			`assert cfg.task_ids is None`


			`def test_robomme_env_config_type():`
			`from lerobot.envs.configs import RoboMMEEnv`

			`cfg = RoboMMEEnv()`
			`assert cfg.type == "robomme"`


			`def test_robomme_features_map():`
			`from lerobot.envs.configs import RoboMMEEnv`
			`from lerobot.utils.constants import ACTION, OBS_IMAGES, OBS_STATE`

			`cfg = RoboMMEEnv()`
			`assert cfg.features_map[ACTION] == ACTION`
			`assert cfg.features_map["pixels/image"] == f"{OBS_IMAGES}.image"`
			`assert cfg.features_map["pixels/wrist_image"] == f"{OBS_IMAGES}.wrist_image"`
			`assert cfg.features_map["agent_pos"] == OBS_STATE`


			`def test_robomme_features_action_dim_joint_angle():`
			`from lerobot.envs.configs import RoboMMEEnv`
			`from lerobot.utils.constants import ACTION`

			`cfg = RoboMMEEnv(action_space="joint_angle")`
			`assert cfg.features[ACTION].shape == (8,)`


			`def test_robomme_features_action_dim_ee_pose():`
			"""`ee_pose` uses a 7-D action; __post_init__ sets the correct shape."""
			`from lerobot.envs.configs import RoboMMEEnv`
			`from lerobot.utils.constants import ACTION`

			`cfg = RoboMMEEnv(action_space="ee_pose")`
			`assert cfg.features[ACTION].shape == (7,)`


			`# ---------------------------------------------------------------------------`
			`# Obs conversion (pure Python, no sim)`
			`# ---------------------------------------------------------------------------`


			`def test_convert_obs_list_format():`
			`"""_convert_obs takes the last element from list-format obs fields and`
			emits a nested ``pixels`` dict (image, wrist_image) plus ``agent_pos``.

			The nested layout is required so ``preprocess_observation()`` in
			``envs/utils.py`` maps each camera to ``observation.images.<cam>``.
			`"""`
			`_install_robomme_stub()`
			`try:`
			`from lerobot.envs.robomme import RoboMMEGymEnv`

			`env = RoboMMEGymEnv.__new__(RoboMMEGymEnv)`

			`front = np.full((256, 256, 3), 42, dtype=np.uint8)`
			`wrist = np.full((256, 256, 3), 7, dtype=np.uint8)`
			`joints = np.arange(7, dtype=np.float32)`
			`gripper = np.array([0.5, 0.5], dtype=np.float32)`

			`obs_raw = {`
			`"front_rgb_list": [np.zeros_like(front), front],`
			`"wrist_rgb_list": [np.zeros_like(wrist), wrist],`
			`"joint_state_list": [np.zeros(7, dtype=np.float32), joints],`
			`"gripper_state_list": [np.zeros(2, dtype=np.float32), gripper],`
			`}`

			`result = env._convert_obs(obs_raw)`
			`np.testing.assert_array_equal(result["pixels"]["image"], front)`
			`np.testing.assert_array_equal(result["pixels"]["wrist_image"], wrist)`
			`assert result["agent_pos"].shape == (8,)`
			`np.testing.assert_array_almost_equal(result["agent_pos"][:7], joints)`
			`assert result["agent_pos"][7] == gripper[0]`
			`finally:`
			`_uninstall_robomme_stub()`


			`def test_convert_obs_array_format():`
			`"""_convert_obs also handles non-list (direct array) obs."""`
			`_install_robomme_stub()`
			`try:`
			`from lerobot.envs.robomme import RoboMMEGymEnv`

			`env = RoboMMEGymEnv.__new__(RoboMMEGymEnv)`

			`front = np.zeros((256, 256, 3), dtype=np.uint8)`
			`obs_raw = {`
			`"front_rgb_list": front,`
			`"wrist_rgb_list": front,`
			`"joint_state_list": np.zeros(7, dtype=np.float32),`
			`"gripper_state_list": np.zeros(2, dtype=np.float32),`
			`}`
			`result = env._convert_obs(obs_raw)`
			`assert result["pixels"]["image"].shape == (256, 256, 3)`
			`assert result["pixels"]["wrist_image"].shape == (256, 256, 3)`
			`assert result["agent_pos"].shape == (8,)`
			`finally:`
			`_uninstall_robomme_stub()`


			`# ---------------------------------------------------------------------------`
			`# create_robomme_envs (mocked sim)`
			`# ---------------------------------------------------------------------------`


			`def test_create_robomme_envs_returns_correct_structure():`
			`"""Single task -> {task_name: {task_id: VectorEnv}} with one entry per task_id."""`
			`_install_robomme_stub()`
			`try:`
			`from lerobot.envs.robomme import create_robomme_envs`

			`env_cls = MagicMock(return_value=MagicMock())`
			`result = create_robomme_envs(`
			`task="PickXtimes",`
			`n_envs=1,`
			`task_ids=[0, 1],`
			`env_cls=env_cls,`
			`)`

			`assert "PickXtimes" in result`
			`assert 0 in result["PickXtimes"]`
			`assert 1 in result["PickXtimes"]`
			`assert env_cls.call_count == 2`
			`finally:`
			`_uninstall_robomme_stub()`


			`def test_create_robomme_envs_multi_task():`
			`"""Comma-separated task list produces one suite per task."""`
			`_install_robomme_stub()`
			`try:`
			`from lerobot.envs.robomme import create_robomme_envs`

			`env_cls = MagicMock(return_value=MagicMock())`
			`result = create_robomme_envs(`
			`task="PickXtimes,BinFill,StopCube",`
			`n_envs=1,`
			`env_cls=env_cls,`
			`)`

			`assert set(result.keys()) == {"PickXtimes", "BinFill", "StopCube"}`
			`finally:`
			`_uninstall_robomme_stub()`


			`def test_create_robomme_envs_raises_on_invalid_env_cls():`
			`_install_robomme_stub()`
			`try:`
			`import pytest`

			`from lerobot.envs.robomme import create_robomme_envs`

			`with pytest.raises(ValueError, match="env_cls must be a callable"):`
			`create_robomme_envs(task="PickXtimes", n_envs=1, env_cls=None)`
			`finally:`
			`_uninstall_robomme_stub()`