diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml
index 00806f990..b65883f1a 100644
--- a/.github/workflows/benchmark_tests.yml
+++ b/.github/workflows/benchmark_tests.yml
@@ -525,3 +525,110 @@ jobs:
           name: robocasa-metrics
           path: /tmp/robocasa-artifacts/metrics.json
           if-no-files-found: warn
+
+  # ── ROBOCEREBRA ───────────────────────────────────────────────────────────
+  # Builds the benchmark image, then runs a 1-episode smoke eval on the LIBERO
+  # libero_10 suite with RoboCerebra camera names (image/wrist_image). The image
+  # is layered on huggingface/lerobot-gpu, which already ships [libero] via [all].
+  robocerebra-integration-test:
+    name: RoboCerebra — build image + 1-episode eval
+    runs-on:
+      group: aws-g6-4xlarge-plus
+    env:
+      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+          lfs: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
+        with:
+          cache-binary: false
+
+      - name: Login to Docker Hub
+        if: ${{ env.DOCKERHUB_USERNAME != '' }}
+        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
+        with:
+          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
+
+      - name: Build RoboCerebra benchmark image
+        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
+        with:
+          context: .
+          file: docker/Dockerfile.benchmark.robocerebra
+          push: false
+          load: true
+          tags: lerobot-benchmark-robocerebra:ci
+          cache-from: type=local,src=/tmp/.buildx-cache-robocerebra
+          cache-to: type=local,dest=/tmp/.buildx-cache-robocerebra,mode=max
+
+      - name: Run RoboCerebra smoke eval (1 episode)
+        if: env.HF_USER_TOKEN != ''  # skipped when the LEROBOT_HF_USER secret is empty
+        run: |
+          # bash -e: abort on the first failing command so a lerobot-eval failure fails this step.
+          docker run --name robocerebra-eval --gpus all \
+            --shm-size=4g \
+            -e HF_HOME=/tmp/hf \
+            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
+            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
+            -e LIBERO_DATA_FOLDER=/tmp/libero_data \
+            lerobot-benchmark-robocerebra:ci \
+            bash -e -c "
+              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
+              lerobot-eval \
+                --policy.path=lerobot/smolvla_robocerebra \
+                --env.type=libero \
+                --env.task=libero_10 \
+                --env.fps=20 \
+                --env.obs_type=pixels_agent_pos \
+                --env.observation_height=256 \
+                --env.observation_width=256 \
+                '--env.camera_name_mapping={\"agentview_image\": \"image\", \"robot0_eye_in_hand_image\": \"wrist_image\"}' \
+                --eval.batch_size=1 \
+                --eval.n_episodes=1 \
+                --eval.use_async_envs=false \
+                --policy.device=cuda \
+                '--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.wrist_image\": \"observation.images.camera2\"}' \
+                --policy.empty_cameras=1 \
+                --output_dir=/tmp/eval-artifacts
+              python scripts/ci/extract_task_descriptions.py --env libero --task libero_10 \
+                --output /tmp/eval-artifacts/task_descriptions.json
+            "
+
+      - name: Copy RoboCerebra artifacts from container
+        if: always()  # also runs after a failed eval so partial artifacts are collected
+        run: |
+          mkdir -p /tmp/robocerebra-artifacts
+          docker cp robocerebra-eval:/tmp/eval-artifacts/. /tmp/robocerebra-artifacts/ 2>/dev/null || true
+          docker rm -f robocerebra-eval || true
+
+      - name: Parse RoboCerebra eval metrics
+        if: always()
+        run: |
+          python3 scripts/ci/parse_eval_metrics.py \
+            --artifacts-dir /tmp/robocerebra-artifacts \
+            --env robocerebra \
+            --task libero_10 \
+            --policy lerobot/smolvla_robocerebra
+
+      - name: Upload RoboCerebra rollout video
+        if: always()
+        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
+        with:
+          name: robocerebra-rollout-video
+          path: /tmp/robocerebra-artifacts/videos/
+          if-no-files-found: warn
+
+      - name: Upload RoboCerebra eval metrics
+        if: always()
+        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
+        with:
+          name: robocerebra-metrics
+          path: /tmp/robocerebra-artifacts/metrics.json
+          if-no-files-found: warn
diff --git a/docker/Dockerfile.benchmark.robocerebra b/docker/Dockerfile.benchmark.robocerebra
new file mode 100644
index 000000000..9378bd66a
--- /dev/null
+++ b/docker/Dockerfile.benchmark.robocerebra
@@ -0,0 +1,43 @@
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Benchmark image for RoboCerebra integration tests.
+# RoboCerebra reuses LIBERO's simulator (libero_10 suite) with a different
+# rename_map, so this image is identical to the LIBERO benchmark image: it
+# extends the nightly GPU base with LIBERO assets + the PR's source code.
+#
+# Build:  docker build -f docker/Dockerfile.benchmark.robocerebra -t lerobot-benchmark-robocerebra .
+# Run:    docker run --gpus all --rm lerobot-benchmark-robocerebra lerobot-eval ...
+
+FROM huggingface/lerobot-gpu:latest
+
+# Pre-download lerobot/libero-assets at build time (runtime fetches time out on
+# CI) and write ~/.libero/config.yaml before any libero import, because
+# libero/libero/__init__.py calls input() when that file is missing. Paths go in
+# as printf arguments, not in the format string, so they are written verbatim.
+RUN LIBERO_DIR=$(python -c \
+      "import importlib.util, os; s=importlib.util.find_spec('libero'); \
+       print(os.path.join(os.path.dirname(s.origin), 'libero'))") && \
+    mkdir -p /home/user_lerobot/.libero && \
+    python -c "\
+from huggingface_hub import snapshot_download; \
+snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \
+                  local_dir='/home/user_lerobot/.libero/assets')" && \
+    printf 'assets: %s\nbddl_files: %s/bddl_files\ndatasets: %s/../datasets\ninit_states: %s/init_files\n' \
+      /home/user_lerobot/.libero/assets "${LIBERO_DIR}" "${LIBERO_DIR}" "${LIBERO_DIR}" > /home/user_lerobot/.libero/config.yaml
+
+# Overlay the PR's source code on top of the nightly image.
+COPY --chown=user_lerobot:user_lerobot . .
+
+CMD ["/bin/bash"]
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index bb0dad1bf..ff3c08e96 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -83,6 +83,8 @@
     title: RoboTwin 2.0
   - local: robocasa
     title: RoboCasa365
+  - local: robocerebra
+    title: RoboCerebra
   - local: envhub_isaaclab_arena
     title: NVIDIA IsaacLab Arena Environments
   title: "Benchmarks"
diff --git a/docs/source/robocerebra.mdx b/docs/source/robocerebra.mdx
new file mode 100644
index 000000000..9776bd40f
--- /dev/null
+++ b/docs/source/robocerebra.mdx
@@ -0,0 +1,99 @@
+# RoboCerebra
+
+[RoboCerebra](https://robocerebra-project.github.io/) is a long-horizon manipulation benchmark that evaluates **high-level reasoning, planning, and memory** in VLAs. Episodes chain multiple sub-goals with language-grounded intermediate instructions, built on top of LIBERO's simulator stack (MuJoCo + robosuite, Franka Panda 7-DOF).
+
+- Paper: [RoboCerebra: A Large-scale Benchmark for Long-horizon Robotic Manipulation Evaluation](https://arxiv.org/abs/2506.06677)
+- Project website: [robocerebra-project.github.io](https://robocerebra-project.github.io/)
+- Dataset: [`lerobot/robocerebra_unified`](https://huggingface.co/datasets/lerobot/robocerebra_unified) — LeRobot v3.0, 6,660 episodes / 571,116 frames at 20 fps, 1,728 language-grounded sub-tasks.
+- Pretrained policy: [`lerobot/smolvla_robocerebra`](https://huggingface.co/lerobot/smolvla_robocerebra)
+
+## Available tasks
+
+RoboCerebra reuses LIBERO's simulator, so evaluation runs against the LIBERO `libero_10` long-horizon suite:
+
+| Suite     | CLI name    | Tasks | Description                                                   |
+| --------- | ----------- | ----- | ------------------------------------------------------------- |
+| LIBERO-10 | `libero_10` | 10    | Long-horizon kitchen/living room tasks chaining 3–6 sub-goals |
+
+Each RoboCerebra episode in the dataset is segmented into multiple sub-tasks with natural-language instructions, which the unified dataset exposes as independent supervision signals.
+
+## Installation
+
+RoboCerebra piggybacks on LIBERO, so the `libero` extra is all you need:
+
+```bash
+pip install -e ".[libero]"
+```
+
+<Tip>
+RoboCerebra requires Linux (MuJoCo / robosuite). Set the rendering backend before training or evaluation:
+
+```bash
+export MUJOCO_GL=egl  # for headless servers (HPC, cloud)
+```
+
+</Tip>
+
+## Evaluation
+
+RoboCerebra evaluation runs against LIBERO's `libero_10` suite with RoboCerebra's camera naming (`image` + `wrist_image`) and one extra empty-camera slot (`--policy.empty_cameras=1`), so that a policy trained on three views receives the input layout it expects:
+
+```bash
+lerobot-eval \
+  --policy.path=lerobot/smolvla_robocerebra \
+  --env.type=libero \
+  --env.task=libero_10 \
+  --env.fps=20 \
+  --env.obs_type=pixels_agent_pos \
+  --env.observation_height=256 \
+  --env.observation_width=256 \
+  '--env.camera_name_mapping={"agentview_image": "image", "robot0_eye_in_hand_image": "wrist_image"}' \
+  --eval.batch_size=1 \
+  --eval.n_episodes=10 \
+  --eval.use_async_envs=false \
+  --policy.device=cuda \
+  '--rename_map={"observation.images.image": "observation.images.camera1", "observation.images.wrist_image": "observation.images.camera2"}' \
+  --policy.empty_cameras=1
+```
+
+### Recommended evaluation episodes
+
+Run **10 episodes per task** across the `libero_10` suite (100 total) for reproducible benchmarking. This matches the protocol used in the RoboCerebra paper.
+
+## Policy inputs and outputs
+
+**Observations:**
+
+- `observation.state` — 8-dim proprioceptive state (7 joint positions + gripper)
+- `observation.images.image` — third-person view, 256×256 HWC uint8
+- `observation.images.wrist_image` — wrist-mounted camera view, 256×256 HWC uint8
+
+**Actions:**
+
+- Continuous control in `Box(-1, 1, shape=(7,))` — end-effector delta (6D) + gripper (1D)
+
+## Training
+
+The unified dataset at [`lerobot/robocerebra_unified`](https://huggingface.co/datasets/lerobot/robocerebra_unified) exposes two RGB streams and language-grounded sub-task annotations:
+
+| Feature                          | Shape         | Description          |
+| -------------------------------- | ------------- | -------------------- |
+| `observation.images.image`       | (256, 256, 3) | Third-person view    |
+| `observation.images.wrist_image` | (256, 256, 3) | Wrist-mounted camera |
+| `observation.state`              | (8,)          | Joint pos + gripper  |
+| `action`                         | (7,)          | EEF delta + gripper  |
+
+Fine-tune a SmolVLA base on it:
+
+```bash
+lerobot-train \
+  --policy.path=lerobot/smolvla_base \
+  --dataset.repo_id=lerobot/robocerebra_unified \
+  --env.type=libero \
+  --env.task=libero_10 \
+  --output_dir=outputs/smolvla_robocerebra
+```
+
+## Reproducing published results
+
+The released checkpoint [`lerobot/smolvla_robocerebra`](https://huggingface.co/lerobot/smolvla_robocerebra) was trained on `lerobot/robocerebra_unified` and evaluated with the command in the [Evaluation](#evaluation) section. CI runs the same command with `--eval.n_episodes=1` as a smoke test on every PR touching the benchmark.