feat(benchmarks): add matrix runner and leaderboard

Merge branch 'main' into feat/libero-benchmark
fix(feetech): motor position readings overflow (#3373 )
2026-06-02 20:01:25 +00:00 · 2026-04-15 21:31:33 +02:00 · 2026-04-14 10:43:49 +02:00 · 2026-04-13 22:39:58 +02:00 · 2026-04-13 21:24:01 +02:00 · 2026-04-13 16:25:42 +02:00
363 changed files with 6927 additions and 2169 deletions
--- a/.github/workflows/benchmark_tests.yml
+++ b/.github/workflows/benchmark_tests.yml
@@ -0,0 +1,490 @@
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Integration tests: build an isolated Docker image per benchmark and run a
 # 1-episode smoke eval. Each benchmark gets its own image so incompatible
 # dependency trees (e.g. hf-libero vs metaworld==3.0.0) can never collide.
 #
 # To add a new benchmark:
 #   1. Add docker/Dockerfile.benchmark.<name>  (install only lerobot[<name>])
 #   2. Copy one of the jobs below and adjust the image name and eval command.
 name: Benchmark Integration Tests
 on:
  # Run manually from the Actions tab
  workflow_dispatch:
  # Run every Monday at 02:00 UTC.
  schedule:
    - cron: "0 2 * * 1"
  push:
    branches:
      - main
    paths:
      - "src/lerobot/envs/**"
      - "src/lerobot/scripts/lerobot_eval.py"
      - "docker/Dockerfile.benchmark.*"
      - ".github/workflows/benchmark_tests.yml"
      - "pyproject.toml"
  pull_request:
    branches:
      - main
    paths:
      - "src/lerobot/envs/**"
      - "src/lerobot/scripts/lerobot_eval.py"
      - "docker/Dockerfile.benchmark.*"
      - ".github/workflows/benchmark_tests.yml"
      - "pyproject.toml"
 permissions:
  contents: read
 env:
  UV_VERSION: "0.8.0"
  PYTHON_VERSION: "3.12"
 # Cancel in-flight runs for the same branch/PR.
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
 jobs:
  # ── LIBERO ────────────────────────────────────────────────────────────────
  # Isolated image: lerobot[libero] only (hf-libero, dm-control, mujoco chain)
  libero-integration-test:
    name: Libero — build image + 1-episode eval
    runs-on:
      group: aws-g6-4xlarge-plus
    env:
      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          persist-credentials: false
          lfs: true
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          cache-binary: false
      - name: Login to Docker Hub
        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
      # Build the benchmark-specific image. The Dockerfile separates dep-install
      # from source-copy, so code-only changes skip the slow uv-sync layer
      # when the runner has a warm Docker daemon cache.
      - name: Build Libero benchmark image
        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
        with:
          context: .
          file: docker/Dockerfile.benchmark.libero
          push: false
          load: true
          tags: lerobot-benchmark-libero:ci
      - name: Run Libero smoke eval (1 episode)
        if: env.HF_USER_TOKEN != ''
        run: |
          # Named container (no --rm) so we can docker cp artifacts out.
          # Output to /tmp inside the container — /artifacts doesn't exist
          # and user_lerobot cannot create root-level dirs.
          docker run --name libero-eval --gpus all \
            --shm-size=4g \
            -e HF_HOME=/tmp/hf \
            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
            lerobot-benchmark-libero:ci \
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
                --policy.path=pepijn223/smolvla_libero \
                --env.type=libero \
                --env.task=libero_spatial \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
                --policy.device=cuda \
                '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
                --policy.empty_cameras=1 \
                --output_dir=/tmp/eval-artifacts
              python scripts/ci/extract_task_descriptions.py \
                --env libero --task libero_spatial \
                --output /tmp/eval-artifacts/task_descriptions.json
            "
      - name: Copy Libero artifacts from container
        if: always()
        run: |
          mkdir -p /tmp/libero-artifacts
          docker cp libero-eval:/tmp/eval-artifacts/. /tmp/libero-artifacts/ 2>/dev/null || true
          docker rm -f libero-eval || true
      - name: Parse Libero eval metrics
        if: always()
        run: |
          python3 scripts/ci/parse_eval_metrics.py \
            --artifacts-dir /tmp/libero-artifacts \
            --env libero \
            --task libero_spatial \
            --policy pepijn223/smolvla_libero
      - name: Upload Libero rollout video
        if: always()
        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
        with:
          name: libero-rollout-video
          path: /tmp/libero-artifacts/videos/
          if-no-files-found: warn
      - name: Upload Libero eval metrics
        if: always()
        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
        with:
          name: libero-metrics
          path: /tmp/libero-artifacts/metrics.json
          if-no-files-found: warn
      # ── LIBERO TRAIN+EVAL SMOKE ──────────────────────────────────────────────
      # Trains SmolVLA for 1 step (batch_size=1, dataset episode 0 only), then
      # immediately runs eval inside the training loop (eval_freq=1, 1 episode).
      # Tests the full train→eval-within-training pipeline end-to-end.
      - name: Run Libero train+eval smoke (1 step, eval_freq=1)
        if: env.HF_USER_TOKEN != ''
        run: |
          docker run --name libero-train-smoke --gpus all \
            --shm-size=4g \
            -e HF_HOME=/tmp/hf \
            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
            lerobot-benchmark-libero:ci \
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              accelerate launch --num_processes=1 \$(which lerobot-train) \
                --policy.path=lerobot/smolvla_base \
                --policy.load_vlm_weights=true \
                --policy.scheduler_decay_steps=25000 \
                --policy.freeze_vision_encoder=false \
                --policy.train_expert_only=false \
                --dataset.repo_id=lerobot/libero \
                --dataset.episodes=[0] \
                --dataset.use_imagenet_stats=false \
                --env.type=libero \
                --env.task=libero_spatial \
                '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
                --policy.empty_cameras=1 \
                --output_dir=/tmp/train-smoke \
                --steps=1 \
                --batch_size=1 \
                --eval_freq=1 \
                --eval.n_episodes=1 \
                --eval.batch_size=1 \
                --eval.use_async_envs=false \
                --save_freq=1 \
                --policy.push_to_hub=false \
                '--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.image2\": \"observation.images.camera2\"}'
            "
      - name: Copy Libero train-smoke artifacts from container
        if: always()
        run: |
          mkdir -p /tmp/libero-train-smoke-artifacts
          docker cp libero-train-smoke:/tmp/train-smoke/. /tmp/libero-train-smoke-artifacts/ 2>/dev/null || true
          docker rm -f libero-train-smoke || true
      - name: Upload Libero train-smoke eval video
        if: always()
        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
        with:
          name: libero-train-smoke-video
          path: /tmp/libero-train-smoke-artifacts/eval/
          if-no-files-found: warn
  # ── METAWORLD ─────────────────────────────────────────────────────────────
  # Isolated image: lerobot[metaworld] only (metaworld==3.0.0, mujoco>=3 chain)
  metaworld-integration-test:
    name: MetaWorld — build image + 1-episode eval
    runs-on:
      group: aws-g6-4xlarge-plus
    env:
      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          persist-credentials: false
          lfs: true
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          cache-binary: false
      - name: Login to Docker Hub
        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
      - name: Build MetaWorld benchmark image
        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
        with:
          context: .
          file: docker/Dockerfile.benchmark.metaworld
          push: false
          load: true
          tags: lerobot-benchmark-metaworld:ci
      - name: Run MetaWorld smoke eval (1 episode)
        if: env.HF_USER_TOKEN != ''
        run: |
          docker run --name metaworld-eval --gpus all \
            --shm-size=4g \
            -e HF_HOME=/tmp/hf \
            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
            lerobot-benchmark-metaworld:ci \
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
                --policy.path=pepijn223/smolvla_metaworld \
                --env.type=metaworld \
                --env.task=metaworld-push-v3 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
                --policy.device=cuda \
                '--rename_map={\"observation.image\": \"observation.images.camera1\"}' \
                --policy.empty_cameras=2 \
                --output_dir=/tmp/eval-artifacts
              python scripts/ci/extract_task_descriptions.py \
                --env metaworld --task metaworld-push-v3 \
                --output /tmp/eval-artifacts/task_descriptions.json
            "
      - name: Copy MetaWorld artifacts from container
        if: always()
        run: |
          mkdir -p /tmp/metaworld-artifacts
          docker cp metaworld-eval:/tmp/eval-artifacts/. /tmp/metaworld-artifacts/ 2>/dev/null || true
          docker rm -f metaworld-eval || true
      - name: Parse MetaWorld eval metrics
        if: always()
        run: |
          python3 scripts/ci/parse_eval_metrics.py \
            --artifacts-dir /tmp/metaworld-artifacts \
            --env metaworld \
            --task metaworld-push-v3 \
            --policy pepijn223/smolvla_metaworld
      - name: Upload MetaWorld rollout video
        if: always()
        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
        with:
          name: metaworld-rollout-video
          path: /tmp/metaworld-artifacts/videos/
          if-no-files-found: warn
      - name: Upload MetaWorld eval metrics
        if: always()
        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
        with:
          name: metaworld-metrics
          path: /tmp/metaworld-artifacts/metrics.json
          if-no-files-found: warn
  # ── LIBERO-plus ───────────────────────────────────────────────────────────
  libero-plus-integration-test:
    name: LIBERO-plus — build image + 1-episode eval
    runs-on:
      group: aws-g6-4xlarge-plus
    env:
      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          persist-credentials: false
          lfs: true
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          cache-binary: false
      - name: Build LIBERO-plus benchmark image
        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
        with:
          context: .
          file: docker/Dockerfile.benchmark.libero_plus
          push: false
          load: true
          tags: lerobot-benchmark-libero-plus:ci
          cache-from: type=local,src=/tmp/.buildx-cache-libero-plus
          cache-to: type=local,dest=/tmp/.buildx-cache-libero-plus,mode=max
      - name: Run LIBERO-plus smoke eval (1 episode)
        if: env.HF_USER_TOKEN != ''
        run: |
          docker run --name libero-plus-eval --gpus all \
            --shm-size=4g \
            -e HF_HOME=/tmp/hf \
            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
            lerobot-benchmark-libero-plus:ci \
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
                --policy.path=lerobot/smolvla_libero_plus \
                --env.type=libero_plus \
                --env.task=libero_spatial \
                '--env.task_ids=[0,100,260,500,1000,1500,2000,2400]' \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
                --policy.device=cuda \
                '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
                --policy.empty_cameras=1 \
                --output_dir=/tmp/eval-artifacts
              python scripts/ci/extract_task_descriptions.py \
                --env libero_plus --task libero_spatial \
                --output /tmp/eval-artifacts/task_descriptions.json
            "
      - name: Copy LIBERO-plus artifacts from container
        if: always()
        run: |
          mkdir -p /tmp/libero-plus-artifacts
          docker cp libero-plus-eval:/tmp/eval-artifacts/. /tmp/libero-plus-artifacts/ 2>/dev/null || true
          docker rm -f libero-plus-eval || true
      - name: Parse LIBERO-plus eval metrics
        if: always()
        run: |
          python3 scripts/ci/parse_eval_metrics.py \
            --artifacts-dir /tmp/libero-plus-artifacts \
            --env libero_plus \
            --task libero_spatial \
            --policy lerobot/smolvla_libero_plus
      - name: Upload LIBERO-plus rollout video
        if: always()
        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
        with:
          name: libero-plus-rollout-video
          path: /tmp/libero-plus-artifacts/videos/
          if-no-files-found: warn
      - name: Upload LIBERO-plus eval metrics
        if: always()
        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
        with:
          name: libero-plus-metrics
          path: /tmp/libero-plus-artifacts/metrics.json
          if-no-files-found: warn
  # ── ROBOMME ───────────────────────────────────────────────────────────────
  robomme-integration-test:
    name: RoboMME — build image + 1-episode eval
    runs-on:
      group: aws-g6-4xlarge-plus
    env:
      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
          lfs: true
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          cache-binary: false
      - name: Build RoboMME benchmark image
        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
        with:
          context: .
          file: docker/Dockerfile.benchmark.robomme
          push: false
          load: true
          tags: lerobot-benchmark-robomme:ci
      - name: Run RoboMME smoke eval (1 episode)
        if: env.HF_USER_TOKEN != ''
        run: |
          docker run --name robomme-eval --gpus all \
            --shm-size=4g \
            -e HF_HOME=/tmp/hf \
            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
            lerobot-benchmark-robomme:ci \
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
                --policy.path=lerobot/smolvla_robomme \
                --env.type=robomme \
                --env.task=PickXtimes,BinFill,StopCube,MoveCube,InsertPeg \
                --env.dataset_split=test \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
                --policy.device=cuda \
                '--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.wrist_image\": \"observation.images.camera2\"}' \
                --policy.empty_cameras=3 \
                --output_dir=/tmp/eval-artifacts
              python scripts/ci/extract_task_descriptions.py \
                --env robomme --task PickXtimes,BinFill,StopCube,MoveCube,InsertPeg \
                --output /tmp/eval-artifacts/task_descriptions.json
            "
      - name: Copy RoboMME artifacts from container
        if: always()
        run: |
          mkdir -p /tmp/robomme-artifacts
          docker cp robomme-eval:/tmp/eval-artifacts/. /tmp/robomme-artifacts/ 2>/dev/null || true
          docker rm -f robomme-eval || true
      - name: Parse RoboMME eval metrics
        if: always()
        run: |
          python3 scripts/ci/parse_eval_metrics.py \
            --artifacts-dir /tmp/robomme-artifacts \
            --env robomme \
            --task PickXtimes \
            --policy lerobot/smolvla_robomme
      - name: Upload RoboMME rollout video
        if: always()
        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
        with:
          name: robomme-rollout-video
          path: /tmp/robomme-artifacts/videos/
          if-no-files-found: warn
      - name: Upload RoboMME eval metrics
        if: always()
        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
        with:
          name: robomme-metrics
          path: /tmp/robomme-artifacts/metrics.json
          if-no-files-found: warn
--- a/.github/workflows/fast_tests.yml
+++ b/.github/workflows/fast_tests.yml
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# This workflow handles fast testing.
+# This workflow validates each optional-dependency tier in isolation.
 # Each tier installs a different extra and runs the full test suite.
 # Tests that require an extra not installed in the current tier are
 # skipped automatically via pytest.importorskip guards.
 name: Fast Tests
 on:
@@ -54,8 +57,9 @@ concurrency:
  cancel-in-progress: true
 jobs:
-  # This job runs pytests with the default dependencies.
+  # This job runs pytests in isolated dependency tiers.
-  # It runs everytime we commit to a PR or push to main
+  # Each tier installs a different extra and runs the full suite;
  # tests gated behind other extras skip automatically.
  fast-pytest-tests:
    name: Fast Pytest Tests
    runs-on: ubuntu-latest
@@ -89,8 +93,9 @@ jobs:
          version: ${{ env.UV_VERSION }}
          python-version: ${{ env.PYTHON_VERSION }}
-      - name: Install lerobot with test extras
+      # ── Tier 1: Base ──────────────────────────────────────
-        run: uv sync --locked --extra "test"
+      - name: "Tier 1 — Install: base"
        run: uv sync --locked --extra test
      - name: Login to Hugging Face
        if: env.HF_USER_TOKEN != ''
@@ -98,5 +103,26 @@ jobs:
          uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
          uv run hf auth whoami
-      - name: Run pytest
+      - name: "Tier 1 — Test: base"
        run: uv run pytest tests -vv --maxfail=10
      # ── Tier 2: Dataset ──────────────────────────────────
      - name: "Tier 2 — Install: dataset"
        run: uv sync --locked --extra test --extra dataset
      - name: "Tier 2 — Test: dataset"
        run: uv run pytest tests -vv --maxfail=10
      # ── Tier 3: Hardware ─────────────────────────────────
      - name: "Tier 3 — Install: hardware"
        run: uv sync --locked --extra test --extra hardware
      - name: "Tier 3 — Test: hardware"
        run: uv run pytest tests -vv --maxfail=10
      # ── Tier 4: Viz ──────────────────────────────────────
      - name: "Tier 4 — Install: viz"
        run: uv sync --locked --extra test --extra viz
      - name: "Tier 4 — Test: viz"
        run: uv run pytest tests -vv --maxfail=10
--- a/benchmarks/__init__.py
+++ b/benchmarks/__init__.py
@@ -0,0 +1 @@
 # Copyright 2026 The HuggingFace Inc. team. All rights reserved.
--- a/benchmarks/libero/README.md
+++ b/benchmarks/libero/README.md
@@ -0,0 +1,60 @@
 # LeRobot LIBERO Training Benchmark
 Train and evaluate all LeRobot policies on [LIBERO](https://libero-project.github.io/) and publish results as a HuggingFace leaderboard dataset.
 ## Policies
 | Policy         | Base Model           | GPUs | LR     | Chunk | Notes                                 |
 | -------------- | -------------------- | ---- | ------ | ----- | ------------------------------------- |
 | pi0            | lerobot/pi0_base     | 8    | 2.5e-5 | 30    | PaliGemma + Gemma flow matching       |
 | pi0_fast       | lerobot/pi0fast-base | 8    | 2.5e-5 | 30    | Requires tokenizer pre-training       |
 | pi05           | lerobot/pi05_base    | 8    | 2.5e-5 | 30    | Quantiles normalization               |
 | groot          | nvidia/GR00T-N1.5-3B | 8    | 1e-4   | 30    | bf16, diffusion head + projector only |
 | act            | From scratch         | 1    | 1e-5   | 30    | ResNet-18, lightweight                |
 | diffusion      | From scratch         | 1    | 1e-4   | 32\*  | U-Net, horizon must be divisible by 8 |
 | smolvla        | lerobot/smolvla_base | 8    | 1e-4   | 30    | SmolVLM2-500M                         |
 | xvla           | lerobot/xvla-widowx  | 4    | 1e-4   | 32\*  | Florence2 + CLIP                      |
 | multi_task_dit | From scratch         | 1    | 2e-5   | 32\*  | CLIP + DiT                            |
 \* Set to 32 (nearest valid value to 30). `diffusion` and `multi_task_dit` configure this through `horizon` rather than `chunk_size`; `xvla` uses `chunk_size`.
 ## Training spec
 - **Steps**: 5,000 per policy
 - **Batch size**: 32 per GPU (effective BS = 32 × number of GPUs: 256 on 8 GPUs, 128 on 4 GPUs, 32 on 1 GPU)
 - **Dataset**: `lerobot/libero` (libero_spatial)
 - **Evaluation**: 20 episodes after training
 - **LR**: each policy's default optimizer/scheduler preset
 - **Results**: each SLURM job publishes its own row to the HF leaderboard dataset automatically
 ## Quick start
 ### 1. Generate SLURM scripts
 ```bash
 python benchmarks/libero/run_benchmark.py \
    --output_dir /scratch/lerobot-benchmark \
    --hub_org lerobot
 ```
 ### 2. Submit jobs
 ```bash
 # If using pi0_fast, submit tokenizer first:
 sbatch /scratch/lerobot-benchmark/slurm_scripts/00_tokenizer.sh
 # Wait, then submit pi0_fast
 # All other policies can run in parallel:
 for script in /scratch/lerobot-benchmark/slurm_scripts/[0-9][0-9]_*.sh; do
    [[ "$script" == *pi0_fast* ]] && continue
    sbatch "$script"
 done
 ```
 Each job publishes its result to `lerobot/benchmark-libero` on the Hub when it finishes.
 ## Prerequisites
 - SLURM cluster with CUDA GPUs (A100 80GB recommended for VLM policies)
 - `pip install lerobot[pi,smolvla,groot,xvla,multi_task_dit,libero] datasets`
 - `huggingface-cli login`
--- a/benchmarks/libero/run_benchmark.py
+++ b/benchmarks/libero/run_benchmark.py
@@ -0,0 +1,606 @@
 #!/usr/bin/env python
 """Generate SLURM sbatch scripts for training all LeRobot policies on LIBERO.
 Each generated script trains one policy, evaluates it, and publishes its
 results row to a HuggingFace leaderboard dataset — no separate collection
 step needed.
 Usage:
    # Generate scripts for all policies:
    python benchmarks/libero/run_benchmark.py \\
        --output_dir /scratch/lerobot-benchmark --hub_org lerobot
    # Generate for a subset:
    python benchmarks/libero/run_benchmark.py \\
        --policies pi0 smolvla act \\
        --output_dir /scratch/lerobot-benchmark --hub_org lerobot
 """
 from __future__ import annotations
 import argparse
 import json
 import subprocess
 import textwrap
 import uuid
 from dataclasses import dataclass, field
 from datetime import UTC, datetime
 from pathlib import Path
 # ──────────────────────────────────────────────────────────────────────
 # Policy benchmark configs
 # ──────────────────────────────────────────────────────────────────────
@dataclass
 class PolicyBenchmarkConfig:
    """Training configuration for a single policy on a benchmark.

    Instances are stored in ``POLICY_CONFIGS`` and translated into CLI
    arguments by ``_training_cli_args``.
    """
    # Emitted as ``--policy.type``.
    policy_type: str
    # Emitted as ``--policy.path`` when truthy; left off the command line otherwise.
    policy_path: str | None = None
    # Multiplied with the per-GPU batch size by ``_publish_snippet`` to get the
    # effective batch size.
    num_gpus: int = 1
    chunk_size: int | None = None  # Set on policies that use chunk_size (not horizon)
    # Extra ``key -> value`` CLI arguments; merged after the fields above, so a
    # duplicate key here overrides them.
    extra_policy_args: dict[str, str] = field(default_factory=dict)
    # NOTE(review): the consumer of the two tokenizer fields is outside this
    # section; presumably they drive a separate tokenizer job — confirm.
    needs_tokenizer: bool = False
    tokenizer_args: dict[str, str] = field(default_factory=dict)
 # Training arguments shared by every policy. ``_training_cli_args`` copies
 # these first and layers the policy-specific values on top, so insertion order
 # here is the order in which the flags appear on the generated command line.
 COMMON_TRAINING_ARGS: dict[str, str] = {
    "dataset.repo_id": "lerobot/libero",
    "dataset.use_imagenet_stats": "false",
    "env.type": "libero",
    "env.task": "libero_spatial",
    # "steps" and "batch_size" are also parsed back with int() by
    # ``_publish_snippet``, so they must stay plain integer strings.
    "steps": "5000",
    "batch_size": "32",
    # NOTE(review): presumably 0 disables evaluation during training — confirm
    # against the trainer's handling of eval_freq.
    "eval_freq": "0",
    "save_freq": "5000",
    "save_checkpoint": "true",
    "log_freq": "100",
    "wandb.enable": "true",
    "policy.push_to_hub": "true",
    # One JSON object built from two adjacent string literals. It contains
    # braces and double quotes, so ``_cli_args`` wraps it in single quotes.
    "rename_map": (
        '{"observation.images.image":"observation.images.camera1",'
        '"observation.images.image2":"observation.images.camera2"}'
    ),
 }
 # Evaluation arguments; env.type / env.task repeat the values used for
 # training in COMMON_TRAINING_ARGS.
 # NOTE(review): the code that consumes this mapping is outside this section.
 EVAL_ARGS: dict[str, str] = {
    "env.type": "libero",
    "env.task": "libero_spatial",
    "eval.n_episodes": "20",
    "eval.batch_size": "10",
 }
 # Per-policy benchmark settings, keyed by the policy name used on the command
 # line, in output paths and in GPU_MEM_ESTIMATES.
 # Where "policy.scheduler_decay_steps" is set it equals
 # COMMON_TRAINING_ARGS["steps"] ("5000").
 POLICY_CONFIGS: dict[str, PolicyBenchmarkConfig] = {
    "pi0": PolicyBenchmarkConfig(
        policy_type="pi0",
        policy_path="lerobot/pi0_base",
        num_gpus=8,
        chunk_size=30,
        extra_policy_args={
            "policy.n_action_steps": "30",
            "policy.scheduler_decay_steps": "5000",
        },
    ),
    # The only entry with needs_tokenizer=True / tokenizer_args.
    "pi0_fast": PolicyBenchmarkConfig(
        policy_type="pi0_fast",
        policy_path="lerobot/pi0fast-base",
        num_gpus=8,
        chunk_size=30,
        extra_policy_args={
            "policy.n_action_steps": "30",
            "policy.scheduler_decay_steps": "5000",
        },
        needs_tokenizer=True,
        tokenizer_args={
            "repo_id": "lerobot/libero",
            "action_horizon": "30",
            "encoded_dims": "0:7",
            "normalization_mode": "QUANTILES",
            "vocab_size": "1024",
            "scale": "10.0",
            "push_to_hub": "true",
        },
    ),
    "pi05": PolicyBenchmarkConfig(
        policy_type="pi05",
        policy_path="lerobot/pi05_base",
        num_gpus=8,
        chunk_size=30,
        extra_policy_args={
            "policy.n_action_steps": "30",
            "policy.scheduler_decay_steps": "5000",
        },
    ),
    # No policy.path: the base checkpoint is passed via policy.base_model_path.
    "groot": PolicyBenchmarkConfig(
        policy_type="groot",
        policy_path=None,
        num_gpus=8,
        chunk_size=30,
        extra_policy_args={
            "policy.n_action_steps": "30",
            "policy.base_model_path": "nvidia/GR00T-N1.5-3B",
            "policy.tune_diffusion_model": "true",
            "policy.tune_projector": "true",
            "policy.tune_llm": "false",
            "policy.tune_visual": "false",
            "policy.use_bf16": "true",
        },
    ),
    "act": PolicyBenchmarkConfig(
        policy_type="act",
        policy_path=None,
        num_gpus=1,
        chunk_size=30,
        extra_policy_args={"policy.n_action_steps": "30"},
    ),
    # chunk_size is left unset; the 32-step window is passed as policy.horizon.
    "diffusion": PolicyBenchmarkConfig(
        policy_type="diffusion",
        policy_path=None,
        num_gpus=1,
        chunk_size=None,
        extra_policy_args={
            "policy.horizon": "32",
            "policy.n_action_steps": "30",
            "policy.n_obs_steps": "2",
        },
    ),
    "smolvla": PolicyBenchmarkConfig(
        policy_type="smolvla",
        policy_path="lerobot/smolvla_base",
        num_gpus=8,
        chunk_size=30,
        extra_policy_args={
            "policy.n_action_steps": "30",
            "policy.load_vlm_weights": "true",
            "policy.freeze_vision_encoder": "false",
            "policy.train_expert_only": "false",
            "policy.scheduler_decay_steps": "5000",
        },
    ),
    # Uses chunk_size (not horizon) with 32 steps, and n_action_steps to match.
    "xvla": PolicyBenchmarkConfig(
        policy_type="xvla",
        policy_path="lerobot/xvla-widowx",
        num_gpus=4,
        chunk_size=32,
        extra_policy_args={
            "policy.n_action_steps": "32",
            "policy.scheduler_decay_steps": "5000",
        },
    ),
    # chunk_size is left unset; the 32-step window is passed as policy.horizon.
    "multi_task_dit": PolicyBenchmarkConfig(
        policy_type="multi_task_dit",
        policy_path=None,
        num_gpus=1,
        chunk_size=None,
        extra_policy_args={
            "policy.horizon": "32",
            "policy.n_action_steps": "30",
        },
    ),
 }
 # Policy names in POLICY_CONFIGS insertion order.
 ALL_POLICY_NAMES = list(POLICY_CONFIGS.keys())
 # Memory estimates (GB) per policy for the SLURM --mem allocation; keys mirror
 # POLICY_CONFIGS.
 # NOTE(review): sbatch --mem requests host RAM per node, not GPU memory —
 # confirm these figures are intended as host-memory sizes despite the name.
 GPU_MEM_ESTIMATES: dict[str, int] = {
    "pi0": 320,
    "pi0_fast": 320,
    "pi05": 280,
    "groot": 320,
    "act": 64,
    "diffusion": 64,
    "smolvla": 160,
    "xvla": 160,
    "multi_task_dit": 64,
 }
 # ──────────────────────────────────────────────────────────────────────
 # SLURM script generation
 # ──────────────────────────────────────────────────────────────────────
 def _cli_args(args: dict[str, str]) -> str:
    """Build a backslash-continued CLI arg string with proper shell quoting.

    Each ``key: value`` pair becomes one line of the form ``    --key=value``
    (four leading spaces); lines are joined with a trailing space, a backslash
    and a newline so the result can follow a command in a shell script.

    Values are passed through ``shlex.quote``: values made only of shell-safe
    characters are emitted unchanged, everything else is single-quoted with
    embedded single quotes escaped. Keys are emitted verbatim.

    Args:
        args: Mapping of argument name (without the leading ``--``) to value.
            Iteration order determines the order of the output lines.

    Returns:
        The joined argument lines, or an empty string when ``args`` is empty.
    """
    # Local import keeps this function self-contained.
    import shlex

    # The previous hand-rolled quoting wrapped values in bare single quotes,
    # which broke on values containing "'" and left metacharacters such as
    # "$", ";" or "*" unquoted.
    return " \\\n".join(f"    --{key}={shlex.quote(str(value))}" for key, value in args.items())
 def _training_cli_args(
    policy_name: str,
    output_dir: Path,
    hub_org: str,
    benchmark_uuid: str,
 ) -> str:
    """Assemble the training CLI arguments for one benchmark policy.

    Starts from ``COMMON_TRAINING_ARGS``, adds the policy's type, optional
    path and chunk size, then its ``extra_policy_args``, and finally the
    run-specific output directory, Hub repo id and W&B names.

    Args:
        policy_name: Key into ``POLICY_CONFIGS``.
        output_dir: Benchmark root; training output goes to
            ``<output_dir>/train/<policy_name>``.
        hub_org: Hub namespace used for ``policy.repo_id``.
        benchmark_uuid: Run identifier; its first 8 characters suffix the
            W&B run name.

    Returns:
        The arguments rendered by ``_cli_args``.

    Raises:
        KeyError: If ``policy_name`` is not in ``POLICY_CONFIGS``.
    """
    policy = POLICY_CONFIGS[policy_name]

    # Flags that are only emitted when the policy config provides a value.
    optional: dict[str, str] = {}
    if policy.policy_path:
        optional["policy.path"] = policy.policy_path
    if policy.chunk_size is not None:
        optional["policy.chunk_size"] = str(policy.chunk_size)

    # On duplicate keys the later value wins but the key keeps its first
    # position, exactly as with sequential dict.update / item assignment.
    merged: dict[str, str] = {
        **COMMON_TRAINING_ARGS,
        "policy.type": policy.policy_type,
        **optional,
        **policy.extra_policy_args,
        "output_dir": str(output_dir / "train" / policy_name),
        "policy.repo_id": f"{hub_org}/{policy_name}_libero",
        "wandb.project": "lerobot-libero-benchmark",
        "wandb.run_name": f"{policy_name}_{benchmark_uuid[:8]}",
    }
    return _cli_args(merged)
 def _publish_snippet(
     policy_name: str,
     output_dir: Path,
     hub_org: str,
     benchmark_uuid: str,
     hub_dataset: str,
 ) -> str:
     """Inline Python that each SLURM job runs to publish its own result row.

     Returns shell text of the form ``python3 -c "<program>"``. Everything in
     single braces is substituted now, at script-generation time; doubled
     braces are literal braces of the embedded program. Because the program is
     wrapped in shell double quotes, it only uses single-quoted Python strings.

     When the job runs, the embedded program:

     * reads ``KEY=VALUE`` lines from ``<output_dir>/logs/<policy>_timing.txt``;
     * scans every JSON file under ``<train_dir>/eval_results`` for
       ``avg_success_rate`` (top level or under ``eval_info``), per-task
       ``*success_rate*`` keys and ``n_episodes``;
     * takes the last ``"loss": <number>`` match from the newest
       ``<policy>_*.out`` log that contains one;
     * takes the maximum of the second CSV column of ``<policy>_gpu_mem.csv``;
     * reads optimizer/scheduler/policy fields from the final checkpoint's
       ``train_config.json``;
     * writes the row to ``<train_dir>/benchmark_result.json`` and then tries
       to append it to the ``hub_dataset`` dataset on the Hub.

     NOTE(review): any failure of ``load_dataset`` falls back to pushing a
     one-row list; if ``push_to_hub`` replaces the split, a transient load
     error would drop previously published rows — confirm. Concurrent jobs
     also perform an unsynchronised load/append/push.
     NOTE(review): ``optimizer_type`` is hard-coded to ``'AdamW'`` rather than
     read from the train config.

     Args:
         policy_name: Key into ``POLICY_CONFIGS``; also used in file names.
         output_dir: Benchmark root holding ``logs/`` and ``train/<policy>/``.
         hub_org: Hub organisation used to form ``policy_repo_id``.
         benchmark_uuid: Identifier stored in the row.
         hub_dataset: Hub dataset repo id the row is pushed to.
     """
     cfg = POLICY_CONFIGS[policy_name]
     steps = int(COMMON_TRAINING_ARGS["steps"])
     bs = int(COMMON_TRAINING_ARGS["batch_size"])
     # Effective batch = per-GPU batch size times the number of GPUs of the policy.
     eff_bs = bs * cfg.num_gpus
     train_dir = output_dir / "train" / policy_name
     # All placeholders below expand to single-line values, so dedent() can still
     # strip the common margin and the embedded program ends up at column 0.
     return textwrap.dedent(f"""\
         python3 -c "
         import json, os, re, sys
         from pathlib import Path
         from datetime import datetime, timezone
         timing = {{}}
         tp = Path('{output_dir}/logs/{policy_name}_timing.txt')
         if tp.exists():
             for ln in tp.read_text().splitlines():
                 if '=' in ln:
                     k, _, v = ln.partition('=')
                     timing[k.strip()] = v.strip()
         # Parse eval results
         eval_sr, eval_per_task, eval_n = None, '{{}}', 0
         eval_dir = Path('{train_dir}/eval_results')
         if eval_dir.exists():
             for jf in eval_dir.glob('**/*.json'):
                 try:
                     d = json.loads(jf.read_text())
                 except Exception:
                     continue
                 if 'avg_success_rate' in d:
                     eval_sr = d['avg_success_rate']
                 elif 'eval_info' in d and 'avg_success_rate' in d.get('eval_info', {{}}):
                     eval_sr = d['eval_info']['avg_success_rate']
                 pt = {{k: v for k, v in d.items() if 'success_rate' in k and k != 'avg_success_rate'}}
                 if pt:
                     eval_per_task = json.dumps(pt)
                 if 'n_episodes' in d:
                     eval_n = d['n_episodes']
         # Parse final loss from SLURM stdout
         final_loss = None
         for lf in sorted(Path('{output_dir}/logs').glob('{policy_name}_*.out'), reverse=True):
             losses = re.findall(r'\\\"loss\\\"\\s*:\\s*([\\d.e+-]+)', lf.read_text())
             if losses:
                 final_loss = float(losses[-1])
                 break
         # Parse peak GPU mem
         peak_mem = 0.0
         csv_p = Path('{output_dir}/logs/{policy_name}_gpu_mem.csv')
         if csv_p.exists():
             for ln in csv_p.read_text().splitlines():
                 parts = ln.strip().split(',')
                 if len(parts) >= 2:
                     try:
                         peak_mem = max(peak_mem, float(parts[1].strip()))
                     except ValueError:
                         pass
         # Parse train config for optimizer details
         lr, opt_wd, sched_type, sched_warmup, sched_decay = 0.0, 0.0, '', 0, 0
         freeze_ve, train_eo, grad_ckpt = False, False, False
         cfg_path = Path('{train_dir}/checkpoints/{steps:06d}/pretrained_model/train_config.json')
         if cfg_path.exists():
             tc = json.loads(cfg_path.read_text())
             o = tc.get('optimizer', {{}})
             lr = o.get('lr', 0.0)
             opt_wd = o.get('weight_decay', 0.0)
             s = tc.get('scheduler', {{}})
             sched_type = s.get('type', '')
             sched_warmup = s.get('num_warmup_steps', 0)
             sched_decay = s.get('num_decay_steps', 0)
             p = tc.get('policy', {{}})
             freeze_ve = p.get('freeze_vision_encoder', False)
             train_eo = p.get('train_expert_only', False)
             grad_ckpt = p.get('gradient_checkpointing', False)
         row = {{
             'benchmark_uuid': '{benchmark_uuid}',
             'policy_type': '{policy_name}',
             'policy_repo_id': '{hub_org}/{policy_name}_libero',
             'base_model_repo_id': '{cfg.policy_path or ""}',
             'dataset_repo_id': '{COMMON_TRAINING_ARGS["dataset.repo_id"]}',
             'env_type': '{COMMON_TRAINING_ARGS["env.type"]}',
             'env_task': '{COMMON_TRAINING_ARGS["env.task"]}',
             'steps': {steps},
             'batch_size_per_gpu': {bs},
             'num_gpus': {cfg.num_gpus},
             'effective_batch_size': {eff_bs},
             'total_samples_seen': {steps * eff_bs},
             'chunk_size': {cfg.chunk_size or 0},
             'learning_rate': lr,
             'optimizer_type': 'AdamW',
             'optimizer_weight_decay': opt_wd,
             'scheduler_type': sched_type,
             'scheduler_warmup_steps': sched_warmup,
             'scheduler_decay_steps': sched_decay,
             'freeze_vision_encoder': freeze_ve,
             'train_expert_only': train_eo,
             'gradient_checkpointing': grad_ckpt,
             'eval_success_rate': eval_sr,
             'eval_success_rate_per_task': eval_per_task,
             'eval_n_episodes': eval_n,
             'final_train_loss': final_loss,
             'training_time_s': float(timing.get('TRAINING_TIME_S', 0)),
             'peak_gpu_memory_mb': peak_mem or float(timing.get('MAX_GPU_MEM_MB', 0)),
             'gpu_type': timing.get('GPU_TYPE', 'unknown'),
             'lerobot_commit': timing.get('LEROBOT_COMMIT', 'unknown'),
             'timestamp': datetime.now(timezone.utc).isoformat(),
         }}
         # Save locally
         Path('{train_dir}/benchmark_result.json').write_text(json.dumps(row, indent=2, default=str))
         # Push to HF dataset
         try:
             from datasets import Dataset, load_dataset
             try:
                 existing = load_dataset('{hub_dataset}', split='train')
                 rows = existing.to_list() + [row]
             except Exception:
                 rows = [row]
             Dataset.from_list(rows).push_to_hub('{hub_dataset}', split='train')
             print('Published result to {hub_dataset}')
         except ImportError:
             print('datasets library not installed — result saved locally only')
         except Exception as e:
             print(f'Failed to push to hub: {{e}} — result saved locally')
         "
     """)
 def _generate_sbatch_script(
     policy_name: str,
     output_dir: Path,
     hub_org: str,
     benchmark_uuid: str,
     hub_dataset: str,
     lerobot_commit: str,
 ) -> str:
     """Build the sbatch script that trains, evaluates and publishes one policy.

     The generated bash script starts a background ``nvidia-smi`` memory
     monitor, runs training through ``accelerate launch``, evaluates the final
     checkpoint when training succeeded, writes ``<policy>_timing.txt`` and
     finally runs the publish snippet from ``_publish_snippet``.

     Two details matter for the script to work at all:

     * Multi-line fragments (training args, eval args, publish snippet) are
       spliced in *after* ``textwrap.dedent``. Interpolating them first puts
       column-0 lines into the text, so the common margin becomes empty and
       ``#!/bin/bash``, the ``#SBATCH`` lines and the ``TIMING_EOF`` heredoc
       terminator would all stay indented.
     * The script runs under ``set -e``, so the exit status of training and
       evaluation is captured with ``|| VAR=$?``. A plain ``VAR=$?`` on the
       next line is never reached after a failure, which would skip the timing
       file and the publish step.

     Args:
         policy_name: Key into ``POLICY_CONFIGS`` / ``GPU_MEM_ESTIMATES``.
         output_dir: Benchmark root; logs go to ``logs/``, runs to ``train/``.
         hub_org: Hub organisation forwarded to the training args and snippet.
         benchmark_uuid: Identifier echoed in the log and stored in the timing file.
         hub_dataset: Hub dataset repo id the result row is published to.
         lerobot_commit: Commit hash recorded in the timing file.

     Returns:
         The complete script text, starting with the ``#!/bin/bash`` line.
     """
     cfg = POLICY_CONFIGS[policy_name]
     steps = int(COMMON_TRAINING_ARGS["steps"])
     log_dir = output_dir / "logs"
     train_dir = output_dir / "train" / policy_name
     checkpoint_path = train_dir / f"checkpoints/{steps:06d}/pretrained_model"
     training_args = _training_cli_args(policy_name, output_dir, hub_org, benchmark_uuid)
     # The eval command lives inside an `if` body: indent every line one level more.
     eval_args = textwrap.indent(_cli_args(EVAL_ARGS), "    ")
     publish = _publish_snippet(policy_name, output_dir, hub_org, benchmark_uuid, hub_dataset)
     # Only single-line values are interpolated here; the @@...@@ markers are
     # replaced once the margin has been stripped.
     template = textwrap.dedent(f"""\
         #!/bin/bash
         #SBATCH --job-name=bench_{policy_name}
         #SBATCH --nodes=1
         #SBATCH --ntasks-per-node=1
         #SBATCH --gres=gpu:{cfg.num_gpus}
         #SBATCH --cpus-per-task={cfg.num_gpus * 8}
         #SBATCH --mem={GPU_MEM_ESTIMATES.get(policy_name, 128)}G
         #SBATCH --time=06:00:00
         #SBATCH --output={log_dir}/{policy_name}_%j.out
         #SBATCH --error={log_dir}/{policy_name}_%j.err
         set -euo pipefail
         echo "=========================================="
         echo "LeRobot LIBERO Benchmark — {policy_name}"
         echo "UUID: {benchmark_uuid}"
         echo "Start: $(date -Iseconds)"
         echo "Host: $(hostname) | GPUs: {cfg.num_gpus}"
         echo "=========================================="
         START_TIME=$(date +%s)
         # GPU memory monitoring (every 30s)
         nvidia-smi --query-gpu=index,memory.used,memory.total,gpu_name \\
             --format=csv,noheader,nounits -l 30 \\
             > "{log_dir}/{policy_name}_gpu_mem.csv" &
         GPU_MONITOR_PID=$!
         # ── Training ──────────────────────────────────────────────────
         echo "[$(date -Iseconds)] Starting training..."
         # Capture the status with || so that set -e does not abort the job here.
         TRAIN_EXIT=0
         accelerate launch --num_processes={cfg.num_gpus} \\
             $(which lerobot-train) \\
         @@TRAINING_ARGS@@ \\
             || TRAIN_EXIT=$?
         TRAIN_END=$(date +%s)
         echo "[$(date -Iseconds)] Training exit code: $TRAIN_EXIT"
         # ── Evaluation ────────────────────────────────────────────────
         EVAL_EXIT=1
         if [ $TRAIN_EXIT -eq 0 ]; then
             echo "[$(date -Iseconds)] Starting evaluation..."
             EVAL_EXIT=0
             lerobot-eval \\
                 --policy.path="{checkpoint_path}" \\
         @@EVAL_ARGS@@ \\
                 --output_dir="{train_dir}/eval_results" \\
                 || EVAL_EXIT=$?
             echo "[$(date -Iseconds)] Eval exit code: $EVAL_EXIT"
         else
             echo "[$(date -Iseconds)] Skipping eval — training failed."
         fi
         # ── Timing ────────────────────────────────────────────────────
         END_TIME=$(date +%s)
         kill $GPU_MONITOR_PID 2>/dev/null || true
         cat > "{log_dir}/{policy_name}_timing.txt" <<TIMING_EOF
         BENCHMARK_UUID={benchmark_uuid}
         POLICY_TYPE={policy_name}
         TRAINING_TIME_S=$((TRAIN_END - START_TIME))
         TOTAL_TIME_S=$((END_TIME - START_TIME))
         TRAIN_EXIT=$TRAIN_EXIT
         EVAL_EXIT=$EVAL_EXIT
         MAX_GPU_MEM_MB=$(awk -F',' '{{print $2}}' "{log_dir}/{policy_name}_gpu_mem.csv" 2>/dev/null | sort -n | tail -1)
         GPU_TYPE=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | head -1 | xargs)
         LEROBOT_COMMIT={lerobot_commit}
         TIMING_EOF
         # ── Publish result to HF dataset ──────────────────────────────
         echo "[$(date -Iseconds)] Publishing result..."
         @@PUBLISH@@
         echo "=========================================="
         echo "Done: $(date -Iseconds)"
         echo "Training: $((TRAIN_END - START_TIME))s | Total: $((END_TIME - START_TIME))s"
         echo "=========================================="
     """)
     # The publish snippet goes last so its (large) body is never re-scanned for markers.
     return (
         template.replace("@@TRAINING_ARGS@@", training_args)
         .replace("@@EVAL_ARGS@@", eval_args)
         .replace("@@PUBLISH@@", publish)
     )
 def _generate_tokenizer_script(
     output_dir: Path,
     hub_org: str,
     benchmark_uuid: str,
 ) -> str:
     """Build the sbatch script that trains the FAST tokenizer for ``pi0_fast``.

     The tokenizer arguments come from ``POLICY_CONFIGS["pi0_fast"].tokenizer_args``
     plus a ``hub_repo_id`` of ``<hub_org>/fast-tokenizer-libero``. The config's
     dict is copied first so the module-level table is not modified.

     The multi-line argument list is inserted after ``textwrap.dedent``: its
     lines are indented less than the template, so interpolating it first
     would shrink the common margin and leave ``#!/bin/bash`` and the
     ``#SBATCH`` directives indented in the generated file.

     Args:
         output_dir: Benchmark root; SLURM logs are written to ``logs/``.
         hub_org: Hub organisation that will own the tokenizer repo.
         benchmark_uuid: Identifier echoed at the start of the job.

     Returns:
         The complete script text, starting with the ``#!/bin/bash`` line.
     """
     cfg = POLICY_CONFIGS["pi0_fast"]
     log_dir = output_dir / "logs"
     tokenizer_hub_repo = f"{hub_org}/fast-tokenizer-libero"
     tok_args = dict(cfg.tokenizer_args)
     tok_args["hub_repo_id"] = tokenizer_hub_repo
     template = textwrap.dedent(f"""\
         #!/bin/bash
         #SBATCH --job-name=bench_tokenizer
         #SBATCH --nodes=1
         #SBATCH --ntasks-per-node=1
         #SBATCH --gres=gpu:1
         #SBATCH --cpus-per-task=8
         #SBATCH --mem=64G
         #SBATCH --time=01:00:00
         #SBATCH --output={log_dir}/tokenizer_%j.out
         #SBATCH --error={log_dir}/tokenizer_%j.err
         set -euo pipefail
         echo "LeRobot — FAST Tokenizer | UUID: {benchmark_uuid}"
         lerobot-train-tokenizer \\
         @@TOKENIZER_ARGS@@
         echo "Tokenizer pushed to: {tokenizer_hub_repo}"
     """)
     return template.replace("@@TOKENIZER_ARGS@@", _cli_args(tok_args))
 # ──────────────────────────────────────────────────────────────────────
 # Main
 # ──────────────────────────────────────────────────────────────────────
 def main() -> None:
     """Generate the SLURM scripts and manifest for the requested policies.

     Parses the command line, creates the output tree (``slurm_scripts/``,
     ``logs/`` and ``train/<policy>/``), writes an executable tokenizer script
     when ``pi0_fast`` is selected, one executable script per policy, and a
     ``benchmark_manifest.json``; then prints submission instructions.
     """
     cli = argparse.ArgumentParser(description="Generate SLURM scripts for LeRobot LIBERO benchmark.")
     cli.add_argument(
         "--policies",
         nargs="+",
         default=ALL_POLICY_NAMES,
         choices=ALL_POLICY_NAMES,
         help="Policies to benchmark (default: all).",
     )
     cli.add_argument("--output_dir", type=Path, required=True, help="Root output directory.")
     cli.add_argument("--hub_org", type=str, default="lerobot", help="HuggingFace org.")
     cli.add_argument("--hub_dataset", type=str, default=None, help="HF dataset repo for results.")
     cli.add_argument("--uuid", type=str, default=None, help="Override benchmark UUID.")
     opts = cli.parse_args()

     benchmark_uuid = opts.uuid or str(uuid.uuid4())
     output_dir: Path = opts.output_dir.resolve()
     policies: list[str] = opts.policies
     hub_org: str = opts.hub_org
     hub_dataset: str = opts.hub_dataset or f"{hub_org}/benchmark-libero"

     # Record the commit of the current checkout; tolerate a missing git or repo.
     try:
         commit = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()
     except (subprocess.CalledProcessError, FileNotFoundError):
         commit = "unknown"

     scripts_dir = output_dir / "slurm_scripts"
     for directory in (scripts_dir, output_dir / "logs"):
         directory.mkdir(parents=True, exist_ok=True)
     for policy in policies:
         (output_dir / "train" / policy).mkdir(parents=True, exist_ok=True)

     def write_executable(destination: Path, content: str) -> Path:
         # Write the script and mark it rwxr-xr-x.
         destination.write_text(content)
         destination.chmod(0o755)
         return destination

     generated: dict[str, Path] = {}

     # Tokenizer job for pi0_fast
     tokenizer_path = None
     if "pi0_fast" in policies:
         tokenizer_path = write_executable(
             scripts_dir / "00_tokenizer.sh",
             _generate_tokenizer_script(output_dir, hub_org, benchmark_uuid),
         )
         generated["tokenizer"] = tokenizer_path
         # Point the pi0_fast training run at the tokenizer this job will push.
         # This must happen before the per-policy scripts are rendered below.
         POLICY_CONFIGS["pi0_fast"].extra_policy_args["policy.action_tokenizer_name"] = (
             f"{hub_org}/fast-tokenizer-libero"
         )

     # Per-policy scripts, numbered in alphabetical order
     for index, policy in enumerate(sorted(policies), start=1):
         generated[policy] = write_executable(
             scripts_dir / f"{index:02d}_{policy}.sh",
             _generate_sbatch_script(policy, output_dir, hub_org, benchmark_uuid, hub_dataset, commit),
         )

     # Manifest
     manifest = {
         "benchmark_uuid": benchmark_uuid,
         "timestamp": datetime.now(UTC).isoformat(),
         "lerobot_commit": commit,
         "hub_org": hub_org,
         "hub_dataset": hub_dataset,
         "policies": policies,
         "output_dir": str(output_dir),
         "scripts": {key: str(script) for key, script in generated.items()},
     }
     (output_dir / "benchmark_manifest.json").write_text(json.dumps(manifest, indent=2))

     # Instructions
     banner = "=" * 60
     report = [
         banner,
         "LeRobot LIBERO Benchmark — Scripts Generated",
         f"UUID: {benchmark_uuid}",
         f"Output: {output_dir}",
         f"Results dataset: {hub_dataset}",
         banner,
         "",
     ]
     report.extend(f"  {generated[key]}" for key in sorted(generated))
     report.append("")
     if tokenizer_path:
         report.extend(
             [
                 "IMPORTANT: pi0_fast requires tokenizer training FIRST.",
                 f"  1. sbatch {tokenizer_path}",
                 "  2. Wait for completion",
                 f"  3. sbatch {generated.get('pi0_fast', 'N/A')}",
                 "  4. All other policies can run in parallel",
             ]
         )
     else:
         report.append("All scripts can be submitted in parallel.")
     report.append("")
     report.append("Each job publishes its result to the HF dataset automatically.")
     for line in report:
         print(line)
 if __name__ == "__main__":
     main()
--- a/benchmarks/publish_benchmark_result.py
+++ b/benchmarks/publish_benchmark_result.py
@@ -0,0 +1,156 @@
 #!/usr/bin/env python
 # Copyright 2026 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Publish benchmark rows and lightweight artifacts to a Hub dataset."""
 from __future__ import annotations
 import argparse
 import json
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 from lerobot.utils.history_repo import UploadTarget, make_hub_file_url, upload_targets, utc_timestamp_slug
 def load_json_if_exists(path: Path) -> dict[str, Any] | None:
    if not path.exists():
        return None
    return json.loads(path.read_text())
 def find_latest_train_config_path(run_root: Path) -> Path | None:
    checkpoints_dir = run_root / "train" / "checkpoints"
    if not checkpoints_dir.exists():
        return None
    candidates = sorted(
        checkpoints_dir.glob("*/pretrained_model/train_config.json"),
        key=lambda path: path.parts[-3],
    )
    return candidates[-1] if candidates else None
 def parse_args() -> argparse.Namespace:
     """Parse the publisher's command line.

     Every option is required except ``--slurm_job_id``, which defaults to an
     empty string. Options are registered in table order, which is also the
     order shown in ``--help``.
     """
     option_table: tuple[tuple[str, dict[str, Any]], ...] = (
         ("--benchmark", {"required": True}),
         ("--policy", {"required": True}),
         ("--run_root", {"required": True, "type": Path}),
         ("--results_repo", {"required": True}),
         ("--git_commit", {"required": True}),
         ("--num_gpus", {"required": True, "type": int}),
         ("--microbatch_per_gpu", {"required": True, "type": int}),
         ("--gradient_accumulation_steps", {"required": True, "type": int}),
         ("--effective_batch_size", {"required": True, "type": int}),
         ("--train_wall_time_s", {"required": True, "type": float}),
         ("--eval_wall_time_s", {"required": True, "type": float}),
         ("--slurm_job_id", {"default": ""}),
         ("--docker_image", {"required": True}),
     )
     parser = argparse.ArgumentParser(description=__doc__)
     for flag, options in option_table:
         parser.add_argument(flag, **options)
     return parser.parse_args()
 def build_row(args: argparse.Namespace) -> tuple[dict[str, Any], list[UploadTarget]]:
     """Assemble the benchmark result row and the list of files to upload.

     The row combines the CLI-provided metadata with ``eval/eval_info.json``
     and the newest checkpoint's ``train_config.json`` when they exist. It is
     written to ``<run_root>/benchmark_row.json`` before returning.

     A run without any checkpoint (for example after a failed training) is
     handled explicitly: ``train_config`` is then an empty dict instead of the
     lookup crashing on a ``None`` path.

     Args:
         args: Namespace produced by ``parse_args``.

     Returns:
         ``(row, upload_list)``. ``upload_list`` always contains the row file,
         plus the eval info and train config when present on disk.
     """
     now = datetime.now(UTC)
     created_at = now.isoformat()
     timestamp = utc_timestamp_slug(now)
     run_id = f"{timestamp}__{args.benchmark}__{args.policy}__{args.slurm_job_id or 'manual'}"

     eval_info_path = args.run_root / "eval" / "eval_info.json"
     eval_info = load_json_if_exists(eval_info_path) or {}

     train_config_path = find_latest_train_config_path(args.run_root)
     train_config: dict[str, Any] = {}
     if train_config_path is not None:
         # Only dereference the path when a checkpoint was actually found.
         train_config = load_json_if_exists(train_config_path) or {}

     artifact_prefix = f"artifacts/{args.benchmark}/{args.policy}/{run_id}"
     row_path_in_repo = f"rows/{args.benchmark}/{args.policy}/{run_id}.json"
     row = {
         "schema_version": 1,
         "created_at": created_at,
         "run_id": run_id,
         "benchmark": args.benchmark,
         "policy": args.policy,
         "git_commit": args.git_commit,
         "slurm_job_id": args.slurm_job_id or None,
         "docker_image": args.docker_image,
         "resources": {
             "num_gpus": args.num_gpus,
             "microbatch_per_gpu": args.microbatch_per_gpu,
             "gradient_accumulation_steps": args.gradient_accumulation_steps,
             "effective_batch_size": args.effective_batch_size,
         },
         "timings": {
             "train_wall_time_s": args.train_wall_time_s,
             "eval_wall_time_s": args.eval_wall_time_s,
             "total_wall_time_s": args.train_wall_time_s + args.eval_wall_time_s,
         },
         "eval": {
             "overall": eval_info.get("overall", {}),
             "per_group": eval_info.get("per_group", {}),
             "per_task_count": len(eval_info.get("per_task", [])),
         },
         "paths": {
             "run_root": str(args.run_root),
             "train_dir": str(args.run_root / "train"),
             "eval_dir": str(args.run_root / "eval"),
         },
         "train_config": train_config,
         "artifact_urls": {
             "row": make_hub_file_url(args.results_repo, row_path_in_repo),
         },
     }

     row_path = args.run_root / "benchmark_row.json"
     row_path.parent.mkdir(parents=True, exist_ok=True)
     upload_list = [UploadTarget(local_path=row_path, path_in_repo=row_path_in_repo)]

     # Optional artifacts: advertise a URL and schedule the upload only for files that exist.
     if eval_info_path.exists():
         row["artifact_urls"]["eval_info"] = make_hub_file_url(
             args.results_repo, f"{artifact_prefix}/eval_info.json"
         )
         upload_list.append(
             UploadTarget(local_path=eval_info_path, path_in_repo=f"{artifact_prefix}/eval_info.json")
         )
     if train_config_path is not None and train_config_path.exists():
         row["artifact_urls"]["train_config"] = make_hub_file_url(
             args.results_repo, f"{artifact_prefix}/train_config.json"
         )
         upload_list.append(
             UploadTarget(local_path=train_config_path, path_in_repo=f"{artifact_prefix}/train_config.json")
         )

     row_path.write_text(json.dumps(row, indent=2, sort_keys=True))
     return row, upload_list
 def main() -> int:
     """Build the result row, upload it with its artifacts, and print the final row.

     After the upload, the value returned by ``upload_targets`` is stored under
     ``uploaded_paths`` and the local ``benchmark_row.json`` is rewritten so it
     matches what is printed.

     Returns:
         ``0``; used as the process exit status.
     """
     cli_args = parse_args()
     row, targets = build_row(cli_args)
     row["uploaded_paths"] = upload_targets(
         repo_id=cli_args.results_repo,
         targets=targets,
         repo_type="dataset",
         private=False,
         commit_message=f"Add benchmark row {row['run_id']}",
     )
     # Serialise once: the file on disk and the console output are identical.
     rendered = json.dumps(row, indent=2, sort_keys=True)
     (cli_args.run_root / "benchmark_row.json").write_text(rendered)
     print(rendered)
     return 0
 if __name__ == "__main__":
     raise SystemExit(main())
--- a/benchmarks/run_benchmark_matrix.py
+++ b/benchmarks/run_benchmark_matrix.py
@@ -0,0 +1,647 @@
 #!/usr/bin/env python
 # Copyright 2026 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Generate lightweight SLURM jobs for policy x benchmark benchmarking."""
 from __future__ import annotations
 import argparse
 import json
 import math
 import subprocess
 from dataclasses import asdict, dataclass, field
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 from lerobot.utils.history_repo import utc_timestamp_slug
 # Bounds applied to PolicySpec.num_gpus when jobs are planned (plan_jobs clamps
 # the requested count into [MIN_GPUS, MAX_GPUS]).
 MAX_GPUS = 8
 MIN_GPUS = 1
 # Default for BenchmarkSpec.train_steps; also reused below as the policies'
 # `policy.scheduler_decay_steps`.
 DEFAULT_STEPS = 20_000
 # Default for BenchmarkSpec.effective_batch_size (samples per optimizer step
 # across all GPUs and accumulation steps).
 DEFAULT_EFFECTIVE_BATCH_SIZE = 256
 # Default for PolicySpec.microbatch_per_gpu; passed to training as `batch_size`.
 DEFAULT_MICROBATCH_PER_GPU = 32
 # Value used for `eval.batch_size` in the benchmark eval args.
 DEFAULT_EVAL_BATCH_SIZE = 1
 # Per-GPU resource sizing. Usage is outside the visible part of the file —
 # presumably feeds the sbatch CPU/memory request; TODO confirm.
 DEFAULT_CPUS_PER_GPU = 8
 DEFAULT_MEMORY_PER_GPU_GB = 40
@dataclass(frozen=True)
 class BenchmarkSpec:
     """Static description of one benchmark: its dataset, image and eval setup.

     Instances are frozen, so fields cannot be rebound; the dict-valued fields
     are still ordinary mutable dicts.
     """

     # Benchmark key; matches the key used in BENCHMARKS.
     name: str
     # Passed to training as `dataset.repo_id` (and to the tokenizer job as `repo_id`).
     dataset_repo_id: str
     # Copied to PlannedJob.docker_image for jobs of this benchmark.
     docker_image: str
     # Passed to evaluation as `env.type`.
     eval_env_type: str
     # Passed to evaluation as `env.task`.
     eval_task: str
     # Passed to evaluation as `eval.n_episodes`.
     eval_n_episodes: int
     # Training `steps`; also used as `save_freq` and to name the final checkpoint directory.
     train_steps: int = DEFAULT_STEPS
     # Target batch size used to derive the gradient accumulation steps.
     effective_batch_size: int = DEFAULT_EFFECTIVE_BATCH_SIZE
     # Extra training args merged over the defaults (policy-level extras are merged afterwards).
     train_extra_args: dict[str, Any] = field(default_factory=dict)
     # Extra eval args merged over the defaults (policy-level extras are merged afterwards).
     eval_extra_args: dict[str, Any] = field(default_factory=dict)
@dataclass(frozen=True)
 class PolicySpec:
     """Static description of one policy: how it is loaded and how it is trained.

     Instances are frozen, so fields cannot be rebound; the dict-valued fields
     are still ordinary mutable dicts.
     """

     # Policy key; matches the key used in POLICIES.
     name: str
     # Passed to training as `policy.type`, but only when `policy_path` is not set.
     policy_type: str
     # Requested GPU count; clamped into [MIN_GPUS, MAX_GPUS] when jobs are planned.
     num_gpus: int
     # Optional pretrained checkpoint; when set it is passed as `policy.path`.
     policy_path: str | None = None
     # Per-GPU batch size, passed to training as `batch_size`.
     microbatch_per_gpu: int = DEFAULT_MICROBATCH_PER_GPU
     # Extra training args; merged last, so they override benchmark-level values.
     extra_train_args: dict[str, Any] = field(default_factory=dict)
     # Extra eval args; merged last, so they override benchmark-level values.
     extra_eval_args: dict[str, Any] = field(default_factory=dict)
     # When true, a tokenizer job is planned and `policy.action_tokenizer_name` is set for training.
     needs_tokenizer: bool = False
     # Arguments merged into the tokenizer job's args (only used when needs_tokenizer is true).
     tokenizer_args: dict[str, Any] = field(default_factory=dict)
@dataclass(frozen=True)
 class PlannedJob:
     """One fully resolved (benchmark, policy) job, as produced by ``plan_jobs``."""

     # Benchmark and policy keys this job was planned for.
     benchmark: str
     policy: str
     # Run directory relative to the output root: runs/<benchmark>/<policy>/<slug>.
     run_rel: str
     # GPU count after clamping to the allowed range.
     num_gpus: int
     # Batch layout: effective batch = num_gpus * microbatch_per_gpu * gradient_accumulation_steps.
     microbatch_per_gpu: int
     gradient_accumulation_steps: int
     effective_batch_size: int
     # Container image taken from the benchmark spec.
     docker_image: str
     # Final training / evaluation argument dicts (see build_train_args / build_eval_args).
     train_args: dict[str, Any]
     eval_args: dict[str, Any]
     # Tokenizer job arguments, or None when the policy needs no tokenizer.
     tokenizer_args: dict[str, Any] | None
     # Path of the sbatch script for this job — presumably the file under the
     # `slurm/` directory computed in plan_jobs; TODO confirm (assignment not visible here).
     script_path: str
 # Registry of supported benchmarks, keyed by benchmark name.
 # Both entries rename the dataset's image keys to camera1/camera2 for training.
 # On the eval side libero_plus maps environment camera names through
 # `env.camera_name_mapping`, while robomme passes a `rename_map` again.
 BENCHMARKS: dict[str, BenchmarkSpec] = {
     "libero_plus": BenchmarkSpec(
         name="libero_plus",
         dataset_repo_id="lerobot/libero_plus",
         docker_image="lerobot-benchmark-libero-plus:latest",
         eval_env_type="libero_plus",
         # Comma-separated list, forwarded verbatim as `env.task`.
         eval_task="libero_spatial,libero_object,libero_goal,libero_10",
         eval_n_episodes=10,
         train_extra_args={
             "rename_map": {
                 "observation.images.image": "observation.images.camera1",
                 "observation.images.image2": "observation.images.camera2",
             },
         },
         eval_extra_args={
             "env.camera_name_mapping": {
                 "agentview_image": "camera1",
                 "robot0_eye_in_hand_image": "camera2",
             },
             "env.max_parallel_tasks": 1,
             "eval.batch_size": DEFAULT_EVAL_BATCH_SIZE,
             "eval.use_async_envs": False,
             "eval.max_episodes_rendered": 0,
             "policy.device": "cuda",
         },
     ),
     "robomme": BenchmarkSpec(
         name="robomme",
         dataset_repo_id="lerobot/robomme",
         docker_image="lerobot-benchmark-robomme:latest",
         eval_env_type="robomme",
         # Comma-separated list, forwarded verbatim as `env.task`.
         eval_task=(
             "BinFill,PickXtimes,SwingXtimes,StopCube,VideoUnmask,VideoUnmaskSwap,"
             "ButtonUnmask,ButtonUnmaskSwap,PickHighlight,VideoRepick,VideoPlaceButton,"
             "VideoPlaceOrder,MoveCube,InsertPeg,PatternLock,RouteStick"
         ),
         eval_n_episodes=50,
         train_extra_args={
             "rename_map": {
                 "observation.images.image": "observation.images.camera1",
                 "observation.images.wrist_image": "observation.images.camera2",
             },
         },
         eval_extra_args={
             "env.dataset_split": "test",
             "env.max_parallel_tasks": 1,
             "rename_map": {
                 "observation.images.image": "observation.images.camera1",
                 "observation.images.wrist_image": "observation.images.camera2",
             },
             "eval.batch_size": DEFAULT_EVAL_BATCH_SIZE,
             "eval.use_async_envs": False,
             "eval.max_episodes_rendered": 0,
             "policy.device": "cuda",
         },
     ),
 }
 # Registry of supported policies, keyed by policy name.
 # With the default microbatch of 32 per GPU and an effective batch of 256, the
 # GPU counts below give 1 (8 GPUs), 2 (4 GPUs) or 8 (1 GPU) accumulation steps.
 # NOTE(review): `policy.scheduler_decay_steps` is pinned to DEFAULT_STEPS, not to
 # the benchmark's train_steps — the two diverge if a BenchmarkSpec overrides
 # train_steps; confirm that is intended.
 POLICIES: dict[str, PolicySpec] = {
     "pi0": PolicySpec(
         name="pi0",
         policy_type="pi0",
         policy_path="lerobot/pi0_base",
         num_gpus=8,
         extra_train_args={
             "policy.n_action_steps": 30,
             "policy.scheduler_decay_steps": DEFAULT_STEPS,
             "policy.empty_cameras": 0,
         },
     ),
     "pi0_fast": PolicySpec(
         name="pi0_fast",
         policy_type="pi0_fast",
         policy_path="lerobot/pi0fast-base",
         num_gpus=8,
         extra_train_args={
             "policy.n_action_steps": 30,
             "policy.scheduler_decay_steps": DEFAULT_STEPS,
             "policy.empty_cameras": 0,
         },
         # The only policy with a tokenizer job; plan_jobs adds repo_id,
         # output_dir and hub_repo_id to the args below.
         needs_tokenizer=True,
         tokenizer_args={
             "action_horizon": 30,
             "encoded_dims": "0:7",
             "normalization_mode": "QUANTILES",
             "vocab_size": 1024,
             "scale": 10.0,
             "push_to_hub": True,
         },
     ),
     "pi05": PolicySpec(
         name="pi05",
         policy_type="pi05",
         policy_path="lerobot/pi05_base",
         num_gpus=8,
         extra_train_args={
             "policy.n_action_steps": 30,
             "policy.scheduler_decay_steps": DEFAULT_STEPS,
             "policy.empty_cameras": 0,
         },
     ),
     # No policy_path: trained via `policy.type`, with the base model given as a policy arg.
     "groot": PolicySpec(
         name="groot",
         policy_type="groot",
         num_gpus=8,
         extra_train_args={
             "policy.n_action_steps": 30,
             "policy.base_model_path": "nvidia/GR00T-N1.5-3B",
             "policy.tune_diffusion_model": True,
             "policy.tune_projector": True,
             "policy.tune_llm": False,
             "policy.tune_visual": False,
             "policy.use_bf16": True,
         },
     ),
     "act": PolicySpec(
         name="act",
         policy_type="act",
         num_gpus=1,
         extra_train_args={
             "policy.n_action_steps": 30,
         },
     ),
     "diffusion": PolicySpec(
         name="diffusion",
         policy_type="diffusion",
         num_gpus=1,
         extra_train_args={
             "policy.horizon": 32,
             "policy.n_action_steps": 30,
             "policy.n_obs_steps": 2,
         },
     ),
     "smolvla": PolicySpec(
         name="smolvla",
         policy_type="smolvla",
         policy_path="lerobot/smolvla_base",
         num_gpus=8,
         extra_train_args={
             "policy.n_action_steps": 30,
             "policy.load_vlm_weights": True,
             "policy.freeze_vision_encoder": False,
             "policy.train_expert_only": False,
             "policy.scheduler_decay_steps": DEFAULT_STEPS,
             "policy.empty_cameras": 1,
         },
     ),
     # NOTE(review): the only entry with n_action_steps=32 instead of 30 — confirm intentional.
     "xvla": PolicySpec(
         name="xvla",
         policy_type="xvla",
         policy_path="lerobot/xvla-widowx",
         num_gpus=4,
         extra_train_args={
             "policy.n_action_steps": 32,
             "policy.scheduler_decay_steps": DEFAULT_STEPS,
             "policy.empty_cameras": 1,
         },
     ),
     "multi_task_dit": PolicySpec(
         name="multi_task_dit",
         policy_type="multi_task_dit",
         num_gpus=1,
         extra_train_args={
             "policy.horizon": 32,
             "policy.n_action_steps": 30,
         },
     ),
 }
 def normalize_repo_id(hub_org: str, repo_or_id: str) -> str:
     """Return a full ``org/name`` repo id.

     A value that already contains a ``/`` is returned unchanged; a bare repo
     name is prefixed with ``hub_org``.
     """
     if "/" in repo_or_id:
         return repo_or_id
     return f"{hub_org}/{repo_or_id}"
 def get_requested_names(
    requested: list[str] | None,
    available: dict[str, Any],
    *,
    kind: str,
 ) -> list[str]:
    if not requested:
        return list(available)
    unknown = sorted(set(requested) - set(available))
    if unknown:
        raise ValueError(f"Unknown {kind}: {', '.join(unknown)}. Available: {', '.join(available)}")
    return requested
 def compute_gradient_accumulation_steps(
     *,
     effective_batch_size: int,
     num_gpus: int,
     microbatch_per_gpu: int,
 ) -> int:
     """Return how many micro-steps must be accumulated to reach the effective batch.

     One optimizer micro-step processes ``num_gpus * microbatch_per_gpu``
     samples; the effective batch size has to be an exact multiple of that.

     Args:
         effective_batch_size: Target number of samples per optimizer update.
         num_gpus: Number of GPUs used for training.
         microbatch_per_gpu: Per-GPU batch size.

     Returns:
         ``effective_batch_size // (num_gpus * microbatch_per_gpu)``, always >= 1.

     Raises:
         ValueError: If any argument is not positive, or if the effective batch
             size is not a multiple of the per-step batch.
     """
     # Reject non-positive inputs up front: they would otherwise surface as a
     # ZeroDivisionError or as a meaningless zero/negative step count.
     for label, value in (
         ("effective_batch_size", effective_batch_size),
         ("num_gpus", num_gpus),
         ("microbatch_per_gpu", microbatch_per_gpu),
     ):
         if value <= 0:
             raise ValueError(f"{label} must be a positive integer, got {value}.")
     per_step_batch = num_gpus * microbatch_per_gpu
     if effective_batch_size % per_step_batch != 0:
         raise ValueError(
             f"Cannot reach effective batch {effective_batch_size} with {num_gpus=} and "
             f"{microbatch_per_gpu=}."
         )
     return effective_batch_size // per_step_batch
 def make_run_slug() -> str:
     """Return a new run identifier produced by ``utc_timestamp_slug()``."""
     slug = utc_timestamp_slug()
     return slug
 def shell_value(value: Any) -> str:
     """Render ``value`` as a double-quoted shell word.

     Booleans become ``true``/``false``, dicts and lists become JSON with
     sorted keys, everything else goes through ``str()``. Backslash, double
     quote, ``$`` and backtick are then backslash-escaped, so the text is taken
     literally inside the surrounding double quotes.
     """
     # bool is tested first on purpose: it must not fall through to str().
     if isinstance(value, bool):
         text = "true" if value else "false"
     elif isinstance(value, (dict, list)):
         text = json.dumps(value, sort_keys=True)
     else:
         text = str(value)
     # Single pass over the text; each special character maps to its escaped form.
     escapes = str.maketrans({"\\": "\\\\", '"': '\\"', "$": "\\$", "`": "\\`"})
     return '"' + text.translate(escapes) + '"'
 def format_cli_args(args: dict[str, Any]) -> str:
     """Render ``args`` as ``--key="value"`` lines joined by shell line continuations.

     Each value is quoted with ``shell_value``; every line is indented by two
     spaces and all but the last end in `` \\``.
     """
     rendered = [f"  --{name}={shell_value(raw)}" for name, raw in args.items()]
     return " \\\n".join(rendered)
 def build_train_args(
     *,
     benchmark: BenchmarkSpec,
     policy: PolicySpec,
     train_dir: str,
     gradient_accumulation_steps: int,
 ) -> dict[str, Any]:
     """Compose the training arguments for one (benchmark, policy) pair.

     Layers, from lowest to highest precedence: fixed defaults, the policy
     selector (``policy.path`` when the policy has a pretrained path, otherwise
     ``policy.type``), the benchmark's extra args, then the policy's extra args.
     A key overridden by a later layer keeps its original position.
     """
     if policy.policy_path:
         policy_selector = {"policy.path": policy.policy_path}
     else:
         policy_selector = {"policy.type": policy.policy_type}
     return {
         "dataset.repo_id": benchmark.dataset_repo_id,
         "output_dir": train_dir,
         "steps": benchmark.train_steps,
         "batch_size": policy.microbatch_per_gpu,
         "gradient_accumulation_steps": gradient_accumulation_steps,
         # No evaluation during training; a single checkpoint is saved at the last step.
         "eval_freq": 0,
         "save_freq": benchmark.train_steps,
         "save_checkpoint": True,
         "log_freq": 100,
         "wandb.enable": False,
         "policy.push_to_hub": False,
         "policy.device": "cuda",
         **policy_selector,
         **benchmark.train_extra_args,
         **policy.extra_train_args,
     }
 def build_eval_args(
     *,
     benchmark: BenchmarkSpec,
     policy: PolicySpec,
     checkpoint_path: str,
     eval_dir: str,
 ) -> dict[str, Any]:
     """Compose the evaluation arguments for one (benchmark, policy) pair.

     Base arguments point at the trained checkpoint and the benchmark's
     environment; the benchmark's extra eval args are merged over them and the
     policy's extra eval args are merged last.
     """
     return {
         "policy.path": checkpoint_path,
         "env.type": benchmark.eval_env_type,
         "env.task": benchmark.eval_task,
         "eval.n_episodes": benchmark.eval_n_episodes,
         "output_dir": eval_dir,
         **benchmark.eval_extra_args,
         **policy.extra_eval_args,
     }
 def plan_jobs(
    *,
    output_dir: Path,
    hub_org: str,
    results_repo: str,
    policies: list[str],
    benchmarks: list[str],
 ) -> list[PlannedJob]:
    """Expand the benchmark x policy matrix into a list of planned jobs.

    For every (benchmark, policy) pair this computes the GPU count, the
    gradient-accumulation factor, the in-container run directories, and the
    argument mappings for training, evaluation and (when the policy needs
    one) tokenizer training.

    Args:
        output_dir: Host directory of the benchmark run; sbatch scripts are
            planned under ``output_dir / "slurm"``.
        hub_org: Hub namespace used to name the per-job tokenizer repository.
        results_repo: Accepted for interface compatibility; not used while
            planning.
        policies: Keys into ``POLICIES``.
        benchmarks: Keys into ``BENCHMARKS``.

    Returns:
        One ``PlannedJob`` per pair, ordered benchmark-major, policy-minor.

    Raises:
        KeyError: If a name is missing from ``BENCHMARKS`` or ``POLICIES``.
    """
    # Only ``results_repo`` is unused here; ``hub_org`` feeds the tokenizer repo id below.
    del results_repo
    scripts_dir = output_dir / "slurm"
    jobs: list[PlannedJob] = []
    for benchmark_name in benchmarks:
        benchmark = BENCHMARKS[benchmark_name]
        for policy_name in policies:
            policy = POLICIES[policy_name]
            # Clamp the policy's requested GPU count into [MIN_GPUS, MAX_GPUS].
            num_gpus = max(MIN_GPUS, min(policy.num_gpus, MAX_GPUS))
            run_rel = f"runs/{benchmark_name}/{policy_name}/{make_run_slug()}"
            # Paths below are container paths: the host output dir is mounted at /benchmark-output.
            run_root = f"/benchmark-output/{run_rel}"
            gradient_accumulation_steps = compute_gradient_accumulation_steps(
                effective_batch_size=benchmark.effective_batch_size,
                num_gpus=num_gpus,
                microbatch_per_gpu=policy.microbatch_per_gpu,
            )
            train_dir = f"{run_root}/train"
            # Evaluation loads the checkpoint directory named after the final training step.
            checkpoint_path = f"{train_dir}/checkpoints/{benchmark.train_steps:06d}/pretrained_model"
            eval_dir = f"{run_root}/eval"
            train_args = build_train_args(
                benchmark=benchmark,
                policy=policy,
                train_dir=train_dir,
                gradient_accumulation_steps=gradient_accumulation_steps,
            )
            eval_args = build_eval_args(
                benchmark=benchmark,
                policy=policy,
                checkpoint_path=checkpoint_path,
                eval_dir=eval_dir,
            )
            tokenizer_args = None
            if policy.needs_tokenizer:
                tokenizer_repo_id = f"{hub_org}/{policy_name}-{benchmark_name}-tokenizer"
                # Policy-level tokenizer settings come last so they can override the defaults.
                tokenizer_args = {
                    "repo_id": benchmark.dataset_repo_id,
                    "output_dir": f"{run_root}/tokenizer",
                    "hub_repo_id": tokenizer_repo_id,
                    **policy.tokenizer_args,
                }
                # Point training at the tokenizer repository named above.
                train_args["policy.action_tokenizer_name"] = tokenizer_repo_id
            script_path = str(scripts_dir / f"{benchmark_name}__{policy_name}.sbatch")
            jobs.append(
                PlannedJob(
                    benchmark=benchmark_name,
                    policy=policy_name,
                    run_rel=run_rel,
                    num_gpus=num_gpus,
                    microbatch_per_gpu=policy.microbatch_per_gpu,
                    gradient_accumulation_steps=gradient_accumulation_steps,
                    effective_batch_size=benchmark.effective_batch_size,
                    docker_image=benchmark.docker_image,
                    train_args=train_args,
                    eval_args=eval_args,
                    tokenizer_args=tokenizer_args,
                    script_path=script_path,
                )
            )
    return jobs
 def render_sbatch_script(
    *,
    job: PlannedJob,
    output_dir: Path,
    results_repo_id: str,
    git_commit: str,
 ) -> str:
    """Render the Slurm batch script that runs one planned benchmark job.

    The generated script runs up to four ``docker run`` steps in sequence,
    each with the resolved ``output_dir`` mounted at ``/benchmark-output``:

    1. tokenizer training (only when ``job.tokenizer_args`` is set),
    2. training through ``accelerate launch``,
    3. evaluation through ``lerobot-eval``,
    4. result publication through ``benchmarks/publish_benchmark_result.py``.

    Train and eval wall times are measured on the host with ``date +%s`` and
    forwarded to the publish step as environment variables.

    Args:
        job: The planned job to render.
        output_dir: Host output directory; resolved to an absolute path.
        results_repo_id: Repository id passed to the publish step.
        git_commit: Commit hash passed to the publish step.

    Returns:
        The full text of the sbatch script.
    """
    host_output_dir = output_dir.resolve()
    run_root = f"/benchmark-output/{job.run_rel}"
    host_run_root = host_output_dir / job.run_rel
    # CPU and memory requests scale linearly with the GPU count, never below one GPU's share.
    cpus_per_task = max(DEFAULT_CPUS_PER_GPU, DEFAULT_CPUS_PER_GPU * job.num_gpus)
    mem_gb = max(DEFAULT_MEMORY_PER_GPU_GB, DEFAULT_MEMORY_PER_GPU_GB * job.num_gpus)
    gpu_ids_expr = "${GPU_IDS}"
    train_cli = format_cli_args(job.train_args)
    eval_cli = format_cli_args(job.eval_args)
    tokenizer_command = ""
    if job.tokenizer_args:
        tokenizer_cli = format_cli_args(job.tokenizer_args)
        tokenizer_command = f"""
 docker run --rm --gpus all \\
  --shm-size=16g \\
  -e CUDA_VISIBLE_DEVICES={gpu_ids_expr} \\
  -e HF_TOKEN="${{HF_TOKEN:-}}" \\
  -e HF_USER_TOKEN="${{HF_TOKEN:-}}" \\
  -e HF_HOME=/tmp/hf \\
  -v "{host_output_dir}:/benchmark-output" \\
  -w /lerobot \\
  "{job.docker_image}" \\
  bash -lc '
    set -euo pipefail
    if [[ -n "${{HF_TOKEN:-}}" ]]; then
      hf auth login --token "${{HF_TOKEN}}" --add-to-git-credential 2>/dev/null || true
    fi
    lerobot-train-tokenizer \\
 {tokenizer_cli}
  '
 """
    # Everything inside the single-quoted `bash -lc` bodies is expanded inside the
    # container, so any host variable referenced there must be forwarded with `-e`.
    # SLURM_JOB_ID is forwarded to the publish step for exactly that reason.
    return f"""#!/bin/bash
 #SBATCH --job-name=bench-{job.benchmark}-{job.policy}
 #SBATCH --gres=gpu:{job.num_gpus}
 #SBATCH --cpus-per-task={cpus_per_task}
 #SBATCH --mem={mem_gb}G
 #SBATCH --output={host_output_dir}/logs/{job.benchmark}__{job.policy}__%j.out
 #SBATCH --error={host_output_dir}/logs/{job.benchmark}__{job.policy}__%j.err
 set -euo pipefail
 HF_TOKEN="${{HF_TOKEN:-${{HF_USER_TOKEN:-}}}}"
 GPU_IDS="$(seq -s, 0 $(({job.num_gpus} - 1)))"
 RUN_ROOT="{run_root}"
 mkdir -p "{host_output_dir}/logs"
 mkdir -p "{host_run_root.parent}"
 {tokenizer_command}
 TRAIN_START="$(date +%s)"
 docker run --rm --gpus all \\
  --shm-size=16g \\
  -e CUDA_VISIBLE_DEVICES="${{GPU_IDS}}" \\
  -e HF_TOKEN="${{HF_TOKEN:-}}" \\
  -e HF_USER_TOKEN="${{HF_TOKEN:-}}" \\
  -e HF_HOME=/tmp/hf \\
  -v "{host_output_dir}:/benchmark-output" \\
  -w /lerobot \\
  "{job.docker_image}" \\
  bash -lc '
    set -euo pipefail
    if [[ -n "${{HF_TOKEN:-}}" ]]; then
      hf auth login --token "${{HF_TOKEN}}" --add-to-git-credential 2>/dev/null || true
    fi
    accelerate launch --num_processes={job.num_gpus} $(which lerobot-train) \\
 {train_cli}
  '
 TRAIN_END="$(date +%s)"
 EVAL_START="$(date +%s)"
 docker run --rm --gpus all \\
  --shm-size=16g \\
  -e CUDA_VISIBLE_DEVICES="${{GPU_IDS}}" \\
  -e HF_TOKEN="${{HF_TOKEN:-}}" \\
  -e HF_USER_TOKEN="${{HF_TOKEN:-}}" \\
  -e HF_HOME=/tmp/hf \\
  -v "{host_output_dir}:/benchmark-output" \\
  -w /lerobot \\
  "{job.docker_image}" \\
  bash -lc '
    set -euo pipefail
    if [[ -n "${{HF_TOKEN:-}}" ]]; then
      hf auth login --token "${{HF_TOKEN}}" --add-to-git-credential 2>/dev/null || true
    fi
    lerobot-eval \\
 {eval_cli}
  '
 EVAL_END="$(date +%s)"
 TRAIN_WALL_TIME_S="$((TRAIN_END - TRAIN_START))"
 EVAL_WALL_TIME_S="$((EVAL_END - EVAL_START))"
 docker run --rm --gpus all \\
  --shm-size=16g \\
  -e CUDA_VISIBLE_DEVICES="${{GPU_IDS}}" \\
  -e HF_TOKEN="${{HF_TOKEN:-}}" \\
  -e HF_USER_TOKEN="${{HF_TOKEN:-}}" \\
  -e HF_HOME=/tmp/hf \\
  -e RUN_ROOT="${{RUN_ROOT}}" \\
  -e TRAIN_WALL_TIME_S="${{TRAIN_WALL_TIME_S}}" \\
  -e EVAL_WALL_TIME_S="${{EVAL_WALL_TIME_S}}" \\
  -e SLURM_JOB_ID="${{SLURM_JOB_ID:-}}" \\
  -v "{host_output_dir}:/benchmark-output" \\
  -w /lerobot \\
  "{job.docker_image}" \\
  bash -lc '
    set -euo pipefail
    if [[ -n "${{HF_TOKEN:-}}" ]]; then
      hf auth login --token "${{HF_TOKEN}}" --add-to-git-credential 2>/dev/null || true
    fi
    uv run python benchmarks/publish_benchmark_result.py \\
      --benchmark={job.benchmark} \\
      --policy={job.policy} \\
      --run_root="${{RUN_ROOT}}" \\
      --results_repo={results_repo_id} \\
      --git_commit={git_commit} \\
      --num_gpus={job.num_gpus} \\
      --microbatch_per_gpu={job.microbatch_per_gpu} \\
      --gradient_accumulation_steps={job.gradient_accumulation_steps} \\
      --effective_batch_size={job.effective_batch_size} \\
      --train_wall_time_s="${{TRAIN_WALL_TIME_S}}" \\
      --eval_wall_time_s="${{EVAL_WALL_TIME_S}}" \\
      --slurm_job_id="${{SLURM_JOB_ID:-}}" \\
      --docker_image={job.docker_image}
  '
 """
 def write_manifest(
    *,
    output_dir: Path,
    jobs: list[PlannedJob],
    git_commit: str,
    hub_org: str,
    results_repo: str,
 ) -> Path:
    manifest = {
        "generated_at": datetime.now(UTC).isoformat(),
        "git_commit": git_commit,
        "hub_org": hub_org,
        "results_repo": results_repo,
        "jobs": [asdict(job) for job in jobs],
    }
    manifest_path = output_dir / "manifest.json"
    manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True))
    return manifest_path
 def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the benchmark matrix runner.

    ``--policies`` and ``--benchmarks`` take zero or more names and default to
    ``None``. ``--output_dir`` (converted to ``Path``), ``--hub_org`` and
    ``--results_repo`` are required. ``--submit`` is a boolean flag.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    # Optional selectors for the matrix axes.
    for selector in ("--policies", "--benchmarks"):
        cli.add_argument(selector, nargs="*", default=None)
    cli.add_argument("--output_dir", type=Path, required=True)
    # Mandatory Hub coordinates.
    for hub_option in ("--hub_org", "--results_repo"):
        cli.add_argument(hub_option, required=True)
    cli.add_argument("--submit", action="store_true")
    namespace = cli.parse_args()
    return namespace
 def get_git_commit() -> str:
    """Return the stripped output of ``git rev-parse HEAD``.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    completed = subprocess.run(
        ["git", "rev-parse", "HEAD"],
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout.strip()
 def main() -> int:
    """Plan the benchmark matrix, write its sbatch scripts and manifest, and optionally submit.

    All scripts and the manifest are written before any job is submitted.
    If an ``sbatch`` call fails, the on-disk record of the planned jobs is
    therefore already complete, including for jobs queued earlier in the run.

    Returns:
        ``0`` on success.

    Raises:
        subprocess.CalledProcessError: If ``git rev-parse`` fails, or if
            ``--submit`` is set and an ``sbatch`` call fails.
    """
    args = parse_args()
    output_dir = args.output_dir
    for directory in (output_dir, output_dir / "slurm", output_dir / "logs"):
        directory.mkdir(parents=True, exist_ok=True)
    selected_policies = get_requested_names(args.policies, POLICIES, kind="policies")
    selected_benchmarks = get_requested_names(args.benchmarks, BENCHMARKS, kind="benchmarks")
    git_commit = get_git_commit()
    results_repo_id = normalize_repo_id(args.hub_org, args.results_repo)
    jobs = plan_jobs(
        output_dir=output_dir,
        hub_org=args.hub_org,
        results_repo=results_repo_id,
        policies=selected_policies,
        benchmarks=selected_benchmarks,
    )
    # Phase 1: materialise every script on disk.
    script_paths: list[Path] = []
    for job in jobs:
        script = render_sbatch_script(
            job=job,
            output_dir=output_dir,
            results_repo_id=results_repo_id,
            git_commit=git_commit,
        )
        script_path = Path(job.script_path)
        script_path.write_text(script)
        script_path.chmod(0o755)
        script_paths.append(script_path)
    # Phase 2: record the plan before anything is queued.
    manifest_path = write_manifest(
        output_dir=output_dir,
        jobs=jobs,
        git_commit=git_commit,
        hub_org=args.hub_org,
        results_repo=results_repo_id,
    )
    # Phase 3: submit, in planning order; the first failure aborts the run.
    if args.submit:
        for script_path in script_paths:
            subprocess.run(["sbatch", str(script_path)], check=True)
    print(f"Wrote {len(jobs)} benchmark jobs to {output_dir}")
    print(f"Manifest: {manifest_path}")
    return 0
 # Script entry point: exit the process with the status code returned by main().
 if __name__ == "__main__":
    raise SystemExit(main())
--- a/docker/Dockerfile.benchmark.libero
+++ b/docker/Dockerfile.benchmark.libero
@@ -0,0 +1,42 @@
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Benchmark image for LIBERO integration tests.
 # Extends the nightly GPU image (which already has all extras installed)
 # with the PR's source code and LIBERO-specific asset setup.
 #
 # Build:  docker build -f docker/Dockerfile.benchmark.libero -t lerobot-benchmark-libero .
 # Run:    docker run --gpus all --rm lerobot-benchmark-libero lerobot-eval ...
 FROM huggingface/lerobot-gpu:latest
 # Pre-download lerobot/libero-assets from HF Hub so nothing is fetched at
 # runtime (which times out on CI). Point the libero config at the cached path.
 # libero/libero/__init__.py calls input() when ~/.libero/config.yaml is missing,
 # so we write the config before any libero import can happen.
 # LIBERO_DIR is the inner `libero` directory of the installed package. It is
 # located with importlib's find_spec rather than an `import libero` statement.
 # The generated config.yaml points `assets` at the downloaded snapshot and the
 # remaining entries (bddl_files, datasets, init_states) at paths under LIBERO_DIR.
 RUN LIBERO_DIR=$(python -c \
      "import importlib.util, os; s=importlib.util.find_spec('libero'); \
       print(os.path.join(os.path.dirname(s.origin), 'libero'))") && \
    mkdir -p /home/user_lerobot/.libero && \
    python -c "\
 from huggingface_hub import snapshot_download; \
 snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \
                  local_dir='/home/user_lerobot/.libero/assets')" && \
    printf "assets: /home/user_lerobot/.libero/assets\nbddl_files: ${LIBERO_DIR}/bddl_files\ndatasets: ${LIBERO_DIR}/../datasets\ninit_states: ${LIBERO_DIR}/init_files\n" \
    > /home/user_lerobot/.libero/config.yaml
 # Overlay the PR's source code on top of the nightly image.
 COPY --chown=user_lerobot:user_lerobot . .
 # Default to a shell; the benchmark command is supplied at `docker run` time.
 CMD ["/bin/bash"]
--- a/docker/Dockerfile.benchmark.libero_plus
+++ b/docker/Dockerfile.benchmark.libero_plus
@@ -0,0 +1,48 @@
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Benchmark image for LIBERO-plus: the nightly GPU image plus the LIBERO-plus
 # sources, its assets, a pre-written ~/.libero/config.yaml, and the PR's code.
 FROM huggingface/lerobot-gpu:latest
 # System packages are installed as root; `unzip` is used below to extract the
 # asset archive. The build switches back to the unprivileged user afterwards.
 USER root
 RUN apt-get update \
    && apt-get install -y --no-install-recommends \
         unzip libexpat1 libfontconfig1-dev libmagickwand-dev \
    && apt-get clean && rm -rf /var/lib/apt/lists/*
 USER user_lerobot
 # Python dependencies are installed explicitly because LIBERO-plus itself is
 # installed with --no-deps below.
 RUN uv pip install --no-cache \
        "robosuite==1.4.1" bddl easydict mujoco matplotlib wand scikit-image gym
 ENV LIBERO_PLUS_ROOT=/home/user_lerobot/libero-plus/libero/libero
 # Clone and install LIBERO-plus, then remove hf-libero if it is present.
 # The `|| true` is grouped with the uninstall only: a failed clone or install
 # must fail the build instead of being masked by the fallback.
 RUN git clone --depth=1 https://github.com/sylvestf/LIBERO-plus.git /home/user_lerobot/libero-plus \
    && cd /home/user_lerobot/libero-plus && uv pip install --no-cache --no-deps -e "." \
    && (uv pip uninstall hf-libero 2>/dev/null || true)
 ENV PYTHONPATH="/home/user_lerobot/libero-plus:${PYTHONPATH}"
 # Download assets.zip from the Hub dataset, move the nested `assets` directory
 # into LIBERO_PLUS_ROOT, and delete the temporary download directory.
 RUN python -c "\
 from huggingface_hub import hf_hub_download; \
 hf_hub_download(repo_id='Sylvest/LIBERO-plus', repo_type='dataset', \
                filename='assets.zip', local_dir='/tmp/libero-plus-dl')" \
    && unzip -q /tmp/libero-plus-dl/assets.zip -d /tmp/libero-plus-dl/extract \
    && mv /tmp/libero-plus-dl/extract/inspire/hdd/project/embodied-multimodality/public/syfei/libero_new/release/dataset/LIBERO-plus-0/assets \
          ${LIBERO_PLUS_ROOT}/assets \
    && rm -rf /tmp/libero-plus-dl
 # Write the libero config at build time, with every path under LIBERO_PLUS_ROOT.
 RUN mkdir -p /home/user_lerobot/.libero \
    && printf "assets: ${LIBERO_PLUS_ROOT}/assets\nbddl_files: ${LIBERO_PLUS_ROOT}/bddl_files\ndatasets: ${LIBERO_PLUS_ROOT}/../datasets\ninit_states: ${LIBERO_PLUS_ROOT}/init_files\n" \
       > /home/user_lerobot/.libero/config.yaml
 # Overlay the PR's source code on top of the nightly image.
 COPY --chown=user_lerobot:user_lerobot . .
 CMD ["/bin/bash"]
--- a/docker/Dockerfile.benchmark.metaworld
+++ b/docker/Dockerfile.benchmark.metaworld
@@ -0,0 +1,27 @@
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Benchmark image for MetaWorld integration tests.
 # Extends the nightly GPU image (which already has all extras installed)
 # with the PR's source code.
 #
 # Build:  docker build -f docker/Dockerfile.benchmark.metaworld -t lerobot-benchmark-metaworld .
 # Run:    docker run --gpus all --rm lerobot-benchmark-metaworld lerobot-eval ...
 FROM huggingface/lerobot-gpu:latest
 # Overlay the PR's source code on top of the nightly image.
 COPY --chown=user_lerobot:user_lerobot . .
 # Default to a shell; the benchmark command is supplied at `docker run` time.
 CMD ["/bin/bash"]
--- a/docker/Dockerfile.benchmark.robomme
+++ b/docker/Dockerfile.benchmark.robomme
@@ -0,0 +1,39 @@
 # Copyright 2026 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Benchmark image for RoboMME: the nightly GPU image plus Vulkan libraries,
 # the robomme package, and the PR's source code.
 FROM huggingface/lerobot-gpu:latest
 # VK_ICD_FILENAMES points at the NVIDIA ICD manifest written by the RUN step below.
 ENV NVIDIA_DRIVER_CAPABILITIES=all \
    VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json
 # Install the Vulkan libraries as root and write an ICD manifest that names
 # libGLX_nvidia.so.0 as the driver library; then drop back to the unprivileged user.
 USER root
 RUN apt-get update \
    && apt-get install -y --no-install-recommends \
         libvulkan1 libvulkan-dev mesa-vulkan-drivers \
    && mkdir -p /usr/share/vulkan/icd.d \
    && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
       > /usr/share/vulkan/icd.d/nvidia_icd.json \
    && apt-get clean && rm -rf /var/lib/apt/lists/*
 USER user_lerobot
 # Only the packaging metadata is copied before the install step; the full
 # source tree is overlaid afterwards (presumably to keep the dependency layer
 # cacheable across source-only changes).
 COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
 # The override file forces gymnasium==0.29.1 and numpy==1.26.4 during resolution.
 # The trailing `python -c` is an import smoke test that fails the build if
 # robomme cannot be imported.
 RUN printf 'gymnasium==0.29.1\nnumpy==1.26.4\n' > /tmp/robomme_override.txt \
    && uv pip install --no-cache --override /tmp/robomme_override.txt \
         -e ".[smolvla,av-dep]" \
         "robomme @ git+https://github.com/RoboMME/robomme_benchmark.git@main" \
    && python -c "import robomme; print('robomme import OK')"
 # Overlay the PR's source code on top of the nightly image.
 COPY --chown=user_lerobot:user_lerobot . .
 CMD ["/bin/bash"]
--- a/docs/source/adding_benchmarks.mdx
+++ b/docs/source/adding_benchmarks.mdx
@@ -216,7 +216,7 @@ class MyBenchmarkEnvConfig(EnvConfig):
    def get_env_processors(self):
        """Override if your benchmark needs observation/action transforms."""
-        from lerobot.processor.pipeline import PolicyProcessorPipeline
+        from lerobot.processor import PolicyProcessorPipeline
        from lerobot.processor.env_processor import MyBenchmarkProcessorStep
        return (
            PolicyProcessorPipeline(steps=[MyBenchmarkProcessorStep()]),
--- a/docs/source/async.mdx
+++ b/docs/source/async.mdx
@@ -170,7 +170,7 @@ python -m lerobot.async_inference.robot_client \
 ```python
 import threading
 from lerobot.robots.so_follower import SO100FollowerConfig
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
 from lerobot.async_inference.configs import RobotClientConfig
 from lerobot.async_inference.robot_client import RobotClient
 from lerobot.async_inference.helpers import visualize_action_queue_size
--- a/docs/source/backwardcomp.mdx
+++ b/docs/source/backwardcomp.mdx
@@ -41,7 +41,7 @@ The script:
 ```python
 # New usage pattern (after migration)
-from lerobot.policies.factory import make_policy, make_pre_post_processors
+from lerobot.policies import make_policy, make_pre_post_processors
 # Load model and processors separately
 policy = make_policy(config, ds_meta=dataset.meta)
--- a/docs/source/bring_your_own_policies.mdx
+++ b/docs/source/bring_your_own_policies.mdx
@@ -47,9 +47,9 @@ Here is a template to get you started, customize the parameters and methods as n
 ```python
 # configuration_my_custom_policy.py
 from dataclasses import dataclass, field
-from lerobot.configs.policies import PreTrainedConfig
+from lerobot.configs import PreTrainedConfig
-from lerobot.optim.optimizers import AdamWConfig
+from lerobot.optim import AdamWConfig
-from lerobot.optim.schedulers import CosineDecayWithWarmupSchedulerConfig
+from lerobot.optim import CosineDecayWithWarmupSchedulerConfig
@PreTrainedConfig.register_subclass("my_custom_policy")
@dataclass
@@ -120,7 +120,7 @@ import torch
 import torch.nn as nn
 from typing import Any
-from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.policies import PreTrainedPolicy
 from lerobot.utils.constants import ACTION
 from .configuration_my_custom_policy import MyCustomPolicyConfig
--- a/docs/source/cameras.mdx
+++ b/docs/source/cameras.mdx
@@ -79,9 +79,8 @@ The following examples show how to use the camera API to configure and capture f
 <!-- prettier-ignore-start -->
 ```python
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCamera, OpenCVCameraConfig
-from lerobot.cameras.opencv.camera_opencv import OpenCVCamera
+from lerobot.cameras import ColorMode, Cv2Rotation
 from lerobot.cameras.configs import ColorMode, Cv2Rotation
 # Construct an `OpenCVCameraConfig` with your desired FPS, resolution, color mode, and rotation.
 config = OpenCVCameraConfig(
@@ -126,9 +125,8 @@ with OpenCVCamera(config) as camera:
 <!-- prettier-ignore-start -->
 ```python
-from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig
+from lerobot.cameras.realsense import RealSenseCamera, RealSenseCameraConfig
-from lerobot.cameras.realsense.camera_realsense import RealSenseCamera
+from lerobot.cameras import ColorMode, Cv2Rotation
 from lerobot.cameras.configs import ColorMode, Cv2Rotation
 # Create a `RealSenseCameraConfig` specifying your camera’s serial number and enabling depth.
 config = RealSenseCameraConfig(
--- a/docs/source/dataset_subtask.mdx
+++ b/docs/source/dataset_subtask.mdx
@@ -95,7 +95,7 @@ After completing your annotation:
 When you load a dataset with subtask annotations, the subtask information is automatically available:
 ```python
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 # Load a dataset with subtask annotations
 dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
@@ -133,11 +133,10 @@ if has_subtasks:
 The `TokenizerProcessor` automatically handles subtask tokenization for Vision-Language Action (VLA) models:
 ```python
-from lerobot.processor.tokenizer_processor import TokenizerProcessor
+from lerobot.processor import TokenizerProcessorStep
 from lerobot.processor.pipeline import ProcessorPipeline
-# Create a tokenizer processor
+# Create a tokenizer processor step
-tokenizer_processor = TokenizerProcessor(
+tokenizer_processor = TokenizerProcessorStep(
    tokenizer_name_or_path="google/paligemma-3b-pt-224",
    padding="max_length",
    max_length=64,
@@ -158,7 +157,7 @@ When subtasks are available in the batch, the tokenizer processor adds:
 ```python
 import torch
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
@@ -182,7 +181,7 @@ for batch in dataloader:
 Try loading a dataset with subtask annotations:
 ```python
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 # Example dataset with subtask annotations
 dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
--- a/docs/source/earthrover_mini_plus.mdx
+++ b/docs/source/earthrover_mini_plus.mdx
@@ -66,10 +66,10 @@ The SDK gives you:
 Follow our [Installation Guide](./installation) to install LeRobot.
-In addition to the base installation, install the EarthRover Mini dependencies:
+In addition to the base installation, install the hardware dependencies required by the EarthRover Mini:
 ```bash
-pip install -e .
+pip install -e ".[hardware]"
 ```
 ## How It Works
--- a/docs/source/env_processor.mdx
+++ b/docs/source/env_processor.mdx
@@ -173,8 +173,8 @@ observation = {
 The `make_env_pre_post_processors` function follows the same pattern as `make_pre_post_processors` for policies:
 ```python
-from lerobot.envs.factory import make_env_pre_post_processors
+from lerobot.envs import make_env_pre_post_processors, PushtEnv
-from lerobot.envs.configs import LiberoEnv, PushtEnv
+from lerobot.envs.configs import LiberoEnv
 # For LIBERO: Returns LiberoProcessorStep in preprocessor
 libero_cfg = LiberoEnv(task="libero_spatial", camera_name=["agentview"])
@@ -257,7 +257,7 @@ def eval_main(cfg: EvalPipelineConfig):
 The `LiberoProcessorStep` demonstrates a real-world environment processor:
 ```python
-from lerobot.processor.pipeline import ObservationProcessorStep
+from lerobot.processor import ObservationProcessorStep
@dataclass
@ProcessorStepRegistry.register(name="libero_processor")
--- a/docs/source/envhub.mdx
+++ b/docs/source/envhub.mdx
@@ -34,7 +34,7 @@ Finally, your environment must implement the standard `gym.vector.VectorEnv` int
 Loading an environment from the Hub is as simple as:
 ```python
-from lerobot.envs.factory import make_env
+from lerobot.envs import make_env
 # Load a hub environment (requires explicit consent to run remote code)
 env = make_env("lerobot/cartpole-env", trust_remote_code=True)
@@ -191,7 +191,7 @@ api.upload_folder(
 ### Basic Usage
 ```python
-from lerobot.envs.factory import make_env
+from lerobot.envs import make_env
 # Load from the hub
 envs_dict = make_env(
@@ -314,7 +314,7 @@ env = make_env("trusted-org/verified-env@a1b2c3d4", trust_remote_code=True)
 Here's a complete example using the reference CartPole environment:
 ```python
-from lerobot.envs.factory import make_env
+from lerobot.envs import make_env
 import numpy as np
 # Load the environment
--- a/docs/source/envhub_isaaclab_arena.mdx
+++ b/docs/source/envhub_isaaclab_arena.mdx
@@ -58,10 +58,10 @@ pip install -e .
 cd ..
-# 5. Install LeRobot
+# 5. Install LeRobot (evaluation extra for env/policy evaluation)
 git clone https://github.com/huggingface/lerobot.git
 cd lerobot
-pip install -e .
+pip install -e ".[evaluation]"
 cd ..
@@ -262,7 +262,7 @@ def main(cfg: EvalPipelineConfig):
    """Run random action rollout for IsaacLab Arena environment."""
    logging.info(pformat(asdict(cfg)))
-    from lerobot.envs.factory import make_env
+    from lerobot.envs import make_env
    env_dict = make_env(
        cfg.env,
--- a/docs/source/envhub_leisaac.mdx
+++ b/docs/source/envhub_leisaac.mdx
@@ -74,7 +74,7 @@ EnvHub exposes every LeIsaac-supported task in a uniform interface. The examples
 # envhub_random_action.py
 import torch
-from lerobot.envs.factory import make_env
+from lerobot.envs import make_env
 # Load from the hub
 envs_dict = make_env("LightwheelAI/leisaac_env:envs/so101_pick_orange.py", n_envs=1, trust_remote_code=True)
@@ -142,7 +142,7 @@ from lerobot.teleoperators import (  # noqa: F401
 )
 from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import init_logging
-from lerobot.envs.factory import make_env
+from lerobot.envs import make_env
@dataclass
@@ -282,7 +282,7 @@ Note: when working with `bi_so101_fold_cloth`, call `initialize()` immediately a
 ```python
 import torch
-from lerobot.envs.factory import make_env
+from lerobot.envs import make_env
 # Load from the hub
 envs_dict = make_env("LightwheelAI/leisaac_env:envs/bi_so101_fold_cloth.py", n_envs=1, trust_remote_code=True)
--- a/docs/source/il_robots.mdx
+++ b/docs/source/il_robots.mdx
@@ -58,8 +58,8 @@ lerobot-teleoperate \
 <!-- prettier-ignore-start -->
 ```python
-from lerobot.teleoperators.so_leader import SO101LeaderConfig, SO101Leader
+from lerobot.teleoperators.so_leader import SO101Leader, SO101LeaderConfig
-from lerobot.robots.so_follower import SO101FollowerConfig, SO101Follower
+from lerobot.robots.so_follower import SO101Follower, SO101FollowerConfig
 robot_config = SO101FollowerConfig(
    port="/dev/tty.usbmodem58760431541",
@@ -116,9 +116,9 @@ lerobot-teleoperate \
 <!-- prettier-ignore-start -->
 ```python
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.teleoperators.koch_leader import KochLeaderConfig, KochLeader
+from lerobot.teleoperators.koch_leader import KochLeader, KochLeaderConfig
-from lerobot.robots.koch_follower import KochFollowerConfig, KochFollower
+from lerobot.robots.koch_follower import KochFollower, KochFollowerConfig
 camera_config = {
    "front": OpenCVCameraConfig(index_or_path=0, width=1920, height=1080, fps=30)
@@ -195,13 +195,12 @@ lerobot-record \
 <!-- prettier-ignore-start -->
 ```python
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
-from lerobot.datasets.utils import hw_to_dataset_features
+from lerobot.utils.feature_utils import hw_to_dataset_features
 from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
-from lerobot.teleoperators.so_leader.config_so100_leader import SO100LeaderConfig
+from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
-from lerobot.teleoperators.so_leader.so100_leader import SO100Leader
+from lerobot.common.control_utils import init_keyboard_listener
 from lerobot.utils.control_utils import init_keyboard_listener
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import init_rerun
 from lerobot.scripts.lerobot_record import record_loop
@@ -410,9 +409,8 @@ lerobot-replay \
 ```python
 import time
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
-from lerobot.robots.so_follower.config_so100_follower import SO100FollowerConfig
+from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
 from lerobot.robots.so_follower.so100_follower import SO100Follower
 from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import log_say
@@ -532,15 +530,14 @@ lerobot-record  \
 <!-- prettier-ignore-start -->
 ```python
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
-from lerobot.datasets.utils import hw_to_dataset_features
+from lerobot.utils.feature_utils import hw_to_dataset_features
-from lerobot.policies.act.modeling_act import ACTPolicy
+from lerobot.policies.act import ACTPolicy
-from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies import make_pre_post_processors
-from lerobot.robots.so_follower.config_so100_follower import SO100FollowerConfig
+from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
 from lerobot.robots.so_follower.so100_follower import SO100Follower
 from lerobot.scripts.lerobot_record import record_loop
-from lerobot.utils.control_utils import init_keyboard_listener
+from lerobot.common.control_utils import init_keyboard_listener
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import init_rerun
--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -116,6 +116,8 @@ brew install ffmpeg
 ## Step 3: Install LeRobot 🤗
 The base `lerobot` install is intentionally **lightweight** — it includes only core ML dependencies (PyTorch, torchvision, numpy, opencv, einops, draccus, huggingface-hub, gymnasium, safetensors). Heavier dependencies are gated behind optional extras so you only install what you need.
 ### From Source
 First, clone the repository and navigate into the directory:
@@ -131,12 +133,16 @@ Then, install the library in editable mode. This is useful if you plan to contri
 <hfoptions id="install_lerobot_src">
 <hfoption id="conda">
 ```bash
-pip install -e .
+pip install -e ".[core_scripts]"  # For robot workflows (recording, replaying, calibrating)
 pip install -e ".[training]"      # For training policies
 pip install -e ".[all]"           # Everything (all policies, envs, hardware, dev tools)
 ```
 </hfoption>
 <hfoption id="uv">
 ```bash
-uv pip install -e .
+uv pip install -e ".[core_scripts]"  # For robot workflows (recording, replaying, calibrating)
 uv pip install -e ".[training]"      # For training policies
 uv pip install -e ".[all]"           # Everything (all policies, envs, hardware, dev tools)
 ```
 </hfoption>
 </hfoptions>
@@ -162,26 +168,48 @@ uv pip install lerobot
 </hfoptions>
 <!-- prettier-ignore-end -->
-_This installs only the default dependencies._
+_This installs only the core ML dependencies. You will need to add extras for most workflows._
-**Extra Features:**
+**Feature Extras:**
-To install additional functionality, use one of the following (If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.):
+LeRobot provides **feature-scoped extras** that map to common workflows. If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.
 | Extra      | What it adds                                | Typical use case                    |
 | ---------- | ------------------------------------------- | ----------------------------------- |
 | `dataset`  | `datasets`, `av`, `torchcodec`, `jsonlines` | Loading & creating datasets         |
 | `training` | `dataset` + `accelerate`, `wandb`           | Training policies                   |
 | `hardware` | `pynput`, `pyserial`, `deepdiff`            | Connecting to real robots           |
 | `viz`      | `rerun-sdk`                                 | Visualization during recording/eval |
 **Composite Extras** combine feature extras for common CLI scripts:
 | Extra          | Includes                       | Typical use case                                        |
 | -------------- | ------------------------------ | ------------------------------------------------------- |
 | `core_scripts` | `dataset` + `hardware` + `viz` | `lerobot-record`, `lerobot-replay`, `lerobot-calibrate` |
 | `evaluation`   | `av`                           | `lerobot-eval` (add policy + env extras as needed)      |
 | `dataset_viz`  | `dataset` + `viz`              | `lerobot-dataset-viz`, `lerobot-imgtransform-viz`       |
 ```bash
-pip install 'lerobot[all]'          # All available features
+pip install 'lerobot[core_scripts]'          # Record, replay, calibrate
-pip install 'lerobot[aloha,pusht]'  # Specific features (Aloha & Pusht)
+pip install 'lerobot[training]'              # Train policies
-pip install 'lerobot[feetech]'      # Feetech motor support
+pip install 'lerobot[core_scripts,training]' # Record + train
 pip install 'lerobot[all]'                   # Everything
 ```
-_Replace `[...]` with your desired features._
+**Policy, environment, and hardware extras** are still available for specific dependencies:
-**Available Tags:**
+```bash
-For a full list of optional dependencies, see:
+pip install 'lerobot[pi]'             # Pi0/Pi0.5/Pi0-FAST policy deps
-https://pypi.org/project/lerobot/
+pip install 'lerobot[smolvla]'        # SmolVLA policy deps
 pip install 'lerobot[diffusion]'      # Diffusion policy deps (diffusers)
 pip install 'lerobot[aloha,pusht]'    # Simulation environments
 pip install 'lerobot[feetech]'        # Feetech motor support
 ```
 _Multiple extras can be combined (e.g., `.[core_scripts,pi,pusht]`). For a full list of available extras, refer to `pyproject.toml`._
 ### Troubleshooting
-If you encounter build errors, you may need to install additional dependencies: `cmake`, `build-essential`, and `ffmpeg libs`.
+If you encounter build errors, you may need to install additional system dependencies: `cmake`, `build-essential`, and `ffmpeg libs`.
 To install these for Linux run:
 ```bash
@@ -196,8 +224,8 @@ LeRobot provides optional extras for specific functionalities. Multiple extras c
 ### Simulations
-Install environment packages: `aloha` ([gym-aloha](https://github.com/huggingface/gym-aloha)), or `pusht` ([gym-pusht](https://github.com/huggingface/gym-pusht))
+Install environment packages: `aloha` ([gym-aloha](https://github.com/huggingface/gym-aloha)), or `pusht` ([gym-pusht](https://github.com/huggingface/gym-pusht)).
-Example:
+These automatically include the `dataset` extra.
 ```bash
 pip install -e ".[aloha]" # or "[pusht]" for example
@@ -213,7 +241,7 @@ pip install -e ".[feetech]" # or "[dynamixel]" for example
 ### Experiment Tracking
-To use [Weights and Biases](https://docs.wandb.ai/quickstart) for experiment tracking, log in with
+Weights and Biases is included in the `training` extra. To use [Weights and Biases](https://docs.wandb.ai/quickstart) for experiment tracking, log in with:
 ```bash
 wandb login
--- a/docs/source/introduction_processors.mdx
+++ b/docs/source/introduction_processors.mdx
@@ -19,10 +19,10 @@ This means that your favorite policy can be used like this:
 ```python
 import torch
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
-from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies import make_pre_post_processors
 from lerobot.policies.your_policy import YourPolicy
-from lerobot.processor.pipeline import RobotProcessorPipeline, PolicyProcessorPipeline
+from lerobot.processor import RobotProcessorPipeline, PolicyProcessorPipeline
 dataset = LeRobotDataset("hf_user/dataset", episodes=[0])
 sample = dataset[10]
@@ -260,7 +260,7 @@ Since processor pipelines can add new features (like velocity fields), change te
 These functions work together by starting with robot hardware specifications (`create_initial_features()`) then simulating the entire pipeline transformation (`aggregate_pipeline_dataset_features()`) to compute the final feature dictionary that gets passed to `LeRobotDataset.create()`, ensuring perfect alignment between what processors output and what datasets expect to store.
 ```python
-from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features
+from lerobot.datasets import aggregate_pipeline_dataset_features
 # Start with robot's raw features
 initial_features = create_initial_features(
--- a/docs/source/lerobot-dataset-v3.mdx
+++ b/docs/source/lerobot-dataset-v3.mdx
@@ -89,7 +89,7 @@ A core v3 principle is **decoupling storage from the user API**: data is stored
 ```python
 import torch
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 repo_id = "yaak-ai/L2D-v3"
@@ -135,7 +135,7 @@ for batch in data_loader:
 Use `StreamingLeRobotDataset` to iterate directly from the Hub without local copies. This lets you stream large datasets without downloading them to disk or loading them into memory, and is a key feature of the new dataset format.
 ```python
-from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset
+from lerobot.datasets import StreamingLeRobotDataset
 repo_id = "yaak-ai/L2D-v3"
 dataset = StreamingLeRobotDataset(repo_id)  # streams directly from the Hub
@@ -167,8 +167,8 @@ Currently, transforms are applied during **training time only**, not during reco
 Use the `image_transforms` parameter when loading a dataset for training:
 ```python
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
-from lerobot.datasets.transforms import ImageTransforms, ImageTransformsConfig, ImageTransformConfig
+from lerobot.transforms import ImageTransforms, ImageTransformsConfig, ImageTransformConfig
 # Option 1: Use default transform configuration (disabled by default)
 transforms_config = ImageTransformsConfig(
@@ -290,7 +290,7 @@ python -m lerobot.datasets.v30.convert_dataset_v21_to_v30 --repo-id=<HF_USER/DAT
 When creating or recording datasets, you **must** call `dataset.finalize()` to properly close parquet writers. See [PR #1903](https://github.com/huggingface/lerobot/pull/1903) for more details.
 ```python
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 # Create dataset and record episodes
 dataset = LeRobotDataset.create(...)
--- a/docs/source/multi_gpu_training.mdx
+++ b/docs/source/multi_gpu_training.mdx
@@ -4,10 +4,10 @@ This guide shows you how to train policies on multiple GPUs using [Hugging Face
 ## Installation
-First, ensure you have accelerate installed:
+`accelerate` is included in the `training` extra. Install it with:
 ```bash
-pip install accelerate
+pip install 'lerobot[training]'
 ```
 ## Training with Multiple GPUs
--- a/docs/source/phone_teleop.mdx
+++ b/docs/source/phone_teleop.mdx
@@ -45,7 +45,8 @@ Modify the examples to use `PhoneOS.IOS` or `PhoneOS.ANDROID` in `PhoneConfig`.
 Teleoperation example:
 ```python
-from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
+from lerobot.teleoperators.phone import Phone, PhoneConfig
 from lerobot.teleoperators.phone.config_phone import PhoneOS
 teleop_config = PhoneConfig(phone_os=PhoneOS.IOS)  # or PhoneOS.ANDROID
 teleop_device = Phone(teleop_config)
--- a/docs/source/pi0.mdx
+++ b/docs/source/pi0.mdx
@@ -110,8 +110,7 @@ lerobot-edit-dataset \
 Or equivalently in Python:
 ```python
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset, recompute_stats
 from lerobot.datasets.dataset_tools import recompute_stats
 dataset = LeRobotDataset("your_dataset")
 recompute_stats(dataset, relative_action=True, chunk_size=50, relative_exclude_joints=["gripper"])
--- a/docs/source/pi05.mdx
+++ b/docs/source/pi05.mdx
@@ -116,8 +116,7 @@ lerobot-edit-dataset \
 Or equivalently in Python:
 ```python
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset, recompute_stats
 from lerobot.datasets.dataset_tools import recompute_stats
 dataset = LeRobotDataset("your_dataset")
 recompute_stats(dataset, relative_action=True, chunk_size=50, relative_exclude_joints=["gripper"])
--- a/docs/source/policy_pi0_README.md
+++ b/docs/source/policy_pi0_README.md
@@ -60,11 +60,10 @@ When `use_relative_actions=true`, the training script automatically:
 ### Recomputing stats for an existing dataset
 If you want to precompute relative action stats offline, use `recompute_stats` from
-`lerobot.datasets.dataset_tools`:
+`lerobot.datasets`:
 ```python
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset, recompute_stats
 from lerobot.datasets.dataset_tools import recompute_stats
 dataset = LeRobotDataset("your_org/your_dataset")
 dataset = recompute_stats(
--- a/docs/source/rtc.mdx
+++ b/docs/source/rtc.mdx
@@ -39,9 +39,8 @@ The snippet below provides a simplified pseudo-example of how RTC operates with
 ```python
 from lerobot.policies.pi0 import PI0Policy, PI0Config
-from lerobot.configs.types import RTCAttentionSchedule
+from lerobot.configs import RTCAttentionSchedule
-from lerobot.policies.rtc.configuration_rtc import RTCConfig
+from lerobot.policies.rtc import RTCConfig, ActionQueue
 from lerobot.policies.rtc.action_queue import ActionQueue
 # Load Pi0 with RTC enabled
 policy_cfg = PI0Config()
--- a/docs/source/xvla.mdx
+++ b/docs/source/xvla.mdx
@@ -418,7 +418,7 @@ Create a custom preprocessing pipeline for your environment:
 ```python
 from lerobot.processor import PolicyProcessorPipeline
-from lerobot.policies.xvla.processor_xvla import (
+from lerobot.policies.xvla import (
    XVLAImageToFloatProcessorStep,
    XVLAImageNetNormalizeProcessorStep,
    XVLAAddDomainIdProcessorStep,
--- a/examples/backward_compatibility/replay.py
+++ b/examples/backward_compatibility/replay.py
@@ -35,7 +35,7 @@ from pprint import pformat
 import draccus
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 from lerobot.robots import (  # noqa: F401
    Robot,
    RobotConfig,
--- a/examples/dataset/load_lerobot_dataset.py
+++ b/examples/dataset/load_lerobot_dataset.py
@@ -31,17 +31,11 @@ from pprint import pprint
 import torch
 from huggingface_hub import HfApi
-import lerobot
+from lerobot.datasets import LeRobotDataset, LeRobotDatasetMetadata
 from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 def main():
-    # We ported a number of existing datasets ourselves, use this to see the list:
+    # Browse datasets created/ported by the community on the hub using the hub api:
    print("List of available datasets:")
    pprint(lerobot.available_datasets)
    # You can also browse through the datasets created/ported by the community on the hub using the hub api:
    hub_api = HfApi()
    repo_ids = [info.id for info in hub_api.list_datasets(task_categories="robotics", tags=["LeRobot"])]
    pprint(repo_ids)
--- a/examples/dataset/slurm_compute_rabc.py
+++ b/examples/dataset/slurm_compute_rabc.py
@@ -231,7 +231,7 @@ class AggregateProgress(PipelineStep):
        import pyarrow as pa
        import pyarrow.parquet as pq
-        from lerobot.datasets.lerobot_dataset import LeRobotDataset
+        from lerobot.datasets import LeRobotDataset
        from lerobot.utils.utils import init_logging
        init_logging()
--- a/examples/dataset/use_dataset_image_transforms.py
+++ b/examples/dataset/use_dataset_image_transforms.py
@@ -26,8 +26,8 @@ import torch
 from torchvision.transforms import v2
 from torchvision.transforms.functional import to_pil_image
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
-from lerobot.datasets.transforms import ImageTransformConfig, ImageTransforms, ImageTransformsConfig
+from lerobot.transforms import ImageTransformConfig, ImageTransforms, ImageTransformsConfig
 def save_image(tensor, filename):
--- a/examples/dataset/use_dataset_tools.py
+++ b/examples/dataset/use_dataset_tools.py
@@ -29,7 +29,8 @@ Usage:
 import numpy as np
-from lerobot.datasets.dataset_tools import (
+from lerobot.datasets import (
    LeRobotDataset,
    add_features,
    delete_episodes,
    merge_datasets,
@@ -37,7 +38,6 @@ from lerobot.datasets.dataset_tools import (
    remove_feature,
    split_dataset,
 )
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 def main():
--- a/examples/hil/hil_data_collection.py
+++ b/examples/hil/hil_data_collection.py
@@ -112,17 +112,18 @@ from hil_utils import (
    teleop_smooth_move_to,
 )
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig  # noqa: F401
+from lerobot.cameras.opencv import OpenCVCameraConfig  # noqa: F401
-from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig  # noqa: F401
+from lerobot.cameras.realsense import RealSenseCameraConfig  # noqa: F401
-from lerobot.configs import parser
+from lerobot.common.control_utils import is_headless, predict_action
-from lerobot.configs.policies import PreTrainedConfig
+from lerobot.configs import PreTrainedConfig, parser
-from lerobot.datasets.feature_utils import build_dataset_frame, combine_feature_dicts, hw_to_dataset_features
+from lerobot.datasets import (
-from lerobot.datasets.image_writer import safe_stop_image_writer
+    LeRobotDataset,
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+    VideoEncodingManager,
-from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
+    aggregate_pipeline_dataset_features,
-from lerobot.datasets.video_utils import VideoEncodingManager
+    create_initial_features,
-from lerobot.policies.factory import get_policy_class, make_policy, make_pre_post_processors
+    safe_stop_image_writer,
-from lerobot.policies.pretrained import PreTrainedPolicy
+)
 from lerobot.policies import PreTrainedPolicy, get_policy_class, make_policy, make_pre_post_processors
 from lerobot.policies.rtc import ActionInterpolator, ActionQueue, LatencyTracker, RTCConfig
 from lerobot.policies.utils import make_robot_action
 from lerobot.processor import (
@@ -131,18 +132,18 @@ from lerobot.processor import (
    RelativeActionsProcessorStep,
    TransitionKey,
    create_transition,
    rename_stats,
    to_relative_actions,
 )
 from lerobot.processor.relative_action_processor import to_relative_actions
 from lerobot.processor.rename_processor import rename_stats
 from lerobot.robots import Robot, RobotConfig, make_robot_from_config
-from lerobot.robots.bi_openarm_follower.config_bi_openarm_follower import BiOpenArmFollowerConfig
+from lerobot.robots.bi_openarm_follower import BiOpenArmFollowerConfig
-from lerobot.robots.so_follower.config_so_follower import SOFollowerRobotConfig  # noqa: F401
+from lerobot.robots.so_follower import SOFollowerRobotConfig  # noqa: F401
 from lerobot.teleoperators import Teleoperator, TeleoperatorConfig, make_teleoperator_from_config
-from lerobot.teleoperators.openarm_mini.config_openarm_mini import OpenArmMiniConfig  # noqa: F401
+from lerobot.teleoperators.openarm_mini import OpenArmMiniConfig  # noqa: F401
-from lerobot.teleoperators.so_leader.config_so_leader import SOLeaderTeleopConfig  # noqa: F401
+from lerobot.teleoperators.so_leader import SOLeaderTeleopConfig  # noqa: F401
 from lerobot.utils import get_safe_torch_device
 from lerobot.utils.constants import ACTION, OBS_STATE, OBS_STR
-from lerobot.utils.control_utils import is_headless, predict_action
+from lerobot.utils.feature_utils import build_dataset_frame, combine_feature_dicts, hw_to_dataset_features
 from lerobot.utils.device_utils import get_safe_torch_device
 from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import init_logging, log_say
 from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
--- a/examples/hil/hil_utils.py
+++ b/examples/hil/hil_utils.py
@@ -19,13 +19,12 @@ import time
 from dataclasses import dataclass, field
 from pathlib import Path
 from lerobot.common.control_utils import is_headless
 from lerobot.processor import (
    IdentityProcessorStep,
    RobotAction,
    RobotObservation,
    RobotProcessorPipeline,
 )
 from lerobot.processor.converters import (
    observation_to_transition,
    robot_action_observation_to_transition,
    transition_to_observation,
@@ -33,7 +32,6 @@ from lerobot.processor.converters import (
 )
 from lerobot.robots import Robot
 from lerobot.teleoperators import Teleoperator
 from lerobot.utils.control_utils import is_headless
 from lerobot.utils.robot_utils import precise_sleep
 logger = logging.getLogger(__name__)
--- a/examples/lekiwi/evaluate.py
+++ b/examples/lekiwi/evaluate.py
@@ -14,15 +14,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from lerobot.datasets.feature_utils import hw_to_dataset_features
+from lerobot.common.control_utils import init_keyboard_listener
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
-from lerobot.policies.act.modeling_act import ACTPolicy
+from lerobot.policies import make_pre_post_processors
-from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies.act import ACTPolicy
 from lerobot.processor import make_default_processors
 from lerobot.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
 from lerobot.scripts.lerobot_record import record_loop
 from lerobot.utils.constants import ACTION, OBS_STR
-from lerobot.utils.control_utils import init_keyboard_listener
+from lerobot.utils.feature_utils import hw_to_dataset_features
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import init_rerun
--- a/examples/lekiwi/record.py
+++ b/examples/lekiwi/record.py
@@ -14,16 +14,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from lerobot.datasets.feature_utils import hw_to_dataset_features
+from lerobot.common.control_utils import init_keyboard_listener
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 from lerobot.processor import make_default_processors
-from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
+from lerobot.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
 from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
 from lerobot.scripts.lerobot_record import record_loop
 from lerobot.teleoperators.keyboard import KeyboardTeleop, KeyboardTeleopConfig
 from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
 from lerobot.utils.constants import ACTION, OBS_STR
-from lerobot.utils.control_utils import init_keyboard_listener
+from lerobot.utils.feature_utils import hw_to_dataset_features
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import init_rerun
--- a/examples/lekiwi/replay.py
+++ b/examples/lekiwi/replay.py
@@ -16,9 +16,8 @@
 import time
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
-from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
+from lerobot.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
 from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
 from lerobot.utils.constants import ACTION
 from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import log_say
--- a/examples/phone_to_so100/evaluate.py
+++ b/examples/phone_to_so100/evaluate.py
@@ -14,19 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.configs.types import FeatureType, PolicyFeature
+from lerobot.common.control_utils import init_keyboard_listener
-from lerobot.datasets.feature_utils import combine_feature_dicts
+from lerobot.configs import FeatureType, PolicyFeature
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset, aggregate_pipeline_dataset_features, create_initial_features
 from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
 from lerobot.model.kinematics import RobotKinematics
-from lerobot.policies.act.modeling_act import ACTPolicy
+from lerobot.policies import make_pre_post_processors
-from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies.act import ACTPolicy
 from lerobot.processor import (
    RobotProcessorPipeline,
    make_default_teleop_action_processor,
 )
 from lerobot.processor.converters import (
    observation_to_transition,
    robot_action_observation_to_transition,
    transition_to_observation,
@@ -39,7 +36,7 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
 )
 from lerobot.scripts.lerobot_record import record_loop
 from lerobot.types import RobotAction, RobotObservation
-from lerobot.utils.control_utils import init_keyboard_listener
+from lerobot.utils.feature_utils import combine_feature_dicts
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import init_rerun
--- a/examples/phone_to_so100/record.py
+++ b/examples/phone_to_so100/record.py
@@ -14,13 +14,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.datasets.feature_utils import combine_feature_dicts
+from lerobot.common.control_utils import init_keyboard_listener
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset, aggregate_pipeline_dataset_features, create_initial_features
 from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
 from lerobot.model.kinematics import RobotKinematics
-from lerobot.processor import RobotProcessorPipeline
+from lerobot.processor import (
-from lerobot.processor.converters import (
+    RobotProcessorPipeline,
    observation_to_transition,
    robot_action_observation_to_transition,
    transition_to_observation,
@@ -35,11 +34,11 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
    InverseKinematicsEEToJoints,
 )
 from lerobot.scripts.lerobot_record import record_loop
-from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
+from lerobot.teleoperators.phone import Phone, PhoneConfig
 from lerobot.teleoperators.phone.config_phone import PhoneOS
 from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
 from lerobot.teleoperators.phone.teleop_phone import Phone
 from lerobot.types import RobotAction, RobotObservation
-from lerobot.utils.control_utils import init_keyboard_listener
+from lerobot.utils.feature_utils import combine_feature_dicts
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import init_rerun
--- a/examples/phone_to_so100/replay.py
+++ b/examples/phone_to_so100/replay.py
@@ -16,10 +16,10 @@
 import time
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 from lerobot.model.kinematics import RobotKinematics
-from lerobot.processor import RobotProcessorPipeline
+from lerobot.processor import (
-from lerobot.processor.converters import (
+    RobotProcessorPipeline,
    robot_action_observation_to_transition,
    transition_to_robot_action,
 )
--- a/examples/phone_to_so100/teleoperate.py
+++ b/examples/phone_to_so100/teleoperate.py
@@ -16,8 +16,8 @@
 import time
 from lerobot.model.kinematics import RobotKinematics
-from lerobot.processor import RobotProcessorPipeline
+from lerobot.processor import (
-from lerobot.processor.converters import (
+    RobotProcessorPipeline,
    robot_action_observation_to_transition,
    transition_to_robot_action,
 )
@@ -28,9 +28,9 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
    GripperVelocityToJoint,
    InverseKinematicsEEToJoints,
 )
-from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
+from lerobot.teleoperators.phone import Phone, PhoneConfig
 from lerobot.teleoperators.phone.config_phone import PhoneOS
 from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
 from lerobot.teleoperators.phone.teleop_phone import Phone
 from lerobot.types import RobotAction, RobotObservation
 from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
--- a/examples/port_datasets/port_droid.py
+++ b/examples/port_datasets/port_droid.py
@@ -22,8 +22,7 @@ from pathlib import Path
 import numpy as np
 import tensorflow_datasets as tfds
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from lerobot.datasets import LeRobotDataset, LeRobotDatasetMetadata
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds
 DROID_SHARDS = 2048
--- a/examples/port_datasets/slurm_aggregate_shards.py
+++ b/examples/port_datasets/slurm_aggregate_shards.py
@@ -36,7 +36,7 @@ class AggregateDatasets(PipelineStep):
    def run(self, data=None, rank: int = 0, world_size: int = 1):
        import logging
-        from lerobot.datasets.aggregate import aggregate_datasets
+        from lerobot.datasets import aggregate_datasets
        from lerobot.utils.utils import init_logging
        init_logging()
--- a/examples/port_datasets/slurm_upload.py
+++ b/examples/port_datasets/slurm_upload.py
@@ -26,8 +26,7 @@ from huggingface_hub import HfApi
 from huggingface_hub.constants import REPOCARD_NAME
 from port_droid import DROID_SHARDS
-from lerobot.datasets.dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata
+from lerobot.datasets import CODEBASE_VERSION, LeRobotDatasetMetadata, create_lerobot_dataset_card
 from lerobot.datasets.utils import create_lerobot_dataset_card
 from lerobot.utils.utils import init_logging
@@ -155,7 +154,7 @@ class UploadDataset(PipelineStep):
        from datasets.utils.tqdm import disable_progress_bars
        from huggingface_hub import CommitOperationAdd, preupload_lfs_files
-        from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+        from lerobot.datasets import LeRobotDatasetMetadata
        from lerobot.utils.utils import init_logging
        init_logging()
--- a/examples/rtc/eval_dataset.py
+++ b/examples/rtc/eval_dataset.py
@@ -109,15 +109,10 @@ except ImportError:
    MATPLOTLIB_AVAILABLE = False
    plt = None
-from lerobot.configs import parser
+from lerobot.configs import DatasetConfig, PreTrainedConfig, RTCAttentionSchedule, parser
-from lerobot.configs.default import DatasetConfig
+from lerobot.datasets import LeRobotDataset, LeRobotDatasetMetadata, resolve_delta_timestamps
-from lerobot.configs.policies import PreTrainedConfig
+from lerobot.policies import get_policy_class, make_pre_post_processors
-from lerobot.configs.types import RTCAttentionSchedule
+from lerobot.policies.rtc import RTCConfig
 from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
 from lerobot.datasets.factory import resolve_delta_timestamps
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.policies.factory import get_policy_class, make_pre_post_processors
 from lerobot.policies.rtc.configuration_rtc import RTCConfig
 from lerobot.policies.rtc.debug_visualizer import RTCDebugVisualizer
 from lerobot.utils.hub import HubMixin
 from lerobot.utils.utils import init_logging
--- a/examples/rtc/eval_with_real_robot.py
+++ b/examples/rtc/eval_with_real_robot.py
@@ -101,26 +101,21 @@ from threading import Event, Lock, Thread
 import torch
 from torch import Tensor
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig  # noqa: F401
+from lerobot.cameras.opencv import OpenCVCameraConfig  # noqa: F401
-from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig  # noqa: F401
+from lerobot.cameras.realsense import RealSenseCameraConfig  # noqa: F401
-from lerobot.cameras.zmq.configuration_zmq import ZMQCameraConfig  # noqa: F401
+from lerobot.cameras.zmq import ZMQCameraConfig  # noqa: F401
-from lerobot.configs import parser
+from lerobot.configs import PreTrainedConfig, RTCAttentionSchedule, parser
-from lerobot.configs.policies import PreTrainedConfig
+from lerobot.policies import get_policy_class, make_pre_post_processors
 from lerobot.configs.types import RTCAttentionSchedule
 from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features
 from lerobot.policies.factory import get_policy_class, make_pre_post_processors
 from lerobot.policies.rtc import ActionInterpolator, ActionQueue, LatencyTracker, RTCConfig
 from lerobot.processor import (
    NormalizerProcessorStep,
    RelativeActionsProcessorStep,
    TransitionKey,
    create_transition,
 )
 from lerobot.processor.factory import (
    make_default_robot_action_processor,
    make_default_robot_observation_processor,
    to_relative_actions,
 )
 from lerobot.processor.relative_action_processor import to_relative_actions
 from lerobot.rl.process import ProcessSignalHandler
 from lerobot.robots import (  # noqa: F401
    Robot,
@@ -133,6 +128,7 @@ from lerobot.robots import (  # noqa: F401
 )
 from lerobot.robots.utils import make_robot_from_config
 from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
 from lerobot.utils.feature_utils import build_dataset_frame, hw_to_dataset_features
 from lerobot.utils.hub import HubMixin
 from lerobot.utils.utils import init_logging
--- a/examples/so100_to_so100_EE/evaluate.py
+++ b/examples/so100_to_so100_EE/evaluate.py
@@ -14,19 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.configs.types import FeatureType, PolicyFeature
+from lerobot.common.control_utils import init_keyboard_listener
-from lerobot.datasets.feature_utils import combine_feature_dicts
+from lerobot.configs import FeatureType, PolicyFeature
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset, aggregate_pipeline_dataset_features, create_initial_features
 from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
 from lerobot.model.kinematics import RobotKinematics
-from lerobot.policies.act.modeling_act import ACTPolicy
+from lerobot.policies import make_pre_post_processors
-from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies.act import ACTPolicy
 from lerobot.processor import (
    RobotProcessorPipeline,
    make_default_teleop_action_processor,
 )
 from lerobot.processor.converters import (
    observation_to_transition,
    robot_action_observation_to_transition,
    transition_to_observation,
@@ -39,7 +36,7 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
 )
 from lerobot.scripts.lerobot_record import record_loop
 from lerobot.types import RobotAction, RobotObservation
-from lerobot.utils.control_utils import init_keyboard_listener
+from lerobot.utils.feature_utils import combine_feature_dicts
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import init_rerun
--- a/examples/so100_to_so100_EE/record.py
+++ b/examples/so100_to_so100_EE/record.py
@@ -15,13 +15,12 @@
 # limitations under the License.
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.datasets.feature_utils import combine_feature_dicts
+from lerobot.common.control_utils import init_keyboard_listener
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset, aggregate_pipeline_dataset_features, create_initial_features
 from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
 from lerobot.model.kinematics import RobotKinematics
-from lerobot.processor import RobotProcessorPipeline
+from lerobot.processor import (
-from lerobot.processor.converters import (
+    RobotProcessorPipeline,
    observation_to_transition,
    robot_action_observation_to_transition,
    transition_to_observation,
@@ -36,7 +35,7 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
 from lerobot.scripts.lerobot_record import record_loop
 from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
 from lerobot.types import RobotAction, RobotObservation
-from lerobot.utils.control_utils import init_keyboard_listener
+from lerobot.utils.feature_utils import combine_feature_dicts
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import init_rerun
--- a/examples/so100_to_so100_EE/replay.py
+++ b/examples/so100_to_so100_EE/replay.py
@@ -17,10 +17,10 @@
 import time
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 from lerobot.model.kinematics import RobotKinematics
-from lerobot.processor import RobotProcessorPipeline
+from lerobot.processor import (
-from lerobot.processor.converters import (
+    RobotProcessorPipeline,
    robot_action_observation_to_transition,
    transition_to_robot_action,
 )
--- a/examples/so100_to_so100_EE/teleoperate.py
+++ b/examples/so100_to_so100_EE/teleoperate.py
@@ -17,8 +17,8 @@
 import time
 from lerobot.model.kinematics import RobotKinematics
-from lerobot.processor import RobotProcessorPipeline
+from lerobot.processor import (
-from lerobot.processor.converters import (
+    RobotProcessorPipeline,
    robot_action_observation_to_transition,
    robot_action_to_transition,
    transition_to_robot_action,
--- a/examples/training/train_policy.py
+++ b/examples/training/train_policy.py
@@ -18,13 +18,11 @@ from pathlib import Path
 import torch
-from lerobot.configs.types import FeatureType
+from lerobot.configs import FeatureType
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from lerobot.datasets import LeRobotDataset, LeRobotDatasetMetadata
-from lerobot.datasets.feature_utils import dataset_to_policy_features
+from lerobot.policies import make_pre_post_processors
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.policies.diffusion import DiffusionConfig, DiffusionPolicy
-from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
+from lerobot.utils.feature_utils import dataset_to_policy_features
 from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
 from lerobot.policies.factory import make_pre_post_processors
 def main():
--- a/examples/training/train_with_streaming.py
+++ b/examples/training/train_with_streaming.py
@@ -19,14 +19,12 @@ from pathlib import Path
 import torch
-from lerobot.configs.types import FeatureType
+from lerobot.configs import FeatureType
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from lerobot.datasets import LeRobotDatasetMetadata, StreamingLeRobotDataset
-from lerobot.datasets.feature_utils import dataset_to_policy_features
+from lerobot.policies import make_pre_post_processors
-from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset
+from lerobot.policies.act import ACTConfig, ACTPolicy
 from lerobot.policies.act.configuration_act import ACTConfig
 from lerobot.policies.act.modeling_act import ACTPolicy
 from lerobot.policies.factory import make_pre_post_processors
 from lerobot.utils.constants import ACTION
 from lerobot.utils.feature_utils import dataset_to_policy_features
 def main():
--- a/examples/tutorial/act/act_training_example.py
+++ b/examples/tutorial/act/act_training_example.py
@@ -4,13 +4,11 @@ from pathlib import Path
 import torch
-from lerobot.configs.types import FeatureType
+from lerobot.configs import FeatureType
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from lerobot.datasets import LeRobotDataset, LeRobotDatasetMetadata
-from lerobot.datasets.feature_utils import dataset_to_policy_features
+from lerobot.policies import make_pre_post_processors
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.policies.act import ACTConfig, ACTPolicy
-from lerobot.policies.act.configuration_act import ACTConfig
+from lerobot.utils.feature_utils import dataset_to_policy_features
 from lerobot.policies.act.modeling_act import ACTPolicy
 from lerobot.policies.factory import make_pre_post_processors
 def make_delta_timestamps(delta_indices: list[int] | None, fps: int) -> list[float]:
--- a/examples/tutorial/act/act_using_example.py
+++ b/examples/tutorial/act/act_using_example.py
@@ -1,9 +1,9 @@
 import torch
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from lerobot.datasets import LeRobotDatasetMetadata
-from lerobot.policies.act.modeling_act import ACTPolicy
+from lerobot.policies import make_pre_post_processors
-from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies.act import ACTPolicy
 from lerobot.policies.utils import build_inference_frame, make_robot_action
 from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
--- a/examples/tutorial/async-inf/robot_client.py
+++ b/examples/tutorial/async-inf/robot_client.py
@@ -3,7 +3,7 @@ import threading
 from lerobot.async_inference.configs import RobotClientConfig
 from lerobot.async_inference.helpers import visualize_action_queue_size
 from lerobot.async_inference.robot_client import RobotClient
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
 from lerobot.robots.so_follower import SO100FollowerConfig
--- a/examples/tutorial/diffusion/diffusion_training_example.py
+++ b/examples/tutorial/diffusion/diffusion_training_example.py
@@ -4,13 +4,11 @@ from pathlib import Path
 import torch
-from lerobot.configs.types import FeatureType
+from lerobot.configs import FeatureType
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from lerobot.datasets import LeRobotDataset, LeRobotDatasetMetadata
-from lerobot.datasets.feature_utils import dataset_to_policy_features
+from lerobot.policies import make_pre_post_processors
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.policies.diffusion import DiffusionConfig, DiffusionPolicy
-from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
+from lerobot.utils.feature_utils import dataset_to_policy_features
 from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
 from lerobot.policies.factory import make_pre_post_processors
 def make_delta_timestamps(delta_indices: list[int] | None, fps: int) -> list[float]:
--- a/examples/tutorial/diffusion/diffusion_using_example.py
+++ b/examples/tutorial/diffusion/diffusion_using_example.py
@@ -1,9 +1,9 @@
 import torch
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from lerobot.datasets import LeRobotDatasetMetadata
-from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
+from lerobot.policies import make_pre_post_processors
-from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies.diffusion import DiffusionPolicy
 from lerobot.policies.utils import build_inference_frame, make_robot_action
 from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
--- a/examples/tutorial/pi0/using_pi0_example.py
+++ b/examples/tutorial/pi0/using_pi0_example.py
@@ -1,11 +1,11 @@
 import torch
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.datasets.feature_utils import hw_to_dataset_features
+from lerobot.policies import make_pre_post_processors
-from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies.pi0 import PI0Policy
 from lerobot.policies.pi0.modeling_pi0 import PI0Policy
 from lerobot.policies.utils import build_inference_frame, make_robot_action
 from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
 from lerobot.utils.feature_utils import hw_to_dataset_features
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
--- a/examples/tutorial/rl/hilserl_example.py
+++ b/examples/tutorial/rl/hilserl_example.py
@@ -6,17 +6,17 @@ from queue import Empty, Full
 import torch
 import torch.optim as optim
-from lerobot.datasets.feature_utils import hw_to_dataset_features
+from lerobot.datasets import LeRobotDataset
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig
-from lerobot.policies.sac.configuration_sac import SACConfig
+from lerobot.policies import SACConfig
 from lerobot.policies.sac.modeling_sac import SACPolicy
 from lerobot.policies.sac.reward_model.modeling_classifier import Classifier
 from lerobot.rl.buffer import ReplayBuffer
 from lerobot.rl.gym_manipulator import make_robot_env
 from lerobot.robots.so_follower import SO100FollowerConfig
 from lerobot.teleoperators import TeleopEvents
 from lerobot.teleoperators.so_leader import SO100LeaderConfig
-from lerobot.teleoperators.utils import TeleopEvents
+from lerobot.utils.feature_utils import hw_to_dataset_features
 LOG_EVERY = 10
 SEND_EVERY = 10
--- a/examples/tutorial/rl/reward_classifier_example.py
+++ b/examples/tutorial/rl/reward_classifier_example.py
@@ -1,8 +1,7 @@
 import torch
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
-from lerobot.policies.factory import make_policy, make_pre_post_processors
+from lerobot.policies import RewardClassifierConfig, make_policy, make_pre_post_processors
 from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig
 def main():
--- a/examples/tutorial/smolvla/using_smolvla_example.py
+++ b/examples/tutorial/smolvla/using_smolvla_example.py
@@ -1,11 +1,11 @@
 import torch
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.datasets.feature_utils import hw_to_dataset_features
+from lerobot.policies import make_pre_post_processors
-from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies.smolvla import SmolVLAPolicy
 from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy
 from lerobot.policies.utils import build_inference_frame, make_robot_action
 from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
 from lerobot.utils.feature_utils import hw_to_dataset_features
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,45 +58,74 @@ classifiers = [
 keywords = ["lerobot", "huggingface", "robotics",  "machine learning", "artificial intelligence"]
 dependencies = [
-
+    # Core ML
    # Hugging Face dependencies
    "datasets>=4.0.0,<5.0.0",
    "diffusers>=0.27.2,<0.36.0",
    "huggingface-hub>=1.0.0,<2.0.0",
    "accelerate>=1.10.0,<2.0.0",
    # Core dependencies
    "numpy>=2.0.0,<2.3.0", # NOTE: Explicitly listing numpy helps the resolver converge faster. Upper bound imposed by opencv-python-headless.
    "setuptools>=71.0.0,<81.0.0",
    "cmake>=3.29.0.1,<4.2.0",
    "packaging>=24.2,<26.0",
    "torch>=2.7,<2.11.0",
    "torchcodec>=0.3.0,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # NOTE: Windows support starts at version 0.7 (needs torch==2.8), ffmpeg>=8 support starts at version 0.8.1 (needs torch==2.9), system-wide ffmpeg support starts at version 0.10 (needs torch==2.10).
    "torchvision>=0.22.0,<0.26.0",
-
+    "numpy>=2.0.0,<2.3.0", # NOTE: Explicitly listing numpy helps the resolver converge faster. Upper bound imposed by opencv-python-headless.
    "einops>=0.8.0,<0.9.0",
    "opencv-python-headless>=4.9.0,<4.14.0",
-    "av>=15.0.0,<16.0.0",
+    "Pillow>=10.0.0,<13.0.0",
-    "jsonlines>=4.0.0,<5.0.0",
+    "einops>=0.8.0,<0.9.0",
    "pynput>=1.7.8,<1.9.0",
    "pyserial>=3.5,<4.0",
-    "wandb>=0.24.0,<0.25.0",
+    # Config & Hub
    "draccus==0.10.0", # TODO: Relax version constraint
-    "gymnasium>=1.1.1,<2.0.0",
+    "huggingface-hub>=1.0.0,<2.0.0",
-    "rerun-sdk>=0.24.0,<0.27.0",
+    "requests>=2.32.0,<3.0.0",
-    # Support dependencies
+    # Environments
-    "deepdiff>=7.0.1,<9.0.0",
+    # NOTE: gymnasium is used in lerobot.envs (lerobot-train, lerobot-eval), policies/factory,
-    "imageio[ffmpeg]>=2.34.0,<3.0.0",
+    # and robots/unitree. Moving it to an optional extra would require import guards across many
    # tightly-coupled modules. Candidate for a future refactor to decouple envs from the core.
    "gymnasium>=1.1.1,<2.0.0",
    # Serialization & checkpointing
    "safetensors>=0.4.3,<1.0.0",
    # Lightweight utilities
    "packaging>=24.2,<26.0",
    "termcolor>=2.4.0,<4.0.0",
    "tqdm>=4.66.0,<5.0.0",
    # Build tools (required by opencv-python-headless on some platforms)
    "cmake>=3.29.0.1,<4.2.0",
    "setuptools>=71.0.0,<81.0.0",
 ]
 # Optional dependencies
 [project.optional-dependencies]
 # ── Feature-scoped extras ──────────────────────────────────
 dataset = [
    "datasets>=4.0.0,<5.0.0",
    "pandas>=2.0.0,<3.0.0", # NOTE: Transitive dependency of datasets
    "pyarrow>=21.0.0,<30.0.0", # NOTE: Transitive dependency of datasets
    "lerobot[av-dep]",
    "torchcodec>=0.3.0,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # NOTE: Windows support starts at version 0.7 (needs torch==2.8), ffmpeg>=8 support starts at version 0.8.1 (needs torch==2.9), system-wide ffmpeg support starts at version 0.10 (needs torch==2.10).
    "jsonlines>=4.0.0,<5.0.0",
 ]
 training = [
    "lerobot[dataset]",
    "accelerate>=1.10.0,<2.0.0",
    "wandb>=0.24.0,<0.25.0",
 ]
 hardware = [
    "pynput>=1.7.8,<1.9.0",
    "pyserial>=3.5,<4.0",
    "deepdiff>=7.0.1,<9.0.0",
 ]
 viz = [
    "rerun-sdk>=0.24.0,<0.27.0",
 ]
 # ── User-facing composite extras (map to CLI scripts) ─────
 # lerobot-record, lerobot-replay, lerobot-calibrate, lerobot-teleoperate, etc.
 core_scripts = ["lerobot[dataset]", "lerobot[hardware]", "lerobot[viz]"]
 # lerobot-eval -- base evaluation framework. You also need the policy's extra (e.g., lerobot[pi])
 # and the environment's extra (e.g., lerobot[pusht]) if evaluating in simulation.
 evaluation = ["lerobot[av-dep]"]
 # lerobot-dataset-viz, lerobot-imgtransform-viz
 dataset_viz = ["lerobot[dataset]", "lerobot[viz]"]
 # Common
 av-dep = ["av>=15.0.0,<16.0.0"]
 pygame-dep = ["pygame>=2.5.1,<2.7.0"]
 placo-dep = ["placo>=0.9.6,<0.9.17"]
 transformers-dep = ["transformers==5.3.0"] # TODO(Steven): https://github.com/huggingface/lerobot/pull/3249
@@ -104,6 +133,7 @@ grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"]
 can-dep = ["python-can>=4.2.0,<5.0.0"]
 peft-dep = ["peft>=0.18.0,<1.0.0"]
 scipy-dep = ["scipy>=1.14.0,<2.0.0"]
 diffusers-dep = ["diffusers>=0.27.2,<0.36.0"]
 qwen-vl-utils-dep = ["qwen-vl-utils>=0.0.11,<0.1.0"]
 matplotlib-dep = ["matplotlib>=3.10.3,<4.0.0", "contourpy>=1.3.0,<2.0.0"] # NOTE: Explicitly listing contourpy helps the resolver converge faster.
@@ -136,28 +166,28 @@ intelrealsense = [
 phone = ["hebi-py>=2.8.0,<2.12.0", "teleop>=0.1.0,<0.2.0", "fastapi<1.0", "lerobot[scipy-dep]"]
 # Policies
 diffusion = ["lerobot[diffusers-dep]"]
 wallx = [
    "lerobot[transformers-dep]",
-    "lerobot[peft]",
+    "lerobot[peft-dep]",
    "lerobot[scipy-dep]",
    "torchdiffeq>=0.2.4,<0.3.0",
    "lerobot[qwen-vl-utils-dep]",
 ]
 pi = ["lerobot[transformers-dep]", "lerobot[scipy-dep]"]
-smolvla = ["lerobot[transformers-dep]", "num2words>=0.5.14,<0.6.0", "accelerate>=1.7.0,<2.0.0", "safetensors>=0.4.3,<1.0.0"]
+smolvla = ["lerobot[transformers-dep]", "num2words>=0.5.14,<0.6.0", "accelerate>=1.7.0,<2.0.0"]
-multi_task_dit = ["lerobot[transformers-dep]"]
+multi_task_dit = ["lerobot[transformers-dep]", "lerobot[diffusers-dep]"]
 groot = [
    "lerobot[transformers-dep]",
-    "lerobot[peft]",
+    "lerobot[peft-dep]",
    "lerobot[diffusers-dep]",
    "dm-tree>=0.1.8,<1.0.0",
    "timm>=1.0.0,<1.1.0",
    "safetensors>=0.4.3,<1.0.0",
    "Pillow>=10.0.0,<13.0.0",
    "decord>=0.6.0,<1.0.0; (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "ninja>=1.11.1,<2.0.0",
    "flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
 ]
-sarm = ["lerobot[transformers-dep]", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"]
+sarm = ["lerobot[transformers-dep]", "pydantic>=2.0.0,<3.0.0", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"]
 xvla = ["lerobot[transformers-dep]"]
 hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
@@ -166,31 +196,42 @@ async = ["lerobot[grpcio-dep]", "lerobot[matplotlib-dep]"]
 peft = ["lerobot[transformers-dep]", "lerobot[peft-dep]"]
 # Development
-dev = ["pre-commit>=3.7.0,<5.0.0", "debugpy>=1.8.1,<1.9.0", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1", "mypy>=1.19.1"]
+dev = ["pre-commit>=3.7.0,<5.0.0", "debugpy>=1.8.1,<1.9.0", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1", "mypy>=1.19.1", "ruff>=0.14.1"]
 test = ["pytest>=8.1.0,<9.0.0", "pytest-timeout>=2.4.0,<3.0.0", "pytest-cov>=5.0.0,<8.0.0", "mock-serial>=0.0.1,<0.1.0 ; sys_platform != 'win32'"]
 video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"]
 # Simulation
 # NOTE: Explicitly listing scipy helps flatten the dependency tree.
-aloha = ["gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
+aloha = ["lerobot[dataset]", "gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
-pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
+pusht = ["lerobot[dataset]", "gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
-libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"]
+libero = ["lerobot[dataset]", "lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"]
-metaworld = ["metaworld==3.0.0", "lerobot[scipy-dep]"]
+metaworld = ["lerobot[dataset]", "metaworld==3.0.0", "lerobot[scipy-dep]"]
 # All
 all = [
    # Feature-scoped extras
    "lerobot[dataset]",
    "lerobot[training]",
    "lerobot[hardware]",
    "lerobot[viz]",
    # NOTE(resolver hint): scipy is pulled in transitively via lerobot[scipy-dep] through
    # multiple extras (aloha, metaworld, pi, wallx, phone). Listing it explicitly
    # helps pip's resolver converge by constraining scipy early, before it encounters
    # the loose scipy requirements from transitive deps like dm-control and metaworld.
    "scipy>=1.14.0,<2.0.0",
    "lerobot[dynamixel]",
    "lerobot[feetech]",
    "lerobot[damiao]",
    "lerobot[robstride]",
    "lerobot[gamepad]",
    "lerobot[hopejr]",
    "lerobot[lekiwi]",
    "lerobot[openarms]",
    "lerobot[reachy2]",
    "lerobot[kinematics]",
    "lerobot[intelrealsense]",
    "lerobot[diffusion]",
    "lerobot[multi_task_dit]",
    "lerobot[wallx]",
    "lerobot[pi]",
    "lerobot[smolvla]",
@@ -267,7 +308,9 @@ ignore = [
 ]
 [tool.ruff.lint.per-file-ignores]
-"__init__.py" = ["F401", "F403"]
+"__init__.py" = ["F401", "F403", "E402"]
 # E402: conditional-import guards (TYPE_CHECKING / is_package_available) must precede the imports they protect
 "src/lerobot/scripts/convert_dataset_v21_to_v30.py" = ["E402"]
 "src/lerobot/policies/wall_x/**" = ["N801", "N812", "SIM102", "SIM108", "SIM210", "SIM211", "B006", "B007", "SIM118"] # Supprese these as they are coming from original Qwen2_5_vl code TODO(pepijn): refactor original
 [tool.ruff.lint.isort]
--- a/scripts/ci/extract_task_descriptions.py
+++ b/scripts/ci/extract_task_descriptions.py
@@ -0,0 +1,114 @@
 #!/usr/bin/env python3
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Extract natural-language task descriptions for a benchmark suite.
 Runs inside the benchmark Docker container (where the env library is installed)
 immediately after lerobot-eval, writing a JSON file that parse_eval_metrics.py
 picks up and embeds in metrics.json.
 Output format: {"<suite>_<task_idx>": "<nl instruction>", ...}
 Usage:
    python scripts/ci/extract_task_descriptions.py \\
        --env libero --task libero_spatial \\
        --output /tmp/eval-artifacts/task_descriptions.json
 """
 from __future__ import annotations
 import argparse
 import json
 import re
 import sys
 from pathlib import Path
 # LIBERO-plus derives task.language by space-joining the perturbation-variant
 # filename, so strip the perturbation metadata blob to recover the base prompt.
 _LIBERO_PERTURBATION_TAIL_RE = re.compile(
    r"(?:\s(?:view|initstate|noise|add|tb|table|light|level)(?:\s\d+)+)+$"
 )
 def _strip_libero_perturbation_tail(instruction: str) -> str:
    return _LIBERO_PERTURBATION_TAIL_RE.sub("", instruction).strip()
 def _libero_descriptions(task_suite: str) -> dict[str, str]:
    """Map every task of a LIBERO suite to its cleaned language instruction.
    Keys have the form "<task_suite>_<task index>"; each value is the task's
    ``language`` attribute passed through ``_strip_libero_perturbation_tail``.
    An unknown suite name is reported on stderr and yields an empty mapping.
    """
    # Imported inside the function so the module can be loaded without libero.
    from libero.libero import benchmark  # type: ignore[import-untyped]
    suite_dict = benchmark.get_benchmark_dict()
    if task_suite in suite_dict:
        # The benchmark dict maps suite names to factories; call it to get the suite.
        suite = suite_dict[task_suite]()
        descriptions: dict[str, str] = {}
        for i in range(suite.n_tasks):
            raw_instruction = suite.get_task(i).language
            descriptions[f"{task_suite}_{i}"] = _strip_libero_perturbation_tail(raw_instruction)
        return descriptions
    print(
        f"[extract_task_descriptions] Unknown LIBERO suite '{task_suite}'. "
        f"Available: {list(suite_dict.keys())}",
        file=sys.stderr,
    )
    return {}
 def _metaworld_descriptions(task_name: str) -> dict[str, str]:
    # MetaWorld tasks don't expose a separate NL description attribute;
    # use a cleaned version of the task name as the description.
    label = task_name.removeprefix("metaworld-").replace("-", " ").strip()
    return {f"{task_name}_0": label}
 def _robomme_descriptions(task_names: str) -> dict[str, str]:
    return {
        f"{task_name}_0": task_name.replace("_", " ").strip()
        for task_name in (task.strip() for task in task_names.split(","))
        if task_name
    }
 def main() -> int:
    """Write the task descriptions for the requested env/task to a JSON file.
    Parses ``--env``, ``--task`` and ``--output``, picks the description
    extractor for the env family and serializes its result to ``--output``
    (parent directories are created as needed). An env without an extractor,
    or an extractor that raises, is reported on stderr and results in an empty
    JSON object being written.
    Returns:
        0 once the output file has been written.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    for flag, help_text in (
        ("--env", "Environment family (libero, metaworld, ...)"),
        ("--task", "Task/suite name (e.g. libero_spatial)"),
        ("--output", "Path to write task_descriptions.json"),
    ):
        parser.add_argument(flag, required=True, help=help_text)
    args = parser.parse_args()
    # Env family -> extractor; libero and libero_plus share one implementation.
    extractors = {
        "libero": _libero_descriptions,
        "libero_plus": _libero_descriptions,
        "metaworld": _metaworld_descriptions,
        "robomme": _robomme_descriptions,
    }
    descriptions: dict[str, str] = {}
    try:
        extractor = extractors.get(args.env)
        if extractor is None:
            print(
                f"[extract_task_descriptions] No description extractor for env '{args.env}'.",
                file=sys.stderr,
            )
        else:
            descriptions = extractor(args.task)
    except Exception as exc:
        # Best effort: a failing extractor is downgraded to a warning so the
        # (empty) output file is still produced.
        print(f"[extract_task_descriptions] Warning: {exc}", file=sys.stderr)
    out_path = Path(args.output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(descriptions, indent=2)
    out_path.write_text(serialized)
    print(f"[extract_task_descriptions] {len(descriptions)} descriptions → {out_path}")
    return 0
 if __name__ == "__main__":
    sys.exit(main())
--- a/scripts/ci/parse_eval_metrics.py
+++ b/scripts/ci/parse_eval_metrics.py
@@ -0,0 +1,147 @@
 #!/usr/bin/env python3
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Parse lerobot-eval output into a small metrics.json artifact.
 Reads eval_info.json written by lerobot-eval --output_dir and extracts the
 key metrics needed by the health dashboard. Handles both single-task and
 multi-task eval output formats.
 NOTE: This script runs on the bare CI runner (not inside Docker), so it
 must use only Python stdlib modules. Do not add third-party imports.
 Usage:
    python scripts/ci/parse_eval_metrics.py \\
        --artifacts-dir /tmp/libero-artifacts \\
        --env libero \\
        --task libero_spatial \\
        --policy pepijn223/smolvla_libero
 Writes <artifacts-dir>/metrics.json. The CI workflow then uploads this file
 as a GitHub Actions artifact named "<env>-metrics".
 """
 from __future__ import annotations
 import argparse
 import json
 import math
 import sys
 from pathlib import Path
 def _safe_float(v: float | int | None) -> float | None:
    if v is None:
        return None
    f = float(v)
    return None if math.isnan(f) else f
 def _safe_int(v: float | int | None) -> int | None:
    if v is None:
        return None
    f = float(v)
    return None if math.isnan(f) else int(f)
 def _extract_metrics(info: dict) -> tuple[float | None, int | None, float | None, float | None]:
    """Extract (pc_success, n_episodes, avg_sum_reward, eval_s) from eval_info.json.
    Handles two output shapes:
      - Single-task: {"aggregated": {"pc_success": 80.0, ...}}
      - Multi-task:  {"overall": {"pc_success": 80.0, "n_episodes": 5, ...}}
    """
    for key in ("aggregated", "overall"):
        if key not in info:
            continue
        agg = info[key]
        pc = agg.get("pc_success")
        n = agg.get("n_episodes")
        reward = agg.get("avg_sum_reward")
        eval_s = agg.get("eval_s")
        if pc is not None and not math.isnan(pc):
            return (
                float(pc),
                _safe_int(n),
                _safe_float(reward),
                _safe_float(eval_s),
            )
    return None, None, None, None
 def main() -> int:
    """Collect eval results from the artifacts directory and write metrics.json.
    Reads ``eval_info.json`` and, when present, ``task_descriptions.json`` from
    ``--artifacts-dir`` and writes ``metrics.json`` into the same directory.
    A missing or unreadable input is reported on stderr and leaves the
    corresponding fields at their defaults (``None`` / ``{}``), so a metrics
    file is still produced when the eval itself failed.
    Returns:
        0 once metrics.json has been written.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--artifacts-dir", required=True, help="Path to the mounted artifacts volume")
    parser.add_argument("--env", required=True, help="Environment name (e.g. libero)")
    parser.add_argument("--task", required=True, help="Task name (e.g. libero_spatial)")
    parser.add_argument("--policy", required=True, help="Policy hub path (e.g. pepijn223/smolvla_libero)")
    args = parser.parse_args()
    artifacts_dir = Path(args.artifacts_dir)
    eval_info_path = artifacts_dir / "eval_info.json"
    pc_success: float | None = None
    n_episodes: int | None = None
    avg_sum_reward: float | None = None
    eval_s: float | None = None
    if eval_info_path.exists():
        try:
            info = json.loads(eval_info_path.read_text())
            pc_success, n_episodes, avg_sum_reward, eval_s = _extract_metrics(info)
        except (OSError, ValueError, KeyError, TypeError, AttributeError, OverflowError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError. The
            # remaining types cover unreadable files and unexpectedly shaped or
            # out-of-range values, so a malformed file degrades to null metrics
            # instead of aborting before metrics.json is written.
            print(f"[parse_eval_metrics] Warning: could not parse eval_info.json: {exc}", file=sys.stderr)
    else:
        print(
            f"[parse_eval_metrics] Warning: {eval_info_path} not found — eval may have failed.",
            file=sys.stderr,
        )
    task_descriptions: dict[str, str] = {}
    task_desc_path = artifacts_dir / "task_descriptions.json"
    if task_desc_path.exists():
        try:
            loaded = json.loads(task_desc_path.read_text())
        except (OSError, ValueError) as exc:
            print(
                f"[parse_eval_metrics] Warning: could not parse task_descriptions.json: {exc}",
                file=sys.stderr,
            )
        else:
            if isinstance(loaded, dict):
                task_descriptions = loaded
            else:
                # Keep the "task_descriptions" field a JSON object in every case.
                print(
                    "[parse_eval_metrics] Warning: task_descriptions.json is not a JSON object; ignoring it.",
                    file=sys.stderr,
                )
    metrics = {
        "env": args.env,
        "task": args.task,
        "policy": args.policy,
        "pc_success": pc_success,
        "n_episodes": n_episodes,
        "avg_sum_reward": avg_sum_reward,
        "eval_s": eval_s,
        "task_descriptions": task_descriptions,
    }
    # The directory may not exist if the eval never created it; create it so
    # the null-metrics file can still be written.
    artifacts_dir.mkdir(parents=True, exist_ok=True)
    out_path = artifacts_dir / "metrics.json"
    out_path.write_text(json.dumps(metrics, indent=2))
    print(f"[parse_eval_metrics] Written: {out_path}")
    print(json.dumps(metrics, indent=2))
    return 0
 if __name__ == "__main__":
    sys.exit(main())
--- a/spaces/benchmark-leaderboard/README.md
+++ b/spaces/benchmark-leaderboard/README.md
@@ -0,0 +1,27 @@
 ---
 title: LeRobot Benchmark Leaderboard
 emoji: 🤖
 colorFrom: yellow
 colorTo: orange
 sdk: gradio
 sdk_version: 5.29.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 short_description: Benchmark history for LeRobot policy x benchmark runs
 ---
 # LeRobot Benchmark Leaderboard
 This Space reads immutable benchmark rows from a Hugging Face dataset and shows:
 - Latest result per policy and benchmark
 - Historical trends over time
 - Direct links to uploaded eval and config artifacts
 ## Configuration
 Set `BENCHMARK_RESULTS_REPO` in the Space settings if you want to point the UI
 at a different public dataset. The default is:
 - `lerobot/benchmark-history`
--- a/spaces/benchmark-leaderboard/app.py
+++ b/spaces/benchmark-leaderboard/app.py
@@ -0,0 +1,226 @@
 # Copyright 2026 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import annotations
 import json
 import os
 import time
 from pathlib import Path
 from typing import Any
 import gradio as gr
 import pandas as pd
 import plotly.express as px
 from huggingface_hub import HfApi, hf_hub_download
 # Hugging Face dataset repo that holds the benchmark rows. It can be overridden
 # through the BENCHMARK_RESULTS_REPO environment variable.
 RESULTS_REPO = os.environ.get("BENCHMARK_RESULTS_REPO", "lerobot/benchmark-history")
 # Directory handed to `hf_hub_download` as its cache; created at import time.
 CACHE_DIR = Path("/tmp/benchmark-leaderboard-cache")
 CACHE_DIR.mkdir(parents=True, exist_ok=True)
 # Maximum age (seconds) of an in-memory entry before `load_rows` rebuilds it.
 CACHE_TTL_S = 300
 # In-memory cache: key -> (time.monotonic() value at store time, DataFrame).
 _CACHE: dict[str, tuple[float, pd.DataFrame]] = {}
 def _row_to_record(row: dict[str, Any]) -> dict[str, Any]:
    overall = row.get("eval", {}).get("overall", {})
    resources = row.get("resources", {})
    timings = row.get("timings", {})
    artifact_urls = row.get("artifact_urls", {})
    return {
        "created_at": row.get("created_at"),
        "benchmark": row.get("benchmark"),
        "policy": row.get("policy"),
        "success_rate": overall.get("pc_success"),
        "n_episodes": overall.get("n_episodes"),
        "avg_sum_reward": overall.get("avg_sum_reward"),
        "train_wall_time_s": timings.get("train_wall_time_s"),
        "eval_wall_time_s": timings.get("eval_wall_time_s"),
        "total_wall_time_s": timings.get("total_wall_time_s"),
        "num_gpus": resources.get("num_gpus"),
        "microbatch_per_gpu": resources.get("microbatch_per_gpu"),
        "gradient_accumulation_steps": resources.get("gradient_accumulation_steps"),
        "effective_batch_size": resources.get("effective_batch_size"),
        "git_commit": row.get("git_commit"),
        "row_url": artifact_urls.get("row"),
        "eval_info_url": artifact_urls.get("eval_info"),
        "train_config_url": artifact_urls.get("train_config"),
    }
 def load_rows(repo_id: str = RESULTS_REPO) -> pd.DataFrame:
    """Download every benchmark row of a results dataset into a DataFrame.

    Results are memoized in the module-level ``_CACHE`` for ``CACHE_TTL_S``
    seconds per ``repo_id``, so repeated UI refreshes do not hit the Hub.

    Args:
        repo_id: Hugging Face dataset repo to read. Only files whose path
            starts with ``rows/`` are loaded.

    Returns:
        One record per row file, sorted by ``created_at`` descending. The
        frame is empty when the repo holds no row files.
    """
    cache_key = f"rows::{repo_id}"
    cached = _CACHE.get(cache_key)
    if cached is not None and (time.monotonic() - cached[0]) < CACHE_TTL_S:
        return cached[1]
    api = HfApi()
    files = [path for path in api.list_repo_files(repo_id=repo_id, repo_type="dataset") if path.startswith("rows/")]
    records: list[dict[str, Any]] = []
    for path_in_repo in sorted(files, reverse=True):
        local_path = hf_hub_download(repo_id=repo_id, repo_type="dataset", filename=path_in_repo, cache_dir=CACHE_DIR)
        # Decode explicitly as UTF-8 rather than with the locale's preferred
        # encoding, so non-ASCII content reads the same on every host.
        with open(local_path, encoding="utf-8") as f:
            row = json.load(f)
        records.append(_row_to_record(row))
    df = pd.DataFrame.from_records(records)
    if not df.empty:
        df["created_at"] = pd.to_datetime(df["created_at"], utc=True)
        df = df.sort_values("created_at", ascending=False).reset_index(drop=True)
    # Store the monotonic timestamp with the frame so the TTL check above is
    # immune to wall-clock adjustments.
    _CACHE[cache_key] = (time.monotonic(), df)
    return df
 def make_latest_table(df: pd.DataFrame) -> pd.DataFrame:
    """Return the most recent run for every (benchmark, policy) pair.

    Args:
        df: Frame of benchmark records, including a ``created_at`` column.

    Returns:
        One row per (benchmark, policy) pair, taken entirely from that pair's
        newest run. Rows are ordered by benchmark, then by success rate
        descending with missing success rates last. An empty input is
        returned unchanged.
    """
    if df.empty:
        return df
    key_columns = ["benchmark", "policy"]
    # `GroupBy.first()` picks the first *non-null* value per column, which
    # would back-fill a missing metric of the newest run with a value from an
    # older run. Keeping the first whole row per key avoids mixing runs.
    latest = (
        df.sort_values("created_at", ascending=False)
        # Rows without a benchmark or policy cannot be attributed to a pair.
        .dropna(subset=key_columns)
        .drop_duplicates(subset=key_columns, keep="first")
        .sort_values(["benchmark", "success_rate"], ascending=[True, False], na_position="last")
        .reset_index(drop=True)
    )
    return latest[
        [
            "benchmark",
            "policy",
            "success_rate",
            "n_episodes",
            "train_wall_time_s",
            "eval_wall_time_s",
            "num_gpus",
            "effective_batch_size",
            "git_commit",
            "row_url",
            "eval_info_url",
            "train_config_url",
        ]
    ]
 def make_history_figure(df: pd.DataFrame, benchmark: str, policy: str | None) -> Any:
    """Plot the success-rate history of one benchmark.

    Args:
        df: Frame of benchmark records.
        benchmark: Benchmark whose runs are plotted.
        policy: Restrict the plot to this policy. ``None``, an empty string
            or ``"All"`` keeps every policy.

    Returns:
        A Plotly line figure with one trace per policy, or an empty
        placeholder figure when no run matches.
    """
    # A frame built from zero records has no columns at all, so the
    # `df["benchmark"]` lookup below would raise KeyError.
    if df.empty or "benchmark" not in df.columns:
        return px.line(title="No benchmark rows found")
    filtered = df[df["benchmark"] == benchmark]
    if policy and policy != "All":
        filtered = filtered[filtered["policy"] == policy]
    if filtered.empty:
        return px.line(title="No benchmark rows found")
    fig = px.line(
        filtered.sort_values("created_at"),
        x="created_at",
        y="success_rate",
        color="policy",
        markers=True,
        hover_data=["git_commit", "num_gpus", "train_wall_time_s", "eval_wall_time_s"],
        title=f"{benchmark} success rate history",
    )
    fig.update_layout(yaxis_title="Success rate (%)", xaxis_title="Run time")
    return fig
 def make_run_markdown(df: pd.DataFrame, benchmark: str, policy: str | None) -> str:
    """Summarize the newest matching run as Markdown.

    Args:
        df: Frame of benchmark records.
        benchmark: Benchmark to summarize.
        policy: Restrict the summary to this policy. ``None``, an empty
            string or ``"All"`` keeps every policy.

    Returns:
        Paragraphs separated by blank lines describing the most recent run,
        with a link for each artifact URL that is present. Returns a short
        placeholder sentence when no run matches.
    """
    # A frame built from zero records has no columns at all, so the
    # `df["benchmark"]` lookup below would raise KeyError.
    if df.empty or "benchmark" not in df.columns:
        return "No matching runs yet."
    filtered = df[df["benchmark"] == benchmark]
    if policy and policy != "All":
        filtered = filtered[filtered["policy"] == policy]
    if filtered.empty:
        return "No matching runs yet."
    latest = filtered.sort_values("created_at", ascending=False).iloc[0]
    # Missing URLs show up as None/NaN in the frame; normalize both to None.
    row_link = latest["row_url"] if pd.notna(latest["row_url"]) else None
    eval_link = latest["eval_info_url"] if pd.notna(latest["eval_info_url"]) else None
    train_link = latest["train_config_url"] if pd.notna(latest["train_config_url"]) else None
    lines = [
        f"Latest run: `{latest['policy']}` on `{latest['benchmark']}`",
        f"Success rate: `{latest['success_rate']}`",
        f"GPUs: `{latest['num_gpus']}`",
        f"Effective batch size: `{latest['effective_batch_size']}`",
        f"Commit: `{latest['git_commit']}`",
    ]
    if row_link:
        lines.append(f"Row JSON: [open]({row_link})")
    if eval_link:
        lines.append(f"Eval Info: [open]({eval_link})")
    if train_link:
        lines.append(f"Train Config: [open]({train_link})")
    return "\n\n".join(lines)
 def refresh_view(benchmark: str, policy: str) -> tuple[pd.DataFrame, dict[str, Any], Any, str]:
    """Recompute every UI output for the current dropdown selections.

    Args:
        benchmark: Selected benchmark. Replaced by the first known benchmark
            when it is not among the loaded ones.
        policy: Selected policy. Reset to ``"All"`` when it is not available
            for the effective benchmark.

    Returns:
        The latest-results table, an update for the policy dropdown (choices
        and value), the history figure, and the Markdown run summary.
    """
    rows = load_rows()
    table = make_latest_table(rows)
    has_rows = not rows.empty

    known_benchmarks = sorted(rows["benchmark"].dropna().unique().tolist()) if has_rows else []
    if known_benchmarks and benchmark not in known_benchmarks:
        benchmark = known_benchmarks[0]

    # "All" is always offered; concrete policies come from the chosen benchmark.
    policy_choices = ["All"]
    if benchmark and has_rows:
        for_benchmark = rows[rows["benchmark"] == benchmark]
        policy_choices += sorted(for_benchmark["policy"].dropna().unique().tolist())
    if policy not in policy_choices:
        policy = "All"

    figure = make_history_figure(rows, benchmark, policy)
    markdown = make_run_markdown(rows, benchmark, policy)
    policy_update = gr.update(choices=policy_choices, value=policy)
    return table, policy_update, figure, markdown
 # Build the Gradio UI: a header, the selection controls, and three result views.
 with gr.Blocks(title="LeRobot Benchmark Leaderboard") as demo:
    gr.Markdown(
        f"""
 # LeRobot Benchmark Leaderboard
 Results dataset: [`{RESULTS_REPO}`](https://huggingface.co/datasets/{RESULTS_REPO})
 """
    )
    # Selection controls. The benchmark choices start empty and are filled in
    # by `_initial_state` when the page loads.
    with gr.Row():
        benchmark_dropdown = gr.Dropdown(label="Benchmark", choices=[])
        policy_dropdown = gr.Dropdown(label="Policy", choices=["All"], value="All")
        refresh_button = gr.Button("Refresh")
    # Result views, populated by `_initial_state` and `refresh_view`.
    latest_table = gr.Dataframe(label="Latest Results", interactive=False)
    history_plot = gr.Plot(label="History")
    latest_summary = gr.Markdown()
    def _initial_state():
        """Compute the initial value of every component on page load.

        Selects the first benchmark in sorted order (or an empty string when
        no rows exist) with the policy set to "All", and delegates the rest to
        `refresh_view`. The returned tuple follows the order of the `outputs`
        list passed to `demo.load` below.
        """
        df = load_rows()
        benchmarks = sorted(df["benchmark"].dropna().unique().tolist()) if not df.empty else []
        benchmark = benchmarks[0] if benchmarks else ""
        latest, policy_choices, history, summary = refresh_view(benchmark, "All")
        return (
            gr.update(choices=benchmarks, value=benchmark),
            policy_choices,
            latest,
            history,
            summary,
        )
    # Populate all components once when the page is opened.
    demo.load(
        _initial_state,
        outputs=[benchmark_dropdown, policy_dropdown, latest_table, history_plot, latest_summary],
    )
    # The button and both dropdowns share `refresh_view`, so each of these
    # events re-renders the table, policy choices, plot and summary together.
    refresh_button.click(
        refresh_view,
        inputs=[benchmark_dropdown, policy_dropdown],
        outputs=[latest_table, policy_dropdown, history_plot, latest_summary],
    )
    benchmark_dropdown.change(
        refresh_view,
        inputs=[benchmark_dropdown, policy_dropdown],
        outputs=[latest_table, policy_dropdown, history_plot, latest_summary],
    )
    policy_dropdown.change(
        refresh_view,
        inputs=[benchmark_dropdown, policy_dropdown],
        outputs=[latest_table, policy_dropdown, history_plot, latest_summary],
    )
 # Start the app only when this file is executed as a script.
 if __name__ == "__main__":
    demo.launch()
--- a/spaces/benchmark-leaderboard/requirements.txt
+++ b/spaces/benchmark-leaderboard/requirements.txt
@@ -0,0 +1,4 @@
 gradio>=5.0.0,<6.0.0
 plotly>=5.18.0
 pandas>=2.0.0
 huggingface-hub>=1.0.0,<2.0.0
--- a/src/lerobot/init.py
+++ b/src/lerobot/init.py
@@ -13,188 +13,39 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-This file contains lists of available environments, dataset and policies to reflect the current state of LeRobot library.
+LeRobot -- PyTorch library for real-world robotics.
 We do not want to import all the dependencies, but instead we keep it lightweight to ensure fast access to these variables.
-Example:
+Provides datasets, pretrained policies, and tools for training, evaluation,
-    ```python
+data collection, and robot control. Integrates with Hugging Face Hub for
-        import lerobot
+model and dataset sharing.
        print(lerobot.available_envs)
        print(lerobot.available_tasks_per_env)
        print(lerobot.available_datasets)
        print(lerobot.available_datasets_per_env)
        print(lerobot.available_real_world_datasets)
        print(lerobot.available_policies)
        print(lerobot.available_policies_per_env)
        print(lerobot.available_robots)
        print(lerobot.available_cameras)
        print(lerobot.available_motors)
    ```
-When implementing a new dataset loadable with LeRobotDataset follow these steps:
+The base install is intentionally lightweight. Feature-specific dependencies
- Update `available_datasets_per_env` in `lerobot/__init__.py`
+are gated behind optional extras::
-When implementing a new environment (e.g. `gym_aloha`), follow these steps:
+    pip install 'lerobot[dataset]'       # dataset loading & creation
- Update `available_tasks_per_env` and `available_datasets_per_env` in `lerobot/__init__.py`
+    pip install 'lerobot[training]'      # training loop + wandb
-
+    pip install 'lerobot[hardware]'      # real robot control
-When implementing a new policy class (e.g. `DiffusionPolicy`) follow these steps:
+    pip install 'lerobot[core_scripts]'  # dataset + hardware + viz (record, replay, calibrate, etc.)
- Update `available_policies` and `available_policies_per_env`, in `lerobot/__init__.py`
+    pip install 'lerobot[all]'           # everything
 - Set the required `name` class attribute.
 - Update variables in `tests/test_available.py` by importing your new Policy class
 """
-import itertools
+from lerobot.__version__ import __version__
-from lerobot.__version__ import __version__  # noqa: F401
+# Maps optional extras to the CLI entry-points they unlock.
-
+available_extras: dict[str, list[str]] = {
-# TODO(rcadene): Improve policies and envs. As of now, an item in `available_policies`
+    "dataset": ["lerobot-dataset-viz", "lerobot-imgtransform-viz", "lerobot-edit-dataset"],
-# refers to a yaml file AND a modeling name. Same for `available_envs` which refers to
+    "training": ["lerobot-train"],
-# a yaml file AND a environment name. The difference should be more obvious.
+    "hardware": [
-available_tasks_per_env = {
+        "lerobot-calibrate",
-    "aloha": [
+        "lerobot-find-port",
-        "AlohaInsertion-v0",
+        "lerobot-find-cameras",
-        "AlohaTransferCube-v0",
+        "lerobot-find-joint-limits",
        "lerobot-setup-motors",
    ],
-    "pusht": ["PushT-v0"],
+    "core_scripts": ["lerobot-record", "lerobot-replay", "lerobot-teleoperate"],
-}
+    "evaluation": ["lerobot-eval"],
 available_envs = list(available_tasks_per_env.keys())
 available_datasets_per_env = {
    "aloha": [
        "lerobot/aloha_sim_insertion_human",
        "lerobot/aloha_sim_insertion_scripted",
        "lerobot/aloha_sim_transfer_cube_human",
        "lerobot/aloha_sim_transfer_cube_scripted",
        "lerobot/aloha_sim_insertion_human_image",
        "lerobot/aloha_sim_insertion_scripted_image",
        "lerobot/aloha_sim_transfer_cube_human_image",
        "lerobot/aloha_sim_transfer_cube_scripted_image",
    ],
    # TODO(alexander-soare): Add "lerobot/pusht_keypoints". Right now we can't because this is too tightly
    # coupled with tests.
    "pusht": ["lerobot/pusht", "lerobot/pusht_image"],
 }
-available_real_world_datasets = [
+__all__ = ["__version__", "available_extras"]
    "lerobot/aloha_mobile_cabinet",
    "lerobot/aloha_mobile_chair",
    "lerobot/aloha_mobile_elevator",
    "lerobot/aloha_mobile_shrimp",
    "lerobot/aloha_mobile_wash_pan",
    "lerobot/aloha_mobile_wipe_wine",
    "lerobot/aloha_static_battery",
    "lerobot/aloha_static_candy",
    "lerobot/aloha_static_coffee",
    "lerobot/aloha_static_coffee_new",
    "lerobot/aloha_static_cups_open",
    "lerobot/aloha_static_fork_pick_up",
    "lerobot/aloha_static_pingpong_test",
    "lerobot/aloha_static_pro_pencil",
    "lerobot/aloha_static_screw_driver",
    "lerobot/aloha_static_tape",
    "lerobot/aloha_static_thread_velcro",
    "lerobot/aloha_static_towel",
    "lerobot/aloha_static_vinh_cup",
    "lerobot/aloha_static_vinh_cup_left",
    "lerobot/aloha_static_ziploc_slide",
    "lerobot/umi_cup_in_the_wild",
    "lerobot/unitreeh1_fold_clothes",
    "lerobot/unitreeh1_rearrange_objects",
    "lerobot/unitreeh1_two_robot_greeting",
    "lerobot/unitreeh1_warehouse",
    "lerobot/nyu_rot_dataset",
    "lerobot/utokyo_saytap",
    "lerobot/imperialcollege_sawyer_wrist_cam",
    "lerobot/utokyo_xarm_bimanual",
    "lerobot/tokyo_u_lsmo",
    "lerobot/utokyo_pr2_opening_fridge",
    "lerobot/cmu_franka_exploration_dataset",
    "lerobot/cmu_stretch",
    "lerobot/asu_table_top",
    "lerobot/utokyo_pr2_tabletop_manipulation",
    "lerobot/utokyo_xarm_pick_and_place",
    "lerobot/ucsd_kitchen_dataset",
    "lerobot/austin_buds_dataset",
    "lerobot/dlr_sara_grid_clamp",
    "lerobot/conq_hose_manipulation",
    "lerobot/columbia_cairlab_pusht_real",
    "lerobot/dlr_sara_pour",
    "lerobot/dlr_edan_shared_control",
    "lerobot/ucsd_pick_and_place_dataset",
    "lerobot/berkeley_cable_routing",
    "lerobot/nyu_franka_play_dataset",
    "lerobot/austin_sirius_dataset",
    "lerobot/cmu_play_fusion",
    "lerobot/berkeley_gnm_sac_son",
    "lerobot/nyu_door_opening_surprising_effectiveness",
    "lerobot/berkeley_fanuc_manipulation",
    "lerobot/jaco_play",
    "lerobot/viola",
    "lerobot/kaist_nonprehensile",
    "lerobot/berkeley_mvp",
    "lerobot/uiuc_d3field",
    "lerobot/berkeley_gnm_recon",
    "lerobot/austin_sailor_dataset",
    "lerobot/utaustin_mutex",
    "lerobot/roboturk",
    "lerobot/stanford_hydra_dataset",
    "lerobot/berkeley_autolab_ur5",
    "lerobot/stanford_robocook",
    "lerobot/toto",
    "lerobot/fmb",
    "lerobot/droid_100",
    "lerobot/berkeley_rpt",
    "lerobot/stanford_kuka_multimodal_dataset",
    "lerobot/iamlab_cmu_pickup_insert",
    "lerobot/taco_play",
    "lerobot/berkeley_gnm_cory_hall",
    "lerobot/usc_cloth_sim",
 ]
 available_datasets = sorted(
    set(itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets))
 )
 # lists all available policies from `lerobot/policies`
 available_policies = ["act", "diffusion", "tdmpc", "vqbet"]
 # lists all available robots from `lerobot/robots`
 available_robots = [
    "koch",
    "koch_bimanual",
    "aloha",
    "so100",
    "so101",
 ]
 # lists all available cameras from `lerobot/cameras`
 available_cameras = [
    "opencv",
    "intelrealsense",
 ]
 # lists all available motors from `lerobot/motors`
 available_motors = [
    "dynamixel",
    "feetech",
 ]
 # keys and values refer to yaml files
 available_policies_per_env = {
    "aloha": ["act"],
    "pusht": ["diffusion", "vqbet"],
    "koch_real": ["act_koch_real"],
    "aloha_real": ["act_aloha_real"],
 }
 env_task_pairs = [(env, task) for env, tasks in available_tasks_per_env.items() for task in tasks]
 env_dataset_pairs = [
    (env, dataset) for env, datasets in available_datasets_per_env.items() for dataset in datasets
 ]
 env_dataset_policy_triplets = [
    (env, dataset, policy)
    for env, datasets in available_datasets_per_env.items()
    for dataset in datasets
    for policy in available_policies_per_env[env]
 ]
--- a/src/lerobot/async_inference/init.py
+++ b/src/lerobot/async_inference/init.py
@@ -0,0 +1,30 @@
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Async inference server/client.
 Requires: ``pip install 'lerobot[async]'``
 Available modules (import directly)::
    from lerobot.async_inference.policy_server import ...
    from lerobot.async_inference.robot_client import ...
 """
 from lerobot.utils.import_utils import require_package
 require_package("grpcio", extra="async", import_name="grpc")
 __all__: list[str] = []
--- a/src/lerobot/async_inference/helpers.py
+++ b/src/lerobot/async_inference/helpers.py
@@ -22,8 +22,7 @@ from typing import Any
 import torch
-from lerobot.configs.types import PolicyFeature
+from lerobot.configs import PolicyFeature
 from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features
 # NOTE: Configs need to be loaded for the client to be able to instantiate the policy config
 from lerobot.policies import (  # noqa: F401
@@ -36,6 +35,7 @@ from lerobot.policies import (  # noqa: F401
 )
 from lerobot.robots.robot import Robot
 from lerobot.utils.constants import OBS_IMAGES, OBS_STATE, OBS_STR
 from lerobot.utils.feature_utils import build_dataset_frame, hw_to_dataset_features
 from lerobot.utils.utils import init_logging
 Action = torch.Tensor
--- a/src/lerobot/async_inference/policy_server.py
+++ b/src/lerobot/async_inference/policy_server.py
@@ -38,7 +38,7 @@ import draccus
 import grpc
 import torch
-from lerobot.policies.factory import get_policy_class, make_pre_post_processors
+from lerobot.policies import get_policy_class, make_pre_post_processors
 from lerobot.processor import PolicyProcessorPipeline
 from lerobot.transport import (
    services_pb2,  # type: ignore
--- a/src/lerobot/async_inference/robot_client.py
+++ b/src/lerobot/async_inference/robot_client.py
@@ -47,8 +47,8 @@ import draccus
 import grpc
 import torch
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig  # noqa: F401
+from lerobot.cameras.opencv import OpenCVCameraConfig  # noqa: F401
-from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig  # noqa: F401
+from lerobot.cameras.realsense import RealSenseCameraConfig  # noqa: F401
 from lerobot.robots import (  # noqa: F401
    Robot,
    RobotConfig,
--- a/src/lerobot/cameras/init.py
+++ b/src/lerobot/cameras/init.py
@@ -15,3 +15,9 @@
 from .camera import Camera
 from .configs import CameraConfig, ColorMode, Cv2Backends, Cv2Rotation
 from .utils import make_cameras_from_configs
 # NOTE: Camera submodule configs and implementations (OpenCVCameraConfig, RealSenseCamera, etc.)
 # are intentionally NOT re-exported here to avoid pulling backend-specific dependencies.
 # Import from submodules: ``from lerobot.cameras.opencv import OpenCVCameraConfig``
 __all__ = ["Camera", "CameraConfig", "ColorMode", "Cv2Backends", "Cv2Rotation", "make_cameras_from_configs"]
--- a/src/lerobot/cameras/reachy2_camera/init.py
+++ b/src/lerobot/cameras/reachy2_camera/init.py
@@ -14,3 +14,5 @@
 from .configuration_reachy2_camera import Reachy2CameraConfig
 from .reachy2_camera import Reachy2Camera
 __all__ = ["Reachy2Camera", "Reachy2CameraConfig"]
--- a/src/lerobot/cameras/realsense/init.py
+++ b/src/lerobot/cameras/realsense/init.py
@@ -14,3 +14,5 @@
 from .camera_realsense import RealSenseCamera
 from .configuration_realsense import RealSenseCameraConfig
 __all__ = ["RealSenseCamera", "RealSenseCameraConfig"]
--- a/src/lerobot/cameras/zmq/image_server.py
+++ b/src/lerobot/cameras/zmq/image_server.py
@@ -31,8 +31,8 @@ import cv2
 import numpy as np
 import zmq
-from lerobot.cameras.configs import ColorMode
+from ..configs import ColorMode
-from lerobot.cameras.opencv import OpenCVCamera, OpenCVCameraConfig
+from ..opencv import OpenCVCamera, OpenCVCameraConfig
 logger = logging.getLogger(__name__)
--- a/src/lerobot/common/init.py
+++ b/src/lerobot/common/init.py
@@ -0,0 +1,30 @@
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Cross-cutting modules that bridge multiple lerobot packages.
 Unlike ``lerobot.utils`` (which must remain dependency-free), modules here
 are allowed to import from ``lerobot.policies``, ``lerobot.processor``,
 ``lerobot.configs``, etc.  They are deliberately NOT re-exported from the
 top-level ``lerobot`` package.
 Available modules (import directly)::
    from lerobot.common.control_utils import predict_action, ...
    from lerobot.common.train_utils import save_checkpoint, ...
    from lerobot.common.wandb_utils import WandBLogger, ...
 """
 __all__: list[str] = []
--- a/src/lerobot/common/control_utils.py
+++ b/src/lerobot/common/control_utils.py
@@ -12,26 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import annotations
 ########################################################################################
 # Utilities
 ########################################################################################
 import logging
 import traceback
 from contextlib import nullcontext
 from copy import copy
 from functools import cache
-from typing import Any
+from typing import TYPE_CHECKING, Any
 import numpy as np
 import torch
 from deepdiff import DeepDiff
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.policies import PreTrainedPolicy, prepare_observation_for_inference
-from lerobot.datasets.utils import DEFAULT_FEATURES
+
-from lerobot.policies.pretrained import PreTrainedPolicy
+if TYPE_CHECKING:
-from lerobot.policies.utils import prepare_observation_for_inference
+    from lerobot.datasets import LeRobotDataset
 from lerobot.processor import PolicyProcessorPipeline
 from lerobot.robots import Robot
 from lerobot.types import PolicyAction
@@ -218,6 +217,13 @@ def sanity_check_dataset_robot_compatibility(
    Raises:
        ValueError: If any of the checked metadata fields do not match.
    """
    from lerobot.utils.import_utils import require_package
    require_package("deepdiff", extra="hardware")
    from deepdiff import DeepDiff
    from lerobot.utils.constants import DEFAULT_FEATURES
    fields = [
        ("robot_type", dataset.meta.robot_type, robot.robot_type),
        ("fps", dataset.fps, fps),
--- a/src/lerobot/common/train_utils.py
+++ b/src/lerobot/common/train_utils.py
@@ -19,10 +19,13 @@ from torch.optim import Optimizer
 from torch.optim.lr_scheduler import LRScheduler
 from lerobot.configs.train import TrainPipelineConfig
-from lerobot.datasets.io_utils import load_json, write_json
+from lerobot.optim import (
-from lerobot.optim.optimizers import load_optimizer_state, save_optimizer_state
+    load_optimizer_state,
-from lerobot.optim.schedulers import load_scheduler_state, save_scheduler_state
+    load_scheduler_state,
-from lerobot.policies.pretrained import PreTrainedPolicy
+    save_optimizer_state,
    save_scheduler_state,
 )
 from lerobot.policies import PreTrainedPolicy
 from lerobot.processor import PolicyProcessorPipeline
 from lerobot.utils.constants import (
    CHECKPOINTS_DIR,
@@ -31,6 +34,7 @@ from lerobot.utils.constants import (
    TRAINING_STATE_DIR,
    TRAINING_STEP,
 )
 from lerobot.utils.io_utils import load_json, write_json
 from lerobot.utils.random_utils import load_rng_state, save_rng_state
--- a/src/lerobot/common/wandb_utils.py
+++ b/src/lerobot/common/wandb_utils.py
--- a/src/lerobot/configs/init.py
+++ b/src/lerobot/configs/init.py
@@ -0,0 +1,47 @@
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Public API for lerobot configuration types and base config classes.
 NOTE: TrainPipelineConfig, EvalPipelineConfig, and TrainRLServerPipelineConfig
 are intentionally NOT re-exported here to avoid circular dependencies
 (they import lerobot.envs and lerobot.policies at module level).
 Import them directly: ``from lerobot.configs.train import TrainPipelineConfig``
 """
 from .default import DatasetConfig, EvalConfig, PeftConfig, WandBConfig
 from .policies import PreTrainedConfig
 from .types import (
    FeatureType,
    NormalizationMode,
    PipelineFeatureType,
    PolicyFeature,
    RTCAttentionSchedule,
 )
 __all__ = [
    # Types
    "FeatureType",
    "NormalizationMode",
    "PipelineFeatureType",
    "PolicyFeature",
    "RTCAttentionSchedule",
    # Config classes
    "DatasetConfig",
    "EvalConfig",
    "PeftConfig",
    "PreTrainedConfig",
    "WandBConfig",
 ]
--- a/src/lerobot/configs/default.py
+++ b/src/lerobot/configs/default.py
@@ -16,8 +16,8 @@
 from dataclasses import dataclass, field
-from lerobot.datasets.transforms import ImageTransformsConfig
+from lerobot.transforms import ImageTransformsConfig
-from lerobot.datasets.video_utils import get_safe_default_codec
+from lerobot.utils.import_utils import get_safe_default_codec
@dataclass
@@ -67,11 +67,17 @@ class EvalConfig:
    # `batch_size` specifies the number of environments to use in a gym.vector.VectorEnv.
    # Set to 0 for auto-tuning based on available CPU cores and n_episodes.
    batch_size: int = 0
    # Number of rollout videos to save per evaluated task. Set to 0 to disable videos.
    max_episodes_rendered: int = 10
    # `use_async_envs` specifies whether to use asynchronous environments (multiprocessing).
    # Defaults to True; automatically downgraded to SyncVectorEnv when batch_size=1.
    use_async_envs: bool = True
    def __post_init__(self) -> None:
        if self.max_episodes_rendered < 0:
            raise ValueError(
                f"`max_episodes_rendered` must be non-negative, got {self.max_episodes_rendered}."
            )
        if self.batch_size == 0:
            self.batch_size = self._auto_batch_size()
        if self.batch_size > self.n_episodes:
--- a/src/lerobot/configs/eval.py
+++ b/src/lerobot/configs/eval.py
@@ -19,8 +19,9 @@ from pathlib import Path
 from lerobot import envs, policies  # noqa: F401
 from lerobot.configs import parser
-from lerobot.configs.default import EvalConfig
+
-from lerobot.configs.policies import PreTrainedConfig
+from .default import EvalConfig
 from .policies import PreTrainedConfig
 logger = getLogger(__name__)
--- a/src/lerobot/configs/policies.py
+++ b/src/lerobot/configs/policies.py
@@ -26,13 +26,13 @@ from huggingface_hub import hf_hub_download
 from huggingface_hub.constants import CONFIG_NAME
 from huggingface_hub.errors import HfHubHTTPError
-from lerobot.configs.types import FeatureType, PolicyFeature
+from lerobot.optim import LRSchedulerConfig, OptimizerConfig
 from lerobot.optim.optimizers import OptimizerConfig
 from lerobot.optim.schedulers import LRSchedulerConfig
 from lerobot.utils.constants import ACTION, OBS_STATE
 from lerobot.utils.device_utils import auto_select_torch_device, is_amp_available, is_torch_device_available
 from lerobot.utils.hub import HubMixin
 from .types import FeatureType, PolicyFeature
 T = TypeVar("T", bound="PreTrainedConfig")
 logger = getLogger(__name__)
--- a/src/lerobot/configs/train.py
+++ b/src/lerobot/configs/train.py
@@ -24,12 +24,12 @@ from huggingface_hub.errors import HfHubHTTPError
 from lerobot import envs
 from lerobot.configs import parser
-from lerobot.configs.default import DatasetConfig, EvalConfig, PeftConfig, WandBConfig
+from lerobot.optim import LRSchedulerConfig, OptimizerConfig
 from lerobot.configs.policies import PreTrainedConfig
 from lerobot.optim import OptimizerConfig
 from lerobot.optim.schedulers import LRSchedulerConfig
 from lerobot.utils.hub import HubMixin
 from .default import DatasetConfig, EvalConfig, PeftConfig, WandBConfig
 from .policies import PreTrainedConfig
 TRAIN_CONFIG_NAME = "train_config.json"
@@ -56,6 +56,7 @@ class TrainPipelineConfig(HubMixin):
    # Number of workers for the dataloader.
    num_workers: int = 4
    batch_size: int = 8
    gradient_accumulation_steps: int = 1
    steps: int = 100_000
    eval_freq: int = 20_000
    log_freq: int = 200
@@ -132,6 +133,11 @@ class TrainPipelineConfig(HubMixin):
        if isinstance(self.dataset.repo_id, list):
            raise NotImplementedError("LeRobotMultiDataset is not currently implemented.")
        if self.gradient_accumulation_steps <= 0:
            raise ValueError(
                f"`gradient_accumulation_steps` must be strictly positive, got {self.gradient_accumulation_steps}."
            )
        if not self.use_policy_training_preset and (self.optimizer is None or self.scheduler is None):
            raise ValueError("Optimizer and Scheduler must be set when the policy presets are not used.")
        elif self.use_policy_training_preset and not self.resume:
--- a/src/lerobot/data_processing/init.py
+++ b/src/lerobot/data_processing/init.py
@@ -11,3 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Data processing utilities (annotation tools, dataset transformations).
 Available sub-modules (import directly)::
    from lerobot.data_processing.sarm_annotations import ...
 """
 __all__: list[str] = []
--- a/src/lerobot/data_processing/sarm_annotations/init.py
+++ b/src/lerobot/data_processing/sarm_annotations/init.py
@@ -11,3 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 SARM subtask annotation tools.
 Available modules (import directly)::
    from lerobot.data_processing.sarm_annotations.subtask_annotation import ...
 """
 __all__: list[str] = []
--- a/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py
+++ b/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py
@@ -76,7 +76,7 @@ import torch
 from pydantic import BaseModel, Field
 from transformers import AutoProcessor, Qwen3VLMoeForConditionalGeneration
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset
 # Pydantic Models for SARM Subtask Annotation
@@ -746,8 +746,7 @@ def save_annotations_to_dataset(
    dataset_path: Path, annotations: dict[int, SubtaskAnnotation], fps: int, prefix: str = "sparse"
 ):
    """Save annotations to LeRobot dataset parquet format."""
-    from lerobot.datasets.io_utils import load_episodes
+    from lerobot.datasets import DEFAULT_EPISODES_PATH, load_episodes
    from lerobot.datasets.utils import DEFAULT_EPISODES_PATH
    episodes_dataset = load_episodes(dataset_path)
    if not episodes_dataset or len(episodes_dataset) == 0:
@@ -841,7 +840,7 @@ def generate_auto_sparse_annotations(
 def load_annotations_from_dataset(dataset_path: Path, prefix: str = "sparse") -> dict[int, SubtaskAnnotation]:
    """Load annotations from LeRobot dataset parquet files."""
-    from lerobot.datasets.io_utils import load_episodes
+    from lerobot.datasets import load_episodes
    episodes_dataset = load_episodes(dataset_path)
    if not episodes_dataset or len(episodes_dataset) == 0:
--- a/src/lerobot/datasets/init.py
+++ b/src/lerobot/datasets/init.py
@@ -15,19 +15,68 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from lerobot.utils.import_utils import require_package
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
-from lerobot.datasets.multi_dataset import MultiLeRobotDataset
+require_package("datasets", extra="dataset")
-from lerobot.datasets.sampler import EpisodeAwareSampler
+require_package("av", extra="dataset")
-from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset
+
-from lerobot.datasets.transforms import ImageTransforms, ImageTransformsConfig
+from .aggregate import aggregate_datasets
 from .compute_stats import DEFAULT_QUANTILES, aggregate_stats, get_feature_stats
 from .dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata
 from .dataset_tools import (
    add_features,
    convert_image_to_video_dataset,
    delete_episodes,
    merge_datasets,
    modify_features,
    modify_tasks,
    recompute_stats,
    remove_feature,
    split_dataset,
 )
 from .factory import make_dataset, resolve_delta_timestamps
 from .image_writer import safe_stop_image_writer
 from .io_utils import load_episodes, write_stats
 from .lerobot_dataset import LeRobotDataset
 from .multi_dataset import MultiLeRobotDataset
 from .pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
 from .sampler import EpisodeAwareSampler
 from .streaming_dataset import StreamingLeRobotDataset
 from .utils import DEFAULT_EPISODES_PATH, create_lerobot_dataset_card
 from .video_utils import VideoEncodingManager
 # NOTE: Low-level I/O functions (cast_stats_to_numpy, get_parquet_file_size_in_mb, etc.)
 # and legacy migration constants are intentionally NOT re-exported here.
 # Import directly: ``from lerobot.datasets.io_utils import ...``
 # Public API of ``lerobot.datasets``: the names below are what
 # ``from lerobot.datasets import *`` exports.
 # NOTE(review): "ImageTransforms" and "ImageTransformsConfig" are listed here,
 # but the only import of them visible in this change is the removed
 # ``from lerobot.datasets.transforms import ...`` line. Confirm that
 # ``.transforms`` is still imported in this module; otherwise a star-import
 # raises AttributeError and importing either name from the package fails.
 __all__ = [
    "CODEBASE_VERSION",
    "DEFAULT_EPISODES_PATH",
    "DEFAULT_QUANTILES",
    "EpisodeAwareSampler",
    "ImageTransforms",
    "ImageTransformsConfig",
    "LeRobotDataset",
    "LeRobotDatasetMetadata",
    "MultiLeRobotDataset",
    "StreamingLeRobotDataset",
    "VideoEncodingManager",
    "add_features",
    "aggregate_datasets",
    "aggregate_pipeline_dataset_features",
    "aggregate_stats",
    "convert_image_to_video_dataset",
    "create_initial_features",
    "create_lerobot_dataset_card",
    "delete_episodes",
    "get_feature_stats",
    "load_episodes",
    "make_dataset",
    "merge_datasets",
    "modify_features",
    "modify_tasks",
    "recompute_stats",
    "remove_feature",
    "resolve_delta_timestamps",
    "safe_stop_image_writer",
    "split_dataset",
    "write_stats",
 ]
--- a/src/lerobot/datasets/aggregate.py
+++ b/src/lerobot/datasets/aggregate.py
@@ -23,10 +23,10 @@ import datasets
 import pandas as pd
 import tqdm
-from lerobot.datasets.compute_stats import aggregate_stats
+from .compute_stats import aggregate_stats
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from .dataset_metadata import LeRobotDatasetMetadata
-from lerobot.datasets.feature_utils import get_hf_features_from_features
+from .feature_utils import get_hf_features_from_features
-from lerobot.datasets.io_utils import (
+from .io_utils import (
    get_file_size_in_mb,
    get_parquet_file_size_in_mb,
    to_parquet_with_hf_images,
@@ -34,7 +34,7 @@ from lerobot.datasets.io_utils import (
    write_stats,
    write_tasks,
 )
-from lerobot.datasets.utils import (
+from .utils import (
    DEFAULT_CHUNK_SIZE,
    DEFAULT_DATA_FILE_SIZE_IN_MB,
    DEFAULT_DATA_PATH,
@@ -43,7 +43,7 @@ from lerobot.datasets.utils import (
    DEFAULT_VIDEO_PATH,
    update_chunk_file_indices,
 )
-from lerobot.datasets.video_utils import concatenate_video_files, get_video_duration_in_s
+from .video_utils import concatenate_video_files, get_video_duration_in_s
 def validate_all_metadata(all_metadata: list[LeRobotDatasetMetadata]):
--- a/src/lerobot/datasets/compute_stats.py
+++ b/src/lerobot/datasets/compute_stats.py
@@ -19,9 +19,11 @@ import logging
 import numpy as np
-from lerobot.datasets.io_utils import load_image_as_numpy
+from lerobot.processor import RelativeActionsProcessorStep
 from lerobot.utils.constants import ACTION, OBS_STATE
 from .io_utils import load_image_as_numpy
 DEFAULT_QUANTILES = [0.01, 0.10, 0.50, 0.90, 0.99]
@@ -696,8 +698,6 @@ def compute_relative_action_stats(
        ValueError: If the dataset has fewer frames than ``chunk_size``.
        RuntimeError: If no valid (single-episode) chunks are found.
    """
    from lerobot.processor.relative_action_processor import RelativeActionsProcessorStep
    if exclude_joints is None:
        exclude_joints = []
--- a/src/lerobot/datasets/dataset_metadata.py
+++ b/src/lerobot/datasets/dataset_metadata.py
@@ -23,9 +23,13 @@ import pyarrow as pa
 import pyarrow.parquet as pq
 from huggingface_hub import snapshot_download
-from lerobot.datasets.compute_stats import aggregate_stats
+from lerobot.utils.constants import DEFAULT_FEATURES, HF_LEROBOT_HOME, HF_LEROBOT_HUB_CACHE
-from lerobot.datasets.feature_utils import _validate_feature_names, create_empty_dataset_info
+from lerobot.utils.feature_utils import _validate_feature_names
-from lerobot.datasets.io_utils import (
+from lerobot.utils.utils import flatten_dict
 from .compute_stats import aggregate_stats
 from .feature_utils import create_empty_dataset_info
 from .io_utils import (
    get_file_size_in_mb,
    load_episodes,
    load_info,
@@ -37,19 +41,16 @@ from lerobot.datasets.io_utils import (
    write_stats,
    write_tasks,
 )
-from lerobot.datasets.utils import (
+from .utils import (
    DEFAULT_EPISODES_PATH,
    DEFAULT_FEATURES,
    INFO_PATH,
    check_version_compatibility,
    flatten_dict,
    get_safe_version,
    has_legacy_hub_download_metadata,
    is_valid_version,
    update_chunk_file_indices,
 )
-from lerobot.datasets.video_utils import get_video_info
+from .video_utils import get_video_info
 from lerobot.utils.constants import HF_LEROBOT_HOME, HF_LEROBOT_HUB_CACHE
 CODEBASE_VERSION = "v3.0"
@@ -180,6 +181,16 @@ class LeRobotDatasetMetadata:
        self.episodes = load_episodes(self.root)
        self.stats = load_stats(self.root)
    def ensure_readable(self) -> None:
        """Make sure the metadata is available before the instance is read.

        Calls ``_load_metadata`` only while ``self.episodes`` is ``None``;
        when it is anything else the method returns without doing any work,
        so repeated calls are cheap.  Meant to be invoked when an instance
        that was used for writing is about to be used for reading.
        """
        already_loaded = self.episodes is not None
        if already_loaded:
            return
        self._load_metadata()
    def _pull_from_repo(
        self,
        allow_patterns: list[str] | str | None = None,
--- a/src/lerobot/datasets/dataset_reader.py
+++ b/src/lerobot/datasets/dataset_reader.py
@@ -21,17 +21,17 @@ from pathlib import Path
 import datasets
 import torch
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from .dataset_metadata import LeRobotDatasetMetadata
-from lerobot.datasets.feature_utils import (
+from .feature_utils import (
    check_delta_timestamps,
    get_delta_indices,
    get_hf_features_from_features,
 )
-from lerobot.datasets.io_utils import (
+from .io_utils import (
    hf_transform_to_torch,
    load_nested_dataset,
 )
-from lerobot.datasets.video_utils import decode_video_frames
+from .video_utils import decode_video_frames
 class DatasetReader:
--- a/src/lerobot/datasets/dataset_tools.py
+++ b/src/lerobot/datasets/dataset_tools.py
@@ -36,22 +36,25 @@ import pyarrow.parquet as pq
 import torch
 from tqdm import tqdm
-from lerobot.datasets.aggregate import aggregate_datasets
+from lerobot.utils.constants import ACTION, HF_LEROBOT_HOME, OBS_IMAGE, OBS_STATE
-from lerobot.datasets.compute_stats import (
+from lerobot.utils.utils import flatten_dict
 from .aggregate import aggregate_datasets
 from .compute_stats import (
    aggregate_stats,
    compute_episode_stats,
    compute_relative_action_stats,
 )
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from .dataset_metadata import LeRobotDatasetMetadata
-from lerobot.datasets.io_utils import (
+from .io_utils import (
    get_parquet_file_size_in_mb,
    load_episodes,
    write_info,
    write_stats,
    write_tasks,
 )
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from .lerobot_dataset import LeRobotDataset
-from lerobot.datasets.utils import (
+from .utils import (
    DATA_DIR,
    DEFAULT_CHUNK_SIZE,
    DEFAULT_DATA_FILE_SIZE_IN_MB,
@@ -59,8 +62,7 @@ from lerobot.datasets.utils import (
    DEFAULT_EPISODES_PATH,
    update_chunk_file_indices,
 )
-from lerobot.datasets.video_utils import encode_video_frames, get_video_info
+from .video_utils import encode_video_frames, get_video_info
 from lerobot.utils.constants import ACTION, HF_LEROBOT_HOME, OBS_IMAGE, OBS_STATE
 def _load_episode_with_stats(src_dataset: LeRobotDataset, episode_idx: int) -> dict:
@@ -829,8 +831,6 @@ def _copy_and_reindex_episodes_metadata(
        data_metadata: Dict mapping new episode index to its data file metadata
        video_metadata: Optional dict mapping new episode index to its video metadata
    """
    from lerobot.datasets.utils import flatten_dict
    if src_dataset.meta.episodes is None:
        src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
@@ -922,8 +922,8 @@ def _write_parquet(df: pd.DataFrame, path: Path, meta: LeRobotDatasetMetadata) -
    This ensures images are properly embedded and the file can be loaded correctly by HF datasets.
    """
-    from lerobot.datasets.feature_utils import get_hf_features_from_features
+    from .feature_utils import get_hf_features_from_features
-    from lerobot.datasets.io_utils import embed_images
+    from .io_utils import embed_images
    hf_features = get_hf_features_from_features(meta.features)
    ep_dataset = datasets.Dataset.from_dict(df.to_dict(orient="list"), features=hf_features, split="train")
@@ -1367,7 +1367,7 @@ def _copy_data_without_images(
        episode_indices: Episodes to include
        img_keys: Image keys to remove
    """
-    from lerobot.datasets.utils import DATA_DIR
+    from .utils import DATA_DIR
    data_dir = src_dataset.root / DATA_DIR
    parquet_files = sorted(data_dir.glob("*/*.parquet"))
--- a/Show More
+++ b/Show More
		`@@ -0,0 +1 @@`
							`# Copyright 2026 The HuggingFace Inc. team. All rights reserved.`