fix(loading errors): improve dataset loading error handling and logging

2026-06-01 11:21:27 +00:00 · 2026-04-09 17:43:42 +02:00
10 changed files with 5 additions and 766 deletions
--- a/.github/workflows/benchmark_tests.yml
+++ b/.github/workflows/benchmark_tests.yml
@@ -1,311 +0,0 @@
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Integration tests: build an isolated Docker image per benchmark and run a
-# 1-episode smoke eval. Each benchmark gets its own image so incompatible
-# dependency trees (e.g. hf-libero vs metaworld==3.0.0) can never collide.
-#
-# To add a new benchmark:
-#   1. Add docker/Dockerfile.benchmark.<name>  (install only lerobot[<name>])
-#   2. Copy one of the jobs below and adjust the image name and eval command.
-name: Benchmark Integration Tests
-
-on:
-  # Run manually from the Actions tab
-  workflow_dispatch:
-
-  # Run every Monday at 02:00 UTC.
-  schedule:
-    - cron: "0 2 * * 1"
-
-  push:
-    branches:
-      - main
-    paths:
-      - "src/lerobot/envs/**"
-      - "src/lerobot/scripts/lerobot_eval.py"
-      - "docker/Dockerfile.benchmark.*"
-      - ".github/workflows/benchmark_tests.yml"
-      - "pyproject.toml"
-
-  pull_request:
-    branches:
-      - main
-      - feat/benchmark-ci
-    paths:
-      - "src/lerobot/envs/**"
-      - "src/lerobot/scripts/lerobot_eval.py"
-      - "docker/Dockerfile.benchmark.*"
-      - ".github/workflows/benchmark_tests.yml"
-      - "pyproject.toml"
-
-permissions:
-  contents: read
-
-env:
-  UV_VERSION: "0.8.0"
-  PYTHON_VERSION: "3.12"
-
-# Cancel in-flight runs for the same branch/PR.
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  # ── LIBERO ────────────────────────────────────────────────────────────────
-  # Isolated image: lerobot[libero] only (hf-libero, dm-control, mujoco chain)
-  libero-integration-test:
-    name: Libero — build image + 1-episode eval
-    runs-on:
-      group: aws-g6-4xlarge-plus
-    env:
-      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
-
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-          lfs: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          cache-binary: false
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-
-      # Build the benchmark-specific image. The Dockerfile separates dep-install
-      # from source-copy, so code-only changes skip the slow uv-sync layer
-      # when the runner has a warm Docker daemon cache.
-      - name: Build Libero benchmark image
-        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
-        with:
-          context: .
-          file: docker/Dockerfile.benchmark.libero
-          push: false
-          load: true
-          tags: lerobot-benchmark-libero:ci
-
-      - name: Run Libero smoke eval (1 episode)
-        if: env.HF_USER_TOKEN != ''
-        run: |
-          # Named container (no --rm) so we can docker cp artifacts out.
-          # Output to /tmp inside the container — /artifacts doesn't exist
-          # and user_lerobot cannot create root-level dirs.
-          docker run --name libero-eval --gpus all \
-            --shm-size=4g \
-            -e HF_HOME=/tmp/hf \
-            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
-            lerobot-benchmark-libero:ci \
-            bash -c "
-              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
-              lerobot-eval \
-                --policy.path=pepijn223/smolvla_libero \
-                --env.type=libero \
-                --env.task=libero_spatial \
-                --eval.batch_size=1 \
-                --eval.n_episodes=1 \
-                --eval.use_async_envs=false \
-                --policy.device=cuda \
-                '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
-                --policy.empty_cameras=1 \
-                --output_dir=/tmp/eval-artifacts
-              python scripts/ci/extract_task_descriptions.py \
-                --env libero --task libero_spatial \
-                --output /tmp/eval-artifacts/task_descriptions.json
-            "
-
-      - name: Copy Libero artifacts from container
-        if: always()
-        run: |
-          mkdir -p /tmp/libero-artifacts
-          docker cp libero-eval:/tmp/eval-artifacts/. /tmp/libero-artifacts/ 2>/dev/null || true
-          docker rm -f libero-eval || true
-
-      - name: Parse Libero eval metrics
-        if: always()
-        run: |
-          python3 scripts/ci/parse_eval_metrics.py \
-            --artifacts-dir /tmp/libero-artifacts \
-            --env libero \
-            --task libero_spatial \
-            --policy pepijn223/smolvla_libero
-
-      - name: Upload Libero rollout video
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: libero-rollout-video
-          path: /tmp/libero-artifacts/videos/
-          if-no-files-found: warn
-
-      - name: Upload Libero eval metrics
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: libero-metrics
-          path: /tmp/libero-artifacts/metrics.json
-          if-no-files-found: warn
-
-      # ── LIBERO TRAIN+EVAL SMOKE ──────────────────────────────────────────────
-      # Train SmolVLA for 1 step (batch_size=1, dataset episode 0 only) then
-      # immediately runs eval inside the training loop (eval_freq=1, 1 episode).
-      # Tests the full train→eval-within-training pipeline end-to-end.
-      - name: Run Libero train+eval smoke (1 step, eval_freq=1)
-        run: |
-          docker run --name libero-train-smoke --gpus all \
-            --shm-size=4g \
-            -e HF_HOME=/tmp/hf \
-            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
-            lerobot-benchmark-libero:ci \
-            bash -c "
-              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
-              accelerate launch --num_processes=1 \$(which lerobot-train) \
-                --policy.path=lerobot/smolvla_base \
-                --policy.load_vlm_weights=true \
-                --policy.scheduler_decay_steps=25000 \
-                --policy.freeze_vision_encoder=false \
-                --policy.train_expert_only=false \
-                --dataset.repo_id=lerobot/libero \
-                --dataset.episodes=[0] \
-                --dataset.use_imagenet_stats=false \
-                --env.type=libero \
-                --env.task=libero_spatial \
-                '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
-                --policy.empty_cameras=1 \
-                --output_dir=/tmp/train-smoke \
-                --steps=1 \
-                --batch_size=1 \
-                --eval_freq=1 \
-                --eval.n_episodes=1 \
-                --eval.batch_size=1 \
-                --eval.use_async_envs=false \
-                --save_freq=1 \
-                --policy.push_to_hub=false \
-                '--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.image2\": \"observation.images.camera2\"}'
-            "
-
-      - name: Copy Libero train-smoke artifacts from container
-        if: always()
-        run: |
-          mkdir -p /tmp/libero-train-smoke-artifacts
-          docker cp libero-train-smoke:/tmp/train-smoke/. /tmp/libero-train-smoke-artifacts/ 2>/dev/null || true
-          docker rm -f libero-train-smoke || true
-
-      - name: Upload Libero train-smoke eval video
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: libero-train-smoke-video
-          path: /tmp/libero-train-smoke-artifacts/eval/
-          if-no-files-found: warn
-
-  # ── METAWORLD ─────────────────────────────────────────────────────────────
-  # Isolated image: lerobot[metaworld] only (metaworld==3.0.0, mujoco>=3 chain)
-  metaworld-integration-test:
-    name: MetaWorld — build image + 1-episode eval
-    runs-on:
-      group: aws-g6-4xlarge-plus
-    env:
-      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
-
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-          lfs: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          cache-binary: false
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-
-      - name: Build MetaWorld benchmark image
-        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
-        with:
-          context: .
-          file: docker/Dockerfile.benchmark.metaworld
-          push: false
-          load: true
-          tags: lerobot-benchmark-metaworld:ci
-
-      - name: Run MetaWorld smoke eval (1 episode)
-        run: |
-          docker run --name metaworld-eval --gpus all \
-            --shm-size=4g \
-            -e HF_HOME=/tmp/hf \
-            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
-            lerobot-benchmark-metaworld:ci \
-            bash -c "
-              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
-              lerobot-eval \
-                --policy.path=pepijn223/smolvla_metaworld \
-                --env.type=metaworld \
-                --env.task=metaworld-push-v3 \
-                --eval.batch_size=1 \
-                --eval.n_episodes=1 \
-                --eval.use_async_envs=false \
-                --policy.device=cuda \
-                '--rename_map={\"observation.image\": \"observation.images.camera1\"}' \
-                --policy.empty_cameras=2 \
-                --output_dir=/tmp/eval-artifacts
-              python scripts/ci/extract_task_descriptions.py \
-                --env metaworld --task metaworld-push-v3 \
-                --output /tmp/eval-artifacts/task_descriptions.json
-            "
-
-      - name: Copy MetaWorld artifacts from container
-        if: always()
-        run: |
-          mkdir -p /tmp/metaworld-artifacts
-          docker cp metaworld-eval:/tmp/eval-artifacts/. /tmp/metaworld-artifacts/ 2>/dev/null || true
-          docker rm -f metaworld-eval || true
-
-      - name: Parse MetaWorld eval metrics
-        if: always()
-        run: |
-          python3 scripts/ci/parse_eval_metrics.py \
-            --artifacts-dir /tmp/metaworld-artifacts \
-            --env metaworld \
-            --task metaworld-push-v3 \
-            --policy pepijn223/smolvla_metaworld
-
-      - name: Upload MetaWorld rollout video
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: metaworld-rollout-video
-          path: /tmp/metaworld-artifacts/videos/
-          if-no-files-found: warn
-
-      - name: Upload MetaWorld eval metrics
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: metaworld-metrics
-          path: /tmp/metaworld-artifacts/metrics.json
-          if-no-files-found: warn
--- a/docker/Dockerfile.benchmark.libero
+++ b/docker/Dockerfile.benchmark.libero
@@ -1,99 +0,0 @@
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Isolated benchmark image for LIBERO integration tests.
-# Installs only lerobot[libero] so its dep tree (hf-libero, dm-control, mujoco)
-# cannot conflict with other benchmarks.
-#
-# Build:  docker build -f docker/Dockerfile.benchmark.libero -t lerobot-benchmark-libero .
-# Run:    docker run --gpus all --rm lerobot-benchmark-libero lerobot-eval ...
-
-ARG CUDA_VERSION=12.4.1
-ARG OS_VERSION=22.04
-FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
-
-ARG PYTHON_VERSION=3.12
-
-ENV DEBIAN_FRONTEND=noninteractive \
-    MUJOCO_GL=egl \
-    PATH=/lerobot/.venv/bin:$PATH \
-    CUDA_VISIBLE_DEVICES=0 \
-    DEVICE=cuda
-
-# System deps — same set as Dockerfile.internal
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    software-properties-common build-essential git curl \
-    libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
-    libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
-    cmake pkg-config ninja-build \
-    && add-apt-repository -y ppa:deadsnakes/ppa \
-    && apt-get update \
-    && apt-get install -y --no-install-recommends \
-       python${PYTHON_VERSION} \
-       python${PYTHON_VERSION}-venv \
-       python${PYTHON_VERSION}-dev \
-    && curl -LsSf https://astral.sh/uv/0.8.0/install.sh | sh \
-    && mv /root/.local/bin/uv /usr/local/bin/uv \
-    && useradd --create-home --shell /bin/bash user_lerobot \
-    && usermod -aG sudo user_lerobot \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-
-WORKDIR /lerobot
-RUN chown -R user_lerobot:user_lerobot /lerobot
-USER user_lerobot
-
-ENV HOME=/home/user_lerobot \
-    HF_HOME=/home/user_lerobot/.cache/huggingface \
-    HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \
-    TORCH_HOME=/home/user_lerobot/.cache/torch \
-    TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton
-
-RUN uv venv --python python${PYTHON_VERSION}
-
-# ── Dependency layer (cached unless pyproject.toml / uv.lock change) ────────
-# Copy only the files uv needs to resolve deps, plus a minimal package stub
-# so the editable install can succeed without the full source tree.
-# Uses `uv pip install` instead of `uv sync` because uv sync validates the
-# entire lockfile across all extras — robomme's numpy<2.0 conflicts with the
-# base numpy>=2.0, making the full lockfile unsatisfiable. pip-style install
-# only resolves the requested extras for the current platform.
-COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
-RUN mkdir -p src/lerobot && touch src/lerobot/__init__.py src/lerobot/py.typed
-
-RUN uv pip install --no-cache -e ".[libero,smolvla]"
-
-# Pre-download lerobot/libero-assets from HF Hub so nothing is fetched at
-# runtime (which times out on CI). Point the libero config at the cached path.
-# libero/libero/__init__.py calls input() when ~/.libero/config.yaml is missing,
-# so we write the config before any libero import can happen.
-RUN LIBERO_DIR=$(python${PYTHON_VERSION} -c \
-      "import importlib.util, os; s=importlib.util.find_spec('libero'); \
-       print(os.path.join(os.path.dirname(s.origin), 'libero'))") && \
-    mkdir -p /home/user_lerobot/.libero && \
-    python${PYTHON_VERSION} -c "\
-from huggingface_hub import snapshot_download; \
-snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \
-                  local_dir='/home/user_lerobot/.libero/assets')" && \
-    printf "assets: /home/user_lerobot/.libero/assets\nbddl_files: ${LIBERO_DIR}/bddl_files\ndatasets: ${LIBERO_DIR}/../datasets\ninit_states: ${LIBERO_DIR}/init_files\n" \
-    > /home/user_lerobot/.libero/config.yaml
-
-# Workaround: Triton ships ptxas without the execute bit set.
-# Without this chmod, any JIT compilation (e.g. torch.compile) fails
-# with "Permission denied".
-RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
-
-# ── Source layer (rebuilds in seconds on code-only changes) ─────────────────
-COPY --chown=user_lerobot:user_lerobot . .
-
-CMD ["/bin/bash"]
--- a/docker/Dockerfile.benchmark.metaworld
+++ b/docker/Dockerfile.benchmark.metaworld
@@ -1,82 +0,0 @@
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Isolated benchmark image for MetaWorld integration tests.
-# Installs only lerobot[metaworld] so its dep tree (metaworld==3.0.0, mujoco>=3)
-# cannot conflict with other benchmarks.
-#
-# Build:  docker build -f docker/Dockerfile.benchmark.metaworld -t lerobot-benchmark-metaworld .
-# Run:    docker run --gpus all --rm lerobot-benchmark-metaworld lerobot-eval ...
-
-ARG CUDA_VERSION=12.4.1
-ARG OS_VERSION=22.04
-FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
-
-ARG PYTHON_VERSION=3.12
-
-ENV DEBIAN_FRONTEND=noninteractive \
-    MUJOCO_GL=egl \
-    PATH=/lerobot/.venv/bin:$PATH \
-    CUDA_VISIBLE_DEVICES=0 \
-    DEVICE=cuda
-
-# System deps — same set as Dockerfile.internal
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    software-properties-common build-essential git curl \
-    libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
-    libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
-    cmake pkg-config ninja-build \
-    && add-apt-repository -y ppa:deadsnakes/ppa \
-    && apt-get update \
-    && apt-get install -y --no-install-recommends \
-       python${PYTHON_VERSION} \
-       python${PYTHON_VERSION}-venv \
-       python${PYTHON_VERSION}-dev \
-    && curl -LsSf https://astral.sh/uv/0.8.0/install.sh | sh \
-    && mv /root/.local/bin/uv /usr/local/bin/uv \
-    && useradd --create-home --shell /bin/bash user_lerobot \
-    && usermod -aG sudo user_lerobot \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-
-WORKDIR /lerobot
-RUN chown -R user_lerobot:user_lerobot /lerobot
-USER user_lerobot
-
-ENV HOME=/home/user_lerobot \
-    HF_HOME=/home/user_lerobot/.cache/huggingface \
-    HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \
-    TORCH_HOME=/home/user_lerobot/.cache/torch \
-    TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton
-
-RUN uv venv --python python${PYTHON_VERSION}
-
-# ── Dependency layer (cached unless pyproject.toml / uv.lock change) ────────
-# Copy only the files uv needs to resolve deps, plus a minimal package stub
-# so the editable install can succeed without the full source tree.
-# Uses `uv pip install` instead of `uv sync` — see Dockerfile.benchmark.libero
-# for rationale (cross-extra numpy conflict with robomme).
-COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
-RUN mkdir -p src/lerobot && touch src/lerobot/__init__.py src/lerobot/py.typed
-
-RUN uv pip install --no-cache -e ".[metaworld,smolvla]"
-
-# Workaround: Triton ships ptxas without the execute bit set.
-# Without this chmod, any JIT compilation (e.g. torch.compile) fails
-# with "Permission denied". See: https://github.com/triton-lang/triton/issues/2due
-RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
-
-# ── Source layer (rebuilds in seconds on code-only changes) ─────────────────
-COPY --chown=user_lerobot:user_lerobot . .
-
-CMD ["/bin/bash"]
--- a/scripts/ci/extract_task_descriptions.py
+++ b/scripts/ci/extract_task_descriptions.py
@@ -1,89 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Extract natural-language task descriptions for a benchmark suite.
-
-Runs inside the benchmark Docker container (where the env library is installed)
-immediately after lerobot-eval, writing a JSON file that parse_eval_metrics.py
-picks up and embeds in metrics.json.
-
-Output format: {"<suite>_<task_idx>": "<nl instruction>", ...}
-
-Usage:
-    python scripts/ci/extract_task_descriptions.py \\
-        --env libero --task libero_spatial \\
-        --output /tmp/eval-artifacts/task_descriptions.json
-"""
-
-from __future__ import annotations
-
-import argparse
-import json
-import sys
-from pathlib import Path
-
-
-def _libero_descriptions(task_suite: str) -> dict[str, str]:
-    from libero.libero import benchmark  # type: ignore[import-untyped]
-
-    suite_dict = benchmark.get_benchmark_dict()
-    if task_suite not in suite_dict:
-        print(
-            f"[extract_task_descriptions] Unknown LIBERO suite '{task_suite}'. "
-            f"Available: {list(suite_dict.keys())}",
-            file=sys.stderr,
-        )
-        return {}
-    suite = suite_dict[task_suite]()
-    return {f"{task_suite}_{i}": suite.get_task(i).language for i in range(suite.n_tasks)}
-
-
-def _metaworld_descriptions(task_name: str) -> dict[str, str]:
-    # MetaWorld tasks don't expose a separate NL description attribute;
-    # use a cleaned version of the task name as the description.
-    label = task_name.removeprefix("metaworld-").replace("-", " ").strip()
-    return {f"{task_name}_0": label}
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--env", required=True, help="Environment family (libero, metaworld, ...)")
-    parser.add_argument("--task", required=True, help="Task/suite name (e.g. libero_spatial)")
-    parser.add_argument("--output", required=True, help="Path to write task_descriptions.json")
-    args = parser.parse_args()
-
-    descriptions: dict[str, str] = {}
-    try:
-        if args.env == "libero":
-            descriptions = _libero_descriptions(args.task)
-        elif args.env == "metaworld":
-            descriptions = _metaworld_descriptions(args.task)
-        else:
-            print(
-                f"[extract_task_descriptions] No description extractor for env '{args.env}'.",
-                file=sys.stderr,
-            )
-    except Exception as exc:
-        print(f"[extract_task_descriptions] Warning: {exc}", file=sys.stderr)
-
-    out_path = Path(args.output)
-    out_path.parent.mkdir(parents=True, exist_ok=True)
-    out_path.write_text(json.dumps(descriptions, indent=2))
-    print(f"[extract_task_descriptions] {len(descriptions)} descriptions → {out_path}")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/scripts/ci/parse_eval_metrics.py
+++ b/scripts/ci/parse_eval_metrics.py
@@ -1,147 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Parse lerobot-eval output into a small metrics.json artifact.
-
-Reads eval_info.json written by lerobot-eval --output_dir and extracts the
-key metrics needed by the health dashboard. Handles both single-task and
-multi-task eval output formats.
-
-NOTE: This script runs on the bare CI runner (not inside Docker), so it
-must use only Python stdlib modules. Do not add third-party imports.
-
-Usage:
-    python scripts/ci/parse_eval_metrics.py \\
-        --artifacts-dir /tmp/libero-artifacts \\
-        --env libero \\
-        --task libero_spatial \\
-        --policy pepijn223/smolvla_libero
-
-Writes <artifacts-dir>/metrics.json. The CI workflow then uploads this file
-as a GitHub Actions artifact named "<env>-metrics".
-"""
-
-from __future__ import annotations
-
-import argparse
-import json
-import math
-import sys
-from pathlib import Path
-
-
-def _safe_float(v: float | int | None) -> float | None:
-    if v is None:
-        return None
-    f = float(v)
-    return None if math.isnan(f) else f
-
-
-def _safe_int(v: float | int | None) -> int | None:
-    if v is None:
-        return None
-    f = float(v)
-    return None if math.isnan(f) else int(f)
-
-
-def _extract_metrics(info: dict) -> tuple[float | None, int | None, float | None, float | None]:
-    """Extract (pc_success, n_episodes, avg_sum_reward, eval_s) from eval_info.json.
-
-    Handles two output shapes:
-      - Single-task: {"aggregated": {"pc_success": 80.0, ...}}
-      - Multi-task:  {"overall": {"pc_success": 80.0, "n_episodes": 5, ...}}
-    """
-    for key in ("aggregated", "overall"):
-        if key not in info:
-            continue
-        agg = info[key]
-        pc = agg.get("pc_success")
-        n = agg.get("n_episodes")
-        reward = agg.get("avg_sum_reward")
-        eval_s = agg.get("eval_s")
-
-        if pc is not None and not math.isnan(pc):
-            return (
-                float(pc),
-                _safe_int(n),
-                _safe_float(reward),
-                _safe_float(eval_s),
-            )
-
-    return None, None, None, None
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
-    )
-    parser.add_argument("--artifacts-dir", required=True, help="Path to the mounted artifacts volume")
-    parser.add_argument("--env", required=True, help="Environment name (e.g. libero)")
-    parser.add_argument("--task", required=True, help="Task name (e.g. libero_spatial)")
-    parser.add_argument("--policy", required=True, help="Policy hub path (e.g. pepijn223/smolvla_libero)")
-    args = parser.parse_args()
-
-    artifacts_dir = Path(args.artifacts_dir)
-    eval_info_path = artifacts_dir / "eval_info.json"
-
-    pc_success: float | None = None
-    n_episodes: int | None = None
-    avg_sum_reward: float | None = None
-    eval_s: float | None = None
-
-    if eval_info_path.exists():
-        try:
-            info = json.loads(eval_info_path.read_text())
-            pc_success, n_episodes, avg_sum_reward, eval_s = _extract_metrics(info)
-        except (json.JSONDecodeError, KeyError, TypeError) as exc:
-            print(f"[parse_eval_metrics] Warning: could not parse eval_info.json: {exc}", file=sys.stderr)
-    else:
-        print(
-            f"[parse_eval_metrics] Warning: {eval_info_path} not found — eval may have failed.",
-            file=sys.stderr,
-        )
-
-    task_descriptions: dict[str, str] = {}
-    task_desc_path = artifacts_dir / "task_descriptions.json"
-    if task_desc_path.exists():
-        try:
-            task_descriptions = json.loads(task_desc_path.read_text())
-        except json.JSONDecodeError as exc:
-            print(
-                f"[parse_eval_metrics] Warning: could not parse task_descriptions.json: {exc}",
-                file=sys.stderr,
-            )
-
-    metrics = {
-        "env": args.env,
-        "task": args.task,
-        "policy": args.policy,
-        "pc_success": pc_success,
-        "n_episodes": n_episodes,
-        "avg_sum_reward": avg_sum_reward,
-        "eval_s": eval_s,
-        "task_descriptions": task_descriptions,
-    }
-
-    out_path = artifacts_dir / "metrics.json"
-    out_path.write_text(json.dumps(metrics, indent=2))
-    print(f"[parse_eval_metrics] Written: {out_path}")
-    print(json.dumps(metrics, indent=2))
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/src/lerobot/datasets/dataset_metadata.py
+++ b/src/lerobot/datasets/dataset_metadata.py
@@ -180,16 +180,6 @@ class LeRobotDatasetMetadata:
        self.episodes = load_episodes(self.root)
        self.stats = load_stats(self.root)

-    def ensure_readable(self) -> None:
-        """Guarantee metadata is fully loaded for read operations.
-
-        Idempotent — when metadata is already in memory this is a single
-        ``is None`` check.  Call this before transitioning from write to
-        read mode on the same instance.
-        """
-        if self.episodes is None:
-            self._load_metadata()
-
    def _pull_from_repo(
        self,
        allow_patterns: list[str] | str | None = None,
--- a/src/lerobot/datasets/dataset_reader.py
+++ b/src/lerobot/datasets/dataset_reader.py
@@ -87,7 +87,7 @@ class DatasetReader:
        """Attempt to load from local cache. Returns True if data is sufficient."""
        try:
            self.hf_dataset = self._load_hf_dataset()
-        except (FileNotFoundError, NotADirectoryError):
+        except (FileNotFoundError, NotADirectoryError, ValueError):
            self.hf_dataset = None
            return False
        if not self._check_cached_episodes_sufficient():
--- a/src/lerobot/datasets/io_utils.py
+++ b/src/lerobot/datasets/io_utils.py
@@ -78,7 +78,10 @@ def load_nested_dataset(
    with SuppressProgressBars():
        # We use .from_parquet() memory-mapped loading for efficiency
        filters = pa_ds.field("episode_index").isin(episodes) if episodes is not None else None
-        return Dataset.from_parquet([str(path) for path in paths], filters=filters, features=features)
+        try:
+            return Dataset.from_parquet([str(path) for path in paths], filters=filters, features=features)
+        except ValueError:
+            raise ValueError(f"Failed to load parquet files in {pq_dir}, make sure the dataset is valid and is not missing any files.")


 def get_parquet_num_frames(parquet_path: str | Path) -> int:
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -278,7 +278,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
    def _ensure_reader(self) -> DatasetReader:
        """Lazily create the reader on first access."""
        if self.reader is None:
-            self.meta.ensure_readable()
            self.reader = DatasetReader(
                meta=self.meta,
                root=self.root,
--- a/tests/datasets/test_lerobot_dataset.py
+++ b/tests/datasets/test_lerobot_dataset.py
@@ -535,31 +535,6 @@ def test_getitem_works_after_finalize(tmp_path):
    assert "task" in item


-def test_getitem_after_finalize_with_delta_timestamps(tmp_path):
-    """After finalize(), dataset[0] works when delta_timestamps require episode metadata.
-
-    Regression test for https://github.com/huggingface/lerobot/pull/3305.
-    The create -> write -> finalize -> read path left meta.episodes as None
-    because the write path flushes episodes to disk without updating them
-    in memory.  Features that access meta.episodes (video decoding,
-    delta_timestamps) would crash with a TypeError.
-    """
-    dataset = LeRobotDataset.create(
-        repo_id=DUMMY_REPO_ID, fps=DEFAULT_FPS, features=SIMPLE_FEATURES, root=tmp_path / "ds"
-    )
-    for _ in range(5):
-        dataset.add_frame(_make_frame())
-    dataset.save_episode()
-    dataset.finalize()
-
-    # Set delta_timestamps so get_item() accesses meta.episodes via _get_query_indices
-    dataset.delta_timestamps = {"state": [0.0]}
-
-    item = dataset[0]
-    assert "state" in item
-    assert "state_is_pad" in item
-
-
 # ── Property delegation ──────────────────────────────────────────────