From e064cfcb04f99e0df7410e8fc89acce11130ca9f Mon Sep 17 00:00:00 2001 From: Pepijn Date: Thu, 30 Apr 2026 13:05:08 +0200 Subject: [PATCH] fix(annotate): seed Module 3 cameras from camera_keys + camera_key fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Module 3 fast-pathed out (50 episodes in 0.6s) when ``frame_provider.camera_keys`` came back empty even though Module 1/2 worked, because they use ``frame_provider.camera_key`` (singular) and were happy with the explicit ``--vlm.camera_key=...`` override. Two fixes: - ``frames.py``: read ``meta.camera_keys`` (covers both video- and image-stored cameras) instead of ``meta.video_keys`` (video-only), matching :class:`LeRobotDatasetMetadata`'s canonical accessor. If metadata still surfaces nothing but the caller explicitly passed ``--vlm.camera_key=...``, fall back to ``[camera_key]`` — the key is by definition known to exist on the dataset. - ``general_vqa.py``: emit a one-time WARNING log when Module 3 sees zero cameras so this never silently produces zero VQA again. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../annotations/steerable_pipeline/frames.py | 11 ++++++++++- .../steerable_pipeline/modules/general_vqa.py | 17 ++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/lerobot/annotations/steerable_pipeline/frames.py b/src/lerobot/annotations/steerable_pipeline/frames.py index 8602d2a28..a2f2e4897 100644 --- a/src/lerobot/annotations/steerable_pipeline/frames.py +++ b/src/lerobot/annotations/steerable_pipeline/frames.py @@ -126,7 +126,16 @@ class VideoFrameProvider: from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata # noqa: PLC0415 self._meta = LeRobotDatasetMetadata(repo_id="local", root=self.root) - keys = list(self._meta.video_keys or []) + # ``camera_keys`` covers both image- and video-stored cameras + # (``video_keys`` is video-only). 
Some datasets declare cameras with + # ``dtype=image``, which would otherwise look empty here and silently + # disable Module 3 even though the videos are there. + keys = list(getattr(self._meta, "camera_keys", None) or self._meta.video_keys or []) + # Last-resort fallback: if metadata didn't surface anything but the + # caller explicitly named a camera (``--vlm.camera_key=...``), trust + # them — the key is by definition known to exist on the dataset. + if not keys and self.camera_key: + keys = [self.camera_key] self._camera_keys = keys if self.camera_key is None: self.camera_key = keys[0] if keys else None diff --git a/src/lerobot/annotations/steerable_pipeline/modules/general_vqa.py b/src/lerobot/annotations/steerable_pipeline/modules/general_vqa.py index 2fe71d5dc..56174bc34 100644 --- a/src/lerobot/annotations/steerable_pipeline/modules/general_vqa.py +++ b/src/lerobot/annotations/steerable_pipeline/modules/general_vqa.py @@ -107,9 +107,20 @@ class GeneralVqaModule: ) cameras = self._target_cameras() if not cameras: - # No camera available — keep behaviour parity with previous - # text-only stub: emit nothing rather than producing untagged - # rows that would fail validation. + # No camera available — emit nothing rather than producing + # untagged rows that would fail validation. Surface a loud one- + # time warning so this is never silently a no-op. + if not getattr(self, "_warned_no_camera", False): + import logging # noqa: PLC0415 + + logging.getLogger(__name__).warning( + "Module 3 (VQA) found no cameras on the frame provider — " + "every episode will emit zero VQA rows. Check that the " + "dataset declares observation.images.* features in " + "meta/info.json; passing --vlm.camera_key= at the " + "CLI now also seeds the cameras list as a fallback." + ) + self._warned_no_camera = True staging.write("module_3", []) return