diff --git a/src/lerobot/annotations/steerable_pipeline/frames.py b/src/lerobot/annotations/steerable_pipeline/frames.py index 8602d2a28..a2f2e4897 100644 --- a/src/lerobot/annotations/steerable_pipeline/frames.py +++ b/src/lerobot/annotations/steerable_pipeline/frames.py @@ -126,7 +126,16 @@ class VideoFrameProvider: from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata # noqa: PLC0415 self._meta = LeRobotDatasetMetadata(repo_id="local", root=self.root) - keys = list(self._meta.video_keys or []) + # ``camera_keys`` covers both image- and video-stored cameras + # (``video_keys`` is video-only). Some datasets declare cameras with + # ``dtype=image``, which would otherwise look empty here and silently + # disable Module 3 even though the videos are there. + keys = list(getattr(self._meta, "camera_keys", None) or self._meta.video_keys or []) + # Last-resort fallback: if metadata didn't surface anything but the + # caller explicitly named a camera (``--vlm.camera_key=...``), trust + # them — the key is by definition known to exist on the dataset. + if not keys and self.camera_key: + keys = [self.camera_key] self._camera_keys = keys if self.camera_key is None: self.camera_key = keys[0] if keys else None diff --git a/src/lerobot/annotations/steerable_pipeline/modules/general_vqa.py b/src/lerobot/annotations/steerable_pipeline/modules/general_vqa.py index 2fe71d5dc..56174bc34 100644 --- a/src/lerobot/annotations/steerable_pipeline/modules/general_vqa.py +++ b/src/lerobot/annotations/steerable_pipeline/modules/general_vqa.py @@ -107,9 +107,20 @@ class GeneralVqaModule: ) cameras = self._target_cameras() if not cameras: - # No camera available — keep behaviour parity with previous - # text-only stub: emit nothing rather than producing untagged - # rows that would fail validation. + # No camera available — emit nothing rather than producing + # untagged rows that would fail validation. Surface a loud one- + # time warning so this is never silently a no-op. + if not getattr(self, "_warned_no_camera", False): + import logging # noqa: PLC0415 + + logging.getLogger(__name__).warning( + "Module 3 (VQA) found no cameras on the frame provider — " + "every episode will emit zero VQA rows. Check that the " + "dataset declares observation.images.* features in " + "meta/info.json; passing --vlm.camera_key= at the " + "CLI now also seeds the cameras list as a fallback." + ) + self._warned_no_camera = True staging.write("module_3", []) return