From b9246ef61bc22afa716ca969034c273189a0cc43 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Wed, 3 Jun 2026 15:56:53 +0200 Subject: [PATCH] tests(annotations): guard on the 'dataset' extra so base fast-test tier skips cleanly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fast Pytest Tests failed at COLLECTION in the base '--extra test' tier with 'ModuleNotFoundError: No module named datasets': tests/annotations/conftest.py imported the fixture dataset builder (-> lerobot.datasets -> the HF 'datasets' lib + pandas/pyarrow), which only ship under the 'dataset' extra, so the whole annotations package crashed. Fix uses the repo's proven module-level guard pattern (see tests/datasets/test_language.py), NOT a conftest-level importorskip — verified empirically that pytest.importorskip raised during conftest *import* is treated as a collection ERROR (exit 1), while module-level importorskip is a clean SKIP. * conftest.py: import build_annotation_dataset LAZILY inside the fixtures so the conftest itself imports cleanly in every tier. * test_modules / test_validator / test_writer / test_pipeline_recipe_render: add module-level pytest.importorskip('datasets') + ('pandas') before the pyarrow / lerobot.* imports (# noqa: E402 to match the existing convention). pyarrow-importing modules place the guard before the pyarrow import. * tests/scripts/test_lerobot_annotate.py: same module-level guard, on 'datasets' only (its _push_to_hub path imports lerobot.datasets). Result: - base / hardware / viz tiers (no dataset extra): annotation tests skip cleanly; the rest of the suite runs -> exit 0. - dataset tier: datasets present -> guards pass through -> annotation tests run with the stub VLM. The pipeline modules import only stdlib + relative + lerobot.datasets (no module-level datatrove / vllm / openai), so they import fine there. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/annotations/conftest.py | 12 ++++++++- tests/annotations/test_modules.py | 20 +++++++++----- .../test_pipeline_recipe_render.py | 26 ++++++++++++------- tests/annotations/test_validator.py | 16 +++++++++--- tests/annotations/test_writer.py | 15 ++++++++--- tests/scripts/test_lerobot_annotate.py | 7 +++++ 6 files changed, 72 insertions(+), 24 deletions(-) diff --git a/tests/annotations/conftest.py b/tests/annotations/conftest.py index 8134c79a5..198e90319 100644 --- a/tests/annotations/conftest.py +++ b/tests/annotations/conftest.py @@ -26,12 +26,20 @@ from pathlib import Path import pytest -from tests.fixtures.dataset_factories import build_annotation_dataset +# NOTE: ``build_annotation_dataset`` pulls in ``lerobot.datasets`` (-> the HF +# ``datasets`` library + ``pandas``), which only ship under the ``dataset`` +# extra. It is imported LAZILY inside the fixtures below so this conftest +# imports cleanly in dependency tiers without that extra (e.g. the base +# ``--extra test`` fast-test tier). The annotation test modules guard +# themselves with a module-level ``pytest.importorskip("datasets")`` so +# their collection is skipped — never erroring — when the extra is absent. 
@pytest.fixture def fixture_dataset_root(tmp_path: Path) -> Path: """A tiny dataset with two episodes, 12 frames each at 10 fps.""" + from tests.fixtures.dataset_factories import build_annotation_dataset + return build_annotation_dataset( tmp_path / "ds", episode_specs=[ @@ -44,6 +52,8 @@ def fixture_dataset_root(tmp_path: Path) -> Path: @pytest.fixture def single_episode_root(tmp_path: Path) -> Path: + from tests.fixtures.dataset_factories import build_annotation_dataset + return build_annotation_dataset( tmp_path / "ds_one", episode_specs=[(0, 30, "Pour water from the bottle into the cup.")], diff --git a/tests/annotations/test_modules.py b/tests/annotations/test_modules.py index 73685a079..189481169 100644 --- a/tests/annotations/test_modules.py +++ b/tests/annotations/test_modules.py @@ -22,21 +22,29 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any -from lerobot.annotations.steerable_pipeline.config import ( +import pytest + +# ``lerobot.annotations`` imports pull in ``lerobot.datasets`` (-> the HF +# ``datasets`` library), which only ships under the ``dataset`` extra. Skip +# this module in tiers without it instead of erroring at import. 
+pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])") +pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])") + +from lerobot.annotations.steerable_pipeline.config import ( # noqa: E402 InterjectionsConfig, PlanConfig, VqaConfig, ) -from lerobot.annotations.steerable_pipeline.modules import ( +from lerobot.annotations.steerable_pipeline.modules import ( # noqa: E402 GeneralVqaModule, InterjectionsAndSpeechModule, PlanSubtasksMemoryModule, ) -from lerobot.annotations.steerable_pipeline.reader import iter_episodes -from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging -from lerobot.annotations.steerable_pipeline.vlm_client import StubVlmClient +from lerobot.annotations.steerable_pipeline.reader import iter_episodes # noqa: E402 +from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging # noqa: E402 +from lerobot.annotations.steerable_pipeline.vlm_client import StubVlmClient # noqa: E402 -from ._helpers import make_canned_responder +from ._helpers import make_canned_responder # noqa: E402 @dataclass diff --git a/tests/annotations/test_pipeline_recipe_render.py b/tests/annotations/test_pipeline_recipe_render.py index 6d0f6a29f..43a616934 100644 --- a/tests/annotations/test_pipeline_recipe_render.py +++ b/tests/annotations/test_pipeline_recipe_render.py @@ -19,26 +19,34 @@ from __future__ import annotations from pathlib import Path -import pyarrow.parquet as pq +import pytest -from lerobot.annotations.steerable_pipeline.config import ( +# ``pyarrow`` and the ``lerobot.datasets`` chain (-> the HF ``datasets`` +# library) only ship under the ``dataset`` extra. Skip this module in +# tiers without it instead of erroring at import. 
+pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])") +pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])") + +import pyarrow.parquet as pq # noqa: E402 + +from lerobot.annotations.steerable_pipeline.config import ( # noqa: E402 AnnotationPipelineConfig, InterjectionsConfig, PlanConfig, VqaConfig, ) -from lerobot.annotations.steerable_pipeline.executor import Executor -from lerobot.annotations.steerable_pipeline.modules import ( +from lerobot.annotations.steerable_pipeline.executor import Executor # noqa: E402 +from lerobot.annotations.steerable_pipeline.modules import ( # noqa: E402 GeneralVqaModule, InterjectionsAndSpeechModule, PlanSubtasksMemoryModule, ) -from lerobot.annotations.steerable_pipeline.validator import StagingValidator -from lerobot.annotations.steerable_pipeline.writer import LanguageColumnsWriter -from lerobot.configs.recipe import MessageTurn, TrainingRecipe -from lerobot.datasets.language_render import render_sample +from lerobot.annotations.steerable_pipeline.validator import StagingValidator # noqa: E402 +from lerobot.annotations.steerable_pipeline.writer import LanguageColumnsWriter # noqa: E402 +from lerobot.configs.recipe import MessageTurn, TrainingRecipe # noqa: E402 +from lerobot.datasets.language_render import render_sample # noqa: E402 -from ._helpers import make_canned_responder +from ._helpers import make_canned_responder # noqa: E402 def _build_pr1_style_blend_recipe() -> TrainingRecipe: diff --git a/tests/annotations/test_validator.py b/tests/annotations/test_validator.py index c01d862cf..6b421cc98 100644 --- a/tests/annotations/test_validator.py +++ b/tests/annotations/test_validator.py @@ -20,10 +20,18 @@ from __future__ import annotations import json from pathlib import Path -from lerobot.annotations.steerable_pipeline.reader import iter_episodes -from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging -from 
lerobot.annotations.steerable_pipeline.validator import StagingValidator -from lerobot.annotations.steerable_pipeline.writer import speech_atom +import pytest + +# ``lerobot.annotations`` imports pull in ``lerobot.datasets`` (-> the HF +# ``datasets`` library), which only ships under the ``dataset`` extra. Skip +# this module in tiers without it instead of erroring at import. +pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])") +pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])") + +from lerobot.annotations.steerable_pipeline.reader import iter_episodes # noqa: E402 +from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging # noqa: E402 +from lerobot.annotations.steerable_pipeline.validator import StagingValidator # noqa: E402 +from lerobot.annotations.steerable_pipeline.writer import speech_atom # noqa: E402 def _validate(root: Path, staging_dir: Path): diff --git a/tests/annotations/test_writer.py b/tests/annotations/test_writer.py index 29f14c0e8..22dfbcb29 100644 --- a/tests/annotations/test_writer.py +++ b/tests/annotations/test_writer.py @@ -20,12 +20,19 @@ from __future__ import annotations import json from pathlib import Path -import pyarrow.parquet as pq import pytest -from lerobot.annotations.steerable_pipeline.reader import iter_episodes -from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging -from lerobot.annotations.steerable_pipeline.writer import ( +# ``pyarrow`` and the ``lerobot.annotations`` -> ``lerobot.datasets`` chain +# (-> the HF ``datasets`` library) only ship under the ``dataset`` extra. +# Skip this module in tiers without it instead of erroring at import. 
+pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])") +pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])") + +import pyarrow.parquet as pq # noqa: E402 + +from lerobot.annotations.steerable_pipeline.reader import iter_episodes # noqa: E402 +from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging # noqa: E402 +from lerobot.annotations.steerable_pipeline.writer import ( # noqa: E402 LanguageColumnsWriter, speech_atom, ) diff --git a/tests/scripts/test_lerobot_annotate.py b/tests/scripts/test_lerobot_annotate.py index c98ee7cb3..9f80d2e8c 100644 --- a/tests/scripts/test_lerobot_annotate.py +++ b/tests/scripts/test_lerobot_annotate.py @@ -3,6 +3,13 @@ import json from types import SimpleNamespace +import pytest + +# ``lerobot.scripts.lerobot_annotate`` (and the ``_push_to_hub`` path it +# exercises) imports ``lerobot.datasets``, which only ships under the +# ``dataset`` extra. Skip in tiers without it instead of erroring. +pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])") + def test_push_to_hub_tags_uploaded_dataset_revision(tmp_path, monkeypatch): from lerobot.scripts.lerobot_annotate import _push_to_hub