test: add dataset guard + fix imports

update docs + docstrings + examples + add minimal test
add context guards
2026-05-31 10:51:35 +00:00 · 2026-04-20 00:36:02 +02:00 · 2026-04-19 23:53:53 +02:00 · 2026-04-19 23:21:14 +02:00 · 2026-04-19 22:48:08 +02:00 · 2026-04-19 16:50:19 +02:00
13 changed files with 30 additions and 206 deletions
--- a/docs/source/unitree_g1.mdx
+++ b/docs/source/unitree_g1.mdx
@@ -274,8 +274,7 @@ python src/lerobot/scripts/lerobot_train.py \
 Once trained, we recommend deploying policies using inference-time RTC:

 ```bash
-lerobot-rollout \
-  --strategy.type=base \
+python examples/rtc/eval_with_real_robot.py \
  --policy.path=your-username/your-repo-id \
  --policy.device=cuda \
  --robot.type=unitree_g1 \
--- a/src/lerobot/rollout/__init__.py
+++ b/src/lerobot/rollout/__init__.py
@@ -23,6 +23,7 @@ from .configs import (
    DAggerKeyboardConfig,
    DAggerPedalConfig,
    DAggerStrategyConfig,
+    DatasetRecordConfig,
    HighlightStrategyConfig,
    RolloutConfig,
    RolloutStrategyConfig,
@@ -56,6 +57,7 @@ __all__ = [
    "DAggerPedalConfig",
    "DAggerStrategyConfig",
    "DatasetContext",
+    "DatasetRecordConfig",
    "HardwareContext",
    "HighlightStrategyConfig",
    "InferenceEngine",
--- a/src/lerobot/rollout/configs.py
+++ b/src/lerobot/rollout/configs.py
@@ -142,9 +142,7 @@ class DAggerStrategyConfig(RolloutStrategyConfig):
    windows, where each correction becomes its own episode.
    """

-    # Number of correction episodes to collect (corrections-only mode).
-    # When None, falls back to ``--dataset.num_episodes``.
-    num_episodes: int | None = None
+    num_episodes: int = 10
    record_autonomous: bool = False
    upload_every_n_episodes: int = 5
    # Target video file size in MB for episode rotation (record_autonomous
@@ -218,17 +216,14 @@ class RolloutConfig:
        if isinstance(self.strategy, DAggerStrategyConfig) and self.teleop is None:
            raise ValueError("DAgger strategy requires --teleop.type to be set")

-        # TODO(Steven): DAgger shouldn't require a dataset (user may want to just rollout+intervene without recording), but for now we require it to simplify the implementation.
-        needs_dataset = isinstance(
-            self.strategy, (SentryStrategyConfig, HighlightStrategyConfig, DAggerStrategyConfig)
-        )
+        needs_dataset = isinstance(self.strategy, (SentryStrategyConfig, HighlightStrategyConfig))
        if needs_dataset and (self.dataset is None or not self.dataset.repo_id):
            raise ValueError(f"{self.strategy.type} strategy requires --dataset.repo_id to be set")

-        # if isinstance(self.strategy, BaseStrategyConfig) and self.dataset is not None:
-        #     raise ValueError(
-        #         "Base strategy does not record data. Use sentry, highlight, or dagger for recording."
-        #     )
+        if isinstance(self.strategy, BaseStrategyConfig) and self.dataset is not None:
+            raise ValueError(
+                "Base strategy does not record data. Use sentry, highlight, or dagger for recording."
+            )

        # Sentry MUST use streaming encoding to avoid disk I/O blocking the control loop
        if (
@@ -249,29 +244,14 @@ class RolloutConfig:
            self.dataset.streaming_encoding = True

        # DAgger: streaming is mandatory only when the autonomous phase is also recorded.
-        if isinstance(self.strategy, DAggerStrategyConfig) and self.dataset is not None:
-            if self.strategy.record_autonomous and not self.dataset.streaming_encoding:
-                logger.warning("DAgger with record_autonomous=True forces streaming_encoding=True")
-                self.dataset.streaming_encoding = True
-            elif not self.strategy.record_autonomous and not self.dataset.streaming_encoding:
-                logger.info(
-                    "Streaming encoding is disabled for DAgger corrections-only mode. "
-                    "Consider enabling it for faster episode saving: "
-                    "--dataset.streaming_encoding=true --dataset.encoder_threads=2"
-                )
-
-        # DAgger: resolve num_episodes from dataset config when not explicitly set.
-        if isinstance(self.strategy, DAggerStrategyConfig) and self.strategy.num_episodes is None:
-            if self.dataset is not None:
-                self.strategy.num_episodes = self.dataset.num_episodes
-                logger.info(
-                    "DAgger num_episodes not set — using --dataset.num_episodes=%d",
-                    self.strategy.num_episodes,
-                )
-            else:
-                raise ValueError(
-                    "DAgger num_episodes must be set either via --strategy.num_episodes or --dataset.num_episodes"
-                )
+        if (
+            isinstance(self.strategy, DAggerStrategyConfig)
+            and self.strategy.record_autonomous
+            and self.dataset is not None
+            and not self.dataset.streaming_encoding
+        ):
+            logger.warning("DAgger with record_autonomous=True forces streaming_encoding=True")
+            self.dataset.streaming_encoding = True

        # --- Policy loading ---
        if self.robot is None:
@@ -285,26 +265,6 @@ class RolloutConfig:
        if self.policy is None:
            raise ValueError("--policy.path is required for rollout")

-        # --- Task resolution ---
-        # When --dataset.rename_map (or any --dataset.* flag) is passed, draccus
-        # creates a DatasetRecordConfig with single_task="".  If the user set
-        # the task via the top-level --task flag, propagate it so that all
-        # downstream consumers (inference engine, dataset frame builders) see it.
-        if self.dataset is not None and not self.dataset.single_task and self.task:
-            self.dataset.single_task = self.task
-        elif self.dataset is not None and self.dataset.single_task and not self.task:
-            self.task = self.dataset.single_task
-
-        # --- Device resolution ---
-        # Resolve device from the policy config when not explicitly set so all
-        # components (policy.to, preprocessor, inference engine) use the same
-        # device string instead of inconsistent fallbacks.
-        if self.device is None and self.policy is not None:
-            resolved = getattr(self.policy, "device", None)
-            if resolved:
-                self.device = resolved
-                logger.info("Resolved device from policy config: %s", self.device)
-
    @classmethod
    def __get_path_fields__(cls) -> list[str]:
        return ["policy"]
--- a/src/lerobot/rollout/context.py
+++ b/src/lerobot/rollout/context.py
@@ -272,17 +272,11 @@ def build_rollout_context(
    #         )

    # --- 4. Features + action-key reconciliation ---------------------
-    # Only `.pos` joint features are used for policy inference — velocity and
-    # torque channels are observation-only and must be excluded from the state
-    # and action tensors that the policy sees.  This matches the filtering
-    # applied by the old ``hil_data_collection`` script.
    all_obs_features = robot.observation_features
    observation_features_hw = {
-        k: v
-        for k, v in all_obs_features.items()
-        if isinstance(v, tuple) or (v is float and k.endswith(".pos"))
+        k: v for k, v in all_obs_features.items() if v is float or isinstance(v, tuple)
    }
-    action_features_hw = {k: v for k, v in robot.action_features.items() if k.endswith(".pos")}
+    action_features_hw = robot.action_features

    # The action side is always needed: sync inference reads action names from
    # ``dataset_features[ACTION]`` to map policy tensors back to robot actions.
@@ -299,56 +293,19 @@ def build_rollout_context(
    )
    dataset_features = combine_feature_dicts(action_dataset_features, observation_dataset_features)
    hw_features = hw_to_dataset_features(observation_features_hw, "observation")
-    raw_action_keys = list(action_features_hw.keys())
+    raw_action_keys = list(robot.action_features.keys())
    policy_action_names = getattr(policy_config, "action_feature_names", None)
    ordered_action_keys = _resolve_action_key_order(
        list(policy_action_names) if policy_action_names else None,
        raw_action_keys,
    )

-    # --- Diagnostic logging ---
-    _act_ft = dataset_features.get("action", {})
-    _obs_ft = dataset_features.get("observation.state", {})
-    logger.info(
-        "Feature reconciliation: action_dim=%d, obs_state_dim=%d, ordered_action_keys=%d",
-        _act_ft.get("shape", (0,))[0],
-        _obs_ft.get("shape", (0,))[0],
-        len(ordered_action_keys),
-    )
-    logger.info("  action names  : %s", _act_ft.get("names", []))
-    logger.info("  obs state names: %s", _obs_ft.get("names", []))
-    logger.info("  ordered keys   : %s", ordered_action_keys)
-    logger.info(
-        "  policy.action_feature_names: %s",
-        list(policy_action_names) if policy_action_names else "None (using raw_action_keys)",
-    )
-    if full_config.input_features:
-        logger.info("  policy input_features: %s", list(full_config.input_features.keys()))
-    else:
-        logger.warning("  policy input_features is EMPTY — policy may not process images!")
-    if full_config.output_features:
-        for k, v in full_config.output_features.items():
-            logger.info("  policy output_features[%s]: shape=%s", k, v.shape)
-    # Validate action dimension consistency
-    if full_config.output_features:
-        for ft in full_config.output_features.values():
-            policy_action_dim = ft.shape[0]
-            if len(ordered_action_keys) != policy_action_dim:
-                logger.error(
-                    "ACTION DIM MISMATCH: policy expects %d dims, hardware produces %d keys. "
-                    "First 5 keys: %s",
-                    policy_action_dim,
-                    len(ordered_action_keys),
-                    ordered_action_keys[:5],
-                )
-            break
-
    # Validate visual features if no rename_map is active
    rename_map = cfg.dataset.rename_map if cfg.dataset else {}
    if not rename_map:
        expected_visuals = {k for k, v in full_config.input_features.items() if v.type == FeatureType.VISUAL}
        provided_visuals = {
-            f"observation.images.{k}" for k, v in robot.observation_features.items() if isinstance(v, tuple)
+            f"observation.{k}" for k, v in robot.observation_features.items() if isinstance(v, tuple)
        }
        policy_subset = expected_visuals.issubset(provided_visuals)
        hw_subset = provided_visuals.issubset(expected_visuals)
@@ -417,35 +374,11 @@ def build_rollout_context(
        pretrained_path=cfg.policy.pretrained_path,
        dataset_stats=dataset_stats,
        preprocessor_overrides={
-            "device_processor": {"device": cfg.device},
+            "device_processor": {"device": cfg.device or getattr(policy_config, "device", "cpu")},
            "rename_observations_processor": {"rename_map": cfg.dataset.rename_map if cfg.dataset else {}},
        },
    )

-    # --- Debug: verify normalizer stats loaded from pretrained ---
-    from lerobot.processor import NormalizerProcessorStep, UnnormalizerProcessorStep
-
-    for step in preprocessor.steps:
-        if isinstance(step, NormalizerProcessorStep):
-            n_stats = sum(len(v) for v in step._tensor_stats.values()) if step._tensor_stats else 0
-            logger.info(
-                "Preprocessor normalizer: %d stat tensors, keys=%s",
-                n_stats,
-                list(step._tensor_stats.keys())[:3],
-            )
-            if n_stats == 0:
-                logger.error("PREPROCESSOR NORMALIZER HAS NO STATS — observations will NOT be normalized!")
-    for step in postprocessor.steps:
-        if isinstance(step, UnnormalizerProcessorStep):
-            n_stats = sum(len(v) for v in step._tensor_stats.values()) if step._tensor_stats else 0
-            logger.info(
-                "Postprocessor unnormalizer: %d stat tensors, keys=%s",
-                n_stats,
-                list(step._tensor_stats.keys())[:3],
-            )
-            if n_stats == 0:
-                logger.error("POSTPROCESSOR UNNORMALIZER HAS NO STATS — actions will NOT be denormalized!")
-
    # --- 7. Inference strategy (needs policy + pre/post + hardware) --
    logger.info(
        "Creating inference engine (type=%s)...",
--- a/src/lerobot/rollout/inference/sync.py
+++ b/src/lerobot/rollout/inference/sync.py
@@ -97,30 +97,10 @@ class SyncInferenceEngine(InferenceEngine):
                observation, self._device, self._task, self._robot_type
            )
            observation = self._preprocessor(observation)
-            action_raw = self._policy.select_action(observation)
-            action = self._postprocessor(action_raw)
+            action = self._policy.select_action(observation)
+            action = self._postprocessor(action)
        action_tensor = action.squeeze(0).cpu()

-        if not hasattr(self, "_log_count"):
-            self._log_count = 0
-        if self._log_count < 3:
-            raw_flat = action_raw.squeeze(0).cpu()
-            logger.info(
-                "[Sync tick %d] raw action (first 5): %s | post-processed (first 5): %s",
-                self._log_count,
-                raw_flat[:5].tolist(),
-                action_tensor[:5].tolist(),
-            )
-            obs_state = obs_frame.get("observation.state")
-            if obs_state is not None:
-                logger.info(
-                    "[Sync tick %d] obs_frame['observation.state'] (first 5): %s | shape: %s",
-                    self._log_count,
-                    obs_state[:5].tolist() if hasattr(obs_state, "tolist") else str(obs_state)[:80],
-                    obs_state.shape if hasattr(obs_state, "shape") else "?",
-                )
-            self._log_count += 1
-
        # Reorder to match dataset action ordering so the caller can treat
        # the returned tensor uniformly across backends.
        action_dict = make_robot_action(action_tensor, self._dataset_features)
--- a/src/lerobot/rollout/strategies/base.py
+++ b/src/lerobot/rollout/strategies/base.py
@@ -48,16 +48,6 @@ class BaseStrategy(RolloutStrategy):

        control_interval = interpolator.get_control_interval(cfg.fps)

-        # Flush a few observation reads so CAN bus / sensor state is fresh
-        # before the first inference.  Without this, the first observation(s)
-        # can return stale or identical values for all joints, poisoning the
-        # entire first action chunk.
-        _OBS_WARMUP_READS = 5
-        for _ in range(_OBS_WARMUP_READS):
-            robot.get_observation()
-            precise_sleep(1 / cfg.fps)
-        logger.info("Flushed %d observation warmup reads", _OBS_WARMUP_READS)
-
        start_time = time.perf_counter()
        engine.resume()
        logger.info("Base strategy control loop started")
@@ -80,7 +70,6 @@ class BaseStrategy(RolloutStrategy):
            self._log_telemetry(obs_processed, action_dict, ctx.runtime)

            dt = time.perf_counter() - loop_start
-            self._warn_if_slow(dt, control_interval, cfg.fps)
            if (sleep_t := control_interval - dt) > 0:
                precise_sleep(sleep_t)

--- a/src/lerobot/rollout/strategies/core.py
+++ b/src/lerobot/rollout/strategies/core.py
@@ -63,10 +63,6 @@ class RolloutStrategy(abc.ABC):
        self._engine = ctx.policy.inference
        logger.info("Starting inference engine...")
        self._engine.start()
-        # Reset policy and processor state so the first inference starts clean
-        # (matches the old HIL script which called policy.reset() / preprocessor.reset()
-        # at the beginning of each episode).
-        self._engine.reset()
        self._warmup_flushed = False
        logger.info("Inference engine started")

@@ -146,20 +142,6 @@ class RolloutStrategy(abc.ABC):
            compress_images=cfg.display_compressed_images,
        )

-    @staticmethod
-    def _warn_if_slow(dt: float, control_interval: float, fps: float) -> None:
-        """Log a warning when the control loop runs slower than target FPS."""
-        if dt > control_interval:
-            actual_fps = 1.0 / dt if dt > 0 else 0
-            logger.warning(
-                "Control loop is running slower (%.1f Hz) than target FPS (%.0f Hz). "
-                "Dataset frames might be dropped and robot control might be unstable. "
-                "Common causes: 1) Camera FPS not keeping up "
-                "2) Policy inference taking too long 3) CPU starvation",
-                actual_fps,
-                fps,
-            )
-
    @abc.abstractmethod
    def setup(self, ctx: RolloutContext) -> None:
        """Strategy-specific initialisation (keyboard listeners, buffers, etc.)."""
@@ -286,17 +268,5 @@ def send_next_action(

    action_dict = {k: interp[i].item() for i, k in enumerate(ordered_keys) if i < len(interp)}
    processed = ctx.processors.robot_action_processor((action_dict, obs_raw))
-
-    if not hasattr(send_next_action, "_log_count"):
-        send_next_action._log_count = 0
-    if send_next_action._log_count < 3:
-        sample = {k: round(v, 4) for k, v in list(processed.items())[:5]}
-        logger.info(
-            "[send_next_action tick %d] action sent to robot (first 5): %s",
-            send_next_action._log_count,
-            sample,
-        )
-        send_next_action._log_count += 1
-
    ctx.hardware.robot_wrapper.send_action(processed)
    return action_dict
--- a/src/lerobot/rollout/strategies/dagger.py
+++ b/src/lerobot/rollout/strategies/dagger.py
@@ -506,7 +506,6 @@ class DAggerStrategy(RolloutStrategy):
                        episode_start = time.perf_counter()

                    dt = time.perf_counter() - loop_start
-                    self._warn_if_slow(dt, control_interval, cfg.fps)
                    if (sleep_t := control_interval - dt) > 0:
                        precise_sleep(sleep_t)

@@ -557,7 +556,6 @@ class DAggerStrategy(RolloutStrategy):
        engine.resume()

        last_action: dict[str, Any] | None = None
-        start_time = time.perf_counter()
        record_tick = 0
        recorded = 0
        logger.info(
@@ -573,10 +571,6 @@ class DAggerStrategy(RolloutStrategy):
                ):
                    loop_start = time.perf_counter()

-                    if cfg.duration > 0 and (time.perf_counter() - start_time) >= cfg.duration:
-                        logger.info("Duration limit reached (%.0fs)", cfg.duration)
-                        break
-
                    # Process transitions
                    transition = events.consume_transition()
                    if transition is not None:
@@ -647,7 +641,6 @@ class DAggerStrategy(RolloutStrategy):
                            last_action = ctx.processors.robot_action_processor((action_dict, obs))

                    dt = time.perf_counter() - loop_start
-                    self._warn_if_slow(dt, control_interval, cfg.fps)
                    if (sleep_t := control_interval - dt) > 0:
                        precise_sleep(sleep_t)

--- a/src/lerobot/rollout/strategies/factory.py
+++ b/src/lerobot/rollout/strategies/factory.py
@@ -25,7 +25,7 @@ from .highlight import HighlightStrategy
 from .sentry import SentryStrategy

 if TYPE_CHECKING:
-    from ..configs import RolloutStrategyConfig
+    from lerobot.rollout import RolloutStrategyConfig


 def create_strategy(config: RolloutStrategyConfig) -> RolloutStrategy:
--- a/src/lerobot/rollout/strategies/highlight.py
+++ b/src/lerobot/rollout/strategies/highlight.py
@@ -187,7 +187,6 @@ class HighlightStrategy(RolloutStrategy):
                            ring.append(frame)

                    dt = time.perf_counter() - loop_start
-                    self._warn_if_slow(dt, control_interval, cfg.fps)
                    if (sleep_t := control_interval - dt) > 0:
                        precise_sleep(sleep_t)

--- a/src/lerobot/rollout/strategies/sentry.py
+++ b/src/lerobot/rollout/strategies/sentry.py
@@ -158,7 +158,6 @@ class SentryStrategy(RolloutStrategy):
                        episode_start = time.perf_counter()

                    dt = time.perf_counter() - loop_start
-                    self._warn_if_slow(dt, control_interval, cfg.fps)
                    if (sleep_t := control_interval - dt) > 0:
                        precise_sleep(sleep_t)

--- a/tests/policies/rtc/test_rtc_relative_actions.py
+++ b/tests/policies/rtc/test_rtc_relative_actions.py
@@ -187,7 +187,7 @@ class TestRTCDenoiseWithRelativeLeftovers:


 class TestFullPipelineRelativeRTC:
-    """End-to-end test of the RTC + relative actions pipeline matching lerobot-rollout flow."""
+    """End-to-end test of the RTC + relative actions pipeline matching eval_with_real_robot.py flow."""

    def test_preprocessor_caches_state_for_postprocessor(self):
        """Preprocessor's relative step should cache state so postprocessor can convert back."""
@@ -240,7 +240,7 @@ class TestFullPipelineRelativeRTC:
        torch.testing.assert_close(recovered, actions, atol=1e-5, rtol=1e-5)

    def test_eval_loop_simulation(self):
-        """Simulate the lerobot-rollout loop with relative actions.
+        """Simulate the eval_with_real_robot.py loop with relative actions.

        Iteration 1: No leftovers → model generates relative actions → store for RTC
        Iteration 2: Use leftovers as RTC guidance → model generates new relative actions
@@ -401,12 +401,12 @@ class TestStateRebasingApproximation:


 def _detect_relative_actions(preprocessor) -> bool:
-    """Mirror of the helper in lerobot-rollout for testing without importing it."""
+    """Mirror of the helper in eval_with_real_robot.py for testing without importing it."""
    return any(isinstance(step, RelativeActionsProcessorStep) and step.enabled for step in preprocessor.steps)


 class TestDetectRelativeActions:
-    """Test the _detect_relative_actions helper logic used by lerobot-rollout."""
+    """Test the _detect_relative_actions helper logic used by eval_with_real_robot.py."""

    def test_detects_enabled_relative_step(self):
        class FakePipeline:
--- a/tests/test_rollout.py
+++ b/tests/test_rollout.py
@@ -80,7 +80,7 @@ def test_dagger_config_defaults():
    from lerobot.rollout import DAggerStrategyConfig

    cfg = DAggerStrategyConfig()
-    assert cfg.num_episodes is None
+    assert cfg.num_episodes == 10
    assert cfg.record_autonomous is False
    assert cfg.input_device == "keyboard"
Author	SHA1	Message	Date
Steven Palma	8e21268c29	test: add dataset guard + fix imports	2026-04-20 00:36:02 +02:00
Steven Palma	4130d4a4a5	update docs + docstrings + examples + add minimal test	2026-04-19 23:53:53 +02:00
Steven Palma	47bb840a55	add context guards	2026-04-19 23:21:14 +02:00
Steven Palma	9519ff5e09	Merge branch 'main' into feat/decouple_record_script Signed-off-by: Steven Palma <imstevenpmwork@ieee.org>	2026-04-19 22:48:08 +02:00
Steven Palma	32a27cae8a	filesize default change + more logs + filesize mb based episode + go back to init pos + rerun log + date end of repo_id	2026-04-19 16:50:19 +02:00
Steven Palma	8cee56e2d6	fix pre-commit	2026-04-17 16:46:58 +02:00
Steven Palma	a76874f35e	test dagger	2026-04-17 16:46:38 +02:00
Steven Palma	35bb2c7459	simplify dagger	2026-04-17 15:55:03 +02:00
Steven Palma	051f6c6803	Merge branch 'main' into feat/decouple_record_script	2026-04-17 14:25:18 +02:00
Steven Palma	04ae0312a2	HW tests fixes	2026-04-16 17:29:22 +02:00
Steven Palma	cc634de9e7	add docstrings	2026-04-16 16:40:33 +02:00
Steven Palma	3eda5712d3	some more iterations	2026-04-16 15:52:23 +02:00
Steven Palma	783ec6e232	minor improvements	2026-04-16 14:34:22 +02:00
Steven Palma	4e3175ff15	address review	2026-04-15 19:31:53 +02:00
Steven Palma	edd7fc52a8	feat: introduce inference engine strategy	2026-04-15 17:51:44 +02:00
Steven Palma	0f0f8b8961	imports and comments	2026-04-15 16:28:56 +02:00
Steven Palma	79db54dc34	Merge branch 'main' into feat/decouple_record_script	2026-04-15 11:06:45 +02:00
Steven Palma	6ae07878f7	Merge branch 'main' into feat/decouple_record_script	2026-04-14 22:54:29 +02:00
Steven Palma	10d05e03bc	Merge branch 'main' into feat/decouple_record_script	2026-04-14 21:35:26 +02:00
Steven Palma	f2c29d78cf	more improvements and fixes	2026-04-14 17:51:03 +02:00
Steven Palma	8bc47e4318	target review	2026-04-14 17:14:09 +02:00
Steven Palma	49f32b9796	some more iterations	2026-04-14 16:34:52 +02:00
Steven Palma	f55782f9f7	pre-commit run	2026-04-14 15:42:19 +02:00
Steven Palma	05a2604d6e	first iteration	2026-04-14 15:42:04 +02:00