diff --git a/src/lerobot/rollout/strategies/base.py b/src/lerobot/rollout/strategies/base.py index 6dca99b00..959c9c28a 100644 --- a/src/lerobot/rollout/strategies/base.py +++ b/src/lerobot/rollout/strategies/base.py @@ -48,6 +48,16 @@ class BaseStrategy(RolloutStrategy): control_interval = interpolator.get_control_interval(cfg.fps) + # Flush a few observation reads so CAN bus / sensor state is fresh + # before the first inference. Without this, the first observation(s) + # can return stale or identical values for all joints, poisoning the + # entire first action chunk. + _OBS_WARMUP_READS = 5 + for _ in range(_OBS_WARMUP_READS): + robot.get_observation() + precise_sleep(1 / cfg.fps) + logger.info("Flushed %d observation warmup reads", _OBS_WARMUP_READS) + start_time = time.perf_counter() engine.resume() logger.info("Base strategy control loop started") diff --git a/src/lerobot/rollout/strategies/core.py b/src/lerobot/rollout/strategies/core.py index 384b10a0c..3967d8804 100644 --- a/src/lerobot/rollout/strategies/core.py +++ b/src/lerobot/rollout/strategies/core.py @@ -63,6 +63,10 @@ class RolloutStrategy(abc.ABC): self._engine = ctx.policy.inference logger.info("Starting inference engine...") self._engine.start() + # Reset policy and processor state so the first inference starts clean + # (matches the old HIL script which called policy.reset() / preprocessor.reset() + # at the beginning of each episode). + self._engine.reset() self._warmup_flushed = False logger.info("Inference engine started")