From fbcac95662ba8aa353d23f3ced3c1eb8aae5ca0e Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 12 May 2026 17:42:13 +0200 Subject: [PATCH] feat(smolvla2-runtime): scrollback in autonomous panel + empty-gen counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two improvements for diagnosing why ``last_raw`` stays empty: 1. The autonomous panel-redraw thread calls console.clear() every 0.5 s, wiping any log lines the runtime printed since the last redraw. So warnings from generation (``[warn] subtask gen failed: ...``, ``[info] subtask gen rejected (gibberish): ...``) flashed for at most 0.5 s and disappeared, leaving the operator blind. Capture log_lines from each tick into a bounded scrollback (last 12 entries) and render them inside the panel itself, below the diag row. They now stick across redraws until rotated out. 2. Add an ``empty`` counter for subtask gen. Persistent empty completions are their own failure mode — the LM head EOS-es immediately from the chat-template generation prompt (or generation raised and returned nothing), distinct from "generated something but filter rejected it". The diag row now reads: subtask diag repeat:0 gibberish:0 empty:14 last_raw: '(empty)' ^^^^^^^ plus a log line on the first empty and on every 10th empty thereafter, so the cause is also surfaced in the scrollback. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../policies/smolvla2/inference/steps.py | 15 +++++++ .../scripts/lerobot_smolvla2_runtime.py | 43 +++++++++++++++++-- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/lerobot/policies/smolvla2/inference/steps.py b/src/lerobot/policies/smolvla2/inference/steps.py index 8ffe66cd0..513ce714c 100644 --- a/src/lerobot/policies/smolvla2/inference/steps.py +++ b/src/lerobot/policies/smolvla2/inference/steps.py @@ -374,6 +374,21 @@ class HighLevelSubtaskFwd(InferenceStep): # subtask N times in a row" or "gibberish_count rising while # current_subtask is stuck". The state panel renders these. 
state["last_subtask_raw"] = msg or "" + # Persistent empty completion is its own failure mode (model + # immediately EOS-es from the chat-template generation + # prompt) — surface it once every N occurrences so the + # operator can distinguish "generation failing silently" + # from "generating fine but filter rejecting". + if not msg: + empties = state.get("subtask_empty_count", 0) + 1 + state["subtask_empty_count"] = empties + if empties == 1 or empties % 10 == 0: + push_log( + state, + f" [info] subtask gen returned empty (×{empties}) — " + "model EOS-ing immediately or generation raised " + "(check stderr / -v for traceback).", + ) if msg and _looks_like_gibberish(msg): # Bump a counter so the operator can see the model is # struggling without spamming the log every tick. A first diff --git a/src/lerobot/scripts/lerobot_smolvla2_runtime.py b/src/lerobot/scripts/lerobot_smolvla2_runtime.py index 8055a5aed..d7c567b50 100644 --- a/src/lerobot/scripts/lerobot_smolvla2_runtime.py +++ b/src/lerobot/scripts/lerobot_smolvla2_runtime.py @@ -738,7 +738,28 @@ def _run_autonomous( ) thread.start() - redraw = _make_state_panel_renderer(runtime, mode_label="autonomous") + # Capture log lines flushed by the runtime each tick into a + # bounded scrollback that the panel renderer prints inside the + # rule block. Without this, ``runtime._flush_logs`` just calls + # ``print(...)`` which the 2 Hz panel redraw clears immediately — + # so failure messages from generation (e.g. ``[warn] subtask gen + # failed: ...``) flash for ≤ 0.5 s and disappear, leaving the + # operator with no idea why ``last_raw`` stays empty. + _scrollback: list[str] = [] + _scrollback_max = 12 + + def _flush_into_scrollback() -> None: + for line in runtime.state.get("log_lines") or []: + _scrollback.append(line) + # Trim to the cap so the panel doesn't grow unbounded. 
+ if len(_scrollback) > _scrollback_max: + del _scrollback[: len(_scrollback) - _scrollback_max] + + runtime._flush_logs = _flush_into_scrollback # type: ignore[method-assign] + + redraw = _make_state_panel_renderer( + runtime, mode_label="autonomous", scrollback=_scrollback + ) redraw() print( " [autonomous] type interjections / '?' questions on stdin, " @@ -843,6 +864,7 @@ def _make_state_panel_renderer( runtime: Any, *, mode_label: str, + scrollback: list[str] | None = None, ) -> Callable[[list[str] | None], None]: """Return a closure that prints the task/subtask/plan/memory panel. @@ -899,12 +921,18 @@ def _make_state_panel_renderer( raw_subtask = st.get("last_subtask_raw") sub_rep = int(st.get("subtask_repeat_count") or 0) sub_gib = int(st.get("subtask_gibberish_count") or 0) - if raw_subtask is not None or sub_rep or sub_gib: + sub_empty = int(st.get("subtask_empty_count") or 0) + if raw_subtask is not None or sub_rep or sub_gib or sub_empty: raw_display = (raw_subtask or "(empty)")[:80] - color = "yellow" if (sub_rep >= 3 or sub_gib >= 3) else "dim" + color = ( + "yellow" + if (sub_rep >= 3 or sub_gib >= 3 or sub_empty >= 3) + else "dim" + ) console.print( f" [{color}]subtask diag repeat:{sub_rep} " - f"gibberish:{sub_gib} last_raw: {raw_display!r}[/]" + f"gibberish:{sub_gib} empty:{sub_empty} " + f"last_raw: {raw_display!r}[/]" ) # Same diagnostics for memory and plan when available. @@ -915,6 +943,13 @@ def _make_state_panel_renderer( f" [dim]gen rejects memory:{mem_gib} plan:{plan_gib}[/]" ) console.rule(style="cyan") + # Runtime scrollback — log lines pushed from generation steps + # (warnings, gibberish rejections, plan/say speech, vqa + # answers). Last N lines, oldest first. + if scrollback: + for line in scrollback: + console.print(f" [magenta]{line.rstrip()}[/]") + console.rule(style="cyan") if robot_lines: for line in robot_lines: console.print(f" [magenta]{line.strip()}[/]")