feat(smolvla2-runtime): scrollback in autonomous panel + empty-gen counter

Two improvements for diagnosing why ``last_raw`` stays empty:

1. The autonomous panel-redraw thread calls console.clear() every
   0.5 s, wiping any log lines the runtime printed since the last
   redraw. So warnings from generation
   (``[warn] subtask gen failed: ...``,
   ``[info] subtask gen rejected (gibberish): ...``)
   flashed for at most 0.5 s and disappeared, leaving the operator
   blind. Capture log_lines from each tick into a bounded scrollback
   (last 12 entries) and render them inside the panel itself, below
   the diag row. They now stick across redraws until rotated out.

2. ``empty`` counter for subtask gen. Persistent empty completions are
   their own failure mode — the LM head EOS-es immediately from the
   chat-template generation prompt, distinct from "generated something
   but filter rejected it". The diag row now reads:

       subtask diag    repeat:0  gibberish:0  empty:14  last_raw: '(empty)'
                                              ^^^^^^^

   plus a log line on the first empty and every 10th after that, so
   the cause is also surfaced in the scrollback.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-31 19:01:28 +00:00 · 2026-05-12 17:42:13 +02:00
parent b9db4d21a2
commit fbcac95662
2 changed files with 54 additions and 4 deletions
--- a/src/lerobot/policies/smolvla2/inference/steps.py
+++ b/src/lerobot/policies/smolvla2/inference/steps.py
@@ -374,6 +374,21 @@ class HighLevelSubtaskFwd(InferenceStep):
        # subtask N times in a row" or "gibberish_count rising while
        # current_subtask is stuck". The state panel renders these.
        state["last_subtask_raw"] = msg or ""
+        # Persistent empty completion is its own failure mode (model
+        # immediately EOS-es from the chat-template generation
+        # prompt) — log it on the first occurrence and every 10th
+        # after that so the operator can distinguish "generation
+        # failing silently" from "generating fine but filter rejecting".
+        if not msg:
+            empties = state.get("subtask_empty_count", 0) + 1
+            state["subtask_empty_count"] = empties
+            if empties == 1 or empties % 10 == 0:
+                push_log(
+                    state,
+                    f"  [info] subtask gen returned empty (×{empties}) — "
+                    "model EOS-ing immediately or generation raised "
+                    "(check stderr / -v for traceback).",
+                )
        if msg and _looks_like_gibberish(msg):
            # Bump a counter so the operator can see the model is
            # struggling without spamming the log every tick. A first
--- a/src/lerobot/scripts/lerobot_smolvla2_runtime.py
+++ b/src/lerobot/scripts/lerobot_smolvla2_runtime.py
@@ -738,7 +738,28 @@ def _run_autonomous(
    )
    thread.start()

-    redraw = _make_state_panel_renderer(runtime, mode_label="autonomous")
+    # Capture log lines flushed by the runtime each tick into a
+    # bounded scrollback that the panel renderer prints inside the
+    # rule block. Without this, ``runtime._flush_logs`` just calls
+    # ``print(...)`` which the 2 Hz panel redraw clears immediately —
+    # so failure messages from generation (e.g. ``[warn] subtask gen
+    # failed: ...``) flash for ≤ 0.5 s and disappear, leaving the
+    # operator with no idea why ``last_raw`` stays empty.
+    _scrollback: list[str] = []
+    _scrollback_max = 12
+
+    def _flush_into_scrollback() -> None:
+        for line in runtime.state.get("log_lines") or []:
+            _scrollback.append(line)
+        # Trim to the cap so the panel doesn't grow unbounded.
+        if len(_scrollback) > _scrollback_max:
+            del _scrollback[: len(_scrollback) - _scrollback_max]
+
+    runtime._flush_logs = _flush_into_scrollback  # type: ignore[method-assign]
+
+    redraw = _make_state_panel_renderer(
+        runtime, mode_label="autonomous", scrollback=_scrollback
+    )
    redraw()
    print(
        "  [autonomous] type interjections / '?' questions on stdin, "
@@ -843,6 +864,7 @@ def _make_state_panel_renderer(
    runtime: Any,
    *,
    mode_label: str,
+    scrollback: list[str] | None = None,
 ) -> Callable[[list[str] | None], None]:
    """Return a closure that prints the task/subtask/plan/memory panel.

@@ -899,12 +921,18 @@ def _make_state_panel_renderer(
        raw_subtask = st.get("last_subtask_raw")
        sub_rep = int(st.get("subtask_repeat_count") or 0)
        sub_gib = int(st.get("subtask_gibberish_count") or 0)
-        if raw_subtask is not None or sub_rep or sub_gib:
+        sub_empty = int(st.get("subtask_empty_count") or 0)
+        if raw_subtask is not None or sub_rep or sub_gib or sub_empty:
            raw_display = (raw_subtask or "(empty)")[:80]
-            color = "yellow" if (sub_rep >= 3 or sub_gib >= 3) else "dim"
+            color = (
+                "yellow"
+                if (sub_rep >= 3 or sub_gib >= 3 or sub_empty >= 3)
+                else "dim"
+            )
            console.print(
                f"  [{color}]subtask diag    repeat:{sub_rep}  "
-                f"gibberish:{sub_gib}  last_raw: {raw_display!r}[/]"
+                f"gibberish:{sub_gib}  empty:{sub_empty}  "
+                f"last_raw: {raw_display!r}[/]"
            )

        # Same diagnostics for memory and plan when available.
@@ -915,6 +943,13 @@ def _make_state_panel_renderer(
                f"  [dim]gen rejects     memory:{mem_gib}  plan:{plan_gib}[/]"
            )
        console.rule(style="cyan")
+        # Runtime scrollback — log lines pushed from generation steps
+        # (warnings, gibberish rejections, plan/say speech, vqa
+        # answers). Last N lines, oldest first.
+        if scrollback:
+            for line in scrollback:
+                console.print(f"  [magenta]{line.rstrip()}[/]")
+            console.rule(style="cyan")
        if robot_lines:
            for line in robot_lines:
                console.print(f"  [magenta]{line.strip()}[/]")