From fbcac95662ba8aa353d23f3ced3c1eb8aae5ca0e Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Tue, 12 May 2026 17:42:13 +0200
Subject: [PATCH] feat(smolvla2-runtime): scrollback in autonomous panel +
 empty-gen counter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two improvements for diagnosing why ``last_raw`` stays empty:

1. The autonomous panel-redraw thread calls console.clear() every
   0.5 s, wiping any log lines the runtime printed since the last
   redraw. As a result, warnings from generation (``[warn] subtask
   gen failed: ...``, ``[info] subtask gen rejected (gibberish):
   ...``) were visible for at most 0.5 s before disappearing,
   leaving the operator blind.

   Capture ``log_lines`` from each tick into a bounded scrollback
   (last 12 entries) and render them as part of the panel, in their
   own section below the diag rows. They now persist across redraws
   until rotated out.

2. Add an ``empty`` counter for subtask gen. Persistent empty
   completions are their own failure mode — the LM head EOS-es
   immediately from the chat-template generation prompt — distinct
   from "generated something but the filter rejected it". The diag
   row now reads:

     subtask diag    repeat:0  gibberish:0  empty:14  last_raw: '(empty)'
                                            ^^^^^^^
   plus a log line on the first empty completion and on every 10th
   thereafter, so the cause is also surfaced in the scrollback.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../policies/smolvla2/inference/steps.py      | 15 +++++++
 .../scripts/lerobot_smolvla2_runtime.py       | 43 +++++++++++++++++--
 2 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/src/lerobot/policies/smolvla2/inference/steps.py b/src/lerobot/policies/smolvla2/inference/steps.py
index 8ffe66cd0..513ce714c 100644
--- a/src/lerobot/policies/smolvla2/inference/steps.py
+++ b/src/lerobot/policies/smolvla2/inference/steps.py
@@ -374,6 +374,21 @@ class HighLevelSubtaskFwd(InferenceStep):
         # subtask N times in a row" or "gibberish_count rising while
         # current_subtask is stuck". The state panel renders these.
         state["last_subtask_raw"] = msg or ""
+        # Persistent empty completion is its own failure mode (model
+        # immediately EOS-es from the chat-template generation
+        # prompt) — surface the first and every 10th occurrence so the
+        # operator can distinguish "generation failing silently"
+        # from "generating fine but filter rejecting".
+        if not msg:
+            empties = state.get("subtask_empty_count", 0) + 1
+            state["subtask_empty_count"] = empties
+            if empties == 1 or empties % 10 == 0:
+                push_log(
+                    state,
+                    f"  [info] subtask gen returned empty (×{empties}) — "
+                    "model EOS-ing immediately or generation raised "
+                    "(check stderr / -v for traceback).",
+                )
         if msg and _looks_like_gibberish(msg):
             # Bump a counter so the operator can see the model is
             # struggling without spamming the log every tick. A first
diff --git a/src/lerobot/scripts/lerobot_smolvla2_runtime.py b/src/lerobot/scripts/lerobot_smolvla2_runtime.py
index 8055a5aed..d7c567b50 100644
--- a/src/lerobot/scripts/lerobot_smolvla2_runtime.py
+++ b/src/lerobot/scripts/lerobot_smolvla2_runtime.py
@@ -738,7 +738,28 @@ def _run_autonomous(
     )
     thread.start()
 
-    redraw = _make_state_panel_renderer(runtime, mode_label="autonomous")
+    # Capture log lines flushed by the runtime each tick into a
+    # bounded scrollback that the panel renderer prints inside the
+    # rule block. Without this, ``runtime._flush_logs`` just calls
+    # ``print(...)`` which the 2 Hz panel redraw clears immediately —
+    # so failure messages from generation (e.g. ``[warn] subtask gen
+    # failed: ...``) flash for ≤ 0.5 s and disappear, leaving the
+    # operator with no idea why ``last_raw`` stays empty.
+    _scrollback: list[str] = []
+    _scrollback_max = 12
+
+    def _flush_into_scrollback() -> None:
+        for line in runtime.state.get("log_lines") or []:
+            _scrollback.append(line)
+        # Trim to the cap so the panel doesn't grow unbounded.
+        if len(_scrollback) > _scrollback_max:
+            del _scrollback[: len(_scrollback) - _scrollback_max]
+
+    runtime._flush_logs = _flush_into_scrollback  # type: ignore[method-assign]
+
+    redraw = _make_state_panel_renderer(
+        runtime, mode_label="autonomous", scrollback=_scrollback
+    )
     redraw()
     print(
         "  [autonomous] type interjections / '?' questions on stdin, "
@@ -843,6 +864,7 @@ def _make_state_panel_renderer(
     runtime: Any,
     *,
     mode_label: str,
+    scrollback: list[str] | None = None,
 ) -> Callable[[list[str] | None], None]:
     """Return a closure that prints the task/subtask/plan/memory panel.
 
@@ -899,12 +921,18 @@ def _make_state_panel_renderer(
         raw_subtask = st.get("last_subtask_raw")
         sub_rep = int(st.get("subtask_repeat_count") or 0)
         sub_gib = int(st.get("subtask_gibberish_count") or 0)
-        if raw_subtask is not None or sub_rep or sub_gib:
+        sub_empty = int(st.get("subtask_empty_count") or 0)
+        if raw_subtask is not None or sub_rep or sub_gib or sub_empty:
             raw_display = (raw_subtask or "(empty)")[:80]
-            color = "yellow" if (sub_rep >= 3 or sub_gib >= 3) else "dim"
+            color = (
+                "yellow"
+                if (sub_rep >= 3 or sub_gib >= 3 or sub_empty >= 3)
+                else "dim"
+            )
             console.print(
                 f"  [{color}]subtask diag    repeat:{sub_rep}  "
-                f"gibberish:{sub_gib}  last_raw: {raw_display!r}[/]"
+                f"gibberish:{sub_gib}  empty:{sub_empty}  "
+                f"last_raw: {raw_display!r}[/]"
             )
 
         # Same diagnostics for memory and plan when available.
@@ -915,6 +943,13 @@ def _make_state_panel_renderer(
                 f"  [dim]gen rejects     memory:{mem_gib}  plan:{plan_gib}[/]"
             )
         console.rule(style="cyan")
+        # Runtime scrollback — log lines pushed from generation steps
+        # (warnings, gibberish rejections, plan/say speech, vqa
+        # answers). Last N lines, oldest first.
+        if scrollback:
+            for line in scrollback:
+                console.print(f"  [magenta]{line.rstrip()}[/]")
+            console.rule(style="cyan")
         if robot_lines:
             for line in robot_lines:
                 console.print(f"  [magenta]{line.strip()}[/]")