mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-31 10:51:35 +00:00
feat(smolvla2-runtime): overfit/memorisation diagnostics on the panel
The autonomous-mode panel now surfaces what the model is *actually*
producing at every chunk boundary, not just what got accepted:
* last_subtask_raw — most recent generation (accepted or not)
* subtask_repeat_count — number of times the same accepted string was regenerated
* subtask_gibberish_count — rejections by the gibberish filter
* memory_gibberish_count / plan_gibberish_count — the same rejection counters for the memory and plan heads
These let the operator see memorisation collapse without scrolling
back through logs:
subtask diag repeat:8 gibberish:0 last_raw: '<same string>'
(repeat count climbing) → model can't move past current phase
subtask diag repeat:0 gibberish:14 last_raw: 'Ass:::'
(gibberish count climbing) → LM collapsed to template salad
Also silences the per-action ``Relative goal position magnitude had
to be clamped`` warning. The clamp fires every dispatch tick when the
model emits stale joint targets, flooding the panel at ctrl_hz=30.
Replaced the bare ``logging.warning`` call in robots/utils.py with a
module-level logger so that this logger's level can be selectively raised
to ERROR, which suppresses the warning. Operators who need the per-tick
clamp detail can use ``-v``.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -368,6 +368,12 @@ class HighLevelSubtaskFwd(InferenceStep):
|
||||
msg = _generate_with_policy(
|
||||
self.policy, ctx, observation=observation, state=state, label="subtask gen"
|
||||
)
|
||||
# Diagnostics: surface what the model is *actually* producing
|
||||
# at chunk boundaries, even when the output gets rejected or
|
||||
# repeats. Memorisation collapse looks like "same accepted
|
||||
# subtask N times in a row" or "gibberish_count rising while
|
||||
# current_subtask is stuck". The state panel renders these.
|
||||
state["last_subtask_raw"] = msg or ""
|
||||
if msg and _looks_like_gibberish(msg):
|
||||
# Bump a counter so the operator can see the model is
|
||||
# struggling without spamming the log every tick. A first
|
||||
@@ -385,6 +391,17 @@ class HighLevelSubtaskFwd(InferenceStep):
|
||||
if changed:
|
||||
# Subtask change is a downstream trigger.
|
||||
state.setdefault("events_this_tick", []).append("subtask_change")
|
||||
state["subtask_repeat_count"] = 0
|
||||
else:
|
||||
# Same accepted string regenerated — memorisation tell.
|
||||
# Once this counter climbs past a few, you're seeing
|
||||
# the model unable to move past the current subtask
|
||||
# despite the chunk having drained (visual scene may
|
||||
# have changed but the LM is replaying training
|
||||
# tokens).
|
||||
state["subtask_repeat_count"] = (
|
||||
state.get("subtask_repeat_count", 0) + 1
|
||||
)
|
||||
# Silently skip empty completions — common when the model
|
||||
# warms up or generates only EOS; logging it every tick at
|
||||
# ctrl_hz is just noise.
|
||||
@@ -417,8 +434,14 @@ class MemoryUpdateFwd(InferenceStep):
|
||||
new_memory = _generate_with_policy(
|
||||
self.policy, ctx, observation=observation, state=state, label="memory gen"
|
||||
)
|
||||
state["last_memory_raw"] = new_memory or ""
|
||||
if new_memory and _looks_like_gibberish(new_memory):
|
||||
push_log(state, f" [info] memory gen rejected (gibberish): {new_memory[:60]!r}")
|
||||
count = state.get("memory_gibberish_count", 0) + 1
|
||||
state["memory_gibberish_count"] = count
|
||||
push_log(
|
||||
state,
|
||||
f" [info] memory gen rejected (gibberish ×{count}): {new_memory[:60]!r}",
|
||||
)
|
||||
return None
|
||||
if new_memory:
|
||||
set_if_changed(state, "current_memory", new_memory, label="memory")
|
||||
@@ -456,7 +479,12 @@ class UserInterjectionFwd(InferenceStep):
|
||||
# re-trigger by typing again.
|
||||
return None
|
||||
if _looks_like_gibberish(out):
|
||||
push_log(state, f" [info] plan/say gen rejected (gibberish): {out[:60]!r}")
|
||||
count = state.get("plan_gibberish_count", 0) + 1
|
||||
state["plan_gibberish_count"] = count
|
||||
push_log(
|
||||
state,
|
||||
f" [info] plan/say gen rejected (gibberish ×{count}): {out[:60]!r}",
|
||||
)
|
||||
return None
|
||||
# Heuristic split: model is trained to emit one assistant turn
|
||||
# carrying both plan text AND a `say` tool call. Look for a
|
||||
|
||||
@@ -21,6 +21,8 @@ from lerobot.utils.import_utils import make_device_from_device_class
|
||||
from .config import RobotConfig
|
||||
from .robot import Robot
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def make_robot_from_config(config: RobotConfig) -> Robot:
|
||||
# TODO(Steven): Consider just using the make_device_from_device_class for all types
|
||||
@@ -110,7 +112,7 @@ def ensure_safe_goal_position(
|
||||
}
|
||||
|
||||
if warnings_dict:
|
||||
logging.warning(
|
||||
logger.warning(
|
||||
"Relative goal position magnitude had to be clamped to be safe.\n"
|
||||
f"{pformat(warnings_dict, indent=4)}"
|
||||
)
|
||||
|
||||
@@ -883,6 +883,37 @@ def _make_state_panel_renderer(
|
||||
f"dispatched: {dispatched} "
|
||||
f"pending tool calls: {pending}[/]"
|
||||
)
|
||||
|
||||
# Overfit / memorisation diagnostics. The high-level steps
|
||||
# surface the raw generation each time they fire (even when
|
||||
# rejected as gibberish or unchanged), plus repeat/rejection
|
||||
# counters. Rule of thumb:
|
||||
#
|
||||
# * subtask repeat ≥ ~5 and queue_len cycles fully → model
|
||||
# can't move past current subtask (memorised one phase
|
||||
# of the task — classic overfit signature)
|
||||
# * subtask gibberish climbing → LM head collapsed to
|
||||
# chat-template fragments / one-token salads
|
||||
# * last raw differs from accepted → at least the LM is
|
||||
# varying, the gibberish filter is doing its job
|
||||
raw_subtask = st.get("last_subtask_raw")
|
||||
sub_rep = int(st.get("subtask_repeat_count") or 0)
|
||||
sub_gib = int(st.get("subtask_gibberish_count") or 0)
|
||||
if raw_subtask is not None or sub_rep or sub_gib:
|
||||
raw_display = (raw_subtask or "(empty)")[:80]
|
||||
color = "yellow" if (sub_rep >= 3 or sub_gib >= 3) else "dim"
|
||||
console.print(
|
||||
f" [{color}]subtask diag repeat:{sub_rep} "
|
||||
f"gibberish:{sub_gib} last_raw: {raw_display!r}[/]"
|
||||
)
|
||||
|
||||
# Same diagnostics for memory and plan when available.
|
||||
mem_gib = int(st.get("memory_gibberish_count") or 0)
|
||||
plan_gib = int(st.get("plan_gibberish_count") or 0)
|
||||
if mem_gib or plan_gib:
|
||||
console.print(
|
||||
f" [dim]gen rejects memory:{mem_gib} plan:{plan_gib}[/]"
|
||||
)
|
||||
console.rule(style="cyan")
|
||||
if robot_lines:
|
||||
for line in robot_lines:
|
||||
@@ -939,6 +970,16 @@ def _silence_noisy_loggers() -> None:
|
||||
):
|
||||
logging.getLogger(name).setLevel(logging.WARNING)
|
||||
|
||||
# The robot's relative-goal-position clamp warning fires *every*
|
||||
# dispatch tick on a memorised model — the LM is trying to jump
|
||||
# the wrist far past where max_relative_target allows, so the
|
||||
# warning floods the panel at ~30 Hz. Raise this logger's threshold
|
||||
# to ERROR to hide it: the dispatch counter on the panel already tells the
|
||||
# operator the loop is running, and the panel itself shows
|
||||
# whether motion is happening. If anyone needs the per-action
|
||||
# clamp detail, ``-v`` puts it back via DEBUG.
|
||||
logging.getLogger("lerobot.robots.utils").setLevel(logging.ERROR)
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
args = _parse_args(argv)
|
||||
|
||||
Reference in New Issue
Block a user