From f65f3f7a4a8bc2eb405d692ed297b9f9a3828e20 Mon Sep 17 00:00:00 2001 From: Reece O'Mahoney <66252930+reeceomahoney@users.noreply.github.com> Date: Tue, 26 May 2026 13:01:19 +0100 Subject: [PATCH 1/5] Fix policy.path in YAML configs (PR #3145 followup) (#3597) PR #3145 added YAML support for policy.path but left two bugs: 1. extract_path_fields_from_config only deleted config_data[field] when no sibling overrides existed. With siblings, the dict stayed in place and draccus crashed decoding it as PreTrainedConfig (no 'type' key). Sibling overrides go into _config_yaml_overrides and are applied later by from_pretrained(), so the field can always be removed. 2. wrap() updated config_path_cli to the cleaned temp file path but never propagated it to the draccus.parse fallback branch. cli_args still contained --config_path=, so draccus read the original YAML with path: still present. Tests passed because they (a) called extract_path_fields_from_config directly and (b) included type: alongside path: in the YAML, sidestepping both bugs. 
Co-authored-by: Steven Palma --- src/lerobot/configs/parser.py | 11 +++- tests/test_yaml_policy_path.py | 116 +++++++++++++++++++++++++++++++-- 2 files changed, 117 insertions(+), 10 deletions(-) diff --git a/src/lerobot/configs/parser.py b/src/lerobot/configs/parser.py index d55fa44aa..46cff2b48 100644 --- a/src/lerobot/configs/parser.py +++ b/src/lerobot/configs/parser.py @@ -255,8 +255,7 @@ def extract_path_fields_from_config(config_path: str, path_fields: list[str]) -> remaining = config_data[field] if remaining: _config_yaml_overrides[field] = _flatten_to_cli_args(remaining) - else: - del config_data[field] + del config_data[field] modified = True if not modified: @@ -311,7 +310,13 @@ def wrap(config_path: Path | None = None) -> Callable[[F], F]: cli_args = filter_arg("config_path", cli_args) cfg = argtype.from_pretrained(config_path_cli, cli_args=cli_args) else: - cfg = draccus.parse(config_class=argtype, config_path=config_path, args=cli_args) + if config_path_cli: + cli_args = filter_arg("config_path", cli_args) + cfg = draccus.parse( + config_class=argtype, + config_path=config_path_cli or config_path, + args=cli_args, + ) response = fn(cfg, *args, **kwargs) return response diff --git a/tests/test_yaml_policy_path.py b/tests/test_yaml_policy_path.py index 710a71c9a..8d8f7f2ec 100644 --- a/tests/test_yaml_policy_path.py +++ b/tests/test_yaml_policy_path.py @@ -1,10 +1,14 @@ """Tests for policy.path support in YAML config files (issue #2957).""" import json +import sys import tempfile +from dataclasses import dataclass, field +from unittest.mock import patch import yaml +from lerobot.configs import parser from lerobot.configs.parser import ( _config_path_args, _config_yaml_overrides, @@ -16,7 +20,8 @@ from lerobot.configs.parser import ( def test_extract_path_fields_from_yaml(): - """Test that policy.path is extracted from a YAML config and removed.""" + """Test that policy.path is extracted from a YAML config and the policy block + is removed entirely 
(siblings are captured separately as cli_overrides).""" config = { "dataset": {"repo_id": "lerobot/pusht"}, "policy": {"type": "smolvla", "path": "lerobot/smolvla_base", "push_to_hub": False}, @@ -26,26 +31,33 @@ def test_extract_path_fields_from_yaml(): config_path = f.name _config_path_args.clear() + _config_yaml_overrides.clear() cleaned_path = extract_path_fields_from_config(config_path, ["policy"]) # Path should be extracted and stored assert _config_path_args["policy"] == "lerobot/smolvla_base" - # Cleaned config should not have the path field + # Cleaned config should not have the policy block at all -- draccus must not + # try to decode it as PreTrainedConfig; the actual config comes from + # from_pretrained(path) with the captured overrides applied on top. with open(cleaned_path) as f: cleaned = yaml.safe_load(f) - assert "path" not in cleaned["policy"] - assert cleaned["policy"]["type"] == "smolvla" - assert cleaned["policy"]["push_to_hub"] is False + assert "policy" not in cleaned # Original dataset should be untouched assert cleaned["dataset"]["repo_id"] == "lerobot/pusht" + # Sibling overrides (excluding type/path) captured for from_pretrained. 
+ overrides = get_yaml_overrides("policy") + assert any("push_to_hub=false" in o for o in overrides) + _config_path_args.clear() + _config_yaml_overrides.clear() def test_extract_path_fields_from_json(): - """Test that policy.path is extracted from a JSON config.""" + """Test that policy.path is extracted from a JSON config and the policy + block is removed entirely.""" config = { "policy": {"type": "act", "path": "some/local/path"}, } @@ -54,15 +66,17 @@ def test_extract_path_fields_from_json(): config_path = f.name _config_path_args.clear() + _config_yaml_overrides.clear() cleaned_path = extract_path_fields_from_config(config_path, ["policy"]) assert _config_path_args["policy"] == "some/local/path" with open(cleaned_path) as f: cleaned = json.load(f) - assert "path" not in cleaned["policy"] + assert "policy" not in cleaned _config_path_args.clear() + _config_yaml_overrides.clear() def test_extract_no_path_returns_original(): @@ -216,3 +230,91 @@ def test_flatten_nested_with_bools(): args = _flatten_to_cli_args(d) assert "--optimizer.use_warmup=true" in args assert "--optimizer.lr=0.01" in args + + +def test_extract_removes_field_with_siblings_and_no_type(): + """Regression: when policy.path has siblings but no type:, the entire policy + block must still be removed from the cleaned config. Otherwise draccus tries + to decode the leftover dict as PreTrainedConfig and crashes on the missing + type discriminator. 
+ """ + config = { + "dataset": {"repo_id": "lerobot/pusht"}, + "policy": { + "path": "lerobot/smolvla_base", + "n_action_steps": 10, + "dtype": "bfloat16", + }, + } + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(config, f) + config_path = f.name + + _config_path_args.clear() + _config_yaml_overrides.clear() + cleaned_path = extract_path_fields_from_config(config_path, ["policy"]) + + with open(cleaned_path) as f: + cleaned = yaml.safe_load(f) or {} + assert "policy" not in cleaned, "policy block should be fully removed when path is present" + assert cleaned["dataset"]["repo_id"] == "lerobot/pusht" + assert _config_path_args["policy"] == "lerobot/smolvla_base" + overrides = get_yaml_overrides("policy") + assert any("n_action_steps=10" in o for o in overrides) + assert any("dtype=bfloat16" in o for o in overrides) + + _config_path_args.clear() + _config_yaml_overrides.clear() + + +@dataclass +class _DummyNested: + foo: int = 0 + + +@dataclass +class _DummyConfig: + nested: _DummyNested = field(default_factory=_DummyNested) + other: str = "default" + + @classmethod + def __get_path_fields__(cls): + return ["nested"] + + +def test_wrap_uses_cleaned_config_for_draccus_parse(): + """Regression: wrap() updates config_path_cli to point at the cleaned temp + file but must propagate that to the draccus.parse fallback branch. Without + the fix, cli_args still contains --config_path= and draccus reads + the original YAML with `path:` still in it, crashing on the unknown field. 
+ """ + config = { + "nested": {"path": "some/checkpoint", "foo": 42}, + "other": "set-via-yaml", + } + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(config, f) + config_path = f.name + + _config_path_args.clear() + _config_yaml_overrides.clear() + + captured: dict = {} + + @parser.wrap() + def main(cfg: _DummyConfig) -> _DummyConfig: + captured["cfg"] = cfg + return cfg + + with patch.object(sys, "argv", ["prog", f"--config_path={config_path}"]): + main() + + assert captured["cfg"].other == "set-via-yaml" + assert _config_path_args["nested"] == "some/checkpoint" + # Cleaned config dropped `nested:` entirely; defaults stand for this wrapper + # class (a real PreTrainedConfig would now load the checkpoint and apply + # the captured yaml_overrides via from_pretrained()). + assert captured["cfg"].nested.foo == 0 + + _config_path_args.clear() + _config_yaml_overrides.clear() From 5c98e80430d4a747926b45893568e388105a2400 Mon Sep 17 00:00:00 2001 From: Haoming Song Date: Tue, 26 May 2026 20:04:22 +0800 Subject: [PATCH 2/5] fix(gr00t): fix Eagle25VL model and processor crash in transformers>=5.4.0, <5.6.0 (#3652) Co-authored-by: Steven Palma --- .../policies/groot/eagle2_hg_model/modeling_eagle2_5_vl.py | 1 + .../groot/eagle2_hg_model/processing_eagle2_5_vl.py | 1 - src/lerobot/policies/groot/processor_groot.py | 6 +++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lerobot/policies/groot/eagle2_hg_model/modeling_eagle2_5_vl.py b/src/lerobot/policies/groot/eagle2_hg_model/modeling_eagle2_5_vl.py index 5a66cfbce..6e5532ea4 100755 --- a/src/lerobot/policies/groot/eagle2_hg_model/modeling_eagle2_5_vl.py +++ b/src/lerobot/policies/groot/eagle2_hg_model/modeling_eagle2_5_vl.py @@ -60,6 +60,7 @@ class Eagle25VLPreTrainedModel(PreTrainedModel): "SiglipEncoderLayer", ] _skip_keys_device_placement = "past_key_values" + _supports_flash_attn = True _supports_flash_attn_2 = True _supports_cache_class = True 
_supports_static_cache = True diff --git a/src/lerobot/policies/groot/eagle2_hg_model/processing_eagle2_5_vl.py b/src/lerobot/policies/groot/eagle2_hg_model/processing_eagle2_5_vl.py index 7b1f67fef..b36e70c47 100755 --- a/src/lerobot/policies/groot/eagle2_hg_model/processing_eagle2_5_vl.py +++ b/src/lerobot/policies/groot/eagle2_hg_model/processing_eagle2_5_vl.py @@ -124,7 +124,6 @@ class Eagle25VLProcessor(ProcessorMixin): "videos_kwargs", "text_kwargs", ] - image_processor_class = "AutoImageProcessor" tokenizer_class = "AutoTokenizer" def __init__( diff --git a/src/lerobot/policies/groot/processor_groot.py b/src/lerobot/policies/groot/processor_groot.py index 3367de711..6848c7c84 100644 --- a/src/lerobot/policies/groot/processor_groot.py +++ b/src/lerobot/policies/groot/processor_groot.py @@ -206,7 +206,11 @@ def _build_eagle_processor(tokenizer_assets_repo: str = DEFAULT_TOKENIZER_ASSETS "Vendor files are copied during model creation. Create the policy/model first, " "or call ensure_eagle_cache_ready() before building processors." ) - proc = AutoProcessor.from_pretrained(str(cache_dir), trust_remote_code=True, use_fast=True) + proc = AutoProcessor.from_pretrained( + str(cache_dir), + trust_remote_code=True, + fix_mistral_regex=False, + ) proc.tokenizer.padding_side = "left" return proc From e86f5af5bf30d7cd442d07b862b3fbb82f5c79b2 Mon Sep 17 00:00:00 2001 From: Khalil Meftah Date: Wed, 27 May 2026 14:24:31 +0200 Subject: [PATCH 3/5] feat(rewards): add TOPReward reward model (#3629) * feat(rewards): add TOPReward reward model * refactor(rewards): clean up TOPReward processor/model * fix(rewards/topreward): add missing input keys mm_token_type_ids * fix(rewards/topreward): fix pyproject extra typo and simplify processor (#3653) Add lerobot[topreward] extra to all in pyproject.toml, drop the redundant labels arg in scoring, and collapse the dead-branch shape check in the encoder processor. 
* optimize topreward input processing (#3660) --------- Co-authored-by: Cole <91766445+jcoleharrison@users.noreply.github.com> Co-authored-by: Haoming Song --- docs/source/_toctree.yml | 2 + docs/source/topreward.mdx | 177 +++++++++ pyproject.toml | 2 + src/lerobot/rewards/__init__.py | 2 + src/lerobot/rewards/factory.py | 19 +- src/lerobot/rewards/topreward/__init__.py | 19 + .../rewards/topreward/compute_rabc_weights.py | 353 ++++++++++++++++++ .../topreward/configuration_topreward.py | 146 ++++++++ .../rewards/topreward/modeling_topreward.py | 238 ++++++++++++ .../rewards/topreward/processor_topreward.py | 305 +++++++++++++++ .../lerobot_rewardmodel_modelcard_template.md | 2 + tests/rewards/test_modeling_topreward.py | 296 +++++++++++++++ tests/rewards/test_topreward.py | 80 ++++ tests/rewards/test_topreward_processor.py | 246 ++++++++++++ uv.lock | 7 +- 15 files changed, 1891 insertions(+), 3 deletions(-) create mode 100644 docs/source/topreward.mdx create mode 100644 src/lerobot/rewards/topreward/__init__.py create mode 100644 src/lerobot/rewards/topreward/compute_rabc_weights.py create mode 100644 src/lerobot/rewards/topreward/configuration_topreward.py create mode 100644 src/lerobot/rewards/topreward/modeling_topreward.py create mode 100644 src/lerobot/rewards/topreward/processor_topreward.py create mode 100644 tests/rewards/test_modeling_topreward.py create mode 100644 tests/rewards/test_topreward.py create mode 100644 tests/rewards/test_topreward_processor.py diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 412386e2d..527cb7e63 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -73,6 +73,8 @@ - sections: - local: sarm title: SARM + - local: topreward + title: TOPReward title: "Reward Models" - sections: - local: inference diff --git a/docs/source/topreward.mdx b/docs/source/topreward.mdx new file mode 100644 index 000000000..f84fbed49 --- /dev/null +++ b/docs/source/topreward.mdx @@ -0,0 +1,177 @@ +# TOPReward + 
+TOPReward is a **zero-shot reward model** that extracts token log-probabilities from an off-the-shelf vision-language model (VLM) as a robotic reward signal. Given a video trajectory and a task instruction, it returns the VLM's log-likelihood that the instruction is true — no fine-tuning required. + +**Paper**: [TOPReward: Token Probabilities as Hidden Zero-Shot Rewards for Robotics](https://arxiv.org/abs/2602.19313) +**Project**: [topreward.github.io](https://topreward.github.io/webpage/) +**Original code**: [github.com/TOPReward/TOPReward](https://github.com/TOPReward/TOPReward) +**Default backbone**: [Qwen/Qwen3-VL-8B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-8B-Instruct) + +## Overview + +TOPReward asks a generic VLM how likely a task instruction is, **conditioned on the video** of a robot trying to complete that task. Concretely, given: + +- A trajectory video (a sequence of frames). +- A task instruction (e.g. _"open the drawer"_). + +it builds a chat prompt of the form + +```text +