Address review: split persistent/event schemas, drop event timestamps

- recipe.py: derive _VALID_ROLES/_VALID_STREAMS from MessageRole/MessageStream Literals - dataset_metadata.py: keep CODEBASE_VERSION at v3.0 - language.py: remove RESERVED_STYLES; split arrow/feature schemas into persistent (with timestamp) and event (without timestamp); add docstrings - language_render.py: events use frame-row timestamp implicitly; no per-event timestamp filtering or sorting - converters.py: drop unused subtask_key passthrough - add docstrings to new public APIs (recipe, render_messages_processor, collate) - update tests for split schemas; revert uv.lock Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 20:01:25 +00:00 · 2026-04-27 13:38:23 +02:00
parent 8833d735a1
commit 2b71221194
10 changed files with 210 additions and 60 deletions
--- a/src/lerobot/processor/render_messages_processor.py
+++ b/src/lerobot/processor/render_messages_processor.py
@@ -31,10 +31,19 @@ from .pipeline import ProcessorStep, ProcessorStepRegistry
@dataclass
@ProcessorStepRegistry.register(name="render_messages_processor")
 class RenderMessagesStep(ProcessorStep):
+    """Processor step that turns raw language columns into rendered chat messages.
+
+    Reads ``language_persistent`` and ``language_events`` from the transition's
+    complementary data, renders them through ``recipe`` at the sample timestamp,
+    and replaces the raw columns with the resulting ``messages`` /
+    ``message_streams`` / ``target_message_indices`` keys.
+    """
+
    recipe: TrainingRecipe
    dataset_ctx: Any | None = None

    def __call__(self, transition: EnvTransition) -> EnvTransition | None:
+        """Render messages for a single transition; return ``None`` to drop it."""
        complementary_data = transition.get(TransitionKey.COMPLEMENTARY_DATA) or {}
        persistent = complementary_data.get(LANGUAGE_PERSISTENT) or []
        events = complementary_data.get(LANGUAGE_EVENTS) or []