chore(processor): rename RobotProcessor -> DataProcessorPipeline (#1850)

2026-06-01 03:11:29 +00:00 · 2025-09-03 17:13:16 +02:00
parent 4ebe482a7e
commit 8c796b39f5
38 changed files with 326 additions and 298 deletions
--- a/tests/processor/test_tokenizer_processor.py
+++ b/tests/processor/test_tokenizer_processor.py
@@ -10,7 +10,7 @@ import torch

 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.constants import OBS_LANGUAGE
-from lerobot.processor import RobotProcessor, TokenizerProcessor, TransitionKey
+from lerobot.processor import DataProcessorPipeline, TokenizerProcessor, TransitionKey
 from tests.utils import require_package


@@ -388,7 +388,9 @@ def test_integration_with_robot_processor(mock_auto_tokenizer):
    mock_auto_tokenizer.from_pretrained.return_value = mock_tokenizer

    tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
-    robot_processor = RobotProcessor([tokenizer_processor], to_transition=lambda x: x, to_output=lambda x: x)
+    robot_processor = DataProcessorPipeline(
+        [tokenizer_processor], to_transition=lambda x: x, to_output=lambda x: x
+    )

    transition = create_transition(
        observation={"state": torch.tensor([1.0, 2.0])},
@@ -426,14 +428,16 @@ def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
        tokenizer_name="test-tokenizer", max_length=32, task_key="instruction"
    )

-    robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
+    robot_processor = DataProcessorPipeline(
+        [original_processor], to_transition=lambda x: x, to_output=lambda x: x
+    )

    with tempfile.TemporaryDirectory() as temp_dir:
        # Save processor
        robot_processor.save_pretrained(temp_dir)

        # Load processor - tokenizer will be recreated from saved config
-        loaded_processor = RobotProcessor.from_pretrained(
+        loaded_processor = DataProcessorPipeline.from_pretrained(
            temp_dir, to_transition=lambda x: x, to_output=lambda x: x
        )

@@ -457,14 +461,16 @@ def test_save_and_load_pretrained_with_tokenizer_object():

    original_processor = TokenizerProcessor(tokenizer=mock_tokenizer, max_length=32, task_key="instruction")

-    robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
+    robot_processor = DataProcessorPipeline(
+        [original_processor], to_transition=lambda x: x, to_output=lambda x: x
+    )

    with tempfile.TemporaryDirectory() as temp_dir:
        # Save processor
        robot_processor.save_pretrained(temp_dir)

        # Load processor with tokenizer override (since tokenizer object wasn't saved)
-        loaded_processor = RobotProcessor.from_pretrained(
+        loaded_processor = DataProcessorPipeline.from_pretrained(
            temp_dir,
            overrides={"tokenizer_processor": {"tokenizer": mock_tokenizer}},
            to_transition=lambda x: x,
@@ -956,7 +962,7 @@ def test_integration_with_device_processor(mock_auto_tokenizer):
    # Create pipeline with TokenizerProcessor then DeviceProcessor
    tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
    device_processor = DeviceProcessor(device="cuda:0")
-    robot_processor = RobotProcessor(
+    robot_processor = DataProcessorPipeline(
        [tokenizer_processor, device_processor], to_transition=lambda x: x, to_output=lambda x: x
    )