chore(processor): rename RobotProcessor -> DataProcessorPipeline (#1850)

This commit is contained in:
Steven Palma
2025-09-03 17:13:16 +02:00
committed by GitHub
parent 4ebe482a7e
commit 8c796b39f5
38 changed files with 326 additions and 298 deletions

View File

@@ -10,7 +10,7 @@ import torch
from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.constants import OBS_LANGUAGE
-from lerobot.processor import RobotProcessor, TokenizerProcessor, TransitionKey
+from lerobot.processor import DataProcessorPipeline, TokenizerProcessor, TransitionKey
from tests.utils import require_package
@@ -388,7 +388,9 @@ def test_integration_with_robot_processor(mock_auto_tokenizer):
mock_auto_tokenizer.from_pretrained.return_value = mock_tokenizer
tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
-robot_processor = RobotProcessor([tokenizer_processor], to_transition=lambda x: x, to_output=lambda x: x)
+robot_processor = DataProcessorPipeline(
+[tokenizer_processor], to_transition=lambda x: x, to_output=lambda x: x
+)
transition = create_transition(
observation={"state": torch.tensor([1.0, 2.0])},
@@ -426,14 +428,16 @@ def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
tokenizer_name="test-tokenizer", max_length=32, task_key="instruction"
)
-robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
+robot_processor = DataProcessorPipeline(
+[original_processor], to_transition=lambda x: x, to_output=lambda x: x
+)
with tempfile.TemporaryDirectory() as temp_dir:
# Save processor
robot_processor.save_pretrained(temp_dir)
# Load processor - tokenizer will be recreated from saved config
-loaded_processor = RobotProcessor.from_pretrained(
+loaded_processor = DataProcessorPipeline.from_pretrained(
temp_dir, to_transition=lambda x: x, to_output=lambda x: x
)
@@ -457,14 +461,16 @@ def test_save_and_load_pretrained_with_tokenizer_object():
original_processor = TokenizerProcessor(tokenizer=mock_tokenizer, max_length=32, task_key="instruction")
-robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
+robot_processor = DataProcessorPipeline(
+[original_processor], to_transition=lambda x: x, to_output=lambda x: x
+)
with tempfile.TemporaryDirectory() as temp_dir:
# Save processor
robot_processor.save_pretrained(temp_dir)
# Load processor with tokenizer override (since tokenizer object wasn't saved)
-loaded_processor = RobotProcessor.from_pretrained(
+loaded_processor = DataProcessorPipeline.from_pretrained(
temp_dir,
overrides={"tokenizer_processor": {"tokenizer": mock_tokenizer}},
to_transition=lambda x: x,
@@ -956,7 +962,7 @@ def test_integration_with_device_processor(mock_auto_tokenizer):
# Create pipeline with TokenizerProcessor then DeviceProcessor
tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
device_processor = DeviceProcessor(device="cuda:0")
-robot_processor = RobotProcessor(
+robot_processor = DataProcessorPipeline(
[tokenizer_processor, device_processor], to_transition=lambda x: x, to_output=lambda x: x
)