#!/usr/bin/env python

# Copyright 2025 HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from lerobot.configs import PipelineFeatureType, PolicyFeature

from .pipeline import ComplementaryDataProcessorStep, ProcessorStepRegistry


# NOTE: The registry name "smolvla_new_line_processor" is kept for backward compatibility
# with serialized processor configs that reference this name.
@ProcessorStepRegistry.register(name="smolvla_new_line_processor")
class NewLineTaskProcessorStep(ComplementaryDataProcessorStep):
    """
    Processor step guaranteeing that the 'task' description is newline-terminated.

    Certain tokenizers (e.g., PaliGemma) expect the prompt to end with a newline,
    so this step appends one whenever it is missing. A task may be given either as
    a single string or as a list of strings; any other value is passed through
    untouched.
    """

    @staticmethod
    def _with_trailing_newline(text):
        """Return `text` as-is if it already ends with a newline, else with one appended."""
        return text if text.endswith("\n") else f"{text}\n"

    def complementary_data(self, complementary_data):
        """
        Return the complementary data with a newline-terminated 'task' entry.

        Args:
            complementary_data: Mapping that may carry a 'task' entry.

        Returns:
            The very same object when 'task' is absent or None. Otherwise a shallow
            copy in which a string task, or every element of a list-of-strings task,
            ends with a newline. Tasks of any other type are copied over unchanged.
        """
        # Absent or None task: nothing to normalize, so no copy is made.
        if "task" not in complementary_data or complementary_data["task"] is None:
            return complementary_data

        prompt = complementary_data["task"]
        # Work on a shallow copy so the caller's mapping is never modified.
        updated = dict(complementary_data)

        if isinstance(prompt, str):
            updated["task"] = self._with_trailing_newline(prompt)
            return updated

        # Only rewrite lists made up entirely of strings; mixed lists stay as they are.
        if isinstance(prompt, list) and all(isinstance(item, str) for item in prompt):
            updated["task"] = [self._with_trailing_newline(item) for item in prompt]

        return updated

    def transform_features(
        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
        """Return `features` unchanged; this step does not alter any feature definition."""
        return features