mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-04 12:51:27 +00:00
60 lines
2.4 KiB
Python
60 lines
2.4 KiB
Python
#!/usr/bin/env python
|
|
|
|
# Copyright 2025 HuggingFace Inc. team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from lerobot.configs import PipelineFeatureType, PolicyFeature
|
|
|
|
from .pipeline import ComplementaryDataProcessorStep, ProcessorStepRegistry
|
|
|
|
|
|
# NOTE: The registry name "smolvla_new_line_processor" is kept for backward compatibility
|
|
# with serialized processor configs that reference this name.
|
|
@ProcessorStepRegistry.register(name="smolvla_new_line_processor")
class NewLineTaskProcessorStep(ComplementaryDataProcessorStep):
    """
    Processor step that guarantees the 'task' prompt is newline-terminated.

    Certain tokenizers (e.g., PaliGemma) expect the prompt to end with a
    newline character. The step accepts either a single task string or a
    list of task strings; any other kind of value is passed through as-is.
    """

    def complementary_data(self, complementary_data):
        """
        Return the complementary data with a trailing newline on each task.

        Args:
            complementary_data: Mapping that may contain a "task" entry.

        Returns:
            The very same object when "task" is missing or ``None``;
            otherwise a shallow ``dict`` copy in which a string task (or every
            string of a list-of-strings task) ends with a newline. The input
            mapping itself is never modified.
        """
        # Nothing to normalise: hand the input back without copying it.
        if "task" not in complementary_data or complementary_data["task"] is None:
            return complementary_data

        def _terminated(text):
            # Strings that already end with a newline are returned unchanged.
            return text if text.endswith("\n") else f"{text}\n"

        prompt = complementary_data["task"]
        # Work on a shallow copy so the caller's mapping stays untouched.
        updated = dict(complementary_data)

        if isinstance(prompt, str):
            updated["task"] = _terminated(prompt)
            return updated

        if isinstance(prompt, list) and all(isinstance(item, str) for item in prompt):
            updated["task"] = [_terminated(item) for item in prompt]

        # Any other task type (including mixed-type lists) is left as it was.
        return updated

    def transform_features(
        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
        """Return ``features`` unchanged; this step does not modify them."""
        return features
|