feat(processor): multiple improvements to the pipeline porting (#1749)

* [Port codebase pipeline] General fixes for RL and scripts (#1748) * Refactor dataset configuration in documentation and codebase - Updated dataset configuration keys from `dataset_root` to `root` and `num_episodes` to `num_episodes_to_record` for consistency. - Adjusted replay episode handling by renaming `episode` to `replay_episode`. - Enhanced documentation - added specific processor to transform from policy actions to delta actions * Added Robot action to tensor processor Added new processor script for dealing with gym specific action processing * removed RobotAction2Tensor processor; improved choosing observations in actor * nit in delta action * added missing reset functions to kinematics * Adapt teleoperate and replay to pipeline similar to record * refactor(processors): move to inheritance (#1750) * fix(teleoperator): improvements phone implementation (#1752) * fix(teleoperator): protect shared state in phone implementation * refactor(teleop): separate classes in phone * fix: solve breaking changes (#1753) * refactor(policies): multiple improvements (#1754) * refactor(processor): simpler logic in device processor (#1755) * refactor(processor): euclidean distance in delta action processor (#1757) * refactor(processor): improvements to joint observations processor migration (#1758) * refactor(processor): improvements to tokenizer migration (#1759) * refactor(processor): improvements to tokenizer migration * fix(tests): tokenizer tests regression from #1750 * fix(processors): fix float comparison and config in hil processors (#1760) * chore(teleop): remove unnecessary callbacks in KeyboardEndEffectorTeleop (#1761) * refactor(processor): improvements normalize pipeline migration (#1756) * refactor(processor): several improvements normalize processor step * refactor(processor): more improvements normalize processor * refactor(processor): more changes to normalizer * refactor(processor): take a different approach to DRY * refactor(processor): final design * 
chore(record): revert comment and continue deleted (#1764) * refactor(examples): pipeline phone examples (#1769) * refactor(examples): phone teleop + teleop script * refactor(examples): phone replay + replay * chore(examples): rename phone example files & folders * feat(processor): fix improvements to the pipeline porting (#1796) * refactor(processor): enhance tensor device handling in normalization process (#1795) * refactor(tests): remove unsupported device detection test for complementary data (#1797) * chore(tests): update ToBatchProcessor test (#1798) * refactor(tests): remove in-place mutation tests for actions and complementary data in batch processor * test(tests): add tests for action and task processing in batch processor * add names for android and ios phone (#1799) * use _tensor_stats in normalize processor (#1800) * fix(normalize_processor): correct device reference for tensor epsilon handling (#1801) * add point 5 add missing feature contracts (#1806) * Fix PR comments 1452 (#1807) * use key to determine image * Address rest of PR comments * use PolicyFeatures in transform_features --------- Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> --------- Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com>
2026-06-01 03:11:29 +00:00 · 2025-08-31 20:38:52 +02:00
parent 35c5d43255
commit ce665160ae
55 changed files with 1549 additions and 2024 deletions
--- a/src/lerobot/processor/tokenizer_processor.py
+++ b/src/lerobot/processor/tokenizer_processor.py
@@ -11,7 +11,12 @@ import torch

 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.constants import OBS_LANGUAGE
-from lerobot.processor.pipeline import EnvTransition, ProcessorStepRegistry, TransitionKey
+from lerobot.processor.pipeline import (
+    EnvTransition,
+    ObservationProcessor,
+    ProcessorStepRegistry,
+    TransitionKey,
+)
 from lerobot.utils.import_utils import _transformers_available

 if TYPE_CHECKING or _transformers_available:
@@ -22,7 +27,7 @@ else:

@dataclass
@ProcessorStepRegistry.register(name="tokenizer_processor")
-class TokenizerProcessor:
+class TokenizerProcessor(ObservationProcessor):
    """Tokenizes text tasks in complementary data using a huggingface tokenizer.

    This processor handles tokenization of task strings found in the complementary_data
@@ -118,7 +123,7 @@ class TokenizerProcessor:

        return None

-    def __call__(self, transition: EnvTransition) -> EnvTransition:
+    def observation(self, observation):
        """Process the transition by tokenizing the task text.

        Args:
@@ -130,15 +135,15 @@ class TokenizerProcessor:
        Raises:
            ValueError: If tokenizer initialization failed.
        """
-        task = self.get_task(transition)
+        task = self.get_task(self.transition)
        if task is None:
-            return transition
+            return observation

        # Tokenize the task (creates CPU tensors)
        tokenized_prompt = self._tokenize_text(task)

        # Detect device from existing tensors in the transition
-        target_device = self._detect_device(transition)
+        target_device = self._detect_device(self.transition)

        # Move tokenized tensors to match the device of other data
        if target_device is not None:
@@ -148,20 +153,15 @@ class TokenizerProcessor:
            }

        # Get or create observation dict
-        observation = transition.get(TransitionKey.OBSERVATION)
-        if observation is None:
-            observation = {}
-        else:
-            observation = dict(observation)  # Make a copy
+        new_observation = dict(observation)

        # Add tokenized data to observation
-        observation[f"{OBS_LANGUAGE}.tokens"] = tokenized_prompt["input_ids"]
-        observation[f"{OBS_LANGUAGE}.attention_mask"] = tokenized_prompt["attention_mask"].to(
+        new_observation[f"{OBS_LANGUAGE}.tokens"] = tokenized_prompt["input_ids"]
+        new_observation[f"{OBS_LANGUAGE}.attention_mask"] = tokenized_prompt["attention_mask"].to(
            dtype=torch.bool
        )

-        transition[TransitionKey.OBSERVATION.value] = observation  # type: ignore[misc]
-        return transition
+        return new_observation

    def _detect_device(self, transition: EnvTransition) -> torch.device | None:
        """Detect device from existing tensors in the transition.
@@ -187,19 +187,6 @@ class TokenizerProcessor:
        if isinstance(action, torch.Tensor):
            return action.device

-        # Check other tensor fields
-        for key in [TransitionKey.REWARD, TransitionKey.DONE, TransitionKey.TRUNCATED]:
-            value = transition.get(key)
-            if isinstance(value, torch.Tensor):
-                return value.device
-
-        # Check complementary data for tensors
-        complementary_data = transition.get(TransitionKey.COMPLEMENTARY_DATA)
-        if complementary_data:
-            for value in complementary_data.values():
-                if isinstance(value, torch.Tensor):
-                    return value.device
-
        return None  # No tensors found, keep on CPU

    def _tokenize_text(self, text: str | list[str]) -> dict[str, torch.Tensor]:
@@ -235,23 +222,12 @@ class TokenizerProcessor:
        }

        # Only include tokenizer_name if it was used (not when tokenizer object was provided)
-        if self.tokenizer_name is not None:
+        # TODO(steven): Consider saving the name of the _tokenizer if it was loaded
+        if self.tokenizer_name is not None and self.tokenizer is None:
            config["tokenizer_name"] = self.tokenizer_name

        return config

-    def state_dict(self) -> dict[str, torch.Tensor]:
-        """Return state dictionary (empty for this processor)."""
-        return {}
-
-    def load_state_dict(self, state: dict[str, torch.Tensor]) -> None:
-        """Load state dictionary (no-op for this processor)."""
-        pass
-
-    def reset(self) -> None:
-        """Reset processor state (no-op for this processor)."""
-        pass
-
    def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
        """Add tokenized task features to the feature contract.