From ff3cbaa872367d53fc2591d521159adb736ea960 Mon Sep 17 00:00:00 2001
From: Adil Zouitine
Date: Wed, 3 Sep 2025 18:20:12 +0200
Subject: [PATCH] refactor(processor): rename internal tokenizer variable for clarity (#1855)

- Changed the internal tokenizer variable name from `_tokenizer` to `input_tokenizer` for improved readability and consistency.
- Updated references throughout the class to reflect the new variable name.
---
 src/lerobot/processor/tokenizer_processor.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/lerobot/processor/tokenizer_processor.py b/src/lerobot/processor/tokenizer_processor.py
index 0f54644b4..54ca17098 100644
--- a/src/lerobot/processor/tokenizer_processor.py
+++ b/src/lerobot/processor/tokenizer_processor.py
@@ -69,7 +69,7 @@ class TokenizerProcessorStep(ObservationProcessorStep):
     truncation: bool = True
 
     # Internal tokenizer instance (not serialized)
-    _tokenizer: Any = field(default=None, init=False, repr=False)
+    input_tokenizer: Any = field(default=None, init=False, repr=False)
 
     def __post_init__(self):
         """Initialize the tokenizer from the provided tokenizer or tokenizer name."""
@@ -81,11 +81,11 @@ class TokenizerProcessorStep(ObservationProcessorStep):
 
         if self.tokenizer is not None:
             # Use provided tokenizer object directly
-            self._tokenizer = self.tokenizer
+            self.input_tokenizer = self.tokenizer
         elif self.tokenizer_name is not None:
             if AutoTokenizer is None:
                 raise ImportError("AutoTokenizer is not available")
-            self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name)
+            self.input_tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name)
         else:
             raise ValueError(
                 "Either 'tokenizer' or 'tokenizer_name' must be provided. "
@@ -193,7 +193,7 @@ class TokenizerProcessorStep(ObservationProcessorStep):
         Returns:
             Dictionary containing tokenized output with keys like 'input_ids', 'attention_mask'.
         """
-        return self._tokenizer(
+        return self.input_tokenizer(
             text,
             max_length=self.max_length,
             truncation=self.truncation,