diff --git a/src/lerobot/processor/pipeline.py b/src/lerobot/processor/pipeline.py index ea08e2d1a..8fcbeca91 100644 --- a/src/lerobot/processor/pipeline.py +++ b/src/lerobot/processor/pipeline.py @@ -448,7 +448,12 @@ class RobotProcessor(ModelHubMixin): for key, tensor in state.items(): cloned_state[key] = tensor.clone() - state_filename = f"step_{step_index}.safetensors" + # Use registry name for more meaningful filenames when available + if registry_name: + state_filename = f"{registry_name}.safetensors" + else: + state_filename = f"step_{step_index}.safetensors" + save_file(cloned_state, os.path.join(destination_path, state_filename)) step_entry["state_file"] = state_filename @@ -707,23 +712,37 @@ class RobotProcessor(ModelHubMixin): for fn in self.reset_hooks: fn() - def profile_steps(self, transition: EnvTransition, num_runs: int = 100) -> dict[str, float]: + def profile_steps( + self, transition: EnvTransition, num_runs: int = 100, warmup_runs: int = 5 + ) -> dict[str, float]: """Profile the execution time of each step for performance optimization.""" + import copy import time profile_results = {} + # Make a copy to avoid altering the original transition + transition_copy = copy.deepcopy(transition) + + # Get intermediate transitions for each step using step_through + intermediate_transitions = list(self.step_through(transition_copy)) + for idx, processor_step in enumerate(self.steps): step_name = f"step_{idx}_{processor_step.__class__.__name__}" - # Warm up - for _ in range(5): - _ = processor_step(transition) + # Use the appropriate input transition for this step + input_transition = intermediate_transitions[idx] - # Time the step + # Warm up - copy transition for each run to ensure consistent conditions + for _ in range(warmup_runs): + transition_copy = copy.deepcopy(input_transition) + _ = processor_step(transition_copy) + + # Time the step - copy transition for each run to ensure consistent conditions start_time = time.perf_counter() for _ in range(num_runs): - transition = processor_step(transition) + transition_copy = copy.deepcopy(input_transition) + _ = processor_step(transition_copy) end_time = time.perf_counter() avg_time = (end_time - start_time) / num_runs * 1000 # Convert to milliseconds