Package folder structure (#1417)

* Move files * Replace imports & paths * Update relative paths * Update doc symlinks * Update instructions paths * Fix imports * Update grpc files * Update more instructions * Downgrade grpc-tools * Update manifest * Update more paths * Update config paths * Update CI paths * Update bandit exclusions * Remove walkthrough section
2026-06-04 04:41:24 +00:00 · 2025-07-01 16:34:46 +02:00
parent 483be9aac2
commit d4ee470b00
268 changed files with 862 additions and 890 deletions
--- a/src/lerobot/utils/benchmark.py
+++ b/src/lerobot/utils/benchmark.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import threading
+import time
+from contextlib import ContextDecorator
+
+
+class TimeBenchmark(ContextDecorator):
+    """
+    Measures execution time using a context manager or decorator.
+
+    This class supports both context manager and decorator usage. Timing state is kept in a
+    `threading.local`, so every thread entering the same instance records and reads its own measurement.
+
+    Args:
+        print: If True, prints the elapsed time upon exiting the context or completing the function.
+            Defaults to False.
+
+    Attributes:
+        result: Elapsed time in seconds of the last measurement made by the calling thread, or None if
+            this thread has not completed a measurement yet.
+        result_ms: Same as `result`, expressed in milliseconds (None when `result` is None).
+
+    Examples:
+
+        Using as a context manager:
+
+        ```python
+        benchmark = TimeBenchmark()
+        with benchmark:
+            time.sleep(1)
+        print(f"Block took {benchmark.result:.4f} seconds")
+        ```
+        Expected output (approximately):
+        Block took 1.0000 seconds
+
+        Using with multithreading:
+
+        ```python
+        import threading
+
+        benchmark = TimeBenchmark()
+
+        def context_manager_example():
+            with benchmark:
+                time.sleep(0.01)
+            print(f"Block took {benchmark.result_ms:.2f} milliseconds")
+
+        threads = []
+        for _ in range(3):
+            t1 = threading.Thread(target=context_manager_example)
+            threads.append(t1)
+
+        for t in threads:
+            t.start()
+
+        for t in threads:
+            t.join()
+        ```
+        Expected output (approximately):
+        Block took 10.00 milliseconds
+        Block took 10.00 milliseconds
+        Block took 10.00 milliseconds
+    """
+
+    def __init__(self, print=False):
+        # Per-thread storage for start/end/elapsed times.
+        self.local = threading.local()
+        # The argument is named `print` for API compatibility; store it under a name that does not
+        # shadow the builtin used in `__exit__`.
+        self.print_time = print
+
+    def __enter__(self):
+        self.local.start_time = time.perf_counter()
+        return self
+
+    def __exit__(self, *exc):
+        self.local.end_time = time.perf_counter()
+        self.local.elapsed_time = self.local.end_time - self.local.start_time
+        if self.print_time:
+            print(f"Elapsed time: {self.local.elapsed_time:.4f} seconds")
+        # Never swallow an exception raised inside the timed block.
+        return False
+
+    @property
+    def result(self):
+        """Elapsed seconds of the calling thread's last measurement, or None if there is none."""
+        return getattr(self.local, "elapsed_time", None)
+
+    @property
+    def result_ms(self):
+        """Elapsed milliseconds of the calling thread's last measurement, or None if there is none."""
+        seconds = self.result
+        # Mirror `result`: report "no measurement" as None instead of failing on `None * 1e3`.
+        return None if seconds is None else seconds * 1e3
--- a/src/lerobot/utils/buffer.py
+++ b/src/lerobot/utils/buffer.py
@@ -0,0 +1,841 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+from contextlib import suppress
+from typing import Callable, Sequence, TypedDict
+
+import torch
+import torch.nn.functional as F  # noqa: N812
+from tqdm import tqdm
+
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.utils.transition import Transition
+
+
+class BatchTransition(TypedDict):
+    """Batch of transitions, as assembled by `ReplayBuffer.sample`.
+
+    All keys are required. `TypedDict` does not support default values, so a batch without extra
+    information must set `complementary_info` to None explicitly.
+    """
+
+    # Observations, keyed by state name.
+    state: dict[str, torch.Tensor]
+    action: torch.Tensor
+    reward: torch.Tensor
+    # Observations that follow `action`, with the same keys as `state`.
+    next_state: dict[str, torch.Tensor]
+    # `ReplayBuffer.sample` returns these two flags converted to float tensors.
+    done: torch.Tensor
+    truncated: torch.Tensor
+    # Optional per-transition extras; None when the buffer stores none.
+    complementary_info: dict[str, torch.Tensor | float | int] | None
+
+
+def random_crop_vectorized(images: torch.Tensor, output_size: tuple) -> torch.Tensor:
+    """
+    Crop every image of a batch at its own random location, without a Python loop.
+
+    Args:
+        images: Batch of images laid out as (B, C, H, W).
+        output_size: Target `(height, width)` of the crops.
+
+    Returns:
+        Tensor of shape (B, C, crop_h, crop_w). All channels of one image share the same window.
+
+    Raises:
+        ValueError: If the requested crop is larger than the images along either spatial dimension.
+    """
+    batch, _, height, width = images.shape
+    crop_h, crop_w = output_size
+
+    if crop_h > height or crop_w > width:
+        raise ValueError(
+            f"Requested crop size ({crop_h}, {crop_w}) is bigger than the image size ({height}, {width})."
+        )
+
+    dev = images.device
+
+    # Top-left corner of each window; the vertical offsets are drawn first, then the horizontal ones.
+    tops = torch.randint(0, height - crop_h + 1, (batch,), device=dev)
+    lefts = torch.randint(0, width - crop_w + 1, (batch,), device=dev)
+
+    # Index tensors are left un-expanded: advanced indexing broadcasts them to (B, crop_h, crop_w).
+    batch_idx = torch.arange(batch, device=dev)[:, None, None]  # (B, 1, 1)
+    row_idx = (tops[:, None] + torch.arange(crop_h, device=dev)[None, :])[:, :, None]  # (B, crop_h, 1)
+    col_idx = (lefts[:, None] + torch.arange(crop_w, device=dev)[None, :])[:, None, :]  # (B, 1, crop_w)
+
+    # Channels go last so that a single gather picks whole pixels.
+    channels_last = images.permute(0, 2, 3, 1)  # (B, H, W, C)
+    windows = channels_last[batch_idx, row_idx, col_idx]  # (B, crop_h, crop_w, C)
+
+    return windows.permute(0, 3, 1, 2)  # (B, C, crop_h, crop_w)
+
+
+def random_shift(images: torch.Tensor, pad: int = 4):
+    """Randomly translate each image of a (B, C, H, W) batch by up to `pad` pixels.
+
+    The batch is first enlarged by `pad` pixels on every side by replicating the border pixels, then a
+    window of the original size is cropped at an independent random position for every image.
+    """
+    _, _, height, width = images.shape
+    padded = F.pad(input=images, pad=(pad, pad, pad, pad), mode="replicate")
+    return random_crop_vectorized(images=padded, output_size=(height, width))
+
+
+class ReplayBuffer:
+    def __init__(
+        self,
+        capacity: int,
+        device: str = "cuda:0",
+        state_keys: Sequence[str] | None = None,
+        image_augmentation_function: Callable | None = None,
+        use_drq: bool = True,
+        storage_device: str = "cpu",
+        optimize_memory: bool = False,
+    ):
+        """
+        Replay buffer for storing transitions.
+
+        Storage tensors are not allocated here: they are created on `storage_device` when the first
+        transition is added.
+        NOTE: If you run into memory issues, enable `optimize_memory` and/or choose a different
+        `storage_device`.
+
+        Args:
+            capacity (int): Maximum number of transitions to store in the buffer.
+            device (str): The device where the tensors will be moved when sampling ("cuda:0" or "cpu").
+            state_keys (Sequence[str] | None): The keys that appear in `state` and `next_state`.
+                Stored as an empty list when None.
+            image_augmentation_function (Callable | None): A function that takes a batch of images
+                and returns a batch of augmented images. If None, a compiled `random_shift` with
+                `pad=4` is used.
+            use_drq (bool): Whether to apply the image augmentation when sampling from the buffer.
+            storage_device (str): The device (e.g. "cpu" or "cuda:0") where the data will be stored.
+                Using "cpu" can help save GPU memory.
+            optimize_memory (bool): If True, next_states are not stored separately and are read back
+                from the following stored state instead (i.e. next_state[i] = state[i+1]).
+
+        Raises:
+            ValueError: If `capacity` is not strictly positive.
+        """
+        if capacity <= 0:
+            raise ValueError("Capacity must be greater than 0.")
+
+        # Sizing and device placement
+        self.capacity = capacity
+        self.device = device
+        self.storage_device = storage_device
+        self.optimize_memory = optimize_memory
+
+        # Ring-buffer bookkeeping; storage itself is created lazily
+        self.position = 0
+        self.size = 0
+        self.initialized = False
+
+        # Track episode boundaries for memory optimization
+        self.episode_ends = torch.zeros(capacity, dtype=torch.bool, device=storage_device)
+
+        self.state_keys = [] if state_keys is None else state_keys
+
+        # Fall back to the default random-shift augmentation when none is supplied
+        if image_augmentation_function is None:
+            image_augmentation_function = torch.compile(functools.partial(random_shift, pad=4))
+        self.image_augmentation_function = image_augmentation_function
+        self.use_drq = use_drq
+
+    def _initialize_storage(
+        self,
+        state: dict[str, torch.Tensor],
+        action: torch.Tensor,
+        complementary_info: dict[str, torch.Tensor] | None = None,
+    ):
+        """Allocate every storage tensor, using the first transition as a shape template.
+
+        Only shapes (with a leading dimension of size 1 squeezed away) and value types are read from the
+        arguments; no data is copied. All tensors get `capacity` rows and live on `storage_device`.
+
+        Args:
+            state: Example observation, keyed by state name.
+            action: Example action.
+            complementary_info: Optional example extras. Tensor values get a per-row tensor slot,
+                int/float values get a scalar slot.
+
+        Raises:
+            ValueError: If a `complementary_info` value is neither a tensor nor an int/float.
+        """
+        rows, target = self.capacity, self.storage_device
+
+        def allocate(item_shape=(), **options):
+            # Uninitialized tensor with one row per buffer slot.
+            return torch.empty((rows, *item_shape), device=target, **options)
+
+        item_shapes = {name: example.squeeze(0).shape for name, example in state.items()}
+
+        self.states = {name: allocate(shape) for name, shape in item_shapes.items()}
+        self.actions = allocate(action.squeeze(0).shape)
+        self.rewards = allocate()
+
+        if self.optimize_memory:
+            # No dedicated buffer: `next_states` is merely an alias of `states` for API consistency.
+            self.next_states = self.states
+        else:
+            self.next_states = {name: allocate(shape) for name, shape in item_shapes.items()}
+
+        self.dones = allocate(dtype=torch.bool)
+        self.truncateds = allocate(dtype=torch.bool)
+
+        # Complementary info is optional; its layout is frozen from this first example.
+        self.has_complementary_info = complementary_info is not None
+        self.complementary_info_keys = []
+        self.complementary_info = {}
+
+        if self.has_complementary_info:
+            self.complementary_info_keys = list(complementary_info.keys())
+            for key, value in complementary_info.items():
+                if isinstance(value, torch.Tensor):
+                    self.complementary_info[key] = allocate(value.squeeze(0).shape)
+                elif isinstance(value, (int, float)):
+                    # Scalars are stored like rewards: one number per slot.
+                    self.complementary_info[key] = allocate()
+                else:
+                    raise ValueError(f"Unsupported type {type(value)} for complementary_info[{key}]")
+
+        self.initialized = True
+
+    def __len__(self) -> int:
+        """Return the number of transitions currently stored in the buffer."""
+        return self.size
+
+    def add(
+        self,
+        state: dict[str, torch.Tensor],
+        action: torch.Tensor,
+        reward: float,
+        next_state: dict[str, torch.Tensor],
+        done: bool,
+        truncated: bool,
+        complementary_info: dict[str, torch.Tensor] | None = None,
+    ):
+        """Write one transition into the current slot, then advance the ring-buffer cursor.
+
+        Storage is allocated from this transition if the buffer has not been initialized yet. Once the
+        buffer is full, the oldest slot is overwritten.
+
+        Args:
+            state: Observation before the action, keyed by state name.
+            action: Action taken.
+            reward: Reward received.
+            next_state: Observation after the action. Ignored when `optimize_memory` is enabled.
+            done: Whether the episode terminated with this transition.
+            truncated: Whether the episode was truncated at this transition.
+            complementary_info: Optional extras. Only keys registered at initialization are stored, and
+                only tensor or int/float values are written.
+        """
+        if not self.initialized:
+            self._initialize_storage(state=state, action=action, complementary_info=complementary_info)
+
+        slot = self.position
+        keep_next_states = not self.optimize_memory
+
+        for key, storage in self.states.items():
+            storage[slot].copy_(state[key].squeeze(dim=0))
+            if keep_next_states:
+                # In memory-optimized mode `next_states` aliases `states`, so nothing is written here.
+                self.next_states[key][slot].copy_(next_state[key].squeeze(dim=0))
+
+        self.actions[slot].copy_(action.squeeze(dim=0))
+        self.rewards[slot] = reward
+        self.dones[slot] = done
+        self.truncateds[slot] = truncated
+
+        if self.has_complementary_info and complementary_info is not None:
+            for key in self.complementary_info_keys:
+                if key not in complementary_info:
+                    continue
+                value = complementary_info[key]
+                if isinstance(value, torch.Tensor):
+                    self.complementary_info[key][slot].copy_(value.squeeze(dim=0))
+                elif isinstance(value, (int, float)):
+                    self.complementary_info[key][slot] = value
+
+        self.size = min(self.size + 1, self.capacity)
+        self.position = (slot + 1) % self.capacity
+
+    def sample(self, batch_size: int) -> BatchTransition:
+        """Sample a random batch of transitions and collate them into batched tensors.
+
+        Indices are drawn uniformly with replacement on `storage_device`; the gathered tensors are then
+        moved to `device`. When `use_drq` is set, every state whose key starts with "observation.image"
+        is passed (together with its next_state) through `image_augmentation_function` in one call.
+
+        NOTE: With `optimize_memory=True`, next_state is read from the state stored in the following
+        slot. While the buffer is not full, the most recent transition is therefore never sampled
+        (its successor has not been written yet).
+
+        Args:
+            batch_size (int): Requested number of transitions. Clamped to the number of stored
+                transitions.
+
+        Returns:
+            BatchTransition: The batch; `done` and `truncated` are returned as float tensors and
+            `complementary_info` is None when the buffer stores none.
+
+        Raises:
+            RuntimeError: If the buffer has not been initialized, or if it does not yet hold enough
+                transitions to sample from.
+        """
+        if not self.initialized:
+            raise RuntimeError("Cannot sample from an empty buffer. Add transitions first.")
+
+        batch_size = min(batch_size, self.size)
+        high = max(0, self.size - 1) if self.optimize_memory and self.size < self.capacity else self.size
+        if high <= 0:
+            # `torch.randint` would fail here with an opaque "from >= to" error.
+            raise RuntimeError(
+                "Not enough transitions to sample from. With `optimize_memory=True`, at least two "
+                "transitions must be stored while the buffer is not full."
+            )
+
+        # Random indices for sampling - create on the same device as storage
+        idx = torch.randint(low=0, high=high, size=(batch_size,), device=self.storage_device)
+
+        # Identify image keys that need augmentation
+        image_keys = [k for k in self.states if k.startswith("observation.image")] if self.use_drq else []
+
+        # Standard mode reads the dedicated next_states buffer at the same indices; memory-optimized
+        # mode reads the state stored in the following slot (wrapping around the ring buffer).
+        if self.optimize_memory:
+            next_source, next_idx = self.states, (idx + 1) % self.capacity
+        else:
+            next_source, next_idx = self.next_states, idx
+
+        # Load all state tensors to the target device
+        batch_state = {key: self.states[key][idx].to(self.device) for key in self.states}
+        batch_next_state = {key: next_source[key][next_idx].to(self.device) for key in self.states}
+
+        # Apply image augmentation in a batched way if needed
+        if image_keys:
+            # Layout of the concatenation: [state(k0), next_state(k0), state(k1), next_state(k1), ...]
+            all_images = []
+            for key in image_keys:
+                all_images.append(batch_state[key])
+                all_images.append(batch_next_state[key])
+
+            # Optimization: batch all images and apply augmentation once
+            augmented_images = self.image_augmentation_function(torch.cat(all_images, dim=0))
+
+            # Split the augmented images back to their sources
+            for i, key in enumerate(image_keys):
+                start = 2 * i * batch_size
+                batch_state[key] = augmented_images[start : start + batch_size]
+                batch_next_state[key] = augmented_images[start + batch_size : start + 2 * batch_size]
+
+        # Sample other tensors
+        batch_actions = self.actions[idx].to(self.device)
+        batch_rewards = self.rewards[idx].to(self.device)
+        batch_dones = self.dones[idx].to(self.device).float()
+        batch_truncateds = self.truncateds[idx].to(self.device).float()
+
+        # Sample complementary_info if available
+        batch_complementary_info = None
+        if self.has_complementary_info:
+            batch_complementary_info = {
+                key: self.complementary_info[key][idx].to(self.device) for key in self.complementary_info_keys
+            }
+
+        return BatchTransition(
+            state=batch_state,
+            action=batch_actions,
+            reward=batch_rewards,
+            next_state=batch_next_state,
+            done=batch_dones,
+            truncated=batch_truncateds,
+            complementary_info=batch_complementary_info,
+        )
+
+    def get_iterator(
+        self,
+        batch_size: int,
+        async_prefetch: bool = True,
+        queue_size: int = 2,
+    ):
+        """
+        Build a never-ending generator of sampled batches.
+
+        Batches come from an inner iterator (threaded prefetching or plain sampling); whenever that
+        inner iterator finishes, a new one is created and iteration carries on.
+
+        Args:
+            batch_size (int): Size of batches to sample
+            async_prefetch (bool): Whether to use asynchronous prefetching with threads (default: True)
+            queue_size (int): Number of batches to prefetch (default: 2)
+
+        Yields:
+            BatchTransition: Batched transitions
+        """
+        # Pick the iterator factory once; both accept the same keyword arguments.
+        make_iterator = self._get_async_iterator if async_prefetch else self._get_naive_iterator
+
+        while True:
+            with suppress(StopIteration):
+                yield from make_iterator(batch_size=batch_size, queue_size=queue_size)
+
+    def _get_async_iterator(self, batch_size: int, queue_size: int = 2):
+        """
+        Create an iterator that yields batches prefetched by a background thread.
+
+        A daemon producer thread keeps calling `sample` and pushes the batches into a bounded queue;
+        this generator pops them. If sampling fails in the producer, the failure is re-raised here
+        instead of leaving the consumer blocked on an empty queue.
+
+        Args:
+            batch_size (int): Size of batches to sample.
+            queue_size (int): Maximum number of prefetched batches to keep in
+                memory.
+
+        Yields:
+            BatchTransition: A batch sampled from the replay buffer.
+
+        Raises:
+            Exception: Whatever `sample` raised inside the producer thread.
+        """
+        import queue
+        import threading
+
+        data_queue: queue.Queue = queue.Queue(maxsize=queue_size)
+        shutdown_event = threading.Event()
+        # Filled by the producer (at most one entry) so the consumer can surface the failure.
+        producer_errors: list[Exception] = []
+
+        def producer() -> None:
+            """Continuously put sampled batches into the queue until shutdown."""
+            while not shutdown_event.is_set():
+                try:
+                    batch = self.sample(batch_size)
+                except Exception as exc:
+                    producer_errors.append(exc)
+                    shutdown_event.set()
+                    return
+
+                # Retry with the same batch while the queue is full rather than sampling a new one.
+                # The timeout lets the thread notice a shutdown request while it waits.
+                while not shutdown_event.is_set():
+                    try:
+                        data_queue.put(batch, block=True, timeout=0.5)
+                        break
+                    except queue.Full:
+                        continue
+
+        producer_thread = threading.Thread(target=producer, daemon=True)
+        producer_thread.start()
+
+        try:
+            while not shutdown_event.is_set():
+                try:
+                    # Poll with a timeout so a producer failure cannot block this loop forever.
+                    batch = data_queue.get(block=True, timeout=0.1)
+                except queue.Empty:
+                    continue
+                yield batch
+
+            if producer_errors:
+                raise producer_errors[0]
+        finally:
+            shutdown_event.set()
+            # Drain the queue quickly to help the thread exit if it's blocked on `put`.
+            with suppress(queue.Empty):
+                while True:
+                    data_queue.get_nowait()
+            # Give the producer thread a bit of time to finish.
+            producer_thread.join(timeout=1.0)
+
+    def _get_naive_iterator(self, batch_size: int, queue_size: int = 2):
+        """
+        Single-threaded batch iterator backed by a small FIFO of pre-sampled batches.
+
+        `queue_size` batches are sampled up front. Each time one is handed out, a fresh batch is sampled
+        to replace it, so iteration only ends if `queue_size` is 0.
+
+        Args:
+            batch_size (int): Size of batches to sample
+            queue_size (int): Number of initial batches to prefetch
+
+        Yields:
+            BatchTransition: Batch transitions
+        """
+        from collections import deque
+
+        pending = deque(self.sample(batch_size) for _ in range(queue_size))
+
+        while pending:
+            yield pending.popleft()
+            # Refill after the consumer resumes, keeping the FIFO at its initial length.
+            pending.append(self.sample(batch_size))
+
+    @classmethod
+    def from_lerobot_dataset(
+        cls,
+        lerobot_dataset: LeRobotDataset,
+        device: str = "cuda:0",
+        state_keys: Sequence[str] | None = None,
+        capacity: int | None = None,
+        image_augmentation_function: Callable | None = None,
+        use_drq: bool = True,
+        storage_device: str = "cpu",
+        optimize_memory: bool = False,
+    ) -> "ReplayBuffer":
+        """
+        Convert a LeRobotDataset into a ReplayBuffer.
+
+        The dataset is first turned into a list of transitions, which are then added one by one.
+        Storage is allocated by `add` from the first transition.
+
+        Args:
+            lerobot_dataset (LeRobotDataset): The dataset to convert.
+            device (str): The device for sampling tensors. Defaults to "cuda:0".
+            state_keys (Sequence[str] | None): The list of keys that appear in `state` and `next_state`.
+            capacity (int | None): Buffer capacity. If None, uses dataset length.
+            image_augmentation_function (Callable | None): Function for image augmentation.
+                If None, uses default random shift with pad=4.
+            use_drq (bool): Whether to use DrQ image augmentation when sampling.
+            storage_device (str): Device for storing tensor data. Using "cpu" saves GPU memory.
+            optimize_memory (bool): If True, reduces memory usage by not duplicating state data.
+
+        Returns:
+            ReplayBuffer: The replay buffer with dataset transitions.
+
+        Raises:
+            ValueError: If `capacity` is smaller than the dataset length.
+        """
+        if capacity is None:
+            capacity = len(lerobot_dataset)
+
+        if capacity < len(lerobot_dataset):
+            raise ValueError(
+                "The capacity of the ReplayBuffer must be greater than or equal to the length of the LeRobotDataset."
+            )
+
+        # Create replay buffer with image augmentation and DrQ settings
+        replay_buffer = cls(
+            capacity=capacity,
+            device=device,
+            state_keys=state_keys,
+            image_augmentation_function=image_augmentation_function,
+            use_drq=use_drq,
+            storage_device=storage_device,
+            optimize_memory=optimize_memory,
+        )
+
+        # Convert dataset to transitions
+        list_transition = cls._lerobotdataset_to_transitions(dataset=lerobot_dataset, state_keys=state_keys)
+
+        def to_storage(value):
+            # Move tensors (also inside dicts) to the storage device; leave scalars and None untouched.
+            if isinstance(value, torch.Tensor):
+                return value.to(storage_device)
+            if isinstance(value, dict):
+                return {key: to_storage(item) for key, item in value.items()}
+            return value
+
+        # Fill the buffer with all transitions. No explicit pre-initialization is needed: `add`
+        # allocates storage from the first transition, and nothing has to visit the sampling device.
+        for data in list_transition:
+            replay_buffer.add(
+                state=to_storage(data["state"]),
+                action=to_storage(data["action"]),
+                reward=to_storage(data["reward"]),
+                next_state=to_storage(data["next_state"]),
+                done=to_storage(data["done"]),
+                truncated=False,  # NOTE: Truncation are not supported yet in lerobot dataset
+                complementary_info=to_storage(data.get("complementary_info", None)),
+            )
+
+        return replay_buffer
+
+    def to_lerobot_dataset(
+        self,
+        repo_id: str,
+        fps=1,
+        root=None,
+        task_name="from_replay_buffer",
+    ) -> LeRobotDataset:
+        """
+        Converts all transitions in this ReplayBuffer into a single LeRobotDataset object.
+
+        Transitions are written from the oldest stored slot to the newest. An episode is saved each
+        time a transition is flagged `done` or `truncated`; frames left over after the last flagged
+        transition are saved as a final episode. Each frame carries the state keys, "action",
+        "next.reward", "next.done" and any "complementary_info.<key>" entries; `next_states` are not
+        written.
+
+        Args:
+            repo_id (str): Forwarded to `LeRobotDataset.create`.
+            fps: Forwarded to `LeRobotDataset.create`.
+            root: Forwarded to `LeRobotDataset.create`.
+            task_name: Task label passed to `add_frame` for every frame.
+
+        Returns:
+            LeRobotDataset: The newly created dataset.
+
+        Raises:
+            ValueError: If the buffer holds no transition.
+        """
+        if self.size == 0:
+            raise ValueError("The replay buffer is empty. Cannot convert to a dataset.")
+
+        # Create features dictionary for the dataset
+        features = {
+            "index": {"dtype": "int64", "shape": [1]},  # global index across episodes
+            "episode_index": {"dtype": "int64", "shape": [1]},  # which episode
+            "frame_index": {"dtype": "int64", "shape": [1]},  # index inside an episode
+            "timestamp": {"dtype": "float32", "shape": [1]},  # for now we store dummy
+            "task_index": {"dtype": "int64", "shape": [1]},
+        }
+
+        # Add "action"
+        # NOTE(review): `guess_feature_info` is not defined in the visible part of this file;
+        # presumably it derives the feature spec from a sample tensor - confirm.
+        sample_action = self.actions[0]
+        act_info = guess_feature_info(t=sample_action, name="action")
+        features["action"] = act_info
+
+        # Add "reward" and "done"
+        features["next.reward"] = {"dtype": "float32", "shape": (1,)}
+        features["next.done"] = {"dtype": "bool", "shape": (1,)}
+
+        # Add state keys
+        for key in self.states:
+            sample_val = self.states[key][0]
+            f_info = guess_feature_info(t=sample_val, name=key)
+            features[key] = f_info
+
+        # Add complementary_info keys if available
+        if self.has_complementary_info:
+            for key in self.complementary_info_keys:
+                sample_val = self.complementary_info[key][0]
+                # Scalar entries are stored as 0-d values; give them a length-1 dimension
+                if isinstance(sample_val, torch.Tensor) and sample_val.ndim == 0:
+                    sample_val = sample_val.unsqueeze(0)
+                f_info = guess_feature_info(t=sample_val, name=f"complementary_info.{key}")
+                features[f"complementary_info.{key}"] = f_info
+
+        # Create an empty LeRobotDataset
+        lerobot_dataset = LeRobotDataset.create(
+            repo_id=repo_id,
+            fps=fps,
+            root=root,
+            robot_type=None,
+            features=features,
+            use_videos=True,
+        )
+
+        # Start writing images if needed
+        lerobot_dataset.start_image_writer(num_processes=0, num_threads=3)
+
+        # Convert transitions into episodes and frames
+        episode_index = 0
+        lerobot_dataset.episode_buffer = lerobot_dataset.create_episode_buffer(episode_index=episode_index)
+
+        # NOTE(review): `frame_idx_in_episode` is maintained below but never read in this method.
+        frame_idx_in_episode = 0
+        for idx in range(self.size):
+            # Map the chronological index onto the ring buffer: idx 0 is the oldest stored slot
+            actual_idx = (self.position - self.size + idx) % self.capacity
+
+            frame_dict = {}
+
+            # Fill the data for state keys
+            for key in self.states:
+                frame_dict[key] = self.states[key][actual_idx].cpu()
+
+            # Fill action, reward, done
+            frame_dict["action"] = self.actions[actual_idx].cpu()
+            frame_dict["next.reward"] = torch.tensor([self.rewards[actual_idx]], dtype=torch.float32).cpu()
+            frame_dict["next.done"] = torch.tensor([self.dones[actual_idx]], dtype=torch.bool).cpu()
+
+            # Add complementary_info if available
+            if self.has_complementary_info:
+                for key in self.complementary_info_keys:
+                    val = self.complementary_info[key][actual_idx]
+                    # Convert tensors to CPU
+                    if isinstance(val, torch.Tensor):
+                        if val.ndim == 0:
+                            val = val.unsqueeze(0)
+                        frame_dict[f"complementary_info.{key}"] = val.cpu()
+                    # Non-tensor values can be used directly
+                    else:
+                        frame_dict[f"complementary_info.{key}"] = val
+
+            # Add to the dataset's buffer
+            lerobot_dataset.add_frame(frame_dict, task=task_name)
+
+            # Move to next frame
+            frame_idx_in_episode += 1
+
+            # If we reached an episode boundary, call save_episode, reset counters
+            if self.dones[actual_idx] or self.truncateds[actual_idx]:
+                lerobot_dataset.save_episode()
+                episode_index += 1
+                frame_idx_in_episode = 0
+                lerobot_dataset.episode_buffer = lerobot_dataset.create_episode_buffer(
+                    episode_index=episode_index
+                )
+
+        # Save any remaining frames in the buffer
+        if lerobot_dataset.episode_buffer["size"] > 0:
+            lerobot_dataset.save_episode()
+
+        lerobot_dataset.stop_image_writer()
+
+        return lerobot_dataset
+
+    @staticmethod
+    def _lerobotdataset_to_transitions(
+        dataset: LeRobotDataset,
+        state_keys: Sequence[str] | None = None,
+    ) -> list[Transition]:
+        """
+        Convert a LeRobotDataset into a list of RL (s, a, r, s', done) transitions.
+
+        Args:
+            dataset (LeRobotDataset):
+                The dataset to convert. Each item in the dataset is expected to have
+                at least the following keys:
+                {
+                    "action": ...
+                    "next.reward": ...
+                    "next.done": ...
+                    "episode_index": ...
+                }
+                plus whatever your 'state_keys' specify.
+
+            state_keys (Sequence[str] | None):
+                The dataset keys to include in 'state' and 'next_state'. Their names
+                will be kept as-is in the output transitions. E.g.
+                ["observation.state", "observation.environment_state"].
+                If None, you must handle or define default keys.
+
+        Returns:
+            transitions (List[Transition]):
+                A list of Transition dictionaries with the same length as `dataset`.
+        """
+        if state_keys is None:
+            raise ValueError("State keys must be provided when converting LeRobotDataset to Transitions.")
+
+        transitions = []
+        num_frames = len(dataset)
+
+        # Check if the dataset has "next.done" key
+        sample = dataset[0]
+        has_done_key = "next.done" in sample
+
+        # Check for complementary_info keys
+        complementary_info_keys = [key for key in sample if key.startswith("complementary_info.")]
+        has_complementary_info = len(complementary_info_keys) > 0
+
+        # If not, we need to infer it from episode boundaries
+        if not has_done_key:
+            print("'next.done' key not found in dataset. Inferring from episode boundaries...")
+
+        for i in tqdm(range(num_frames)):
+            current_sample = dataset[i]
+
+            # ----- 1) Current state -----
+            current_state: dict[str, torch.Tensor] = {}
+            for key in state_keys:
+                val = current_sample[key]
+                current_state[key] = val.unsqueeze(0)  # Add batch dimension
+
+            # ----- 2) Action -----
+            action = current_sample["action"].unsqueeze(0)  # Add batch dimension
+
+            # ----- 3) Reward and done -----
+            reward = float(current_sample["next.reward"].item())  # ensure float
+
+            # Determine done flag - use next.done if available, otherwise infer from episode boundaries
+            if has_done_key:
+                done = bool(current_sample["next.done"].item())  # ensure bool
+            else:
+                # If this is the last frame or if next frame is in a different episode, mark as done
+                done = False
+                if i == num_frames - 1:
+                    done = True
+                elif i < num_frames - 1:
+                    next_sample = dataset[i + 1]
+                    if next_sample["episode_index"] != current_sample["episode_index"]:
+                        done = True
+
+            # TODO: (azouitine) Handle truncation (using the same value as done for now)
+            truncated = done
+
+            # ----- 4) Next state -----
+            # If not done and the next sample is in the same episode, we pull the next sample's state.
+            # Otherwise (done=True or next sample crosses to a new episode), next_state = current_state.
+            next_state = current_state  # default
+            if not done and (i < num_frames - 1):
+                next_sample = dataset[i + 1]
+                if next_sample["episode_index"] == current_sample["episode_index"]:
+                    # Build next_state from the same keys
+                    next_state_data: dict[str, torch.Tensor] = {}
+                    for key in state_keys:
+                        val = next_sample[key]
+                        next_state_data[key] = val.unsqueeze(0)  # Add batch dimension
+                    next_state = next_state_data
+
+            # ----- 5) Complementary info (if available) -----
+            complementary_info = None
+            if has_complementary_info:
+                complementary_info = {}
+                for key in complementary_info_keys:
+                    # Strip the "complementary_info." prefix to get the actual key
+                    clean_key = key[len("complementary_info.") :]
+                    val = current_sample[key]
+                    # Handle tensor and non-tensor values differently
+                    if isinstance(val, torch.Tensor):
+                        complementary_info[clean_key] = val.unsqueeze(0)  # Add batch dimension
+                    else:
+                        # TODO: (azouitine) Check if it's necessary to convert to tensor
+                        # For non-tensor values, use directly
+                        complementary_info[clean_key] = val
+
+            # ----- Construct the Transition -----
+            transition = Transition(
+                state=current_state,
+                action=action,
+                reward=reward,
+                next_state=next_state,
+                done=done,
+                truncated=truncated,
+                complementary_info=complementary_info,
+            )
+            transitions.append(transition)
+
+        return transitions
+
+
+# Utility function to guess shapes/dtypes from a tensor
+def guess_feature_info(t, name: str):
+    """
+    Return a dictionary with the 'dtype' and 'shape' for a given tensor or scalar value.
+    If it looks like a 3D (C,H,W) shape, we might consider it an 'image'.
+    Otherwise default to appropriate dtype for numeric.
+    """
+
+    shape = tuple(t.shape)
+    # Basic guess: if we have exactly 3 dims and shape[0] in {1, 3}, guess 'image'
+    if len(shape) == 3 and shape[0] in [1, 3]:
+        return {
+            "dtype": "image",
+            "shape": shape,
+        }
+    else:
+        # Otherwise treat as numeric
+        return {
+            "dtype": "float32",
+            "shape": shape,
+        }
+
+
+def concatenate_batch_transitions(
+    left_batch_transitions: BatchTransition, right_batch_transition: BatchTransition
+) -> BatchTransition:
+    """
+    Concatenates two BatchTransition objects into one.
+
+    This function merges the right BatchTransition into the left one by concatenating
+    all corresponding tensors along dimension 0. The operation modifies the left_batch_transitions
+    in place and also returns it.
+
+    Args:
+        left_batch_transitions (BatchTransition): The first batch to concatenate and the one
+            that will be modified in place.
+        right_batch_transition (BatchTransition): The second batch to append to the first one.
+
+    Returns:
+        BatchTransition: The concatenated batch (same object as left_batch_transitions).
+
+    Warning:
+        This function modifies the left_batch_transitions object in place.
+    """
+    # Concatenate state fields
+    left_batch_transitions["state"] = {
+        key: torch.cat(
+            [left_batch_transitions["state"][key], right_batch_transition["state"][key]],
+            dim=0,
+        )
+        for key in left_batch_transitions["state"]
+    }
+
+    # Concatenate basic fields
+    left_batch_transitions["action"] = torch.cat(
+        [left_batch_transitions["action"], right_batch_transition["action"]], dim=0
+    )
+    left_batch_transitions["reward"] = torch.cat(
+        [left_batch_transitions["reward"], right_batch_transition["reward"]], dim=0
+    )
+
+    # Concatenate next_state fields
+    left_batch_transitions["next_state"] = {
+        key: torch.cat(
+            [left_batch_transitions["next_state"][key], right_batch_transition["next_state"][key]],
+            dim=0,
+        )
+        for key in left_batch_transitions["next_state"]
+    }
+
+    # Concatenate done and truncated fields
+    left_batch_transitions["done"] = torch.cat(
+        [left_batch_transitions["done"], right_batch_transition["done"]], dim=0
+    )
+    left_batch_transitions["truncated"] = torch.cat(
+        [left_batch_transitions["truncated"], right_batch_transition["truncated"]],
+        dim=0,
+    )
+
+    # Handle complementary_info
+    left_info = left_batch_transitions.get("complementary_info")
+    right_info = right_batch_transition.get("complementary_info")
+
+    # Only process if right_info exists
+    if right_info is not None:
+        # Initialize left complementary_info if needed
+        if left_info is None:
+            left_batch_transitions["complementary_info"] = right_info
+        else:
+            # Concatenate each field
+            for key in right_info:
+                if key in left_info:
+                    left_info[key] = torch.cat([left_info[key], right_info[key]], dim=0)
+                else:
+                    left_info[key] = right_info[key]
+
+    return left_batch_transitions
--- a/src/lerobot/utils/control_utils.py
+++ b/src/lerobot/utils/control_utils.py
@@ -0,0 +1,215 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+########################################################################################
+# Utilities
+########################################################################################
+
+
+import logging
+import traceback
+from contextlib import nullcontext
+from copy import copy
+from functools import cache
+
+import numpy as np
+import torch
+from deepdiff import DeepDiff
+from termcolor import colored
+
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets.utils import DEFAULT_FEATURES
+from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.robots import Robot
+
+
+def log_control_info(robot: Robot, dt_s, episode_index=None, frame_index=None, fps=None):
+    log_items = []
+    if episode_index is not None:
+        log_items.append(f"ep:{episode_index}")
+    if frame_index is not None:
+        log_items.append(f"frame:{frame_index}")
+
+    def log_dt(shortname, dt_val_s):
+        nonlocal log_items, fps
+        info_str = f"{shortname}:{dt_val_s * 1000:5.2f} ({1 / dt_val_s:3.1f}hz)"
+        if fps is not None:
+            actual_fps = 1 / dt_val_s
+            if actual_fps < fps - 1:
+                info_str = colored(info_str, "yellow")
+        log_items.append(info_str)
+
+    # total step time displayed in milliseconds and its frequency
+    log_dt("dt", dt_s)
+
+    # TODO(aliberts): move robot-specific logs logic in robot.print_logs()
+    if not robot.robot_type.startswith("stretch"):
+        for name in robot.leader_arms:
+            key = f"read_leader_{name}_pos_dt_s"
+            if key in robot.logs:
+                log_dt("dtRlead", robot.logs[key])
+
+        for name in robot.follower_arms:
+            key = f"write_follower_{name}_goal_pos_dt_s"
+            if key in robot.logs:
+                log_dt("dtWfoll", robot.logs[key])
+
+            key = f"read_follower_{name}_pos_dt_s"
+            if key in robot.logs:
+                log_dt("dtRfoll", robot.logs[key])
+
+        for name in robot.cameras:
+            key = f"read_camera_{name}_dt_s"
+            if key in robot.logs:
+                log_dt(f"dtR{name}", robot.logs[key])
+
+    info_str = " ".join(log_items)
+    logging.info(info_str)
+
+
+@cache
+def is_headless():
+    """Detects if python is running without a monitor."""
+    try:
+        import pynput  # noqa
+
+        return False
+    except Exception:
+        print(
+            "Error trying to import pynput. Switching to headless mode. "
+            "As a result, the video stream from the cameras won't be shown, "
+            "and you won't be able to change the control flow with keyboards. "
+            "For more info, see traceback below.\n"
+        )
+        traceback.print_exc()
+        print()
+        return True
+
+
+def predict_action(
+    observation: dict[str, np.ndarray],
+    policy: PreTrainedPolicy,
+    device: torch.device,
+    use_amp: bool,
+    task: str | None = None,
+    robot_type: str | None = None,
+):
+    observation = copy(observation)
+    with (
+        torch.inference_mode(),
+        torch.autocast(device_type=device.type) if device.type == "cuda" and use_amp else nullcontext(),
+    ):
+        # Convert to pytorch format: channel first and float32 in [0,1] with batch dimension
+        for name in observation:
+            observation[name] = torch.from_numpy(observation[name])
+            if "image" in name:
+                observation[name] = observation[name].type(torch.float32) / 255
+                observation[name] = observation[name].permute(2, 0, 1).contiguous()
+            observation[name] = observation[name].unsqueeze(0)
+            observation[name] = observation[name].to(device)
+
+        observation["task"] = task if task else ""
+        observation["robot_type"] = robot_type if robot_type else ""
+
+        # Compute the next action with the policy
+        # based on the current observation
+        action = policy.select_action(observation)
+
+        # Remove batch dimension
+        action = action.squeeze(0)
+
+        # Move to cpu, if not already the case
+        action = action.to("cpu")
+
+    return action
+
+
+def init_keyboard_listener():
+    # Allow to exit early while recording an episode or resetting the environment,
+    # by tapping the right arrow key '->'. This might require a sudo permission
+    # to allow your terminal to monitor keyboard events.
+    events = {}
+    events["exit_early"] = False
+    events["rerecord_episode"] = False
+    events["stop_recording"] = False
+
+    if is_headless():
+        logging.warning(
+            "Headless environment detected. On-screen cameras display and keyboard inputs will not be available."
+        )
+        listener = None
+        return listener, events
+
+    # Only import pynput if not in a headless environment
+    from pynput import keyboard
+
+    def on_press(key):
+        try:
+            if key == keyboard.Key.right:
+                print("Right arrow key pressed. Exiting loop...")
+                events["exit_early"] = True
+            elif key == keyboard.Key.left:
+                print("Left arrow key pressed. Exiting loop and rerecord the last episode...")
+                events["rerecord_episode"] = True
+                events["exit_early"] = True
+            elif key == keyboard.Key.esc:
+                print("Escape key pressed. Stopping data recording...")
+                events["stop_recording"] = True
+                events["exit_early"] = True
+        except Exception as e:
+            print(f"Error handling key press: {e}")
+
+    listener = keyboard.Listener(on_press=on_press)
+    listener.start()
+
+    return listener, events
+
+
+def sanity_check_dataset_name(repo_id, policy_cfg):
+    _, dataset_name = repo_id.split("/")
+    # either repo_id doesnt start with "eval_" and there is no policy
+    # or repo_id starts with "eval_" and there is a policy
+
+    # Check if dataset_name starts with "eval_" but policy is missing
+    if dataset_name.startswith("eval_") and policy_cfg is None:
+        raise ValueError(
+            f"Your dataset name begins with 'eval_' ({dataset_name}), but no policy is provided ({policy_cfg.type})."
+        )
+
+    # Check if dataset_name does not start with "eval_" but policy is provided
+    if not dataset_name.startswith("eval_") and policy_cfg is not None:
+        raise ValueError(
+            f"Your dataset name does not begin with 'eval_' ({dataset_name}), but a policy is provided ({policy_cfg.type})."
+        )
+
+
+def sanity_check_dataset_robot_compatibility(
+    dataset: LeRobotDataset, robot: Robot, fps: int, features: dict
+) -> None:
+    fields = [
+        ("robot_type", dataset.meta.robot_type, robot.robot_type),
+        ("fps", dataset.fps, fps),
+        ("features", dataset.features, {**features, **DEFAULT_FEATURES}),
+    ]
+
+    mismatches = []
+    for field, dataset_value, present_value in fields:
+        diff = DeepDiff(dataset_value, present_value, exclude_regex_paths=[r".*\['info'\]$"])
+        if diff:
+            mismatches.append(f"{field}: expected {present_value}, got {dataset_value}")
+
+    if mismatches:
+        raise ValueError(
+            "Dataset metadata compatibility check failed with mismatches:\n" + "\n".join(mismatches)
+        )
--- a/src/lerobot/utils/encoding_utils.py
+++ b/src/lerobot/utils/encoding_utils.py
@@ -0,0 +1,67 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def encode_sign_magnitude(value: int, sign_bit_index: int):
+    """
+    https://en.wikipedia.org/wiki/Signed_number_representations#Sign%E2%80%93magnitude
+    """
+    max_magnitude = (1 << sign_bit_index) - 1
+    magnitude = abs(value)
+    if magnitude > max_magnitude:
+        raise ValueError(f"Magnitude {magnitude} exceeds {max_magnitude} (max for {sign_bit_index=})")
+
+    direction_bit = 1 if value < 0 else 0
+    return (direction_bit << sign_bit_index) | magnitude
+
+
+def decode_sign_magnitude(encoded_value: int, sign_bit_index: int):
+    """
+    https://en.wikipedia.org/wiki/Signed_number_representations#Sign%E2%80%93magnitude
+    """
+    direction_bit = (encoded_value >> sign_bit_index) & 1
+    magnitude_mask = (1 << sign_bit_index) - 1
+    magnitude = encoded_value & magnitude_mask
+    return -magnitude if direction_bit else magnitude
+
+
+def encode_twos_complement(value: int, n_bytes: int):
+    """
+    https://en.wikipedia.org/wiki/Signed_number_representations#Two%27s_complement
+    """
+
+    bit_width = n_bytes * 8
+    min_val = -(1 << (bit_width - 1))
+    max_val = (1 << (bit_width - 1)) - 1
+
+    if not (min_val <= value <= max_val):
+        raise ValueError(
+            f"Value {value} out of range for {n_bytes}-byte two's complement: [{min_val}, {max_val}]"
+        )
+
+    if value >= 0:
+        return value
+
+    return (1 << bit_width) + value
+
+
+def decode_twos_complement(value: int, n_bytes: int) -> int:
+    """
+    https://en.wikipedia.org/wiki/Signed_number_representations#Two%27s_complement
+    """
+    bits = n_bytes * 8
+    sign_bit = 1 << (bits - 1)
+    if value & sign_bit:
+        value -= 1 << bits
+    return value
--- a/src/lerobot/utils/hub.py
+++ b/src/lerobot/utils/hub.py
@@ -0,0 +1,202 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Any, Type, TypeVar
+
+from huggingface_hub import HfApi
+from huggingface_hub.utils import validate_hf_hub_args
+
+# Type variable bound to `HubMixin`, used to annotate `cls` and the return type of
+# `HubMixin.from_pretrained`.
+T = TypeVar("T", bound="HubMixin")
+
+
+class HubMixin:
+    """
+    A Mixin containing the functionality to push an object to the hub.
+
+    This is similar to huggingface_hub.ModelHubMixin but is lighter and makes less assumptions about its
+    subclasses (in particular, the fact that it's not necessarily a model).
+
+    The inheriting classes must implement '_save_pretrained' and 'from_pretrained': the versions defined
+    here only raise `NotImplementedError`.
+    """
+
+    def save_pretrained(
+        self,
+        save_directory: str | Path,
+        *,
+        repo_id: str | None = None,
+        push_to_hub: bool = False,
+        card_kwargs: dict[str, Any] | None = None,
+        **push_to_hub_kwargs,
+    ) -> str | None:
+        """
+        Save object in local directory, and optionally push it to the Hub.
+
+        The directory is created (including its parents) if it does not exist.
+
+        Args:
+            save_directory (`str` or `Path`):
+                Path to directory in which the object will be saved.
+            push_to_hub (`bool`, *optional*, defaults to `False`):
+                Whether or not to push your object to the Huggingface Hub after saving it.
+            repo_id (`str`, *optional*):
+                ID of your repository on the Hub. Used only if `push_to_hub=True`. Will default to the folder name if
+                not provided.
+            card_kwargs (`Dict[str, Any]`, *optional*):
+                Additional arguments passed to the card template to customize the card. Only forwarded to
+                [`~HubMixin.push_to_hub`], i.e. used only if `push_to_hub=True`.
+            push_to_hub_kwargs:
+                Additional key word arguments passed along to the [`~HubMixin.push_to_hub`] method.
+        Returns:
+            `str` or `None`: url of the commit on the Hub if `push_to_hub=True`, `None` otherwise.
+        """
+        save_directory = Path(save_directory)
+        save_directory.mkdir(parents=True, exist_ok=True)
+
+        # save object (weights, files, etc.)
+        self._save_pretrained(save_directory)
+
+        # push to the Hub if required
+        if push_to_hub:
+            if repo_id is None:
+                repo_id = save_directory.name  # Defaults to `save_directory` name
+            return self.push_to_hub(repo_id=repo_id, card_kwargs=card_kwargs, **push_to_hub_kwargs)
+        return None
+
+    def _save_pretrained(self, save_directory: Path) -> None:
+        """
+        Overwrite this method in subclass to define how to save your object.
+
+        Args:
+            save_directory (`Path`):
+                Path to directory in which the object files will be saved.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError
+
+    @classmethod
+    @validate_hf_hub_args
+    def from_pretrained(
+        cls: Type[T],
+        pretrained_name_or_path: str | Path,
+        *,
+        force_download: bool = False,
+        resume_download: bool | None = None,
+        proxies: dict | None = None,
+        token: str | bool | None = None,
+        cache_dir: str | Path | None = None,
+        local_files_only: bool = False,
+        revision: str | None = None,
+        **kwargs,
+    ) -> T:
+        """
+        Download the object from the Huggingface Hub and instantiate it.
+
+        This base implementation only defines the interface and raises `NotImplementedError`: subclasses
+        must override it.
+
+        Args:
+            pretrained_name_or_path (`str`, `Path`):
+                - Either the `repo_id` (string) of the object hosted on the Hub, e.g. `lerobot/diffusion_pusht`.
+                - Or a path to a `directory` containing the object files saved using `.save_pretrained`,
+                    e.g., `../path/to/my_model_directory/`.
+            revision (`str`, *optional*):
+                Revision on the Hub. Can be a branch name, a git tag or any commit id.
+                Defaults to the latest commit on `main` branch.
+            force_download (`bool`, *optional*, defaults to `False`):
+                Whether to force (re-)downloading the files from the Hub, overriding the existing cache.
+            proxies (`Dict[str, str]`, *optional*):
+                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
+                'http://hostname': 'foo.bar:4012'}`. The proxies are used on every request.
+            token (`str` or `bool`, *optional*):
+                The token to use as HTTP bearer authorization for remote files. By default, it will use the token
+                cached when running `huggingface-cli login`.
+            cache_dir (`str`, `Path`, *optional*):
+                Path to the folder where cached files are stored.
+            local_files_only (`bool`, *optional*, defaults to `False`):
+                If `True`, avoid downloading the file and return the path to the local cached file if it exists.
+            kwargs (`Dict`, *optional*):
+                Additional kwargs to pass to the object during initialization.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError
+
+    @validate_hf_hub_args
+    def push_to_hub(
+        self,
+        repo_id: str,
+        *,
+        commit_message: str | None = None,
+        private: bool | None = None,
+        token: str | None = None,
+        branch: str | None = None,
+        create_pr: bool | None = None,
+        allow_patterns: list[str] | str | None = None,
+        ignore_patterns: list[str] | str | None = None,
+        delete_patterns: list[str] | str | None = None,
+        card_kwargs: dict[str, Any] | None = None,
+    ) -> str:
+        """
+        Upload the object to the Hub.
+
+        The repository is created if needed, the object is saved into a temporary directory with
+        `save_pretrained` and that directory is uploaded as a model repository.
+
+        Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
+        `delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
+        details.
+
+        Args:
+            repo_id (`str`):
+                ID of the repository to push to (example: `"username/my-model"`).
+            commit_message (`str`, *optional*):
+                Message to commit while pushing. Defaults to "Upload policy" or "Upload config" when the class
+                name contains "Policy" or "Config" respectively, and to "Upload <class name>" otherwise.
+            private (`bool`, *optional*):
+                Whether the repository created should be private.
+                If `None` (default), the repo will be public unless the organization's default is private.
+            token (`str`, *optional*):
+                The token to use as HTTP bearer authorization for remote files. By default, it will use the token
+                cached when running `huggingface-cli login`.
+            branch (`str`, *optional*):
+                The git branch on which to push the model. This defaults to `"main"`.
+            create_pr (`boolean`, *optional*):
+                Whether or not to create a Pull Request from `branch` with that commit. Defaults to `False`.
+            allow_patterns (`List[str]` or `str`, *optional*):
+                If provided, only files matching at least one pattern are pushed.
+            ignore_patterns (`List[str]` or `str`, *optional*):
+                If provided, files matching any of the patterns are not pushed.
+            delete_patterns (`List[str]` or `str`, *optional*):
+                If provided, remote files matching any of the patterns will be deleted from the repo.
+            card_kwargs (`Dict[str, Any]`, *optional*):
+                Additional arguments passed to the card template to customize the card.
+
+        Returns:
+            The url of the commit of your object in the given repository.
+        """
+        api = HfApi(token=token)
+        # Use the repo id returned by the Hub from here on
+        repo_id = api.create_repo(repo_id=repo_id, private=private, exist_ok=True).repo_id
+
+        if commit_message is None:
+            if "Policy" in self.__class__.__name__:
+                commit_message = "Upload policy"
+            elif "Config" in self.__class__.__name__:
+                commit_message = "Upload config"
+            else:
+                commit_message = f"Upload {self.__class__.__name__}"
+
+        # Push the files to the repo in a single commit
+        with TemporaryDirectory(ignore_cleanup_errors=True) as tmp:
+            saved_path = Path(tmp) / repo_id
+            # `push_to_hub` keeps its default (False) here, so this call only writes the files locally
+            self.save_pretrained(saved_path, card_kwargs=card_kwargs)
+            return api.upload_folder(
+                repo_id=repo_id,
+                repo_type="model",
+                folder_path=saved_path,
+                commit_message=commit_message,
+                revision=branch,
+                create_pr=create_pr,
+                allow_patterns=allow_patterns,
+                ignore_patterns=ignore_patterns,
+                delete_patterns=delete_patterns,
+            )
--- a/src/lerobot/utils/import_utils.py
+++ b/src/lerobot/utils/import_utils.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import importlib
+import logging
+
+
+def is_package_available(pkg_name: str, return_version: bool = False) -> tuple[bool, str] | bool:
+    """Copied from https://github.com/huggingface/transformers/blob/main/src/transformers/utils/import_utils.py
+    Check if the package spec exists and grab its version to avoid importing a local directory.
+    **Note:** this doesn't work for all packages.
+    """
+    package_exists = importlib.util.find_spec(pkg_name) is not None
+    package_version = "N/A"
+    if package_exists:
+        try:
+            # Primary method to get the package version
+            package_version = importlib.metadata.version(pkg_name)
+
+        except importlib.metadata.PackageNotFoundError:
+            # Fallback method: Only for "torch" and versions containing "dev"
+            if pkg_name == "torch":
+                try:
+                    package = importlib.import_module(pkg_name)
+                    temp_version = getattr(package, "__version__", "N/A")
+                    # Check if the version contains "dev"
+                    if "dev" in temp_version:
+                        package_version = temp_version
+                        package_exists = True
+                    else:
+                        package_exists = False
+                except ImportError:
+                    # If the package can't be imported, it's not available
+                    package_exists = False
+            elif pkg_name == "grpc":
+                package = importlib.import_module(pkg_name)
+                package_version = getattr(package, "__version__", "N/A")
+            else:
+                # For packages other than "torch", don't attempt the fallback and set as not available
+                package_exists = False
+        logging.debug(f"Detected {pkg_name} version: {package_version}")
+    if return_version:
+        return package_exists, package_version
+    else:
+        return package_exists
+
+
+# Availability flags of optional dependencies, evaluated once when this module is imported.
+_torch_available, _torch_version = is_package_available("torch", return_version=True)
+_gym_xarm_available = is_package_available("gym_xarm")
+_gym_aloha_available = is_package_available("gym_aloha")
+_gym_pusht_available = is_package_available("gym_pusht")
--- a/src/lerobot/utils/io_utils.py
+++ b/src/lerobot/utils/io_utils.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import warnings
+from pathlib import Path
+from typing import TypeVar
+
+import imageio
+
+# Recursive alias for JSON-style values; tuples are accepted in addition to lists and dicts.
+JsonLike = str | int | float | bool | None | list["JsonLike"] | dict[str, "JsonLike"] | tuple["JsonLike", ...]
+# Type variable bound to JsonLike, used so a function can return the same type it was given.
+T = TypeVar("T", bound=JsonLike)
+
+
+def write_video(video_path, stacked_frames, fps):
+    """Save `stacked_frames` to `video_path` through `imageio.mimsave`, forwarding `fps`.
+
+    The "pkg_resources is deprecated as an API" DeprecationWarning is ignored while saving.
+    """
+    with warnings.catch_warnings():
+        # The filter only lives inside this `with` block.
+        warnings.filterwarnings(
+            action="ignore",
+            message="pkg_resources is deprecated as an API",
+            category=DeprecationWarning,
+        )
+        imageio.mimsave(video_path, stacked_frames, fps=fps)
+
+
+def deserialize_json_into_object(fpath: Path, obj: T) -> T:
+    """
+    Read the JSON file at `fpath` and overwrite the values held by `obj` with its content.
+
+    `obj` acts as a template: the JSON data must have exactly the same structure (same dict keys,
+    same list/tuple lengths) and every leaf must have exactly the same type as the value it replaces.
+    Dicts and lists found in `obj` are updated in place; tuples are rebuilt from the JSON lists.
+
+    Args:
+        fpath: Path of the JSON file to read (opened as UTF-8).
+        obj: Template object to fill.
+
+    Returns:
+        The filled object: `obj` itself when it is a dict or a list, a new value otherwise.
+
+    Raises:
+        TypeError: If a container or leaf type in the JSON data differs from the template.
+        ValueError: If dict keys or list/tuple lengths differ from the template.
+    """
+    with open(fpath, encoding="utf-8") as json_file:
+        loaded = json.load(json_file)
+
+    def _fill(template, value):
+        """Return `template` filled with `value`, checking structure and type at every level."""
+        if isinstance(template, dict):
+            if not isinstance(value, dict):
+                raise TypeError(f"Type mismatch: expected dict, got {type(value)}")
+            if template.keys() != value.keys():
+                raise ValueError(
+                    f"Dictionary keys do not match.\nExpected: {template.keys()}, got: {value.keys()}"
+                )
+            # Only existing keys are reassigned, so the dict size never changes while iterating.
+            for key, current in template.items():
+                template[key] = _fill(current, value[key])
+            return template
+
+        if isinstance(template, list):
+            if not isinstance(value, list):
+                raise TypeError(f"Type mismatch: expected list, got {type(value)}")
+            if len(template) != len(value):
+                raise ValueError(f"List length mismatch: expected {len(template)}, got {len(value)}")
+            for index, item in enumerate(value):
+                template[index] = _fill(template[index], item)
+            return template
+
+        if isinstance(template, tuple):
+            # JSON has no tuple type: the data holds a list, which is turned back into a tuple.
+            if not isinstance(value, list):
+                raise TypeError(f"Type mismatch: expected list (for tuple), got {type(value)}")
+            if len(template) != len(value):
+                raise ValueError(f"Tuple length mismatch: expected {len(template)}, got {len(value)}")
+            return tuple(_fill(t_item, v_item) for t_item, v_item in zip(template, value, strict=False))
+
+        # Leaf value: the types must be identical, not merely compatible.
+        if type(template) is not type(value):
+            raise TypeError(f"Type mismatch: expected {type(template)}, got {type(value)}")
+        return value
+
+    return _fill(obj, loaded)
--- a/src/lerobot/utils/logging_utils.py
+++ b/src/lerobot/utils/logging_utils.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any
+
+from lerobot.utils.utils import format_big_number
+
+
+class AverageMeter:
+    """
+    Keeps the latest value of a metric together with its running weighted average.
+    Adapted from https://github.com/pytorch/examples/blob/main/imagenet/main.py
+    """
+
+    def __init__(self, name: str, fmt: str = ":f"):
+        self.name = name
+        self.fmt = fmt
+        self.reset()
+
+    def reset(self) -> None:
+        """Set the latest value, the average, the weighted sum and the count back to zero."""
+        self.val = self.avg = self.sum = self.count = 0.0
+
+    def update(self, val: float, n: int = 1) -> None:
+        """Record `val` as the latest value and count it `n` times in the average."""
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+    def __str__(self):
+        # `fmt` is a format spec with its leading colon (e.g. ":.3f"), spliced right after `avg`.
+        template = "".join(("{name}:{avg", self.fmt, "}"))
+        return template.format(**vars(self))
+
+
+class MetricsTracker:
+    """
+    A helper class to track and log metrics over time.
+
+    The step-derived counters (`steps`, `samples`, `episodes`, `epochs`) are stored on the instance.
+    Every entry of `metrics` is also exposed as an attribute: reading `tracker.<name>` returns the
+    corresponding meter, and assigning `tracker.<name> = value` calls that meter's `update(value)`.
+
+    Usage pattern:
+
+    ```python
+    # initialize, potentially with non-zero initial step (e.g. if resuming run)
+    metrics = {"loss": AverageMeter("loss", ":.3f")}
+    train_metrics = MetricsTracker(batch_size, num_frames, num_episodes, metrics, initial_step=step)
+
+    # update metrics derived from step (samples, episodes, epochs) at each training step
+    train_metrics.step()
+
+    # update various metrics
+    loss = policy.forward(batch)
+    train_metrics.loss = loss
+
+    # display current metrics
+    logging.info(train_metrics)
+
+    # export for wandb
+    wandb.log(train_metrics.to_dict())
+
+    # reset averages after logging
+    train_metrics.reset_averages()
+    ```
+    """
+
+    # Attribute names pre-declared in the instance dict by `__init__`. `__setattr__` refuses any
+    # name that is neither already in the instance dict nor a key of `metrics`.
+    __keys__ = [
+        "_batch_size",
+        "_num_frames",
+        "_avg_samples_per_ep",
+        "metrics",
+        "steps",
+        "samples",
+        "episodes",
+        "epochs",
+    ]
+
+    def __init__(
+        self,
+        batch_size: int,
+        num_frames: int,
+        num_episodes: int,
+        metrics: dict[str, AverageMeter],
+        initial_step: int = 0,
+    ):
+        """
+        Args:
+            batch_size: Number of samples added to `samples` at every call to `step()`.
+            num_frames: Divisor used to turn `samples` into `epochs`.
+            num_episodes: Used with `num_frames` to compute the average number of samples per episode.
+            metrics: Meters exposed as attributes of the tracker, keyed by name.
+            initial_step: Value `steps` starts from (e.g. when resuming a run).
+        """
+        # Declare every allowed attribute first so the assignments below pass `__setattr__`.
+        self.__dict__.update(dict.fromkeys(self.__keys__))
+        self._batch_size = batch_size
+        self._num_frames = num_frames
+        self._avg_samples_per_ep = num_frames / num_episodes
+        self.metrics = metrics
+
+        self.steps = initial_step
+        # A sample is an (observation,action) pair, where observation and action
+        # can be on multiple timestamps. In a batch, we have `batch_size` number of samples.
+        self.samples = self.steps * self._batch_size
+        self.episodes = self.samples / self._avg_samples_per_ep
+        self.epochs = self.samples / self._num_frames
+
+    def __getattr__(self, name: str) -> int | dict[str, AverageMeter] | AverageMeter | Any:
+        # Only reached when the normal attribute lookup fails. `metrics` is read straight from the
+        # instance dict: going through `self.metrics` would re-enter this method while that
+        # attribute does not exist yet (e.g. on the bare instance that `copy` / `pickle` create
+        # before restoring the state) and recurse until RecursionError.
+        instance_dict = self.__dict__
+        if name in instance_dict:
+            return instance_dict[name]
+        metrics = instance_dict.get("metrics")
+        if metrics is not None and name in metrics:
+            return metrics[name]
+        raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        instance_dict = self.__dict__
+        metrics = instance_dict.get("metrics")
+        if name in instance_dict:
+            super().__setattr__(name, value)
+        elif metrics is not None and name in metrics:
+            # Assigning to a metric name feeds the value to the meter instead of replacing it.
+            metrics[name].update(value)
+        else:
+            raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
+
+    def step(self) -> None:
+        """
+        Updates metrics that depend on 'step' for one step.
+        """
+        self.steps += 1
+        self.samples += self._batch_size
+        self.episodes = self.samples / self._avg_samples_per_ep
+        self.epochs = self.samples / self._num_frames
+
+    def __str__(self) -> str:
+        display_list = [
+            f"step:{format_big_number(self.steps)}",
+            # number of samples seen during training
+            f"smpl:{format_big_number(self.samples)}",
+            # number of episodes seen during training
+            f"ep:{format_big_number(self.episodes)}",
+            # number of time all unique samples are seen
+            f"epch:{self.epochs:.2f}",
+            *[str(m) for m in self.metrics.values()],
+        ]
+        return " ".join(display_list)
+
+    def to_dict(self, use_avg: bool = True) -> dict[str, int | float]:
+        """
+        Returns the step counters plus, for each metric, its average (`use_avg=True`) or its latest
+        value (`use_avg=False`) as a dict.
+        """
+        return {
+            "steps": self.steps,
+            "samples": self.samples,
+            "episodes": self.episodes,
+            "epochs": self.epochs,
+            **{k: m.avg if use_avg else m.val for k, m in self.metrics.items()},
+        }
+
+    def reset_averages(self) -> None:
+        """Resets average meters."""
+        for m in self.metrics.values():
+            m.reset()
--- a/src/lerobot/utils/process.py
+++ b/src/lerobot/utils/process.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import signal
+import sys
+
+
+class ProcessSignalHandler:
+    """Installs shutdown signal handlers when instantiated.
+
+    Each received signal sets the `shutdown_event` attribute and increments a counter
+    (exposed as `counter`). Once the counter exceeds one, i.e. on the second signal,
+    the handler calls `sys.exit(1)`.
+    """
+
+    _SUPPORTED_SIGNALS = ("SIGINT", "SIGTERM", "SIGHUP", "SIGQUIT")
+
+    def __init__(self, use_threads: bool, display_pid: bool = False):
+        # TODO: Check if we can use Event from threading since Event from
+        # multiprocessing is a clone of threading.Event.
+        # https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Event
+        if use_threads:
+            from threading import Event as event_factory
+        else:
+            from multiprocessing import Event as event_factory
+
+        self.shutdown_event = event_factory()
+        self._counter: int = 0
+        self._display_pid = display_pid
+
+        self._register_handlers()
+
+    @property
+    def counter(self) -> int:  # pragma: no cover – simple accessor
+        """How many shutdown signals the handler has received so far."""
+        return self._counter
+
+    def _register_handlers(self):
+        """Install one shared handler for every supported signal this platform defines."""
+
+        def _on_signal(signum, frame):
+            pid_str = f"[PID: {os.getpid()}]" if self._display_pid else ""
+            logging.info(f"{pid_str} Shutdown signal {signum} received. Cleaning up…")
+            self.shutdown_event.set()
+            self._counter += 1
+
+            # The first signal only requests a graceful shutdown through the event;
+            # any further signal forces the exit.
+            # TODO: Investigate if we need it later
+            if self._counter > 1:
+                logging.info("Force shutdown")
+                sys.exit(1)
+
+        # Names missing from the `signal` module on this platform resolve to None.
+        candidates = (getattr(signal, sig_name, None) for sig_name in self._SUPPORTED_SIGNALS)
+        for sig in candidates:
+            if sig is None:
+                # Not available here (Windows for instance does not provide SIGHUP, SIGQUIT…).
+                continue
+            try:
+                signal.signal(sig, _on_signal)
+            except (ValueError, OSError):  # pragma: no cover – unlikely but safe
+                # Signal not supported or we are in a non-main thread.
+                continue
--- a/src/lerobot/utils/queue.py
+++ b/src/lerobot/utils/queue.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from queue import Empty
+from typing import Any
+
+from torch.multiprocessing import Queue
+
+
+def get_last_item_from_queue(queue: Queue, block=True, timeout: float = 0.1) -> Any:
+    """Empty `queue` and return the last item taken out of it.
+
+    Args:
+        queue: Queue to drain.
+        block: If True, first wait up to `timeout` for an item and return None when none
+            arrives. If False, only the items that can be taken without waiting are considered.
+        timeout: Timeout passed to the initial blocking `get`.
+
+    Returns:
+        The last item retrieved, or None if nothing could be retrieved.
+    """
+    latest = None
+    if block:
+        try:
+            latest = queue.get(timeout=timeout)
+        except Empty:
+            return None
+
+    # Keep pulling without waiting; whatever was taken last is the most recent item.
+    while True:
+        try:
+            latest = queue.get_nowait()
+        except Empty:
+            return latest
--- a/src/lerobot/utils/random_utils.py
+++ b/src/lerobot/utils/random_utils.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import random
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any, Generator
+
+import numpy as np
+import torch
+from safetensors.torch import load_file, save_file
+
+from lerobot.constants import RNG_STATE
+from lerobot.datasets.utils import flatten_dict, unflatten_dict
+
+
+def serialize_python_rng_state() -> dict[str, torch.Tensor]:
+    """
+    Returns the rng state for `random` in the form of a flat dict[str, torch.Tensor] to be saved using
+    `safetensors.save_file()` or `torch.save()`.
+
+    Only the first two elements of `random.getstate()` are stored (as int64 tensors); the remaining
+    element is not serialized.
+    """
+    version, internal_state, *_ = random.getstate()
+    return {
+        "py_rng_version": torch.tensor([version], dtype=torch.int64),
+        "py_rng_state": torch.tensor(internal_state, dtype=torch.int64),
+    }
+
+
+def deserialize_python_rng_state(rng_state_dict: dict[str, torch.Tensor]) -> None:
+    """
+    Restores the rng state for `random` from a dictionary produced by `serialize_python_rng_state()`.
+
+    Reads the "py_rng_version" and "py_rng_state" entries; the third element of the state tuple
+    handed to `random.setstate` is always None.
+    """
+    version = rng_state_dict["py_rng_version"].item()
+    internal_state = tuple(rng_state_dict["py_rng_state"].tolist())
+    random.setstate((version, internal_state, None))
+
+
+def serialize_numpy_rng_state() -> dict[str, torch.Tensor]:
+    """
+    Returns the rng state for `numpy` in the form of a flat dict[str, torch.Tensor] to be saved using
+    `safetensors.save_file()` or `torch.save()`.
+
+    The five elements of `np.random.get_state()` are mapped as follows: the algorithm name is only
+    checked (it must be "MT19937"), and the remaining four are stored under the "np_rng_state_values",
+    "np_rng_state_index", "np_rng_has_gauss" and "np_rng_cached_gaussian" keys.
+    """
+    np_state = np.random.get_state()
+    # Ensure no breaking changes from numpy
+    assert np_state[0] == "MT19937"
+    return {
+        "np_rng_state_values": torch.tensor(np_state[1], dtype=torch.int64),
+        "np_rng_state_index": torch.tensor([np_state[2]], dtype=torch.int64),
+        "np_rng_has_gauss": torch.tensor([np_state[3]], dtype=torch.int64),
+        # Stored in double precision: a float32 tensor would round the cached value, so the
+        # restored generator would not reproduce the exact same state.
+        "np_rng_cached_gaussian": torch.tensor([np_state[4]], dtype=torch.float64),
+    }
+
+
+def deserialize_numpy_rng_state(rng_state_dict: dict[str, torch.Tensor]) -> None:
+    """
+    Restores the rng state for `numpy` from a dictionary produced by `serialize_numpy_rng_state()`.
+
+    The state tuple passed to `np.random.set_state` always names the "MT19937" algorithm.
+    """
+    values = rng_state_dict["np_rng_state_values"].numpy()
+    index = rng_state_dict["np_rng_state_index"].item()
+    has_gauss = rng_state_dict["np_rng_has_gauss"].item()
+    cached_gaussian = rng_state_dict["np_rng_cached_gaussian"].item()
+    np.random.set_state(("MT19937", values, index, has_gauss, cached_gaussian))
+
+
+def serialize_torch_rng_state() -> dict[str, torch.Tensor]:
+    """
+    Returns the rng state for `torch` in the form of a flat dict[str, torch.Tensor] to be saved using
+    `safetensors.save_file()` or `torch.save()`.
+
+    The "torch_cuda_rng_state" entry is only present when `torch.cuda.is_available()` is True.
+    """
+    state = {"torch_rng_state": torch.get_rng_state()}
+    if not torch.cuda.is_available():
+        return state
+    state["torch_cuda_rng_state"] = torch.cuda.get_rng_state()
+    return state
+
+
+def deserialize_torch_rng_state(rng_state_dict: dict[str, torch.Tensor]) -> None:
+    """
+    Restores the rng state for `torch` from a dictionary produced by `serialize_torch_rng_state()`.
+
+    The CUDA state is only restored when CUDA is available and the dictionary holds a
+    "torch_cuda_rng_state" entry.
+    """
+    torch.set_rng_state(rng_state_dict["torch_rng_state"])
+    if not torch.cuda.is_available():
+        return
+    if "torch_cuda_rng_state" in rng_state_dict:
+        torch.cuda.set_rng_state(rng_state_dict["torch_cuda_rng_state"])
+
+
+def serialize_rng_state() -> dict[str, torch.Tensor]:
+    """
+    Returns the rng state for `random`, `numpy`, and `torch`, in the form of a flat
+    dict[str, torch.Tensor] to be saved using `safetensors.save_file()` or `torch.save()`.
+
+    The result merges the outputs of the three per-library serializers.
+    """
+    merged = serialize_python_rng_state()
+    merged = merged | serialize_numpy_rng_state()
+    merged = merged | serialize_torch_rng_state()
+    return merged
+
+
+def deserialize_rng_state(rng_state_dict: dict[str, torch.Tensor]) -> None:
+    """
+    Restores the rng state for `random`, `numpy`, and `torch` from a dictionary produced by
+    `serialize_rng_state()`.
+
+    Entries are routed to the per-library deserializers by key prefix: "py", "np" and "torch".
+    """
+
+    def _with_prefix(prefix):
+        return {key: tensor for key, tensor in rng_state_dict.items() if key.startswith(prefix)}
+
+    deserialize_python_rng_state(_with_prefix("py"))
+    deserialize_numpy_rng_state(_with_prefix("np"))
+    deserialize_torch_rng_state(_with_prefix("torch"))
+
+
+def save_rng_state(save_dir: Path) -> None:
+    """Serialize the current rng states with `serialize_rng_state()` and save them to `save_dir / RNG_STATE`."""
+    flat_state = flatten_dict(serialize_rng_state())
+    save_file(flat_state, save_dir / RNG_STATE)
+
+
+def load_rng_state(save_dir: Path) -> None:
+    """Load `save_dir / RNG_STATE` and restore the rng states it holds with `deserialize_rng_state()`."""
+    loaded = load_file(save_dir / RNG_STATE)
+    deserialize_rng_state(unflatten_dict(loaded))
+
+
+def get_rng_state() -> dict[str, Any]:
+    """Get the random state for `random`, `numpy`, and `torch`.
+
+    Returns:
+        A dictionary with the "random_state", "numpy_random_state" and "torch_random_state" keys,
+        plus "torch_cuda_random_state" when `torch.cuda.is_available()` is True.
+    """
+    snapshot = dict(
+        random_state=random.getstate(),
+        numpy_random_state=np.random.get_state(),
+        torch_random_state=torch.random.get_rng_state(),
+    )
+    if torch.cuda.is_available():
+        snapshot["torch_cuda_random_state"] = torch.cuda.random.get_rng_state()
+    return snapshot
+
+
+def set_rng_state(random_state_dict: dict[str, Any]):
+    """Set the random state for `random`, `numpy`, and `torch`.
+
+    Args:
+        random_state_dict: A dictionary of the form returned by `get_rng_state`. The
+            "torch_cuda_random_state" entry is optional: it is only restored when it is present
+            and CUDA is available.
+    """
+    random.setstate(random_state_dict["random_state"])
+    np.random.set_state(random_state_dict["numpy_random_state"])
+    torch.random.set_rng_state(random_state_dict["torch_random_state"])
+    # Check the key as well as CUDA availability (as `deserialize_torch_rng_state` does), so a
+    # state dictionary without a CUDA entry does not raise KeyError on a CUDA machine.
+    if torch.cuda.is_available() and "torch_cuda_random_state" in random_state_dict:
+        torch.cuda.random.set_rng_state(random_state_dict["torch_cuda_random_state"])
+
+
+def set_seed(seed) -> None:
+    """Seed `random`, `numpy` and `torch` (plus every CUDA device when CUDA is available)."""
+    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
+        seed_fn(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+
+@contextmanager
+def seeded_context(seed: int) -> Generator[None, None, None]:
+    """Set the seed when entering a context, and restore the prior random state at exit.
+
+    The prior state is restored whether the body of the `with` block completes or raises.
+
+    Example usage:
+
+    ```
+    a = random.random()  # produces some random number
+    with seeded_context(1337):
+        b = random.random()  # produces some other random number
+    c = random.random()  # produces yet another random number, but the same it would have if we never made `b`
+    ```
+    """
+    random_state_dict = get_rng_state()
+    set_seed(seed)
+    try:
+        yield None
+    finally:
+        # Without the `finally`, an exception raised inside the `with` block would skip the
+        # restore and leave the seeded state in place.
+        set_rng_state(random_state_dict)
--- a/src/lerobot/utils/robot_utils.py
+++ b/src/lerobot/utils/robot_utils.py
@@ -0,0 +1,44 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import platform
+import time
+
+
+def busy_wait(seconds):
+    """Wait for `seconds`: spin on `time.perf_counter` on macOS, call `time.sleep` elsewhere.
+
+    Outside macOS, nothing happens when `seconds` is not strictly positive.
+    """
+    if platform.system() != "Darwin":
+        # On Linux time.sleep is accurate
+        if seconds > 0:
+            time.sleep(seconds)
+        return
+
+    # On Mac, `time.sleep` is not accurate and we need to use this while loop trick,
+    # but it consumes CPU cycles.
+    # TODO(rcadene): find an alternative: from Python 3.11, time.sleep is precise
+    deadline = time.perf_counter() + seconds
+    while time.perf_counter() < deadline:
+        pass
+
+
+def safe_disconnect(func):
+    """Decorator that disconnects the robot when the wrapped call raises.
+
+    The wrapped callable receives the robot as its first positional argument. If it raises an
+    `Exception` while `robot.is_connected` is true, `robot.disconnect()` is called, then the
+    exception is re-raised. The return value is passed through unchanged.
+    """
+    # Function-scope import keeps this block independent from the module's import list.
+    import functools
+
+    # TODO(aliberts): Allow to pass custom exceptions
+    # (e.g. ThreadServiceExit, KeyboardInterrupt, SystemExit, UnpluggedError, DynamixelCommError)
+    @functools.wraps(func)  # keep the wrapped function's name and docstring
+    def wrapper(robot, *args, **kwargs):
+        try:
+            return func(robot, *args, **kwargs)
+        except Exception:
+            if robot.is_connected:
+                robot.disconnect()
+            # Bare `raise` re-raises the exception being handled.
+            raise
+
+    return wrapper
--- a/src/lerobot/utils/train_utils.py
+++ b/src/lerobot/utils/train_utils.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from pathlib import Path
+
+from termcolor import colored
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import LRScheduler
+
+from lerobot.configs.train import TrainPipelineConfig
+from lerobot.constants import (
+    CHECKPOINTS_DIR,
+    LAST_CHECKPOINT_LINK,
+    PRETRAINED_MODEL_DIR,
+    TRAINING_STATE_DIR,
+    TRAINING_STEP,
+)
+from lerobot.datasets.utils import load_json, write_json
+from lerobot.optim.optimizers import load_optimizer_state, save_optimizer_state
+from lerobot.optim.schedulers import load_scheduler_state, save_scheduler_state
+from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.utils.random_utils import load_rng_state, save_rng_state
+
+
+def log_output_dir(out_dir):
+    """Log `out_dir` at INFO level, after an "Output dir:" label styled with `colored` (bold yellow)."""
+    label = colored("Output dir:", "yellow", attrs=["bold"])
+    logging.info(label + f" {out_dir}")
+
+
+def get_step_identifier(step: int, total_steps: int) -> str:
+    """Return `step` as a zero-padded decimal string.
+
+    The width is the number of digits of `total_steps`, with a minimum of 6.
+    """
+    width = max(6, len(str(total_steps)))
+    return format(step, f"0{width}d")
+
+
+def get_step_checkpoint_dir(output_dir: Path, total_steps: int, step: int) -> Path:
+    """Returns `output_dir / CHECKPOINTS_DIR / <identifier>`, the identifier coming from `get_step_identifier`."""
+    return output_dir / CHECKPOINTS_DIR / get_step_identifier(step, total_steps)
+
+
+def save_training_step(step: int, save_dir: Path) -> None:
+    """Write `{"step": step}` to `save_dir / TRAINING_STEP` with `write_json`."""
+    payload = {"step": step}
+    write_json(payload, save_dir / TRAINING_STEP)
+
+
+def load_training_step(save_dir: Path) -> int:
+    """Load `save_dir / TRAINING_STEP` with `load_json` and return its "step" entry."""
+    return load_json(save_dir / TRAINING_STEP)["step"]
+
+
+def update_last_checkpoint(checkpoint_dir: Path) -> Path:
+    """Make the `LAST_CHECKPOINT_LINK` symlink located next to `checkpoint_dir` point at it.
+
+    An existing symlink at that location is removed first.
+
+    Args:
+        checkpoint_dir: Checkpoint directory the link must target.
+
+    Returns:
+        The path of the symlink.
+    """
+    last_checkpoint_dir = checkpoint_dir.parent / LAST_CHECKPOINT_LINK
+    if last_checkpoint_dir.is_symlink():
+        last_checkpoint_dir.unlink()
+    # The target is relative to the link's own directory (i.e. just the checkpoint directory
+    # name), so the link does not embed an absolute path.
+    relative_target = checkpoint_dir.relative_to(checkpoint_dir.parent)
+    last_checkpoint_dir.symlink_to(relative_target)
+    # The signature promises a Path; previously nothing was returned.
+    return last_checkpoint_dir
+
+
+def save_checkpoint(
+    checkpoint_dir: Path,
+    step: int,
+    cfg: TrainPipelineConfig,
+    policy: PreTrainedPolicy,
+    optimizer: Optimizer,
+    scheduler: LRScheduler | None = None,
+) -> None:
+    """This function creates the following directory structure:
+
+    005000/  #  training step at checkpoint
+    ├── pretrained_model/
+    │   ├── config.json  # policy config
+    │   ├── model.safetensors  # policy weights
+    │   └── train_config.json  # train config
+    └── training_state/
+        ├── optimizer_param_groups.json  #  optimizer param groups
+        ├── optimizer_state.safetensors  # optimizer state
+        ├── rng_state.safetensors  # rng states
+        ├── scheduler_state.json  # scheduler state
+        └── training_step.json  # training step
+
+    Args:
+        checkpoint_dir (Path): The directory of this checkpoint (`005000/` in the layout above).
+        step (int): The training step at that checkpoint.
+        cfg (TrainPipelineConfig): The training config used for this run.
+        policy (PreTrainedPolicy): The policy to save.
+        optimizer (Optimizer): The optimizer to save the state from.
+        scheduler (LRScheduler | None, optional): The scheduler to save the state from. Defaults to None.
+    """
+    model_dir = checkpoint_dir / PRETRAINED_MODEL_DIR
+    # Policy and train config both go to the pretrained-model directory, the policy first.
+    policy.save_pretrained(model_dir)
+    cfg.save_pretrained(model_dir)
+    save_training_state(checkpoint_dir, step, optimizer, scheduler)
+
+
+def save_training_state(
+    checkpoint_dir: Path,
+    train_step: int,
+    optimizer: Optimizer | None = None,
+    scheduler: LRScheduler | None = None,
+) -> None:
+    """
+    Saves the training step, optimizer state, scheduler state, and rng state.
+
+    Args:
+        checkpoint_dir (Path): The checkpoint directory; artifacts are written to its
+            `TRAINING_STATE_DIR` sub-directory, which is created if needed.
+        train_step (int): Current training step.
+        optimizer (Optimizer | None, optional): The optimizer from which to save the state_dict.
+            Skipped when None. Defaults to None.
+        scheduler (LRScheduler | None, optional): The scheduler from which to save the state_dict.
+            Skipped when None. Defaults to None.
+    """
+    state_dir = checkpoint_dir / TRAINING_STATE_DIR
+    state_dir.mkdir(parents=True, exist_ok=True)
+
+    save_training_step(train_step, state_dir)
+    save_rng_state(state_dir)
+
+    if optimizer is not None:
+        save_optimizer_state(optimizer, state_dir)
+
+    if scheduler is not None:
+        save_scheduler_state(scheduler, state_dir)
+
+
+def load_training_state(
+    checkpoint_dir: Path, optimizer: Optimizer, scheduler: LRScheduler | None
+) -> tuple[int, Optimizer, LRScheduler | None]:
+    """
+    Loads the training step, optimizer state, scheduler state, and rng state.
+    This is used to resume a training run.
+
+    Args:
+        checkpoint_dir (Path): The checkpoint directory. Should contain a 'training_state' dir.
+        optimizer (Optimizer): The optimizer to load the state_dict to.
+        scheduler (LRScheduler | None): The scheduler to load the state_dict to (can be None).
+
+    Raises:
+        NotADirectoryError: If 'checkpoint_dir' doesn't contain a 'training_state' dir
+
+    Returns:
+        tuple[int, Optimizer, LRScheduler | None]: training step, optimizer and scheduler with their
+            state_dict loaded.
+    """
+    state_dir = checkpoint_dir / TRAINING_STATE_DIR
+    if not state_dir.is_dir():
+        raise NotADirectoryError(state_dir)
+
+    # The rng state is restored first, then the step, the optimizer and (if any) the scheduler.
+    load_rng_state(state_dir)
+    step = load_training_step(state_dir)
+    optimizer = load_optimizer_state(optimizer, state_dir)
+    scheduler = load_scheduler_state(scheduler, state_dir) if scheduler is not None else None
+
+    return step, optimizer, scheduler
--- a/src/lerobot/utils/transition.py
+++ b/src/lerobot/utils/transition.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TypedDict
+
+import torch
+
+
+class _TransitionRequired(TypedDict):
+    # Keys every transition carries.
+    state: dict[str, torch.Tensor]
+    action: torch.Tensor
+    reward: float
+    next_state: dict[str, torch.Tensor]
+    done: bool
+    truncated: bool
+
+
+class Transition(_TransitionRequired, total=False):
+    """Typed dictionary describing one transition.
+
+    `state`, `action`, `reward`, `next_state`, `done` and `truncated` are required keys.
+    `complementary_info` may be omitted, or present with a None value.
+    """
+
+    # A TypedDict item cannot take a default value (`= None` does not add the key to new
+    # dictionaries), so optionality is expressed with `total=False` on this subclass instead.
+    complementary_info: dict[str, torch.Tensor | float | int] | None
+
+
+def move_transition_to_device(transition: Transition, device: str = "cpu") -> Transition:
+    """Move the tensors held by `transition` to `device`, updating `transition` in place.
+
+    The `state` and `next_state` entries and `action` are always moved. `reward`, `done` and
+    `truncated` are moved only when they are tensors. In `complementary_info` (when present and
+    not None) tensors are moved and int/float/bool values are converted to tensors on `device`.
+
+    Args:
+        transition: The transition to update.
+        device: Target device, passed to `torch.device`.
+
+    Returns:
+        The same `transition` object.
+
+    Raises:
+        ValueError: If a `complementary_info` value is neither a tensor nor an int/float/bool.
+    """
+    device = torch.device(device)
+    # Non-blocking copies are requested only when the target is a CUDA device.
+    non_blocking = device.type == "cuda"
+
+    def _move(tensor):
+        return tensor.to(device, non_blocking=non_blocking)
+
+    transition["state"] = {key: _move(val) for key, val in transition["state"].items()}
+    transition["action"] = _move(transition["action"])
+
+    # These three are left untouched unless they happen to be tensors.
+    for field in ("reward", "done", "truncated"):
+        if isinstance(transition[field], torch.Tensor):
+            transition[field] = _move(transition[field])
+
+    transition["next_state"] = {key: _move(val) for key, val in transition["next_state"].items()}
+
+    extras = transition.get("complementary_info")
+    if extras is None:
+        return transition
+
+    for key, val in extras.items():
+        if isinstance(val, torch.Tensor):
+            extras[key] = _move(val)
+        elif isinstance(val, (int, float, bool)):
+            extras[key] = torch.tensor(val, device=device)
+        else:
+            raise ValueError(f"Unsupported type {type(val)} for complementary_info[{key}]")
+    return transition
+
+
+def move_state_dict_to_device(state_dict, device="cpu"):
+    """
+    Recursively move all tensors in a (potentially) nested
+    dict/list/tuple structure to `device` ("cpu" by default).
+
+    Dicts, lists and tuples are rebuilt as new containers; any other non-tensor value is
+    returned as is.
+    """
+    if isinstance(state_dict, torch.Tensor):
+        return state_dict.to(device)
+    if isinstance(state_dict, dict):
+        return {key: move_state_dict_to_device(value, device=device) for key, value in state_dict.items()}
+    if isinstance(state_dict, list):
+        return [move_state_dict_to_device(item, device=device) for item in state_dict]
+    if isinstance(state_dict, tuple):
+        return tuple(move_state_dict_to_device(item, device=device) for item in state_dict)
+    return state_dict
--- a/src/lerobot/utils/utils.py
+++ b/src/lerobot/utils/utils.py
@@ -0,0 +1,374 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import os
+import os.path as osp
+import platform
+import select
+import subprocess
+import sys
+import time
+from copy import copy, deepcopy
+from datetime import datetime, timezone
+from pathlib import Path
+from statistics import mean
+
+import numpy as np
+import torch
+
+
+def none_or_int(value):
+    """Return None for the literal string "None"; otherwise convert `value` with `int()`."""
+    return None if value == "None" else int(value)
+
+
+def inside_slurm():
+    """Tell whether the `SLURM_JOB_ID` environment variable is set for this process."""
+    # TODO(rcadene): return False for interactive mode `--pty bash`
+    job_id = os.environ.get("SLURM_JOB_ID")
+    return job_id is not None
+
+
+def auto_select_torch_device() -> torch.device:
+    """
+    Pick a torch device automatically.
+
+    Preference order is CUDA, then MPS (Apple Metal), then CPU. The choice is
+    logged: at INFO level for an accelerated backend, at WARNING level when
+    falling back to CPU.
+
+    Returns:
+        The selected `torch.device` ("cuda", "mps" or "cpu").
+    """
+    if torch.cuda.is_available():
+        logging.info("Cuda backend detected, using cuda.")
+        return torch.device("cuda")
+    elif torch.backends.mps.is_available():
+        # The message previously claimed "using cuda" although the mps device is returned.
+        logging.info("Metal backend detected, using mps.")
+        return torch.device("mps")
+    else:
+        logging.warning("No accelerated backend detected. Using default cpu, this will be slow.")
+        return torch.device("cpu")
+
+
+# TODO(Steven): Remove log. log shouldn't be an argument, this should be handled by the logger level
+def get_safe_torch_device(try_device: str, log: bool = False) -> torch.device:
+    """
+    Turn a device name into a `torch.device`, asserting availability for "cuda" and "mps".
+
+    Args:
+        try_device: Requested device; it is converted with `str()` before being compared.
+        log: If True, emit a warning when the result is the CPU or a custom device string.
+
+    Returns:
+        The corresponding `torch.device`.
+
+    Raises:
+        AssertionError: If "cuda" or "mps" is requested but that backend is unavailable.
+    """
+    requested = str(try_device)
+
+    if requested == "cuda":
+        assert torch.cuda.is_available()
+        return torch.device("cuda")
+
+    if requested == "mps":
+        assert torch.backends.mps.is_available()
+        return torch.device("mps")
+
+    if requested == "cpu":
+        cpu_device = torch.device("cpu")
+        if log:
+            logging.warning("Using CPU, this will be slow.")
+        return cpu_device
+
+    # Any other string (e.g. an indexed device) is handed to torch as-is, without availability checks.
+    custom_device = torch.device(requested)
+    if log:
+        logging.warning(f"Using custom {requested} device.")
+    return custom_device
+
+
+def get_safe_dtype(dtype: torch.dtype, device: str | torch.device):
+    """
+    Return a dtype usable on `device`: float64 is swapped for float32 on mps
+    (mps is currently not compatible with float64); every other combination
+    is returned unchanged.
+    """
+    device_type = device.type if isinstance(device, torch.device) else device
+    needs_downcast = device_type == "mps" and dtype == torch.float64
+    return torch.float32 if needs_downcast else dtype
+
+
+def is_torch_device_available(try_device: str) -> bool:
+    """
+    Report whether the named backend ("cuda", "mps" or "cpu") can be used.
+
+    Raises:
+        ValueError: If the name (after `str()` conversion) is not one of the three supported ones.
+    """
+    try_device = str(try_device)  # Accept anything whose str() is a device name
+    if try_device == "cpu":
+        return True
+    if try_device == "cuda":
+        return torch.cuda.is_available()
+    if try_device == "mps":
+        return torch.backends.mps.is_available()
+    raise ValueError(f"Unknown device {try_device}. Supported devices are: cuda, mps or cpu.")
+
+
+def is_amp_available(device: str):
+    """
+    Tell whether automatic mixed precision is considered available for `device`.
+
+    Args:
+        device: Device type name ("cuda", "cpu" or "mps"). A `torch.device` is also
+            accepted and reduced to its `.type`, mirroring `get_safe_dtype`.
+
+    Returns:
+        True for "cuda" and "cpu", False for "mps".
+
+    Raises:
+        ValueError: For any other device type.
+    """
+    if isinstance(device, torch.device):
+        device = device.type
+    if device in ["cuda", "cpu"]:
+        return True
+    elif device == "mps":
+        return False
+    else:
+        # The closing quote around the device name was missing in the original message.
+        raise ValueError(f"Unknown device '{device}'.")
+
+
+def init_logging(log_file: Path | None = None, display_pid: bool = False):
+    """
+    Configure the root logger with a compact custom line format.
+
+    `logging.basicConfig(level=logging.INFO)` is called first, then every handler
+    currently attached to the root logger is removed and replaced by a console
+    `StreamHandler` (plus a `FileHandler` when `log_file` is given).
+
+    Each line looks like `LEVEL [PID: n] YYYY-mm-dd HH:MM:SS <path:lineno tail> message`,
+    where the PID part only appears when `display_pid` is True and the source
+    location is cut to its last 15 characters and right-aligned.
+
+    Args:
+        log_file: Optional path of a file that additionally receives the logs.
+        display_pid: If True, include the current process id in every line.
+    """
+
+    def custom_format(record):
+        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        fnameline = f"{record.pathname}:{record.lineno}"
+        # getMessage() interpolates %-style arguments (`logging.info("x=%s", x)`);
+        # reading record.msg directly would print the raw template and drop the args.
+        text = record.getMessage()
+
+        # NOTE: Display PID is useful for multi-process logging.
+        if display_pid:
+            pid_str = f"[PID: {os.getpid()}]"
+            message = f"{record.levelname} {pid_str} {dt} {fnameline[-15:]:>15} {text}"
+        else:
+            message = f"{record.levelname} {dt} {fnameline[-15:]:>15} {text}"
+        return message
+
+    logging.basicConfig(level=logging.INFO)
+
+    # Iterate over a copy since handlers are removed while looping.
+    for handler in logging.root.handlers[:]:
+        logging.root.removeHandler(handler)
+
+    formatter = logging.Formatter()
+    formatter.format = custom_format
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(formatter)
+    logging.getLogger().addHandler(console_handler)
+
+    if log_file is not None:
+        # Additionally write logs to file
+        file_handler = logging.FileHandler(log_file)
+        file_handler.setFormatter(formatter)
+        logging.getLogger().addHandler(file_handler)
+
+
+def format_big_number(num, precision=0):
+    """
+    Format a number with a thousands suffix ("", K, M, B, T, Q).
+
+    The value is divided by 1000 until its magnitude drops below 1000 or the
+    largest suffix is reached; values too large even for "Q" are still rendered
+    with the "Q" suffix instead of falling through to a bare float.
+
+    Args:
+        num: The number to format.
+        precision: Number of digits after the decimal point.
+
+    Returns:
+        The formatted string, e.g. "1K" for 1234 or "1.5M" for 1_500_000 with precision=1.
+    """
+    suffixes = ["", "K", "M", "B", "T", "Q"]
+    divisor = 1000.0
+
+    # Every suffix but the last one is only used when the scaled value fits below 1000.
+    for suffix in suffixes[:-1]:
+        if abs(num) < divisor:
+            return f"{num:.{precision}f}{suffix}"
+        num /= divisor
+
+    # Largest suffix: always format, whatever the remaining magnitude.
+    return f"{num:.{precision}f}{suffixes[-1]}"
+
+
+def _relative_path_between(path1: Path, path2: Path) -> Path:
+    """Express `path1` relative to `path2`, climbing with ".." when `path1` is not below `path2`."""
+    target = path1.absolute()
+    base = path2.absolute()
+    try:
+        return target.relative_to(base)
+    except ValueError:  # most likely because path1 is not a subpath of path2
+        shared_depth = len(Path(osp.commonpath([target, base])).parts)
+        # One ".." per component of the base below the common ancestor ...
+        climb = [".."] * (len(base.parts) - shared_depth)
+        # ... followed by the target's components below that ancestor.
+        descend = list(target.parts[shared_depth:])
+        return Path("/".join(climb + descend))
+
+
+def print_cuda_memory_usage():
+    """
+    Print current and peak allocated/reserved memory of CUDA device 0, in MB.
+
+    Garbage collection and `torch.cuda.empty_cache()` run first. Use this
+    function to locate and debug memory leaks.
+    """
+    import gc
+
+    gc.collect()
+    # Also clear the cache if you want to fully release the memory
+    torch.cuda.empty_cache()
+
+    bytes_per_mb = 1024**2
+    print(f"Current GPU Memory Allocated: {torch.cuda.memory_allocated(0) / bytes_per_mb:.2f} MB")
+    print(f"Maximum GPU Memory Allocated: {torch.cuda.max_memory_allocated(0) / bytes_per_mb:.2f} MB")
+    print(f"Current GPU Memory Reserved: {torch.cuda.memory_reserved(0) / bytes_per_mb:.2f} MB")
+    print(f"Maximum GPU Memory Reserved: {torch.cuda.max_memory_reserved(0) / bytes_per_mb:.2f} MB")
+
+
+def capture_timestamp_utc():
+    """Return the current time as a timezone-aware `datetime` in UTC."""
+    return datetime.now(tz=timezone.utc)
+
+
+def say(text: str, blocking: bool = False):
+    """
+    Speak `text` aloud through the platform's text-to-speech command.
+
+    Uses `say` on macOS, `spd-say` on Linux and the `System.Speech` synthesizer
+    through PowerShell on Windows.
+
+    Args:
+        text: The sentence to speak.
+        blocking: If True, run the command with `subprocess.run(check=True)` and
+            wait for it (on Linux `--wait` is also passed to `spd-say`); otherwise
+            the command is started with `subprocess.Popen` and not awaited.
+
+    Raises:
+        RuntimeError: On any other operating system.
+        subprocess.CalledProcessError: In blocking mode, if the command exits with a non-zero status.
+    """
+    system = platform.system()
+
+    if system == "Darwin":
+        cmd = ["say", text]
+
+    elif system == "Linux":
+        cmd = ["spd-say", text]
+        if blocking:
+            cmd.append("--wait")
+
+    elif system == "Windows":
+        # The text is embedded in a single-quoted PowerShell literal; a quote inside it
+        # must be doubled, otherwise it terminates the literal (broken command, and a
+        # way to inject arbitrary PowerShell through `text`).
+        escaped_text = text.replace("'", "''")
+        cmd = [
+            "PowerShell",
+            "-Command",
+            "Add-Type -AssemblyName System.Speech; "
+            f"(New-Object System.Speech.Synthesis.SpeechSynthesizer).Speak('{escaped_text}')",
+        ]
+
+    else:
+        raise RuntimeError("Unsupported operating system for text-to-speech.")
+
+    if blocking:
+        subprocess.run(cmd, check=True)
+    else:
+        # CREATE_NO_WINDOW only exists on Windows, hence the conditional lookup.
+        subprocess.Popen(cmd, creationflags=subprocess.CREATE_NO_WINDOW if system == "Windows" else 0)
+
+
+def log_say(text: str, play_sounds: bool = True, blocking: bool = False):
+    """Log `text` at INFO level and, unless `play_sounds` is False, also speak it via `say`."""
+    logging.info(text)
+
+    if not play_sounds:
+        return
+    say(text, blocking)
+
+
+def get_channel_first_image_shape(image_shape: tuple) -> tuple:
+    """
+    Return `image_shape` in (c, h, w) order.
+
+    The smallest of the three dimensions is taken to be the channel axis: if it
+    comes last the shape is reordered, if it comes first the shape is returned
+    as-is.
+
+    Raises:
+        ValueError: If neither the first nor the last dimension is strictly the smallest.
+    """
+    shape = copy(image_shape)
+    first, second, third = shape[0], shape[1], shape[2]
+
+    if third < first and third < second:  # (h, w, c) -> (c, h, w)
+        return (third, first, second)
+    if first < second and first < third:  # already (c, h, w)
+        return shape
+    raise ValueError(image_shape)
+
+
+def has_method(cls: object, method_name: str) -> bool:
+    """Return True if `cls` has an attribute called `method_name` and that attribute is callable."""
+    candidate = getattr(cls, method_name, None)
+    return callable(candidate)
+
+
+def is_valid_numpy_dtype_string(dtype_str: str) -> bool:
+    """
+    Check whether `np.dtype` accepts the given string.
+
+    Returns False when the conversion raises `TypeError`, True otherwise.
+    """
+    try:
+        np.dtype(dtype_str)
+    except TypeError:
+        # numpy signals an unknown dtype name with TypeError
+        return False
+    else:
+        return True
+
+
+def enter_pressed() -> bool:
+    """
+    Non-blocking check for whether the user pressed Enter.
+
+    On Windows, one pending key (if any) is read with `msvcrt` and compared to
+    carriage return / line feed. Elsewhere, stdin is polled with a zero-timeout
+    `select`; when it is readable, one line is consumed and the result is True
+    only if that line is empty after stripping.
+
+    Returns:
+        True if Enter was detected, False otherwise (always a real `bool`).
+    """
+    if platform.system() == "Windows":
+        import msvcrt
+
+        if msvcrt.kbhit():
+            key = msvcrt.getch()
+            return key in (b"\r", b"\n")  # enter key
+        return False
+    else:
+        # select() returns a list of ready streams; coerce it so that "nothing to read"
+        # yields False instead of leaking an empty list to the caller.
+        stdin_ready = bool(select.select([sys.stdin], [], [], 0)[0])
+        return stdin_ready and sys.stdin.readline().strip() == ""
+
+
+def move_cursor_up(lines):
+    """Print the ANSI escape sequence that moves the cursor up by `lines` lines (no trailing newline)."""
+    cursor_up_sequence = f"\033[{lines}A"
+    print(cursor_up_sequence, end="")
+
+
+class TimerManager:
+    """
+    Lightweight utility to measure elapsed time.
+
+    Every start/stop cycle (or `with` block) appends one duration, in seconds,
+    to an internal history from which the statistics below are derived.
+
+    Args:
+        label: Prefix used in the log line emitted by `stop()`.
+        log: If True, `stop()` logs the elapsed time at INFO level.
+        logger: Logger used for that line; the root `logging` module is used when None.
+
+    Examples
+    --------
+    ```python
+    # Example 1: Using context manager
+    timer = TimerManager("Policy", log=False)
+    for _ in range(3):
+        with timer:
+            time.sleep(0.01)
+    print(timer.last, timer.fps_avg, timer.percentile(90))  # Prints approximately: 0.01 100.0 0.01
+    ```
+
+    ```python
+    # Example 2: Using start/stop methods
+    timer = TimerManager("Policy", log=False)
+    timer.start()
+    time.sleep(0.01)
+    timer.stop()
+    print(timer.last, timer.fps_avg, timer.percentile(90))  # Prints approximately: 0.01 100.0 0.01
+    ```
+    """
+
+    def __init__(
+        self,
+        label: str = "Elapsed-time",
+        log: bool = True,
+        logger: logging.Logger | None = None,
+    ):
+        self.label = label
+        self.log = log
+        self.logger = logger
+        # perf_counter value of the pending start(), or None when no measurement is running.
+        self._start: float | None = None
+        # Completed measurements in seconds, oldest first.
+        self._history: list[float] = []
+
+    def __enter__(self):
+        return self.start()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # The measurement is recorded even if the block raised; the exception still propagates.
+        self.stop()
+
+    def start(self):
+        """Begin a measurement and return `self` so calls can be chained."""
+        self._start = time.perf_counter()
+        return self
+
+    def stop(self) -> float:
+        """
+        End the running measurement, store it in the history and return it.
+
+        Raises:
+            RuntimeError: If there is no pending `start()`.
+        """
+        if self._start is None:
+            raise RuntimeError("Timer was never started.")
+        elapsed = time.perf_counter() - self._start
+        self._history.append(elapsed)
+        self._start = None
+        if self.log:
+            if self.logger is not None:
+                self.logger.info(f"{self.label}: {elapsed:.6f} s")
+            else:
+                logging.info(f"{self.label}: {elapsed:.6f} s")
+        return elapsed
+
+    def reset(self):
+        """Forget all recorded measurements."""
+        self._history.clear()
+
+    @property
+    def last(self) -> float:
+        """Most recent duration in seconds, or 0.0 when nothing was recorded."""
+        return self._history[-1] if self._history else 0.0
+
+    @property
+    def avg(self) -> float:
+        """Mean duration in seconds, or 0.0 when nothing was recorded."""
+        return mean(self._history) if self._history else 0.0
+
+    @property
+    def total(self) -> float:
+        """Sum of all recorded durations in seconds."""
+        return sum(self._history)
+
+    @property
+    def count(self) -> int:
+        """Number of recorded measurements."""
+        return len(self._history)
+
+    @property
+    def history(self) -> list[float]:
+        """Copy of the recorded durations; mutating it does not affect the timer."""
+        return list(self._history)
+
+    @property
+    def fps_history(self) -> list[float]:
+        """
+        Frequency (1 / duration) of every measurement.
+
+        A zero-length duration maps to 0.0, like the other `fps_*` accessors,
+        instead of raising `ZeroDivisionError`.
+        """
+        return [0.0 if t == 0 else 1.0 / t for t in self._history]
+
+    @property
+    def fps_last(self) -> float:
+        """Frequency of the most recent measurement, 0.0 if there is none or it lasted 0 s."""
+        return 0.0 if self.last == 0 else 1.0 / self.last
+
+    @property
+    def fps_avg(self) -> float:
+        """Frequency corresponding to the mean duration, 0.0 if that mean is 0."""
+        return 0.0 if self.avg == 0 else 1.0 / self.avg
+
+    def percentile(self, p: float) -> float:
+        """
+        Return the p-th percentile of recorded times (0.0 when the history is empty).
+        """
+        if not self._history:
+            return 0.0
+        return float(np.percentile(self._history, p))
+
+    def fps_percentile(self, p: float) -> float:
+        """
+        FPS corresponding to the p-th percentile time (0.0 when that time is 0).
+        """
+        val = self.percentile(p)
+        return 0.0 if val == 0 else 1.0 / val
--- a/src/lerobot/utils/visualization_utils.py
+++ b/src/lerobot/utils/visualization_utils.py
@@ -0,0 +1,26 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import rerun as rr
+
+
+def _init_rerun(session_name: str = "lerobot_control_loop") -> None:
+    """
+    Initialize the Rerun SDK for visualizing the control loop and spawn its viewer.
+
+    `RERUN_FLUSH_NUM_BYTES` is set to "8000" unless already defined, and the
+    viewer memory limit is read from `LEROBOT_RERUN_MEMORY_LIMIT` (default "10%").
+    """
+    # Keep a user-provided flush size; otherwise fall back to 8000 bytes.
+    os.environ.setdefault("RERUN_FLUSH_NUM_BYTES", "8000")
+    rr.init(session_name)
+    rr.spawn(memory_limit=os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "10%"))
--- a/src/lerobot/utils/wandb_utils.py
+++ b/src/lerobot/utils/wandb_utils.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import os
+import re
+from glob import glob
+from pathlib import Path
+
+from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
+from termcolor import colored
+
+from lerobot.configs.train import TrainPipelineConfig
+from lerobot.constants import PRETRAINED_MODEL_DIR
+
+
+def cfg_to_group(cfg: TrainPipelineConfig, return_list: bool = False) -> list[str] | str:
+    """
+    Build a group name for logging from the policy type, the seed and, when
+    present, the dataset repo id and the env type.
+
+    Returns the "key:value" parts as a list if `return_list` is True, otherwise
+    joined with "-".
+    """
+    parts = [f"policy:{cfg.policy.type}", f"seed:{cfg.seed}"]
+
+    if cfg.dataset is not None:
+        parts += [f"dataset:{cfg.dataset.repo_id}"]
+    if cfg.env is not None:
+        parts += [f"env:{cfg.env.type}"]
+
+    if return_list:
+        return parts
+    return "-".join(parts)
+
+
+def get_wandb_run_id_from_filesystem(log_dir: Path) -> str:
+    """
+    Recover the WandB run ID of the latest run stored under `log_dir`.
+
+    Looks for exactly one entry matching `wandb/latest-run/run-*` and extracts
+    the ID from a file name of the form `run-<id>.wandb`.
+
+    Raises:
+        RuntimeError: If zero or several candidates are found, or if the file
+            name does not follow the expected pattern.
+    """
+    # Get the WandB run ID.
+    paths = glob(str(log_dir / "wandb/latest-run/run-*"))
+    if len(paths) != 1:
+        raise RuntimeError("Couldn't get the previous WandB run ID for run resumption.")
+    # Path(...).name is separator-agnostic (glob yields backslashes on Windows), and the
+    # dot before "wandb" is escaped so that it only matches a literal ".".
+    match = re.search(r"run-([^\.]+)\.wandb", Path(paths[0]).name)
+    if match is None:
+        raise RuntimeError("Couldn't get the previous WandB run ID for run resumption.")
+    return match.group(1)
+
+
+def get_safe_wandb_artifact_name(name: str):
+    """Replace every ":" and "/" with "_", since WandB artifacts don't accept them in their name."""
+    forbidden_to_underscore = str.maketrans({":": "_", "/": "_"})
+    return name.translate(forbidden_to_underscore)
+
+
+class WandBLogger:
+    """
+    A helper class to log objects using wandb.
+
+    Construction starts (or resumes) a wandb run from a `TrainPipelineConfig`;
+    the `log_*` methods then push policy checkpoints, scalar dictionaries and
+    videos to that run.
+    """
+
+    def __init__(self, cfg: TrainPipelineConfig):
+        """
+        Initialize the wandb run.
+
+        The run ID is `cfg.wandb.run_id` when set; otherwise, if `cfg.resume` is
+        true, it is recovered from the files under `cfg.output_dir`; otherwise
+        wandb picks a new one. The effective ID is written back to `cfg.wandb.run_id`.
+        """
+        self.cfg = cfg.wandb
+        self.log_dir = cfg.output_dir
+        self.job_name = cfg.job_name
+        self.env_fps = cfg.env.fps if cfg.env else None
+        self._group = cfg_to_group(cfg)
+
+        # Set up WandB. The environment variable is set before wandb is imported.
+        os.environ["WANDB_SILENT"] = "True"
+        import wandb
+
+        wandb_run_id = (
+            cfg.wandb.run_id
+            if cfg.wandb.run_id
+            else get_wandb_run_id_from_filesystem(self.log_dir)
+            if cfg.resume
+            else None
+        )
+        wandb.init(
+            id=wandb_run_id,
+            project=self.cfg.project,
+            entity=self.cfg.entity,
+            name=self.job_name,
+            notes=self.cfg.notes,
+            tags=cfg_to_group(cfg, return_list=True),
+            dir=self.log_dir,
+            config=cfg.to_dict(),
+            # TODO(rcadene): try set to True
+            save_code=False,
+            # TODO(rcadene): split train and eval, and run async eval with job_type="eval"
+            job_type="train_eval",
+            resume="must" if cfg.resume else None,
+            # Any unrecognized mode falls back to "online".
+            mode=self.cfg.mode if self.cfg.mode in ["online", "offline", "disabled"] else "online",
+        )
+        run_id = wandb.run.id
+        # NOTE: We will override the cfg.wandb.run_id with the wandb run id.
+        # This is because we want to be able to resume the run from the wandb run id.
+        cfg.wandb.run_id = run_id
+        # Handle custom step key for rl asynchronous training.
+        # Holds the "{mode}/{key}" names already declared through define_metric.
+        self._wandb_custom_step_key: set[str] | None = None
+        print(colored("Logs will be synced with wandb.", "blue", attrs=["bold"]))
+        logging.info(f"Track this run --> {colored(wandb.run.get_url(), 'yellow', attrs=['bold'])}")
+        self._wandb = wandb
+
+    def log_policy(self, checkpoint_dir: Path):
+        """
+        Checkpoints the policy to wandb.
+
+        Uploads the safetensors file found under `checkpoint_dir / PRETRAINED_MODEL_DIR`
+        as a "model" artifact named after the run group and the checkpoint directory
+        name. Does nothing when artifacts are disabled in the wandb config.
+        """
+        if self.cfg.disable_artifact:
+            return
+
+        step_id = checkpoint_dir.name
+        artifact_name = f"{self._group}-{step_id}"
+        artifact_name = get_safe_wandb_artifact_name(artifact_name)
+        artifact = self._wandb.Artifact(artifact_name, type="model")
+        artifact.add_file(checkpoint_dir / PRETRAINED_MODEL_DIR / SAFETENSORS_SINGLE_FILE)
+        self._wandb.log_artifact(artifact)
+
+    def log_dict(
+        self, d: dict, step: int | None = None, mode: str = "train", custom_step_key: str | None = None
+    ):
+        """
+        Log every int/float/str entry of `d` under the key "{mode}/{key}".
+
+        Args:
+            d: Values to log. Entries of any other type are skipped with a warning.
+            step: Global wandb step, used when no custom step key is given.
+            mode: "train" or "eval"; used as key prefix.
+            custom_step_key: Key of `d` whose value serves as step for the other
+                entries. It is declared once per mode as a hidden metric and is
+                attached to each logged entry rather than logged on its own.
+
+        Raises:
+            ValueError: If `mode` is invalid or neither `step` nor `custom_step_key` is given.
+            KeyError: If `custom_step_key` is given but missing from `d` while there is
+                at least one entry to log.
+        """
+        if mode not in {"train", "eval"}:
+            raise ValueError(mode)
+        if step is None and custom_step_key is None:
+            raise ValueError("Either step or custom_step_key must be provided.")
+
+        # NOTE: This is not simple. Wandb step must always monotonically increase and it
+        # increases with each wandb.log call, but in the case of asynchronous RL for example,
+        # multiple time steps are possible. For example, the interaction step with the environment,
+        # the training step, the evaluation step, etc. So we need to define a custom step key
+        # to log the correct step for each metric.
+        if custom_step_key is not None:
+            if self._wandb_custom_step_key is None:
+                self._wandb_custom_step_key = set()
+            new_custom_key = f"{mode}/{custom_step_key}"
+            if new_custom_key not in self._wandb_custom_step_key:
+                self._wandb_custom_step_key.add(new_custom_key)
+                self._wandb.define_metric(new_custom_key, hidden=True)
+
+        for k, v in d.items():
+            if not isinstance(v, (int, float, str)):
+                logging.warning(
+                    f'WandB logging of key "{k}" was ignored as its type "{type(v)}" is not handled by this wrapper.'
+                )
+                continue
+
+            # Do not log the custom step key itself. Registered keys are stored with their
+            # "{mode}/" prefix, so the lookup must use the prefixed name; comparing the raw
+            # key never matched and the step key was logged as a metric of its own.
+            if self._wandb_custom_step_key is not None and f"{mode}/{k}" in self._wandb_custom_step_key:
+                continue
+
+            if custom_step_key is not None:
+                value_custom_step = d[custom_step_key]
+                data = {f"{mode}/{k}": v, f"{mode}/{custom_step_key}": value_custom_step}
+                self._wandb.log(data)
+                continue
+
+            self._wandb.log(data={f"{mode}/{k}": v}, step=step)
+
+    def log_video(self, video_path: str, step: int, mode: str = "train"):
+        """
+        Log the mp4 video at `video_path` under "{mode}/video" at the given step,
+        using the environment fps stored at construction.
+
+        Raises:
+            ValueError: If `mode` is neither "train" nor "eval".
+        """
+        if mode not in {"train", "eval"}:
+            raise ValueError(mode)
+
+        wandb_video = self._wandb.Video(video_path, fps=self.env_fps, format="mp4")
+        self._wandb.log({f"{mode}/video": wandb_video}, step=step)