feat(scripts): lerobot-rollout

2026-06-03 12:21:27 +00:00 · 2026-04-14 15:42:04 +02:00
parent 5c43fa1cce
commit bc06cb44ca
54 changed files with 5204 additions and 2816 deletions
--- a/src/lerobot/scripts/lerobot_record.py
+++ b/src/lerobot/scripts/lerobot_record.py
@@ -13,70 +13,62 @@
 # limitations under the License.

 """
-Records a dataset. Actions for the robot can be either generated by teleoperation or by a policy.
+Records a dataset via teleoperation.  This is a pure data-collection
+tool — no policy inference.  For deploying trained policies, use
+``lerobot-rollout`` instead.

 Requires: pip install 'lerobot[core_scripts]'  (includes dataset + hardware + viz extras)

 Example:

 ```shell
-lerobot-record \
-    --robot.type=so100_follower \
-    --robot.port=/dev/tty.usbmodem58760431541 \
-    --robot.cameras="{laptop: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \
-    --robot.id=black \
-    --dataset.repo_id=<my_username>/<my_dataset_name> \
-    --dataset.num_episodes=2 \
-    --dataset.single_task="Grab the cube" \
-    --dataset.streaming_encoding=true \
-    --dataset.encoder_threads=2 \
+lerobot-record \\
+    --robot.type=so100_follower \\
+    --robot.port=/dev/tty.usbmodem58760431541 \\
+    --robot.cameras="{laptop: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \\
+    --robot.id=black \\
+    --teleop.type=so100_leader \\
+    --teleop.port=/dev/tty.usbmodem58760431551 \\
+    --teleop.id=blue \\
+    --dataset.repo_id=<my_username>/<my_dataset_name> \\
+    --dataset.num_episodes=2 \\
+    --dataset.single_task="Grab the cube" \\
+    --dataset.streaming_encoding=true \\
+    --dataset.encoder_threads=2 \\
    --display_data=true
-    # <- Optional: specify video codec (auto, h264, hevc, libsvtav1). Default is libsvtav1. \
-    # --dataset.vcodec=h264 \
-    # <- Teleop optional if you want to teleoperate to record or in between episodes with a policy \
-    # --teleop.type=so100_leader \
-    # --teleop.port=/dev/tty.usbmodem58760431551 \
-    # --teleop.id=blue \
-    # <- Policy optional if you want to record with a policy \
-    # --policy.path=${HF_USER}/my_policy \
 ```

 Example recording with bimanual so100:
 ```shell
-lerobot-record \
-  --robot.type=bi_so_follower \
-  --robot.left_arm_config.port=/dev/tty.usbmodem5A460822851 \
-  --robot.right_arm_config.port=/dev/tty.usbmodem5A460814411 \
-  --robot.id=bimanual_follower \
+lerobot-record \\
+  --robot.type=bi_so_follower \\
+  --robot.left_arm_config.port=/dev/tty.usbmodem5A460822851 \\
+  --robot.right_arm_config.port=/dev/tty.usbmodem5A460814411 \\
+  --robot.id=bimanual_follower \\
  --robot.left_arm_config.cameras='{
    wrist: {"type": "opencv", "index_or_path": 1, "width": 640, "height": 480, "fps": 30},
    top: {"type": "opencv", "index_or_path": 3, "width": 640, "height": 480, "fps": 30},
  }' --robot.right_arm_config.cameras='{
    wrist: {"type": "opencv", "index_or_path": 2, "width": 640, "height": 480, "fps": 30},
    front: {"type": "opencv", "index_or_path": 4, "width": 640, "height": 480, "fps": 30},
-  }' \
-  --teleop.type=bi_so_leader \
-  --teleop.left_arm_config.port=/dev/tty.usbmodem5A460852721 \
-  --teleop.right_arm_config.port=/dev/tty.usbmodem5A460819811 \
-  --teleop.id=bimanual_leader \
-  --display_data=true \
-  --dataset.repo_id=${HF_USER}/bimanual-so-handover-cube \
-  --dataset.num_episodes=25 \
-  --dataset.single_task="Grab and handover the red cube to the other arm" \
-  --dataset.streaming_encoding=true \
-  # --dataset.vcodec=auto \
+  }' \\
+  --teleop.type=bi_so_leader \\
+  --teleop.left_arm_config.port=/dev/tty.usbmodem5A460852721 \\
+  --teleop.right_arm_config.port=/dev/tty.usbmodem5A460819811 \\
+  --teleop.id=bimanual_leader \\
+  --display_data=true \\
+  --dataset.repo_id=${HF_USER}/bimanual-so-handover-cube \\
+  --dataset.num_episodes=25 \\
+  --dataset.single_task="Grab and handover the red cube to the other arm" \\
+  --dataset.streaming_encoding=true \\
  --dataset.encoder_threads=2
 ```
 """

 import logging
 import time
-from dataclasses import asdict, dataclass, field
-from pathlib import Path
+from dataclasses import asdict, dataclass
 from pprint import pformat
-from typing import Any
-
-import torch

 from lerobot.cameras import CameraConfig  # noqa: F401
 from lerobot.cameras.opencv import OpenCVCameraConfig  # noqa: F401
@@ -86,11 +78,10 @@ from lerobot.cameras.zmq import ZMQCameraConfig  # noqa: F401
 from lerobot.common.control_utils import (
    init_keyboard_listener,
    is_headless,
-    predict_action,
-    sanity_check_dataset_name,
    sanity_check_dataset_robot_compatibility,
 )
-from lerobot.configs import PreTrainedConfig, parser
+from lerobot.configs import parser
+from lerobot.configs.dataset import DatasetRecordConfig
 from lerobot.datasets import (
    LeRobotDataset,
    VideoEncodingManager,
@@ -98,21 +89,11 @@ from lerobot.datasets import (
    create_initial_features,
    safe_stop_image_writer,
 )
-from lerobot.policies import (
-    ActionInterpolator,
-    PreTrainedPolicy,
-    make_policy,
-    make_pre_post_processors,
-    make_robot_action,
-)
 from lerobot.processor import (
-    PolicyAction,
-    PolicyProcessorPipeline,
    RobotAction,
    RobotObservation,
    RobotProcessorPipeline,
    make_default_processors,
-    rename_stats,
 )
 from lerobot.robots import (  # noqa: F401
    Robot,
@@ -146,7 +127,6 @@ from lerobot.teleoperators import (  # noqa: F401
 )
 from lerobot.teleoperators.keyboard import KeyboardTeleop
 from lerobot.utils.constants import ACTION, OBS_STR
-from lerobot.utils.device_utils import get_safe_torch_device
 from lerobot.utils.feature_utils import build_dataset_frame, combine_feature_dicts
 from lerobot.utils.import_utils import register_third_party_plugins
 from lerobot.utils.robot_utils import precise_sleep
@@ -157,71 +137,12 @@ from lerobot.utils.utils import (
 from lerobot.utils.visualization_utils import init_rerun, log_rerun_data


-@dataclass
-class DatasetRecordConfig:
-    # Dataset identifier. By convention it should match '{hf_username}/{dataset_name}' (e.g. `lerobot/test`).
-    repo_id: str
-    # A short but accurate description of the task performed during the recording (e.g. "Pick the Lego block and drop it in the box on the right.")
-    single_task: str
-    # Root directory where the dataset will be stored (e.g. 'dataset/path'). If None, defaults to $HF_LEROBOT_HOME/repo_id.
-    root: str | Path | None = None
-    # Limit the frames per second.
-    fps: int = 30
-    # Number of seconds for data recording for each episode.
-    episode_time_s: int | float = 60
-    # Number of seconds for resetting the environment after each episode.
-    reset_time_s: int | float = 60
-    # Number of episodes to record.
-    num_episodes: int = 50
-    # Encode frames in the dataset into video
-    video: bool = True
-    # Upload dataset to Hugging Face hub.
-    push_to_hub: bool = True
-    # Upload on private repository on the Hugging Face hub.
-    private: bool = False
-    # Add tags to your dataset on the hub.
-    tags: list[str] | None = None
-    # Number of subprocesses handling the saving of frames as PNG. Set to 0 to use threads only;
-    # set to ≥1 to use subprocesses, each using threads to write images. The best number of processes
-    # and threads depends on your system. We recommend 4 threads per camera with 0 processes.
-    # If fps is unstable, adjust the thread count. If still unstable, try using 1 or more subprocesses.
-    num_image_writer_processes: int = 0
-    # Number of threads writing the frames as png images on disk, per camera.
-    # Too many threads might cause unstable teleoperation fps due to main thread being blocked.
-    # Not enough threads might cause low camera fps.
-    num_image_writer_threads_per_camera: int = 4
-    # Number of episodes to record before batch encoding videos
-    # Set to 1 for immediate encoding (default behavior), or higher for batched encoding
-    video_encoding_batch_size: int = 1
-    # Video codec for encoding videos. Options: 'h264', 'hevc', 'libsvtav1', 'auto',
-    # or hardware-specific: 'h264_videotoolbox', 'h264_nvenc', 'h264_vaapi', 'h264_qsv'.
-    # Use 'auto' to auto-detect the best available hardware encoder.
-    vcodec: str = "libsvtav1"
-    # Enable streaming video encoding: encode frames in real-time during capture instead
-    # of writing PNG images first. Makes save_episode() near-instant. More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding
-    streaming_encoding: bool = False
-    # Maximum number of frames to buffer per camera when using streaming encoding.
-    # ~1s buffer at 30fps. Provides backpressure if the encoder can't keep up.
-    encoder_queue_maxsize: int = 30
-    # Number of threads per encoder instance. None = auto (codec default).
-    # Lower values reduce CPU usage, maps to 'lp' (via svtav1-params) for libsvtav1 and 'threads' for h264/hevc..
-    encoder_threads: int | None = None
-    # Rename map for the observation to override the image and state keys
-    rename_map: dict[str, str] = field(default_factory=dict)
-
-    def __post_init__(self):
-        if self.single_task is None:
-            raise ValueError("You need to provide a task as argument in `single_task`.")
-
-
@dataclass
 class RecordConfig:
    robot: RobotConfig
    dataset: DatasetRecordConfig
-    # Whether to control the robot with a teleoperator
+    # Teleoperator to control the robot (required)
    teleop: TeleoperatorConfig | None = None
-    # Whether to control the robot with a policy
-    policy: PreTrainedConfig | None = None
    # Display all cameras on screen
    display_data: bool = False
    # Display data on a remote Rerun server
@@ -234,27 +155,14 @@ class RecordConfig:
    play_sounds: bool = True
    # Resume recording on an existing dataset.
    resume: bool = False
-    # Action interpolation multiplier for smoother policy control (1=off, 2=2x, 3=3x)
-    # Only applies when using a policy (not teleop)
-    interpolation_multiplier: int = 1

    def __post_init__(self):
-        # HACK: We parse again the cli args here to get the pretrained path if there was one.
-        policy_path = parser.get_path_arg("policy")
-
-        if policy_path:
-            cli_overrides = parser.get_cli_overrides("policy")
-
-            self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides)
-            self.policy.pretrained_path = policy_path
-
-        if self.teleop is None and self.policy is None:
-            raise ValueError("Choose a policy, a teleoperator or both to control the robot")
-
-    @classmethod
-    def __get_path_fields__(cls) -> list[str]:
-        """This enables the parser to load config from the policy using `--policy.path=local/dir`"""
-        return ["policy"]
+        if self.teleop is None:
+            raise ValueError(
+                "A teleoperator is required for recording. "
+                "Use --teleop.type=... to specify one. "
+                "For policy-based deployment, use lerobot-rollout instead."
+            )


 """ --------------- record_loop() data flow --------------------------
@@ -264,18 +172,14 @@ class RecordConfig:
           V
     [ robot_observation_processor ] ---> processed_obs
           V
-     .-----( ACTION LOGIC )------------------.
-     V                                       V
-     [ From Teleoperator ]                   [ From Policy ]
-     |                                       |
-     |  [teleop.get_action] -> raw_action    |   [predict_action]
-     |          |                            |          |
-     |          V                            |          V
-     | [teleop_action_processor]             |          |
-     |          |                            |          |
-     '---> processed_teleop_action           '---> processed_policy_action
-     |                                       |
-     '-------------------------.-------------'
+     [ Teleoperator ]
+     |
+     |  [teleop.get_action] -> raw_action
+     |          |
+     |          V
+     | [teleop_action_processor]
+     |          |
+     '---> processed_teleop_action
                               V
                  [ robot_action_processor ] --> robot_action_to_send
                               V
@@ -303,13 +207,9 @@ def record_loop(
    ],  # runs after robot
    dataset: LeRobotDataset | None = None,
    teleop: Teleoperator | list[Teleoperator] | None = None,
-    policy: PreTrainedPolicy | None = None,
-    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]] | None = None,
-    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction] | None = None,
    control_time_s: int | None = None,
    single_task: str | None = None,
    display_data: bool = False,
-    interpolator: ActionInterpolator | None = None,
    display_compressed_images: bool = False,
 ):
    if dataset is not None and dataset.fps != fps:
@@ -340,21 +240,7 @@ def record_loop(
                "For multi-teleop, the list must contain exactly one KeyboardTeleop and one arm teleoperator. Currently only supported for LeKiwi robot."
            )

-    # Reset policy and processor if they are provided
-    if policy is not None and preprocessor is not None and postprocessor is not None:
-        policy.reset()
-        preprocessor.reset()
-        postprocessor.reset()
-
-    # Reset interpolator if provided
-    if interpolator is not None:
-        interpolator.reset()
-
-    # Calculate control interval based on interpolation
-    use_interpolation = interpolator is not None and interpolator.enabled and policy is not None
-    control_interval = interpolator.get_control_interval(fps) if interpolator else 1 / fps
-    # Pre-compute action key order outside the hot loop — it won't change mid-episode.
-    action_keys = sorted(robot.action_features) if use_interpolation else []
+    control_interval = 1 / fps

    no_action_count = 0
    timestamp = 0
@@ -372,63 +258,11 @@ def record_loop(
        # Applies a pipeline to the raw robot observation, default is IdentityProcessor
        obs_processed = robot_observation_processor(obs)

-        if policy is not None or dataset is not None:
+        if dataset is not None:
            observation_frame = build_dataset_frame(dataset.features, obs_processed, prefix=OBS_STR)

-        # Track whether this iteration should be recorded to the dataset.
-        # Interpolated-only iterations send actions to the robot but don't record frames,
-        # keeping the dataset at the original fps while the robot moves at the higher rate.
-        is_record_frame = True
-
-        # Get action from either policy or teleop
-        if policy is not None and preprocessor is not None and postprocessor is not None:
-            # With interpolation: only call policy when interpolator needs new action
-            if use_interpolation:
-                ran_inference = False
-
-                if interpolator.needs_new_action():
-                    action_values = predict_action(
-                        observation=observation_frame,
-                        policy=policy,
-                        device=get_safe_torch_device(policy.config.device),
-                        preprocessor=preprocessor,
-                        postprocessor=postprocessor,
-                        use_amp=policy.config.use_amp,
-                        task=single_task,
-                        robot_type=robot.robot_type,
-                    )
-                    act_processed_policy = make_robot_action(action_values, dataset.features)
-                    robot_action_to_send = robot_action_processor((act_processed_policy, obs))
-
-                    action_tensor = torch.tensor([robot_action_to_send[k] for k in action_keys])
-                    interpolator.add(action_tensor)
-                    ran_inference = True
-
-                interp_action = interpolator.get()
-                if interp_action is not None:
-                    robot_action_to_send = {k: interp_action[i].item() for i, k in enumerate(action_keys)}
-                    action_values = robot_action_to_send
-                else:
-                    continue
-
-                is_record_frame = ran_inference
-            else:
-                action_values = predict_action(
-                    observation=observation_frame,
-                    policy=policy,
-                    device=get_safe_torch_device(policy.config.device),
-                    preprocessor=preprocessor,
-                    postprocessor=postprocessor,
-                    use_amp=policy.config.use_amp,
-                    task=single_task,
-                    robot_type=robot.robot_type,
-                )
-                act_processed_policy: RobotAction = make_robot_action(action_values, dataset.features)
-                # Applies a pipeline to the action, default is IdentityProcessor
-                robot_action_to_send = robot_action_processor((act_processed_policy, obs))
-                action_values = robot_action_to_send
-
-        elif policy is None and isinstance(teleop, Teleoperator):
+        # Get action from teleop
+        if isinstance(teleop, Teleoperator):
            act = teleop.get_action()
            if robot.name == "unitree_g1":
                teleop.send_feedback(obs)
@@ -438,7 +272,7 @@ def record_loop(
            action_values = act_processed_teleop
            robot_action_to_send = robot_action_processor((act_processed_teleop, obs))

-        elif policy is None and isinstance(teleop, list):
+        elif isinstance(teleop, list):
            arm_action = teleop_arm.get_action()
            arm_action = {f"arm_{k}": v for k, v in arm_action.items()}
            keyboard_action = teleop_keyboard.get_action()
@@ -451,7 +285,7 @@ def record_loop(
            no_action_count += 1
            if no_action_count == 1 or no_action_count % 10 == 0:
                logging.warning(
-                    "No policy or teleoperator provided, skipping action generation. "
+                    "No teleoperator provided, skipping action generation. "
                    "This is likely to happen when resetting the environment without a teleop device. "
                    "The robot won't be at its rest position at the start of the next episode."
                )
@@ -463,8 +297,8 @@ def record_loop(
        # TODO(steven, pepijn, adil): we should use a pipeline step to clip the action, so the sent action is the action that we input to the robot.
        _sent_action = robot.send_action(robot_action_to_send)

-        # Write to dataset (only on real policy frames, not interpolated-only iterations)
-        if dataset is not None and is_record_frame:
+        # Write to dataset
+        if dataset is not None:
            action_frame = build_dataset_frame(dataset.features, action_values, prefix=ACTION)
            frame = {**observation_frame, **action_frame, "task": single_task}
            dataset.add_frame(frame)
@@ -488,7 +322,12 @@ def record_loop(


@parser.wrap()
-def record(cfg: RecordConfig) -> LeRobotDataset:
+def record(
+    cfg: RecordConfig,
+    teleop_action_processor: RobotProcessorPipeline | None = None,
+    robot_action_processor: RobotProcessorPipeline | None = None,
+    robot_observation_processor: RobotProcessorPipeline | None = None,
+) -> LeRobotDataset:
    init_logging()
    logging.info(pformat(asdict(cfg)))
    if cfg.display_data:
@@ -502,7 +341,16 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
    robot = make_robot_from_config(cfg.robot)
    teleop = make_teleoperator_from_config(cfg.teleop) if cfg.teleop is not None else None

-    teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors()
+    # Fall back to identity pipelines when the caller doesn't supply processors.
+    if (
+        teleop_action_processor is None
+        or robot_action_processor is None
+        or robot_observation_processor is None
+    ):
+        _t, _r, _o = make_default_processors()
+        teleop_action_processor = teleop_action_processor or _t
+        robot_action_processor = robot_action_processor or _r
+        robot_observation_processor = robot_observation_processor or _o

    dataset_features = combine_feature_dicts(
        aggregate_pipeline_dataset_features(
@@ -540,8 +388,12 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
            )
            sanity_check_dataset_robot_compatibility(dataset, robot, cfg.dataset.fps, dataset_features)
        else:
-            # Create empty dataset or load existing saved episodes
-            sanity_check_dataset_name(cfg.dataset.repo_id, cfg.policy)
+            # Reject eval_ prefix — for policy evaluation use lerobot-rollout
+            if cfg.dataset.repo_id.startswith("eval_"):
+                raise ValueError(
+                    "Dataset names starting with 'eval_' are reserved for policy evaluation. "
+                    "lerobot-record is for data collection only. Use lerobot-rollout for policy deployment."
+                )
            dataset = LeRobotDataset.create(
                cfg.dataset.repo_id,
                cfg.dataset.fps,
@@ -558,30 +410,6 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
                encoder_threads=cfg.dataset.encoder_threads,
            )

-        # Load pretrained policy
-        policy = (
-            None
-            if cfg.policy is None
-            else make_policy(cfg.policy, ds_meta=dataset.meta, rename_map=cfg.dataset.rename_map)
-        )
-        preprocessor = None
-        postprocessor = None
-        interpolator = None
-        if cfg.policy is not None:
-            preprocessor, postprocessor = make_pre_post_processors(
-                policy_cfg=cfg.policy,
-                pretrained_path=cfg.policy.pretrained_path,
-                dataset_stats=rename_stats(dataset.meta.stats, cfg.dataset.rename_map),
-                preprocessor_overrides={
-                    "device_processor": {"device": cfg.policy.device},
-                    "rename_observations_processor": {"rename_map": cfg.dataset.rename_map},
-                },
-            )
-            # Create interpolator for smoother policy control
-            if cfg.interpolation_multiplier > 1:
-                interpolator = ActionInterpolator(multiplier=cfg.interpolation_multiplier)
-                logging.info(f"Action interpolation enabled: {cfg.interpolation_multiplier}x control rate")
-
        robot.connect()
        if teleop is not None:
            teleop.connect()
@@ -605,14 +433,10 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
                    robot_action_processor=robot_action_processor,
                    robot_observation_processor=robot_observation_processor,
                    teleop=teleop,
-                    policy=policy,
-                    preprocessor=preprocessor,
-                    postprocessor=postprocessor,
                    dataset=dataset,
                    control_time_s=cfg.dataset.episode_time_s,
                    single_task=cfg.dataset.single_task,
                    display_data=cfg.display_data,
-                    interpolator=interpolator,
                    display_compressed_images=display_compressed_images,
                )

@@ -660,7 +484,10 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
            listener.stop()

        if cfg.dataset.push_to_hub:
-            dataset.push_to_hub(tags=cfg.dataset.tags, private=cfg.dataset.private)
+            if dataset and dataset.num_episodes > 0:
+                dataset.push_to_hub(tags=cfg.dataset.tags, private=cfg.dataset.private)
+            else:
+                logging.warning("No episodes saved — skipping push to hub")

        log_say("Exiting", cfg.play_sounds)
    return dataset