# Copyright 2024 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ Records a dataset via teleoperation. This is a pure data-collection tool — no policy inference. For deploying trained policies, use ``lerobot-rollout`` instead. Requires: pip install 'lerobot[core_scripts]' (includes dataset + hardware + viz extras) Example: ```shell lerobot-record \\ --robot.type=so100_follower \\ --robot.port=/dev/tty.usbmodem58760431541 \\ --robot.cameras="{laptop: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \\ --robot.id=black \\ --teleop.type=so100_leader \\ --teleop.port=/dev/tty.usbmodem58760431551 \\ --teleop.id=blue \\ --dataset.repo_id=/ \\ --dataset.num_episodes=2 \\ --dataset.single_task="Grab the cube" \\ --dataset.streaming_encoding=true \\ --dataset.encoder_threads=2 \\ --display_data=true ``` Example recording with bimanual so100: ```shell lerobot-record \\ --robot.type=bi_so_follower \\ --robot.left_arm_config.port=/dev/tty.usbmodem5A460822851 \\ --robot.right_arm_config.port=/dev/tty.usbmodem5A460814411 \\ --robot.id=bimanual_follower \\ --robot.left_arm_config.cameras='{ wrist: {"type": "opencv", "index_or_path": 1, "width": 640, "height": 480, "fps": 30}, top: {"type": "opencv", "index_or_path": 3, "width": 640, "height": 480, "fps": 30}, }' --robot.right_arm_config.cameras='{ wrist: {"type": "opencv", "index_or_path": 2, "width": 640, "height": 480, "fps": 30}, front: {"type": "opencv", "index_or_path": 4, "width": 640, "height": 480, "fps": 30}, }' \\ --teleop.type=bi_so_leader \\ --teleop.left_arm_config.port=/dev/tty.usbmodem5A460852721 \\ --teleop.right_arm_config.port=/dev/tty.usbmodem5A460819811 \\ --teleop.id=bimanual_leader \\ --display_data=true \\ --dataset.repo_id=${HF_USER}/bimanual-so-handover-cube \\ --dataset.num_episodes=25 \\ --dataset.single_task="Grab and handover the red cube to the other arm" \\ --dataset.streaming_encoding=true \\ --dataset.encoder_threads=2 ``` """ import logging import time from dataclasses import asdict, dataclass from pprint import pformat from lerobot.cameras import CameraConfig # noqa: F401 from lerobot.cameras.opencv import OpenCVCameraConfig # noqa: F401 from lerobot.cameras.reachy2_camera import Reachy2CameraConfig # noqa: F401 from lerobot.cameras.realsense import RealSenseCameraConfig # noqa: F401 from lerobot.cameras.zmq import ZMQCameraConfig # noqa: F401 from lerobot.common.control_utils import ( init_keyboard_listener, is_headless, sanity_check_dataset_robot_compatibility, ) from lerobot.configs import parser from lerobot.configs.dataset import DatasetRecordConfig from lerobot.datasets import ( LeRobotDataset, VideoEncodingManager, aggregate_pipeline_dataset_features, create_initial_features, safe_stop_image_writer, ) from lerobot.processor import ( RobotAction, RobotObservation, RobotProcessorPipeline, make_default_processors, ) from lerobot.robots import ( # noqa: F401 Robot, RobotConfig, bi_openarm_follower, bi_so_follower, earthrover_mini_plus, hope_jr, koch_follower, make_robot_from_config, omx_follower, openarm_follower, reachy2, so_follower, unitree_g1 as unitree_g1_robot, ) from lerobot.teleoperators import ( # noqa: F401 Teleoperator, TeleoperatorConfig, bi_openarm_leader, bi_so_leader, homunculus, koch_leader, make_teleoperator_from_config, omx_leader, openarm_leader, openarm_mini, reachy2_teleoperator, so_leader, unitree_g1, ) from lerobot.teleoperators.keyboard import KeyboardTeleop from lerobot.utils.constants import ACTION, OBS_STR from lerobot.utils.feature_utils import build_dataset_frame, combine_feature_dicts from lerobot.utils.import_utils import register_third_party_plugins from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import ( init_logging, log_say, ) from lerobot.utils.visualization_utils import init_rerun, log_rerun_data @dataclass class RecordConfig: robot: RobotConfig dataset: DatasetRecordConfig # Teleoperator to control the robot (required) teleop: TeleoperatorConfig | None = None # Display all cameras on screen display_data: bool = False # Display data on a remote Rerun server display_ip: str | None = None # Port of the remote Rerun server display_port: int | None = None # Whether to display compressed images in Rerun display_compressed_images: bool = False # Use vocal synthesis to read events. play_sounds: bool = True # Resume recording on an existing dataset. resume: bool = False def __post_init__(self): if self.teleop is None: raise ValueError( "A teleoperator is required for recording. " "Use --teleop.type=... to specify one. " "For policy-based deployment, use lerobot-rollout instead." ) """ --------------- record_loop() data flow -------------------------- [ Robot ] V [ robot.get_observation() ] ---> raw_obs V [ robot_observation_processor ] ---> processed_obs V [ Teleoperator ] | | [teleop.get_action] -> raw_action | | | V | [teleop_action_processor] | | '---> processed_teleop_action V [ robot_action_processor ] --> robot_action_to_send V [ robot.send_action() ] -- (Robot Executes) V ( Save to Dataset ) V ( Rerun Log / Loop Wait ) """ @safe_stop_image_writer def record_loop( robot: Robot, events: dict, fps: int, teleop_action_processor: RobotProcessorPipeline[ tuple[RobotAction, RobotObservation], RobotAction ], # runs after teleop robot_action_processor: RobotProcessorPipeline[ tuple[RobotAction, RobotObservation], RobotAction ], # runs before robot robot_observation_processor: RobotProcessorPipeline[ RobotObservation, RobotObservation ], # runs after robot dataset: LeRobotDataset | None = None, teleop: Teleoperator | list[Teleoperator] | None = None, control_time_s: int | None = None, single_task: str | None = None, display_data: bool = False, display_compressed_images: bool = False, ): if dataset is not None and dataset.fps != fps: raise ValueError(f"The dataset fps should be equal to requested fps ({dataset.fps} != {fps}).") teleop_arm = teleop_keyboard = None if isinstance(teleop, list): teleop_keyboard = next((t for t in teleop if isinstance(t, KeyboardTeleop)), None) teleop_arm = next( ( t for t in teleop if isinstance( t, ( so_leader.SO100Leader | so_leader.SO101Leader | koch_leader.KochLeader | omx_leader.OmxLeader ), ) ), None, ) if not (teleop_arm and teleop_keyboard and len(teleop) == 2 and robot.name == "lekiwi_client"): raise ValueError( "For multi-teleop, the list must contain exactly one KeyboardTeleop and one arm teleoperator. Currently only supported for LeKiwi robot." ) control_interval = 1 / fps no_action_count = 0 timestamp = 0 start_episode_t = time.perf_counter() while timestamp < control_time_s: start_loop_t = time.perf_counter() if events["exit_early"]: events["exit_early"] = False break # Get robot observation obs = robot.get_observation() # Applies a pipeline to the raw robot observation, default is IdentityProcessor obs_processed = robot_observation_processor(obs) if dataset is not None: observation_frame = build_dataset_frame(dataset.features, obs_processed, prefix=OBS_STR) # Get action from teleop if isinstance(teleop, Teleoperator): act = teleop.get_action() if robot.name == "unitree_g1": teleop.send_feedback(obs) # Applies a pipeline to the raw teleop action, default is IdentityProcessor act_processed_teleop = teleop_action_processor((act, obs)) action_values = act_processed_teleop robot_action_to_send = robot_action_processor((act_processed_teleop, obs)) elif isinstance(teleop, list): arm_action = teleop_arm.get_action() arm_action = {f"arm_{k}": v for k, v in arm_action.items()} keyboard_action = teleop_keyboard.get_action() base_action = robot._from_keyboard_to_base_action(keyboard_action) act = {**arm_action, **base_action} if len(base_action) > 0 else arm_action act_processed_teleop = teleop_action_processor((act, obs)) action_values = act_processed_teleop robot_action_to_send = robot_action_processor((act_processed_teleop, obs)) else: no_action_count += 1 if no_action_count == 1 or no_action_count % 10 == 0: logging.warning( "No teleoperator provided, skipping action generation. " "This is likely to happen when resetting the environment without a teleop device. " "The robot won't be at its rest position at the start of the next episode." ) continue # Send action to robot # Action can eventually be clipped using `max_relative_target`, # so action actually sent is saved in the dataset. action = postprocessor.process(action) # TODO(steven, pepijn, adil): we should use a pipeline step to clip the action, so the sent action is the action that we input to the robot. _sent_action = robot.send_action(robot_action_to_send) # Write to dataset if dataset is not None: action_frame = build_dataset_frame(dataset.features, action_values, prefix=ACTION) frame = {**observation_frame, **action_frame, "task": single_task} dataset.add_frame(frame) if display_data: log_rerun_data( observation=obs_processed, action=action_values, compress_images=display_compressed_images ) dt_s = time.perf_counter() - start_loop_t sleep_time_s: float = control_interval - dt_s if sleep_time_s < 0: logging.warning( f"Record loop is running slower ({1 / dt_s:.1f} Hz) than the target FPS ({fps} Hz). Dataset frames might be dropped and robot control might be unstable. Common causes are: 1) Camera FPS not keeping up 2) Policy inference taking too long 3) CPU starvation" ) precise_sleep(max(sleep_time_s, 0.0)) timestamp = time.perf_counter() - start_episode_t @parser.wrap() def record( cfg: RecordConfig, teleop_action_processor: RobotProcessorPipeline | None = None, robot_action_processor: RobotProcessorPipeline | None = None, robot_observation_processor: RobotProcessorPipeline | None = None, ) -> LeRobotDataset: init_logging() logging.info(pformat(asdict(cfg))) if cfg.display_data: init_rerun(session_name="recording", ip=cfg.display_ip, port=cfg.display_port) display_compressed_images = ( True if (cfg.display_data and cfg.display_ip is not None and cfg.display_port is not None) else cfg.display_compressed_images ) robot = make_robot_from_config(cfg.robot) teleop = make_teleoperator_from_config(cfg.teleop) if cfg.teleop is not None else None # Fall back to identity pipelines when the caller doesn't supply processors. if ( teleop_action_processor is None or robot_action_processor is None or robot_observation_processor is None ): _t, _r, _o = make_default_processors() teleop_action_processor = teleop_action_processor or _t robot_action_processor = robot_action_processor or _r robot_observation_processor = robot_observation_processor or _o dataset_features = combine_feature_dicts( aggregate_pipeline_dataset_features( pipeline=teleop_action_processor, initial_features=create_initial_features( action=robot.action_features ), # TODO(steven, pepijn): in future this should be come from teleop or policy use_videos=cfg.dataset.video, ), aggregate_pipeline_dataset_features( pipeline=robot_observation_processor, initial_features=create_initial_features(observation=robot.observation_features), use_videos=cfg.dataset.video, ), ) dataset = None listener = None try: if cfg.resume: num_cameras = len(robot.cameras) if hasattr(robot, "cameras") else 0 dataset = LeRobotDataset.resume( cfg.dataset.repo_id, root=cfg.dataset.root, batch_encoding_size=cfg.dataset.video_encoding_batch_size, vcodec=cfg.dataset.vcodec, streaming_encoding=cfg.dataset.streaming_encoding, encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize, encoder_threads=cfg.dataset.encoder_threads, image_writer_processes=cfg.dataset.num_image_writer_processes if num_cameras > 0 else 0, image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * num_cameras if num_cameras > 0 else 0, ) sanity_check_dataset_robot_compatibility(dataset, robot, cfg.dataset.fps, dataset_features) else: # Reject eval_ prefix — for policy evaluation use lerobot-rollout if cfg.dataset.repo_id.startswith("eval_"): raise ValueError( "Dataset names starting with 'eval_' are reserved for policy evaluation. " "lerobot-record is for data collection only. Use lerobot-rollout for policy deployment." ) dataset = LeRobotDataset.create( cfg.dataset.repo_id, cfg.dataset.fps, root=cfg.dataset.root, robot_type=robot.name, features=dataset_features, use_videos=cfg.dataset.video, image_writer_processes=cfg.dataset.num_image_writer_processes, image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * len(robot.cameras), batch_encoding_size=cfg.dataset.video_encoding_batch_size, vcodec=cfg.dataset.vcodec, streaming_encoding=cfg.dataset.streaming_encoding, encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize, encoder_threads=cfg.dataset.encoder_threads, ) robot.connect() if teleop is not None: teleop.connect() listener, events = init_keyboard_listener() if not cfg.dataset.streaming_encoding: logging.info( "Streaming encoding is disabled. If you have capable hardware, consider enabling it for way faster episode saving. --dataset.streaming_encoding=true --dataset.encoder_threads=2 # --dataset.vcodec=auto. More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding" ) with VideoEncodingManager(dataset): recorded_episodes = 0 while recorded_episodes < cfg.dataset.num_episodes and not events["stop_recording"]: log_say(f"Recording episode {dataset.num_episodes}", cfg.play_sounds) record_loop( robot=robot, events=events, fps=cfg.dataset.fps, teleop_action_processor=teleop_action_processor, robot_action_processor=robot_action_processor, robot_observation_processor=robot_observation_processor, teleop=teleop, dataset=dataset, control_time_s=cfg.dataset.episode_time_s, single_task=cfg.dataset.single_task, display_data=cfg.display_data, display_compressed_images=display_compressed_images, ) # Execute a few seconds without recording to give time to manually reset the environment # Skip reset for the last episode to be recorded if not events["stop_recording"] and ( (recorded_episodes < cfg.dataset.num_episodes - 1) or events["rerecord_episode"] ): log_say("Reset the environment", cfg.play_sounds) record_loop( robot=robot, events=events, fps=cfg.dataset.fps, teleop_action_processor=teleop_action_processor, robot_action_processor=robot_action_processor, robot_observation_processor=robot_observation_processor, teleop=teleop, control_time_s=cfg.dataset.reset_time_s, single_task=cfg.dataset.single_task, display_data=cfg.display_data, ) if events["rerecord_episode"]: log_say("Re-record episode", cfg.play_sounds) events["rerecord_episode"] = False events["exit_early"] = False dataset.clear_episode_buffer() continue dataset.save_episode() recorded_episodes += 1 finally: log_say("Stop recording", cfg.play_sounds, blocking=True) if dataset: dataset.finalize() if robot.is_connected: robot.disconnect() if teleop and teleop.is_connected: teleop.disconnect() if not is_headless() and listener: listener.stop() if cfg.dataset.push_to_hub: if dataset and dataset.num_episodes > 0: dataset.push_to_hub(tags=cfg.dataset.tags, private=cfg.dataset.private) else: logging.warning("No episodes saved — skipping push to hub") log_say("Exiting", cfg.play_sounds) return dataset def main(): register_third_party_plugins() record() if __name__ == "__main__": main()