mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-04 12:51:27 +00:00
* chore(video backend): renaming codec into video_backend in get_safe_default_video_backend() * feat(pyav utils): adding support for PyAV encoding parameters validation * feat(VideoEncoderConfig): creating a VideoEncoderConfig to encapsulate encoding parameters * feat(VideoEncoderConfig): propagating the VideoEncoderConfig in the codebase * chore(docs): updating the docs * feat(metadata): adding encoding parameters in dataset metadata * fix(concatenation compatibility): adding compatibility check when concatenating video files * feat(VideoEncoderConfig init): making VideoEncoderConfig more robust and adaptable to multiple backends * feat(pyav checks): making pyav parameters checks more robust * chore(duplicate): removing duplicate get_codec_options definition * test(existing): adapting existing tests * test(new): adding new tests for encoding related features * chore(format): fixing formatting issues * chore(PyAV): cleaning up PyAV utils and encoding parameters checks to stick to the minimum required tooling. * chore(format): formatting code * chore(docstrings): updating docstrings * fix(camera_encoder_config): Removing camera_encoder_config from LeRobotDataset, as it's only required in LeRobotDatasetWriter. * feat(default values): applying a consistent naming convention for default RGB cameras video encoder parameters * fix(rollout): propagating VideoEncoderConfig to the latest recording modes * chore(format): formatting code, fixing error messages and variable names * fix(arguments order): reverting changes in arguments order in StreamingVideoEncoder * chore(relative imports): switching to relative local imports within lerobot.datasets * test(artifacts): cleaning up artifacts for the video encoding tests * chore(docs): updating docs * chore(format): formatting code * fix(imports): refactoring the file architecture to avoid circular imports. VideoEncoderConfig is now defined in lerobot.configs and lazily imports av at runtime. 
* fix(typos): fixing typos and small mistakes * test(factories): updating factories * feat(aggregate): updating dataset aggregation procedure. Encoding tuning parameters (crf, g, ...) are ignored for validation and changed to None in the aggregated dataset if incompatible. * docs(typos): fixing typos * fix(deletion): reverting unwanted deletion * fix(typos): fixing multiple typos * feat(codec options): passing codec options to lerobot_edit_dataset episode deletion tool * typo(typo): typo * fix(typos): fixing remaining typos * chore(rename): renaming camera_encoder_config to camera_encoder * docs(clean): cleaning and formatting docs * docs(dataset): adding details about datasets * chore(format): formatting code * docs(warning): adding warning regarding encoding parameters modification * fix(re-encoding): removing inconsistent re-encoding option in lerobot_edit_dataset * typos(typos): typos * chore(format): resolving prettier issues * fix(h264_nvenc): fixing crf handling for h264_nvenc * docs(clean): removing too technical parts of the docs * fix(imports): fixing imports at the __init__ level * fix(imports): fixing not very pretty imports in video config file
82 lines
4.2 KiB
Python
82 lines
4.2 KiB
Python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""Shared dataset recording configuration used by both ``lerobot-record`` and ``lerobot-rollout``."""
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
from .video import VideoEncoderConfig, camera_encoder_defaults
|
|
|
|
|
|
@dataclass
class DatasetRecordConfig:
    """Dataset recording options shared by ``lerobot-record`` and ``lerobot-rollout``.

    Every field has a default, so the config can be built with no arguments
    and then overridden selectively.
    """

    # Identifier of the dataset. The convention is '{hf_username}/{dataset_name}' (e.g. `lerobot/test`).
    repo_id: str = ""
    # Short, accurate description of the recorded task
    # (e.g. "Pick the Lego block and drop it in the box on the right.").
    single_task: str = ""
    # Root directory in which the dataset is stored (e.g. 'dataset/path').
    # When None, $HF_LEROBOT_HOME/repo_id is used.
    root: str | Path | None = None
    # Frames-per-second limit.
    fps: int = 30
    # Recording duration of each episode, in seconds.
    episode_time_s: int | float = 60
    # Time allotted to reset the environment after each episode, in seconds.
    reset_time_s: int | float = 60
    # How many episodes to record.
    num_episodes: int = 50
    # Whether the dataset frames are encoded into video.
    video: bool = True
    # Whether the dataset is uploaded to the Hugging Face hub.
    push_to_hub: bool = True
    # Whether the upload goes to a private repository on the Hugging Face hub.
    private: bool = False
    # Optional tags attached to the dataset on the hub.
    tags: list[str] | None = None
    # Number of subprocesses saving frames as PNG. 0 means threads only; ≥1 means
    # subprocesses, each one writing images with its own threads. The best
    # process/thread combination depends on the system. Recommended setup:
    # 4 threads per camera with 0 processes. If fps is unstable, tune the thread
    # count first; if it is still unstable, try 1 or more subprocesses.
    num_image_writer_processes: int = 0
    # Number of threads, per camera, writing frames to disk as PNG images.
    # Too many threads can block the main thread and make teleoperation fps unstable;
    # too few can lower the camera fps.
    num_image_writer_threads_per_camera: int = 4
    # Number of episodes recorded before videos are batch encoded.
    # 1 means immediate encoding (default behavior); higher values batch the encoding.
    video_encoding_batch_size: int = 1
    # Encoder settings for camera MP4s (codec, quality, GOP, etc.). Tuned through nested CLI keys,
    # e.g. ``--dataset.camera_encoder.vcodec=h264`` (see ``VideoEncoderConfig``).
    camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
    # Streaming video encoding: frames are encoded in real time during capture instead of
    # being written as PNG images first, which makes save_episode() near-instant.
    # More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding
    streaming_encoding: bool = False
    # Maximum number of frames buffered per camera when streaming encoding is used.
    # ~1s buffer at 30fps. Provides backpressure if the encoder can't keep up.
    encoder_queue_maxsize: int = 30
    # Threads per encoder instance. None = auto (codec default).
    # Lower values reduce CPU usage; maps to 'lp' (via svtav1-params) for libsvtav1
    # and to 'threads' for h264/hevc.
    encoder_threads: int | None = None

    def stamp_repo_id(self) -> None:
        """Suffix ``repo_id`` with a date-time tag so each recording session gets a unique name.

        Call this explicitly when a dataset is *created*. Do not call it on
        resume: the stored ``repo_id`` is already stamped and must be kept as is.
        An empty ``repo_id`` is left untouched.
        """
        if not self.repo_id:
            return

        session_tag = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.repo_id = f"{self.repo_id}_{session_tag}"
|