From e44f86e516f0a4e480884554ccb4aa6ea5376b9e Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Wed, 22 Apr 2026 21:49:50 +0200 Subject: [PATCH] feat(metadata): adding encoding parameters in dataset metadata --- src/lerobot/datasets/dataset_metadata.py | 23 +++++++++++++++++++---- src/lerobot/datasets/dataset_tools.py | 4 +++- src/lerobot/datasets/dataset_writer.py | 4 +++- src/lerobot/datasets/video_utils.py | 21 ++++++++++++++++++--- 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/lerobot/datasets/dataset_metadata.py b/src/lerobot/datasets/dataset_metadata.py index 4f89ba2a4..57b967ac5 100644 --- a/src/lerobot/datasets/dataset_metadata.py +++ b/src/lerobot/datasets/dataset_metadata.py @@ -48,7 +48,7 @@ from .utils import ( is_valid_version, update_chunk_file_indices, ) -from .video_utils import get_video_info +from .video_utils import VideoEncoderConfig, get_video_info CODEBASE_VERSION = "v3.0" @@ -510,10 +510,23 @@ class LeRobotDatasetMetadata: self.stats = aggregate_stats([self.stats, episode_stats]) if self.stats is not None else episode_stats write_stats(self.stats, self.root) - def update_video_info(self, video_key: str | None = None) -> None: - """ + def update_video_info( + self, + video_key: str | None = None, + camera_encoder_config: VideoEncoderConfig | None = None, + ) -> None: + """Populate per-feature video info in ``info.json``. + Warning: this function writes info from first episode videos, implicitly assuming that all videos have been encoded the same way. Also, this means it assumes the first episode exists. + + Args: + video_key: If provided, only update this video key. Otherwise update + all video keys in the dataset. + camera_encoder_config: Encoder configuration used to produce the + videos. When provided, its fields are recorded as + ``video.<field_name>`` entries alongside the stream-derived + ``video.*`` entries (see :func:`get_video_info`).
""" if video_key is not None and video_key not in self.video_keys: raise ValueError(f"Video key {video_key} not found in dataset") @@ -522,7 +535,9 @@ class LeRobotDatasetMetadata: for key in video_keys: if not self.features[key].get("info", None): video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0) - self.info.features[key]["info"] = get_video_info(video_path) + self.info.features[key]["info"] = get_video_info( + video_path, camera_encoder_config=camera_encoder_config + ) def update_chunk_settings( self, diff --git a/src/lerobot/datasets/dataset_tools.py b/src/lerobot/datasets/dataset_tools.py index dd162f150..e8c0d26af 100644 --- a/src/lerobot/datasets/dataset_tools.py +++ b/src/lerobot/datasets/dataset_tools.py @@ -1850,7 +1850,9 @@ def convert_image_to_video_dataset( video_path = new_meta.root / new_meta.video_path.format( video_key=img_key, chunk_index=0, file_index=0 ) - new_meta.info.features[img_key]["info"] = get_video_info(video_path) + new_meta.info.features[img_key]["info"] = get_video_info( + video_path, camera_encoder_config=camera_encoder_config + ) write_info(new_meta.info, new_meta.root) diff --git a/src/lerobot/datasets/dataset_writer.py b/src/lerobot/datasets/dataset_writer.py index 060e7bc48..645d2752a 100644 --- a/src/lerobot/datasets/dataset_writer.py +++ b/src/lerobot/datasets/dataset_writer.py @@ -502,7 +502,9 @@ class DatasetWriter: # Update video info (only needed when first episode is encoded) if episode_index == 0: - self._meta.update_video_info(video_key) + self._meta.update_video_info( + video_key, camera_encoder_config=self._camera_encoder_config + ) write_info(self._meta.info, self._meta.root) metadata = { diff --git a/src/lerobot/datasets/video_utils.py b/src/lerobot/datasets/video_utils.py index 8f5004df6..2badab394 100644 --- a/src/lerobot/datasets/video_utils.py +++ b/src/lerobot/datasets/video_utils.py @@ -22,7 +22,7 @@ import shutil import tempfile import threading import warnings -from 
dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from fractions import Fraction from pathlib import Path from threading import Lock @@ -1033,8 +1033,18 @@ def get_audio_info(video_path: Path | str) -> dict: return audio_info -def get_video_info(video_path: Path | str) -> dict: - # Set logging level +def get_video_info( + video_path: Path | str, + camera_encoder_config: "VideoEncoderConfig | None" = None, +) -> dict: + """Build the ``video.*`` / ``audio.*`` info dict persisted in ``info.json``. + + Args: + video_path: Path to the encoded video file to probe. + camera_encoder_config: If provided, record the exact encoder settings used to encode this + video. Stream-derived values take precedence — encoder fields are only written for keys + not already populated from the video file itself. + """ logging.getLogger("libav").setLevel(av.logging.WARNING) # Getting video stream information @@ -1065,6 +1075,11 @@ def get_video_info(video_path: Path | str) -> dict: # Adding audio stream information video_info.update(**get_audio_info(video_path)) + # Add additional encoder configuration if provided + if camera_encoder_config is not None: + for field_name, field_value in asdict(camera_encoder_config).items(): + video_info.setdefault(f"video.{field_name}", field_value) + return video_info