feat(metadata): adding encoding parameters in dataset metadata

This commit is contained in:
CarolinePascal
2026-04-22 21:49:50 +02:00
parent a0e3acdb67
commit e44f86e516
4 changed files with 43 additions and 9 deletions

View File

@@ -48,7 +48,7 @@ from .utils import (
is_valid_version,
update_chunk_file_indices,
)
from .video_utils import get_video_info
from .video_utils import VideoEncoderConfig, get_video_info
CODEBASE_VERSION = "v3.0"
@@ -510,10 +510,23 @@ class LeRobotDatasetMetadata:
self.stats = aggregate_stats([self.stats, episode_stats]) if self.stats is not None else episode_stats
write_stats(self.stats, self.root)
def update_video_info(self, video_key: str | None = None) -> None:
"""
def update_video_info(
self,
video_key: str | None = None,
camera_encoder_config: VideoEncoderConfig | None = None,
) -> None:
"""Populate per-feature video info in ``info.json``.
Warning: this function fills in the info by probing the first video file of each key (chunk 0, file 0),
implicitly assuming that all videos have been encoded the same way. It therefore also assumes that the
first episode has already been encoded and exists on disk.
Args:
video_key: If provided, only update this video key. Otherwise update
all video keys in the dataset.
camera_encoder_config: Encoder configuration used to produce the
videos. When provided, its fields are recorded as
``video.<field>`` entries alongside the stream-derived
``video.*`` entries (see :func:`get_video_info`).
"""
if video_key is not None and video_key not in self.video_keys:
raise ValueError(f"Video key {video_key} not found in dataset")
@@ -522,7 +535,9 @@ class LeRobotDatasetMetadata:
for key in video_keys:
if not self.features[key].get("info", None):
video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0)
self.info.features[key]["info"] = get_video_info(video_path)
self.info.features[key]["info"] = get_video_info(
video_path, camera_encoder_config=camera_encoder_config
)
def update_chunk_settings(
self,

View File

@@ -1850,7 +1850,9 @@ def convert_image_to_video_dataset(
video_path = new_meta.root / new_meta.video_path.format(
video_key=img_key, chunk_index=0, file_index=0
)
new_meta.info.features[img_key]["info"] = get_video_info(video_path)
new_meta.info.features[img_key]["info"] = get_video_info(
video_path, camera_encoder_config=camera_encoder_config
)
write_info(new_meta.info, new_meta.root)

View File

@@ -502,7 +502,9 @@ class DatasetWriter:
# Update video info (only needed when first episode is encoded)
if episode_index == 0:
self._meta.update_video_info(video_key)
self._meta.update_video_info(
video_key, camera_encoder_config=self._camera_encoder_config
)
write_info(self._meta.info, self._meta.root)
metadata = {

View File

@@ -22,7 +22,7 @@ import shutil
import tempfile
import threading
import warnings
from dataclasses import dataclass, field
from dataclasses import asdict, dataclass, field
from fractions import Fraction
from pathlib import Path
from threading import Lock
@@ -1033,8 +1033,18 @@ def get_audio_info(video_path: Path | str) -> dict:
return audio_info
def get_video_info(video_path: Path | str) -> dict:
# Set logging level
def get_video_info(
video_path: Path | str,
camera_encoder_config: "VideoEncoderConfig | None" = None,
) -> dict:
"""Build the ``video.*`` / ``audio.*`` info dict persisted in ``info.json``.
Args:
video_path: Path to the encoded video file to probe.
camera_encoder_config: If provided, record the exact encoder settings used to encode this
video. Stream-derived values take precedence — encoder fields are only written for keys
not already populated from the video file itself.
"""
logging.getLogger("libav").setLevel(av.logging.WARNING)
# Getting video stream information
@@ -1065,6 +1075,11 @@ def get_video_info(video_path: Path | str) -> dict:
# Adding audio stream information
video_info.update(**get_audio_info(video_path))
# Record the encoder configuration, if provided, without overwriting keys already set above
if camera_encoder_config is not None:
for field_name, field_value in asdict(camera_encoder_config).items():
video_info.setdefault(f"video.{field_name}", field_value)
return video_info