mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-30 18:31:25 +00:00
feat(metadata): adding encoding parameters in dataset metadata
This commit is contained in:
@@ -48,7 +48,7 @@ from .utils import (
|
|||||||
is_valid_version,
|
is_valid_version,
|
||||||
update_chunk_file_indices,
|
update_chunk_file_indices,
|
||||||
)
|
)
|
||||||
from .video_utils import get_video_info
|
from .video_utils import VideoEncoderConfig, get_video_info
|
||||||
|
|
||||||
CODEBASE_VERSION = "v3.0"
|
CODEBASE_VERSION = "v3.0"
|
||||||
|
|
||||||
@@ -510,10 +510,23 @@ class LeRobotDatasetMetadata:
|
|||||||
self.stats = aggregate_stats([self.stats, episode_stats]) if self.stats is not None else episode_stats
|
self.stats = aggregate_stats([self.stats, episode_stats]) if self.stats is not None else episode_stats
|
||||||
write_stats(self.stats, self.root)
|
write_stats(self.stats, self.root)
|
||||||
|
|
||||||
def update_video_info(self, video_key: str | None = None) -> None:
|
def update_video_info(
|
||||||
"""
|
self,
|
||||||
|
video_key: str | None = None,
|
||||||
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Populate per-feature video info in ``info.json``.
|
||||||
|
|
||||||
Warning: this function writes info from first episode videos, implicitly assuming that all videos have
|
Warning: this function writes info from first episode videos, implicitly assuming that all videos have
|
||||||
been encoded the same way. Also, this means it assumes the first episode exists.
|
been encoded the same way. Also, this means it assumes the first episode exists.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
video_key: If provided, only update this video key. Otherwise update
|
||||||
|
all video keys in the dataset.
|
||||||
|
camera_encoder_config: Encoder configuration used to produce the
|
||||||
|
videos. When provided, its fields are recorded as
|
||||||
|
``video.<field>`` entries alongside the stream-derived
|
||||||
|
``video.*`` entries (see :func:`get_video_info`).
|
||||||
"""
|
"""
|
||||||
if video_key is not None and video_key not in self.video_keys:
|
if video_key is not None and video_key not in self.video_keys:
|
||||||
raise ValueError(f"Video key {video_key} not found in dataset")
|
raise ValueError(f"Video key {video_key} not found in dataset")
|
||||||
@@ -522,7 +535,9 @@ class LeRobotDatasetMetadata:
|
|||||||
for key in video_keys:
|
for key in video_keys:
|
||||||
if not self.features[key].get("info", None):
|
if not self.features[key].get("info", None):
|
||||||
video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0)
|
video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0)
|
||||||
self.info.features[key]["info"] = get_video_info(video_path)
|
self.info.features[key]["info"] = get_video_info(
|
||||||
|
video_path, camera_encoder_config=camera_encoder_config
|
||||||
|
)
|
||||||
|
|
||||||
def update_chunk_settings(
|
def update_chunk_settings(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -1850,7 +1850,9 @@ def convert_image_to_video_dataset(
|
|||||||
video_path = new_meta.root / new_meta.video_path.format(
|
video_path = new_meta.root / new_meta.video_path.format(
|
||||||
video_key=img_key, chunk_index=0, file_index=0
|
video_key=img_key, chunk_index=0, file_index=0
|
||||||
)
|
)
|
||||||
new_meta.info.features[img_key]["info"] = get_video_info(video_path)
|
new_meta.info.features[img_key]["info"] = get_video_info(
|
||||||
|
video_path, camera_encoder_config=camera_encoder_config
|
||||||
|
)
|
||||||
|
|
||||||
write_info(new_meta.info, new_meta.root)
|
write_info(new_meta.info, new_meta.root)
|
||||||
|
|
||||||
|
|||||||
@@ -502,7 +502,9 @@ class DatasetWriter:
|
|||||||
|
|
||||||
# Update video info (only needed when first episode is encoded)
|
# Update video info (only needed when first episode is encoded)
|
||||||
if episode_index == 0:
|
if episode_index == 0:
|
||||||
self._meta.update_video_info(video_key)
|
self._meta.update_video_info(
|
||||||
|
video_key, camera_encoder_config=self._camera_encoder_config
|
||||||
|
)
|
||||||
write_info(self._meta.info, self._meta.root)
|
write_info(self._meta.info, self._meta.root)
|
||||||
|
|
||||||
metadata = {
|
metadata = {
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
import threading
|
import threading
|
||||||
import warnings
|
import warnings
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import asdict, dataclass, field
|
||||||
from fractions import Fraction
|
from fractions import Fraction
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
@@ -1033,8 +1033,18 @@ def get_audio_info(video_path: Path | str) -> dict:
|
|||||||
return audio_info
|
return audio_info
|
||||||
|
|
||||||
|
|
||||||
def get_video_info(video_path: Path | str) -> dict:
|
def get_video_info(
|
||||||
# Set logging level
|
video_path: Path | str,
|
||||||
|
camera_encoder_config: "VideoEncoderConfig | None" = None,
|
||||||
|
) -> dict:
|
||||||
|
"""Build the ``video.*`` / ``audio.*`` info dict persisted in ``info.json``.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
video_path: Path to the encoded video file to probe.
|
||||||
|
camera_encoder_config: If provided, record the exact encoder settings used to encode this
|
||||||
|
video. Stream-derived values take precedence — encoder fields are only written for keys
|
||||||
|
not already populated from the video file itself.
|
||||||
|
"""
|
||||||
logging.getLogger("libav").setLevel(av.logging.WARNING)
|
logging.getLogger("libav").setLevel(av.logging.WARNING)
|
||||||
|
|
||||||
# Getting video stream information
|
# Getting video stream information
|
||||||
@@ -1065,6 +1075,11 @@ def get_video_info(video_path: Path | str) -> dict:
|
|||||||
# Adding audio stream information
|
# Adding audio stream information
|
||||||
video_info.update(**get_audio_info(video_path))
|
video_info.update(**get_audio_info(video_path))
|
||||||
|
|
||||||
|
# Add additional encoder configuration if provided
|
||||||
|
if camera_encoder_config is not None:
|
||||||
|
for field_name, field_value in asdict(camera_encoder_config).items():
|
||||||
|
video_info.setdefault(f"video.{field_name}", field_value)
|
||||||
|
|
||||||
return video_info
|
return video_info
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user