feat(VideoEncoderConfig init): making VideoEncoderConfig more robust and adaptable to multiple backends

This commit is contained in:
CarolinePascal
2026-04-24 16:43:54 +02:00
parent 4a87ee1537
commit b2c2bb7641
3 changed files with 57 additions and 45 deletions

View File

@@ -41,8 +41,8 @@ from .lerobot_dataset import LeRobotDataset
from .multi_dataset import MultiLeRobotDataset
from .pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from .pyav_utils import (
check_config_against_bundled_ffmpeg,
detect_available_encoders,
check_video_encoder_config_pyav,
detect_available_encoders_pyav,
get_codec,
)
from .sampler import EpisodeAwareSampler
@@ -74,12 +74,12 @@ __all__ = [
"aggregate_datasets",
"aggregate_pipeline_dataset_features",
"aggregate_stats",
"check_config_against_bundled_ffmpeg",
"check_video_encoder_config_pyav",
"convert_image_to_video_dataset",
"create_initial_features",
"create_lerobot_dataset_card",
"delete_episodes",
"detect_available_encoders",
"detect_available_encoders_pyav",
"get_codec",
"get_feature_stats",
"load_episodes",

View File

@@ -74,31 +74,18 @@ def _get_codec_video_formats(vcodec: str) -> tuple[str, ...]:
return tuple(fmt.name for fmt in (codec.video_formats or []))
@functools.cache
def _all_video_encoders() -> tuple[str, ...]:
    """Return the sorted names of all video codecs found in ``av.codecs_available``.

    A name is kept when :func:`get_codec` returns a codec whose ``type`` is
    ``"video"``. The result is memoized, so the codec list is only walked once
    per process.
    """
    return tuple(
        candidate
        for candidate in sorted(av.codecs_available)
        if (info := get_codec(candidate)) is not None and info.type == "video"
    )
def detect_available_encoders(encoders: list[str] | str | None = None) -> list[str]:
def detect_available_encoders_pyav(encoders: list[str] | str) -> list[str]:
"""Return the subset of *encoders* available as video encoders in the local FFmpeg build.
``None`` returns every video encoder PyAV exposes; a single ``str`` is probed as a list of one.
Each name is probed directly via :func:`get_codec`; input order is preserved.
"""
if encoders is None:
return list(_all_video_encoders())
if isinstance(encoders, str):
encoders = [encoders]
video_encoders = set(_all_video_encoders())
available = []
available: list[str] = []
for name in encoders:
if name in video_encoders:
codec = get_codec(name)
if codec is not None and codec.type == "video":
available.append(name)
else:
logger.debug("encoder '%s' not available as video encoder", name)
@@ -242,7 +229,7 @@ def _check_extra_options(
_validate_extra_option(vcodec, key, value, opt)
def check_config_against_bundled_ffmpeg(config: VideoEncoderConfig) -> None:
def check_video_encoder_config_pyav(config: VideoEncoderConfig) -> None:
"""Verify *config* is compatible with the bundled FFmpeg build.
Checks pixel format, tuning-field availability, value range/choices for

View File

@@ -38,8 +38,8 @@ from datasets.features.features import register_feature
from PIL import Image
from lerobot.datasets.pyav_utils import (
check_config_against_bundled_ffmpeg,
detect_available_encoders,
check_video_encoder_config_pyav,
detect_available_encoders_pyav,
)
from lerobot.utils.import_utils import get_safe_default_video_backend
@@ -93,13 +93,56 @@ class VideoEncoderConfig:
extra_options: dict[str, Any] = field(default_factory=dict)
def __post_init__(self) -> None:
self.vcodec = resolve_vcodec(self.vcodec)
self.resolve_vcodec()
# Empty-constructor ergonomics: ``VideoEncoderConfig()`` must "just work".
if self.preset is None and self.vcodec == "libsvtav1":
self.preset = LIBSVTAV1_DEFAULT_PRESET
check_config_against_bundled_ffmpeg(self)
self.validate()
def detect_available_encoders(self, encoders: list[str] | str) -> list[str]:
"""Detect available encoders based on the video backend."""
if self.video_backend == "pyav":
return detect_available_encoders_pyav(encoders)
else:
return []
def validate(self) -> None:
    """Check this config against the configured video backend.

    With the ``"pyav"`` backend the check is delegated to
    ``check_video_encoder_config_pyav``; for any other backend this method
    does nothing.
    """
    if self.video_backend != "pyav":
        return
    check_video_encoder_config_pyav(self)
def resolve_vcodec(self) -> None:
    """Validate ``self.vcodec`` and resolve ``"auto"`` to a concrete encoder, in place.

    ``"auto"`` becomes the first entry of ``HW_ENCODERS`` that
    :meth:`detect_available_encoders` reports as available; if none is, it
    becomes ``"libsvtav1"``. An explicit codec, or the ``"libsvtav1"``
    fallback, is then accepted only if the backend reports it as available.
    An unavailable codec is NOT silently rewritten: it raises.

    Raises:
        ValueError: If ``self.vcodec`` is not in ``VALID_VIDEO_CODECS``, or if
            the resolved codec is not available with ``self.video_backend``.
    """
    if self.vcodec not in VALID_VIDEO_CODECS:
        raise ValueError(
            f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}"
        )

    if self.vcodec == "auto":
        available = self.detect_available_encoders(HW_ENCODERS)
        # Iterate HW_ENCODERS (not `available`) so its order sets the priority.
        for encoder in HW_ENCODERS:
            if encoder in available:
                logger.info(f"Auto-selected video codec: {encoder}")
                self.vcodec = encoder
                return
        logger.info("No hardware encoder available, falling back to software encoder 'libsvtav1'")
        self.vcodec = "libsvtav1"

    # Reached for explicit codecs and for the software fallback above: fail
    # loudly here rather than later, in the middle of an encode.
    if not self.detect_available_encoders(self.vcodec):
        raise ValueError(f"Unsupported video codec: {self.vcodec} with video backend {self.video_backend}")
    logger.info(f"Using video codec: {self.vcodec}")
def get_codec_options(self, encoder_threads: int | None = None) -> dict[str, str]:
"""Build codec-specific FFmpeg options from the tuning fields.
@@ -186,24 +229,6 @@ def _get_codec_options(
return opts
def resolve_vcodec(vcodec: str) -> str:
"""Validate vcodec and resolve 'auto' to best available HW encoder, fallback to libsvtav1."""
# Reject names outside VALID_VIDEO_CODECS before probing for encoders.
if vcodec not in VALID_VIDEO_CODECS:
raise ValueError(f"Invalid vcodec '{vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
# Explicit codec: returned unchanged only when the probe reports it as available.
if vcodec != "auto" and detect_available_encoders(vcodec) != []:
logger.info(f"Using video codec: {vcodec}")
return vcodec
elif vcodec == "auto":
# Probe without a filter, then walk HW_ENCODERS so its order decides which encoder wins.
available = detect_available_encoders()
for encoder in HW_ENCODERS:
if encoder in available:
logger.info(f"Auto-selected video codec: {encoder}")
return encoder
# NOTE(review): indentation is not visible in this view. This `else` is presumably the
# for/else clause (no hardware encoder matched) — confirm. Under that reading an explicit
# but unavailable codec falls through and returns None; if the `else` instead pairs with
# the if/elif, "auto" with no hardware match returns None. Either way one path violates
# the declared `-> str` return type.
else:
logger.info("No hardware encoder available, falling back to software encoder 'libsvtav1'")
return "libsvtav1"
def decode_video_frames(
video_path: Path | str,
timestamps: list[float],