From b2c2bb7641cec1f5676d6758bd2ff7c2526f4ceb Mon Sep 17 00:00:00 2001
From: CarolinePascal
Date: Fri, 24 Apr 2026 16:43:54 +0200
Subject: [PATCH] feat(VideoEncoderConfig init): making VideoEncoderConfig more robust and adaptable to multiple backends

---
 src/lerobot/datasets/__init__.py    |  8 ++--
 src/lerobot/datasets/pyav_utils.py  | 25 +++--------
 src/lerobot/datasets/video_utils.py | 69 ++++++++++++++++++++---------
 3 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/src/lerobot/datasets/__init__.py b/src/lerobot/datasets/__init__.py
index 2515a1fd2..d916ec7bb 100644
--- a/src/lerobot/datasets/__init__.py
+++ b/src/lerobot/datasets/__init__.py
@@ -41,8 +41,8 @@ from .lerobot_dataset import LeRobotDataset
 from .multi_dataset import MultiLeRobotDataset
 from .pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
 from .pyav_utils import (
-    check_config_against_bundled_ffmpeg,
-    detect_available_encoders,
+    check_video_encoder_config_pyav,
+    detect_available_encoders_pyav,
     get_codec,
 )
 from .sampler import EpisodeAwareSampler
@@ -74,12 +74,12 @@ __all__ = [
     "aggregate_datasets",
     "aggregate_pipeline_dataset_features",
     "aggregate_stats",
-    "check_config_against_bundled_ffmpeg",
+    "check_video_encoder_config_pyav",
     "convert_image_to_video_dataset",
     "create_initial_features",
     "create_lerobot_dataset_card",
     "delete_episodes",
-    "detect_available_encoders",
+    "detect_available_encoders_pyav",
     "get_codec",
     "get_feature_stats",
     "load_episodes",
diff --git a/src/lerobot/datasets/pyav_utils.py b/src/lerobot/datasets/pyav_utils.py
index bd267a737..44501e43f 100644
--- a/src/lerobot/datasets/pyav_utils.py
+++ b/src/lerobot/datasets/pyav_utils.py
@@ -74,31 +74,18 @@ def _get_codec_video_formats(vcodec: str) -> tuple[str, ...]:
     return tuple(fmt.name for fmt in (codec.video_formats or []))
 
 
-@functools.cache
-def _all_video_encoders() -> tuple[str, ...]:
-    """Every video encoder PyAV exposes in the local FFmpeg build, sorted by name."""
-    result: list[str] = []
-    for name in sorted(av.codecs_available):
-        codec = get_codec(name)
-        if codec is not None and codec.type == "video":
-            result.append(name)
-    return tuple(result)
-
-
-def detect_available_encoders(encoders: list[str] | str | None = None) -> list[str]:
+def detect_available_encoders_pyav(encoders: list[str] | str) -> list[str]:
     """Return the subset of *encoders* available as video encoders in the local FFmpeg build.
 
-    ``None`` returns every video encoder PyAV exposes; a single ``str`` is probed as a list of one.
+    Each name is probed directly via :func:`get_codec`; input order is preserved.
     """
-    if encoders is None:
-        return list(_all_video_encoders())
     if isinstance(encoders, str):
         encoders = [encoders]
 
-    video_encoders = set(_all_video_encoders())
-    available = []
+    available: list[str] = []
     for name in encoders:
-        if name in video_encoders:
+        codec = get_codec(name)
+        if codec is not None and codec.type == "video":
             available.append(name)
         else:
             logger.debug("encoder '%s' not available as video encoder", name)
@@ -242,7 +229,7 @@ def _check_extra_options(
         _validate_extra_option(vcodec, key, value, opt)
 
 
-def check_config_against_bundled_ffmpeg(config: VideoEncoderConfig) -> None:
+def check_video_encoder_config_pyav(config: VideoEncoderConfig) -> None:
     """Verify *config* is compatible with the bundled FFmpeg build.
 
     Checks pixel format, tuning-field availability, value range/choices for
diff --git a/src/lerobot/datasets/video_utils.py b/src/lerobot/datasets/video_utils.py
index fe64d166e..7ef097d88 100644
--- a/src/lerobot/datasets/video_utils.py
+++ b/src/lerobot/datasets/video_utils.py
@@ -38,8 +38,8 @@ from datasets.features.features import register_feature
 from PIL import Image
 
 from lerobot.datasets.pyav_utils import (
-    check_config_against_bundled_ffmpeg,
-    detect_available_encoders,
+    check_video_encoder_config_pyav,
+    detect_available_encoders_pyav,
 )
 from lerobot.utils.import_utils import get_safe_default_video_backend
 
@@ -93,13 +93,56 @@ class VideoEncoderConfig:
     extra_options: dict[str, Any] = field(default_factory=dict)
 
     def __post_init__(self) -> None:
-        self.vcodec = resolve_vcodec(self.vcodec)
+        self.resolve_vcodec()
 
         # Empty-constructor ergonomics: ``VideoEncoderConfig()`` must "just work".
         if self.preset is None and self.vcodec == "libsvtav1":
             self.preset = LIBSVTAV1_DEFAULT_PRESET
 
-        check_config_against_bundled_ffmpeg(self)
+        self.validate()
+
+    def detect_available_encoders(self, encoders: list[str] | str) -> list[str]:
+        """Return the subset of *encoders* usable with the configured video backend.
+
+        Only the ``pyav`` backend is probed; any other backend yields an empty list.
+        """
+        if self.video_backend == "pyav":
+            return detect_available_encoders_pyav(encoders)
+        return []
+
+    def validate(self) -> None:
+        """Check this config against the video backend (only ``pyav`` is checked)."""
+        if self.video_backend == "pyav":
+            check_video_encoder_config_pyav(self)
+
+    def resolve_vcodec(self) -> None:
+        """Validate ``vcodec`` and resolve ``"auto"`` in place.
+
+        ``"auto"`` picks the first available ``HW_ENCODERS`` entry, else ``libsvtav1``.
+
+        Raises:
+            ValueError: If ``vcodec`` is not in ``VALID_VIDEO_CODECS``, or if the
+                resolved codec is not available with ``video_backend``.
+        """
+        if self.vcodec not in VALID_VIDEO_CODECS:
+            raise ValueError(
+                f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}"
+            )
+        if self.vcodec == "auto":
+            available = self.detect_available_encoders(HW_ENCODERS)
+            for encoder in HW_ENCODERS:
+                if encoder in available:
+                    logger.info(f"Auto-selected video codec: {encoder}")
+                    self.vcodec = encoder
+                    return
+            logger.info("No hardware encoder available, falling back to software encoder 'libsvtav1'")
+            self.vcodec = "libsvtav1"
+
+        if not self.detect_available_encoders(self.vcodec):
+            raise ValueError(
+                f"Unsupported video codec: {self.vcodec} with video backend {self.video_backend}"
+            )
+        logger.info(f"Using video codec: {self.vcodec}")
 
     def get_codec_options(self, encoder_threads: int | None = None) -> dict[str, str]:
         """Build codec-specific FFmpeg options from the tuning fields.
@@ -186,24 +229,6 @@ def _get_codec_options(
     return opts
 
 
-def resolve_vcodec(vcodec: str) -> str:
-    """Validate vcodec and resolve 'auto' to best available HW encoder, fallback to libsvtav1."""
-    if vcodec not in VALID_VIDEO_CODECS:
-        raise ValueError(f"Invalid vcodec '{vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
-    if vcodec != "auto" and detect_available_encoders(vcodec) != []:
-        logger.info(f"Using video codec: {vcodec}")
-        return vcodec
-    elif vcodec == "auto":
-        available = detect_available_encoders()
-        for encoder in HW_ENCODERS:
-            if encoder in available:
-                logger.info(f"Auto-selected video codec: {encoder}")
-                return encoder
-    else:
-        logger.info("No hardware encoder available, falling back to software encoder 'libsvtav1'")
-        return "libsvtav1"
-
-
 def decode_video_frames(
     video_path: Path | str,
     timestamps: list[float],