diff --git a/src/lerobot/datasets/pyav_utils.py b/src/lerobot/datasets/pyav_utils.py new file mode 100644 index 000000000..bd267a737 --- /dev/null +++ b/src/lerobot/datasets/pyav_utils.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python + +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyAV-based compatibility checks for :class:`VideoEncoderConfig`. + +Centralises all :mod:`av` introspection of the bundled FFmpeg build. +Checks degrade to a no-op when the target codec isn't available locally. +""" + +from __future__ import annotations + +import functools +import logging +from typing import TYPE_CHECKING, Any + +import av + +if TYPE_CHECKING: + from lerobot.datasets.video_utils import VideoEncoderConfig + +logger = logging.getLogger(__name__) + +FFMPEG_NUMERIC_OPTION_TYPES = ("INT", "INT64", "UINT64", "FLOAT", "DOUBLE") + +# Codec-specific FFmpeg private option whose value is controlled by the +# abstract ``crf`` tuning field. +CRF_OPTION_BY_CODEC: dict[str, str] = { + "libsvtav1": "crf", + "h264": "crf", + "hevc": "crf", + "h264_nvenc": "qp", + "hevc_nvenc": "qp", + "h264_vaapi": "qp", + "h264_qsv": "global_quality", +} + + +@functools.cache +def get_codec(vcodec: str) -> av.codec.Codec | None: + """PyAV write-mode ``Codec`` for *vcodec*, or ``None`` if unavailable.""" + try: + return av.codec.Codec(vcodec, "w") + except Exception: + return None + + +@functools.cache +def _get_codec_options_by_name(vcodec: str) -> dict[str, av.option.Option]: + """Private-option name → PyAV ``Option`` for *vcodec* (empty if unavailable).""" + codec = get_codec(vcodec) + if codec is None: + return {} + return {opt.name: opt for opt in codec.descriptor.options} + + +@functools.cache +def _get_codec_video_formats(vcodec: str) -> tuple[str, ...]: + """Pixel formats accepted by *vcodec* in PyAV's preferred order (empty if unknown).""" + codec = get_codec(vcodec) + if codec is None: + return () + return tuple(fmt.name for fmt in (codec.video_formats or [])) + + +@functools.cache +def _all_video_encoders() -> tuple[str, ...]: + """Every video encoder PyAV exposes in the local FFmpeg build, sorted by name.""" + result: list[str] = [] + for name in sorted(av.codecs_available): + codec = get_codec(name) + if codec is not None and codec.type == "video": + result.append(name) + return tuple(result) + + +def detect_available_encoders(encoders: list[str] | str | None = None) -> list[str]: + """Return the subset of *encoders* available as video encoders in the local FFmpeg build. + + ``None`` returns every video encoder PyAV exposes; a single ``str`` is probed as a list of one. + """ + if encoders is None: + return list(_all_video_encoders()) + if isinstance(encoders, str): + encoders = [encoders] + + video_encoders = set(_all_video_encoders()) + available = [] + for name in encoders: + if name in video_encoders: + available.append(name) + else: + logger.debug("encoder '%s' not available as video encoder", name) + return available + + +def _is_field_supported( + field_name: str, vcodec: str, options: dict[str, av.option.Option] +) -> bool: + """Whether tuning option *field_name* is meaningful for *vcodec*.""" + # GOP is a stream-level option (AVStream.gop_size) not stored in private options. + # Every video codec accepts it. + if field_name == "g": + return True + if field_name == "crf": + # Semantic "crf" maps to the codec's private option (see + # CRF_OPTION_BY_CODEC), or to stream-level q:v for VideoToolbox. + opt_name = CRF_OPTION_BY_CODEC.get(vcodec) + return (opt_name is not None and opt_name in options) or vcodec in { + "h264_videotoolbox", + "hevc_videotoolbox", + } + if field_name == "fast_decode": + # libsvtav1: svtav1-params:fast-decode=N — h264/hevc: tune=fastdecode. + return "svtav1-params" in options or "tune" in options + # preset and any future private-option-backed field: direct membership test. + return field_name in options + + +def _check_numeric_range( + label: str, num: float, opt: av.option.Option, vcodec: str +) -> None: + """Raise if *num* lies outside *opt*'s numeric range (no-op if range is degenerate).""" + lo, hi = float(opt.min), float(opt.max) + if lo < hi and not (lo <= num <= hi): + raise ValueError( + f"{label}={num} is out of range for codec {vcodec!r}; must be in [{lo}, {hi}]" + ) + + +def _validate_option_value( + vcodec: str, field_name: str, value: Any, opt: av.option.Option +) -> None: + """Range-check numeric *value* and choice-check string *value* against *opt*. + + Type mismatches fall through to FFmpeg's own validation at encode time. + """ + type_name = opt.type.name + if type_name in FFMPEG_NUMERIC_OPTION_TYPES: + if isinstance(value, bool) or not isinstance(value, (int, float)): + return + _check_numeric_range(field_name, float(value), opt, vcodec) + elif type_name == "STRING": + if not isinstance(value, str): + return + choices = [c.name for c in (opt.choices or [])] + if choices and value not in choices: + raise ValueError( + f"{field_name}={value!r} is not a supported choice for codec " + f"{vcodec!r}; valid choices: {choices}" + ) + else: + return + + +def _validate_extra_option( + vcodec: str, key: str, value: Any, opt: av.option.Option +) -> None: + """Validate an ``extra_options`` entry: enforce numeric range/type only. + + Non-numeric options are passed through (FFmpeg accepts many ad-hoc strings). + """ + if opt.type.name not in FFMPEG_NUMERIC_OPTION_TYPES: + return + + label = f"extra_options[{key!r}]" + not_numeric = ValueError( + f"{label}={value!r} is not numeric; codec {vcodec!r} expects a number for this option." + ) + if isinstance(value, bool): + raise not_numeric + if isinstance(value, (int, float)): + num = float(value) + elif isinstance(value, str): + try: + num = float(value) + except ValueError as e: + raise not_numeric from e + else: + raise not_numeric + + _check_numeric_range(label, num, opt, vcodec) + + +def _check_pixel_format(vcodec: str, pix_fmt: str, formats: tuple[str, ...]) -> None: + if formats and pix_fmt not in formats: + raise ValueError( + f"pix_fmt={pix_fmt!r} is not supported by codec {vcodec!r}; " + f"supported pixel formats: {list(formats)}" + ) + + +def _check_tuning_fields( + config: VideoEncoderConfig, vcodec: str, options: dict[str, av.option.Option] +) -> None: + tuning_options: tuple[str, ...] = config._TUNING_OPTIONS + supported_fields = [f for f in tuning_options if _is_field_supported(f, vcodec, options)] + for field_name in tuning_options: + value = getattr(config, field_name) + if not value: + continue + if field_name not in supported_fields: + raise ValueError( + f"{field_name}={value!r} is not supported by codec {vcodec!r}; " + f"supported fields for this codec: {supported_fields}" + ) + # Value shape is only cross-checkable when the field maps directly + # to a private option: ``preset`` is literally ``"preset"``; + # ``crf`` maps per-codec. ``g`` (stream-level) and ``fast_decode`` + # (composite) fall through to FFmpeg at encode time. + if field_name == "preset": + opt = options.get("preset") + elif field_name == "crf": + opt = options.get(CRF_OPTION_BY_CODEC.get(vcodec, "")) + else: + continue + if opt is not None: + _validate_option_value(vcodec, field_name, value, opt) + + +def _check_extra_options( + config: VideoEncoderConfig, vcodec: str, options: dict[str, av.option.Option] +) -> None: + # Torchcodec-style: only validate keys the codec exposes as AVOptions, + # and only enforce numeric range / numeric-type. Everything else is + # passed through (muxer options, ``x264-params``-style strings, etc.). + for key, value in config.extra_options.items(): + opt = options.get(key) + if opt is None: + continue + _validate_extra_option(vcodec, key, value, opt) + + +def check_config_against_bundled_ffmpeg(config: VideoEncoderConfig) -> None: + """Verify *config* is compatible with the bundled FFmpeg build. + + Checks pixel format, tuning-field availability, value range/choices for + fields that map to a private option, and numeric ``extra_options``. + No-op when ``config.vcodec`` isn't in the local FFmpeg build. + + Raises: + ValueError: on the first incompatibility encountered. + """ + vcodec = config.vcodec + options = _get_codec_options_by_name(vcodec) + if not options: + logger.warning( + "Codec %r is not available in the bundled FFmpeg build; ", + vcodec, + ) + return + _check_pixel_format(vcodec, config.pix_fmt, _get_codec_video_formats(vcodec)) + _check_tuning_fields(config, vcodec, options) + _check_extra_options(config, vcodec, options)