mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-31 19:01:28 +00:00
feat(pyav utils): adding support for PyAV encoding parameters validation
This commit is contained in:
265
src/lerobot/datasets/pyav_utils.py
Normal file
265
src/lerobot/datasets/pyav_utils.py
Normal file
@@ -0,0 +1,265 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""PyAV-based compatibility checks for :class:`VideoEncoderConfig`.
|
||||
|
||||
Centralises all :mod:`av` introspection of the bundled FFmpeg build.
|
||||
Checks degrade to a no-op when the target codec isn't available locally.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import av
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from lerobot.datasets.video_utils import VideoEncoderConfig
|
||||
|
||||
logger = logging.getLogger(__name__)

# ``Option.type.name`` values that this module treats as numeric (and therefore
# range-checks against the option's ``min`` / ``max``).
FFMPEG_NUMERIC_OPTION_TYPES = (
    "INT",
    "INT64",
    "UINT64",
    "FLOAT",
    "DOUBLE",
)

# Maps a codec name to the private FFmpeg option that the abstract ``crf``
# tuning field drives for that codec. Codecs missing from this table have no
# private option associated with ``crf`` here.
CRF_OPTION_BY_CODEC: dict[str, str] = {
    # Option literally named "crf".
    "libsvtav1": "crf",
    "h264": "crf",
    "hevc": "crf",
    # Option named "qp".
    "h264_nvenc": "qp",
    "hevc_nvenc": "qp",
    "h264_vaapi": "qp",
    # Option named "global_quality".
    "h264_qsv": "global_quality",
}
|
||||
|
||||
|
||||
@functools.cache
def get_codec(vcodec: str) -> av.codec.Codec | None:
    """Look up the PyAV write-mode ``Codec`` named *vcodec*.

    Results (including ``None``) are memoised per codec name by
    :func:`functools.cache`.

    Args:
        vcodec: Codec name handed to ``av.codec.Codec(vcodec, "w")``.

    Returns:
        The ``Codec`` instance, or ``None`` if constructing it raised.
    """
    try:
        encoder = av.codec.Codec(vcodec, "w")
    except Exception:
        # Best-effort probe: any failure is reported as "codec unavailable".
        encoder = None
    return encoder
|
||||
|
||||
|
||||
@functools.cache
def _get_codec_options_by_name(vcodec: str) -> dict[str, av.option.Option]:
    """Index the options of *vcodec*'s descriptor by option name.

    The mapping is memoised per codec name, so every caller receives the same
    dict object and should treat it as read-only.

    Returns:
        ``{option.name: option}`` for each entry of ``codec.descriptor.options``,
        or an empty dict when :func:`get_codec` returns ``None``.
    """
    encoder = get_codec(vcodec)
    by_name: dict[str, av.option.Option] = {}
    if encoder is not None:
        for option in encoder.descriptor.options:
            by_name[option.name] = option
    return by_name
|
||||
|
||||
|
||||
@functools.cache
def _get_codec_video_formats(vcodec: str) -> tuple[str, ...]:
    """Names of the pixel formats listed in ``codec.video_formats`` for *vcodec*.

    The order is whatever PyAV reports. Memoised per codec name.

    Returns:
        A tuple of format names; empty when the codec is unavailable or when
        ``video_formats`` is ``None`` / empty.
    """
    encoder = get_codec(vcodec)
    if encoder is None:
        return ()
    pixel_formats = encoder.video_formats
    if not pixel_formats:
        return ()
    return tuple(pix.name for pix in pixel_formats)
|
||||
|
||||
|
||||
@functools.cache
def _all_video_encoders() -> tuple[str, ...]:
    """List, in sorted name order, every entry of ``av.codecs_available`` usable as a video encoder.

    A name qualifies when :func:`get_codec` can open it in write mode and the
    resulting codec's ``type`` is ``"video"``. The scan runs once; later calls
    return the cached tuple.
    """

    def _is_video_encoder(name: str) -> bool:
        # ``get_codec`` returns ``None`` for names that cannot be opened for writing.
        encoder = get_codec(name)
        return encoder is not None and encoder.type == "video"

    return tuple(name for name in sorted(av.codecs_available) if _is_video_encoder(name))
|
||||
|
||||
|
||||
def detect_available_encoders(encoders: list[str] | str | None = None) -> list[str]:
    """Filter *encoders* down to those available as video encoders locally.

    Args:
        encoders: Candidate encoder names. ``None`` means "return every video
            encoder PyAV exposes"; a bare ``str`` is treated as a one-item list.

    Returns:
        A new list holding the available candidates in their input order.
        Candidates that are not available are dropped and logged at DEBUG level.
    """
    all_encoders = _all_video_encoders()
    if encoders is None:
        return list(all_encoders)

    requested = [encoders] if isinstance(encoders, str) else encoders
    known = set(all_encoders)  # O(1) membership tests
    found = []
    for candidate in requested:
        if candidate not in known:
            logger.debug("encoder '%s' not available as video encoder", candidate)
            continue
        found.append(candidate)
    return found
|
||||
|
||||
|
||||
def _is_field_supported(
    field_name: str, vcodec: str, options: dict[str, av.option.Option]
) -> bool:
    """Tell whether tuning field *field_name* is meaningful for *vcodec*.

    Args:
        field_name: Abstract tuning field (e.g. ``"g"``, ``"crf"``,
            ``"fast_decode"``, ``"preset"``).
        vcodec: Codec name, used for the per-codec ``crf`` mapping.
        options: The codec's private options keyed by name.

    Returns:
        ``True`` when the field can be applied to the codec.
    """
    if field_name == "g":
        # GOP size is a stream-level setting rather than a private option, so
        # it is accepted for every video codec.
        return True

    if field_name == "fast_decode":
        # libsvtav1: svtav1-params:fast-decode=N — h264/hevc: tune=fastdecode.
        return any(name in options for name in ("svtav1-params", "tune"))

    if field_name != "crf":
        # ``preset`` and any other field backed directly by a private option
        # of the same name.
        return field_name in options

    # The abstract "crf" maps to stream-level q:v for VideoToolbox, and to a
    # codec-specific private option (CRF_OPTION_BY_CODEC) otherwise.
    if vcodec in {"h264_videotoolbox", "hevc_videotoolbox"}:
        return True
    crf_option = CRF_OPTION_BY_CODEC.get(vcodec)
    return crf_option is not None and crf_option in options
|
||||
|
||||
|
||||
def _check_numeric_range(
    label: str, num: float, opt: av.option.Option, vcodec: str
) -> None:
    """Ensure *num* lies within the closed interval ``[opt.min, opt.max]``.

    Args:
        label: Name shown for the value in the error message.
        num: Value to check.
        opt: Option whose ``min`` / ``max`` bound the value.
        vcodec: Codec name, used only in the error message.

    Raises:
        ValueError: if the interval is non-degenerate and *num* is outside it.
    """
    lower = float(opt.min)
    upper = float(opt.max)
    if not lower < upper:
        # Degenerate range (min >= max): nothing meaningful to enforce.
        return
    if lower <= num <= upper:
        return
    raise ValueError(
        f"{label}={num} is out of range for codec {vcodec!r}; must be in [{lower}, {upper}]"
    )
|
||||
|
||||
|
||||
def _validate_option_value(
    vcodec: str, field_name: str, value: Any, opt: av.option.Option
) -> None:
    """Check *value* against the private option *opt* it will be written to.

    Numeric options are range-checked; ``STRING`` options that declare choices
    are membership-checked. A value whose Python type does not match the
    option type is not rejected here; it is left to FFmpeg's own validation at
    encode time.

    Raises:
        ValueError: if a number is out of range or a string is not a declared
            choice.
    """
    kind = opt.type.name

    if kind in FFMPEG_NUMERIC_OPTION_TYPES:
        # ``bool`` is an ``int`` subclass but is never a meaningful numeric setting.
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if is_number:
            _check_numeric_range(field_name, float(value), opt, vcodec)
        return

    if kind != "STRING" or not isinstance(value, str):
        return

    allowed = [choice.name for choice in (opt.choices or [])]
    if not allowed or value in allowed:
        # Options without declared choices accept free-form strings.
        return
    raise ValueError(
        f"{field_name}={value!r} is not a supported choice for codec "
        f"{vcodec!r}; valid choices: {allowed}"
    )
|
||||
|
||||
|
||||
def _validate_extra_option(
    vcodec: str, key: str, value: Any, opt: av.option.Option
) -> None:
    """Validate one ``extra_options`` entry against its matching option *opt*.

    Only numeric options are enforced: the value must be a non-bool number (or
    a string that parses as a float) and must fall inside the option's range.
    Options of any other type are accepted as-is, since FFmpeg takes many
    ad-hoc strings.

    Raises:
        ValueError: if the value is not numeric or is out of range.
    """
    if opt.type.name not in FFMPEG_NUMERIC_OPTION_TYPES:
        return

    label = f"extra_options[{key!r}]"
    type_error = ValueError(
        f"{label}={value!r} is not numeric; codec {vcodec!r} expects a number for this option."
    )

    # ``bool`` passes ``isinstance(..., int)`` but is rejected explicitly.
    if isinstance(value, bool) or not isinstance(value, (int, float, str)):
        raise type_error

    try:
        parsed = float(value)
    except ValueError as err:
        # Only a non-numeric string can land here.
        raise type_error from err

    _check_numeric_range(label, parsed, opt, vcodec)
|
||||
|
||||
|
||||
def _check_pixel_format(vcodec: str, pix_fmt: str, formats: tuple[str, ...]) -> None:
    """Reject *pix_fmt* when the codec's known pixel formats do not include it.

    Args:
        vcodec: Codec name, used only in the error message.
        pix_fmt: Requested pixel format name.
        formats: Pixel formats the codec accepts; an empty tuple disables the check.

    Raises:
        ValueError: if *formats* is non-empty and *pix_fmt* is not among them.
    """
    if not formats or pix_fmt in formats:
        return
    raise ValueError(
        f"pix_fmt={pix_fmt!r} is not supported by codec {vcodec!r}; "
        f"supported pixel formats: {list(formats)}"
    )
|
||||
|
||||
|
||||
def _check_tuning_fields(
    config: VideoEncoderConfig, vcodec: str, options: dict[str, av.option.Option]
) -> None:
    """Validate every set tuning field of *config* against *vcodec*.

    Each name in ``config._TUNING_OPTIONS`` whose value is truthy must be
    supported by the codec. Fields that map straight onto a private option are
    additionally value-checked.

    Raises:
        ValueError: on the first unsupported field or invalid value.
    """
    fields: tuple[str, ...] = config._TUNING_OPTIONS
    supported = [name for name in fields if _is_field_supported(name, vcodec, options)]

    # Value shape is only cross-checkable when the field maps directly to a
    # private option: ``preset`` is literally ``"preset"`` and ``crf`` maps
    # per codec. ``g`` (stream-level) and ``fast_decode`` (composite) are left
    # to FFmpeg at encode time.
    private_option_names = {
        "preset": "preset",
        "crf": CRF_OPTION_BY_CODEC.get(vcodec, ""),
    }

    for name in fields:
        setting = getattr(config, name)
        if not setting:
            # Falsy values (None, 0, "") are treated as "not set".
            # NOTE(review): this also skips an explicit crf=0 — confirm that is intended.
            continue
        if name not in supported:
            raise ValueError(
                f"{name}={setting!r} is not supported by codec {vcodec!r}; "
                f"supported fields for this codec: {supported}"
            )
        option_name = private_option_names.get(name)
        if option_name is None:
            continue
        option = options.get(option_name)
        if option is not None:
            _validate_option_value(vcodec, name, setting, option)
|
||||
|
||||
|
||||
def _check_extra_options(
    config: VideoEncoderConfig, vcodec: str, options: dict[str, av.option.Option]
) -> None:
    """Validate the entries of ``config.extra_options`` that the codec knows about.

    Torchcodec-style policy: only keys the codec exposes as options are
    inspected, and only numeric type / range is enforced. Every other key is
    passed through untouched (muxer options, ``x264-params``-style strings, etc.).

    Raises:
        ValueError: propagated from the per-entry validation.
    """
    for name, raw in config.extra_options.items():
        descriptor = options.get(name)
        if descriptor is not None:
            _validate_extra_option(vcodec, name, raw, descriptor)
|
||||
|
||||
|
||||
def check_config_against_bundled_ffmpeg(config: VideoEncoderConfig) -> None:
    """Verify *config* is compatible with the bundled FFmpeg build.

    Checks pixel format, tuning-field availability, value range/choices for
    fields that map to a private option, and numeric ``extra_options``.
    No-op (apart from a warning) when ``config.vcodec`` isn't in the local
    FFmpeg build.

    Args:
        config: Encoder configuration to validate; ``vcodec``, ``pix_fmt``,
            the tuning fields and ``extra_options`` are read.

    Raises:
        ValueError: on the first incompatibility encountered.
    """
    vcodec = config.vcodec

    # Probe the codec itself rather than inferring availability from an empty
    # private-option table: a codec can exist without exposing any private
    # options, and its pixel format / tuning fields must still be checked.
    if get_codec(vcodec) is None:
        logger.warning(
            "Codec %r is not available in the bundled FFmpeg build; "
            "skipping encoder compatibility checks.",
            vcodec,
        )
        return

    options = _get_codec_options_by_name(vcodec)
    _check_pixel_format(vcodec, config.pix_fmt, _get_codec_video_formats(vcodec))
    _check_tuning_fields(config, vcodec, options)
    _check_extra_options(config, vcodec, options)
|
||||
Reference in New Issue
Block a user