mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-31 10:51:35 +00:00
feat(VideoEncoderConfig init): making VideoEncoderConfig more robust and adaptable to multiple backends
This commit is contained in:
@@ -41,8 +41,8 @@ from .lerobot_dataset import LeRobotDataset
|
||||
from .multi_dataset import MultiLeRobotDataset
|
||||
from .pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
|
||||
from .pyav_utils import (
|
||||
check_config_against_bundled_ffmpeg,
|
||||
detect_available_encoders,
|
||||
check_video_encoder_config_pyav,
|
||||
detect_available_encoders_pyav,
|
||||
get_codec,
|
||||
)
|
||||
from .sampler import EpisodeAwareSampler
|
||||
@@ -74,12 +74,12 @@ __all__ = [
|
||||
"aggregate_datasets",
|
||||
"aggregate_pipeline_dataset_features",
|
||||
"aggregate_stats",
|
||||
"check_config_against_bundled_ffmpeg",
|
||||
"check_video_encoder_config_pyav",
|
||||
"convert_image_to_video_dataset",
|
||||
"create_initial_features",
|
||||
"create_lerobot_dataset_card",
|
||||
"delete_episodes",
|
||||
"detect_available_encoders",
|
||||
"detect_available_encoders_pyav",
|
||||
"get_codec",
|
||||
"get_feature_stats",
|
||||
"load_episodes",
|
||||
|
||||
@@ -74,31 +74,18 @@ def _get_codec_video_formats(vcodec: str) -> tuple[str, ...]:
|
||||
return tuple(fmt.name for fmt in (codec.video_formats or []))
|
||||
|
||||
|
||||
@functools.cache
|
||||
def _all_video_encoders() -> tuple[str, ...]:
|
||||
"""Every video encoder PyAV exposes in the local FFmpeg build, sorted by name."""
|
||||
result: list[str] = []
|
||||
for name in sorted(av.codecs_available):
|
||||
codec = get_codec(name)
|
||||
if codec is not None and codec.type == "video":
|
||||
result.append(name)
|
||||
return tuple(result)
|
||||
|
||||
|
||||
def detect_available_encoders(encoders: list[str] | str | None = None) -> list[str]:
|
||||
def detect_available_encoders_pyav(encoders: list[str] | str) -> list[str]:
|
||||
"""Return the subset of *encoders* available as video encoders in the local FFmpeg build.
|
||||
|
||||
``None`` returns every video encoder PyAV exposes; a single ``str`` is probed as a list of one.
|
||||
Each name is probed directly via :func:`get_codec`; input order is preserved.
|
||||
"""
|
||||
if encoders is None:
|
||||
return list(_all_video_encoders())
|
||||
if isinstance(encoders, str):
|
||||
encoders = [encoders]
|
||||
|
||||
video_encoders = set(_all_video_encoders())
|
||||
available = []
|
||||
available: list[str] = []
|
||||
for name in encoders:
|
||||
if name in video_encoders:
|
||||
codec = get_codec(name)
|
||||
if codec is not None and codec.type == "video":
|
||||
available.append(name)
|
||||
else:
|
||||
logger.debug("encoder '%s' not available as video encoder", name)
|
||||
@@ -242,7 +229,7 @@ def _check_extra_options(
|
||||
_validate_extra_option(vcodec, key, value, opt)
|
||||
|
||||
|
||||
def check_config_against_bundled_ffmpeg(config: VideoEncoderConfig) -> None:
|
||||
def check_video_encoder_config_pyav(config: VideoEncoderConfig) -> None:
|
||||
"""Verify *config* is compatible with the bundled FFmpeg build.
|
||||
|
||||
Checks pixel format, tuning-field availability, value range/choices for
|
||||
|
||||
@@ -38,8 +38,8 @@ from datasets.features.features import register_feature
|
||||
from PIL import Image
|
||||
|
||||
from lerobot.datasets.pyav_utils import (
|
||||
check_config_against_bundled_ffmpeg,
|
||||
detect_available_encoders,
|
||||
check_video_encoder_config_pyav,
|
||||
detect_available_encoders_pyav,
|
||||
)
|
||||
from lerobot.utils.import_utils import get_safe_default_video_backend
|
||||
|
||||
@@ -93,13 +93,56 @@ class VideoEncoderConfig:
|
||||
extra_options: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.vcodec = resolve_vcodec(self.vcodec)
|
||||
self.resolve_vcodec()
|
||||
|
||||
# Empty-constructor ergonomics: ``VideoEncoderConfig()`` must "just work".
|
||||
if self.preset is None and self.vcodec == "libsvtav1":
|
||||
self.preset = LIBSVTAV1_DEFAULT_PRESET
|
||||
|
||||
check_config_against_bundled_ffmpeg(self)
|
||||
self.validate()
|
||||
|
||||
|
||||
def detect_available_encoders(self, encoders: list[str] | str) -> list[str]:
|
||||
"""Detect available encoders based on the video backend."""
|
||||
if self.video_backend == "pyav":
|
||||
return detect_available_encoders_pyav(encoders)
|
||||
else:
|
||||
return []
|
||||
|
||||
|
||||
def validate(self) -> None:
|
||||
"""Validate the video encoder config."""
|
||||
if self.video_backend == "pyav":
|
||||
check_video_encoder_config_pyav(self)
|
||||
|
||||
|
||||
def resolve_vcodec(self) -> None:
|
||||
"""Validate vcodec and resolve 'auto' to best available HW encoder, fallback to libsvtav1.
|
||||
|
||||
A codec (explicitly requested or resolved from ``auto``) that the selected
|
||||
video backend does not report as available is not rewritten: a
|
||||
``ValueError`` naming the codec and the video backend is raised instead.
|
||||
"""
|
||||
if self.vcodec not in VALID_VIDEO_CODECS:
|
||||
raise ValueError(
|
||||
f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}"
|
||||
)
|
||||
if self.vcodec == "auto":
|
||||
available = self.detect_available_encoders(HW_ENCODERS)
|
||||
for encoder in HW_ENCODERS:
|
||||
if encoder in available:
|
||||
logger.info(f"Auto-selected video codec: {encoder}")
|
||||
self.vcodec = encoder
|
||||
return
|
||||
logger.info("No hardware encoder available, falling back to software encoder 'libsvtav1'")
|
||||
self.vcodec = "libsvtav1"
|
||||
|
||||
if self.detect_available_encoders(self.vcodec):
|
||||
logger.info(f"Using video codec: {self.vcodec}")
|
||||
self.vcodec = self.vcodec
|
||||
return
|
||||
raise ValueError(f"Unsupported video codec: {self.vcodec} with video backend {self.video_backend}")
|
||||
|
||||
|
||||
def get_codec_options(self, encoder_threads: int | None = None) -> dict[str, str]:
|
||||
"""Build codec-specific FFmpeg options from the tuning fields.
|
||||
@@ -186,24 +229,6 @@ def _get_codec_options(
|
||||
return opts
|
||||
|
||||
|
||||
def resolve_vcodec(vcodec: str) -> str:
|
||||
"""Validate vcodec and resolve 'auto' to best available HW encoder, fallback to libsvtav1."""
|
||||
if vcodec not in VALID_VIDEO_CODECS:
|
||||
raise ValueError(f"Invalid vcodec '{vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
|
||||
if vcodec != "auto" and detect_available_encoders(vcodec) != []:
|
||||
logger.info(f"Using video codec: {vcodec}")
|
||||
return vcodec
|
||||
elif vcodec == "auto":
|
||||
available = detect_available_encoders()
|
||||
for encoder in HW_ENCODERS:
|
||||
if encoder in available:
|
||||
logger.info(f"Auto-selected video codec: {encoder}")
|
||||
return encoder
|
||||
else:
|
||||
logger.info("No hardware encoder available, falling back to software encoder 'libsvtav1'")
|
||||
return "libsvtav1"
|
||||
|
||||
|
||||
def decode_video_frames(
|
||||
video_path: Path | str,
|
||||
timestamps: list[float],
|
||||
|
||||
Reference in New Issue
Block a user