mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-31 19:01:28 +00:00
Compare commits
1 Commits
feat/add_d
...
docs/add-e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ccc1a104db |
112
examples/evaluate/evaluate_libero.py
Normal file
112
examples/evaluate/evaluate_libero.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
This script demonstrates how to evaluate pretrained vision-language-action (VLA) policies
|
||||||
|
such as SmolVLA on Libero benchmark tasks using the LeRobot framework.
|
||||||
|
|
||||||
|
It showcases the full evaluation pipeline — from environment creation to policy inference,
|
||||||
|
visualization, and result logging — and is intended as a reference for benchmarking or
|
||||||
|
integrating new robotic policies.
|
||||||
|
|
||||||
|
Features included in this script:
|
||||||
|
- loading Libero environments (e.g., libero_spatial, libero_object) via `make_env`.
|
||||||
|
- initializing pretrained policies (e.g., SmolVLA) from Hugging Face using `make_policy`.
|
||||||
|
- applying preprocessing and postprocessing transformations for model compatibility.
|
||||||
|
- running evaluation rollouts and recording rendered frames from the simulator.
|
||||||
|
- computing success metrics and saving rollout videos as MP4 for qualitative analysis.
|
||||||
|
|
||||||
|
The script ends by saving a rollout video (`rollout.mp4`) and printing per-environment
|
||||||
|
success indicators for quick visual and numerical evaluation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import imageio.v2 as imageio
|
||||||
|
from lerobot.envs.factory import make_env, make_env_config
|
||||||
|
from lerobot.policies.factory import make_policy, make_pre_post_processors
|
||||||
|
from lerobot.policies.factory import make_policy_config
|
||||||
|
from lerobot.envs.utils import (
|
||||||
|
add_envs_task,
|
||||||
|
preprocess_observation,
|
||||||
|
)
|
||||||
|
import os
|
||||||
|
# Select MuJoCo's EGL rendering backend so frames can be rendered off-screen.
os.environ["MUJOCO_GL"] = "egl"

SMOLVLA_LIBERO_PATH = "HuggingFaceVLA/smolvla_libero"
# Output location of the rollout video announced in the module docstring.
ROLLOUT_VIDEO_PATH = "rollout.mp4"
# Playback rate of the saved video.
# NOTE(review): 30 fps is an assumption — confirm against the simulator's render rate.
ROLLOUT_VIDEO_FPS = 30

LIBERO_CONFIG = make_env_config("libero", task="libero_spatial")

# Initialize your policy; here we use SmolVLA.
POLICY_CONFIG = make_policy_config("smolvla", pretrained_path=SMOLVLA_LIBERO_PATH)
policy = make_policy(
    cfg=POLICY_CONFIG,
    env_cfg=LIBERO_CONFIG,
)

# `make_env` returns a dict; it is indexed below first by suite name and then by task index.
libero_env = make_env(LIBERO_CONFIG)
print(type(libero_env))  # <class 'dict'>
print(libero_env.keys())  # contains 'libero_spatial', the suite requested in LIBERO_CONFIG

policy.eval()
preprocessor, postprocessor = make_pre_post_processors(
    policy_cfg=POLICY_CONFIG,
    pretrained_path=SMOLVLA_LIBERO_PATH,
    # The inference device is set to match the device the policy was loaded on,
    # overriding any device setting saved at training time.
    preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}},
)
policy.reset()

# For the sake of this example we only use the first environment of the suite.
libero_spatial_env = libero_env['libero_spatial'][0]
# libero_object_env = libero_env['libero_object'][0]

# Let's first run an evaluation through the first task.
observation, info = libero_spatial_env.reset()  # you can pass seeds
max_steps = 220
step = 0
all_images = []
num_envs = libero_spatial_env.num_envs
done = np.zeros(num_envs, dtype=bool)
# Defined up front so the final report works even if the loop body never runs.
successes = [False] * num_envs

while not np.all(done) and step < max_steps:
    # Convert the raw simulator observation into the policy's input format.
    observation = preprocess_observation(observation)
    observation = add_envs_task(libero_spatial_env, observation)
    observation = preprocessor(observation)

    with torch.inference_mode():
        action = policy.select_action(observation)
    action = postprocessor(action)

    # Convert to CPU / numpy.
    action_numpy = action.to("cpu").numpy()

    # Record the frame of the first sub-environment for the rollout video.
    image = libero_spatial_env.call("render")[0]
    all_images.append(image)

    # Apply the next action.
    observation, reward, terminated, truncated, info = libero_spatial_env.step(action_numpy)

    if "final_info" in info:
        final_info = info["final_info"]
        if not isinstance(final_info, dict):
            raise RuntimeError(
                "Unsupported `final_info` format: expected dict (Gymnasium >= 1.0). "
                "You're likely using an older version of gymnasium (< 1.0). Please upgrade."
            )
        successes = final_info["is_success"].tolist()
    else:
        successes = [False] * num_envs

    # Once an environment is done it stays done for the rest of the rollout.
    done = terminated | truncated | done
    if step + 1 == max_steps:
        done = np.ones_like(done, dtype=bool)
    step += 1

# Save the recorded frames as the rollout video promised in the module docstring.
if all_images:
    imageio.mimsave(ROLLOUT_VIDEO_PATH, all_images, fps=ROLLOUT_VIDEO_FPS)

print("The success: ", successes)
|
||||||
|
|
||||||
@@ -365,7 +365,7 @@ def make_policy(
|
|||||||
if not cfg.input_features:
|
if not cfg.input_features:
|
||||||
cfg.input_features = {key: ft for key, ft in features.items() if key not in cfg.output_features}
|
cfg.input_features = {key: ft for key, ft in features.items() if key not in cfg.output_features}
|
||||||
kwargs["config"] = cfg
|
kwargs["config"] = cfg
|
||||||
|
breakpoint()
|
||||||
if cfg.pretrained_path:
|
if cfg.pretrained_path:
|
||||||
# Load a pretrained policy and override the config if needed (for example, if there are inference-time
|
# Load a pretrained policy and override the config if needed (for example, if there are inference-time
|
||||||
# hyperparameters that we want to vary).
|
# hyperparameters that we want to vary).
|
||||||
|
|||||||
@@ -502,7 +502,6 @@ def eval_main(cfg: EvalPipelineConfig):
|
|||||||
cfg=cfg.policy,
|
cfg=cfg.policy,
|
||||||
env_cfg=cfg.env,
|
env_cfg=cfg.env,
|
||||||
)
|
)
|
||||||
|
|
||||||
policy.eval()
|
policy.eval()
|
||||||
preprocessor, postprocessor = make_pre_post_processors(
|
preprocessor, postprocessor = make_pre_post_processors(
|
||||||
policy_cfg=cfg.policy,
|
policy_cfg=cfg.policy,
|
||||||
|
|||||||
Reference in New Issue
Block a user