mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-30 18:31:25 +00:00
Compare commits
1 Commits
user/khali
...
docs/add-e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ccc1a104db |
112
examples/evaluate/evaluate_libero.py
Normal file
112
examples/evaluate/evaluate_libero.py
Normal file
@@ -0,0 +1,112 @@
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
This script demonstrates how to evaluate pretrained vision-language-action (VLA) policies
|
||||
such as SmolVLA on Libero benchmark tasks using the LeRobot framework.
|
||||
|
||||
It showcases the full evaluation pipeline — from environment creation to policy inference,
|
||||
visualization, and result logging — and is intended as a reference for benchmarking or
|
||||
integrating new robotic policies.
|
||||
|
||||
Features included in this script:
|
||||
- loading Libero environments (e.g., libero_spatial, libero_object) via `make_env`.
|
||||
- initializing pretrained policies (e.g., SmolVLA) from Hugging Face using `make_policy`.
|
||||
- applying preprocessing and postprocessing transformations for model compatibility.
|
||||
- running evaluation rollouts and recording rendered frames from the simulator.
|
||||
- computing success metrics and saving rollout videos as MP4 for qualitative analysis.
|
||||
|
||||
The script ends by saving a rollout video (`rollout.mp4`) and printing per-environment
|
||||
success indicators for quick visual and numerical evaluation.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import imageio.v2 as imageio
|
||||
from lerobot.envs.factory import make_env, make_env_config
|
||||
from lerobot.policies.factory import make_policy, make_pre_post_processors
|
||||
from lerobot.policies.factory import make_policy_config
|
||||
from lerobot.envs.utils import (
|
||||
add_envs_task,
|
||||
preprocess_observation,
|
||||
)
|
||||
import os
|
||||
# Select MuJoCo's EGL rendering backend so frames can be rendered off-screen.
os.environ["MUJOCO_GL"] = "egl"

SMOLVLA_LIBERO_PATH = "HuggingFaceVLA/smolvla_libero"
ROLLOUT_VIDEO_PATH = "rollout.mp4"

LIBERO_CONFIG = make_env_config("libero", task="libero_spatial")
POLICY_CONFIG = make_policy_config("smolvla", pretrained_path=SMOLVLA_LIBERO_PATH)

# Initialize the policy (here SmolVLA) and switch it to evaluation mode.
policy = make_policy(
    cfg=POLICY_CONFIG,
    env_cfg=LIBERO_CONFIG,
)
policy.eval()

# `make_env` returns a dict keyed by task-suite name; each entry is indexed by task id.
libero_env = make_env(LIBERO_CONFIG)
print(type(libero_env))  # <class 'dict'>
print(libero_env.keys())  # one key per requested suite, e.g. dict_keys(['libero_spatial'])

preprocessor, postprocessor = make_pre_post_processors(
    policy_cfg=POLICY_CONFIG,
    pretrained_path=SMOLVLA_LIBERO_PATH,
    # Run the preprocessor on the same device as the policy, overriding whatever
    # device was stored in the pretrained processor config at training time.
    preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}},
)
policy.reset()

# For the sake of this example we only use the first task of the suite.
libero_spatial_env = libero_env["libero_spatial"][0]
# libero_object_env = libero_env['libero_object'][0]

# Let's first run an evaluation rollout through the first task.
observation, info = libero_spatial_env.reset()  # you can pass seeds
max_steps = 220
step = 0
all_images = []
num_envs = libero_spatial_env.num_envs
done = np.zeros(num_envs, dtype=bool)
# Success flag per environment, latched the first time that environment finishes.
successes = np.zeros(num_envs, dtype=bool)

while not np.all(done) and step < max_steps:
    observation = preprocess_observation(observation)
    observation = add_envs_task(libero_spatial_env, observation)
    observation = preprocessor(observation)
    with torch.inference_mode():
        action = policy.select_action(observation)
    action = postprocessor(action)

    # Convert to CPU / numpy before handing the action to the simulator.
    action_numpy = action.to("cpu").numpy()

    # Record the rendered frame of the first sub-environment for the rollout video.
    image = libero_spatial_env.call("render")[0]
    all_images.append(image)

    # Apply the next action.
    observation, reward, terminated, truncated, info = libero_spatial_env.step(action_numpy)

    if "final_info" in info:
        final_info = info["final_info"]
        if not isinstance(final_info, dict):
            raise RuntimeError(
                "Unsupported `final_info` format: expected dict (Gymnasium >= 1.0). "
                "You're likely using an older version of gymnasium (< 1.0). Please upgrade."
            )
        # Only environments that were still running before this step may report a
        # result; this keeps an earlier success from being overwritten later on.
        finished_successfully = np.asarray(final_info["is_success"], dtype=bool)
        successes |= finished_successfully & ~done

    done = terminated | truncated | done
    step += 1

libero_spatial_env.close()

# Save the recorded frames so the rollout can be inspected visually.
if all_images:
    # NOTE(review): assumes the env config exposes `fps`; falls back to 30 otherwise.
    imageio.mimsave(ROLLOUT_VIDEO_PATH, all_images, fps=getattr(LIBERO_CONFIG, "fps", 30))
    print(f"Saved rollout video to {ROLLOUT_VIDEO_PATH}")

print("The success: ", successes.tolist())
|
||||
|
||||
@@ -365,7 +365,7 @@ def make_policy(
|
||||
if not cfg.input_features:
|
||||
cfg.input_features = {key: ft for key, ft in features.items() if key not in cfg.output_features}
|
||||
kwargs["config"] = cfg
|
||||
|
||||
breakpoint()
|
||||
if cfg.pretrained_path:
|
||||
# Load a pretrained policy and override the config if needed (for example, if there are inference-time
|
||||
# hyperparameters that we want to vary).
|
||||
|
||||
@@ -502,7 +502,6 @@ def eval_main(cfg: EvalPipelineConfig):
|
||||
cfg=cfg.policy,
|
||||
env_cfg=cfg.env,
|
||||
)
|
||||
|
||||
policy.eval()
|
||||
preprocessor, postprocessor = make_pre_post_processors(
|
||||
policy_cfg=cfg.policy,
|
||||
|
||||
Reference in New Issue
Block a user