diff --git a/examples/evaluate/evaluate_libero.py b/examples/evaluate/evaluate_libero.py
new file mode 100644
--- /dev/null
+++ b/examples/evaluate/evaluate_libero.py
@@ -0,0 +1,128 @@
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This script demonstrates how to evaluate pretrained vision-language-action (VLA) policies
+such as SmolVLA on Libero benchmark tasks using the LeRobot framework.
+
+It showcases the full evaluation pipeline — from environment creation to policy inference,
+visualization, and result logging — and is intended as a reference for benchmarking or
+integrating new robotic policies.
+
+Features included in this script:
+- loading Libero environments (e.g., libero_spatial, libero_object) via `make_env`.
+- initializing pretrained policies (e.g., SmolVLA) from Hugging Face using `make_policy`.
+- applying preprocessing and postprocessing transformations for model compatibility.
+- running evaluation rollouts and recording rendered frames from the simulator.
+- computing success metrics and saving rollout videos as MP4 for qualitative analysis.
+
+The script ends by saving a rollout video (`rollout.mp4`) and printing per-environment
+success indicators for quick visual and numerical evaluation.
+"""
+
+import os
+
+# The rendering backend is chosen up front so that it is already in place when the
+# simulator modules below get imported.
+os.environ["MUJOCO_GL"] = "egl"
+
+import imageio.v2 as imageio  # noqa: E402
+import numpy as np  # noqa: E402
+import torch  # noqa: E402
+
+from lerobot.envs.factory import make_env, make_env_config  # noqa: E402
+from lerobot.envs.utils import add_envs_task, preprocess_observation  # noqa: E402
+from lerobot.policies.factory import (  # noqa: E402
+    make_policy,
+    make_policy_config,
+    make_pre_post_processors,
+)
+
+SMOLVLA_LIBERO_PATH = "HuggingFaceVLA/smolvla_libero"
+LIBERO_SUITE = "libero_spatial"
+MAX_STEPS = 220
+ROLLOUT_VIDEO_PATH = "rollout.mp4"
+# Playback rate of the saved video; adjust it if the simulator renders at another rate.
+ROLLOUT_VIDEO_FPS = 30
+
+
+def main() -> None:
+    """Roll out SmolVLA on one Libero environment, save a video and print the successes."""
+    env_config = make_env_config("libero", task=LIBERO_SUITE)
+    policy_config = make_policy_config("smolvla", pretrained_path=SMOLVLA_LIBERO_PATH)
+    policy = make_policy(cfg=policy_config, env_cfg=env_config)
+
+    # The result is indexed by suite name first, then by environment position.
+    libero_envs = make_env(env_config)
+    print(type(libero_envs))
+    print(libero_envs.keys())
+
+    # Initialize the policy and its processors; here we use SmolVLA.
+    policy.eval()
+    preprocessor, postprocessor = make_pre_post_processors(
+        policy_cfg=policy_config,
+        pretrained_path=SMOLVLA_LIBERO_PATH,
+        # The inference device is set to match the device the policy was loaded on,
+        # overriding any device setting saved at training time.
+        preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}},
+    )
+    policy.reset()
+
+    # For the sake of this example we only use the first environment of the suite.
+    env = libero_envs[LIBERO_SUITE][0]
+
+    observation, info = env.reset()  # a seed can be passed here
+    frames = []
+    # A sub-environment counts as solved once it has reported success at any step.
+    successes = np.zeros(env.num_envs, dtype=bool)
+    done = np.zeros(env.num_envs, dtype=bool)
+
+    for _ in range(MAX_STEPS):
+        if np.all(done):
+            break
+
+        observation = preprocess_observation(observation)
+        observation = add_envs_task(env, observation)
+        observation = preprocessor(observation)
+        with torch.inference_mode():
+            action = policy.select_action(observation)
+        action = postprocessor(action)
+
+        # Record the first entry of the vectorized render call before applying the action.
+        frames.append(env.call("render")[0])
+
+        # Convert the action to a CPU NumPy array before stepping.
+        observation, reward, terminated, truncated, info = env.step(action.to("cpu").numpy())
+
+        if "final_info" in info:
+            final_info = info["final_info"]
+            if not isinstance(final_info, dict):
+                raise RuntimeError(
+                    "Unsupported `final_info` format: expected dict (Gymnasium >= 1.0). "
+                    "You're likely using an older version of gymnasium (< 1.0). Please upgrade."
+                )
+            # Accumulate instead of overwriting so an earlier success is not lost on later steps.
+            successes |= np.asarray(final_info["is_success"], dtype=bool)
+
+        done = terminated | truncated | done
+
+    if frames:
+        imageio.mimsave(ROLLOUT_VIDEO_PATH, frames, fps=ROLLOUT_VIDEO_FPS)
+        print(f"Saved rollout video to {ROLLOUT_VIDEO_PATH}")
+
+    print("The success: ", successes.tolist())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/lerobot/scripts/lerobot_eval.py b/src/lerobot/scripts/lerobot_eval.py
index 0dec18be6..07cf6136a 100644
--- a/src/lerobot/scripts/lerobot_eval.py
+++ b/src/lerobot/scripts/lerobot_eval.py
@@ -502,7 +502,6 @@ def eval_main(cfg: EvalPipelineConfig):
         cfg=cfg.policy,
         env_cfg=cfg.env,
     )
-
     policy.eval()
     preprocessor, postprocessor = make_pre_post_processors(
         policy_cfg=cfg.policy,