From 438c1be1cab3cfbed29b7f6c0e814261afb96c7e Mon Sep 17 00:00:00 2001
From: Pepijn Kooijmans <pepijn@huggingface.co>
Date: Tue, 7 Apr 2026 13:12:42 +0200
Subject: [PATCH] fix(eval): use task_description instead of task for language
 conditioning

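env.call("task") returns the LIBERO task name with underscores
(e.g. "pick_up_the_black_bowl_...") instead of the natural language
description ("pick up the black bowl ..."). The VLM tokenizes the two
forms completely differently, causing 0.0 reward across all episodes.

Read env.call("task_description") first; if that call fails, fall back
to env.call("task"), and if that also fails, use an empty string for
each sub-environment.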

Made-with: Cursor
---
 src/lerobot/scripts/lerobot_eval.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/lerobot/scripts/lerobot_eval.py b/src/lerobot/scripts/lerobot_eval.py
index 1603942a0..9739a77fd 100644
--- a/src/lerobot/scripts/lerobot_eval.py
+++ b/src/lerobot/scripts/lerobot_eval.py
@@ -165,9 +165,15 @@ def rollout(
         if return_observations:
             all_observations.append(deepcopy(observation))
 
-        # Infer "task" from sub-environments.
+        # Infer "task" from sub-environments (prefer natural language description).
         # env.call() works with both SyncVectorEnv and AsyncVectorEnv.
-        observation["task"] = list(env.call("task"))
+        try:
+            observation["task"] = list(env.call("task_description"))
+        except Exception:
+            try:
+                observation["task"] = list(env.call("task"))
+            except Exception:
+                observation["task"] = [""] * env.num_envs
 
         # Apply environment-specific preprocessing (e.g., LiberoProcessorStep for LIBERO)
         observation = env_preprocessor(observation)