From b06f134fe45de08b454d69a37f96349d6aece1ca Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 14 Apr 2026 17:39:21 +0200 Subject: [PATCH] fix(robotwin): re-enable autograd for CuRobo planner warmup and take_action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lerobot_eval wraps the full rollout in torch.no_grad() (lerobot_eval.py:566), but RoboTwin's setup_demo → load_robot → CuroboPlanner(...) runs motion_gen.warmup(), which invokes Newton's-method trajectory optimization. That optimizer calls cost.backward() internally, which raises RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn when autograd is disabled. take_action() hits the same planner path at every step. Wrap both setup_demo and take_action in torch.enable_grad() so CuRobo's optimizer can build its computation graph. Policy inference is unaffected — rollout()'s inner torch.inference_mode() block around select_action() is untouched, so we still don't allocate grad buffers during policy forward. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/lerobot/envs/robotwin.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/lerobot/envs/robotwin.py b/src/lerobot/envs/robotwin.py index 05889fd7d..dc8bc657c 100644 --- a/src/lerobot/envs/robotwin.py +++ b/src/lerobot/envs/robotwin.py @@ -23,6 +23,7 @@ from typing import Any import gymnasium as gym import numpy as np +import torch from gymnasium import spaces from lerobot.types import RobotObservation @@ -320,7 +321,11 @@ class RoboTwinEnv(gym.Env): actual_seed = self.episode_index if seed is None else seed setup_kwargs = _load_robotwin_setup_kwargs(self.task_name) setup_kwargs.update(seed=actual_seed, is_test=True) - self._env.setup_demo(**setup_kwargs) + # setup_demo → load_robot → CuroboPlanner.warmup() runs Newton's-method + # trajectory optimization, which requires autograd. lerobot_eval wraps + # the whole rollout in torch.no_grad(), so re-enable grad here. + with torch.enable_grad(): + self._env.setup_demo(**setup_kwargs) self.episode_index += self._reset_stride self._step_count = 0 @@ -333,10 +338,13 @@ class RoboTwinEnv(gym.Env): raise ValueError(f"Expected 1-D action of shape ({ACTION_DIM},), got {action.shape}") # RoboTwin 2.0 uses take_action(); fall back to step() for older forks. - if hasattr(self._env, "take_action"): - self._env.take_action(action) - else: - self._env.step(action) + # take_action() invokes the CuRobo planner, which needs autograd — + # lerobot_eval wraps the rollout in torch.no_grad(). + with torch.enable_grad(): + if hasattr(self._env, "take_action"): + self._env.take_action(action) + else: + self._env.step(action) self._step_count += 1