From d2a046dfc5b6f79df34577aa45f32403d897c0a3 Mon Sep 17 00:00:00 2001 From: Khalil Meftah Date: Sun, 26 Apr 2026 18:11:26 +0200 Subject: [PATCH] fix(rl): mirror gym_manipulator in actor --- src/lerobot/rl/actor.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/lerobot/rl/actor.py b/src/lerobot/rl/actor.py index 6167456dc..f75dfac11 100644 --- a/src/lerobot/rl/actor.py +++ b/src/lerobot/rl/actor.py @@ -89,9 +89,9 @@ from lerobot.utils.utils import ( ) from .gym_manipulator import ( - create_transition, make_processors, make_robot_env, + reset_and_build_transition, step_env_and_process_transition, ) from .process import ProcessSignalHandler @@ -266,13 +266,7 @@ def act_with_policy( dataset_stats=cfg.policy.dataset_stats, ) - obs, info = online_env.reset() - env_processor.reset() - action_processor.reset() - - # Process initial observation - transition = create_transition(observation=obs, info=info) - transition = env_processor(transition) + transition = reset_and_build_transition(online_env, env_processor, action_processor) # NOTE: For the moment we will solely handle the case of a single environment sum_reward_episode = 0 @@ -395,14 +389,7 @@ def act_with_policy( episode_intervention_steps = 0 episode_total_steps = 0 - # Reset environment and processors - obs, info = online_env.reset() - env_processor.reset() - action_processor.reset() - - # Process initial observation - transition = create_transition(observation=obs, info=info) - transition = env_processor(transition) + transition = reset_and_build_transition(online_env, env_processor, action_processor) if cfg.env.fps is not None: dt_time = time.perf_counter() - start_time