From 2dd366436ed30ed9729b4f18076a54fec7ec589b Mon Sep 17 00:00:00 2001 From: Khalil Date: Thu, 19 Feb 2026 14:35:02 +0100 Subject: [PATCH] Fix gym-hil integration with the new LeRobot pipeline. (#2482) * Add GymHILAdapterProcessorStep for gym-hil environment integration * Fix action features in control loop for None teleop device with gym-hil * Finalize dataset before pushing to hub for visualization on the hub * Fix neutral action for gripper * fix pre-commit --- src/lerobot/processor/__init__.py | 2 ++ src/lerobot/processor/gym_action_processor.py | 8 +++++ src/lerobot/processor/hil_processor.py | 31 +++++++++++++++++++ src/lerobot/rl/gym_manipulator.py | 15 +++++++-- 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/lerobot/processor/__init__.py b/src/lerobot/processor/__init__.py index 164f7da03..0b63e1606 100644 --- a/src/lerobot/processor/__init__.py +++ b/src/lerobot/processor/__init__.py @@ -44,6 +44,7 @@ from .hil_processor import ( AddTeleopActionAsComplimentaryDataStep, AddTeleopEventsAsInfoStep, GripperPenaltyProcessorStep, + GymHILAdapterProcessorStep, ImageCropResizeProcessorStep, InterventionActionProcessorStep, RewardClassifierProcessorStep, @@ -87,6 +88,7 @@ __all__ = [ "DoneProcessorStep", "EnvAction", "EnvTransition", + "GymHILAdapterProcessorStep", "GripperPenaltyProcessorStep", "hotswap_stats", "IdentityProcessorStep", diff --git a/src/lerobot/processor/gym_action_processor.py b/src/lerobot/processor/gym_action_processor.py index 8fa8cfd86..4f225af92 100644 --- a/src/lerobot/processor/gym_action_processor.py +++ b/src/lerobot/processor/gym_action_processor.py @@ -20,6 +20,7 @@ from lerobot.configs.types import PipelineFeatureType, PolicyFeature from .converters import to_tensor from .core import EnvAction, EnvTransition, PolicyAction +from .hil_processor import TELEOP_ACTION_KEY from .pipeline import ActionProcessorStep, ProcessorStep, ProcessorStepRegistry @@ -89,6 +90,13 @@ class Numpy2TorchActionProcessorStep(ProcessorStep): torch_action = to_tensor(action, dtype=None) # Preserve original dtype new_transition[TransitionKey.ACTION] = torch_action + complementary_data = new_transition.get(TransitionKey.COMPLEMENTARY_DATA, {}) + if TELEOP_ACTION_KEY in complementary_data: + teleop_action = complementary_data[TELEOP_ACTION_KEY] + if isinstance(teleop_action, EnvAction): + complementary_data[TELEOP_ACTION_KEY] = to_tensor(teleop_action) + new_transition[TransitionKey.COMPLEMENTARY_DATA] = complementary_data + return new_transition def transform_features( diff --git a/src/lerobot/processor/hil_processor.py b/src/lerobot/processor/hil_processor.py index 24b5628fa..34eaeed51 100644 --- a/src/lerobot/processor/hil_processor.py +++ b/src/lerobot/processor/hil_processor.py @@ -312,6 +312,37 @@ class TimeLimitProcessorStep(TruncatedProcessorStep): return features +@ProcessorStepRegistry.register("gym_hil_adapter_processor") +class GymHILAdapterProcessorStep(ProcessorStep): + """ + Adapts the output of the `gym-hil` environment to the format expected by `lerobot` processors. + + This step normalizes the `transition` object by: + 1. Copying `teleop_action` from `info` to `complementary_data`. + 2. Copying `is_intervention` from `info` (using the string key) to `info` (using the enum key). + """ + + def __call__(self, transition: EnvTransition) -> EnvTransition: + info = transition.get(TransitionKey.INFO, {}) + complementary_data = transition.get(TransitionKey.COMPLEMENTARY_DATA, {}) + + if TELEOP_ACTION_KEY in info: + complementary_data[TELEOP_ACTION_KEY] = info[TELEOP_ACTION_KEY] + + if "is_intervention" in info: + info[TeleopEvents.IS_INTERVENTION] = info["is_intervention"] + + transition[TransitionKey.INFO] = info + transition[TransitionKey.COMPLEMENTARY_DATA] = complementary_data + + return transition + + def transform_features( + self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] + ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: + return features + + @dataclass @ProcessorStepRegistry.register("gripper_penalty_processor") class GripperPenaltyProcessorStep(ProcessorStep): diff --git a/src/lerobot/rl/gym_manipulator.py b/src/lerobot/rl/gym_manipulator.py index 1c1cb752f..f5fcb7437 100644 --- a/src/lerobot/rl/gym_manipulator.py +++ b/src/lerobot/rl/gym_manipulator.py @@ -36,6 +36,7 @@ from lerobot.processor import ( DeviceProcessorStep, EnvTransition, GripperPenaltyProcessorStep, + GymHILAdapterProcessorStep, ImageCropResizeProcessorStep, InterventionActionProcessorStep, MapDeltaActionToRobotActionStep, @@ -379,6 +380,7 @@ def make_processors( ] env_pipeline_steps = [ + GymHILAdapterProcessorStep(), Numpy2TorchActionProcessorStep(), VanillaObservationProcessorStep(), AddBatchDimensionProcessorStep(), @@ -608,7 +610,14 @@ def control_loop( dataset = None if cfg.mode == "record": - action_features = teleop_device.action_features + if teleop_device: + action_features = teleop_device.action_features + else: + action_features = { + "dtype": "float32", + "shape": (4,), + "names": ["delta_x", "delta_y", "delta_z", "gripper"], + } features = { ACTION: action_features, REWARD: {"dtype": "float32", "shape": (1,), "names": None}, @@ -656,7 +665,7 @@ def control_loop( # Create a neutral action (no movement) neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32) if use_gripper: - neutral_action = torch.cat([neutral_action, torch.tensor([1.0])]) # Gripper stay + neutral_action = torch.cat([neutral_action, torch.tensor([0.0])]) # Gripper stay # Use the new step function transition = step_env_and_process_transition( @@ -725,6 +734,8 @@ def control_loop( precise_sleep(max(dt - (time.perf_counter() - step_start_time), 0.0)) if dataset is not None and cfg.dataset.push_to_hub: + logging.info("Finalizing dataset before pushing to hub") + dataset.finalize() logging.info("Pushing dataset to hub") dataset.push_to_hub()