Add missing RL config options: add_ee_pose_to_observation and gripper_penalty_in_reward (#2873)

* fix(RL) add missing config arguments * respond to copilot review * fix(revert penalty in reward): reverting gripper penalty addition in reward. This is already done in compute_loss_discrete_critic. --------- Co-authored-by: CarolinePascal <caroline8.pascal@gmail.com>
2026-06-05 05:11:25 +00:00 · 2026-02-02 22:14:03 +01:00
parent 9c24a09665
commit 14a15f90e7
3 changed files with 23 additions and 12 deletions
--- a/src/lerobot/processor/hil_processor.py
+++ b/src/lerobot/processor/hil_processor.py
@@ -314,7 +314,7 @@ class TimeLimitProcessorStep(TruncatedProcessorStep):

@dataclass
@ProcessorStepRegistry.register("gripper_penalty_processor")
-class GripperPenaltyProcessorStep(ComplementaryDataProcessorStep):
+class GripperPenaltyProcessorStep(ProcessorStep):
    """
    Applies a penalty for inefficient gripper usage.

@@ -329,26 +329,27 @@ class GripperPenaltyProcessorStep(ComplementaryDataProcessorStep):
    penalty: float = -0.01
    max_gripper_pos: float = 30.0

-    def complementary_data(self, complementary_data: dict) -> dict:
+    def __call__(self, transition: EnvTransition) -> EnvTransition:
        """
        Calculates the gripper penalty and adds it to the complementary data.

        Args:
-            complementary_data: The incoming complementary data, which should contain
-                                raw joint positions.
+            transition: The incoming environment transition.

        Returns:
-            A new complementary data dictionary with the `discrete_penalty` key added.
+            The modified transition with the penalty added to complementary data.
        """
-        action = self.transition.get(TransitionKey.ACTION)
+        new_transition = transition.copy()
+        action = new_transition.get(TransitionKey.ACTION)
+        complementary_data = new_transition.get(TransitionKey.COMPLEMENTARY_DATA, {})

        raw_joint_positions = complementary_data.get("raw_joint_positions")
        if raw_joint_positions is None:
-            return complementary_data
+            return new_transition

        current_gripper_pos = raw_joint_positions.get(GRIPPER_KEY, None)
        if current_gripper_pos is None:
-            return complementary_data
+            return new_transition

        # Gripper action is a PolicyAction at this stage
        gripper_action = action[-1].item()
@@ -364,11 +365,12 @@ class GripperPenaltyProcessorStep(ComplementaryDataProcessorStep):

        gripper_penalty = self.penalty * int(gripper_penalty_bool)

-        # Create new complementary data with penalty info
+        # Update complementary data with penalty info
        new_complementary_data = dict(complementary_data)
        new_complementary_data[DISCRETE_PENALTY_KEY] = gripper_penalty
+        new_transition[TransitionKey.COMPLEMENTARY_DATA] = new_complementary_data

-        return new_complementary_data
+        return new_transition

    def get_config(self) -> dict[str, Any]:
        """