Hardcoded some normalization parameters. TODO refactor

Added action masking at the level of the intervention actions and the offline dataset.

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
2026-05-31 19:01:28 +00:00 · 2025-02-13 14:27:14 +01:00
parent 98c6557869
commit 5195f40fd3
6 changed files with 36 additions and 8 deletions
--- a/lerobot/scripts/server/actor_server.py
+++ b/lerobot/scripts/server/actor_server.py
@@ -201,6 +201,7 @@ def act_with_policy(cfg: DictConfig, robot: Robot, reward_classifier: nn.Module)
        "action": {"min": min_action_space, "max": max_action_space}
    }
    cfg.policy.output_normalization_params = output_normalization_params
+    cfg.policy.output_shapes["action"] = online_env.action_space.spaces[0].shape

    ### Instantiate the policy in both the actor and learner processes
    ### To avoid sending a SACPolicy object through the port, we create a policy intance
@@ -252,6 +253,8 @@ def act_with_policy(cfg: DictConfig, robot: Robot, reward_classifier: nn.Module)
        # NOTE: We overide the action if the intervention is True, because the action applied is the intervention action
        if info["is_intervention"]:
            # TODO: Check the shape
+            # NOTE: The demonstrations recorded beforehand use the full action space,
+            # but sometimes we want to deactivate part of it, for example the gripper
            action = info["action_intervention"]
            episode_intervention = True