Add end effector action space to hil-serl (#861)

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2026-06-01 03:11:29 +00:00 · 2025-03-17 14:22:33 +01:00
parent 66816fd871
commit 7b01e16439
13 changed files with 2138 additions and 139 deletions
--- a/lerobot/scripts/server/actor_server.py
+++ b/lerobot/scripts/server/actor_server.py
@@ -54,6 +54,7 @@ from lerobot.scripts.server.network_utils import (
 )
 from lerobot.scripts.server.gym_manipulator import get_classifier, make_robot_env
 from lerobot.scripts.server import learner_service
+from lerobot.common.robot_devices.utils import busy_wait

 from torch.multiprocessing import Queue, Event
 from queue import Empty
@@ -312,17 +313,6 @@ def act_with_policy(

    logging.info("make_policy")

-    # HACK: This is an ugly hack to pass the normalization parameters to the policy
-    # Because the action space is dynamic so we override the output normalization parameters
-    # it's ugly, we know ... and we will fix it
-    min_action_space: list = online_env.action_space.spaces[0].low.tolist()
-    max_action_space: list = online_env.action_space.spaces[0].high.tolist()
-    output_normalization_params: dict[dict[str, list]] = {
-        "action": {"min": min_action_space, "max": max_action_space}
-    }
-    cfg.policy.output_normalization_params = output_normalization_params
-    cfg.policy.output_shapes["action"] = online_env.action_space.spaces[0].shape
-
    ### Instantiate the policy in both the actor and learner processes
    ### To avoid sending a SACPolicy object through the port, we create a policy intance
    ### on both sides, the learner sends the updated parameters every n steps to update the actor's parameters
@@ -347,6 +337,7 @@ def act_with_policy(
    episode_intervention = False

    for interaction_step in range(cfg.training.online_steps):
+        start_time = time.perf_counter()
        if shutdown_event.is_set():
            logging.info("[ACTOR] Shutting down act_with_policy")
            return
@@ -408,7 +399,6 @@ def act_with_policy(
                complementary_info=info,  # TODO Handle information for the transition, is_demonstraction: bool
            )
        )
-
        # assign obs to the next obs and continue the rollout
        obs = next_obs

@@ -449,6 +439,10 @@ def act_with_policy(
            episode_intervention = False
            obs, info = online_env.reset()

+        if cfg.fps is not None:
+            dt_time = time.perf_counter() - start_time
+            busy_wait(1 / cfg.fps - dt_time)
+

 def push_transitions_to_transport_queue(transitions: list, transitions_queue):
    """Send transitions to learner in smaller chunks to avoid network issues.