mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-01 03:11:29 +00:00
Add end effector action space to hil-serl (#861)
Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -54,6 +54,7 @@ from lerobot.scripts.server.network_utils import (
|
||||
)
|
||||
from lerobot.scripts.server.gym_manipulator import get_classifier, make_robot_env
|
||||
from lerobot.scripts.server import learner_service
|
||||
from lerobot.common.robot_devices.utils import busy_wait
|
||||
|
||||
from torch.multiprocessing import Queue, Event
|
||||
from queue import Empty
|
||||
@@ -312,17 +313,6 @@ def act_with_policy(
|
||||
|
||||
logging.info("make_policy")
|
||||
|
||||
# HACK: This is an ugly hack to pass the normalization parameters to the policy
|
||||
# Because the action space is dynamic so we override the output normalization parameters
|
||||
# it's ugly, we know ... and we will fix it
|
||||
min_action_space: list = online_env.action_space.spaces[0].low.tolist()
|
||||
max_action_space: list = online_env.action_space.spaces[0].high.tolist()
|
||||
output_normalization_params: dict[dict[str, list]] = {
|
||||
"action": {"min": min_action_space, "max": max_action_space}
|
||||
}
|
||||
cfg.policy.output_normalization_params = output_normalization_params
|
||||
cfg.policy.output_shapes["action"] = online_env.action_space.spaces[0].shape
|
||||
|
||||
### Instantiate the policy in both the actor and learner processes
|
||||
### To avoid sending a SACPolicy object through the port, we create a policy intance
|
||||
### on both sides, the learner sends the updated parameters every n steps to update the actor's parameters
|
||||
@@ -347,6 +337,7 @@ def act_with_policy(
|
||||
episode_intervention = False
|
||||
|
||||
for interaction_step in range(cfg.training.online_steps):
|
||||
start_time = time.perf_counter()
|
||||
if shutdown_event.is_set():
|
||||
logging.info("[ACTOR] Shutting down act_with_policy")
|
||||
return
|
||||
@@ -408,7 +399,6 @@ def act_with_policy(
|
||||
complementary_info=info, # TODO Handle information for the transition, is_demonstraction: bool
|
||||
)
|
||||
)
|
||||
|
||||
# assign obs to the next obs and continue the rollout
|
||||
obs = next_obs
|
||||
|
||||
@@ -449,6 +439,10 @@ def act_with_policy(
|
||||
episode_intervention = False
|
||||
obs, info = online_env.reset()
|
||||
|
||||
if cfg.fps is not None:
|
||||
dt_time = time.perf_counter() - start_time
|
||||
busy_wait(1 / cfg.fps - dt_time)
|
||||
|
||||
|
||||
def push_transitions_to_transport_queue(transitions: list, transitions_queue):
|
||||
"""Send transitions to learner in smaller chunks to avoid network issues.
|
||||
|
||||
Reference in New Issue
Block a user