mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-02 11:51:25 +00:00
[Port HIl-Serl] Refactor gym-manipulator (#1034)
This commit is contained in:
@@ -20,13 +20,11 @@ from functools import lru_cache
|
||||
from queue import Empty
|
||||
from statistics import mean, quantiles
|
||||
|
||||
# from lerobot.scripts.eval import eval_policy
|
||||
import grpc
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.multiprocessing import Event, Queue
|
||||
|
||||
# TODO: Remove the import of maniskill
|
||||
from lerobot.common.policies.factory import make_policy
|
||||
from lerobot.common.policies.sac.modeling_sac import SACPolicy
|
||||
from lerobot.common.robot_devices.utils import busy_wait
|
||||
@@ -39,20 +37,21 @@ from lerobot.common.utils.utils import (
|
||||
from lerobot.configs import parser
|
||||
from lerobot.configs.train import TrainPipelineConfig
|
||||
from lerobot.scripts.server import hilserl_pb2, hilserl_pb2_grpc, learner_service
|
||||
from lerobot.scripts.server.buffer import (
|
||||
Transition,
|
||||
bytes_to_state_dict,
|
||||
move_state_dict_to_device,
|
||||
move_transition_to_device,
|
||||
python_object_to_bytes,
|
||||
transitions_to_bytes,
|
||||
)
|
||||
from lerobot.scripts.server.buffer import Transition
|
||||
from lerobot.scripts.server.gym_manipulator import make_robot_env
|
||||
from lerobot.scripts.server.network_utils import (
|
||||
bytes_to_state_dict,
|
||||
python_object_to_bytes,
|
||||
receive_bytes_in_chunks,
|
||||
send_bytes_in_chunks,
|
||||
transitions_to_bytes,
|
||||
)
|
||||
from lerobot.scripts.server.utils import (
|
||||
get_last_item_from_queue,
|
||||
move_state_dict_to_device,
|
||||
move_transition_to_device,
|
||||
setup_process_handlers,
|
||||
)
|
||||
from lerobot.scripts.server.utils import get_last_item_from_queue, setup_process_handlers
|
||||
|
||||
ACTOR_SHUTDOWN_TIMEOUT = 30
|
||||
|
||||
@@ -134,21 +133,8 @@ def actor_cli(cfg: TrainPipelineConfig):
|
||||
interactions_process.start()
|
||||
receive_policy_process.start()
|
||||
|
||||
# HACK: FOR MANISKILL we do not have a reward classifier
|
||||
# TODO: Remove this once we merge into main
|
||||
reward_classifier = None
|
||||
# if (
|
||||
# cfg.env.reward_classifier["pretrained_path"] is not None
|
||||
# and cfg.env.reward_classifier["config_path"] is not None
|
||||
# ):
|
||||
# reward_classifier = get_classifier(
|
||||
# pretrained_path=cfg.env.reward_classifier["pretrained_path"],
|
||||
# config_path=cfg.env.reward_classifier["config_path"],
|
||||
# )
|
||||
|
||||
act_with_policy(
|
||||
cfg=cfg,
|
||||
reward_classifier=reward_classifier,
|
||||
shutdown_event=shutdown_event,
|
||||
parameters_queue=parameters_queue,
|
||||
transitions_queue=transitions_queue,
|
||||
@@ -183,7 +169,6 @@ def actor_cli(cfg: TrainPipelineConfig):
|
||||
|
||||
def act_with_policy(
|
||||
cfg: TrainPipelineConfig,
|
||||
reward_classifier: nn.Module,
|
||||
shutdown_event: any, # Event,
|
||||
parameters_queue: Queue,
|
||||
transitions_queue: Queue,
|
||||
@@ -197,7 +182,6 @@ def act_with_policy(
|
||||
|
||||
Args:
|
||||
cfg: Configuration settings for the interaction process.
|
||||
reward_classifier: Reward classifier to use for the interaction process.
|
||||
shutdown_event: Event to check if the process should shutdown.
|
||||
parameters_queue: Queue to receive updated network parameters from the learner.
|
||||
transitions_queue: Queue to send transitions to the learner.
|
||||
@@ -262,16 +246,10 @@ def act_with_policy(
|
||||
|
||||
log_policy_frequency_issue(policy_fps=policy_fps, cfg=cfg, interaction_step=interaction_step)
|
||||
|
||||
next_obs, reward, done, truncated, info = online_env.step(action.squeeze(dim=0).cpu().numpy())
|
||||
else:
|
||||
# TODO (azouitine): Make a custom space for torch tensor
|
||||
action = online_env.action_space.sample()
|
||||
next_obs, reward, done, truncated, info = online_env.step(action)
|
||||
|
||||
# HACK: We have only one env but we want to batch it, it will be resolved with the torch box
|
||||
action = (
|
||||
torch.from_numpy(action[0]).to(device, non_blocking=device.type == "cuda").unsqueeze(dim=0)
|
||||
)
|
||||
next_obs, reward, done, truncated, info = online_env.step(action)
|
||||
|
||||
sum_reward_episode += float(reward)
|
||||
# Increment total steps counter for intervention rate
|
||||
@@ -286,11 +264,6 @@ def act_with_policy(
|
||||
# Increment intervention steps counter
|
||||
episode_intervention_steps += 1
|
||||
|
||||
# Check for NaN values in observations
|
||||
for key, tensor in obs.items():
|
||||
if torch.isnan(tensor).any():
|
||||
logging.error(f"[ACTOR] NaN values found in obs[{key}] at step {interaction_step}")
|
||||
|
||||
list_transition_to_send_to_learner.append(
|
||||
Transition(
|
||||
state=obs,
|
||||
|
||||
Reference in New Issue
Block a user