mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-31 19:01:28 +00:00
Added logging for interventions to monitor the rate of interventions through time
Added an s keyboard command to force success in the case the reward classifier fails Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
This commit is contained in:
@@ -223,6 +223,7 @@ def act_with_policy(cfg: DictConfig, robot: Robot, reward_classifier: nn.Module)
|
||||
sum_reward_episode = 0
|
||||
list_transition_to_send_to_learner = []
|
||||
list_policy_time = []
|
||||
episode_intervention = False
|
||||
|
||||
for interaction_step in range(cfg.training.online_steps):
|
||||
if interaction_step >= cfg.training.online_step_before_learning:
|
||||
@@ -252,6 +253,7 @@ def act_with_policy(cfg: DictConfig, robot: Robot, reward_classifier: nn.Module)
|
||||
if info["is_intervention"]:
|
||||
# TODO: Check the shape
|
||||
action = info["action_intervention"]
|
||||
episode_intervention = True
|
||||
|
||||
# Check for NaN values in observations
|
||||
for key, tensor in obs.items():
|
||||
@@ -295,11 +297,13 @@ def act_with_policy(cfg: DictConfig, robot: Robot, reward_classifier: nn.Module)
|
||||
interaction_message={
|
||||
"Episodic reward": sum_reward_episode,
|
||||
"Interaction step": interaction_step,
|
||||
"Episode intervention": int(episode_intervention),
|
||||
**stats,
|
||||
}
|
||||
)
|
||||
)
|
||||
sum_reward_episode = 0.0
|
||||
episode_intervention = False
|
||||
obs, info = online_env.reset()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user