mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-03 20:31:25 +00:00
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
@@ -45,17 +45,21 @@ Note that in both examples, the repo/folder should contain at least `config.json
|
||||
|
||||
You can learn about the CLI options for this script in the `EvalPipelineConfig` in lerobot/configs/eval.py
|
||||
"""
|
||||
import concurrent
|
||||
|
||||
import concurrent.futures as cf
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from collections.abc import Callable
|
||||
from contextlib import nullcontext
|
||||
from copy import deepcopy
|
||||
from dataclasses import asdict
|
||||
from pathlib import Path
|
||||
from pprint import pformat
|
||||
from typing import Dict, List, Tuple, TypedDict
|
||||
from collections.abc import Iterator
|
||||
|
||||
import einops
|
||||
import gymnasium as gym
|
||||
@@ -68,7 +72,11 @@ from tqdm import trange
|
||||
from lerobot.configs import parser
|
||||
from lerobot.configs.eval import EvalPipelineConfig
|
||||
from lerobot.envs.factory import make_env
|
||||
from lerobot.envs.utils import add_envs_task, check_env_attributes_and_types, preprocess_observation, preprocess_observation1
|
||||
from lerobot.envs.utils import (
|
||||
add_envs_task,
|
||||
check_env_attributes_and_types,
|
||||
preprocess_observation,
|
||||
)
|
||||
from lerobot.policies.factory import make_policy
|
||||
from lerobot.policies.pretrained import PreTrainedPolicy
|
||||
from lerobot.policies.utils import get_device_from_parameters
|
||||
@@ -79,9 +87,6 @@ from lerobot.utils.utils import (
|
||||
init_logging,
|
||||
inside_slurm,
|
||||
)
|
||||
from typing import TypedDict, Dict, List, Tuple, Iterator
|
||||
from collections import defaultdict
|
||||
import concurrent.futures as cf
|
||||
|
||||
|
||||
def rollout(
|
||||
@@ -485,8 +490,12 @@ def _compile_episode_data(
|
||||
data_dict["index"] = torch.arange(start_data_index, start_data_index + total_frames, 1)
|
||||
|
||||
return data_dict
|
||||
from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy
|
||||
|
||||
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
|
||||
from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy
|
||||
|
||||
|
||||
def _inject_normalization_stats(policy: SmolVLAPolicy, dataset_meta: LeRobotDatasetMetadata):
|
||||
"""Recreate normalization layers with proper stats from the dataset."""
|
||||
from lerobot.policies.normalize import Normalize, Unnormalize
|
||||
@@ -518,7 +527,8 @@ def _inject_normalization_stats(policy: SmolVLAPolicy, dataset_meta: LeRobotData
|
||||
|
||||
def load_smolvla(cfg, dataset_repo: str, policy):
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
dataset = LeRobotDataset(dataset_repo, root='/raid/jade/.cache/huggingface/datasets/')
|
||||
|
||||
dataset = LeRobotDataset(dataset_repo, root="/raid/jade/.cache/huggingface/datasets/")
|
||||
_inject_normalization_stats(policy=policy, dataset_meta=dataset.meta) # only needed if stats are missing
|
||||
return policy.to("cuda"), dataset
|
||||
|
||||
@@ -529,8 +539,8 @@ def eval_main(cfg: EvalPipelineConfig):
|
||||
|
||||
# Check device is available
|
||||
device = get_safe_torch_device(cfg.policy.device, log=True)
|
||||
#login to hf
|
||||
from huggingface_hub import login
|
||||
# login to hf
|
||||
|
||||
# login()
|
||||
torch.backends.cudnn.benchmark = True
|
||||
torch.backends.cuda.matmul.allow_tf32 = True
|
||||
@@ -549,7 +559,7 @@ def eval_main(cfg: EvalPipelineConfig):
|
||||
breakpoint()
|
||||
# policy, _ = load_smolvla(cfg.policy, "physical-intelligence/libero", policy)
|
||||
# rename "image" -> "observation.image"
|
||||
|
||||
|
||||
policy.eval()
|
||||
with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
|
||||
info = eval_policy_all(
|
||||
@@ -584,10 +594,11 @@ def eval_main(cfg: EvalPipelineConfig):
|
||||
|
||||
# ---- typed payload returned by one task eval ----
|
||||
class TaskMetrics(TypedDict):
|
||||
sum_rewards: List[float]
|
||||
max_rewards: List[float]
|
||||
successes: List[bool]
|
||||
video_paths: List[str]
|
||||
sum_rewards: list[float]
|
||||
max_rewards: list[float]
|
||||
successes: list[bool]
|
||||
video_paths: list[str]
|
||||
|
||||
|
||||
ACC_KEYS = ("sum_rewards", "max_rewards", "successes", "video_paths")
|
||||
|
||||
@@ -610,7 +621,7 @@ def eval_policy_all(
|
||||
"""
|
||||
global_start = time.time()
|
||||
|
||||
# inner: evaluate a single (suite, task)
|
||||
# inner: evaluate a single (suite, task)
|
||||
def eval_one(
|
||||
task_group: str,
|
||||
task_id: int,
|
||||
@@ -650,27 +661,36 @@ def eval_policy_all(
|
||||
video_paths=task_result.get("video_paths", []),
|
||||
)
|
||||
|
||||
# result producer: sequential or threaded, same consumer
|
||||
def iter_task_results() -> Iterator[Tuple[str, int, TaskMetrics]]:
|
||||
# result producer: sequential or threaded, same consumer
|
||||
def iter_task_results() -> Iterator[tuple[str, int, TaskMetrics]]:
|
||||
if max_parallel_tasks == 1:
|
||||
for task_group, tasks in envs.items():
|
||||
for task_id, vec in tasks.items():
|
||||
yield task_group, task_id, eval_one(
|
||||
task_group, task_id, vec,
|
||||
policy=policy,
|
||||
n_episodes=n_episodes,
|
||||
max_episodes_rendered=max_episodes_rendered,
|
||||
videos_dir=videos_dir,
|
||||
return_episode_data=return_episode_data,
|
||||
start_seed=start_seed,
|
||||
yield (
|
||||
task_group,
|
||||
task_id,
|
||||
eval_one(
|
||||
task_group,
|
||||
task_id,
|
||||
vec,
|
||||
policy=policy,
|
||||
n_episodes=n_episodes,
|
||||
max_episodes_rendered=max_episodes_rendered,
|
||||
videos_dir=videos_dir,
|
||||
return_episode_data=return_episode_data,
|
||||
start_seed=start_seed,
|
||||
),
|
||||
)
|
||||
else:
|
||||
with cf.ThreadPoolExecutor(max_workers=max_parallel_tasks) as executor:
|
||||
fut2key: Dict[cf.Future, Tuple[str, int]] = {}
|
||||
fut2key: dict[cf.Future, tuple[str, int]] = {}
|
||||
for task_group, tasks in envs.items():
|
||||
for task_id, vec in tasks.items():
|
||||
fut = executor.submit(
|
||||
eval_one, task_group, task_id, vec,
|
||||
eval_one,
|
||||
task_group,
|
||||
task_id,
|
||||
vec,
|
||||
policy=policy,
|
||||
n_episodes=n_episodes,
|
||||
max_episodes_rendered=max_episodes_rendered,
|
||||
@@ -683,9 +703,9 @@ def eval_policy_all(
|
||||
task_group, task_id = fut2key[fut]
|
||||
yield task_group, task_id, fut.result()
|
||||
|
||||
# single accumulator path on the main thread
|
||||
group_acc: Dict[str, Dict[str, List]] = defaultdict(lambda: {k: [] for k in ACC_KEYS})
|
||||
overall: Dict[str, List] = {k: [] for k in ACC_KEYS}
|
||||
# single accumulator path on the main thread
|
||||
group_acc: dict[str, dict[str, list]] = defaultdict(lambda: {k: [] for k in ACC_KEYS})
|
||||
overall: dict[str, list] = {k: [] for k in ACC_KEYS}
|
||||
|
||||
for task_group, task_id, metrics in iter_task_results():
|
||||
acc = group_acc[task_group]
|
||||
@@ -694,7 +714,7 @@ def eval_policy_all(
|
||||
overall[k].extend(metrics[k])
|
||||
|
||||
# build outputs
|
||||
results: Dict[str, dict] = {}
|
||||
results: dict[str, dict] = {}
|
||||
for task_group, data in group_acc.items():
|
||||
suite_rewards = data["sum_rewards"]
|
||||
suite_max = data["max_rewards"]
|
||||
@@ -720,9 +740,15 @@ def eval_policy_all(
|
||||
global_eval_ep_s = global_eval_s / max(1, len(overall["sum_rewards"]))
|
||||
results["overall"] = {
|
||||
"aggregated": {
|
||||
"avg_sum_reward": float(np.nanmean(overall["sum_rewards"])) if overall["sum_rewards"] else float("nan"),
|
||||
"avg_max_reward": float(np.nanmean(overall["max_rewards"])) if overall["max_rewards"] else float("nan"),
|
||||
"pc_success": float(np.nanmean(overall["successes"]) * 100) if overall["successes"] else float("nan"),
|
||||
"avg_sum_reward": float(np.nanmean(overall["sum_rewards"]))
|
||||
if overall["sum_rewards"]
|
||||
else float("nan"),
|
||||
"avg_max_reward": float(np.nanmean(overall["max_rewards"]))
|
||||
if overall["max_rewards"]
|
||||
else float("nan"),
|
||||
"pc_success": float(np.nanmean(overall["successes"]) * 100)
|
||||
if overall["successes"]
|
||||
else float("nan"),
|
||||
"eval_s": global_eval_s,
|
||||
"eval_ep_s": global_eval_ep_s,
|
||||
},
|
||||
@@ -732,7 +758,6 @@ def eval_policy_all(
|
||||
return results
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
init_logging()
|
||||
eval_main()
|
||||
|
||||
@@ -105,6 +105,7 @@ def update_policy(
|
||||
train_metrics.update_s = time.perf_counter() - start_time
|
||||
return train_metrics, output_dict
|
||||
|
||||
|
||||
# def _inject_normalization_stats(policy: SmolVLAPolicy, dataset_meta: LeRobotDatasetMetadata):
|
||||
# """Recreate normalization layers with dataset stats if missing (Adil's workaround)."""
|
||||
# from lerobot.policies.normalize import Normalize, Unnormalize
|
||||
@@ -132,6 +133,7 @@ def update_policy(
|
||||
|
||||
# print("✅ Normalization layers injected with dataset stats.")
|
||||
|
||||
|
||||
@parser.wrap()
|
||||
def train(cfg: TrainPipelineConfig):
|
||||
cfg.validate()
|
||||
@@ -271,9 +273,12 @@ def train(cfg: TrainPipelineConfig):
|
||||
if cfg.env and is_eval_step:
|
||||
step_id = get_step_identifier(step, cfg.steps)
|
||||
logging.info(f"Eval policy at step {step}")
|
||||
with torch.no_grad(), (torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext()):
|
||||
with (
|
||||
torch.no_grad(),
|
||||
torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext(),
|
||||
):
|
||||
eval_info = eval_policy_all(
|
||||
eval_env, # dict[suite][task_id] -> vec_env
|
||||
eval_env, # dict[suite][task_id] -> vec_env
|
||||
policy,
|
||||
cfg.eval.n_episodes,
|
||||
videos_dir=videos_dir,
|
||||
@@ -295,15 +300,15 @@ def train(cfg: TrainPipelineConfig):
|
||||
# meters/tracker
|
||||
eval_metrics = {
|
||||
"avg_sum_reward": AverageMeter("∑rwrd", ":.3f"),
|
||||
"pc_success": AverageMeter("success", ":.1f"),
|
||||
"eval_s": AverageMeter("eval_s", ":.3f"),
|
||||
"pc_success": AverageMeter("success", ":.1f"),
|
||||
"eval_s": AverageMeter("eval_s", ":.3f"),
|
||||
}
|
||||
eval_tracker = MetricsTracker(
|
||||
cfg.batch_size, dataset.num_frames, dataset.num_episodes, eval_metrics, initial_step=step
|
||||
)
|
||||
eval_tracker.eval_s = aggregated.get("eval_s", 0.0)
|
||||
eval_tracker.eval_s = aggregated.get("eval_s", 0.0)
|
||||
eval_tracker.avg_sum_reward = aggregated.get("avg_sum_reward", float("nan"))
|
||||
eval_tracker.pc_success = aggregated.get("pc_success", float("nan"))
|
||||
eval_tracker.pc_success = aggregated.get("pc_success", float("nan"))
|
||||
if wandb_logger:
|
||||
wandb_log_dict = {**eval_tracker.to_dict(), **eval_info}
|
||||
wandb_logger.log_dict(wandb_log_dict, step, mode="eval")
|
||||
|
||||
@@ -104,6 +104,7 @@ def update_policy(
|
||||
train_metrics.update_s = time.perf_counter() - start_time
|
||||
return train_metrics, output_dict
|
||||
|
||||
|
||||
def _inject_normalization_stats(policy: SmolVLAPolicy, dataset_meta: LeRobotDatasetMetadata):
|
||||
"""Recreate normalization layers with dataset stats if missing (Adil's workaround)."""
|
||||
from lerobot.policies.normalize import Normalize, Unnormalize
|
||||
@@ -115,15 +116,15 @@ def _inject_normalization_stats(policy: SmolVLAPolicy, dataset_meta: LeRobotData
|
||||
stats = {}
|
||||
for key, stat_dict in dataset_meta.stats.items():
|
||||
stats[key] = {
|
||||
stat_type: torch.as_tensor(stat_array)
|
||||
if isinstance(stat_array, np.ndarray)
|
||||
else stat_array
|
||||
stat_type: torch.as_tensor(stat_array) if isinstance(stat_array, np.ndarray) else stat_array
|
||||
for stat_type, stat_array in stat_dict.items()
|
||||
}
|
||||
|
||||
normalize_inputs = Normalize(policy.config.input_features, policy.config.normalization_mapping, stats)
|
||||
normalize_targets = Normalize(policy.config.output_features, policy.config.normalization_mapping, stats)
|
||||
unnormalize_outputs = Unnormalize(policy.config.output_features, policy.config.normalization_mapping, stats)
|
||||
unnormalize_outputs = Unnormalize(
|
||||
policy.config.output_features, policy.config.normalization_mapping, stats
|
||||
)
|
||||
|
||||
policy.normalize_inputs = normalize_inputs
|
||||
policy.normalize_targets = normalize_targets
|
||||
@@ -131,6 +132,7 @@ def _inject_normalization_stats(policy: SmolVLAPolicy, dataset_meta: LeRobotData
|
||||
|
||||
print("✅ Normalization layers injected with dataset stats.")
|
||||
|
||||
|
||||
@parser.wrap()
|
||||
def train(cfg: TrainPipelineConfig):
|
||||
cfg.validate()
|
||||
|
||||
@@ -24,12 +24,15 @@ from accelerate.utils import set_seed as accelerate_set_seed
|
||||
from termcolor import colored
|
||||
from torch.optim import Optimizer
|
||||
|
||||
from lerobot.configs import parser
|
||||
from lerobot.configs.train import TrainPipelineConfig
|
||||
from lerobot.datasets.factory import make_dataset
|
||||
from lerobot.datasets.sampler import EpisodeAwareSampler
|
||||
from lerobot.envs.factory import make_env
|
||||
from lerobot.optim.factory import make_optimizer_and_scheduler
|
||||
from lerobot.policies.factory import make_policy
|
||||
from lerobot.policies.pretrained import PreTrainedPolicy
|
||||
from lerobot.scripts.eval import eval_policy
|
||||
from lerobot.utils.logging_utils import AverageMeter, MetricsTracker
|
||||
from lerobot.utils.train_utils import (
|
||||
get_step_checkpoint_dir,
|
||||
@@ -43,9 +46,6 @@ from lerobot.utils.utils import (
|
||||
has_method,
|
||||
init_logging,
|
||||
)
|
||||
from lerobot.configs import parser
|
||||
from lerobot.configs.train import TrainPipelineConfig
|
||||
from lerobot.scripts.eval import eval_policy
|
||||
|
||||
|
||||
def update_policy(
|
||||
@@ -100,6 +100,7 @@ def train(cfg: TrainPipelineConfig):
|
||||
|
||||
# Initialize accelerator
|
||||
from accelerate.utils import DistributedDataParallelKwargs
|
||||
|
||||
# added by jade 2 lines
|
||||
ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=False)
|
||||
accelerator = Accelerator(..., kwargs_handlers=[ddp_kwargs])
|
||||
@@ -357,7 +358,7 @@ def train(cfg: TrainPipelineConfig):
|
||||
|
||||
if accelerator.is_main_process:
|
||||
logging.info("End of training")
|
||||
accelerator.end_training() # added by jade
|
||||
accelerator.end_training() # added by jade
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user