Files
lerobot-clone/src/lerobot/scripts/train_accelerate.py
Jade Choghari 5c628f1700 new things
2025-09-10 11:32:54 +02:00

366 lines
13 KiB
Python

#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import time
from pprint import pformat
from typing import Any
import torch
from accelerate import Accelerator
from accelerate.utils import set_seed as accelerate_set_seed
from termcolor import colored
from torch.optim import Optimizer
from lerobot.datasets.factory import make_dataset
from lerobot.datasets.sampler import EpisodeAwareSampler
from lerobot.envs.factory import make_env
from lerobot.optim.factory import make_optimizer_and_scheduler
from lerobot.policies.factory import make_policy
from lerobot.policies.pretrained import PreTrainedPolicy
from lerobot.utils.logging_utils import AverageMeter, MetricsTracker
from lerobot.utils.train_utils import (
get_step_checkpoint_dir,
get_step_identifier,
load_training_state,
save_checkpoint,
update_last_checkpoint,
)
from lerobot.utils.utils import (
format_big_number,
has_method,
init_logging,
)
from lerobot.configs import parser
from lerobot.configs.train import TrainPipelineConfig
from lerobot.scripts.eval import eval_policy
def update_policy(
train_metrics: MetricsTracker,
policy: PreTrainedPolicy,
batch: Any,
optimizer: Optimizer,
grad_clip_norm: float,
accelerator: Accelerator,
lr_scheduler=None,
) -> tuple[MetricsTracker, dict]:
start_time = time.perf_counter()
policy.train()
# Use accelerator's autocast context if mixed precision is enabled
with accelerator.autocast():
loss, output_dict = policy.forward(batch)
# TODO(rcadene): policy.unnormalize_outputs(out_dict)
# Use accelerator for backward pass
accelerator.backward(loss)
# Gradient clipping - accelerator handles unscaling automatically
if accelerator.sync_gradients and grad_clip_norm > 0:
grad_norm = accelerator.clip_grad_norm_(policy.parameters(), grad_clip_norm)
else:
grad_norm = torch.tensor(0.0)
optimizer.step()
lr_scheduler.step() if lr_scheduler is not None else None
optimizer.zero_grad()
# Update policy-specific buffers if needed
if has_method(policy, "update"):
policy.update()
# Gather metrics across all processes
loss_value = accelerator.gather(loss.detach()).mean().item()
grad_norm_value = accelerator.gather(grad_norm).mean().item()
train_metrics.loss = loss_value
train_metrics.grad_norm = grad_norm_value
train_metrics.lr = optimizer.param_groups[0]["lr"]
train_metrics.update_s = time.perf_counter() - start_time
return train_metrics, output_dict
@parser.wrap()
def train(cfg: TrainPipelineConfig):
cfg.validate()
logging.info(pformat(cfg.to_dict()))
# Initialize accelerator
from accelerate.utils import DistributedDataParallelKwargs
# added by jade 2 lines
ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=False)
accelerator = Accelerator(..., kwargs_handlers=[ddp_kwargs])
from lerobot.utils.wandb_utils import cfg_to_group, get_wandb_run_id_from_filesystem
ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
accelerator = Accelerator(
mixed_precision="bf16" if cfg.policy.use_amp else "no",
gradient_accumulation_steps=cfg.policy.gradient_accumulation_steps,
log_with="wandb" if cfg.wandb.enable else None,
kwargs_handlers=[ddp_kwargs],
project_dir=cfg.output_dir,
)
accelerator.init_trackers(
project_name=cfg.wandb.project,
init_kwargs={
"wandb": {
"entity": cfg.wandb.entity,
"name": cfg.job_name,
"notes": cfg.wandb.notes,
"tags": cfg_to_group(cfg, return_list=True),
"dir": cfg.output_dir,
"config": cfg.to_dict(),
"save_code": False,
"job_type": "train_eval",
"mode": cfg.wandb.mode if cfg.wandb.mode in ["online", "offline", "disabled"] else "online",
"resume": "must" if cfg.resume else None,
"id": cfg.wandb.run_id
if cfg.wandb.run_id
else (get_wandb_run_id_from_filesystem(cfg.output_dir) if cfg.resume else None),
}
},
)
# Set seed for reproducibility
if cfg.seed is not None:
accelerate_set_seed(cfg.seed)
# Setup device - accelerator handles device placement
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
# Create dataset
if accelerator.is_main_process:
logging.info("Creating dataset")
dataset = make_dataset(cfg)
print("c")
# Create evaluation environment (only on main process)
eval_env = None
if cfg.eval_freq > 0 and cfg.env is not None and accelerator.is_main_process:
logging.info("Creating env")
eval_env = make_env(cfg.env, n_envs=cfg.eval.batch_size, use_async_envs=cfg.eval.use_async_envs)
# Create policy
if accelerator.is_main_process:
logging.info("Creating policy")
# Use accelerator's device instead of cfg.policy.device
with accelerator.main_process_first():
policy = make_policy(
cfg=cfg.policy,
ds_meta=dataset.meta,
)
# Create optimizer and scheduler
if accelerator.is_main_process:
logging.info("Creating optimizer and scheduler")
optimizer, lr_scheduler = make_optimizer_and_scheduler(cfg, policy)
step = 0 # number of policy updates
if cfg.resume:
step, optimizer, lr_scheduler = load_training_state(cfg.checkpoint_path, optimizer, lr_scheduler)
# Prepare dataloader
if hasattr(cfg.policy, "drop_n_last_frames"):
shuffle = False
sampler = EpisodeAwareSampler(
dataset.episode_data_index,
drop_n_last_frames=cfg.policy.drop_n_last_frames,
shuffle=True,
)
else:
shuffle = True
sampler = None
dataloader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.num_workers,
batch_size=cfg.batch_size,
shuffle=shuffle,
sampler=sampler,
pin_memory=True,
drop_last=True, # Important for distributed training
)
# Prepare for distributed training
policy, optimizer, dataloader, lr_scheduler = accelerator.prepare(
policy, optimizer, dataloader, lr_scheduler
)
# Log training info (only on main process)
if accelerator.is_main_process:
num_learnable_params = sum(p.numel() for p in policy.parameters() if p.requires_grad)
num_total_params = sum(p.numel() for p in policy.parameters())
logging.info(colored("Output dir:", "yellow", attrs=["bold"]) + f" {cfg.output_dir}")
if cfg.env is not None:
logging.info(f"{cfg.env.task=}")
logging.info(f"{cfg.steps=} ({format_big_number(cfg.steps)})")
logging.info(f"{dataset.num_frames=} ({format_big_number(dataset.num_frames)})")
logging.info(f"{dataset.num_episodes=}")
logging.info(f"{num_learnable_params=} ({format_big_number(num_learnable_params)})")
logging.info(f"{num_total_params=} ({format_big_number(num_total_params)})")
logging.info(f"Number of processes: {accelerator.num_processes}")
logging.info(f"Device: {accelerator.device}")
logging.info(f"Mixed precision: {accelerator.mixed_precision}")
# Create metrics trackers
train_metrics = {
"loss": AverageMeter("loss", ":.3f"),
"grad_norm": AverageMeter("grdn", ":.3f"),
"lr": AverageMeter("lr", ":0.1e"),
"update_s": AverageMeter("updt_s", ":.3f"),
"dataloading_s": AverageMeter("data_s", ":.3f"),
}
train_tracker = MetricsTracker(
cfg.batch_size * accelerator.num_processes, # Account for all processes
dataset.num_frames,
dataset.num_episodes,
train_metrics,
initial_step=step,
)
# Training loop
policy.train()
if accelerator.is_main_process:
logging.info("Start offline training on a fixed dataset")
# Create iterator from dataloader
dl_iter = iter(dataloader)
for current_step in range(step, cfg.steps):
start_time = time.perf_counter()
# Get next batch, cycling through dataloader if needed
try:
batch = next(dl_iter)
print("data laoder batch keys: ", batch.keys())
breakpoint()
except StopIteration:
dl_iter = iter(dataloader)
batch = next(dl_iter)
train_tracker.dataloading_s = time.perf_counter() - start_time
# Update policy
train_tracker, output_dict = update_policy(
train_tracker,
policy,
batch,
optimizer,
cfg.optimizer.grad_clip_norm,
accelerator,
lr_scheduler=lr_scheduler,
)
# Increment step counter
step += 1
train_tracker.step()
# Determine if we should log, save, or evaluate
is_log_step = cfg.log_freq > 0 and step % cfg.log_freq == 0
is_saving_step = step % cfg.save_freq == 0 or step == cfg.steps
is_eval_step = cfg.eval_freq > 0 and step % cfg.eval_freq == 0
# Logging (only on main process)
if is_log_step and accelerator.is_main_process:
logging.info(train_tracker)
wandb_log_dict = train_tracker.to_dict()
if output_dict:
wandb_log_dict.update(output_dict)
for k, v in wandb_log_dict.items():
accelerator.log({f"{'train'}/{k}": v}, step=step)
train_tracker.reset_averages()
# Checkpointing (only on main process)
if cfg.save_checkpoint and is_saving_step:
# ✅ all processes wait here
accelerator.wait_for_everyone()
if accelerator.is_main_process:
logging.info(f"Checkpoint policy after step {step}")
checkpoint_dir = get_step_checkpoint_dir(cfg.output_dir, cfg.steps, step)
unwrapped_policy = accelerator.unwrap_model(policy)
save_checkpoint(checkpoint_dir, step, cfg, unwrapped_policy, optimizer, lr_scheduler)
update_last_checkpoint(checkpoint_dir)
# ✅ all processes sync again after saving
accelerator.wait_for_everyone()
# if wandb_logger:
# wandb_logger.log_policy(checkpoint_dir)
# Evaluation (only on main process)
if cfg.env and is_eval_step and accelerator.is_main_process:
step_id = get_step_identifier(step, cfg.steps)
logging.info(f"Eval policy at step {step}")
# Unwrap model for evaluation
unwrapped_policy = accelerator.unwrap_model(policy)
unwrapped_policy.eval()
with torch.no_grad():
eval_info = eval_policy(
eval_env,
unwrapped_policy,
cfg.eval.n_episodes,
videos_dir=cfg.output_dir / "eval" / f"videos_step_{step_id}",
max_episodes_rendered=4,
start_seed=cfg.seed,
)
eval_metrics = {
"avg_sum_reward": AverageMeter("∑rwrd", ":.3f"),
"pc_success": AverageMeter("success", ":.1f"),
"eval_s": AverageMeter("eval_s", ":.3f"),
}
eval_tracker = MetricsTracker(
cfg.batch_size * accelerator.num_processes,
dataset.num_frames,
dataset.num_episodes,
eval_metrics,
initial_step=step,
)
eval_tracker.eval_s = eval_info["aggregated"].pop("eval_s")
eval_tracker.avg_sum_reward = eval_info["aggregated"].pop("avg_sum_reward")
eval_tracker.pc_success = eval_info["aggregated"].pop("pc_success")
logging.info(eval_tracker)
wandb_log_dict = {**eval_tracker.to_dict(), **eval_info}
for k, v in wandb_log_dict.items():
accelerator.log({f"{'eval'}/{k}": v}, step=step)
# Set back to training mode
policy.train()
# Wait for all processes to finish
accelerator.wait_for_everyone()
# Cleanup
if eval_env and accelerator.is_main_process:
eval_env.close()
if accelerator.is_main_process:
logging.info("End of training")
accelerator.end_training() # added by jade
if __name__ == "__main__":
init_logging()
train()