lerobot/scripts/eval.py

import threading
from pathlib import Path

import hydra
import imageio
import numpy as np
import torch
from tensordict.nn import TensorDictModule
from termcolor import colored
from torchrl.envs import EnvBase

from lerobot.common.envs.factory import make_env
from lerobot.common.policies.factory import make_policy
from lerobot.common.utils import set_seed


def write_video(video_path, stacked_frames, fps):
    """Write ``stacked_frames`` to ``video_path`` as a video at ``fps``.

    Thin wrapper around ``imageio.mimsave``; ``eval_policy`` uses it as the
    target of its background writer threads.

    Args:
        video_path: Destination file path.
        stacked_frames: Sequence of frames handed to ``imageio.mimsave``.
        fps: Frames per second, forwarded as the ``fps`` keyword.
    """
    imageio.mimsave(
        video_path,
        stacked_frames,
        fps=fps,
    )


def eval_policy(
    env: EnvBase,
    policy: TensorDictModule = None,
    num_episodes: int = 10,
    max_steps: int = 30,
    save_video: bool = False,
    video_dir: Path = None,
    fps: int = 15,
    env_step: int = None,
    wandb=None,
):
    """Roll out ``policy`` in ``env`` for several episodes and aggregate metrics.

    Each episode resets the environment, runs one ``env.rollout`` under
    ``torch.inference_mode()`` and reads the ``("next", "reward")`` and
    ``("next", "success")`` entries of the returned rollout.

    Args:
        env: Environment to evaluate in. Must provide ``reset``, ``rollout``
            and (when frames are recorded) ``render``.
        policy: Forwarded unchanged to ``env.rollout``; may be ``None``.
        num_episodes: Number of episodes to run.
        max_steps: ``max_steps`` forwarded to each rollout.
        save_video: When true, every episode is written to
            ``video_dir / "eval_episode_{i}.mp4"`` on a background thread.
        video_dir: Output directory for the videos; created if missing.
            Required when ``save_video`` is true.
        fps: Frame rate used for both saved and logged videos.
        env_step: Step value passed to ``wandb.log``. Required when
            ``wandb`` is provided.
        wandb: Optional object exposing ``Video`` and ``log`` (presumably the
            ``wandb`` module or a run). When truthy, the first episode is
            logged under the ``"eval_video"`` key.

    Returns:
        A dict with ``avg_sum_reward`` and ``avg_max_reward`` (means over
        episodes of the per-episode reward sum / max) and ``pc_success``
        (percentage of episodes in which any success flag was set).

    Raises:
        ValueError: If ``wandb`` is given without ``env_step``, or
            ``save_video`` is true without ``video_dir``.
    """
    # Explicit raises instead of `assert`: asserts are stripped under `-O`,
    # and a missing video_dir used to surface as an AttributeError mid-run.
    if wandb is not None and env_step is None:
        raise ValueError("`env_step` is required when `wandb` is provided.")
    if save_video:
        if video_dir is None:
            raise ValueError("`video_dir` is required when `save_video` is True.")
        video_dir = Path(video_dir)
        # Loop-invariant: create the output directory once, not per episode.
        video_dir.mkdir(parents=True, exist_ok=True)

    sum_rewards = []
    max_rewards = []
    successes = []
    writer_threads = []

    try:
        for i in range(num_episodes):
            # Only the first episode is ever sent to wandb, so later episodes
            # need frames only when they are also saved to disk.
            log_to_wandb = bool(wandb) and i == 0
            record_frames = bool(save_video) or log_to_wandb
            ep_frames = []

            # Bind this episode's frame list as a default argument so the
            # callback never depends on a later rebinding of `ep_frames`.
            def rendering_callback(env, td=None, frames=ep_frames):
                frames.append(env.render())

            tensordict = env.reset()
            if record_frames:
                # render first frame before rollout
                rendering_callback(env)

            with torch.inference_mode():
                rollout = env.rollout(
                    max_steps=max_steps,
                    policy=policy,
                    callback=rendering_callback if record_frames else None,
                    auto_reset=False,
                    tensordict=tensordict,
                    auto_cast_to_device=True,
                )

            rewards = rollout["next", "reward"]
            sum_rewards.append(rewards.sum().item())
            max_rewards.append(rewards.max().item())
            successes.append(rollout["next", "success"].any().item())

            if not record_frames:
                continue

            stacked_frames = np.stack(ep_frames)

            if save_video:
                video_path = video_dir / f"eval_episode_{i}.mp4"
                # Encode in the background so the next rollout can start.
                thread = threading.Thread(
                    target=write_video,
                    args=(str(video_path), stacked_frames, fps),
                )
                thread.start()
                writer_threads.append(thread)

            if log_to_wandb:
                # Moves the last axis to position 1; presumably
                # (T, H, W, C) -> (T, C, H, W) for wandb.Video -- TODO confirm.
                eval_video = wandb.Video(
                    stacked_frames.transpose(0, 3, 1, 2), fps=fps, format="mp4"
                )
                wandb.log({"eval_video": eval_video}, step=env_step)
    finally:
        # Wait for pending video writes even if a rollout raised, so files
        # already being written are complete when control returns.
        for thread in writer_threads:
            thread.join()

    metrics = {
        "avg_sum_reward": np.nanmean(sum_rewards),
        "avg_max_reward": np.nanmean(max_rewards),
        "pc_success": np.nanmean(successes) * 100,
    }
    return metrics


@hydra.main(version_base=None, config_name="default", config_path="../configs")
def eval_cli(cfg: dict):
    """Hydra command-line entry point for evaluation.

    Looks up the output directory of the current Hydra run and passes it,
    together with the composed config, to ``eval``.

    Args:
        cfg: Configuration object supplied by ``hydra.main``.
    """
    hydra_cfg = hydra.core.hydra_config.HydraConfig.get()
    run_output_dir = hydra_cfg.runtime.output_dir
    eval(cfg, out_dir=run_output_dir)


def eval(cfg: dict, out_dir=None):
    """Evaluate the policy described by ``cfg`` and print the metrics.

    Builds the environment with ``make_env`` and, when
    ``cfg.pretrained_model_path`` is set, a policy with ``make_policy``.
    It then runs ``eval_policy`` with video saving enabled, writing videos
    under ``out_dir / "eval"``.

    Args:
        cfg: Config object; reads ``seed``, ``pretrained_model_path``,
            ``fps``, ``episode_length`` and ``eval_episodes``.
        out_dir: Directory for evaluation outputs. Must not be ``None``.

    Raises:
        NotImplementedError: If ``out_dir`` is ``None``.
        AssertionError: If CUDA is not available.
    """
    if out_dir is None:
        raise NotImplementedError()

    assert torch.cuda.is_available()
    torch.backends.cudnn.benchmark = True
    set_seed(cfg.seed)
    print(colored("Log dir:", "yellow", attrs=["bold"]), out_dir)

    env = make_env(cfg)

    # when policy stays None, a random policy is rolled out
    policy = None
    if cfg.pretrained_model_path:
        policy = TensorDictModule(
            make_policy(cfg),
            in_keys=["observation", "step_count"],
            out_keys=["action"],
        )

    eval_kwargs = {
        "policy": policy,
        "save_video": True,
        "video_dir": Path(out_dir) / "eval",
        "fps": cfg.fps,
        "max_steps": cfg.episode_length,
        "num_episodes": cfg.eval_episodes,
    }
    metrics = eval_policy(env, **eval_kwargs)
    print(metrics)


# Script entry point: when run directly, hand control to the Hydra-decorated
# CLI wrapper, which receives the config and calls `eval`.
if __name__ == "__main__":
    eval_cli()
format 2024-02-24 18:19:18 +00:00			`import threading`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00			`from pathlib import Path`

			`import hydra`
			`import imageio`
			`import numpy as np`
			`import torch`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`from tensordict.nn import TensorDictModule`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00			`from termcolor import colored`
Add option for random policy 2024-01-31 13:54:32 +00:00			`from torchrl.envs import EnvBase`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`from lerobot.common.envs.factory import make_env`
Add policies/factory, Add test, Add _self_ in config 2024-02-25 10:50:23 +00:00			`from lerobot.common.policies.factory import make_policy`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`from lerobot.common.utils import set_seed`
format 2024-02-24 18:19:18 +00:00
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00
Add multithreading for video generation, Speed policy sampling 2024-02-24 18:18:39 +00:00			`def write_video(video_path, stacked_frames, fps):`
			`imageio.mimsave(video_path, stacked_frames, fps=fps)`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00
format 2024-02-24 18:19:18 +00:00
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`def eval_policy(`
Add option for random policy 2024-01-31 13:54:32 +00:00			`env: EnvBase,`
			`policy: TensorDictModule = None,`
			`num_episodes: int = 10,`
			`max_steps: int = 30,`
			`save_video: bool = False,`
			`video_dir: Path = None,`
Add pusht dataset (TODO verify reward is aligned), Refactor visualize_dataset, Add video_dir, fps, state_dim, action_dim to config (Training works) 2024-02-21 00:49:40 +00:00			`fps: int = 15,`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`env_step: int = None,`
			`wandb=None,`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00			`):`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`if wandb is not None:`
			`assert env_step is not None`
			`sum_rewards = []`
			`max_rewards = []`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`successes = []`
Add multithreading for video generation, Speed policy sampling 2024-02-24 18:18:39 +00:00			`threads = []`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00			`for i in range(num_episodes):`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`ep_frames = []`

			`def rendering_callback(env, td=None):`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`ep_frames.append(env.render())`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00
			`tensordict = env.reset()`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`if save_video or wandb:`
Eval reproduced! Train running (but not reproduced) 2024-02-10 15:46:24 +00:00			`# render first frame before rollout`
			`rendering_callback(env)`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00
offline training + online finetuning converge to 33 reward! 2024-02-18 01:23:44 +00:00			`with torch.inference_mode():`
			`rollout = env.rollout(`
			`max_steps=max_steps,`
			`policy=policy,`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`callback=rendering_callback if save_video or wandb else None,`
offline training + online finetuning converge to 33 reward! 2024-02-18 01:23:44 +00:00			`auto_reset=False,`
			`tensordict=tensordict,`
			`auto_cast_to_device=True,`
			`)`
Eval reproduced! Train running (but not reproduced) 2024-02-10 15:46:24 +00:00			`# print(", ".join([f"{x:.3f}" for x in rollout["next", "reward"][:,0].tolist()]))`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`ep_sum_reward = rollout["next", "reward"].sum()`
			`ep_max_reward = rollout["next", "reward"].max()`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`ep_success = rollout["next", "success"].any()`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`sum_rewards.append(ep_sum_reward.item())`
			`max_rewards.append(ep_max_reward.item())`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`successes.append(ep_success.item())`
eval.mp4 works! 2024-01-30 23:30:14 +00:00
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`if save_video or wandb:`
			`stacked_frames = np.stack(ep_frames)`

			`if save_video:`
			`video_dir.mkdir(parents=True, exist_ok=True)`
			`video_path = video_dir / f"eval_episode_{i}.mp4"`
Add multithreading for video generation, Speed policy sampling 2024-02-24 18:18:39 +00:00			`thread = threading.Thread(`
			`target=write_video,`
			`args=(str(video_path), stacked_frames, fps),`
			`)`
			`thread.start()`
			`threads.append(thread)`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00
			`first_episode = i == 0`
			`if wandb and first_episode:`
			`eval_video = wandb.Video(`
			`stacked_frames.transpose(0, 3, 1, 2), fps=fps, format="mp4"`
			`)`
			`wandb.log({"eval_video": eval_video}, step=env_step)`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00
Add multithreading for video generation, Speed policy sampling 2024-02-24 18:18:39 +00:00			`for thread in threads:`
			`thread.join()`

Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`metrics = {`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`"avg_sum_reward": np.nanmean(sum_rewards),`
			`"avg_max_reward": np.nanmean(max_rewards),`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`"pc_success": np.nanmean(successes) * 100,`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00			`}`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`return metrics`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00

			`@hydra.main(version_base=None, config_name="default", config_path="../configs")`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`def eval_cli(cfg: dict):`
			`eval(cfg, out_dir=hydra.core.hydra_config.HydraConfig.get().runtime.output_dir)`


			`def eval(cfg: dict, out_dir=None):`
			`if out_dir is None:`
			`raise NotImplementedError()`

First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00			`assert torch.cuda.is_available()`
Add multithreading for video generation, Speed policy sampling 2024-02-24 18:18:39 +00:00			`torch.backends.cudnn.benchmark = True`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00			`set_seed(cfg.seed)`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`print(colored("Log dir:", "yellow", attrs=["bold"]), out_dir)`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00
			`env = make_env(cfg)`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00
Fix unit tests, Refactor, Add pusht env, (TODO pusht replay buffer, image preprocessing) 2024-02-20 12:26:57 +00:00			`if cfg.pretrained_model_path:`
Add policies/factory, Add test, Add _self_ in config 2024-02-25 10:50:23 +00:00			`policy = make_policy(cfg)`
Fix unit tests, Refactor, Add pusht env, (TODO pusht replay buffer, image preprocessing) 2024-02-20 12:26:57 +00:00			`policy = TensorDictModule(`
			`policy,`
			`in_keys=["observation", "step_count"],`
			`out_keys=["action"],`
			`)`
			`else:`
			`# when policy is None, rollout a random policy`
			`policy = None`

Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`metrics = eval_policy(`
eval.mp4 works! 2024-01-30 23:30:14 +00:00			`env,`
Add option for random policy 2024-01-31 13:54:32 +00:00			`policy=policy,`
Fix unit tests, Refactor, Add pusht env, (TODO pusht replay buffer, image preprocessing) 2024-02-20 12:26:57 +00:00			`save_video=True,`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`video_dir=Path(out_dir) / "eval",`
Add pusht dataset (TODO verify reward is aligned), Refactor visualize_dataset, Add video_dir, fps, state_dim, action_dim to config (Training works) 2024-02-21 00:49:40 +00:00			`fps=cfg.fps,`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`max_steps=cfg.episode_length,`
			`num_episodes=cfg.eval_episodes,`
eval.mp4 works! 2024-01-30 23:30:14 +00:00			`)`
Add common, refactor eval with eval_policy 2024-01-31 13:48:12 +00:00			`print(metrics)`
First real commit, simxarm env added with torchrl! 2024-01-29 12:49:30 +00:00

			`if __name__ == "__main__":`
Wandb works, One output dir 2024-02-22 12:14:12 +00:00			`eval_cli()`