Files
lerobot-clone/src/lerobot/policies/diffusion/modeling_diffusion.py

765 lines
32 KiB
Python
Raw Normal View History

2024-05-15 12:13:09 +02:00
#!/usr/bin/env python
# Copyright 2024 Columbia Artificial Intelligence, Robotics Lab,
# and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2024-04-16 13:43:58 +01:00
"""Diffusion Policy as per "Diffusion Policy: Visuomotor Policy Learning via Action Diffusion"
2024-04-15 19:06:44 +01:00
TODO(alexander-soare):
2024-04-16 12:51:32 +01:00
- Remove reliance on diffusers for DDPMScheduler and LR scheduler.
2024-04-15 19:06:44 +01:00
"""
import math
from collections import deque
from collections.abc import Callable
2024-04-15 19:06:44 +01:00
import einops
import numpy as np
2024-04-15 19:06:44 +01:00
import torch
import torch.nn.functional as F # noqa: N812
import torchvision
from diffusers.schedulers.scheduling_ddim import DDIMScheduler
2024-04-15 19:06:44 +01:00
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from torch import Tensor, nn
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.policies.pretrained import PreTrainedPolicy
from lerobot.policies.utils import (
2024-04-15 19:06:44 +01:00
get_device_from_parameters,
get_dtype_from_parameters,
get_output_shape,
2024-04-15 19:06:44 +01:00
populate_queues,
)
2025-09-24 11:11:53 +02:00
from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_IMAGES, OBS_STATE
2024-04-15 19:06:44 +01:00
class DiffusionPolicy(PreTrainedPolicy):
2024-04-15 19:06:44 +01:00
"""
Diffusion Policy as per "Diffusion Policy: Visuomotor Policy Learning via Action Diffusion"
2025-06-12 09:58:59 +02:00
(paper: https://huggingface.co/papers/2303.04137, code: https://github.com/real-stanford/diffusion_policy).
2024-04-15 19:06:44 +01:00
"""
config_class = DiffusionConfig
2024-04-15 19:06:44 +01:00
name = "diffusion"
def __init__(
    self,
    config: DiffusionConfig,
    **kwargs,
):
    """Build the diffusion policy and initialise its rollout queues.

    Args:
        config: Policy configuration class instance. It must not be None: `validate_features()` is
            called on it directly.
        **kwargs: Extra keyword arguments. They are accepted but not read anywhere in this
            constructor.
    """
    super().__init__(config)
    config.validate_features()
    self.config = config

    # queues are populated during rollout of the policy, they contain the n latest observations and actions
    self._queues = None

    self.diffusion = DiffusionModel(config)

    # `reset()` replaces the `None` placeholder above with freshly created, empty queues.
    self.reset()
def get_optim_params(self) -> dict:
    """Return the parameters that should be handed to the optimizer.

    The value is whatever `self.diffusion.parameters()` produces, passed through unchanged.
    """
    params = self.diffusion.parameters()
    return params
2024-04-15 19:06:44 +01:00
def reset(self):
    """Replace the observation and action queues with new, empty ones.

    Should be called on `env.reset()`.
    """
    cfg = self.config

    def _obs_queue():
        # Every observation-type queue is bounded by `n_obs_steps`.
        return deque(maxlen=cfg.n_obs_steps)

    # State and action queues always exist; the action queue is bounded by `n_action_steps`.
    self._queues = queues = {
        OBS_STATE: _obs_queue(),
        ACTION: deque(maxlen=cfg.n_action_steps),
    }

    # Image and environment-state queues are only created when the config declares those features.
    if cfg.image_features:
        queues[OBS_IMAGES] = _obs_queue()
    if cfg.env_state_feature:
        queues[OBS_ENV_STATE] = _obs_queue()
2024-04-15 19:06:44 +01:00
@torch.no_grad()
def predict_action_chunk(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor:
Add direct access to action chunks (#1020) * fix: sharing predicted chunk with user * [pre-commit.ci] pre-commit autoupdate (#1011) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Revert "[pre-commit.ci] pre-commit autoupdate" (#1025) * fix(ci): Pin draccus (<0.10.0) and torch (<2.7) to fix pipeline (#1022) Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix(ci): Pin `torchcodec` (==0.2.1) to fix pipeline temporarly (#1030) * Update tutorial (#1021) Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Add description motor order SO-101 leader (#1051) * feat(encoding): switching to PyAV for ffmpeg related tasks (#983) * feat(docs): Add new docs build process (#1046) Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * Docs: adapt text + fix video code (#1064) * Fix typos (#1070) * docs: minor corrections and clean-up (#1089) * Update 10_use_so100.md; use diff syntax (#944) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * Update 12_use_so101.md (#1081) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * bug fix for #1071 When --display_data=true, Failed running control_robot. 
(#1073) * Add editable -e for feetech install command (#1133) * Fix: emptying action queue between resets (#1117) * fix: typos and grammar (#1148) * Update README.md (#1160) * Update README.md (#1163) * [Fix] Unpin torch beyond 2.6.0 & torchcodec beyond 0.2.1 (#1127) * (hotfix): nightly CI by clipping pymunk version below 7.0.0 (#1182) * [pre-commit.ci] pre-commit autoupdate (#1048) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * Add SmolVLA (#1175) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: fracapuano <francesco.capuano@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> * Fix SmolVLA loss not sent to wandb (#1198) * Hardware API redesign (#777) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: Pepijn <pepijn@huggingface.co> * fix(smolvla): update record.py, fix populate_queues and remove unused dependencies (#1208) * replaced OBS_ROBOT with OBS_STATE constant (#1211) * Fix test_teleoperate (#1216) * Fix LeKiwi example (#1217) * Fix smolVLA dependencies (#1218) * fix(pyserial): adding pyserial dependency to global ones (#1219) * Update SmolVLA README.md (#1228) * Fix unable to set camera width/height to non-default (#1225) * Update tutorial link (#1250) * update KochFollower.get_observation() so it returns same observation structure as SO101 (#1248) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * [pre-commit.ci] 
pre-commit autoupdate (#1185) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Proposal for fix for enter_pressed on Windows (#1230) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix: update pi0 dependency version constraint (#1247) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Match motor names with ids lekiwi (#1261) * fix issues: checkpoints keys mismatch and 'task' tokenisation in smolvla (#1256) Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): update realsense documentation (#1268) * Use HF Papers (#1120) * Skip normalization parameters in load_smolvla (#1274) * fix(record): no teleop needed when running with policy (#1284) * Port HIL SERL (#644) Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): SmolVLA fine-tuning getting started (#1201) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Francesco Capuano <francesco_capuano@aol.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * chore(teleop): print 
calibration path saved (#1286) * chore(dependencies): add gamepad support with pygame and hidapi (#1287) * Robot integration tutorial (#1285) * fix(docs): update send_feedback docstrings * Add sim tutorial, fix lekiwi motor config, add notebook links (#1275) Co-authored-by: AdilZouitine <adilzouitinegm@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> * Fixes on robot integration tutorial (#1290) * Add keyboard teleop device to control the end effector robot (#1289) * Improve type hints (#1293) * fix(record): no teleop arg in reset environment (#1294) * `learner.py` import so101_leader instead of so100 (#1295) Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> * Fixing `PI0` Policy (#1297) * `gym_manipulator.py` Remove None value action_intervention of BaseLeaderTeleoperator (#1299) * (chore): incorrect resume parameter in recording documentation (#1301) * Update lekiwi.mdx (#1229) * bump `pi0` and `hil` transformers version (#1298) * docs: fix imitation learning robots docs command (#1308) * fix(benchmarks): remove .numpy() from frame in benchmark script (#1354) * add smolvla to the supported policies to run tests (: * add: chunk-level access for the policy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add: smolvla in availables * remove: smolvla from library supported policies * fix: change env for training, xarm is broken as of now * add: predict_action_chunk to all supported policies * fix: add robot type constants * add: 
predict action chunk in base policy class * restore original Makefile * fix: minor * fix: dict keys come from lerobot/constants * fix: improve act encapsulation, properly supporting temporal ensembling * fix: smolvla action chunking * fix: very minor, but very annoying * fix: minor * fix minor naming Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: refactoring inference for single actions and chunks into different components * fix: minor * fix: temporal ensembling * fix: moving populate queues out of modular component for batch preparation * fix: minor for CI * fix: smovla debug * fix: reward classifier, maybe the last policy lacking? --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Caroline Pascal <caroline8.pascal@gmail.com> Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: omahs <73983677+omahs@users.noreply.github.com> Co-authored-by: CharlesCNorton <135471798+CharlesCNorton@users.noreply.github.com> Co-authored-by: masato-ka <jp6uzv@gmail.com> Co-authored-by: Ragnar <rodiondenmark@gmail.com> Co-authored-by: mshukor <mustafa.shukor97@gmail.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: Ben Zhang <5977478+ben-z@users.noreply.github.com> Co-authored-by: Pepijn <pepijn@huggingface.co> Co-authored-by: Dhruva 
<51377003+utterwqlnut@users.noreply.github.com> Co-authored-by: Daisuke Sato <tiryoh@gmail.com> Co-authored-by: Sarunas Kalade <sarunas.kalade@amd.com> Co-authored-by: koenvanwijk <koenvanwijk@users.noreply.github.com> Co-authored-by: Yushun Xiang <73413365+YushunXiang@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: tidely <43219534+tidely@users.noreply.github.com> Co-authored-by: David <17435126+DavidLMS@users.noreply.github.com>
2025-06-27 10:19:19 +02:00
"""Predict a chunk of actions given environment observations."""
# stack n latest observations from the queue
batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues}
actions = self.diffusion.generate_actions(batch, noise=noise)
Add direct access to action chunks (#1020) * fix: sharing predicted chunk with user * [pre-commit.ci] pre-commit autoupdate (#1011) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Revert "[pre-commit.ci] pre-commit autoupdate" (#1025) * fix(ci): Pin draccus (<0.10.0) and torch (<2.7) to fix pipeline (#1022) Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix(ci): Pin `torchcodec` (==0.2.1) to fix pipeline temporarly (#1030) * Update tutorial (#1021) Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Add description motor order SO-101 leader (#1051) * feat(encoding): switching to PyAV for ffmpeg related tasks (#983) * feat(docs): Add new docs build process (#1046) Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * Docs: adapt text + fix video code (#1064) * Fix typos (#1070) * docs: minor corrections and clean-up (#1089) * Update 10_use_so100.md; use diff syntax (#944) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * Update 12_use_so101.md (#1081) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * bug fix for #1071 When --display_data=true, Failed running control_robot. 
(#1073) * Add editable -e for feetech install command (#1133) * Fix: emptying action queue between resets (#1117) * fix: typos and grammar (#1148) * Update README.md (#1160) * Update README.md (#1163) * [Fix] Unpin torch beyond 2.6.0 & torchcodec beyond 0.2.1 (#1127) * (hotfix): nightly CI by clipping pymunk version below 7.0.0 (#1182) * [pre-commit.ci] pre-commit autoupdate (#1048) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * Add SmolVLA (#1175) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: fracapuano <francesco.capuano@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> * Fix SmolVLA loss not sent to wandb (#1198) * Hardware API redesign (#777) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: Pepijn <pepijn@huggingface.co> * fix(smolvla): update record.py, fix populate_queues and remove unused dependencies (#1208) * replaced OBS_ROBOT with OBS_STATE constant (#1211) * Fix test_teleoperate (#1216) * Fix LeKiwi example (#1217) * Fix smolVLA dependencies (#1218) * fix(pyserial): adding pyserial dependency to global ones (#1219) * Update SmolVLA README.md (#1228) * Fix unable to set camera width/height to non-default (#1225) * Update tutorial link (#1250) * update KochFollower.get_observation() so it returns same observation structure as SO101 (#1248) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * [pre-commit.ci] 
pre-commit autoupdate (#1185) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Proposal for fix for enter_pressed on Windows (#1230) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix: update pi0 dependency version constraint (#1247) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Match motor names with ids lekiwi (#1261) * fix issues: checkpoints keys mismatch and 'task' tokenisation in smolvla (#1256) Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): update realsense documentation (#1268) * Use HF Papers (#1120) * Skip normalization parameters in load_smolvla (#1274) * fix(record): no teleop needed when running with policy (#1284) * Port HIL SERL (#644) Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): SmolVLA fine-tuning getting started (#1201) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Francesco Capuano <francesco_capuano@aol.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * chore(teleop): print 
calibration path saved (#1286) * chore(dependencies): add gamepad support with pygame and hidapi (#1287) * Robot integration tutorial (#1285) * fix(docs): update send_feedback docstrings * Add sim tutorial, fix lekiwi motor config, add notebook links (#1275) Co-authored-by: AdilZouitine <adilzouitinegm@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> * Fixes on robot integration tutorial (#1290) * Add keyboard teleop device to control the end effector robot (#1289) * Improve type hints (#1293) * fix(record): no teleop arg in reset environment (#1294) * `learner.py` import so101_leader instead of so100 (#1295) Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> * Fixing `PI0` Policy (#1297) * `gym_manipulator.py` Remove None value action_intervention of BaseLeaderTeleoperator (#1299) * (chore): incorrect resume parameter in recording documentation (#1301) * Update lekiwi.mdx (#1229) * bump `pi0` and `hil` transformers version (#1298) * docs: fix imitation learning robots docs command (#1308) * fix(benchmarks): remove .numpy() from frame in benchmark script (#1354) * add smolvla to the supported policies to run tests (: * add: chunk-level access for the policy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add: smolvla in availables * remove: smolvla from library supported policies * fix: change env for training, xarm is broken as of now * add: predict_action_chunk to all supported policies * fix: add robot type constants * add: 
predict action chunk in base policy class * restore original Makefile * fix: minor * fix: dict keys come from lerobot/constants * fix: improve act encapsulation, properly supporting temporal ensembling * fix: smolvla action chunking * fix: very minor, but very annoying * fix: minor * fix minor naming Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: refactoring inference for single actions and chunks into different components * fix: minor * fix: temporal ensembling * fix: moving populate queues out of modular component for batch preparation * fix: minor for CI * fix: smovla debug * fix: reward classifier, maybe the last policy lacking? --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Caroline Pascal <caroline8.pascal@gmail.com> Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: omahs <73983677+omahs@users.noreply.github.com> Co-authored-by: CharlesCNorton <135471798+CharlesCNorton@users.noreply.github.com> Co-authored-by: masato-ka <jp6uzv@gmail.com> Co-authored-by: Ragnar <rodiondenmark@gmail.com> Co-authored-by: mshukor <mustafa.shukor97@gmail.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: Ben Zhang <5977478+ben-z@users.noreply.github.com> Co-authored-by: Pepijn <pepijn@huggingface.co> Co-authored-by: Dhruva 
<51377003+utterwqlnut@users.noreply.github.com> Co-authored-by: Daisuke Sato <tiryoh@gmail.com> Co-authored-by: Sarunas Kalade <sarunas.kalade@amd.com> Co-authored-by: koenvanwijk <koenvanwijk@users.noreply.github.com> Co-authored-by: Yushun Xiang <73413365+YushunXiang@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: tidely <43219534+tidely@users.noreply.github.com> Co-authored-by: David <17435126+DavidLMS@users.noreply.github.com>
2025-06-27 10:19:19 +02:00
return actions
@torch.no_grad()
def select_action(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor:
2024-04-15 19:06:44 +01:00
"""Select a single action given environment observations.
This method handles caching a history of observations and an action trajectory generated by the
underlying diffusion model. Here's how it works:
- `n_obs_steps` steps worth of observations are cached (for the first steps, the observation is
copied `n_obs_steps` times to fill the cache).
- The diffusion model generates `horizon` steps worth of actions.
- `n_action_steps` worth of actions are actually kept for execution, starting from the current step.
Schematically this looks like:
2024-04-16 12:51:32 +01:00
----------------------------------------------------------------------------------------------
2024-04-15 19:06:44 +01:00
(legend: o = n_obs_steps, h = horizon, a = n_action_steps)
|timestep | n-o+1 | n-o+2 | ..... | n | ..... | n+a-1 | n+a | ..... | n-o+h |
|observation is used | YES | YES | YES | YES | NO | NO | NO | NO | NO |
2024-04-15 19:06:44 +01:00
|action is generated | YES | YES | YES | YES | YES | YES | YES | YES | YES |
|action is used | NO | NO | NO | YES | YES | YES | NO | NO | NO |
2024-04-16 12:51:32 +01:00
----------------------------------------------------------------------------------------------
Note that this means we require: `n_action_steps <= horizon - n_obs_steps + 1`. Also, note that
2024-04-15 19:06:44 +01:00
"horizon" may not be the best name to describe what the variable actually means, because this period is
actually measured from the first observation which (if `n_obs_steps` > 1) happened in the past.
"""
# NOTE: for offline evaluation, we have action in the batch, so we need to pop it out
if ACTION in batch:
batch.pop(ACTION)
if self.config.image_features:
batch = dict(batch) # shallow copy so that adding a key doesn't modify the original
Add direct access to action chunks (#1020) * fix: sharing predicted chunk with user * [pre-commit.ci] pre-commit autoupdate (#1011) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Revert "[pre-commit.ci] pre-commit autoupdate" (#1025) * fix(ci): Pin draccus (<0.10.0) and torch (<2.7) to fix pipeline (#1022) Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix(ci): Pin `torchcodec` (==0.2.1) to fix pipeline temporarly (#1030) * Update tutorial (#1021) Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Add description motor order SO-101 leader (#1051) * feat(encoding): switching to PyAV for ffmpeg related tasks (#983) * feat(docs): Add new docs build process (#1046) Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * Docs: adapt text + fix video code (#1064) * Fix typos (#1070) * docs: minor corrections and clean-up (#1089) * Update 10_use_so100.md; use diff syntax (#944) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * Update 12_use_so101.md (#1081) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * bug fix for #1071 When --display_data=true, Failed running control_robot. 
(#1073) * Add editable -e for feetech install command (#1133) * Fix: emptying action queue between resets (#1117) * fix: typos and grammar (#1148) * Update README.md (#1160) * Update README.md (#1163) * [Fix] Unpin torch beyond 2.6.0 & torchcodec beyond 0.2.1 (#1127) * (hotfix): nightly CI by clipping pymunk version below 7.0.0 (#1182) * [pre-commit.ci] pre-commit autoupdate (#1048) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * Add SmolVLA (#1175) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: fracapuano <francesco.capuano@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> * Fix SmolVLA loss not sent to wandb (#1198) * Hardware API redesign (#777) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: Pepijn <pepijn@huggingface.co> * fix(smolvla): update record.py, fix populate_queues and remove unused dependencies (#1208) * replaced OBS_ROBOT with OBS_STATE constant (#1211) * Fix test_teleoperate (#1216) * Fix LeKiwi example (#1217) * Fix smolVLA dependencies (#1218) * fix(pyserial): adding pyserial dependency to global ones (#1219) * Update SmolVLA README.md (#1228) * Fix unable to set camera width/height to non-default (#1225) * Update tutorial link (#1250) * update KochFollower.get_observation() so it returns same observation structure as SO101 (#1248) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * [pre-commit.ci] 
pre-commit autoupdate (#1185) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Proposal for fix for enter_pressed on Windows (#1230) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix: update pi0 dependency version constraint (#1247) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Match motor names with ids lekiwi (#1261) * fix issues: checkpoints keys mismatch and 'task' tokenisation in smolvla (#1256) Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): update realsense documentation (#1268) * Use HF Papers (#1120) * Skip normalization parameters in load_smolvla (#1274) * fix(record): no teleop needed when running with policy (#1284) * Port HIL SERL (#644) Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): SmolVLA fine-tuning getting started (#1201) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Francesco Capuano <francesco_capuano@aol.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * chore(teleop): print 
calibration path saved (#1286) * chore(dependencies): add gamepad support with pygame and hidapi (#1287) * Robot integration tutorial (#1285) * fix(docs): update send_feedback docstrings * Add sim tutorial, fix lekiwi motor config, add notebook links (#1275) Co-authored-by: AdilZouitine <adilzouitinegm@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> * Fixes on robot integration tutorial (#1290) * Add keyboard teleop device to control the end effector robot (#1289) * Improve type hints (#1293) * fix(record): no teleop arg in reset environment (#1294) * `learner.py` import so101_leader instead of so100 (#1295) Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> * Fixing `PI0` Policy (#1297) * `gym_manipulator.py` Remove None value action_intervention of BaseLeaderTeleoperator (#1299) * (chore): incorrect resume parameter in recording documentation (#1301) * Update lekiwi.mdx (#1229) * bump `pi0` and `hil` transformers version (#1298) * docs: fix imitation learning robots docs command (#1308) * fix(benchmarks): remove .numpy() from frame in benchmark script (#1354) * add smolvla to the supported policies to run tests (: * add: chunk-level access for the policy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add: smolvla in availables * remove: smolvla from library supported policies * fix: change env for training, xarm is broken as of now * add: predict_action_chunk to all supported policies * fix: add robot type constants * add: 
predict action chunk in base policy class * restore original Makefile * fix: minor * fix: dict keys come from lerobot/constants * fix: improve act encapsulation, properly supporting temporal ensembling * fix: smolvla action chunking * fix: very minor, but very annoying * fix: minor * fix minor naming Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: refactoring inference for single actions and chunks into different components * fix: minor * fix: temporal ensembling * fix: moving populate queues out of modular component for batch preparation * fix: minor for CI * fix: smovla debug * fix: reward classifier, maybe the last policy lacking? --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Caroline Pascal <caroline8.pascal@gmail.com> Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: omahs <73983677+omahs@users.noreply.github.com> Co-authored-by: CharlesCNorton <135471798+CharlesCNorton@users.noreply.github.com> Co-authored-by: masato-ka <jp6uzv@gmail.com> Co-authored-by: Ragnar <rodiondenmark@gmail.com> Co-authored-by: mshukor <mustafa.shukor97@gmail.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: Ben Zhang <5977478+ben-z@users.noreply.github.com> Co-authored-by: Pepijn <pepijn@huggingface.co> Co-authored-by: Dhruva 
<51377003+utterwqlnut@users.noreply.github.com> Co-authored-by: Daisuke Sato <tiryoh@gmail.com> Co-authored-by: Sarunas Kalade <sarunas.kalade@amd.com> Co-authored-by: koenvanwijk <koenvanwijk@users.noreply.github.com> Co-authored-by: Yushun Xiang <73413365+YushunXiang@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: tidely <43219534+tidely@users.noreply.github.com> Co-authored-by: David <17435126+DavidLMS@users.noreply.github.com>
2025-06-27 10:19:19 +02:00
batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4)
# NOTE: It's important that this happens after stacking the images into a single key.
2024-04-15 19:06:44 +01:00
self._queues = populate_queues(self._queues, batch)
Add direct access to action chunks (#1020) * fix: sharing predicted chunk with user * [pre-commit.ci] pre-commit autoupdate (#1011) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Revert "[pre-commit.ci] pre-commit autoupdate" (#1025) * fix(ci): Pin draccus (<0.10.0) and torch (<2.7) to fix pipeline (#1022) Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix(ci): Pin `torchcodec` (==0.2.1) to fix pipeline temporarly (#1030) * Update tutorial (#1021) Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Add description motor order SO-101 leader (#1051) * feat(encoding): switching to PyAV for ffmpeg related tasks (#983) * feat(docs): Add new docs build process (#1046) Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * Docs: adapt text + fix video code (#1064) * Fix typos (#1070) * docs: minor corrections and clean-up (#1089) * Update 10_use_so100.md; use diff syntax (#944) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * Update 12_use_so101.md (#1081) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * bug fix for #1071 When --display_data=true, Failed running control_robot. 
(#1073) * Add editable -e for feetech install command (#1133) * Fix: emptying action queue between resets (#1117) * fix: typos and grammar (#1148) * Update README.md (#1160) * Update README.md (#1163) * [Fix] Unpin torch beyond 2.6.0 & torchcodec beyond 0.2.1 (#1127) * (hotfix): nightly CI by clipping pymunk version below 7.0.0 (#1182) * [pre-commit.ci] pre-commit autoupdate (#1048) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * Add SmolVLA (#1175) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: fracapuano <francesco.capuano@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> * Fix SmolVLA loss not sent to wandb (#1198) * Hardware API redesign (#777) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: Pepijn <pepijn@huggingface.co> * fix(smolvla): update record.py, fix populate_queues and remove unused dependencies (#1208) * replaced OBS_ROBOT with OBS_STATE constant (#1211) * Fix test_teleoperate (#1216) * Fix LeKiwi example (#1217) * Fix smolVLA dependencies (#1218) * fix(pyserial): adding pyserial dependency to global ones (#1219) * Update SmolVLA README.md (#1228) * Fix unable to set camera width/height to non-default (#1225) * Update tutorial link (#1250) * update KochFollower.get_observation() so it returns same observation structure as SO101 (#1248) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * [pre-commit.ci] 
pre-commit autoupdate (#1185) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Proposal for fix for enter_pressed on Windows (#1230) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix: update pi0 dependency version constraint (#1247) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Match motor names with ids lekiwi (#1261) * fix issues: checkpoints keys mismatch and 'task' tokenisation in smolvla (#1256) Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): update realsense documentation (#1268) * Use HF Papers (#1120) * Skip normalization parameters in load_smolvla (#1274) * fix(record): no teleop needed when running with policy (#1284) * Port HIL SERL (#644) Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): SmolVLA fine-tuning getting started (#1201) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Francesco Capuano <francesco_capuano@aol.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * chore(teleop): print 
calibration path saved (#1286) * chore(dependencies): add gamepad support with pygame and hidapi (#1287) * Robot integration tutorial (#1285) * fix(docs): update send_feedback docstrings * Add sim tutorial, fix lekiwi motor config, add notebook links (#1275) Co-authored-by: AdilZouitine <adilzouitinegm@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> * Fixes on robot integration tutorial (#1290) * Add keyboard teleop device to control the end effector robot (#1289) * Improve type hints (#1293) * fix(record): no teleop arg in reset environment (#1294) * `learner.py` import so101_leader instead of so100 (#1295) Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> * Fixing `PI0` Policy (#1297) * `gym_manipulator.py` Remove None value action_intervention of BaseLeaderTeleoperator (#1299) * (chore): incorrect resume parameter in recording documentation (#1301) * Update lekiwi.mdx (#1229) * bump `pi0` and `hil` transformers version (#1298) * docs: fix imitation learning robots docs command (#1308) * fix(benchmarks): remove .numpy() from frame in benchmark script (#1354) * add smolvla to the supported policies to run tests (: * add: chunk-level access for the policy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add: smolvla in availables * remove: smolvla from library supported policies * fix: change env for training, xarm is broken as of now * add: predict_action_chunk to all supported policies * fix: add robot type constants * add: 
predict action chunk in base policy class * restore original Makefile * fix: minor * fix: dict keys come from lerobot/constants * fix: improve act encapsulation, properly supporting temporal ensembling * fix: smolvla action chunking * fix: very minor, but very annoying * fix: minor * fix minor naming Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: refactoring inference for single actions and chunks into different components * fix: minor * fix: temporal ensembling * fix: moving populate queues out of modular component for batch preparation * fix: minor for CI * fix: smovla debug * fix: reward classifier, maybe the last policy lacking? --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Caroline Pascal <caroline8.pascal@gmail.com> Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: omahs <73983677+omahs@users.noreply.github.com> Co-authored-by: CharlesCNorton <135471798+CharlesCNorton@users.noreply.github.com> Co-authored-by: masato-ka <jp6uzv@gmail.com> Co-authored-by: Ragnar <rodiondenmark@gmail.com> Co-authored-by: mshukor <mustafa.shukor97@gmail.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: Ben Zhang <5977478+ben-z@users.noreply.github.com> Co-authored-by: Pepijn <pepijn@huggingface.co> Co-authored-by: Dhruva 
<51377003+utterwqlnut@users.noreply.github.com> Co-authored-by: Daisuke Sato <tiryoh@gmail.com> Co-authored-by: Sarunas Kalade <sarunas.kalade@amd.com> Co-authored-by: koenvanwijk <koenvanwijk@users.noreply.github.com> Co-authored-by: Yushun Xiang <73413365+YushunXiang@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: tidely <43219534+tidely@users.noreply.github.com> Co-authored-by: David <17435126+DavidLMS@users.noreply.github.com>
2025-06-27 10:19:19 +02:00
if len(self._queues[ACTION]) == 0:
actions = self.predict_action_chunk(batch, noise=noise)
Add direct access to action chunks (#1020) * fix: sharing predicted chunk with user * [pre-commit.ci] pre-commit autoupdate (#1011) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Revert "[pre-commit.ci] pre-commit autoupdate" (#1025) * fix(ci): Pin draccus (<0.10.0) and torch (<2.7) to fix pipeline (#1022) Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix(ci): Pin `torchcodec` (==0.2.1) to fix pipeline temporarly (#1030) * Update tutorial (#1021) Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Add description motor order SO-101 leader (#1051) * feat(encoding): switching to PyAV for ffmpeg related tasks (#983) * feat(docs): Add new docs build process (#1046) Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * Docs: adapt text + fix video code (#1064) * Fix typos (#1070) * docs: minor corrections and clean-up (#1089) * Update 10_use_so100.md; use diff syntax (#944) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * Update 12_use_so101.md (#1081) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * bug fix for #1071 When --display_data=true, Failed running control_robot. 
(#1073) * Add editable -e for feetech install command (#1133) * Fix: emptying action queue between resets (#1117) * fix: typos and grammar (#1148) * Update README.md (#1160) * Update README.md (#1163) * [Fix] Unpin torch beyond 2.6.0 & torchcodec beyond 0.2.1 (#1127) * (hotfix): nightly CI by clipping pymunk version below 7.0.0 (#1182) * [pre-commit.ci] pre-commit autoupdate (#1048) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * Add SmolVLA (#1175) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: fracapuano <francesco.capuano@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> * Fix SmolVLA loss not sent to wandb (#1198) * Hardware API redesign (#777) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: Pepijn <pepijn@huggingface.co> * fix(smolvla): update record.py, fix populate_queues and remove unused dependencies (#1208) * replaced OBS_ROBOT with OBS_STATE constant (#1211) * Fix test_teleoperate (#1216) * Fix LeKiwi example (#1217) * Fix smolVLA dependencies (#1218) * fix(pyserial): adding pyserial dependency to global ones (#1219) * Update SmolVLA README.md (#1228) * Fix unable to set camera width/height to non-default (#1225) * Update tutorial link (#1250) * update KochFollower.get_observation() so it returns same observation structure as SO101 (#1248) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * [pre-commit.ci] 
pre-commit autoupdate (#1185) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Proposal for fix for enter_pressed on Windows (#1230) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix: update pi0 dependency version constraint (#1247) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Match motor names with ids lekiwi (#1261) * fix issues: checkpoints keys mismatch and 'task' tokenisation in smolvla (#1256) Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): update realsense documentation (#1268) * Use HF Papers (#1120) * Skip normalization parameters in load_smolvla (#1274) * fix(record): no teleop needed when running with policy (#1284) * Port HIL SERL (#644) Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): SmolVLA fine-tuning getting started (#1201) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Francesco Capuano <francesco_capuano@aol.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * chore(teleop): print 
calibration path saved (#1286) * chore(dependencies): add gamepad support with pygame and hidapi (#1287) * Robot integration tutorial (#1285) * fix(docs): update send_feedback docstrings * Add sim tutorial, fix lekiwi motor config, add notebook links (#1275) Co-authored-by: AdilZouitine <adilzouitinegm@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> * Fixes on robot integration tutorial (#1290) * Add keyboard teleop device to control the end effector robot (#1289) * Improve type hints (#1293) * fix(record): no teleop arg in reset environment (#1294) * `learner.py` import so101_leader instead of so100 (#1295) Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> * Fixing `PI0` Policy (#1297) * `gym_manipulator.py` Remove None value action_intervention of BaseLeaderTeleoperator (#1299) * (chore): incorrect resume parameter in recording documentation (#1301) * Update lekiwi.mdx (#1229) * bump `pi0` and `hil` transformers version (#1298) * docs: fix imitation learning robots docs command (#1308) * fix(benchmarks): remove .numpy() from frame in benchmark script (#1354) * add smolvla to the supported policies to run tests (: * add: chunk-level access for the policy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add: smolvla in availables * remove: smolvla from library supported policies * fix: change env for training, xarm is broken as of now * add: predict_action_chunk to all supported policies * fix: add robot type constants * add: 
predict action chunk in base policy class * restore original Makefile * fix: minor * fix: dict keys come from lerobot/constants * fix: improve act encapsulation, properly supporting temporal ensembling * fix: smolvla action chunking * fix: very minor, but very annoying * fix: minor * fix minor naming Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: refactoring inference for single actions and chunks into different components * fix: minor * fix: temporal ensembling * fix: moving populate queues out of modular component for batch preparation * fix: minor for CI * fix: smovla debug * fix: reward classifier, maybe the last policy lacking? --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Caroline Pascal <caroline8.pascal@gmail.com> Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: omahs <73983677+omahs@users.noreply.github.com> Co-authored-by: CharlesCNorton <135471798+CharlesCNorton@users.noreply.github.com> Co-authored-by: masato-ka <jp6uzv@gmail.com> Co-authored-by: Ragnar <rodiondenmark@gmail.com> Co-authored-by: mshukor <mustafa.shukor97@gmail.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: Ben Zhang <5977478+ben-z@users.noreply.github.com> Co-authored-by: Pepijn <pepijn@huggingface.co> Co-authored-by: Dhruva 
<51377003+utterwqlnut@users.noreply.github.com> Co-authored-by: Daisuke Sato <tiryoh@gmail.com> Co-authored-by: Sarunas Kalade <sarunas.kalade@amd.com> Co-authored-by: koenvanwijk <koenvanwijk@users.noreply.github.com> Co-authored-by: Yushun Xiang <73413365+YushunXiang@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: tidely <43219534+tidely@users.noreply.github.com> Co-authored-by: David <17435126+DavidLMS@users.noreply.github.com>
2025-06-27 10:19:19 +02:00
self._queues[ACTION].extend(actions.transpose(0, 1))
2024-04-15 19:06:44 +01:00
Add direct access to action chunks (#1020) * fix: sharing predicted chunk with user * [pre-commit.ci] pre-commit autoupdate (#1011) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Revert "[pre-commit.ci] pre-commit autoupdate" (#1025) * fix(ci): Pin draccus (<0.10.0) and torch (<2.7) to fix pipeline (#1022) Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix(ci): Pin `torchcodec` (==0.2.1) to fix pipeline temporarly (#1030) * Update tutorial (#1021) Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Add description motor order SO-101 leader (#1051) * feat(encoding): switching to PyAV for ffmpeg related tasks (#983) * feat(docs): Add new docs build process (#1046) Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * Docs: adapt text + fix video code (#1064) * Fix typos (#1070) * docs: minor corrections and clean-up (#1089) * Update 10_use_so100.md; use diff syntax (#944) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * Update 12_use_so101.md (#1081) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * bug fix for #1071 When --display_data=true, Failed running control_robot. 
(#1073) * Add editable -e for feetech install command (#1133) * Fix: emptying action queue between resets (#1117) * fix: typos and grammar (#1148) * Update README.md (#1160) * Update README.md (#1163) * [Fix] Unpin torch beyond 2.6.0 & torchcodec beyond 0.2.1 (#1127) * (hotfix): nightly CI by clipping pymunk version below 7.0.0 (#1182) * [pre-commit.ci] pre-commit autoupdate (#1048) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * Add SmolVLA (#1175) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: fracapuano <francesco.capuano@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> * Fix SmolVLA loss not sent to wandb (#1198) * Hardware API redesign (#777) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: Pepijn <pepijn@huggingface.co> * fix(smolvla): update record.py, fix populate_queues and remove unused dependencies (#1208) * replaced OBS_ROBOT with OBS_STATE constant (#1211) * Fix test_teleoperate (#1216) * Fix LeKiwi example (#1217) * Fix smolVLA dependencies (#1218) * fix(pyserial): adding pyserial dependency to global ones (#1219) * Update SmolVLA README.md (#1228) * Fix unable to set camera width/height to non-default (#1225) * Update tutorial link (#1250) * update KochFollower.get_observation() so it returns same observation structure as SO101 (#1248) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * [pre-commit.ci] 
pre-commit autoupdate (#1185) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Proposal for fix for enter_pressed on Windows (#1230) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix: update pi0 dependency version constraint (#1247) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Match motor names with ids lekiwi (#1261) * fix issues: checkpoints keys mismatch and 'task' tokenisation in smolvla (#1256) Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): update realsense documentation (#1268) * Use HF Papers (#1120) * Skip normalization parameters in load_smolvla (#1274) * fix(record): no teleop needed when running with policy (#1284) * Port HIL SERL (#644) Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> * fix(docs): SmolVLA fine-tuning getting started (#1201) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Francesco Capuano <francesco_capuano@aol.com> Co-authored-by: Steven Palma <steven.palma@huggingface.co> * chore(teleop): print 
calibration path saved (#1286) * chore(dependencies): add gamepad support with pygame and hidapi (#1287) * Robot integration tutorial (#1285) * fix(docs): update send_feedback docstrings * Add sim tutorial, fix lekiwi motor config, add notebook links (#1275) Co-authored-by: AdilZouitine <adilzouitinegm@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> * Fixes on robot integration tutorial (#1290) * Add keyboard teleop device to control the end effector robot (#1289) * Improve type hints (#1293) * fix(record): no teleop arg in reset environment (#1294) * `learner.py` import so101_leader instead of so100 (#1295) Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> * Fixing `PI0` Policy (#1297) * `gym_manipulator.py` Remove None value action_intervention of BaseLeaderTeleoperator (#1299) * (chore): incorrect resume parameter in recording documentation (#1301) * Update lekiwi.mdx (#1229) * bump `pi0` and `hil` transformers version (#1298) * docs: fix imitation learning robots docs command (#1308) * fix(benchmarks): remove .numpy() from frame in benchmark script (#1354) * add smolvla to the supported policies to run tests (: * add: chunk-level access for the policy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add: smolvla in availables * remove: smolvla from library supported policies * fix: change env for training, xarm is broken as of now * add: predict_action_chunk to all supported policies * fix: add robot type constants * add: 
predict action chunk in base policy class * restore original Makefile * fix: minor * fix: dict keys come from lerobot/constants * fix: improve act encapsulation, properly supporting temporal ensembling * fix: smolvla action chunking * fix: very minor, but very annoying * fix: minor * fix minor naming Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: refactoring inference for single actions and chunks into different components * fix: minor * fix: temporal ensembling * fix: moving populate queues out of modular component for batch preparation * fix: minor for CI * fix: smovla debug * fix: reward classifier, maybe the last policy lacking? --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: imstevenpmwork <steven.palma@huggingface.co> Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Caroline Pascal <caroline8.pascal@gmail.com> Co-authored-by: Mishig Davaadorj <dmishig@gmail.com> Co-authored-by: omahs <73983677+omahs@users.noreply.github.com> Co-authored-by: CharlesCNorton <135471798+CharlesCNorton@users.noreply.github.com> Co-authored-by: masato-ka <jp6uzv@gmail.com> Co-authored-by: Ragnar <rodiondenmark@gmail.com> Co-authored-by: mshukor <mustafa.shukor97@gmail.com> Co-authored-by: Simon Alibert <simon.alibert@huggingface.co> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: Ben Zhang <5977478+ben-z@users.noreply.github.com> Co-authored-by: Pepijn <pepijn@huggingface.co> Co-authored-by: Dhruva 
<51377003+utterwqlnut@users.noreply.github.com> Co-authored-by: Daisuke Sato <tiryoh@gmail.com> Co-authored-by: Sarunas Kalade <sarunas.kalade@amd.com> Co-authored-by: koenvanwijk <koenvanwijk@users.noreply.github.com> Co-authored-by: Yushun Xiang <73413365+YushunXiang@users.noreply.github.com> Co-authored-by: danaaubakirova <d.aubakirova@alumni.edu.kz> Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Co-authored-by: Eugene Mironov <helper2424@gmail.com> Co-authored-by: s1lent4gnt <kmeftah.khalil@gmail.com> Co-authored-by: Ke Wang <superwk1017@gmail.com> Co-authored-by: Yoel Chornton <yoel.chornton@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@gmail.com> Co-authored-by: tidely <43219534+tidely@users.noreply.github.com> Co-authored-by: David <17435126+DavidLMS@users.noreply.github.com>
2025-06-27 10:19:19 +02:00
action = self._queues[ACTION].popleft()
2024-04-15 19:06:44 +01:00
return action
def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, None]:
    """Compute the training/validation loss for one batch.

    When image features are configured, the per-camera image tensors are stacked into a single
    entry (keyed by `OBS_IMAGES`) of a copy of the batch, so the caller's dict is left unmodified.

    Args:
        batch: Mapping from feature name to tensor.

    Returns:
        A `(loss, None)` tuple. The second element is `None` because there is no output dict.
    """
    camera_keys = self.config.image_features
    if camera_keys:
        # Stack the cameras along a new dim inserted 4 from the end (just before the per-image dims).
        stacked_images = torch.stack([batch[key] for key in camera_keys], dim=-4)
        # Build a new dict rather than writing into the one owned by the caller.
        batch = {**batch, OBS_IMAGES: stacked_images}
    return self.diffusion.compute_loss(batch), None
2024-04-15 19:06:44 +01:00
def _make_noise_scheduler(name: str, **kwargs: dict) -> DDPMScheduler | DDIMScheduler:
    """Build a noise scheduler of the requested type.

    Args:
        name: Scheduler type; either "DDPM" or "DDIM".
        **kwargs: Forwarded unchanged to the scheduler constructor.

    Returns:
        The newly constructed scheduler instance.

    Raises:
        ValueError: If `name` is not one of the supported scheduler types.
    """
    if name not in ("DDPM", "DDIM"):
        raise ValueError(f"Unsupported noise scheduler type {name}")
    scheduler_cls = DDPMScheduler if name == "DDPM" else DDIMScheduler
    return scheduler_cls(**kwargs)
class DiffusionModel(nn.Module):
    """Denoising model at the core of the diffusion policy.

    Bundles the optional RGB encoder(s), the conditional 1D U-Net (`self.unet`) and the noise scheduler
    (`self.noise_scheduler`). `compute_loss` implements the training objective and `generate_actions`
    runs the reverse diffusion process to sample actions conditioned on the observations.
    """

    def __init__(self, config: DiffusionConfig):
        """Build the observation encoders, the denoising U-Net and the noise scheduler from `config`."""
        super().__init__()
        self.config = config

        # Build observation encoders (depending on which observations are provided).
        # `global_cond_dim` accumulates the per-observation-step size of the conditioning vector,
        # starting with the robot state.
        global_cond_dim = self.config.robot_state_feature.shape[0]
        if self.config.image_features:
            num_images = len(self.config.image_features)
            if self.config.use_separate_rgb_encoder_per_camera:
                # One independent encoder per camera.
                encoders = [DiffusionRgbEncoder(config) for _ in range(num_images)]
                self.rgb_encoder = nn.ModuleList(encoders)
                global_cond_dim += encoders[0].feature_dim * num_images
            else:
                # A single encoder shared by all cameras.
                self.rgb_encoder = DiffusionRgbEncoder(config)
                global_cond_dim += self.rgb_encoder.feature_dim * num_images
        if self.config.env_state_feature:
            global_cond_dim += self.config.env_state_feature.shape[0]

        # The features of all observation steps are flattened into one conditioning vector, hence the
        # multiplication by `n_obs_steps`.
        self.unet = DiffusionConditionalUnet1d(config, global_cond_dim=global_cond_dim * config.n_obs_steps)

        self.noise_scheduler = _make_noise_scheduler(
            config.noise_scheduler_type,
            num_train_timesteps=config.num_train_timesteps,
            beta_start=config.beta_start,
            beta_end=config.beta_end,
            beta_schedule=config.beta_schedule,
            clip_sample=config.clip_sample,
            clip_sample_range=config.clip_sample_range,
            prediction_type=config.prediction_type,
        )

        # Default to as many denoising steps at inference time as were used for training.
        if config.num_inference_steps is None:
            self.num_inference_steps = self.noise_scheduler.config.num_train_timesteps
        else:
            self.num_inference_steps = config.num_inference_steps

    # ========= inference  ============
    def conditional_sample(
        self,
        batch_size: int,
        global_cond: Tensor | None = None,
        generator: torch.Generator | None = None,
        noise: Tensor | None = None,
    ) -> Tensor:
        """Sample a trajectory by iteratively denoising a prior sample.

        Args:
            batch_size: Number of trajectories to draw. Only used when `noise` is None.
            global_cond: Conditioning passed to the U-Net at every denoising step.
            generator: Optional random generator used for drawing the prior and passed to the
                scheduler's `step`.
            noise: Optional starting sample. When None, the prior is drawn from a standard normal with
                shape (batch_size, horizon, action_dim).

        Returns:
            The sample obtained after stepping through all of the scheduler's inference timesteps.
        """
        device = get_device_from_parameters(self)
        dtype = get_dtype_from_parameters(self)

        # Sample prior.
        sample = (
            noise
            if noise is not None
            else torch.randn(
                size=(batch_size, self.config.horizon, self.config.action_feature.shape[0]),
                dtype=dtype,
                device=device,
                generator=generator,
            )
        )

        self.noise_scheduler.set_timesteps(self.num_inference_steps)

        for t in self.noise_scheduler.timesteps:
            # Predict model output. The same timestep is used for every item in the batch.
            model_output = self.unet(
                sample,
                torch.full(sample.shape[:1], t, dtype=torch.long, device=sample.device),
                global_cond=global_cond,
            )
            # Compute previous sample: x_t -> x_t-1
            sample = self.noise_scheduler.step(model_output, t, sample, generator=generator).prev_sample

        return sample

    def _prepare_global_conditioning(self, batch: dict[str, Tensor]) -> Tensor:
        """Encode image features and concatenate them all together along with the state vector.

        Args:
            batch: Must contain `OBS_STATE`, plus `OBS_IMAGES` and/or `OBS_ENV_STATE` when the
                corresponding features are enabled in the config.

        Returns:
            The conditioning features flattened to (B, global_cond_dim).
        """
        batch_size, n_obs_steps = batch[OBS_STATE].shape[:2]
        global_cond_feats = [batch[OBS_STATE]]
        # Extract image features.
        if self.config.image_features:
            if self.config.use_separate_rgb_encoder_per_camera:
                # Combine batch and sequence dims while rearranging to make the camera index dimension first.
                images_per_camera = einops.rearrange(batch[OBS_IMAGES], "b s n ... -> n (b s) ...")
                # Each camera goes through its own encoder; `strict=True` guards against a mismatch
                # between the number of encoders and the number of cameras.
                img_features_list = torch.cat(
                    [
                        encoder(images)
                        for encoder, images in zip(self.rgb_encoder, images_per_camera, strict=True)
                    ]
                )
                # Separate batch and sequence dims back out. The camera index dim gets absorbed into the
                # feature dim (effectively concatenating the camera features).
                img_features = einops.rearrange(
                    img_features_list, "(n b s) ... -> b s (n ...)", b=batch_size, s=n_obs_steps
                )
            else:
                # Combine batch, sequence, and "which camera" dims before passing to shared encoder.
                img_features = self.rgb_encoder(
                    einops.rearrange(batch[OBS_IMAGES], "b s n ... -> (b s n) ...")
                )
                # Separate batch dim and sequence dim back out. The camera index dim gets absorbed into the
                # feature dim (effectively concatenating the camera features).
                img_features = einops.rearrange(
                    img_features, "(b s n) ... -> b s (n ...)", b=batch_size, s=n_obs_steps
                )
            global_cond_feats.append(img_features)

        if self.config.env_state_feature:
            global_cond_feats.append(batch[OBS_ENV_STATE])

        # Concatenate features then flatten to (B, global_cond_dim).
        return torch.cat(global_cond_feats, dim=-1).flatten(start_dim=1)

    def generate_actions(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor:
        """Sample a trajectory conditioned on the observations and return the actions to execute.

        This function expects `batch` to have:
        {
            "observation.state": (B, n_obs_steps, state_dim)

            "observation.images": (B, n_obs_steps, num_cameras, C, H, W)
                AND/OR
            "observation.environment_state": (B, n_obs_steps, environment_dim)
        }

        Args:
            batch: Observations, as described above.
            noise: Optional starting sample forwarded to `conditional_sample`.

        Returns:
            The slice of the sampled trajectory that starts at the current observation (index
            `n_obs_steps - 1`) and spans `n_action_steps` steps.
        """
        batch_size, n_obs_steps = batch[OBS_STATE].shape[:2]
        assert n_obs_steps == self.config.n_obs_steps

        # Encode image features and concatenate them all together along with the state vector.
        global_cond = self._prepare_global_conditioning(batch)  # (B, global_cond_dim)

        # run sampling
        actions = self.conditional_sample(batch_size, global_cond=global_cond, noise=noise)

        # Extract `n_action_steps` steps worth of actions (from the current observation).
        start = n_obs_steps - 1
        end = start + self.config.n_action_steps
        actions = actions[:, start:end]

        return actions

    def compute_loss(self, batch: dict[str, Tensor]) -> Tensor:
        """Compute the denoising loss for a batch of trajectories.

        This function expects `batch` to have (at least):
        {
            "observation.state": (B, n_obs_steps, state_dim)

            "observation.images": (B, n_obs_steps, num_cameras, C, H, W)
                AND/OR
            "observation.environment_state": (B, n_obs_steps, environment_dim)

            "action": (B, horizon, action_dim)
            "action_is_pad": (B, horizon)  # only required when `do_mask_loss_for_padding` is set
        }

        Returns:
            The mean squared error between the network prediction and the target (the noise or the
            clean trajectory, depending on `prediction_type`), averaged over all elements.

        Raises:
            ValueError: If `prediction_type` is unsupported, or if loss masking is enabled but
                "action_is_pad" is missing from the batch.
        """
        # Input validation.
        # "action_is_pad" is deliberately not required here: it is only needed when masking the loss,
        # and that case is reported with an explicit ValueError below.
        assert set(batch).issuperset({OBS_STATE, ACTION})
        assert OBS_IMAGES in batch or OBS_ENV_STATE in batch
        n_obs_steps = batch[OBS_STATE].shape[1]
        horizon = batch[ACTION].shape[1]
        assert horizon == self.config.horizon
        assert n_obs_steps == self.config.n_obs_steps

        # Encode image features and concatenate them all together along with the state vector.
        global_cond = self._prepare_global_conditioning(batch)  # (B, global_cond_dim)

        # Forward diffusion.
        trajectory = batch[ACTION]
        # Sample noise to add to the trajectory.
        eps = torch.randn(trajectory.shape, device=trajectory.device)
        # Sample a random noising timestep for each item in the batch.
        timesteps = torch.randint(
            low=0,
            high=self.noise_scheduler.config.num_train_timesteps,
            size=(trajectory.shape[0],),
            device=trajectory.device,
        ).long()
        # Add noise to the clean trajectories according to the noise magnitude at each timestep.
        noisy_trajectory = self.noise_scheduler.add_noise(trajectory, eps, timesteps)

        # Run the denoising network (that might denoise the trajectory, or attempt to predict the noise).
        pred = self.unet(noisy_trajectory, timesteps, global_cond=global_cond)

        # Compute the loss.
        # The target is either the original trajectory, or the noise.
        if self.config.prediction_type == "epsilon":
            target = eps
        elif self.config.prediction_type == "sample":
            target = batch[ACTION]
        else:
            raise ValueError(f"Unsupported prediction type {self.config.prediction_type}")

        loss = F.mse_loss(pred, target, reduction="none")

        # Mask loss wherever the action is padded with copies (edges of the dataset trajectory).
        if self.config.do_mask_loss_for_padding:
            if "action_is_pad" not in batch:
                raise ValueError(
                    "You need to provide 'action_is_pad' in the batch when "
                    f"{self.config.do_mask_loss_for_padding=}."
                )
            in_episode_bound = ~batch["action_is_pad"]
            # Broadcast the (B, horizon) mask over the action dimension.
            loss = loss * in_episode_bound.unsqueeze(-1)

        return loss.mean()
class SpatialSoftmax(nn.Module):
    """
    Spatial Soft Argmax layer from "Deep Spatial Autoencoders for Visuomotor Learning" by Finn et al.
    (https://huggingface.co/papers/1509.06113). A minimal port of the robomimic implementation.

    Given 2D feature maps (from a convnet/ViT), the layer outputs, for every channel, the "center of mass"
    of its activations, i.e. one keypoint in image space per channel for the policy to focus on.

    Example: for feature maps of size (512x10x12), a grid of normalized coordinates (10x12x2) is built:
    -----------------------------------------------------
    | (-1., -1.)   | (-0.82, -1.)   | ... | (1., -1.)   |
    | (-1., -0.78) | (-0.82, -0.78) | ... | (1., -0.78) |
    | ...          | ...            | ... | ...         |
    | (-1., 1.)    | (-0.82, 1.)    | ... | (1., 1.)    |
    -----------------------------------------------------
    A channel-wise softmax is applied over the activations (512x120), and its dot product with the
    coordinates (120x2) gives the expected points of maximal activation (512x2).

    The example above yields 512 keypoints (one per input channel). Passing num_kp != None controls the
    number of keypoints instead, by first applying a learnable linear mapping
    (in_channels, H, W) -> (num_kp, H, W).
    """

    def __init__(self, input_shape, num_kp=None):
        """
        Args:
            input_shape (list): (C, H, W) input feature map shape.
            num_kp (int): number of keypoints in output. If None, output will have the same number of
                channels as input.
        """
        super().__init__()

        assert len(input_shape) == 3
        self._in_c, self._in_h, self._in_w = input_shape

        # Optional 1x1 convolution mapping the input channels to the requested number of keypoints.
        self.nets = None if num_kp is None else torch.nn.Conv2d(self._in_c, num_kp, kernel_size=1)
        self._out_c = self._in_c if num_kp is None else num_kp

        # The grid is built with numpy on purpose: torch.linspace seems to behave slightly differently
        # and causes a small degradation in pc_success of pre-trained models.
        xs = np.linspace(-1.0, 1.0, self._in_w)
        ys = np.linspace(-1.0, 1.0, self._in_h)
        grid_x, grid_y = np.meshgrid(xs, ys)
        num_cells = self._in_h * self._in_w
        columns = [torch.from_numpy(axis.reshape(num_cells, 1)).float() for axis in (grid_x, grid_y)]
        # Registered as a buffer so that it follows the module to the correct device.
        self.register_buffer("pos_grid", torch.cat(columns, dim=1))

    def forward(self, features: Tensor) -> Tensor:
        """
        Args:
            features: (B, C, H, W) input feature maps.
        Returns:
            (B, K, 2) image-space coordinates of keypoints.
        """
        projected = features if self.nets is None else self.nets(features)

        # [B, K, H, W] -> [B * K, H * W] where K is number of keypoints
        flat = projected.reshape(-1, self._in_h * self._in_w)
        # Softmax over all spatial locations of each feature map.
        weights = F.softmax(flat, dim=-1)
        # [B * K, H * W] x [H * W, 2] -> [B * K, 2]: expected x and y coordinates under the softmax weights.
        keypoints = torch.matmul(weights, self.pos_grid)
        # Back to [B, K, 2].
        return keypoints.view(-1, self._out_c, 2)
class DiffusionRgbEncoder(nn.Module):
    """Encodes an RGB image into a 1D feature vector.

    The image is optionally cropped, passed through a torchvision backbone, pooled into keypoints with
    `SpatialSoftmax`, and projected by a final linear layer followed by a ReLU.
    """

    def __init__(self, config: DiffusionConfig):
        super().__init__()

        # Optional crop preprocessing.
        self.do_crop = config.crop_shape is not None
        if self.do_crop:
            # The center crop is always the one used for eval.
            self.center_crop = torchvision.transforms.CenterCrop(config.crop_shape)
            # During training, use a random crop if requested, otherwise fall back to the center crop.
            self.maybe_random_crop = (
                torchvision.transforms.RandomCrop(config.crop_shape)
                if config.crop_is_random
                else self.center_crop
            )

        # Backbone: a torchvision model with its last two children dropped.
        # Note: This assumes that the layer4 feature map is children()[-3]
        # TODO(alexander-soare): Use a safer alternative.
        build_backbone = getattr(torchvision.models, config.vision_backbone)
        full_model = build_backbone(weights=config.pretrained_backbone_weights)
        trunk_layers = list(full_model.children())[:-2]
        self.backbone = nn.Sequential(*trunk_layers)

        if config.use_group_norm:
            if config.pretrained_backbone_weights:
                raise ValueError(
                    "You can't replace BatchNorm in a pretrained model without ruining the weights!"
                )

            def is_batch_norm(module: nn.Module) -> bool:
                return isinstance(module, nn.BatchNorm2d)

            def to_group_norm(batch_norm: nn.Module) -> nn.Module:
                # One group per 16 channels.
                return nn.GroupNorm(
                    num_groups=batch_norm.num_features // 16, num_channels=batch_norm.num_features
                )

            self.backbone = _replace_submodules(
                root_module=self.backbone, predicate=is_batch_norm, func=to_group_norm
            )

        # Pooling and final layers. A dry run through the backbone gives the feature map shape.
        # The dummy input takes its channel count from `config.image_features`, and its height and width
        # from `config.crop_shape` when that is set, otherwise from `config.image_features` as well.
        # Note: the config class checks that all images have the same shape.
        first_image_shape = next(iter(config.image_features.values())).shape
        if config.crop_shape is not None:
            dummy_h_w = config.crop_shape
        else:
            dummy_h_w = first_image_shape[1:]
        dummy_shape = (1, first_image_shape[0], *dummy_h_w)
        feature_map_shape = get_output_shape(self.backbone, dummy_shape)[1:]

        num_keypoints = config.spatial_softmax_num_keypoints
        self.pool = SpatialSoftmax(feature_map_shape, num_kp=num_keypoints)
        # Each keypoint contributes an (x, y) pair.
        self.feature_dim = num_keypoints * 2
        self.out = nn.Linear(num_keypoints * 2, self.feature_dim)
        self.relu = nn.ReLU()

    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x: (B, C, H, W) image tensor with pixel values in [0, 1].
        Returns:
            (B, D) image feature.
        """
        # Preprocess: maybe crop (if it was set up in the __init__).
        if self.do_crop:
            # Possibly-random crop while training, center crop for eval.
            crop = self.maybe_random_crop if self.training else self.center_crop
            x = crop(x)

        # Backbone features -> keypoints -> flat vector.
        keypoints = self.pool(self.backbone(x))
        flat_keypoints = torch.flatten(keypoints, start_dim=1)

        # Final linear layer with non-linearity.
        return self.relu(self.out(flat_keypoints))
def _replace_submodules(
    root_module: nn.Module, predicate: Callable[[nn.Module], bool], func: Callable[[nn.Module], nn.Module]
) -> nn.Module:
    """Replace every submodule selected by `predicate` with the module returned by `func`.

    Args:
        root_module: The module for which the submodules need to be replaced
        predicate: Takes a module as an argument and must return True if that module is to be replaced.
        func: Takes a module as an argument and returns a new module to replace it with.
    Returns:
        The root module with its submodules replaced. If the root module itself satisfies `predicate`,
        the result of `func(root_module)` is returned instead.
    """
    if predicate(root_module):
        return func(root_module)

    # Collect the qualified names first, so the module tree is not mutated while it is being traversed.
    matching_names = [
        name for name, module in root_module.named_modules(remove_duplicate=True) if predicate(module)
    ]

    for qualified_name in matching_names:
        # "a.b.c" -> parent path "a.b" and child key "c"; a top-level name has an empty parent path.
        parent_path, _, child_key = qualified_name.rpartition(".")
        parent = root_module.get_submodule(parent_path) if parent_path else root_module

        # Children of nn.Sequential are addressed by integer index, others by attribute name.
        parent_is_sequential = isinstance(parent, nn.Sequential)
        current = parent[int(child_key)] if parent_is_sequential else getattr(parent, child_key)
        replacement = func(current)
        if parent_is_sequential:
            parent[int(child_key)] = replacement
        else:
            setattr(parent, child_key, replacement)

    # Verify that no module matching the predicate is left.
    assert not any(predicate(module) for _, module in root_module.named_modules(remove_duplicate=True))
    return root_module
class DiffusionSinusoidalPosEmb(nn.Module):
    """1D sinusoidal positional embeddings as in Attention is All You Need.

    The embedding concatenates `dim // 2` sine features with `dim // 2` cosine features, so the output
    width is `2 * (dim // 2)` (i.e. `dim` for even `dim`). Frequencies are geometrically spaced from 1
    down to 1/10000.
    """

    def __init__(self, dim: int):
        """
        Args:
            dim: Requested embedding width.
        """
        super().__init__()
        self.dim = dim

    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x: Tensor of positions to embed (any shape, e.g. (B,)).
        Returns:
            Tensor of shape (*x.shape, 2 * (dim // 2)) with the sine half followed by the cosine half.
        """
        half_dim = self.dim // 2
        # With a single frequency (dim of 2 or 3) `half_dim - 1` is zero, which used to raise a
        # ZeroDivisionError. Clamping the denominator leaves that single frequency at 1 and does not change
        # the result for any larger `dim`.
        log_spacing = math.log(10000) / max(half_dim - 1, 1)
        frequencies = torch.exp(torch.arange(half_dim, device=x.device) * -log_spacing)
        # Broadcast positions against frequencies: (..., 1) * (1, half_dim) -> (..., half_dim).
        angles = x.unsqueeze(-1) * frequencies.unsqueeze(0)
        return torch.cat((angles.sin(), angles.cos()), dim=-1)
class DiffusionConv1dBlock(nn.Module):
    """Conv1d --> GroupNorm --> Mish"""

    def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8):
        """
        Args:
            inp_channels: Number of channels of the input sequence.
            out_channels: Number of channels produced by the convolution.
            kernel_size: Convolution kernel size; the padding is `kernel_size // 2`.
            n_groups: Number of groups for the GroupNorm layer.
        """
        super().__init__()

        conv = nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2)
        norm = nn.GroupNorm(n_groups, out_channels)
        activation = nn.Mish()
        self.block = nn.Sequential(conv, norm, activation)

    def forward(self, x):
        """Apply the convolution, normalization and activation in sequence to `x`."""
        return self.block(x)
class DiffusionConditionalUnet1d(nn.Module):
    """A 1D convolutional UNet with FiLM modulation for conditioning.

    Note: this removes local conditioning as compared to the original diffusion policy code.
    """

    def __init__(self, config: DiffusionConfig, global_cond_dim: int):
        """
        Args:
            config: Policy configuration; read for the step embedding size, the channel widths
                (`down_dims`), the action dimension and the residual block hyper-parameters.
            global_cond_dim: Size of the global conditioning vector passed to `forward`.
        """
        super().__init__()

        self.config = config

        step_dim = config.diffusion_step_embed_dim
        action_dim = config.action_feature.shape[0]
        down_dims = config.down_dims

        # Encoder for the diffusion timestep.
        self.diffusion_step_encoder = nn.Sequential(
            DiffusionSinusoidalPosEmb(step_dim),
            nn.Linear(step_dim, step_dim * 4),
            nn.Mish(),
            nn.Linear(step_dim * 4, step_dim),
        )

        # The FiLM conditioning dimension: timestep embedding plus global conditioning.
        cond_dim = step_dim + global_cond_dim

        # (in channels, out channels) for each downsampling block in the Unet's encoder. The decoder
        # walks these in reverse.
        in_out = [(action_dim, down_dims[0]), *zip(down_dims[:-1], down_dims[1:], strict=True)]
        last_index = len(in_out) - 1

        res_block_kwargs = {
            "cond_dim": cond_dim,
            "kernel_size": config.kernel_size,
            "n_groups": config.n_groups,
            "use_film_scale_modulation": config.use_film_scale_modulation,
        }

        # Unet encoder.
        self.down_modules = nn.ModuleList([])
        for ind, (dim_in, dim_out) in enumerate(in_out):
            first_res = DiffusionConditionalResidualBlock1d(dim_in, dim_out, **res_block_kwargs)
            second_res = DiffusionConditionalResidualBlock1d(dim_out, dim_out, **res_block_kwargs)
            # Downsample as long as it is not the last block.
            downsample = nn.Identity() if ind >= last_index else nn.Conv1d(dim_out, dim_out, 3, 2, 1)
            self.down_modules.append(nn.ModuleList([first_res, second_res, downsample]))

        # Processing in the middle of the auto-encoder.
        mid_dim = down_dims[-1]
        self.mid_modules = nn.ModuleList(
            [DiffusionConditionalResidualBlock1d(mid_dim, mid_dim, **res_block_kwargs) for _ in range(2)]
        )

        # Unet decoder.
        self.up_modules = nn.ModuleList([])
        for ind, (dim_out, dim_in) in enumerate(reversed(in_out[1:])):
            # dim_in * 2, because it takes the encoder's skip connection as well.
            first_res = DiffusionConditionalResidualBlock1d(dim_in * 2, dim_out, **res_block_kwargs)
            second_res = DiffusionConditionalResidualBlock1d(dim_out, dim_out, **res_block_kwargs)
            # NOTE: this loop runs over `in_out[1:]`, so `ind` never reaches `last_index` and the Identity
            # branch is never taken: every decoder stage upsamples, one per downsampling convolution in
            # the encoder.
            upsample = nn.Identity() if ind >= last_index else nn.ConvTranspose1d(dim_out, dim_out, 4, 2, 1)
            self.up_modules.append(nn.ModuleList([first_res, second_res, upsample]))

        self.final_conv = nn.Sequential(
            DiffusionConv1dBlock(down_dims[0], down_dims[0], kernel_size=config.kernel_size),
            nn.Conv1d(down_dims[0], action_dim, 1),
        )

    def forward(self, x: Tensor, timestep: Tensor | int, global_cond=None) -> Tensor:
        """
        Args:
            x: (B, T, input_dim) tensor for input to the Unet.
            timestep: (B,) tensor of (timestep_we_are_denoising_from - 1).
            global_cond: (B, global_cond_dim)
        Returns:
            (B, T, input_dim) diffusion model prediction.
        """
        # For 1D convolutions we'll need feature dimension first.
        x = einops.rearrange(x, "b t d -> b d t")

        timesteps_embed = self.diffusion_step_encoder(timestep)

        # If there is a global conditioning feature, concatenate it to the timestep embedding.
        if global_cond is None:
            global_feature = timesteps_embed
        else:
            global_feature = torch.cat([timesteps_embed, global_cond], dim=-1)

        # Run encoder, keeping track of skip features to pass to the decoder.
        encoder_skip_features: list[Tensor] = []
        for res_a, res_b, downsample in self.down_modules:
            x = res_b(res_a(x, global_feature), global_feature)
            encoder_skip_features.append(x)
            x = downsample(x)

        for mid_block in self.mid_modules:
            x = mid_block(x, global_feature)

        # Run decoder, consuming the skip features from the encoder in reverse order.
        for res_a, res_b, upsample in self.up_modules:
            skip = encoder_skip_features.pop()
            x = torch.cat((x, skip), dim=1)
            x = res_b(res_a(x, global_feature), global_feature)
            x = upsample(x)

        x = self.final_conv(x)

        # Back to (B, T, input_dim).
        return einops.rearrange(x, "b d t -> b t d")
class DiffusionConditionalResidualBlock1d(nn.Module):
    """ResNet style 1D convolutional block with FiLM modulation for conditioning."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        cond_dim: int,
        kernel_size: int = 3,
        n_groups: int = 8,
        # Set to True to do scale modulation with FiLM as well as bias modulation (defaults to False meaning
        # FiLM just modulates bias).
        use_film_scale_modulation: bool = False,
    ):
        """
        Args:
            in_channels: Number of channels of the input sequence.
            out_channels: Number of channels of the output sequence.
            cond_dim: Size of the conditioning vector.
            kernel_size: Kernel size of both convolutional sub-blocks.
            n_groups: Number of GroupNorm groups of both convolutional sub-blocks.
            use_film_scale_modulation: Whether the conditioning produces a per-channel scale in addition
                to a per-channel bias.
        """
        super().__init__()

        self.use_film_scale_modulation = use_film_scale_modulation
        self.out_channels = out_channels

        self.conv1 = DiffusionConv1dBlock(in_channels, out_channels, kernel_size, n_groups=n_groups)

        # FiLM modulation (https://huggingface.co/papers/1709.07871) outputs per-channel bias and (maybe) scale.
        if use_film_scale_modulation:
            film_channels = out_channels * 2
        else:
            film_channels = out_channels
        self.cond_encoder = nn.Sequential(nn.Mish(), nn.Linear(cond_dim, film_channels))

        self.conv2 = DiffusionConv1dBlock(out_channels, out_channels, kernel_size, n_groups=n_groups)

        # A 1x1 convolution matches the residual's channel count when it differs from the output's.
        if in_channels == out_channels:
            self.residual_conv = nn.Identity()
        else:
            self.residual_conv = nn.Conv1d(in_channels, out_channels, 1)

    def forward(self, x: Tensor, cond: Tensor) -> Tensor:
        """
        Args:
            x: (B, in_channels, T)
            cond: (B, cond_dim)
        Returns:
            (B, out_channels, T)
        """
        hidden = self.conv1(x)

        # Condition embedding with a trailing singleton axis so that it broadcasts over T.
        film = self.cond_encoder(cond).unsqueeze(-1)

        if not self.use_film_scale_modulation:
            # The whole embedding is a per-channel bias.
            hidden = hidden + film
        else:
            # First half of the channels is the scale, second half the bias.
            scale = film[:, : self.out_channels]
            bias = film[:, self.out_channels :]
            hidden = scale * hidden + bias

        hidden = self.conv2(hidden)
        return hidden + self.residual_conv(x)