mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-31 10:51:35 +00:00
Compare commits
38 Commits
user/fraca
...
smolvla-de
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
61580a8596 | ||
|
|
6c8f1f962b | ||
|
|
7848b15bfb | ||
|
|
008b592545 | ||
|
|
55a61259e8 | ||
|
|
e94d78f8a0 | ||
|
|
67c8d27e9c | ||
|
|
c8b51ef205 | ||
|
|
9779c58b07 | ||
|
|
5fbe4f9987 | ||
|
|
f47fd7aabf | ||
|
|
a9251e612f | ||
|
|
aec1b29d23 | ||
|
|
63ddfefa08 | ||
|
|
596e9050bd | ||
|
|
6047bbee10 | ||
|
|
1522e60f83 | ||
|
|
d4ee470b00 | ||
|
|
2b27084d63 | ||
|
|
ddb26b7189 | ||
|
|
483be9aac2 | ||
|
|
96550e4ad1 | ||
|
|
69901b9b6a | ||
|
|
c0146eed7f | ||
|
|
2f9ba4e2cc | ||
|
|
f3d931e1b2 | ||
|
|
0447604fce | ||
|
|
0b2285d1ec | ||
|
|
a989c79558 | ||
|
|
06450c6777 | ||
|
|
fe88c5942c | ||
|
|
a5727e37b4 | ||
|
|
c940676bdd | ||
|
|
2b71789e15 | ||
|
|
7c8be7fb9b | ||
|
|
b8637c09ec | ||
|
|
1688fa3a88 | ||
|
|
b852d15774 |
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -24,7 +24,7 @@ Examples:
|
||||
pytest -sx tests/test_stuff.py::test_something
|
||||
```
|
||||
```bash
|
||||
python lerobot/scripts/train.py --some.option=true
|
||||
python -m lerobot.scripts.train --some.option=true
|
||||
```
|
||||
|
||||
## SECTION TO REMOVE BEFORE SUBMITTING YOUR PR
|
||||
|
||||
4
.github/workflows/nightly-tests.yml
vendored
4
.github/workflows/nightly-tests.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
working-directory: /lerobot
|
||||
steps:
|
||||
- name: Tests
|
||||
run: pytest -v --cov=./lerobot --disable-warnings tests
|
||||
run: pytest -v --cov=./src/lerobot --disable-warnings tests
|
||||
|
||||
- name: Tests end-to-end
|
||||
run: make test-end-to-end
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Test
|
||||
run: pytest -v --cov=./lerobot --cov-report=xml --disable-warnings tests
|
||||
run: pytest -v --cov=./src/lerobot --cov-report=xml --disable-warnings tests
|
||||
# TODO(aliberts): Link with HF Codecov account
|
||||
# - name: Upload coverage reports to Codecov with GitHub Action
|
||||
# uses: codecov/codecov-action@v4
|
||||
|
||||
8
.github/workflows/test.yml
vendored
8
.github/workflows/test.yml
vendored
@@ -17,7 +17,7 @@ name: Tests
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "lerobot/**"
|
||||
- "src/**"
|
||||
- "tests/**"
|
||||
- "examples/**"
|
||||
- ".github/**"
|
||||
@@ -29,7 +29,7 @@ on:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "lerobot/**"
|
||||
- "src/**"
|
||||
- "tests/**"
|
||||
- "examples/**"
|
||||
- ".github/**"
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
uv run pytest tests -v --cov=./lerobot --durations=0 \
|
||||
uv run pytest tests -v --cov=./src/lerobot --durations=0 \
|
||||
-W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
|
||||
-W ignore::UserWarning:torch.utils.data.dataloader:558 \
|
||||
-W ignore::UserWarning:gymnasium.utils.env_checker:247 \
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
uv run pytest tests -v --cov=./lerobot --durations=0 \
|
||||
uv run pytest tests -v --cov=./src/lerobot --durations=0 \
|
||||
-W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
|
||||
-W ignore::UserWarning:torch.utils.data.dataloader:558 \
|
||||
-W ignore::UserWarning:gymnasium.utils.env_checker:247 \
|
||||
|
||||
@@ -67,7 +67,7 @@ post it.
|
||||
|
||||
## Adding new policies, datasets or environments
|
||||
|
||||
Look at our implementations for [datasets](./lerobot/common/datasets/), [policies](./lerobot/common/policies/),
|
||||
Look at our implementations for [datasets](./src/lerobot/datasets/), [policies](./src/lerobot/policies/),
|
||||
environments ([aloha](https://github.com/huggingface/gym-aloha),
|
||||
[xarm](https://github.com/huggingface/gym-xarm),
|
||||
[pusht](https://github.com/huggingface/gym-pusht))
|
||||
|
||||
2
MANIFEST.in
Normal file
2
MANIFEST.in
Normal file
@@ -0,0 +1,2 @@
|
||||
include src/lerobot/templates/lerobot_modelcard_template.md
|
||||
include src/lerobot/datasets/card_template.md
|
||||
52
Makefile
52
Makefile
@@ -40,14 +40,17 @@ test-end-to-end:
|
||||
${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval
|
||||
${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train
|
||||
${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval
|
||||
${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-train
|
||||
${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-eval
|
||||
|
||||
test-act-ete-train:
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.type=act \
|
||||
--policy.dim_model=64 \
|
||||
--policy.n_action_steps=20 \
|
||||
--policy.chunk_size=20 \
|
||||
--policy.device=$(DEVICE) \
|
||||
--policy.push_to_hub=false \
|
||||
--env.type=aloha \
|
||||
--env.episode_length=5 \
|
||||
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
|
||||
@@ -65,12 +68,12 @@ test-act-ete-train:
|
||||
--output_dir=tests/outputs/act/
|
||||
|
||||
test-act-ete-train-resume:
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--config_path=tests/outputs/act/checkpoints/000002/pretrained_model/train_config.json \
|
||||
--resume=true
|
||||
|
||||
test-act-ete-eval:
|
||||
python lerobot/scripts/eval.py \
|
||||
python -m lerobot.scripts.eval \
|
||||
--policy.path=tests/outputs/act/checkpoints/000004/pretrained_model \
|
||||
--policy.device=$(DEVICE) \
|
||||
--env.type=aloha \
|
||||
@@ -79,12 +82,13 @@ test-act-ete-eval:
|
||||
--eval.batch_size=1
|
||||
|
||||
test-diffusion-ete-train:
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.type=diffusion \
|
||||
--policy.down_dims='[64,128,256]' \
|
||||
--policy.diffusion_step_embed_dim=32 \
|
||||
--policy.num_inference_steps=10 \
|
||||
--policy.device=$(DEVICE) \
|
||||
--policy.push_to_hub=false \
|
||||
--env.type=pusht \
|
||||
--env.episode_length=5 \
|
||||
--dataset.repo_id=lerobot/pusht \
|
||||
@@ -102,7 +106,7 @@ test-diffusion-ete-train:
|
||||
--output_dir=tests/outputs/diffusion/
|
||||
|
||||
test-diffusion-ete-eval:
|
||||
python lerobot/scripts/eval.py \
|
||||
python -m lerobot.scripts.eval \
|
||||
--policy.path=tests/outputs/diffusion/checkpoints/000002/pretrained_model \
|
||||
--policy.device=$(DEVICE) \
|
||||
--env.type=pusht \
|
||||
@@ -111,9 +115,10 @@ test-diffusion-ete-eval:
|
||||
--eval.batch_size=1
|
||||
|
||||
test-tdmpc-ete-train:
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.type=tdmpc \
|
||||
--policy.device=$(DEVICE) \
|
||||
--policy.push_to_hub=false \
|
||||
--env.type=xarm \
|
||||
--env.task=XarmLift-v0 \
|
||||
--env.episode_length=5 \
|
||||
@@ -132,7 +137,7 @@ test-tdmpc-ete-train:
|
||||
--output_dir=tests/outputs/tdmpc/
|
||||
|
||||
test-tdmpc-ete-eval:
|
||||
python lerobot/scripts/eval.py \
|
||||
python -m lerobot.scripts.eval \
|
||||
--policy.path=tests/outputs/tdmpc/checkpoints/000002/pretrained_model \
|
||||
--policy.device=$(DEVICE) \
|
||||
--env.type=xarm \
|
||||
@@ -140,3 +145,36 @@ test-tdmpc-ete-eval:
|
||||
--env.task=XarmLift-v0 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.batch_size=1
|
||||
|
||||
|
||||
test-smolvla-ete-train:
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.type=smolvla \
|
||||
--policy.n_action_steps=20 \
|
||||
--policy.chunk_size=20 \
|
||||
--policy.device=$(DEVICE) \
|
||||
--policy.push_to_hub=false \
|
||||
--env.type=aloha \
|
||||
--env.episode_length=5 \
|
||||
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
|
||||
--dataset.image_transforms.enable=true \
|
||||
--dataset.episodes="[0]" \
|
||||
--batch_size=2 \
|
||||
--steps=4 \
|
||||
--eval_freq=2 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.batch_size=1 \
|
||||
--save_freq=2 \
|
||||
--save_checkpoint=true \
|
||||
--log_freq=1 \
|
||||
--wandb.enable=false \
|
||||
--output_dir=tests/outputs/smolvla/
|
||||
|
||||
test-smolvla-ete-eval:
|
||||
python -m lerobot.scripts.eval \
|
||||
--policy.path=tests/outputs/smolvla/checkpoints/000004/pretrained_model \
|
||||
--policy.device=$(DEVICE) \
|
||||
--env.type=aloha \
|
||||
--env.episode_length=5 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.batch_size=1
|
||||
|
||||
44
README.md
44
README.md
@@ -130,7 +130,7 @@ pip install -e .
|
||||
```
|
||||
|
||||
> **NOTE:** If you encounter build errors, you may need to install additional dependencies (`cmake`, `build-essential`, and `ffmpeg libs`). On Linux, run:
|
||||
`sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev pkg-config`. For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg)
|
||||
`sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev`. For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg)
|
||||
|
||||
For simulations, 🤗 LeRobot comes with gymnasium environments that can be installed as extras:
|
||||
- [aloha](https://github.com/huggingface/gym-aloha)
|
||||
@@ -149,44 +149,20 @@ wandb login
|
||||
|
||||
(note: you will also need to enable WandB in the configuration. See below.)
|
||||
|
||||
## Walkthrough
|
||||
|
||||
```
|
||||
.
|
||||
├── examples # contains demonstration examples, start here to learn about LeRobot
|
||||
| └── advanced # contains even more examples for those who have mastered the basics
|
||||
├── lerobot
|
||||
| ├── configs # contains config classes with all options that you can override in the command line
|
||||
| ├── common # contains classes and utilities
|
||||
| | ├── datasets # various datasets of human demonstrations: aloha, pusht, xarm
|
||||
| | ├── envs # various sim environments: aloha, pusht, xarm
|
||||
| | ├── policies # various policies: act, diffusion, tdmpc
|
||||
| | ├── robot_devices # various real devices: dynamixel motors, opencv cameras, koch robots
|
||||
| | └── utils # various utilities
|
||||
| └── scripts # contains functions to execute via command line
|
||||
| ├── eval.py # load policy and evaluate it on an environment
|
||||
| ├── train.py # train a policy via imitation learning and/or reinforcement learning
|
||||
| ├── control_robot.py # teleoperate a real robot, record data, run a policy
|
||||
| ├── push_dataset_to_hub.py # convert your dataset into LeRobot dataset format and upload it to the Hugging Face hub
|
||||
| └── visualize_dataset.py # load a dataset and render its demonstrations
|
||||
├── outputs # contains results of scripts execution: logs, videos, model checkpoints
|
||||
└── tests # contains pytest utilities for continuous integration
|
||||
```
|
||||
|
||||
### Visualize datasets
|
||||
|
||||
Check out [example 1](./examples/1_load_lerobot_dataset.py) that illustrates how to use our dataset class which automatically downloads data from the Hugging Face hub.
|
||||
|
||||
You can also locally visualize episodes from a dataset on the hub by executing our script from the command line:
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset.py \
|
||||
python -m lerobot.scripts.visualize_dataset \
|
||||
--repo-id lerobot/pusht \
|
||||
--episode-index 0
|
||||
```
|
||||
|
||||
or from a dataset in a local folder with the `root` option and the `--local-files-only` (in the following case the dataset will be searched for in `./my_local_data_dir/lerobot/pusht`)
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset.py \
|
||||
python -m lerobot.scripts.visualize_dataset \
|
||||
--repo-id lerobot/pusht \
|
||||
--root ./my_local_data_dir \
|
||||
--local-files-only 1 \
|
||||
@@ -199,7 +175,7 @@ It will open `rerun.io` and display the camera streams, robot states and actions
|
||||
https://github-production-user-asset-6210df.s3.amazonaws.com/4681518/328035972-fd46b787-b532-47e2-bb6f-fd536a55a7ed.mov?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20240505%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240505T172924Z&X-Amz-Expires=300&X-Amz-Signature=d680b26c532eeaf80740f08af3320d22ad0b8a4e4da1bcc4f33142c15b509eda&X-Amz-SignedHeaders=host&actor_id=24889239&key_id=0&repo_id=748713144
|
||||
|
||||
|
||||
Our script can also visualize datasets stored on a distant server. See `python lerobot/scripts/visualize_dataset.py --help` for more instructions.
|
||||
Our script can also visualize datasets stored on a distant server. See `python -m lerobot.scripts.visualize_dataset --help` for more instructions.
|
||||
|
||||
### The `LeRobotDataset` format
|
||||
|
||||
@@ -252,7 +228,7 @@ Check out [example 2](./examples/2_evaluate_pretrained_policy.py) that illustrat
|
||||
|
||||
We also provide a more capable script to parallelize the evaluation over multiple environments during the same rollout. Here is an example with a pretrained model hosted on [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht):
|
||||
```bash
|
||||
python lerobot/scripts/eval.py \
|
||||
python -m lerobot.scripts.eval \
|
||||
--policy.path=lerobot/diffusion_pusht \
|
||||
--env.type=pusht \
|
||||
--eval.batch_size=10 \
|
||||
@@ -264,10 +240,10 @@ python lerobot/scripts/eval.py \
|
||||
Note: After training your own policy, you can re-evaluate the checkpoints with:
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/eval.py --policy.path={OUTPUT_DIR}/checkpoints/last/pretrained_model
|
||||
python -m lerobot.scripts.eval --policy.path={OUTPUT_DIR}/checkpoints/last/pretrained_model
|
||||
```
|
||||
|
||||
See `python lerobot/scripts/eval.py --help` for more instructions.
|
||||
See `python -m lerobot.scripts.eval --help` for more instructions.
|
||||
|
||||
### Train your own policy
|
||||
|
||||
@@ -279,14 +255,14 @@ A link to the wandb logs for the run will also show up in yellow in your termina
|
||||
|
||||

|
||||
|
||||
Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `--eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python lerobot/scripts/eval.py --help` for more instructions.
|
||||
Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `--eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python -m lerobot.scripts.eval --help` for more instructions.
|
||||
|
||||
#### Reproduce state-of-the-art (SOTA)
|
||||
|
||||
We provide some pretrained policies on our [hub page](https://huggingface.co/lerobot) that can achieve state-of-the-art performances.
|
||||
You can reproduce their training by loading the config from their run. Simply running:
|
||||
```bash
|
||||
python lerobot/scripts/train.py --config_path=lerobot/diffusion_pusht
|
||||
python -m lerobot.scripts.train --config_path=lerobot/diffusion_pusht
|
||||
```
|
||||
reproduces SOTA results for Diffusion Policy on the PushT task.
|
||||
|
||||
@@ -312,7 +288,7 @@ python lerobot/scripts/push_dataset_to_hub.py \
|
||||
|
||||
See `python lerobot/scripts/push_dataset_to_hub.py --help` for more instructions.
|
||||
|
||||
If your dataset format is not supported, implement your own in `lerobot/common/datasets/push_dataset_to_hub/${raw_format}_format.py` by copying examples like [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py), or [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py). -->
|
||||
If your dataset format is not supported, implement your own in `lerobot/datasets/push_dataset_to_hub/${raw_format}_format.py` by copying examples like [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/aloha_hdf5_format.py), or [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/xarm_pkl_format.py). -->
|
||||
|
||||
|
||||
### Add a pretrained policy
|
||||
|
||||
2
benchmarks/video/capture_camera_feed.py
Normal file → Executable file
2
benchmarks/video/capture_camera_feed.py
Normal file → Executable file
@@ -55,7 +55,7 @@ def display_and_save_video_stream(output_dir: Path, fps: int, width: int, height
|
||||
if not ret:
|
||||
print("Error: Could not read frame.")
|
||||
break
|
||||
rr.log("video/stream", rr.Image(frame.numpy()), static=True)
|
||||
rr.log("video/stream", rr.Image(frame), static=True)
|
||||
cv2.imwrite(str(capture_dir / f"frame_{frame_index:06d}.png"), frame)
|
||||
frame_index += 1
|
||||
|
||||
|
||||
@@ -35,12 +35,12 @@ import torch
|
||||
from skimage.metrics import mean_squared_error, peak_signal_noise_ratio, structural_similarity
|
||||
from tqdm import tqdm
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.common.datasets.video_utils import (
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.video_utils import (
|
||||
decode_video_frames_torchvision,
|
||||
encode_video_frames,
|
||||
)
|
||||
from lerobot.common.utils.benchmark import TimeBenchmark
|
||||
from lerobot.utils.benchmark import TimeBenchmark
|
||||
|
||||
BASE_ENCODING = OrderedDict(
|
||||
[
|
||||
|
||||
@@ -22,7 +22,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
COPY . /lerobot
|
||||
WORKDIR /lerobot
|
||||
RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \
|
||||
&& /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht]" \
|
||||
&& /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, smolvla]" \
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
# Execute in bash shell rather than python
|
||||
|
||||
@@ -21,4 +21,4 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
COPY . /lerobot
|
||||
WORKDIR /lerobot
|
||||
RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \
|
||||
&& /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]"
|
||||
&& /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel, smolvla]"
|
||||
|
||||
@@ -8,7 +8,7 @@ To instantiate a camera, you need a camera identifier. This identifier might cha
|
||||
|
||||
To find the camera indices of the cameras plugged into your system, run the following script:
|
||||
```bash
|
||||
python lerobot/find_cameras.py opencv # or realsense for Intel Realsense cameras
|
||||
python -m lerobot.find_cameras opencv # or realsense for Intel Realsense cameras
|
||||
```
|
||||
|
||||
The output will look something like this if you have two cameras connected:
|
||||
@@ -44,9 +44,9 @@ Below are two examples, demonstrating how to work with the API.
|
||||
<hfoption id="Open CV Camera">
|
||||
|
||||
```python
|
||||
from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.common.cameras.opencv.camera_opencv import OpenCVCamera
|
||||
from lerobot.common.cameras.configs import ColorMode, Cv2Rotation
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.cameras.opencv.camera_opencv import OpenCVCamera
|
||||
from lerobot.cameras.configs import ColorMode, Cv2Rotation
|
||||
|
||||
# Construct an `OpenCVCameraConfig` with your desired FPS, resolution, color mode, and rotation.
|
||||
config = OpenCVCameraConfig(
|
||||
@@ -75,9 +75,9 @@ finally:
|
||||
<hfoption id="Intel Realsense Camera">
|
||||
|
||||
```python
|
||||
from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig
|
||||
from lerobot.common.cameras.realsense.camera_realsense import RealSenseCamera
|
||||
from lerobot.common.cameras.configs import ColorMode, Cv2Rotation
|
||||
from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig
|
||||
from lerobot.cameras.realsense.camera_realsense import RealSenseCamera
|
||||
from lerobot.cameras.configs import ColorMode, Cv2Rotation
|
||||
|
||||
# Create a `RealSenseCameraConfig` specifying your camera’s serial number and enabling depth.
|
||||
config = RealSenseCameraConfig(
|
||||
|
||||
@@ -24,6 +24,7 @@ This guide provides step-by-step instructions for training a robot policy using
|
||||
- A gamepad (recommended) or keyboard to control the robot
|
||||
- A Nvidia GPU
|
||||
- A real robot with a follower and leader arm (optional if you use the keyboard or the gamepad)
|
||||
- A URDF file for the robot for the kinematics package (check `lerobot/common/model/kinematics.py`)
|
||||
|
||||
## What kind of tasks can I train?
|
||||
|
||||
@@ -50,12 +51,12 @@ pip install -e ".[hilserl]"
|
||||
|
||||
### Understanding Configuration
|
||||
|
||||
The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/common/envs/configs.py`. Which is defined as:
|
||||
The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/envs/configs.py`. Which is defined as:
|
||||
|
||||
```python
|
||||
class HILSerlRobotEnvConfig(EnvConfig):
|
||||
robot: RobotConfig | None = None # Main robot agent (defined in `lerobot/common/robots`)
|
||||
teleop: TeleoperatorConfig | None = None # Teleoperator agent, e.g., gamepad or leader arm, (defined in `lerobot/common/teleoperators`)
|
||||
robot: RobotConfig | None = None # Main robot agent (defined in `lerobot/robots`)
|
||||
teleop: TeleoperatorConfig | None = None # Teleoperator agent, e.g., gamepad or leader arm, (defined in `lerobot/teleoperators`)
|
||||
wrapper: EnvTransformConfig | None = None # Environment wrapper settings; check `lerobot/scripts/server/gym_manipulator.py`
|
||||
fps: int = 10 # Control frequency
|
||||
name: str = "real_robot" # Environment name
|
||||
@@ -172,7 +173,7 @@ class SO100FollowerEndEffectorConfig(SO100FollowerConfig):
|
||||
)
|
||||
```
|
||||
|
||||
The `Teleoperator` defines the teleoperation device. You can check the list of available teleoperators in `lerobot/common/teleoperators`.
|
||||
The `Teleoperator` defines the teleoperation device. You can check the list of available teleoperators in `lerobot/teleoperators`.
|
||||
|
||||
**Setting up the Gamepad**
|
||||
|
||||
@@ -226,7 +227,7 @@ During the online training, press `space` to take over the policy and `space` ag
|
||||
Start the recording process, an example of the config file can be found [here](https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/env_config_so100.json):
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/rl/gym_manipulator.py --config_path lerobot/configs/env_config_so100.json
|
||||
python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config_so100.json
|
||||
```
|
||||
|
||||
During recording:
|
||||
@@ -256,7 +257,7 @@ Note: If you already know the crop parameters, you can skip this step and just s
|
||||
Use the `crop_dataset_roi.py` script to interactively select regions of interest in your camera images:
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/rl/crop_dataset_roi.py --repo-id username/pick_lift_cube
|
||||
python -m lerobot.scripts.rl.crop_dataset_roi --repo-id username/pick_lift_cube
|
||||
```
|
||||
|
||||
1. For each camera view, the script will display the first frame
|
||||
@@ -313,7 +314,7 @@ Before training, you need to collect a dataset with labeled examples. The `recor
|
||||
To collect a dataset, you need to modify some parameters in the environment configuration based on HILSerlRobotEnvConfig.
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/rl/gym_manipulator.py --config_path lerobot/configs/reward_classifier_train_config.json
|
||||
python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/reward_classifier_train_config.json
|
||||
```
|
||||
|
||||
**Key Parameters for Data Collection**
|
||||
@@ -387,7 +388,7 @@ Example configuration for training the [reward classifier](https://huggingface.c
|
||||
To train the classifier, use the `train.py` script with your configuration:
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/train.py --config_path path/to/reward_classifier_train_config.json
|
||||
python -m lerobot.scripts.train --config_path path/to/reward_classifier_train_config.json
|
||||
```
|
||||
|
||||
**Deploying and Testing the Model**
|
||||
@@ -410,7 +411,7 @@ or set the argument in the json config file.
|
||||
|
||||
Run `gym_manipulator.py` to test the model.
|
||||
```bash
|
||||
python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/env_config.json
|
||||
python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config.json
|
||||
```
|
||||
|
||||
The reward classifier will automatically provide rewards based on the visual input from the robot's cameras.
|
||||
@@ -422,17 +423,17 @@ The reward classifier will automatically provide rewards based on the visual inp
|
||||
|
||||
2. **Collect a dataset**:
|
||||
```bash
|
||||
python lerobot/scripts/rl/gym_manipulator.py --config_path lerobot/configs/env_config.json
|
||||
python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config.json
|
||||
```
|
||||
|
||||
3. **Train the classifier**:
|
||||
```bash
|
||||
python lerobot/scripts/train.py --config_path lerobot/configs/reward_classifier_train_config.json
|
||||
python -m lerobot.scripts.train --config_path src/lerobot/configs/reward_classifier_train_config.json
|
||||
```
|
||||
|
||||
4. **Test the classifier**:
|
||||
```bash
|
||||
python lerobot/scripts/rl/gym_manipulator.py --config_path lerobot/configs/env_config.json
|
||||
python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config.json
|
||||
```
|
||||
|
||||
### Training with Actor-Learner
|
||||
@@ -446,7 +447,7 @@ Create a training configuration file (example available [here](https://huggingfa
|
||||
1. Configure the policy settings (`type="sac"`, `device`, etc.)
|
||||
2. Set `dataset` to your cropped dataset
|
||||
3. Configure environment settings with crop parameters
|
||||
4. Check the other parameters related to SAC in [configuration_sac.py](https://github.com/huggingface/lerobot/blob/19bb621a7d0a31c20cd3cc08b1dbab68d3031454/lerobot/common/policies/sac/configuration_sac.py#L79).
|
||||
4. Check the other parameters related to SAC in [configuration_sac.py](https://github.com/huggingface/lerobot/blob/19bb621a7d0a31c20cd3cc08b1dbab68d3031454/lerobot/policies/sac/configuration_sac.py#L79).
|
||||
5. Verify that the `policy` config is correct with the right `input_features` and `output_features` for your task.
|
||||
|
||||
**Starting the Learner**
|
||||
@@ -454,7 +455,7 @@ Create a training configuration file (example available [here](https://huggingfa
|
||||
First, start the learner server process:
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/rl/learner.py --config_path lerobot/configs/train_config_hilserl_so100.json
|
||||
python -m lerobot.scripts.rl.learner --config_path src/lerobot/configs/train_config_hilserl_so100.json
|
||||
```
|
||||
|
||||
The learner:
|
||||
@@ -468,7 +469,7 @@ The learner:
|
||||
In a separate terminal, start the actor process with the same configuration:
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/rl/actor.py --config_path lerobot/configs/train_config_hilserl_so100.json
|
||||
python -m lerobot.scripts.rl.actor --config_path src/lerobot/configs/train_config_hilserl_so100.json
|
||||
```
|
||||
|
||||
The actor:
|
||||
|
||||
@@ -77,7 +77,7 @@ Important parameters:
|
||||
To run the environment, set mode to null:
|
||||
|
||||
```python
|
||||
python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/gym_hil_env.json
|
||||
python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.json
|
||||
```
|
||||
|
||||
### Recording a Dataset
|
||||
@@ -85,7 +85,7 @@ python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/gym_hil_env.j
|
||||
To collect a dataset, set the mode to `record` whilst defining the repo_id and number of episodes to record:
|
||||
|
||||
```python
|
||||
python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/gym_hil_env.json
|
||||
python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.json
|
||||
```
|
||||
|
||||
### Training a Policy
|
||||
@@ -93,13 +93,13 @@ python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/gym_hil_env.j
|
||||
To train a policy, checkout the configuration example available [here](https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/train_gym_hil_env.json) and run the actor and learner servers:
|
||||
|
||||
```python
|
||||
python lerobot/scripts/rl/actor.py --config_path path/to/train_gym_hil_env.json
|
||||
python -m lerobot.scripts.rl.actor --config_path path/to/train_gym_hil_env.json
|
||||
```
|
||||
|
||||
In a different terminal, run the learner server:
|
||||
|
||||
```python
|
||||
python lerobot/scripts/rl/learner.py --config_path path/to/train_gym_hil_env.json
|
||||
python -m lerobot.scripts.rl.learner --config_path path/to/train_gym_hil_env.json
|
||||
```
|
||||
|
||||
The simulation environment provides a safe and repeatable way to develop and test your Human-In-the-Loop reinforcement learning components before deploying to real robots.
|
||||
|
||||
@@ -52,8 +52,8 @@ python -m lerobot.teleoperate \
|
||||
</hfoption>
|
||||
<hfoption id="API example">
|
||||
```python
|
||||
from lerobot.common.teleoperators.so101_leader import SO101LeaderConfig, SO101Leader
|
||||
from lerobot.common.robots.so101_follower import SO101FollowerConfig, SO101Follower
|
||||
from lerobot.teleoperators.so101_leader import SO101LeaderConfig, SO101Leader
|
||||
from lerobot.robots.so101_follower import SO101FollowerConfig, SO101Follower
|
||||
|
||||
robot_config = SO101FollowerConfig(
|
||||
port="/dev/tty.usbmodem58760431541",
|
||||
@@ -105,9 +105,9 @@ python -m lerobot.teleoperate \
|
||||
</hfoption>
|
||||
<hfoption id="API example">
|
||||
```python
|
||||
from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.common.teleoperators.koch_leader import KochLeaderConfig, KochLeader
|
||||
from lerobot.common.robots.koch_follower import KochFollowerConfig, KochFollower
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.teleoperators.koch_leader import KochLeaderConfig, KochLeader
|
||||
from lerobot.robots.koch_follower import KochFollowerConfig, KochFollower
|
||||
|
||||
camera_config = {
|
||||
"front": OpenCVCameraConfig(index_or_path=0, width=1920, height=1080, fps=30)
|
||||
@@ -154,7 +154,10 @@ HF_USER=$(huggingface-cli whoami | head -n 1)
|
||||
echo $HF_USER
|
||||
```
|
||||
|
||||
Now you can record a dataset. To record 2 episodes and upload your dataset to the hub, execute this command tailored to the SO101.
|
||||
Now you can record a dataset. To record 5 episodes and upload your dataset to the hub, adapt the code below for your robot and execute the command or API example.
|
||||
|
||||
<hfoptions id="record">
|
||||
<hfoption id="Command">
|
||||
```bash
|
||||
python -m lerobot.record \
|
||||
--robot.type=so101_follower \
|
||||
@@ -166,9 +169,109 @@ python -m lerobot.record \
|
||||
--teleop.id=my_awesome_leader_arm \
|
||||
--display_data=true \
|
||||
--dataset.repo_id=${HF_USER}/record-test \
|
||||
--dataset.num_episodes=2 \
|
||||
--dataset.num_episodes=5 \
|
||||
--dataset.single_task="Grab the black cube"
|
||||
```
|
||||
</hfoption>
|
||||
<hfoption id="API example">
|
||||
```python
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.robots.so100_follower import SO100Follower, SO100FollowerConfig
|
||||
from lerobot.teleoperators.so100_leader.config_so100_leader import SO100LeaderConfig
|
||||
from lerobot.teleoperators.so100_leader.so100_leader import SO100Leader
|
||||
from lerobot.utils.control_utils import init_keyboard_listener
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import _init_rerun
|
||||
from lerobot.record import record_loop
|
||||
|
||||
NUM_EPISODES = 5
|
||||
FPS = 30
|
||||
EPISODE_TIME_SEC = 60
|
||||
RESET_TIME_SEC = 10
|
||||
TASK_DESCRIPTION = "My task description"
|
||||
|
||||
# Create the robot and teleoperator configurations
|
||||
camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
|
||||
robot_config = SO100FollowerConfig(
|
||||
port="/dev/tty.usbmodem58760434471", id="my_awesome_follower_arm", cameras=camera_config
|
||||
)
|
||||
teleop_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm")
|
||||
|
||||
# Initialize the robot and teleoperator
|
||||
robot = SO100Follower(robot_config)
|
||||
teleop = SO100Leader(teleop_config)
|
||||
|
||||
# Configure the dataset features
|
||||
action_features = hw_to_dataset_features(robot.action_features, "action")
|
||||
obs_features = hw_to_dataset_features(robot.observation_features, "observation")
|
||||
dataset_features = {**action_features, **obs_features}
|
||||
|
||||
# Create the dataset
|
||||
dataset = LeRobotDataset.create(
|
||||
repo_id="<hf_username>/<dataset_repo_id>",
|
||||
fps=FPS,
|
||||
features=dataset_features,
|
||||
robot_type=robot.name,
|
||||
use_videos=True,
|
||||
image_writer_threads=4,
|
||||
)
|
||||
|
||||
# Initialize the keyboard listener and rerun visualization
|
||||
_, events = init_keyboard_listener()
|
||||
_init_rerun(session_name="recording")
|
||||
|
||||
# Connect the robot and teleoperator
|
||||
robot.connect()
|
||||
teleop.connect()
|
||||
|
||||
episode_idx = 0
|
||||
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=teleop,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=teleop,
|
||||
control_time_s=RESET_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
)
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-recording episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
dataset.save_episode()
|
||||
episode_idx += 1
|
||||
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
teleop.disconnect()
|
||||
dataset.push_to_hub()
|
||||
```
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
#### Dataset upload
|
||||
Locally, your dataset is stored in this folder: `~/.cache/huggingface/lerobot/{repo-id}`. At the end of data recording, your dataset will be uploaded on your Hugging Face page (e.g. https://huggingface.co/datasets/cadene/so101_test) that you can obtain by running:
|
||||
@@ -190,7 +293,7 @@ The `record` function provides a suite of tools for capturing and managing data
|
||||
|
||||
##### 2. Checkpointing and Resuming
|
||||
- Checkpoints are automatically created during recording.
|
||||
- If an issue occurs, you can resume by re-running the same command with `--control.resume=true`.
|
||||
- If an issue occurs, you can resume by re-running the same command with `--resume=true`.
|
||||
- To start recording from scratch, **manually delete** the dataset directory.
|
||||
|
||||
##### 3. Recording Parameters
|
||||
@@ -233,7 +336,10 @@ echo ${HF_USER}/so101_test
|
||||
|
||||
A useful feature is the `replay` function, which allows you to replay any episode that you've recorded or episodes from any dataset out there. This function helps you test the repeatability of your robot's actions and assess transferability across robots of the same model.
|
||||
|
||||
You can replay the first episode on your robot with:
|
||||
You can replay the first episode on your robot with either the command below or with the API example:
|
||||
|
||||
<hfoptions id="replay">
|
||||
<hfoption id="Command">
|
||||
```bash
|
||||
python -m lerobot.replay \
|
||||
--robot.type=so101_follower \
|
||||
@@ -242,25 +348,62 @@ python -m lerobot.replay \
|
||||
--dataset.repo_id=${HF_USER}/record-test \
|
||||
--dataset.episode=0 # choose the episode you want to replay
|
||||
```
|
||||
</hfoption>
|
||||
<hfoption id="API example">
|
||||
```python
|
||||
import time
|
||||
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig
|
||||
from lerobot.robots.so100_follower.so100_follower import SO100Follower
|
||||
from lerobot.utils.robot_utils import busy_wait
|
||||
from lerobot.utils.utils import log_say
|
||||
|
||||
episode_idx = 0
|
||||
|
||||
robot_config = SO100FollowerConfig(port="/dev/tty.usbmodem58760434471", id="my_awesome_follower_arm")
|
||||
|
||||
robot = SO100Follower(robot_config)
|
||||
robot.connect()
|
||||
|
||||
dataset = LeRobotDataset("<hf_username>/<dataset_repo_id>", episodes=[episode_idx])
|
||||
actions = dataset.hf_dataset.select_columns("action")
|
||||
|
||||
log_say(f"Replaying episode {episode_idx}")
|
||||
for idx in range(dataset.num_frames):
|
||||
t0 = time.perf_counter()
|
||||
|
||||
action = {
|
||||
name: float(actions[idx]["action"][i]) for i, name in enumerate(dataset.features["action"]["names"])
|
||||
}
|
||||
robot.send_action(action)
|
||||
|
||||
busy_wait(1.0 / dataset.fps - (time.perf_counter() - t0))
|
||||
|
||||
robot.disconnect()
|
||||
```
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
Your robot should replicate movements similar to those you recorded. For example, check out [this video](https://x.com/RemiCadene/status/1793654950905680090) where we use `replay` on a Aloha robot from [Trossen Robotics](https://www.trossenrobotics.com).
|
||||
|
||||
## Train a policy
|
||||
|
||||
To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
|
||||
To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](../src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--dataset.repo_id=${HF_USER}/so101_test \
|
||||
--policy.type=act \
|
||||
--output_dir=outputs/train/act_so101_test \
|
||||
--job_name=act_so101_test \
|
||||
--policy.device=cuda \
|
||||
--wandb.enable=true
|
||||
--wandb.enable=true \
|
||||
--policy.repo_id=${HF_USER}/my_policy
|
||||
```
|
||||
|
||||
Let's explain the command:
|
||||
1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/so101_test`.
|
||||
2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
|
||||
2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../src/lerobot/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
|
||||
4. We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon.
|
||||
5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
|
||||
|
||||
@@ -268,11 +411,15 @@ Training should take several hours. You will find checkpoints in `outputs/train/
|
||||
|
||||
To resume training from a checkpoint, below is an example command to resume from `last` checkpoint of the `act_so101_test` policy:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--config_path=outputs/train/act_so101_test/checkpoints/last/pretrained_model/train_config.json \
|
||||
--resume=true
|
||||
```
|
||||
|
||||
If you do not want to push your model to the hub after training use `--policy.push_to_hub=false`.
|
||||
|
||||
Additionally you can provide extra `tags` or specify a `license` for your model or make the model repo `private` by adding this: `--policy.private=true --policy.tags=\[ppo,rl\] --policy.license=mit`
|
||||
|
||||
#### Train using Collab
|
||||
If your local computer doesn't have a powerful GPU you could utilize Google Collab to train your model by following the [ACT training notebook](./notebooks#training-act).
|
||||
|
||||
@@ -291,9 +438,12 @@ huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \
|
||||
outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model
|
||||
```
|
||||
|
||||
## Evaluate your policy
|
||||
## Run inference and evaluate your policy
|
||||
|
||||
You can use the `record` script from [`lerobot/record.py`](https://github.com/huggingface/lerobot/blob/main/lerobot/record.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
|
||||
You can use the `record` script from [`lerobot/record.py`](https://github.com/huggingface/lerobot/blob/main/lerobot/record.py) with a policy checkpoint as input, to run inference and evaluate your policy. For instance, run this command or API example to run inference and record 10 evaluation episodes:
|
||||
|
||||
<hfoptions id="eval">
|
||||
<hfoption id="Command">
|
||||
```bash
|
||||
python -m lerobot.record \
|
||||
--robot.type=so100_follower \
|
||||
@@ -301,7 +451,7 @@ python -m lerobot.record \
|
||||
--robot.cameras="{ up: {type: opencv, index_or_path: /dev/video10, width: 640, height: 480, fps: 30}, side: {type: intelrealsense, serial_number_or_name: 233522074606, width: 640, height: 480, fps: 30}}" \
|
||||
--robot.id=my_awesome_follower_arm \
|
||||
--display_data=false \
|
||||
--dataset.repo_id=$HF_USER/eval_so100 \
|
||||
--dataset.repo_id=${HF_USER}/eval_so100 \
|
||||
--dataset.single_task="Put lego brick into the transparent box" \
|
||||
# <- Teleop optional if you want to teleoperate in between episodes \
|
||||
# --teleop.type=so100_leader \
|
||||
@@ -309,6 +459,82 @@ python -m lerobot.record \
|
||||
# --teleop.id=my_awesome_leader_arm \
|
||||
--policy.path=${HF_USER}/my_policy
|
||||
```
|
||||
</hfoption>
|
||||
<hfoption id="API example">
|
||||
```python
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig
|
||||
from lerobot.robots.so100_follower.so100_follower import SO100Follower
|
||||
from lerobot.utils.control_utils import init_keyboard_listener
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import _init_rerun
|
||||
from lerobot.record import record_loop
|
||||
|
||||
NUM_EPISODES = 5
|
||||
FPS = 30
|
||||
EPISODE_TIME_SEC = 60
|
||||
TASK_DESCRIPTION = "My task description"
|
||||
|
||||
# Create the robot configuration
|
||||
camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
|
||||
robot_config = SO100FollowerConfig(
|
||||
port="/dev/tty.usbmodem58760434471", id="my_awesome_follower_arm", cameras=camera_config
|
||||
)
|
||||
|
||||
# Initialize the robot
|
||||
robot = SO100Follower(robot_config)
|
||||
|
||||
# Initialize the policy
|
||||
policy = ACTPolicy.from_pretrained("<hf_username>/<my_policy_repo_id>")
|
||||
|
||||
# Configure the dataset features
|
||||
action_features = hw_to_dataset_features(robot.action_features, "action")
|
||||
obs_features = hw_to_dataset_features(robot.observation_features, "observation")
|
||||
dataset_features = {**action_features, **obs_features}
|
||||
|
||||
# Create the dataset
|
||||
dataset = LeRobotDataset.create(
|
||||
repo_id="<hf_username>/eval_<dataset_repo_id>",
|
||||
fps=FPS,
|
||||
features=dataset_features,
|
||||
robot_type=robot.name,
|
||||
use_videos=True,
|
||||
image_writer_threads=4,
|
||||
)
|
||||
|
||||
# Initialize the keyboard listener and rerun visualization
|
||||
_, events = init_keyboard_listener()
|
||||
_init_rerun(session_name="recording")
|
||||
|
||||
# Connect the robot
|
||||
robot.connect()
|
||||
|
||||
for episode_idx in range(NUM_EPISODES):
|
||||
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
|
||||
# Run the policy inference loop
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
policy=policy,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
)
|
||||
|
||||
dataset.save_episode()
|
||||
|
||||
# Clean up
|
||||
robot.disconnect()
|
||||
dataset.push_to_hub()
|
||||
```
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
|
||||
1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint with (e.g. `outputs/train/eval_act_so101_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_so101_test`).
|
||||
|
||||
@@ -35,14 +35,14 @@ Then we can run this command to start:
|
||||
<hfoption id="Linux">
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/env_config_gym_hil_il.json
|
||||
python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="MacOS">
|
||||
|
||||
```bash
|
||||
mjpython lerobot/scripts/rl/gym_manipulator.py --config_path path/to/env_config_gym_hil_il.json
|
||||
mjpython -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
@@ -81,9 +81,9 @@ If you uploaded your dataset to the hub you can [visualize your dataset online](
|
||||
|
||||
## Train a policy
|
||||
|
||||
To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
|
||||
To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](../src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--dataset.repo_id=${HF_USER}/il_gym \
|
||||
--policy.type=act \
|
||||
--output_dir=outputs/train/il_sim_test \
|
||||
@@ -94,7 +94,7 @@ python lerobot/scripts/train.py \
|
||||
|
||||
Let's explain the command:
|
||||
1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/il_gym`.
|
||||
2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
|
||||
2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../src/lerobot/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
|
||||
4. We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon.
|
||||
5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
|
||||
|
||||
@@ -130,14 +130,14 @@ Then you can run this command to visualize your trained policy
|
||||
<hfoption id="Linux">
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/rl/eval_policy.py --config_path=path/to/eval_config_gym_hil.json
|
||||
python -m lerobot.scripts.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="MacOS">
|
||||
|
||||
```bash
|
||||
mjpython lerobot/scripts/rl/eval_policy.py --config_path=path/to/eval_config_gym_hil.json
|
||||
mjpython -m lerobot.scripts.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
This tutorial will explain how to integrate your own robot design into the LeRobot ecosystem and have it access all of our tools (data collection, control pipelines, policy training and inference).
|
||||
|
||||
To that end, we provide the [`Robot`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/robots/robot.py) base class in the LeRobot which specifies a standard interface for physical robot integration. Let's see how to implement it.
|
||||
To that end, we provide the [`Robot`](https://github.com/huggingface/lerobot/blob/main/lerobot/robots/robot.py) base class in the LeRobot which specifies a standard interface for physical robot integration. Let's see how to implement it.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
@@ -14,11 +14,11 @@ To that end, we provide the [`Robot`](https://github.com/huggingface/lerobot/blo
|
||||
|
||||
If you're using Feetech or Dynamixel motors, LeRobot provides built-in bus interfaces:
|
||||
|
||||
- [`FeetechMotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/feetech/feetech.py) – for controlling Feetech servos
|
||||
- [`DynamixelMotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/dynamixel/dynamixel.py) – for controlling Dynamixel servos
|
||||
- [`FeetechMotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/feetech/feetech.py) – for controlling Feetech servos
|
||||
- [`DynamixelMotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/dynamixel/dynamixel.py) – for controlling Dynamixel servos
|
||||
|
||||
Please refer to the [`MotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/motors_bus.py) abstract class to learn about its API.
|
||||
For a good example of how it can be used, you can have a look at our own [SO101 follower implementation](https://github.com/huggingface/lerobot/blob/main/lerobot/common/robots/so101_follower/so101_follower.py)
|
||||
Please refer to the [`MotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/motors_bus.py) abstract class to learn about its API.
|
||||
For a good example of how it can be used, you can have a look at our own [SO101 follower implementation](https://github.com/huggingface/lerobot/blob/main/lerobot/robots/so101_follower/so101_follower.py)
|
||||
|
||||
Use these if compatible. Otherwise, you'll need to find or write a Python interface (not covered in this tutorial):
|
||||
- Find an existing SDK in Python (or use bindings to C/C++)
|
||||
@@ -32,7 +32,7 @@ For Feetech and Dynamixel, we currently support these servos:
|
||||
- SCS series (protocol 1): `scs0009`
|
||||
- Dynamixel (protocol 2.0 only): `xl330-m077`, `xl330-m288`, `xl430-w250`, `xm430-w350`, `xm540-w270`, `xc430-w150`
|
||||
|
||||
If you are using Feetech or Dynamixel servos that are not in this list, you can add those in the [Feetech table](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/feetech/tables.py) or [Dynamixel table](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/dynamixel/tables.py). Depending on the model, this will require you to add model-specific information. In most cases though, there shouldn't be a lot of additions to do.
|
||||
If you are using Feetech or Dynamixel servos that are not in this list, you can add those in the [Feetech table](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/feetech/tables.py) or [Dynamixel table](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/dynamixel/tables.py). Depending on the model, this will require you to add model-specific information. In most cases though, there shouldn't be a lot of additions to do.
|
||||
|
||||
In the next sections, we'll use a `FeetechMotorsBus` as the motors interface for the examples. Replace it and adapt to your motors if necessary.
|
||||
|
||||
@@ -44,9 +44,9 @@ Here, we'll add the port name and one camera by default for our robot:
|
||||
```python
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from lerobot.common.cameras import CameraConfig
|
||||
from lerobot.common.cameras.opencv import OpenCVCameraConfig
|
||||
from lerobot.common.robots import RobotConfig
|
||||
from lerobot.cameras import CameraConfig
|
||||
from lerobot.cameras.opencv import OpenCVCameraConfig
|
||||
from lerobot.robots import RobotConfig
|
||||
|
||||
|
||||
@RobotConfig.register_subclass("my_cool_robot")
|
||||
@@ -72,10 +72,10 @@ Next, we'll create our actual robot class which inherits from `Robot`. This abst
|
||||
Here we'll create a simple 5-DoF robot with one camera. It could be a simple arm but notice that the `Robot` abstract class does not assume anything on your robot's form factor. You can let you imagination run wild when designing new robots!
|
||||
|
||||
```python
|
||||
from lerobot.common.cameras import make_cameras_from_configs
|
||||
from lerobot.common.motors import Motor, MotorNormMode
|
||||
from lerobot.common.motors.feetech import FeetechMotorsBus
|
||||
from lerobot.common.robots import Robot
|
||||
from lerobot.cameras import make_cameras_from_configs
|
||||
from lerobot.motors import Motor, MotorNormMode
|
||||
from lerobot.motors.feetech import FeetechMotorsBus
|
||||
from lerobot.robots import Robot
|
||||
|
||||
class MyCoolRobot(Robot):
|
||||
config_class = MyCoolRobotConfig
|
||||
@@ -303,7 +303,7 @@ def send_action(self, action: dict[str, Any]) -> dict[str, Any]:
|
||||
|
||||
## Adding a Teleoperator
|
||||
|
||||
For implementing teleoperation devices, we also provide a [`Teleoperator`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/teleoperators/teleoperator.py) base class. This class is very similar to the `Robot` base class and also doesn't assume anything on form factor.
|
||||
For implementing teleoperation devices, we also provide a [`Teleoperator`](https://github.com/huggingface/lerobot/blob/main/lerobot/teleoperators/teleoperator.py) base class. This class is very similar to the `Robot` base class and also doesn't assume anything on form factor.
|
||||
|
||||
The main differences are in the I/O functions: a teleoperator allows you to produce action via `get_action` and can receive feedback actions via `send_feedback`. Feedback could be anything controllable on the teleoperation device that could help the person controlling it understand the consequences of the actions sent. Think motion/force feedback on a leader arm, vibrations on a gamepad controller for example. To implement a teleoperator, you can follow this same tutorial and adapt it for these two methods.
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
../../lerobot/common/robots/koch_follower/koch.mdx
|
||||
../../src/lerobot/robots/koch_follower/koch.mdx
|
||||
@@ -1 +1 @@
|
||||
../../lerobot/common/robots/lekiwi/lekiwi.mdx
|
||||
../../src/lerobot/robots/lekiwi/lekiwi.mdx
|
||||
@@ -44,7 +44,7 @@ If you don't have a gpu device, you can train using our notebook on [.
|
||||
|
||||
```bash
|
||||
cd lerobot && python lerobot/scripts/train.py \
|
||||
cd lerobot && python -m lerobot.scripts.train \
|
||||
--policy.path=lerobot/smolvla_base \
|
||||
--dataset.repo_id=${HF_USER}/mydataset \
|
||||
--batch_size=64 \
|
||||
@@ -62,7 +62,7 @@ You can start with a small batch size and increase it incrementally, if the GPU
|
||||
Fine-tuning is an art. For a complete overview of the options for finetuning, run
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/train.py --help
|
||||
python -m lerobot.scripts.train --help
|
||||
```
|
||||
|
||||
<p align="center">
|
||||
|
||||
@@ -1 +1 @@
|
||||
../../lerobot/common/robots/so100_follower/so100.mdx
|
||||
../../src/lerobot/robots/so100_follower/so100.mdx
|
||||
@@ -1 +1 @@
|
||||
../../lerobot/common/robots/so101_follower/so101.mdx
|
||||
../../src/lerobot/robots/so101_follower/so101.mdx
|
||||
@@ -32,7 +32,7 @@ import torch
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
import lerobot
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
|
||||
# We ported a number of existing datasets ourselves, use this to see the list:
|
||||
print("List of available datasets:")
|
||||
|
||||
@@ -30,7 +30,7 @@ import imageio
|
||||
import numpy
|
||||
import torch
|
||||
|
||||
from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
|
||||
# Create a directory to store the video of the evaluation
|
||||
output_directory = Path("outputs/eval/example_pusht_diffusion")
|
||||
|
||||
@@ -22,11 +22,11 @@ from pathlib import Path
|
||||
|
||||
import torch
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.common.datasets.utils import dataset_to_policy_features
|
||||
from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
|
||||
from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
from lerobot.configs.types import FeatureType
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.datasets.utils import dataset_to_policy_features
|
||||
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
|
||||
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
@@ -4,7 +4,7 @@ This tutorial will explain the training script, how to use it, and particularly
|
||||
|
||||
## The training script
|
||||
|
||||
LeRobot offers a training script at [`lerobot/scripts/train.py`](../lerobot/scripts/train.py). At a high level it does the following:
|
||||
LeRobot offers a training script at [`lerobot/scripts/train.py`](../src/lerobot/scripts/train.py). At a high level it does the following:
|
||||
|
||||
- Initialize/load a configuration for the following steps using.
|
||||
- Instantiates a dataset.
|
||||
@@ -21,7 +21,7 @@ In the training script, the main function `train` expects a `TrainPipelineConfig
|
||||
def train(cfg: TrainPipelineConfig):
|
||||
```
|
||||
|
||||
You can inspect the `TrainPipelineConfig` defined in [`lerobot/configs/train.py`](../lerobot/configs/train.py) (which is heavily commented and meant to be a reference to understand any option)
|
||||
You can inspect the `TrainPipelineConfig` defined in [`lerobot/configs/train.py`](../src/lerobot/configs/train.py) (which is heavily commented and meant to be a reference to understand any option)
|
||||
|
||||
When running the script, inputs for the command line are parsed thanks to the `@parser.wrap()` decorator and an instance of this class is automatically generated. Under the hood, this is done with [Draccus](https://github.com/dlwh/draccus) which is a tool dedicated to this purpose. If you're familiar with Hydra, Draccus can similarly load configurations from config files (.json, .yaml) and also override their values through command line inputs. Unlike Hydra, these configurations are pre-defined in the code through dataclasses rather than being defined entirely in config files. This allows for more rigorous serialization/deserialization, typing, and to manipulate configuration as objects directly in the code and not as dictionaries or namespaces (which enables nice features in an IDE such as autocomplete, jump-to-def, etc.)
|
||||
|
||||
@@ -50,9 +50,9 @@ By default, every field takes its default value specified in the dataclass. If a
|
||||
|
||||
## Specifying values from the CLI
|
||||
|
||||
Let's say that we want to train [Diffusion Policy](../lerobot/common/policies/diffusion) on the [pusht](https://huggingface.co/datasets/lerobot/pusht) dataset, using the [gym_pusht](https://github.com/huggingface/gym-pusht) environment for evaluation. The command to do so would look like this:
|
||||
Let's say that we want to train [Diffusion Policy](../src/lerobot/policies/diffusion) on the [pusht](https://huggingface.co/datasets/lerobot/pusht) dataset, using the [gym_pusht](https://github.com/huggingface/gym-pusht) environment for evaluation. The command to do so would look like this:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--dataset.repo_id=lerobot/pusht \
|
||||
--policy.type=diffusion \
|
||||
--env.type=pusht
|
||||
@@ -60,12 +60,12 @@ python lerobot/scripts/train.py \
|
||||
|
||||
Let's break this down:
|
||||
- To specify the dataset, we just need to specify its `repo_id` on the hub which is the only required argument in the `DatasetConfig`. The rest of the fields have default values and in this case we are fine with those so we can just add the option `--dataset.repo_id=lerobot/pusht`.
|
||||
- To specify the policy, we can just select diffusion policy using `--policy` appended with `.type`. Here, `.type` is a special argument which allows us to select config classes inheriting from `draccus.ChoiceRegistry` and that have been decorated with the `register_subclass()` method. To have a better explanation of this feature, have a look at this [Draccus demo](https://github.com/dlwh/draccus?tab=readme-ov-file#more-flexible-configuration-with-choice-types). In our code, we use this mechanism mainly to select policies, environments, robots, and some other components like optimizers. The policies available to select are located in [lerobot/common/policies](../lerobot/common/policies)
|
||||
- Similarly, we select the environment with `--env.type=pusht`. The different environment configs are available in [`lerobot/common/envs/configs.py`](../lerobot/common/envs/configs.py)
|
||||
- To specify the policy, we can just select diffusion policy using `--policy` appended with `.type`. Here, `.type` is a special argument which allows us to select config classes inheriting from `draccus.ChoiceRegistry` and that have been decorated with the `register_subclass()` method. To have a better explanation of this feature, have a look at this [Draccus demo](https://github.com/dlwh/draccus?tab=readme-ov-file#more-flexible-configuration-with-choice-types). In our code, we use this mechanism mainly to select policies, environments, robots, and some other components like optimizers. The policies available to select are located in [lerobot/policies](../src/lerobot/policies)
|
||||
- Similarly, we select the environment with `--env.type=pusht`. The different environment configs are available in [`lerobot/envs/configs.py`](../src/lerobot/envs/configs.py)
|
||||
|
||||
Let's see another example. Let's say you've been training [ACT](../lerobot/common/policies/act) on [lerobot/aloha_sim_insertion_human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human) using the [gym-aloha](https://github.com/huggingface/gym-aloha) environment for evaluation with:
|
||||
Let's see another example. Let's say you've been training [ACT](../src/lerobot/policies/act) on [lerobot/aloha_sim_insertion_human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human) using the [gym-aloha](https://github.com/huggingface/gym-aloha) environment for evaluation with:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.type=act \
|
||||
--dataset.repo_id=lerobot/aloha_sim_insertion_human \
|
||||
--env.type=aloha \
|
||||
@@ -74,9 +74,9 @@ python lerobot/scripts/train.py \
|
||||
> Notice we added `--output_dir` to explicitly tell where to write outputs from this run (checkpoints, training state, configs etc.). This is not mandatory and if you don't specify it, a default directory will be created from the current date and time, env.type and policy.type. This will typically look like `outputs/train/2025-01-24/16-10-05_aloha_act`.
|
||||
|
||||
We now want to train a different policy for aloha on another task. We'll change the dataset and use [lerobot/aloha_sim_transfer_cube_human](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_human) instead. Of course, we also need to change the task of the environment as well to match this other task.
|
||||
Looking at the [`AlohaEnv`](../lerobot/common/envs/configs.py) config, the task is `"AlohaInsertion-v0"` by default, which corresponds to the task we trained on in the command above. The [gym-aloha](https://github.com/huggingface/gym-aloha?tab=readme-ov-file#description) environment also has the `AlohaTransferCube-v0` task which corresponds to this other task we want to train on. Putting this together, we can train this new policy on this different task using:
|
||||
Looking at the [`AlohaEnv`](../src/lerobot/envs/configs.py) config, the task is `"AlohaInsertion-v0"` by default, which corresponds to the task we trained on in the command above. The [gym-aloha](https://github.com/huggingface/gym-aloha?tab=readme-ov-file#description) environment also has the `AlohaTransferCube-v0` task which corresponds to this other task we want to train on. Putting this together, we can train this new policy on this different task using:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.type=act \
|
||||
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
|
||||
--env.type=aloha \
|
||||
@@ -111,7 +111,7 @@ Now, let's assume that we want to reproduce the run just above. That run has pro
|
||||
|
||||
We can then simply load the config values from this file using:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
|
||||
--output_dir=outputs/train/act_aloha_transfer_2
|
||||
```
|
||||
@@ -119,7 +119,7 @@ python lerobot/scripts/train.py \
|
||||
|
||||
Similarly to Hydra, we can still override some parameters in the CLI if we want to, e.g.:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
|
||||
--output_dir=outputs/train/act_aloha_transfer_2
|
||||
--policy.n_action_steps=80
|
||||
@@ -128,7 +128,7 @@ python lerobot/scripts/train.py \
|
||||
|
||||
`--config_path` can also accept the repo_id of a repo on the hub that contains a `train_config.json` file, e.g. running:
|
||||
```bash
|
||||
python lerobot/scripts/train.py --config_path=lerobot/diffusion_pusht
|
||||
python -m lerobot.scripts.train --config_path=lerobot/diffusion_pusht
|
||||
```
|
||||
will start a training run with the same configuration used for training [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht)
|
||||
|
||||
@@ -139,7 +139,7 @@ Being able to resume a training run is important in case it crashed or aborted f
|
||||
|
||||
Let's reuse the command from the previous run and add a few more options:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.type=act \
|
||||
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
|
||||
--env.type=aloha \
|
||||
@@ -155,7 +155,7 @@ INFO 2025-01-24 16:10:56 ts/train.py:263 Checkpoint policy after step 100
|
||||
```
|
||||
Now let's simulate a crash by killing the process (hit `ctrl`+`c`). We can then simply resume this run from the last checkpoint available with:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
|
||||
--resume=true
|
||||
```
|
||||
@@ -164,7 +164,7 @@ You should see from the logging that your training picks up from where it left o
|
||||
Another reason for which you might want to resume a run is simply to extend training and add more training steps. The number of training steps is set by the option `--steps`, which is 100 000 by default.
|
||||
You could double the number of steps of the previous run with:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
|
||||
--resume=true \
|
||||
--steps=200000
|
||||
@@ -195,7 +195,7 @@ In addition to the features currently in Draccus, we've added a special `.path`
|
||||
|
||||
For example, we could fine-tune a [policy pre-trained on the aloha transfer task](https://huggingface.co/lerobot/act_aloha_sim_transfer_cube_human) on the aloha insertion task. We can achieve this with:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.path=lerobot/act_aloha_sim_transfer_cube_human \
|
||||
--dataset.repo_id=lerobot/aloha_sim_insertion_human \
|
||||
--env.type=aloha \
|
||||
@@ -236,7 +236,7 @@ We'll summarize here the main use cases to remember from this tutorial.
|
||||
|
||||
#### Train a policy from scratch – CLI
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.type=act \ # <- select 'act' policy
|
||||
--env.type=pusht \ # <- select 'pusht' environment
|
||||
--dataset.repo_id=lerobot/pusht # <- train on this dataset
|
||||
@@ -244,14 +244,14 @@ python lerobot/scripts/train.py \
|
||||
|
||||
#### Train a policy from scratch - config file + CLI
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--config_path=path/to/pretrained_model \ # <- can also be a repo_id
|
||||
--policy.n_action_steps=80 # <- you may still override values
|
||||
```
|
||||
|
||||
#### Resume/continue a training run
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--config_path=checkpoint/pretrained_model/ \
|
||||
--resume=true \
|
||||
--steps=200000 # <- you can change some training parameters
|
||||
@@ -259,7 +259,7 @@ python lerobot/scripts/train.py \
|
||||
|
||||
#### Fine-tuning
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
python -m lerobot.scripts.train \
|
||||
--policy.path=lerobot/act_aloha_sim_transfer_cube_human \ # <- can also be a local path to a checkpoint
|
||||
--dataset.repo_id=lerobot/aloha_sim_insertion_human \
|
||||
--env.type=aloha \
|
||||
|
||||
@@ -22,7 +22,7 @@ from pathlib import Path
|
||||
|
||||
from torchvision.transforms import ToPILImage, v2
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
dataset_repo_id = "lerobot/aloha_static_screw_driver"
|
||||
|
||||
|
||||
@@ -26,8 +26,8 @@ import math
|
||||
|
||||
import torch
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
@@ -35,8 +35,8 @@ from pprint import pformat
|
||||
|
||||
import draccus
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.common.robots import ( # noqa: F401
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.robots import ( # noqa: F401
|
||||
Robot,
|
||||
RobotConfig,
|
||||
koch_follower,
|
||||
@@ -44,8 +44,8 @@ from lerobot.common.robots import ( # noqa: F401
|
||||
so100_follower,
|
||||
so101_follower,
|
||||
)
|
||||
from lerobot.common.utils.robot_utils import busy_wait
|
||||
from lerobot.common.utils.utils import (
|
||||
from lerobot.utils.robot_utils import busy_wait
|
||||
from lerobot.utils.utils import (
|
||||
init_logging,
|
||||
log_say,
|
||||
)
|
||||
|
||||
@@ -1,32 +1,90 @@
|
||||
from lerobot.common.datasets.utils import build_dataset_frame, hw_to_dataset_features
|
||||
from lerobot.common.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.common.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
|
||||
from lerobot.common.utils.control_utils import predict_action
|
||||
from lerobot.common.utils.utils import get_safe_torch_device
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.record import record_loop
|
||||
from lerobot.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
|
||||
from lerobot.utils.control_utils import init_keyboard_listener
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import _init_rerun
|
||||
|
||||
NB_CYCLES_CLIENT_CONNECTION = 1000
|
||||
NUM_EPISODES = 2
|
||||
FPS = 30
|
||||
EPISODE_TIME_SEC = 60
|
||||
TASK_DESCRIPTION = "My task description"
|
||||
|
||||
# Create the robot and teleoperator configurations
|
||||
robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
|
||||
robot = LeKiwiClient(robot_config)
|
||||
|
||||
policy = ACTPolicy.from_pretrained("<hf_username>/<policy_repo_id>")
|
||||
|
||||
# Configure the dataset features
|
||||
action_features = hw_to_dataset_features(robot.action_features, "action")
|
||||
obs_features = hw_to_dataset_features(robot.observation_features, "observation")
|
||||
dataset_features = {**action_features, **obs_features}
|
||||
|
||||
# Create the dataset
|
||||
dataset = LeRobotDataset.create(
|
||||
repo_id="<hf_username>/<eval_dataset_repo_id>",
|
||||
fps=FPS,
|
||||
features=dataset_features,
|
||||
robot_type=robot.name,
|
||||
use_videos=True,
|
||||
image_writer_threads=4,
|
||||
)
|
||||
|
||||
# To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi`
|
||||
robot.connect()
|
||||
|
||||
policy = ACTPolicy.from_pretrained("pepijn223/act_lekiwi_circle")
|
||||
policy.reset()
|
||||
_init_rerun(session_name="recording")
|
||||
|
||||
obs_features = hw_to_dataset_features(robot.observation_features, "observation")
|
||||
listener, events = init_keyboard_listener()
|
||||
|
||||
print("Running inference")
|
||||
i = 0
|
||||
while i < NB_CYCLES_CLIENT_CONNECTION:
|
||||
obs = robot.get_observation()
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
|
||||
observation_frame = build_dataset_frame(obs_features, obs, prefix="observation")
|
||||
action_values = predict_action(
|
||||
observation_frame, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp
|
||||
recorded_episodes = 0
|
||||
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}")
|
||||
|
||||
# Run the policy inference loop
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
policy=policy,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
)
|
||||
action = {key: action_values[i].item() for i, key in enumerate(robot.action_features)}
|
||||
robot.send_action(action)
|
||||
i += 1
|
||||
|
||||
# Logic for reset env
|
||||
if not events["stop_recording"] and (
|
||||
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
)
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
dataset.save_episode()
|
||||
recorded_episodes += 1
|
||||
|
||||
# Upload to hub and clean up
|
||||
dataset.push_to_hub()
|
||||
|
||||
robot.disconnect()
|
||||
listener.stop()
|
||||
|
||||
@@ -1,67 +1,101 @@
|
||||
import time
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.record import record_loop
|
||||
from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
|
||||
from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
|
||||
from lerobot.teleoperators.keyboard import KeyboardTeleop, KeyboardTeleopConfig
|
||||
from lerobot.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig
|
||||
from lerobot.utils.control_utils import init_keyboard_listener
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import _init_rerun
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.common.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.common.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
|
||||
from lerobot.common.robots.lekiwi.lekiwi_client import LeKiwiClient
|
||||
from lerobot.common.teleoperators.keyboard import KeyboardTeleop, KeyboardTeleopConfig
|
||||
from lerobot.common.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig
|
||||
|
||||
NB_CYCLES_CLIENT_CONNECTION = 250
|
||||
|
||||
leader_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem58760431551")
|
||||
leader_arm = SO100Leader(leader_arm_config)
|
||||
NUM_EPISODES = 3
|
||||
FPS = 30
|
||||
EPISODE_TIME_SEC = 30
|
||||
RESET_TIME_SEC = 10
|
||||
TASK_DESCRIPTION = "My task description"
|
||||
|
||||
# Create the robot and teleoperator configurations
|
||||
robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
|
||||
leader_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm")
|
||||
keyboard_config = KeyboardTeleopConfig()
|
||||
|
||||
robot = LeKiwiClient(robot_config)
|
||||
leader_arm = SO100Leader(leader_arm_config)
|
||||
keyboard = KeyboardTeleop(keyboard_config)
|
||||
|
||||
robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
|
||||
robot = LeKiwiClient(robot_config)
|
||||
|
||||
# Configure the dataset features
|
||||
action_features = hw_to_dataset_features(robot.action_features, "action")
|
||||
obs_features = hw_to_dataset_features(robot.observation_features, "observation")
|
||||
dataset_features = {**action_features, **obs_features}
|
||||
|
||||
# Create the dataset
|
||||
dataset = LeRobotDataset.create(
|
||||
repo_id="pepijn223/lekiwi" + str(int(time.time())),
|
||||
fps=10,
|
||||
repo_id="<hf_username>/<dataset_repo_id>",
|
||||
fps=FPS,
|
||||
features=dataset_features,
|
||||
robot_type=robot.name,
|
||||
use_videos=True,
|
||||
image_writer_threads=4,
|
||||
)
|
||||
|
||||
# To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi`
|
||||
robot.connect()
|
||||
leader_arm.connect()
|
||||
keyboard.connect()
|
||||
robot.connect()
|
||||
|
||||
_init_rerun(session_name="lekiwi_record")
|
||||
|
||||
listener, events = init_keyboard_listener()
|
||||
|
||||
if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
|
||||
exit()
|
||||
raise ValueError("Robot, leader arm of keyboard is not connected!")
|
||||
|
||||
print("Starting LeKiwi recording")
|
||||
i = 0
|
||||
while i < NB_CYCLES_CLIENT_CONNECTION:
|
||||
arm_action = leader_arm.get_action()
|
||||
arm_action = {f"arm_{k}": v for k, v in arm_action.items()}
|
||||
recorded_episodes = 0
|
||||
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Recording episode {recorded_episodes}")
|
||||
|
||||
keyboard_keys = keyboard.get_action()
|
||||
# Run the record loop
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
dataset=dataset,
|
||||
teleop=[leader_arm, keyboard],
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
)
|
||||
|
||||
base_action = robot._from_keyboard_to_base_action(keyboard_keys)
|
||||
# Logic for reset env
|
||||
if not events["stop_recording"] and (
|
||||
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=[leader_arm, keyboard],
|
||||
control_time_s=RESET_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
)
|
||||
|
||||
action = {**arm_action, **base_action} if len(base_action) > 0 else arm_action
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
action_sent = robot.send_action(action)
|
||||
observation = robot.get_observation()
|
||||
dataset.save_episode()
|
||||
recorded_episodes += 1
|
||||
|
||||
frame = {**action_sent, **observation}
|
||||
task = "Dummy Example Task Dataset"
|
||||
# Upload to hub and clean up
|
||||
dataset.push_to_hub()
|
||||
|
||||
dataset.add_frame(frame, task)
|
||||
i += 1
|
||||
|
||||
print("Disconnecting Teleop Devices and LeKiwi Client")
|
||||
robot.disconnect()
|
||||
leader_arm.disconnect()
|
||||
keyboard.disconnect()
|
||||
|
||||
print("Uploading dataset to the hub")
|
||||
dataset.save_episode()
|
||||
dataset.push_to_hub()
|
||||
listener.stop()
|
||||
|
||||
@@ -1,25 +1,33 @@
|
||||
import time
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.common.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
|
||||
from lerobot.common.robots.lekiwi.lekiwi_client import LeKiwiClient
|
||||
from lerobot.common.utils.robot_utils import busy_wait
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
|
||||
from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
|
||||
from lerobot.utils.robot_utils import busy_wait
|
||||
from lerobot.utils.utils import log_say
|
||||
|
||||
EPISODE_IDX = 0
|
||||
|
||||
robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
|
||||
robot = LeKiwiClient(robot_config)
|
||||
|
||||
dataset = LeRobotDataset("pepijn223/lekiwi1749025613", episodes=[0])
|
||||
dataset = LeRobotDataset("<hf_username>/<dataset_repo_id>", episodes=[EPISODE_IDX])
|
||||
actions = dataset.hf_dataset.select_columns("action")
|
||||
|
||||
robot.connect()
|
||||
|
||||
print("Replaying episode…")
|
||||
for _, action_array in enumerate(dataset.hf_dataset["action"]):
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
|
||||
log_say(f"Replaying episode {EPISODE_IDX}")
|
||||
for idx in range(dataset.num_frames):
|
||||
t0 = time.perf_counter()
|
||||
|
||||
action = {name: float(action_array[i]) for i, name in enumerate(dataset.features["action"]["names"])}
|
||||
action = {
|
||||
name: float(actions[idx]["action"][i]) for i, name in enumerate(dataset.features["action"]["names"])
|
||||
}
|
||||
robot.send_action(action)
|
||||
|
||||
busy_wait(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
|
||||
|
||||
print("Disconnecting LeKiwi Client")
|
||||
robot.disconnect()
|
||||
|
||||
@@ -1,32 +1,47 @@
|
||||
from lerobot.common.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
|
||||
from lerobot.common.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop, KeyboardTeleopConfig
|
||||
from lerobot.common.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig
|
||||
import time
|
||||
|
||||
from lerobot.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
|
||||
from lerobot.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop, KeyboardTeleopConfig
|
||||
from lerobot.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig
|
||||
from lerobot.utils.robot_utils import busy_wait
|
||||
from lerobot.utils.visualization_utils import _init_rerun, log_rerun_data
|
||||
|
||||
FPS = 30
|
||||
|
||||
# Create the robot and teleoperator configurations
|
||||
robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="my_lekiwi")
|
||||
|
||||
teleop__arm_config = SO100LeaderConfig(
|
||||
port="/dev/tty.usbmodem58760431551",
|
||||
id="my_awesome_leader_arm",
|
||||
)
|
||||
|
||||
teleop_keyboard_config = KeyboardTeleopConfig(
|
||||
id="my_laptop_keyboard",
|
||||
)
|
||||
teleop_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm")
|
||||
keyboard_config = KeyboardTeleopConfig(id="my_laptop_keyboard")
|
||||
|
||||
robot = LeKiwiClient(robot_config)
|
||||
teleop_arm = SO100Leader(teleop__arm_config)
|
||||
telep_keyboard = KeyboardTeleop(teleop_keyboard_config)
|
||||
leader_arm = SO100Leader(teleop_arm_config)
|
||||
keyboard = KeyboardTeleop(keyboard_config)
|
||||
|
||||
# To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi`
|
||||
robot.connect()
|
||||
teleop_arm.connect()
|
||||
telep_keyboard.connect()
|
||||
leader_arm.connect()
|
||||
keyboard.connect()
|
||||
|
||||
_init_rerun(session_name="lekiwi_teleop")
|
||||
|
||||
if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
|
||||
raise ValueError("Robot, leader arm of keyboard is not connected!")
|
||||
|
||||
while True:
|
||||
t0 = time.perf_counter()
|
||||
|
||||
observation = robot.get_observation()
|
||||
|
||||
arm_action = teleop_arm.get_action()
|
||||
arm_action = leader_arm.get_action()
|
||||
arm_action = {f"arm_{k}": v for k, v in arm_action.items()}
|
||||
|
||||
keyboard_keys = telep_keyboard.get_action()
|
||||
keyboard_keys = keyboard.get_action()
|
||||
base_action = robot._from_keyboard_to_base_action(keyboard_keys)
|
||||
|
||||
robot.send_action(arm_action | base_action)
|
||||
log_rerun_data(observation, {**arm_action, **base_action})
|
||||
|
||||
action = {**arm_action, **base_action} if len(base_action) > 0 else arm_action
|
||||
|
||||
robot.send_action(action)
|
||||
|
||||
busy_wait(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
|
||||
|
||||
@@ -1,483 +0,0 @@
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import numpy as np
|
||||
from numpy.typing import NDArray
|
||||
from scipy.spatial.transform import Rotation
|
||||
|
||||
|
||||
def skew_symmetric(w: NDArray[np.float32]) -> NDArray[np.float32]:
|
||||
"""Creates the skew-symmetric matrix from a 3D vector."""
|
||||
return np.array([[0, -w[2], w[1]], [w[2], 0, -w[0]], [-w[1], w[0], 0]])
|
||||
|
||||
|
||||
def rodrigues_rotation(w: NDArray[np.float32], theta: float) -> NDArray[np.float32]:
|
||||
"""Computes the rotation matrix using Rodrigues' formula."""
|
||||
w_hat = skew_symmetric(w)
|
||||
return np.eye(3) + np.sin(theta) * w_hat + (1 - np.cos(theta)) * w_hat @ w_hat
|
||||
|
||||
|
||||
def screw_axis_to_transform(s: NDArray[np.float32], theta: float) -> NDArray[np.float32]:
|
||||
"""Converts a screw axis to a 4x4 transformation matrix."""
|
||||
screw_axis_rot = s[:3]
|
||||
screw_axis_trans = s[3:]
|
||||
|
||||
# Pure translation
|
||||
if np.allclose(screw_axis_rot, 0) and np.linalg.norm(screw_axis_trans) == 1:
|
||||
transform = np.eye(4)
|
||||
transform[:3, 3] = screw_axis_trans * theta
|
||||
|
||||
# Rotation (and potentially translation)
|
||||
elif np.linalg.norm(screw_axis_rot) == 1:
|
||||
w_hat = skew_symmetric(screw_axis_rot)
|
||||
rot_mat = np.eye(3) + np.sin(theta) * w_hat + (1 - np.cos(theta)) * w_hat @ w_hat
|
||||
t = (
|
||||
np.eye(3) * theta + (1 - np.cos(theta)) * w_hat + (theta - np.sin(theta)) * w_hat @ w_hat
|
||||
) @ screw_axis_trans
|
||||
transform = np.eye(4)
|
||||
transform[:3, :3] = rot_mat
|
||||
transform[:3, 3] = t
|
||||
else:
|
||||
raise ValueError("Invalid screw axis parameters")
|
||||
return transform
|
||||
|
||||
|
||||
def pose_difference_se3(pose1: NDArray[np.float32], pose2: NDArray[np.float32]) -> NDArray[np.float32]:
|
||||
"""
|
||||
Calculates the SE(3) difference between two 4x4 homogeneous transformation matrices.
|
||||
SE(3) (Special Euclidean Group) represents rigid body transformations in 3D space,
|
||||
combining rotation (SO(3)) and translation.
|
||||
|
||||
Each 4x4 matrix has the following structure:
|
||||
[R11 R12 R13 tx]
|
||||
[R21 R22 R23 ty]
|
||||
[R31 R32 R33 tz]
|
||||
[ 0 0 0 1]
|
||||
|
||||
where R is the 3x3 rotation matrix and [tx,ty,tz] is the translation vector.
|
||||
|
||||
Args:
|
||||
pose1: A 4x4 numpy array representing the first pose.
|
||||
pose2: A 4x4 numpy array representing the second pose.
|
||||
|
||||
Returns:
|
||||
A 6D numpy array concatenating translation and rotation differences.
|
||||
First 3 elements are the translational difference (position).
|
||||
Last 3 elements are the rotational difference in axis-angle representation.
|
||||
"""
|
||||
rot1 = pose1[:3, :3]
|
||||
rot2 = pose2[:3, :3]
|
||||
|
||||
translation_diff = pose1[:3, 3] - pose2[:3, 3]
|
||||
|
||||
# Calculate rotational difference using scipy's Rotation library
|
||||
rot_diff = Rotation.from_matrix(rot1 @ rot2.T)
|
||||
rotation_diff = rot_diff.as_rotvec() # Axis-angle representation
|
||||
|
||||
return np.concatenate([translation_diff, rotation_diff])
|
||||
|
||||
|
||||
def se3_error(target_pose: NDArray[np.float32], current_pose: NDArray[np.float32]) -> NDArray[np.float32]:
|
||||
pos_error = target_pose[:3, 3] - current_pose[:3, 3]
|
||||
|
||||
rot_target = target_pose[:3, :3]
|
||||
rot_current = current_pose[:3, :3]
|
||||
rot_error_mat = rot_target @ rot_current.T
|
||||
rot_error = Rotation.from_matrix(rot_error_mat).as_rotvec()
|
||||
|
||||
return np.concatenate([pos_error, rot_error])
|
||||
|
||||
|
||||
class RobotKinematics:
|
||||
"""Robot kinematics class supporting multiple robot models."""
|
||||
|
||||
# Robot measurements dictionary
|
||||
ROBOT_MEASUREMENTS = {
|
||||
"koch": {
|
||||
"gripper": [0.239, -0.001, 0.024],
|
||||
"wrist": [0.209, 0, 0.024],
|
||||
"forearm": [0.108, 0, 0.02],
|
||||
"humerus": [0, 0, 0.036],
|
||||
"shoulder": [0, 0, 0],
|
||||
"base": [0, 0, 0.02],
|
||||
},
|
||||
"moss": {
|
||||
"gripper": [0.246, 0.013, 0.111],
|
||||
"wrist": [0.245, 0.002, 0.064],
|
||||
"forearm": [0.122, 0, 0.064],
|
||||
"humerus": [0.001, 0.001, 0.063],
|
||||
"shoulder": [0, 0, 0],
|
||||
"base": [0, 0, 0.02],
|
||||
},
|
||||
"so_old_calibration": {
|
||||
"gripper": [0.320, 0, 0.050],
|
||||
"wrist": [0.278, 0, 0.050],
|
||||
"forearm": [0.143, 0, 0.044],
|
||||
"humerus": [0.031, 0, 0.072],
|
||||
"shoulder": [0, 0, 0],
|
||||
"base": [0, 0, 0.02],
|
||||
},
|
||||
"so_new_calibration": {
|
||||
"gripper": [0.33, 0.0, 0.285],
|
||||
"wrist": [0.30, 0.0, 0.267],
|
||||
"forearm": [0.25, 0.0, 0.266],
|
||||
"humerus": [0.06, 0.0, 0.264],
|
||||
"shoulder": [0.0, 0.0, 0.238],
|
||||
"base": [0.0, 0.0, 0.12],
|
||||
},
|
||||
}
|
||||
|
||||
def __init__(self, robot_type: str = "so100"):
|
||||
"""Initialize kinematics for the specified robot type.
|
||||
|
||||
Args:
|
||||
robot_type: String specifying the robot model ("koch", "so100", or "moss")
|
||||
"""
|
||||
if robot_type not in self.ROBOT_MEASUREMENTS:
|
||||
raise ValueError(
|
||||
f"Unknown robot type: {robot_type}. Available types: {list(self.ROBOT_MEASUREMENTS.keys())}"
|
||||
)
|
||||
|
||||
self.robot_type = robot_type
|
||||
self.measurements = self.ROBOT_MEASUREMENTS[robot_type]
|
||||
|
||||
# Initialize all transformation matrices and screw axes
|
||||
self._setup_transforms()
|
||||
|
||||
def _create_translation_matrix(
|
||||
self, x: float = 0.0, y: float = 0.0, z: float = 0.0
|
||||
) -> NDArray[np.float32]:
|
||||
"""Create a 4x4 translation matrix."""
|
||||
return np.array([[1, 0, 0, x], [0, 1, 0, y], [0, 0, 1, z], [0, 0, 0, 1]])
|
||||
|
||||
def _setup_transforms(self):
|
||||
"""Setup all transformation matrices and screw axes for the robot."""
|
||||
# Set up rotation matrices (constant across robot types)
|
||||
|
||||
# Gripper orientation
|
||||
self.gripper_X0 = np.array(
|
||||
[
|
||||
[1, 0, 0, 0],
|
||||
[0, 0, 1, 0],
|
||||
[0, -1, 0, 0],
|
||||
[0, 0, 0, 1],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Wrist orientation
|
||||
self.wrist_X0 = np.array(
|
||||
[
|
||||
[0, -1, 0, 0],
|
||||
[1, 0, 0, 0],
|
||||
[0, 0, 1, 0],
|
||||
[0, 0, 0, 1],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Base orientation
|
||||
self.base_X0 = np.array(
|
||||
[
|
||||
[0, 0, 1, 0],
|
||||
[1, 0, 0, 0],
|
||||
[0, 1, 0, 0],
|
||||
[0, 0, 0, 1],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Gripper
|
||||
# Screw axis of gripper frame wrt base frame
|
||||
self.S_BG = np.array(
|
||||
[
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
self.measurements["gripper"][2],
|
||||
-self.measurements["gripper"][1],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Gripper origin to centroid transform
|
||||
self.X_GoGc = self._create_translation_matrix(x=0.07)
|
||||
|
||||
# Gripper origin to tip transform
|
||||
self.X_GoGt = self._create_translation_matrix(x=0.12)
|
||||
|
||||
# 0-position gripper frame pose wrt base
|
||||
self.X_BoGo = self._create_translation_matrix(
|
||||
x=self.measurements["gripper"][0],
|
||||
y=self.measurements["gripper"][1],
|
||||
z=self.measurements["gripper"][2],
|
||||
)
|
||||
|
||||
# Wrist
|
||||
# Screw axis of wrist frame wrt base frame
|
||||
self.S_BR = np.array(
|
||||
[0, 1, 0, -self.measurements["wrist"][2], 0, self.measurements["wrist"][0]], dtype=np.float32
|
||||
)
|
||||
|
||||
# 0-position origin to centroid transform
|
||||
self.X_RoRc = self._create_translation_matrix(x=0.0035, y=-0.002)
|
||||
|
||||
# 0-position wrist frame pose wrt base
|
||||
self.X_BR = self._create_translation_matrix(
|
||||
x=self.measurements["wrist"][0],
|
||||
y=self.measurements["wrist"][1],
|
||||
z=self.measurements["wrist"][2],
|
||||
)
|
||||
|
||||
# Forearm
|
||||
# Screw axis of forearm frame wrt base frame
|
||||
self.S_BF = np.array(
|
||||
[
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
-self.measurements["forearm"][2],
|
||||
0,
|
||||
self.measurements["forearm"][0],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Forearm origin + centroid transform
|
||||
self.X_ForearmFc = self._create_translation_matrix(x=0.036)
|
||||
|
||||
# 0-position forearm frame pose wrt base
|
||||
self.X_BF = self._create_translation_matrix(
|
||||
x=self.measurements["forearm"][0],
|
||||
y=self.measurements["forearm"][1],
|
||||
z=self.measurements["forearm"][2],
|
||||
)
|
||||
|
||||
# Humerus
|
||||
# Screw axis of humerus frame wrt base frame
|
||||
self.S_BH = np.array(
|
||||
[
|
||||
0,
|
||||
-1,
|
||||
0,
|
||||
self.measurements["humerus"][2],
|
||||
0,
|
||||
-self.measurements["humerus"][0],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Humerus origin to centroid transform
|
||||
self.X_HoHc = self._create_translation_matrix(x=0.0475)
|
||||
|
||||
# 0-position humerus frame pose wrt base
|
||||
self.X_BH = self._create_translation_matrix(
|
||||
x=self.measurements["humerus"][0],
|
||||
y=self.measurements["humerus"][1],
|
||||
z=self.measurements["humerus"][2],
|
||||
)
|
||||
|
||||
# Shoulder
|
||||
# Screw axis of shoulder frame wrt Base frame
|
||||
self.S_BS = np.array([0, 0, -1, 0, 0, 0], dtype=np.float32)
|
||||
|
||||
# Shoulder origin to centroid transform
|
||||
self.X_SoSc = self._create_translation_matrix(x=-0.017, z=0.0235)
|
||||
|
||||
# 0-position shoulder frame pose wrt base
|
||||
self.X_BS = self._create_translation_matrix(
|
||||
x=self.measurements["shoulder"][0],
|
||||
y=self.measurements["shoulder"][1],
|
||||
z=self.measurements["shoulder"][2],
|
||||
)
|
||||
|
||||
# Base
|
||||
# Base origin to centroid transform
|
||||
self.X_BoBc = self._create_translation_matrix(y=0.015)
|
||||
|
||||
# World to base transform
|
||||
self.X_WoBo = self._create_translation_matrix(
|
||||
x=self.measurements["base"][0],
|
||||
y=self.measurements["base"][1],
|
||||
z=self.measurements["base"][2],
|
||||
)
|
||||
|
||||
# Pre-compute gripper post-multiplication matrix
|
||||
self._fk_gripper_post = self.X_GoGc @ self.X_BoGo @ self.gripper_X0
|
||||
|
||||
def forward_kinematics(
|
||||
self,
|
||||
robot_pos_deg: NDArray[np.float32],
|
||||
frame: str = "gripper_tip",
|
||||
) -> NDArray[np.float32]:
|
||||
"""Generic forward kinematics.
|
||||
|
||||
Args:
|
||||
robot_pos_deg: Joint positions in degrees. Can be ``None`` when
|
||||
computing the *base* frame as it does not depend on joint
|
||||
angles.
|
||||
frame: Target frame. One of
|
||||
``{"base", "shoulder", "humerus", "forearm", "wrist", "gripper", "gripper_tip"}``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
NDArray[np.float32]
|
||||
4×4 homogeneous transformation matrix of the requested frame
|
||||
expressed in the world coordinate system.
|
||||
"""
|
||||
frame = frame.lower()
|
||||
if frame not in {
|
||||
"base",
|
||||
"shoulder",
|
||||
"humerus",
|
||||
"forearm",
|
||||
"wrist",
|
||||
"gripper",
|
||||
"gripper_tip",
|
||||
}:
|
||||
raise ValueError(
|
||||
f"Unknown frame '{frame}'. Valid options are base, shoulder, humerus, forearm, wrist, gripper, gripper_tip."
|
||||
)
|
||||
|
||||
# Base frame does not rely on joint angles.
|
||||
if frame == "base":
|
||||
return self.X_WoBo @ self.X_BoBc @ self.base_X0
|
||||
|
||||
robot_pos_rad = robot_pos_deg / 180 * np.pi
|
||||
|
||||
# Extract joint angles (note the sign convention for shoulder lift).
|
||||
theta_shoulder_pan = robot_pos_rad[0]
|
||||
theta_shoulder_lift = -robot_pos_rad[1]
|
||||
theta_elbow_flex = robot_pos_rad[2]
|
||||
theta_wrist_flex = robot_pos_rad[3]
|
||||
theta_wrist_roll = robot_pos_rad[4]
|
||||
|
||||
# Start with the world-to-base transform; incrementally add successive links.
|
||||
transformation_matrix = self.X_WoBo @ screw_axis_to_transform(self.S_BS, theta_shoulder_pan)
|
||||
if frame == "shoulder":
|
||||
return transformation_matrix @ self.X_SoSc @ self.X_BS
|
||||
|
||||
transformation_matrix = transformation_matrix @ screw_axis_to_transform(
|
||||
self.S_BH, theta_shoulder_lift
|
||||
)
|
||||
if frame == "humerus":
|
||||
return transformation_matrix @ self.X_HoHc @ self.X_BH
|
||||
|
||||
transformation_matrix = transformation_matrix @ screw_axis_to_transform(self.S_BF, theta_elbow_flex)
|
||||
if frame == "forearm":
|
||||
return transformation_matrix @ self.X_ForearmFc @ self.X_BF
|
||||
|
||||
transformation_matrix = transformation_matrix @ screw_axis_to_transform(self.S_BR, theta_wrist_flex)
|
||||
if frame == "wrist":
|
||||
return transformation_matrix @ self.X_RoRc @ self.X_BR @ self.wrist_X0
|
||||
|
||||
transformation_matrix = transformation_matrix @ screw_axis_to_transform(self.S_BG, theta_wrist_roll)
|
||||
if frame == "gripper":
|
||||
return transformation_matrix @ self._fk_gripper_post
|
||||
else: # frame == "gripper_tip"
|
||||
return transformation_matrix @ self.X_GoGt @ self.X_BoGo @ self.gripper_X0
|
||||
|
||||
def compute_jacobian(
|
||||
self, robot_pos_deg: NDArray[np.float32], frame: str = "gripper_tip"
|
||||
) -> NDArray[np.float32]:
|
||||
"""Finite differences to compute the Jacobian.
|
||||
J(i, j) represents how the ith component of the end-effector's velocity changes wrt a small change
|
||||
in the jth joint's velocity.
|
||||
|
||||
Args:
|
||||
robot_pos_deg: Current joint positions in degrees
|
||||
fk_func: Forward kinematics function to use (defaults to fk_gripper)
|
||||
"""
|
||||
|
||||
eps = 1e-8
|
||||
jac = np.zeros(shape=(6, 5))
|
||||
delta = np.zeros(len(robot_pos_deg[:-1]), dtype=np.float64)
|
||||
for el_ix in range(len(robot_pos_deg[:-1])):
|
||||
delta *= 0
|
||||
delta[el_ix] = eps / 2
|
||||
sdot = (
|
||||
pose_difference_se3(
|
||||
self.forward_kinematics(robot_pos_deg[:-1] + delta, frame),
|
||||
self.forward_kinematics(robot_pos_deg[:-1] - delta, frame),
|
||||
)
|
||||
/ eps
|
||||
)
|
||||
jac[:, el_ix] = sdot
|
||||
return jac
|
||||
|
||||
def compute_positional_jacobian(
|
||||
self, robot_pos_deg: NDArray[np.float32], frame: str = "gripper_tip"
|
||||
) -> NDArray[np.float32]:
|
||||
"""Finite differences to compute the positional Jacobian.
|
||||
J(i, j) represents how the ith component of the end-effector's position changes wrt a small change
|
||||
in the jth joint's velocity.
|
||||
|
||||
Args:
|
||||
robot_pos_deg: Current joint positions in degrees
|
||||
fk_func: Forward kinematics function to use (defaults to fk_gripper)
|
||||
"""
|
||||
eps = 1e-8
|
||||
jac = np.zeros(shape=(3, 5))
|
||||
delta = np.zeros(len(robot_pos_deg[:-1]), dtype=np.float64)
|
||||
for el_ix in range(len(robot_pos_deg[:-1])):
|
||||
delta *= 0
|
||||
delta[el_ix] = eps / 2
|
||||
sdot = (
|
||||
self.forward_kinematics(robot_pos_deg[:-1] + delta, frame)[:3, 3]
|
||||
- self.forward_kinematics(robot_pos_deg[:-1] - delta, frame)[:3, 3]
|
||||
) / eps
|
||||
jac[:, el_ix] = sdot
|
||||
return jac
|
||||
|
||||
def ik(
|
||||
self,
|
||||
current_joint_pos: NDArray[np.float32],
|
||||
desired_ee_pose: NDArray[np.float32],
|
||||
position_only: bool = True,
|
||||
frame: str = "gripper_tip",
|
||||
max_iterations: int = 5,
|
||||
learning_rate: float = 1,
|
||||
) -> NDArray[np.float32]:
|
||||
"""Inverse kinematics using gradient descent.
|
||||
|
||||
Args:
|
||||
current_joint_state: Initial joint positions in degrees
|
||||
desired_ee_pose: Target end-effector pose as a 4x4 transformation matrix
|
||||
position_only: If True, only match end-effector position, not orientation
|
||||
frame: Target frame. One of
|
||||
``{"base", "shoulder", "humerus", "forearm", "wrist", "gripper", "gripper_tip"}``.
|
||||
max_iterations: Maximum number of iterations to run
|
||||
learning_rate: Learning rate for gradient descent
|
||||
|
||||
Returns:
|
||||
Joint positions in degrees that achieve the desired end-effector pose
|
||||
"""
|
||||
# Do gradient descent.
|
||||
current_joint_state = current_joint_pos.copy()
|
||||
for _ in range(max_iterations):
|
||||
current_ee_pose = self.forward_kinematics(current_joint_state, frame)
|
||||
if not position_only:
|
||||
error = se3_error(desired_ee_pose, current_ee_pose)
|
||||
jac = self.compute_jacobian(current_joint_state, frame)
|
||||
else:
|
||||
error = desired_ee_pose[:3, 3] - current_ee_pose[:3, 3]
|
||||
jac = self.compute_positional_jacobian(current_joint_state, frame)
|
||||
delta_angles = np.linalg.pinv(jac) @ error
|
||||
current_joint_state[:-1] += learning_rate * delta_angles
|
||||
|
||||
if np.linalg.norm(error) < 5e-3:
|
||||
return current_joint_state
|
||||
return current_joint_state
|
||||
@@ -1,45 +0,0 @@
|
||||
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
# NO CHECKED-IN PROTOBUF GENCODE
|
||||
# source: lerobot/common/transport/services.proto
|
||||
# Protobuf Python Version: 5.29.0
|
||||
"""Generated protocol buffer code."""
|
||||
from google.protobuf import descriptor as _descriptor
|
||||
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||
from google.protobuf import runtime_version as _runtime_version
|
||||
from google.protobuf import symbol_database as _symbol_database
|
||||
from google.protobuf.internal import builder as _builder
|
||||
_runtime_version.ValidateProtobufRuntimeVersion(
|
||||
_runtime_version.Domain.PUBLIC,
|
||||
5,
|
||||
29,
|
||||
0,
|
||||
'',
|
||||
'lerobot/common/transport/services.proto'
|
||||
)
|
||||
# @@protoc_insertion_point(imports)
|
||||
|
||||
_sym_db = _symbol_database.Default()
|
||||
|
||||
|
||||
|
||||
|
||||
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\'lerobot/common/transport/services.proto\x12\ttransport\"L\n\nTransition\x12\x30\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x18.transport.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\"L\n\nParameters\x12\x30\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x18.transport.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\"T\n\x12InteractionMessage\x12\x30\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x18.transport.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\"\x07\n\x05\x45mpty*`\n\rTransferState\x12\x14\n\x10TRANSFER_UNKNOWN\x10\x00\x12\x12\n\x0eTRANSFER_BEGIN\x10\x01\x12\x13\n\x0fTRANSFER_MIDDLE\x10\x02\x12\x10\n\x0cTRANSFER_END\x10\x03\x32\x81\x02\n\x0eLearnerService\x12=\n\x10StreamParameters\x12\x10.transport.Empty\x1a\x15.transport.Parameters0\x01\x12<\n\x0fSendTransitions\x12\x15.transport.Transition\x1a\x10.transport.Empty(\x01\x12\x45\n\x10SendInteractions\x12\x1d.transport.InteractionMessage\x1a\x10.transport.Empty(\x01\x12+\n\x05Ready\x12\x10.transport.Empty\x1a\x10.transport.Emptyb\x06proto3')
|
||||
|
||||
_globals = globals()
|
||||
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
||||
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'lerobot.common.transport.services_pb2', _globals)
|
||||
if not _descriptor._USE_C_DESCRIPTORS:
|
||||
DESCRIPTOR._loaded_options = None
|
||||
_globals['_TRANSFERSTATE']._serialized_start=305
|
||||
_globals['_TRANSFERSTATE']._serialized_end=401
|
||||
_globals['_TRANSITION']._serialized_start=54
|
||||
_globals['_TRANSITION']._serialized_end=130
|
||||
_globals['_PARAMETERS']._serialized_start=132
|
||||
_globals['_PARAMETERS']._serialized_end=208
|
||||
_globals['_INTERACTIONMESSAGE']._serialized_start=210
|
||||
_globals['_INTERACTIONMESSAGE']._serialized_end=294
|
||||
_globals['_EMPTY']._serialized_start=296
|
||||
_globals['_EMPTY']._serialized_end=303
|
||||
_globals['_LEARNERSERVICE']._serialized_start=404
|
||||
_globals['_LEARNERSERVICE']._serialized_end=661
|
||||
# @@protoc_insertion_point(module_scope)
|
||||
@@ -1,71 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Once you have trained a policy with our training script (lerobot/scripts/train.py), use this script to push it
|
||||
to the hub.
|
||||
|
||||
Example:
|
||||
|
||||
```bash
|
||||
python lerobot/scripts/push_pretrained.py \
|
||||
--pretrained_path=outputs/train/act_aloha_sim_transfer_cube_human/checkpoints/last/pretrained_model \
|
||||
--repo_id=lerobot/act_aloha_sim_transfer_cube_human
|
||||
```
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import draccus
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
|
||||
@dataclass
|
||||
class PushPreTrainedConfig:
|
||||
pretrained_path: Path
|
||||
repo_id: str
|
||||
branch: str | None = None
|
||||
private: bool = False
|
||||
exist_ok: bool = False
|
||||
|
||||
|
||||
@draccus.wrap()
|
||||
def main(cfg: PushPreTrainedConfig):
|
||||
hub_api = HfApi()
|
||||
hub_api.create_repo(
|
||||
repo_id=cfg.repo_id,
|
||||
private=cfg.private,
|
||||
repo_type="model",
|
||||
exist_ok=cfg.exist_ok,
|
||||
)
|
||||
if cfg.branch:
|
||||
hub_api.create_branch(
|
||||
repo_id=cfg.repo_id,
|
||||
branch=cfg.branch,
|
||||
repo_type="model",
|
||||
exist_ok=cfg.exist_ok,
|
||||
)
|
||||
|
||||
hub_api.upload_folder(
|
||||
repo_id=cfg.repo_id,
|
||||
folder_path=cfg.pretrained_path,
|
||||
repo_type="model",
|
||||
revision=cfg.branch,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -68,7 +68,6 @@ dependencies = [
|
||||
"pyserial>=3.5",
|
||||
"pyzmq>=26.2.1",
|
||||
"rerun-sdk>=0.21.0",
|
||||
"scipy>=1.14.0",
|
||||
"termcolor>=2.4.0",
|
||||
"torch>=2.2.1",
|
||||
"torchcodec>=0.2.1; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')",
|
||||
@@ -87,11 +86,12 @@ dora = [
|
||||
dynamixel = ["dynamixel-sdk>=3.7.31"]
|
||||
feetech = ["feetech-servo-sdk>=1.0.0"]
|
||||
gamepad = ["pygame>=2.5.1", "hidapi>=0.14.0"]
|
||||
kinematics = ["placo>=0.9.6"]
|
||||
intelrealsense = [
|
||||
"pyrealsense2>=2.55.1.6486 ; sys_platform != 'darwin'",
|
||||
"pyrealsense2-macosx>=2.54 ; sys_platform == 'darwin'",
|
||||
]
|
||||
pi0 = ["transformers>=4.48.0"]
|
||||
pi0 = ["transformers>=4.50.3"]
|
||||
smolvla = ["transformers>=4.50.3", "num2words>=0.5.14", "accelerate>=1.7.0", "safetensors>=0.4.3"]
|
||||
pusht = ["gym-pusht>=0.1.5 ; python_version < '4.0'"]
|
||||
stretch = [
|
||||
@@ -100,13 +100,16 @@ stretch = [
|
||||
"pyrealsense2>=2.55.1.6486 ; sys_platform != 'darwin'"
|
||||
]
|
||||
test = ["pytest>=8.1.0", "pytest-timeout>=2.4.0", "pytest-cov>=5.0.0", "pyserial>=3.5", "mock-serial>=0.0.1 ; sys_platform != 'win32'"]
|
||||
hilserl = ["transformers>=4.48", "gym-hil>=0.1.8", "protobuf>=5.29.3", "grpcio==1.71.0"]
|
||||
hilserl = ["transformers>=4.50.3", "gym-hil>=0.1.9", "protobuf>=5.29.3", "grpcio==1.71.0", "placo>=0.9.6"]
|
||||
umi = ["imagecodecs>=2024.1.1"]
|
||||
video_benchmark = ["scikit-image>=0.23.2", "pandas>=2.2.2"]
|
||||
xarm = ["gym-xarm>=0.1.1 ; python_version < '4.0'"]
|
||||
|
||||
[tool.poetry]
|
||||
requires-poetry = ">=2.1"
|
||||
packages = [
|
||||
{ include = "lerobot", from = "src" }
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 110
|
||||
@@ -123,10 +126,10 @@ select = ["E4", "E7", "E9", "F", "I", "N", "B", "C4", "SIM"]
|
||||
exclude_dirs = [
|
||||
"tests",
|
||||
"benchmarks",
|
||||
"lerobot/common/datasets/push_dataset_to_hub",
|
||||
"lerobot/common/datasets/v2/convert_dataset_v1_to_v2",
|
||||
"lerobot/common/policies/pi0/conversion_scripts",
|
||||
"lerobot/scripts/push_dataset_to_hub.py",
|
||||
"src/lerobot/datasets/push_dataset_to_hub",
|
||||
"src/lerobot/datasets/v2/convert_dataset_v1_to_v2",
|
||||
"src/lerobot/policies/pi0/conversion_scripts",
|
||||
"src/lerobot/scripts/push_dataset_to_hub.py",
|
||||
]
|
||||
skips = ["B101", "B311", "B404", "B603"]
|
||||
|
||||
|
||||
@@ -167,10 +167,10 @@ available_datasets = sorted(
|
||||
set(itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets))
|
||||
)
|
||||
|
||||
# lists all available policies from `lerobot/common/policies`
|
||||
# lists all available policies from `lerobot/policies`
|
||||
available_policies = ["act", "diffusion", "tdmpc", "vqbet"]
|
||||
|
||||
# lists all available robots from `lerobot/common/robot_devices/robots`
|
||||
# lists all available robots from `lerobot/robot_devices/robots`
|
||||
available_robots = [
|
||||
"koch",
|
||||
"koch_bimanual",
|
||||
@@ -179,13 +179,13 @@ available_robots = [
|
||||
"so101",
|
||||
]
|
||||
|
||||
# lists all available cameras from `lerobot/common/robot_devices/cameras`
|
||||
# lists all available cameras from `lerobot/robot_devices/cameras`
|
||||
available_cameras = [
|
||||
"opencv",
|
||||
"intelrealsense",
|
||||
]
|
||||
|
||||
# lists all available motors from `lerobot/common/robot_devices/motors`
|
||||
# lists all available motors from `lerobot/robot_devices/motors`
|
||||
available_motors = [
|
||||
"dynamixel",
|
||||
"feetech",
|
||||
@@ -31,9 +31,9 @@ from pprint import pformat
|
||||
|
||||
import draccus
|
||||
|
||||
from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401
|
||||
from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401
|
||||
from lerobot.common.robots import ( # noqa: F401
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401
|
||||
from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401
|
||||
from lerobot.robots import ( # noqa: F401
|
||||
Robot,
|
||||
RobotConfig,
|
||||
koch_follower,
|
||||
@@ -42,7 +42,7 @@ from lerobot.common.robots import ( # noqa: F401
|
||||
so100_follower,
|
||||
so101_follower,
|
||||
)
|
||||
from lerobot.common.teleoperators import ( # noqa: F401
|
||||
from lerobot.teleoperators import ( # noqa: F401
|
||||
Teleoperator,
|
||||
TeleoperatorConfig,
|
||||
koch_leader,
|
||||
@@ -50,7 +50,7 @@ from lerobot.common.teleoperators import ( # noqa: F401
|
||||
so100_leader,
|
||||
so101_leader,
|
||||
)
|
||||
from lerobot.common.utils.utils import init_logging
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -27,7 +27,7 @@ from typing import Any, Dict, List
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
|
||||
from lerobot.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
|
||||
|
||||
from ..camera import Camera
|
||||
from ..utils import get_cv2_backend, get_cv2_rotation
|
||||
@@ -64,8 +64,8 @@ class OpenCVCamera(Camera):
|
||||
|
||||
Example:
|
||||
```python
|
||||
from lerobot.common.cameras.opencv import OpenCVCamera
|
||||
from lerobot.common.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode, Cv2Rotation
|
||||
from lerobot.cameras.opencv import OpenCVCamera
|
||||
from lerobot.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode, Cv2Rotation
|
||||
|
||||
# Basic usage with camera index 0
|
||||
config = OpenCVCameraConfig(index_or_path=0)
|
||||
@@ -29,7 +29,7 @@ try:
|
||||
except Exception as e:
|
||||
logging.info(f"Could not import realsense: {e}")
|
||||
|
||||
from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
|
||||
from lerobot.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
|
||||
|
||||
from ..camera import Camera
|
||||
from ..configs import ColorMode
|
||||
@@ -63,8 +63,8 @@ class RealSenseCamera(Camera):
|
||||
|
||||
Example:
|
||||
```python
|
||||
from lerobot.common.cameras.realsense import RealSenseCamera, RealSenseCameraConfig
|
||||
from lerobot.common.cameras import ColorMode, Cv2Rotation
|
||||
from lerobot.cameras.realsense import RealSenseCamera, RealSenseCameraConfig
|
||||
from lerobot.cameras import ColorMode, Cv2Rotation
|
||||
|
||||
# Basic usage with serial number
|
||||
config = RealSenseCameraConfig(serial_number_or_name="0123456789") # Replace with actual SN
|
||||
3425
src/lerobot/configs/datasets.py
Normal file
3425
src/lerobot/configs/datasets.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -16,11 +16,11 @@
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from lerobot.common import (
|
||||
from lerobot import (
|
||||
policies, # noqa: F401
|
||||
)
|
||||
from lerobot.common.datasets.transforms import ImageTransformsConfig
|
||||
from lerobot.common.datasets.video_utils import get_safe_default_codec
|
||||
from lerobot.datasets.transforms import ImageTransformsConfig
|
||||
from lerobot.datasets.video_utils import get_safe_default_codec
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -37,6 +37,21 @@ class DatasetConfig:
|
||||
revision: str | None = None
|
||||
use_imagenet_stats: bool = True
|
||||
video_backend: str = field(default_factory=get_safe_default_codec)
|
||||
# Multi-dataset support
|
||||
sampling_weights: str | None = None
|
||||
max_action_dim: int | None = None
|
||||
max_state_dim: int | None = None
|
||||
max_num_images: int | None = None
|
||||
max_image_dim: int | None = None
|
||||
train_on_all_features: bool = False
|
||||
features_version: int = 0
|
||||
discard_first_n_frames: int = 0
|
||||
min_fps: int = 1
|
||||
max_fps: int = 100
|
||||
discard_first_idle_frames: bool = False
|
||||
motion_threshold: float = 5e-2
|
||||
motion_window_size: int = 10
|
||||
motion_buffer: int = 3
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -17,7 +17,7 @@ import logging
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from lerobot.common import envs, policies # noqa: F401
|
||||
from lerobot import envs, policies # noqa: F401
|
||||
from lerobot.configs import parser
|
||||
from lerobot.configs.default import EvalConfig
|
||||
from lerobot.configs.policies import PreTrainedConfig
|
||||
@@ -22,7 +22,7 @@ from typing import Sequence
|
||||
|
||||
import draccus
|
||||
|
||||
from lerobot.common.utils.utils import has_method
|
||||
from lerobot.utils.utils import has_method
|
||||
|
||||
PATH_KEY = "path"
|
||||
PLUGIN_DISCOVERY_SUFFIX = "discover_packages_path"
|
||||
@@ -23,11 +23,11 @@ from huggingface_hub import hf_hub_download
|
||||
from huggingface_hub.constants import CONFIG_NAME
|
||||
from huggingface_hub.errors import HfHubHTTPError
|
||||
|
||||
from lerobot.common.optim.optimizers import OptimizerConfig
|
||||
from lerobot.common.optim.schedulers import LRSchedulerConfig
|
||||
from lerobot.common.utils.hub import HubMixin
|
||||
from lerobot.common.utils.utils import auto_select_torch_device, is_amp_available, is_torch_device_available
|
||||
from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature
|
||||
from lerobot.optim.optimizers import OptimizerConfig
|
||||
from lerobot.optim.schedulers import LRSchedulerConfig
|
||||
from lerobot.utils.hub import HubMixin
|
||||
from lerobot.utils.utils import auto_select_torch_device, is_amp_available, is_torch_device_available
|
||||
|
||||
# Generic variable that is either PreTrainedConfig or a subclass thereof
|
||||
T = TypeVar("T", bound="PreTrainedConfig")
|
||||
@@ -60,6 +60,16 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
|
||||
# automatic gradient scaling is used.
|
||||
use_amp: bool = False
|
||||
|
||||
push_to_hub: bool = True
|
||||
repo_id: str | None = None
|
||||
|
||||
# Upload on private repository on the Hugging Face hub.
|
||||
private: bool | None = None
|
||||
# Add tags to your policy on the hub.
|
||||
tags: list[str] | None = None
|
||||
# Add tags to your policy on the hub.
|
||||
license: str | None = None
|
||||
|
||||
def __post_init__(self):
|
||||
self.pretrained_path = None
|
||||
if not self.device or not is_torch_device_available(self.device):
|
||||
@@ -21,13 +21,13 @@ import draccus
|
||||
from huggingface_hub import hf_hub_download
|
||||
from huggingface_hub.errors import HfHubHTTPError
|
||||
|
||||
from lerobot.common import envs
|
||||
from lerobot.common.optim import OptimizerConfig
|
||||
from lerobot.common.optim.schedulers import LRSchedulerConfig
|
||||
from lerobot.common.utils.hub import HubMixin
|
||||
from lerobot import envs
|
||||
from lerobot.configs import parser
|
||||
from lerobot.configs.default import DatasetConfig, EvalConfig, WandBConfig
|
||||
from lerobot.configs.policies import PreTrainedConfig
|
||||
from lerobot.optim import OptimizerConfig
|
||||
from lerobot.optim.schedulers import LRSchedulerConfig
|
||||
from lerobot.utils.hub import HubMixin
|
||||
|
||||
TRAIN_CONFIG_NAME = "train_config.json"
|
||||
|
||||
@@ -116,6 +116,11 @@ class TrainPipelineConfig(HubMixin):
|
||||
self.optimizer = self.policy.get_optimizer_preset()
|
||||
self.scheduler = self.policy.get_scheduler_preset()
|
||||
|
||||
if self.policy.push_to_hub and not self.policy.repo_id:
|
||||
raise ValueError(
|
||||
"'policy.repo_id' argument missing. Please specify it to push the model to the hub."
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def __get_path_fields__(cls) -> list[str]:
|
||||
"""This enables the parser to load config from the policy using `--policy.path=local/dir`"""
|
||||
@@ -22,9 +22,14 @@ OBS_STATE = "observation.state"
|
||||
OBS_IMAGE = "observation.image"
|
||||
OBS_IMAGES = "observation.images"
|
||||
ACTION = "action"
|
||||
OBS_IMAGE_2 = "observation.image2"
|
||||
OBS_IMAGE_3 = "observation.image3"
|
||||
OBS_IMAGE_4 = "observation.image4"
|
||||
REWARD = "next.reward"
|
||||
|
||||
ROBOTS = "robots"
|
||||
TASK = "task"
|
||||
ROBOT_TYPE = "robot_type"
|
||||
TELEOPERATORS = "teleoperators"
|
||||
|
||||
# files & directories
|
||||
@@ -20,7 +20,7 @@ The dataset you requested ({repo_id}) is in {version} format.
|
||||
We introduced a new format since v2.0 which is not backward compatible with v1.x.
|
||||
Please, use our conversion script. Modify the following command with your own task description:
|
||||
```
|
||||
python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \\
|
||||
python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \\
|
||||
--repo-id {repo_id} \\
|
||||
--single-task "TASK DESCRIPTION." # <---- /!\\ Replace TASK DESCRIPTION /!\\
|
||||
```
|
||||
@@ -40,7 +40,7 @@ The dataset you requested ({repo_id}) is in {version} format.
|
||||
While current version of LeRobot is backward-compatible with it, the version of your dataset still uses global
|
||||
stats instead of per-episode stats. Update your dataset stats to the new format using this command:
|
||||
```
|
||||
python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id={repo_id}
|
||||
python -m lerobot.datasets.v21.convert_dataset_v20_to_v21 --repo-id={repo_id}
|
||||
```
|
||||
|
||||
If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
|
||||
68
src/lerobot/datasets/collators.py
Normal file
68
src/lerobot/datasets/collators.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from typing import Dict, List
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch.utils.data.dataloader import default_collate
|
||||
|
||||
|
||||
def is_batch_need_padding(values: list[torch.Tensor], pad_dim: int = -1) -> int:
|
||||
return len(values[0].shape) > 0 # and len(set([v.shape[pad_dim] for v in values])) > 1
|
||||
|
||||
|
||||
def pad_tensor(
|
||||
tensor: torch.Tensor, max_size: int, pad_dim: int = -1, pad_value: float = 0.0
|
||||
) -> torch.Tensor:
|
||||
is_numpy = isinstance(tensor, np.ndarray)
|
||||
if is_numpy:
|
||||
tensor = torch.tensor(tensor)
|
||||
pad = max_size - tensor.shape[pad_dim]
|
||||
if pad > 0:
|
||||
pad_sizes = (0, pad) # pad right
|
||||
tensor = torch.nn.functional.pad(tensor, pad_sizes, value=pad_value)
|
||||
return tensor.numpy() if is_numpy else tensor
|
||||
|
||||
|
||||
def pad_list_of_tensors(
|
||||
tensors: List[torch.Tensor], pad_dim: int = -1, pad_value: float = 0.0
|
||||
) -> List[torch.Tensor]:
|
||||
max_size = max([v.shape[pad_dim] for v in tensors])
|
||||
return [pad_tensor(tensor, max_size, pad_dim=pad_dim, pad_value=pad_value) for tensor in tensors]
|
||||
|
||||
|
||||
def multidataset_collate_fn(
|
||||
batch: List[Dict[str, torch.Tensor]],
|
||||
pad_dim: int = -1,
|
||||
pad_value: float = 0.0,
|
||||
keys_to_max_dim: dict = {},
|
||||
) -> Dict[str, torch.Tensor]:
|
||||
"""
|
||||
Custom collate function to pad tensors with multiple dimensions.
|
||||
|
||||
Args:
|
||||
batch (List[Dict[str, torch.Tensor]]): List of dataset samples (each sample is a dictionary).
|
||||
|
||||
Returns:
|
||||
Dict[str, torch.Tensor]: Batch with padded tensors.
|
||||
"""
|
||||
batch_keys = batch[0].keys()
|
||||
collated_batch = [{} for _ in range(len(batch))]
|
||||
# FIXME(mshukor): pad to max shape per feature type
|
||||
for key in batch_keys:
|
||||
values = [sample[key] for sample in batch]
|
||||
if (
|
||||
key in keys_to_max_dim
|
||||
and isinstance(values[0], torch.Tensor)
|
||||
and is_batch_need_padding(values, pad_dim=pad_dim)
|
||||
and keys_to_max_dim[key] is not None
|
||||
):
|
||||
max_size = keys_to_max_dim[key]
|
||||
for i in range(len(batch)):
|
||||
collated_batch[i][key] = pad_tensor(
|
||||
batch[i][key], max_size, pad_dim=pad_dim, pad_value=pad_value
|
||||
)
|
||||
else:
|
||||
for i in range(len(batch)):
|
||||
collated_batch[i][key] = batch[i][key]
|
||||
collated_batch = default_collate(collated_batch)
|
||||
|
||||
return collated_batch
|
||||
@@ -15,7 +15,7 @@
|
||||
# limitations under the License.
|
||||
import numpy as np
|
||||
|
||||
from lerobot.common.datasets.utils import load_image_as_numpy
|
||||
from lerobot.datasets.utils import load_image_as_numpy
|
||||
|
||||
|
||||
def estimate_num_samples(
|
||||
@@ -125,9 +125,30 @@ def _assert_type_and_shape(stats_list: list[dict[str, dict]]):
|
||||
|
||||
def aggregate_feature_stats(stats_ft_list: list[dict[str, dict]]) -> dict[str, dict[str, np.ndarray]]:
|
||||
"""Aggregates stats for a single feature."""
|
||||
means = np.stack([s["mean"] for s in stats_ft_list])
|
||||
variances = np.stack([s["std"] ** 2 for s in stats_ft_list])
|
||||
counts = np.stack([s["count"] for s in stats_ft_list])
|
||||
# Filter out stats that don't have required keys
|
||||
valid_stats = []
|
||||
for s in stats_ft_list:
|
||||
if all(key in s for key in ["mean", "std", "count", "min", "max"]):
|
||||
valid_stats.append(s)
|
||||
else:
|
||||
# If count is missing, add it with a default value
|
||||
if "count" not in s:
|
||||
s["count"] = np.array([1]) # Default count
|
||||
valid_stats.append(s)
|
||||
|
||||
if not valid_stats:
|
||||
# If no valid stats, return empty stats
|
||||
return {
|
||||
"min": np.array([0]),
|
||||
"max": np.array([0]),
|
||||
"mean": np.array([0]),
|
||||
"std": np.array([0]),
|
||||
"count": np.array([0]),
|
||||
}
|
||||
|
||||
means = np.stack([s["mean"] for s in valid_stats])
|
||||
variances = np.stack([s["std"] ** 2 for s in valid_stats])
|
||||
counts = np.stack([s["count"] for s in valid_stats])
|
||||
total_count = counts.sum(axis=0)
|
||||
|
||||
# Prepare weighted mean by matching number of dimensions
|
||||
@@ -142,12 +163,13 @@ def aggregate_feature_stats(stats_ft_list: list[dict[str, dict]]) -> dict[str, d
|
||||
delta_means = means - total_mean
|
||||
weighted_variances = (variances + delta_means**2) * counts
|
||||
total_variance = weighted_variances.sum(axis=0) / total_count
|
||||
total_std = np.sqrt(total_variance)
|
||||
|
||||
return {
|
||||
"min": np.min(np.stack([s["min"] for s in stats_ft_list]), axis=0),
|
||||
"max": np.max(np.stack([s["max"] for s in stats_ft_list]), axis=0),
|
||||
"min": np.min(np.stack([s["min"] for s in valid_stats]), axis=0),
|
||||
"max": np.max(np.stack([s["max"] for s in valid_stats]), axis=0),
|
||||
"mean": total_mean,
|
||||
"std": np.sqrt(total_variance),
|
||||
"std": total_std,
|
||||
"count": total_count,
|
||||
}
|
||||
|
||||
@@ -18,20 +18,22 @@ from pprint import pformat
|
||||
|
||||
import torch
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import (
|
||||
from lerobot.configs.policies import PreTrainedConfig
|
||||
from lerobot.configs.train import TrainPipelineConfig
|
||||
from lerobot.datasets.lerobot_dataset import (
|
||||
LeRobotDataset,
|
||||
LeRobotDatasetMetadata,
|
||||
MultiLeRobotDataset,
|
||||
)
|
||||
from lerobot.common.datasets.transforms import ImageTransforms
|
||||
from lerobot.configs.policies import PreTrainedConfig
|
||||
from lerobot.configs.train import TrainPipelineConfig
|
||||
from lerobot.datasets.transforms import ImageTransforms
|
||||
|
||||
IMAGENET_STATS = {
|
||||
"mean": [[[0.485]], [[0.456]], [[0.406]]], # (c,1,1)
|
||||
"std": [[[0.229]], [[0.224]], [[0.225]]], # (c,1,1)
|
||||
}
|
||||
|
||||
from lerobot.datasets.utils_must import EPISODES_DATASET_MAPPING, FEATURE_KEYS_MAPPING
|
||||
|
||||
|
||||
def resolve_delta_timestamps(
|
||||
cfg: PreTrainedConfig, ds_meta: LeRobotDatasetMetadata
|
||||
@@ -81,37 +83,87 @@ def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDatas
|
||||
image_transforms = (
|
||||
ImageTransforms(cfg.dataset.image_transforms) if cfg.dataset.image_transforms.enable else None
|
||||
)
|
||||
|
||||
if isinstance(cfg.dataset.repo_id, str):
|
||||
if "," in cfg.dataset.repo_id:
|
||||
repo_id = cfg.dataset.repo_id.split(",")
|
||||
repo_id = [r for r in repo_id if r]
|
||||
else:
|
||||
repo_id = cfg.dataset.repo_id
|
||||
sampling_weights = cfg.dataset.sampling_weights.split(",") if cfg.dataset.sampling_weights else None
|
||||
feature_keys_mapping = FEATURE_KEYS_MAPPING
|
||||
if isinstance(repo_id, str):
|
||||
revision = getattr(cfg.dataset, "revision", None)
|
||||
ds_meta = LeRobotDatasetMetadata(
|
||||
cfg.dataset.repo_id, root=cfg.dataset.root, revision=cfg.dataset.revision
|
||||
cfg.dataset.repo_id,
|
||||
feature_keys_mapping=feature_keys_mapping,
|
||||
revision=revision,
|
||||
)
|
||||
delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta)
|
||||
dataset = LeRobotDataset(
|
||||
cfg.dataset.repo_id,
|
||||
root=cfg.dataset.root,
|
||||
root=getattr(cfg.dataset, "root", None),
|
||||
episodes=cfg.dataset.episodes,
|
||||
delta_timestamps=delta_timestamps,
|
||||
image_transforms=image_transforms,
|
||||
revision=cfg.dataset.revision,
|
||||
revision=revision,
|
||||
video_backend=cfg.dataset.video_backend,
|
||||
download_videos=True,
|
||||
feature_keys_mapping=feature_keys_mapping,
|
||||
max_action_dim=cfg.dataset.max_action_dim,
|
||||
max_state_dim=cfg.dataset.max_state_dim,
|
||||
max_num_images=cfg.dataset.max_num_images,
|
||||
max_image_dim=cfg.dataset.max_image_dim,
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError("The MultiLeRobotDataset isn't supported for now.")
|
||||
delta_timestamps = {}
|
||||
episodes = {}
|
||||
for i in range(len(repo_id)):
|
||||
ds_meta = LeRobotDatasetMetadata(
|
||||
repo_id[i],
|
||||
feature_keys_mapping=feature_keys_mapping,
|
||||
) # FIXME(mshukor): ?
|
||||
delta_timestamps[repo_id[i]] = resolve_delta_timestamps(cfg.policy, ds_meta)
|
||||
episodes[repo_id[i]] = EPISODES_DATASET_MAPPING.get(repo_id[i], cfg.dataset.episodes)
|
||||
# training_features = TRAINING_FEATURES.get(cfg.dataset.features_version, None)
|
||||
# FIXME: (jadechoghari): check support for training features
|
||||
training_features = None
|
||||
dataset = MultiLeRobotDataset(
|
||||
cfg.dataset.repo_id,
|
||||
repo_id,
|
||||
# TODO(aliberts): add proper support for multi dataset
|
||||
# delta_timestamps=delta_timestamps,
|
||||
episodes=episodes,
|
||||
delta_timestamps=delta_timestamps,
|
||||
image_transforms=image_transforms,
|
||||
video_backend=cfg.dataset.video_backend,
|
||||
download_videos=True,
|
||||
sampling_weights=sampling_weights,
|
||||
feature_keys_mapping=feature_keys_mapping,
|
||||
max_action_dim=cfg.policy.max_action_dim,
|
||||
max_state_dim=cfg.policy.max_state_dim,
|
||||
max_num_images=cfg.dataset.max_num_images,
|
||||
max_image_dim=cfg.dataset.max_image_dim,
|
||||
train_on_all_features=cfg.dataset.train_on_all_features,
|
||||
training_features=training_features,
|
||||
discard_first_n_frames=cfg.dataset.discard_first_n_frames,
|
||||
min_fps=cfg.dataset.min_fps,
|
||||
max_fps=cfg.dataset.max_fps,
|
||||
discard_first_idle_frames=cfg.dataset.discard_first_idle_frames,
|
||||
motion_threshold=cfg.dataset.motion_threshold,
|
||||
motion_window_size=cfg.dataset.motion_window_size,
|
||||
motion_buffer=cfg.dataset.motion_buffer,
|
||||
)
|
||||
logging.info(
|
||||
"Multiple datasets were provided. Applied the following index mapping to the provided datasets: "
|
||||
f"{pformat(dataset.repo_id_to_index, indent=2)}"
|
||||
)
|
||||
|
||||
if cfg.dataset.use_imagenet_stats:
|
||||
# Initialize stats structure if it doesn't exist
|
||||
if dataset.meta.stats is None:
|
||||
dataset.meta.stats = {}
|
||||
|
||||
for key in dataset.meta.camera_keys:
|
||||
# Initialize stats for this camera key if it doesn't exist
|
||||
if key not in dataset.meta.stats or dataset.meta.stats[key] is None:
|
||||
dataset.meta.stats[key] = {}
|
||||
|
||||
for stats_type, stats in IMAGENET_STATS.items():
|
||||
dataset.meta.stats[key][stats_type] = torch.tensor(stats, dtype=torch.float32)
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
# limitations under the License.
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
@@ -30,10 +31,18 @@ from huggingface_hub import HfApi, snapshot_download
|
||||
from huggingface_hub.constants import REPOCARD_NAME
|
||||
from huggingface_hub.errors import RevisionNotFoundError
|
||||
|
||||
from lerobot.common.constants import HF_LEROBOT_HOME
|
||||
from lerobot.common.datasets.compute_stats import aggregate_stats, compute_episode_stats
|
||||
from lerobot.common.datasets.image_writer import AsyncImageWriter, write_image
|
||||
from lerobot.common.datasets.utils import (
|
||||
from lerobot.constants import (
|
||||
ACTION,
|
||||
HF_LEROBOT_HOME,
|
||||
OBS_ENV_STATE,
|
||||
OBS_STATE,
|
||||
)
|
||||
from lerobot.datasets.compute_stats import ( # aggregate_stats_per_robot_type,
|
||||
aggregate_stats,
|
||||
compute_episode_stats,
|
||||
)
|
||||
from lerobot.datasets.image_writer import AsyncImageWriter, write_image
|
||||
from lerobot.datasets.utils import (
|
||||
DEFAULT_FEATURES,
|
||||
DEFAULT_IMAGE_PATH,
|
||||
INFO_PATH,
|
||||
@@ -41,7 +50,6 @@ from lerobot.common.datasets.utils import (
|
||||
_validate_feature_names,
|
||||
append_jsonlines,
|
||||
backward_compatible_episodes_stats,
|
||||
check_delta_timestamps,
|
||||
check_timestamps_sync,
|
||||
check_version_compatibility,
|
||||
create_empty_dataset_info,
|
||||
@@ -58,14 +66,36 @@ from lerobot.common.datasets.utils import (
|
||||
load_info,
|
||||
load_stats,
|
||||
load_tasks,
|
||||
map_dict_keys,
|
||||
validate_episode_buffer,
|
||||
validate_frame,
|
||||
write_episode,
|
||||
write_episode_stats,
|
||||
write_info,
|
||||
write_json,
|
||||
# keep_datasets_with_the_same_features_per_robot_type,
|
||||
# map_dict_pad_keys,
|
||||
# keep_datasets_with_valid_fps,
|
||||
# find_start_of_motion,
|
||||
)
|
||||
from lerobot.common.datasets.video_utils import (
|
||||
|
||||
# mustafa stuff here
|
||||
from lerobot.datasets.utils_must import (
|
||||
OBS_IMAGE,
|
||||
OBS_IMAGE_2,
|
||||
OBS_IMAGE_3,
|
||||
ROBOT_TYPE_KEYS_MAPPING,
|
||||
TASKS_KEYS_MAPPING,
|
||||
aggregate_stats_per_robot_type,
|
||||
create_padded_features,
|
||||
find_start_of_motion,
|
||||
keep_datasets_with_the_same_features_per_robot_type,
|
||||
keep_datasets_with_valid_fps,
|
||||
map_dict_keys,
|
||||
pad_tensor,
|
||||
reshape_features_to_max_dim,
|
||||
)
|
||||
from lerobot.datasets.video_utils import (
|
||||
VideoFrame,
|
||||
decode_video_frames,
|
||||
encode_video_frames,
|
||||
@@ -74,6 +104,15 @@ from lerobot.common.datasets.video_utils import (
|
||||
)
|
||||
|
||||
CODEBASE_VERSION = "v2.1"
|
||||
LEROBOT_HOME = Path(os.getenv("LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser()
|
||||
|
||||
|
||||
def find_start_of_motion(velocities, window_size, threshold, motion_buffer):
|
||||
for t in range(len(velocities) - window_size):
|
||||
window_mean = velocities[t : t + window_size].mean()
|
||||
if window_mean > threshold:
|
||||
return max(0, t - motion_buffer) # include slight context before motion
|
||||
return 0
|
||||
|
||||
|
||||
class LeRobotDatasetMetadata:
|
||||
@@ -81,10 +120,13 @@ class LeRobotDatasetMetadata:
|
||||
self,
|
||||
repo_id: str,
|
||||
root: str | Path | None = None,
|
||||
local_files_only: bool = False,
|
||||
feature_keys_mapping: dict[str, str] | None = None,
|
||||
revision: str | None = None,
|
||||
force_cache_sync: bool = False,
|
||||
):
|
||||
self.repo_id = repo_id
|
||||
self.local_files_only = local_files_only
|
||||
self.revision = revision if revision else CODEBASE_VERSION
|
||||
self.root = Path(root) if root is not None else HF_LEROBOT_HOME / repo_id
|
||||
|
||||
@@ -99,18 +141,27 @@ class LeRobotDatasetMetadata:
|
||||
(self.root / "meta").mkdir(exist_ok=True, parents=True)
|
||||
self.pull_from_repo(allow_patterns="meta/")
|
||||
self.load_metadata()
|
||||
# added by mshukor
|
||||
self.feature_keys_mapping = feature_keys_mapping.get(repo_id, None) if feature_keys_mapping else None
|
||||
self.inverse_feature_keys_mapping = (
|
||||
{v: k for k, v in self.feature_keys_mapping.items() if v} if self.feature_keys_mapping else {}
|
||||
)
|
||||
self.info["features"] = map_dict_keys(
|
||||
self.info["features"], feature_keys_mapping=self.feature_keys_mapping
|
||||
)
|
||||
|
||||
def load_metadata(self):
|
||||
self.info = load_info(self.root)
|
||||
check_version_compatibility(self.repo_id, self._version, CODEBASE_VERSION)
|
||||
self.tasks, self.task_to_task_index = load_tasks(self.root)
|
||||
self.episodes = load_episodes(self.root)
|
||||
if self._version < packaging.version.parse("v2.1"):
|
||||
self.stats = load_stats(self.root)
|
||||
self.episodes_stats = backward_compatible_episodes_stats(self.stats, self.episodes)
|
||||
else:
|
||||
self.episodes_stats = load_episodes_stats(self.root)
|
||||
self.stats = aggregate_stats(list(self.episodes_stats.values()))
|
||||
# Force all datasets to use v2.1 format (episodes_stats.jsonl) to avoid missing stats.json issues, because I converted all the datasets to v2.1 format.
|
||||
# if self._version < packaging.version.parse("v2.1"):
|
||||
# self.stats = load_stats(self.root)
|
||||
# self.episodes_stats = backward_compatible_episodes_stats(self.stats, self.episodes)
|
||||
# else:
|
||||
self.episodes_stats = load_episodes_stats(self.root)
|
||||
self.stats = aggregate_stats(list(self.episodes_stats.values()))
|
||||
|
||||
def pull_from_repo(
|
||||
self,
|
||||
@@ -177,7 +228,15 @@ class LeRobotDatasetMetadata:
|
||||
@property
|
||||
def video_keys(self) -> list[str]:
|
||||
"""Keys to access visual modalities stored as videos."""
|
||||
return [key for key, ft in self.features.items() if ft["dtype"] == "video"]
|
||||
# changed
|
||||
keys = []
|
||||
for key, ft in self.features.items():
|
||||
key_ = (
|
||||
self.inverse_feature_keys_mapping.get(key, key) if self.inverse_feature_keys_mapping else key
|
||||
)
|
||||
if ft["dtype"] == "video":
|
||||
keys.append(key_)
|
||||
return keys
|
||||
|
||||
@property
|
||||
def camera_keys(self) -> list[str]:
|
||||
@@ -342,6 +401,19 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
force_cache_sync: bool = False,
|
||||
download_videos: bool = True,
|
||||
video_backend: str | None = None,
|
||||
local_files_only: bool = False,
|
||||
# new thing by M
|
||||
feature_keys_mapping: dict[str, str] | None = None,
|
||||
max_action_dim: int = None,
|
||||
max_state_dim: int = None,
|
||||
max_num_images: int = None,
|
||||
max_image_dim: int = None,
|
||||
training_features: list | None = None,
|
||||
discard_first_n_frames: int = 0,
|
||||
discard_first_idle_frames: bool = False,
|
||||
motion_threshold: float = 5e-2,
|
||||
motion_window_size: int = 10,
|
||||
motion_buffer: int = 3,
|
||||
):
|
||||
"""
|
||||
2 modes are available for instantiating this class, depending on 2 different use cases:
|
||||
@@ -357,7 +429,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
the dataset from that address and load it, pending your dataset is compliant with
|
||||
codebase_version v2.0. If your dataset has been created before this new format, you will be
|
||||
prompted to convert it using our conversion script from v1.6 to v2.0, which you can find at
|
||||
lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py.
|
||||
lerobot/datasets/v2/convert_dataset_v1_to_v2.py.
|
||||
|
||||
|
||||
2. Your dataset doesn't already exists (either on local disk or on the Hub): you can create an empty
|
||||
@@ -455,15 +527,35 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
self.video_backend = video_backend if video_backend else get_safe_default_codec()
|
||||
self.delta_indices = None
|
||||
|
||||
# by mshukor
|
||||
self.training_features = training_features
|
||||
self.discard_first_n_frames = discard_first_n_frames
|
||||
self.discard_first_idle_frames = discard_first_idle_frames
|
||||
self.motion_threshold = motion_threshold
|
||||
self.motion_window_size = motion_window_size
|
||||
self.motion_buffer = motion_buffer
|
||||
|
||||
# Unused attributes
|
||||
self.image_writer = None
|
||||
self.episode_buffer = None
|
||||
|
||||
self.root.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# more mshukor
|
||||
self.feature_keys_mapping = feature_keys_mapping.get(repo_id, None) if feature_keys_mapping else None
|
||||
self.inverse_feature_keys_mapping = (
|
||||
{v: k for k, v in self.feature_keys_mapping.items() if v} if self.feature_keys_mapping else {}
|
||||
)
|
||||
|
||||
# Load metadata
|
||||
# TODO: change
|
||||
self.meta = LeRobotDatasetMetadata(
|
||||
self.repo_id, self.root, self.revision, force_cache_sync=force_cache_sync
|
||||
self.repo_id,
|
||||
self.root,
|
||||
local_files_only=local_files_only,
|
||||
revision=self.revision,
|
||||
force_cache_sync=force_cache_sync,
|
||||
feature_keys_mapping=feature_keys_mapping,
|
||||
)
|
||||
if self.episodes is not None and self.meta._version >= packaging.version.parse("v2.1"):
|
||||
episodes_stats = [self.meta.episodes_stats[ep_idx] for ep_idx in self.episodes]
|
||||
@@ -482,17 +574,74 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
|
||||
self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes)
|
||||
|
||||
# mustafa code
|
||||
if self.discard_first_n_frames > 0:
|
||||
print("Discarding first n frames:", self.discard_first_n_frames)
|
||||
self.subset_frame_ids = []
|
||||
for ep_idx in range(self.num_episodes):
|
||||
from_ = self.episode_data_index["from"][ep_idx]
|
||||
to_ = self.episode_data_index["to"][ep_idx]
|
||||
# TODO implement advanced strategy
|
||||
self.subset_frame_ids += [
|
||||
frame_idx for frame_idx in range(from_ + int(self.fps * self.discard_first_n_frames), to_)
|
||||
]
|
||||
elif self.discard_first_idle_frames:
|
||||
print(
|
||||
f"Discarding first idle frames: motion_threshold={self.motion_threshold}, motion_window_size={self.motion_window_size}, motion_buffer={self.motion_buffer}"
|
||||
)
|
||||
self.robot_states = torch.stack(self.hf_dataset[OBS_STATE]).numpy() # shape: [T, D]
|
||||
self.subset_frame_ids = []
|
||||
for ep_idx in range(self.num_episodes):
|
||||
from_ = self.episode_data_index["from"][ep_idx]
|
||||
to_ = self.episode_data_index["to"][ep_idx]
|
||||
ep_states = self.robot_states[from_:to_]
|
||||
velocities = np.linalg.norm(np.diff(ep_states, axis=0), axis=1)
|
||||
velocities = np.concatenate([[0.0], velocities])
|
||||
start_idx = find_start_of_motion(
|
||||
velocities, self.motion_window_size, self.motion_threshold, self.motion_buffer
|
||||
)
|
||||
self.subset_frame_ids += list(range(from_ + start_idx, to_))
|
||||
|
||||
# Check timestamps
|
||||
timestamps = torch.stack(self.hf_dataset["timestamp"]).numpy()
|
||||
episode_indices = torch.stack(self.hf_dataset["episode_index"]).numpy()
|
||||
ep_data_index_np = {k: t.numpy() for k, t in self.episode_data_index.items()}
|
||||
check_timestamps_sync(timestamps, episode_indices, ep_data_index_np, self.fps, self.tolerance_s)
|
||||
# commented TODO: check why
|
||||
# timestamps = torch.stack(self.hf_dataset["timestamp"]).numpy()
|
||||
# episode_indices = torch.stack(self.hf_dataset["episode_index"]).numpy()
|
||||
# ep_data_index_np = {k: t.numpy() for k, t in self.episode_data_index.items()}
|
||||
# check_timestamps_sync(timestamps, episode_indices, ep_data_index_np, self.fps, self.tolerance_s)
|
||||
|
||||
# Setup delta_indices
|
||||
if self.delta_timestamps is not None:
|
||||
check_delta_timestamps(self.delta_timestamps, self.fps, self.tolerance_s)
|
||||
# TODO: check why commented
|
||||
# check_delta_timestamps(self.delta_timestamps, self.fps, self.tolerance_s)
|
||||
self.delta_indices = get_delta_indices(self.delta_timestamps, self.fps)
|
||||
|
||||
# Mustafa
|
||||
self.meta.info["features"] = map_dict_keys(
|
||||
self.meta.info["features"],
|
||||
feature_keys_mapping=self.feature_keys_mapping,
|
||||
training_features=self.training_features,
|
||||
)
|
||||
self.keys_to_max_dim = {
|
||||
ACTION: max_action_dim,
|
||||
OBS_ENV_STATE: max_state_dim,
|
||||
OBS_STATE: max_state_dim,
|
||||
OBS_IMAGE: max_image_dim,
|
||||
OBS_IMAGE_2: max_image_dim,
|
||||
OBS_IMAGE_3: max_image_dim,
|
||||
}
|
||||
self.meta.info["features"] = reshape_features_to_max_dim(
|
||||
self.meta.info["features"], reshape_dim=-1, keys_to_max_dim=self.keys_to_max_dim
|
||||
)
|
||||
self.meta.stats = map_dict_keys(
|
||||
self.meta.stats,
|
||||
feature_keys_mapping=self.feature_keys_mapping,
|
||||
training_features=self.training_features,
|
||||
)
|
||||
self.robot_type = self.meta.info.get("robot_type", "")
|
||||
# Override tasks
|
||||
print(TASKS_KEYS_MAPPING.get(self.repo_id, self.meta.tasks), "previous", self.meta.tasks)
|
||||
self.meta.tasks = TASKS_KEYS_MAPPING.get(self.repo_id, self.meta.tasks)
|
||||
|
||||
def push_to_hub(
|
||||
self,
|
||||
branch: str | None = None,
|
||||
@@ -641,12 +790,18 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
return get_hf_features_from_features(self.features)
|
||||
|
||||
def _get_query_indices(self, idx: int, ep_idx: int) -> tuple[dict[str, list[int | bool]]]:
|
||||
# Bounds check to prevent IndexError when episode_index is out of range
|
||||
if ep_idx >= len(self.episode_data_index["from"]):
|
||||
# Fall back to the last valid episode
|
||||
ep_idx = len(self.episode_data_index["from"]) - 1
|
||||
|
||||
ep_start = self.episode_data_index["from"][ep_idx]
|
||||
ep_end = self.episode_data_index["to"][ep_idx]
|
||||
query_indices = {
|
||||
key: [max(ep_start.item(), min(ep_end.item() - 1, idx + delta)) for delta in delta_idx]
|
||||
for key, delta_idx in self.delta_indices.items()
|
||||
}
|
||||
# FIXME(mshukor): what if we train on multiple datasets with different features
|
||||
padding = { # Pad values outside of current episode range
|
||||
f"{key}_is_pad": torch.BoolTensor(
|
||||
[(idx + delta < ep_start.item()) | (idx + delta >= ep_end.item()) for delta in delta_idx]
|
||||
@@ -670,12 +825,21 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
|
||||
return query_timestamps
|
||||
|
||||
# TODO: changed by mustafa
|
||||
def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict:
|
||||
return {
|
||||
key: torch.stack(self.hf_dataset.select(q_idx)[key])
|
||||
for key, q_idx in query_indices.items()
|
||||
if key not in self.meta.video_keys
|
||||
}
|
||||
queries = {}
|
||||
for key, q_idx in query_indices.items():
|
||||
if (
|
||||
key not in self.meta.video_keys
|
||||
and self.inverse_feature_keys_mapping.get(key, key) not in self.meta.video_keys
|
||||
):
|
||||
key_ = (
|
||||
self.inverse_feature_keys_mapping.get(key, key)
|
||||
if self.inverse_feature_keys_mapping
|
||||
else key
|
||||
)
|
||||
queries[key] = torch.stack(self.hf_dataset.select(q_idx)[key_])
|
||||
return queries
|
||||
|
||||
def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
|
||||
"""Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function
|
||||
@@ -699,8 +863,12 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
def __len__(self):
|
||||
return self.num_frames
|
||||
|
||||
# changed by mshukor
|
||||
def __getitem__(self, idx) -> dict:
|
||||
if self.discard_first_n_frames > 0 or self.discard_first_idle_frames:
|
||||
idx = self.subset_frame_ids[idx]
|
||||
item = self.hf_dataset[idx]
|
||||
item = map_dict_keys(item, feature_keys_mapping=self.feature_keys_mapping)
|
||||
ep_idx = item["episode_index"].item()
|
||||
|
||||
query_indices = None
|
||||
@@ -717,15 +885,27 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
video_frames = self._query_videos(query_timestamps, ep_idx)
|
||||
item = {**video_frames, **item}
|
||||
|
||||
if self.image_transforms is not None:
|
||||
image_keys = self.meta.camera_keys
|
||||
for cam in image_keys:
|
||||
item[cam] = self.image_transforms(item[cam])
|
||||
|
||||
# Add task as a string
|
||||
task_idx = item["task_index"].item()
|
||||
item["task"] = self.meta.tasks[task_idx]
|
||||
|
||||
try:
|
||||
item["task"] = self.meta.tasks[task_idx]
|
||||
except:
|
||||
print(self.meta.tasks, task_idx, self.repo_id)
|
||||
if "robot_type" not in item:
|
||||
item["robot_type"] = self.robot_type
|
||||
item = map_dict_keys(
|
||||
item, feature_keys_mapping=self.feature_keys_mapping, training_features=self.training_features
|
||||
)
|
||||
# Add padded features
|
||||
# item = self._add_padded_features(item, self.training_features)
|
||||
if self.image_transforms is not None:
|
||||
for cam in item:
|
||||
if cam in self.meta.camera_keys or ("image" in cam and "is_pad" not in cam):
|
||||
item[cam] = self.image_transforms(item[cam])
|
||||
# Map pad keys
|
||||
# print(item.keys(), "before")
|
||||
# item = map_dict_pad_keys(item, feature_keys_mapping=self.feature_keys_mapping, training_features=self.training_features)
|
||||
# print(item.keys())
|
||||
return item
|
||||
|
||||
def __repr__(self):
|
||||
@@ -985,6 +1165,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
)
|
||||
obj.repo_id = obj.meta.repo_id
|
||||
obj.root = obj.meta.root
|
||||
obj.local_files_only = obj.meta.local_files_only
|
||||
obj.revision = None
|
||||
obj.tolerance_s = tolerance_s
|
||||
obj.image_writer = None
|
||||
@@ -1005,6 +1186,106 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
return obj
|
||||
|
||||
|
||||
class MultiLeRobotDatasetMeta:
|
||||
def __init__(
|
||||
self,
|
||||
datasets: list[LeRobotDataset],
|
||||
repo_ids: list[str],
|
||||
keys_to_max_dim: dict[str, int],
|
||||
train_on_all_features: bool = False,
|
||||
):
|
||||
self.repo_ids = repo_ids
|
||||
self.keys_to_max_dim = keys_to_max_dim
|
||||
self.train_on_all_features = train_on_all_features
|
||||
self.robot_types = [ds.meta.info["robot_type"] for ds in datasets]
|
||||
|
||||
# assign robot_type if missing
|
||||
for ds in datasets:
|
||||
ds.meta.info["robot_type"] = ROBOT_TYPE_KEYS_MAPPING.get(ds.repo_id, ds.meta.info["robot_type"])
|
||||
ds.robot_type = ds.meta.info["robot_type"]
|
||||
|
||||
# step 1: compute disabled features
|
||||
self.disabled_features = set()
|
||||
if not self.train_on_all_features:
|
||||
intersection = set(datasets[0].features)
|
||||
for ds in datasets:
|
||||
intersection.intersection_update(ds.features)
|
||||
if not intersection:
|
||||
raise RuntimeError("No common features across datasets.")
|
||||
for repo_id, ds in zip(repo_ids, datasets, strict=False):
|
||||
extra = set(ds.features) - intersection
|
||||
logging.warning(f"Disabling {extra} for repo {repo_id}")
|
||||
self.disabled_features.update(extra)
|
||||
|
||||
# step 2: build union_features excluding disabled
|
||||
self.union_features = {}
|
||||
for ds in datasets:
|
||||
for k, v in ds.features.items():
|
||||
if k not in self.disabled_features:
|
||||
self.union_features[k] = v
|
||||
|
||||
# step 3: reshape feature schema
|
||||
self.features = reshape_features_to_max_dim(
|
||||
self.union_features, reshape_dim=-1, keys_to_max_dim=self.keys_to_max_dim
|
||||
)
|
||||
|
||||
# step 4: aggregate stats
|
||||
self.stats = aggregate_stats_per_robot_type(datasets)
|
||||
for robot_type_, stats_ in self.stats.items():
|
||||
for feat_key, feat_stats in stats_.items():
|
||||
if feat_key in [ACTION, OBS_ENV_STATE, OBS_STATE]:
|
||||
for k, v in feat_stats.items():
|
||||
pad_value = 0 if k in ["min", "mean"] else 1
|
||||
self.stats[robot_type_][feat_key][k] = pad_tensor(
|
||||
v,
|
||||
max_size=self.keys_to_max_dim.get(feat_key, -1),
|
||||
pad_dim=-1,
|
||||
pad_value=pad_value,
|
||||
)
|
||||
|
||||
# step 5: episodes & tasks
|
||||
self.episodes = {repo_id: ds.meta.episodes for repo_id, ds in zip(repo_ids, datasets, strict=False)}
|
||||
self.tasks = {repo_id: ds.meta.tasks for repo_id, ds in zip(repo_ids, datasets, strict=False)}
|
||||
self.info = {repo_id: ds.meta.info for repo_id, ds in zip(repo_ids, datasets, strict=False)}
|
||||
|
||||
|
||||
class MultiLeRobotDatasetCleaner:
|
||||
def __init__(
|
||||
self,
|
||||
datasets: list[LeRobotDataset],
|
||||
repo_ids: list[str],
|
||||
sampling_weights: list[float],
|
||||
datasets_repo_ids: list[str],
|
||||
min_fps: int = 1,
|
||||
max_fps: int = 100,
|
||||
):
|
||||
self.original_datasets = datasets
|
||||
self.original_repo_ids = repo_ids
|
||||
self.original_weights = sampling_weights
|
||||
self.original_datasets_repo_ids = datasets_repo_ids
|
||||
|
||||
# step 1: remove datasets with invalid fps
|
||||
valid_fps_datasets = keep_datasets_with_valid_fps(datasets, min_fps=min_fps, max_fps=max_fps)
|
||||
|
||||
# step 2: keep datasets with same features per robot type
|
||||
consistent_datasets, keep_mask = keep_datasets_with_the_same_features_per_robot_type(
|
||||
valid_fps_datasets
|
||||
)
|
||||
|
||||
self.cleaned_datasets = consistent_datasets
|
||||
self.keep_mask = keep_mask
|
||||
self.cleaned_weights = [sampling_weights[i] for i in range(len(valid_fps_datasets)) if keep_mask[i]]
|
||||
self.cleaned_repo_ids = [repo_ids[i] for i in range(len(valid_fps_datasets)) if keep_mask[i]]
|
||||
self.cleaned_datasets_repo_ids = [
|
||||
datasets_repo_ids[i] for i in range(len(valid_fps_datasets)) if keep_mask[i]
|
||||
]
|
||||
|
||||
self.cumulative_sizes = np.array(
|
||||
[0] + list(torch.cumsum(torch.tensor([len(d) for d in consistent_datasets]), dim=0))
|
||||
)
|
||||
self.cleaned_weights = np.array(self.cleaned_weights, dtype=np.float32)
|
||||
|
||||
|
||||
class MultiLeRobotDataset(torch.utils.data.Dataset):
|
||||
"""A dataset consisting of multiple underlying `LeRobotDataset`s.
|
||||
|
||||
@@ -1021,7 +1302,24 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):
|
||||
delta_timestamps: dict[list[float]] | None = None,
|
||||
tolerances_s: dict | None = None,
|
||||
download_videos: bool = True,
|
||||
local_files_only: bool = False,
|
||||
video_backend: str | None = None,
|
||||
# add
|
||||
sampling_weights: list[float] | None = None,
|
||||
feature_keys_mapping: dict[str, dict[str, str]] | None = None,
|
||||
max_action_dim: int = None,
|
||||
max_state_dim: int = None,
|
||||
max_num_images: int = None,
|
||||
max_image_dim: int = None,
|
||||
train_on_all_features: bool = False,
|
||||
training_features: list | None = None,
|
||||
discard_first_n_frames: int = 0,
|
||||
min_fps: int = 1,
|
||||
max_fps: int = 100,
|
||||
discard_first_idle_frames: bool = False,
|
||||
motion_threshold: float = 0.05,
|
||||
motion_window_size: int = 10,
|
||||
motion_buffer: int = 3,
|
||||
):
|
||||
super().__init__()
|
||||
self.repo_ids = repo_ids
|
||||
@@ -1029,46 +1327,89 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):
|
||||
self.tolerances_s = tolerances_s if tolerances_s else dict.fromkeys(repo_ids, 0.0001)
|
||||
# Construct the underlying datasets passing everything but `transform` and `delta_timestamps` which
|
||||
# are handled by this class.
|
||||
self._datasets = [
|
||||
LeRobotDataset(
|
||||
repo_id,
|
||||
root=self.root / repo_id,
|
||||
episodes=episodes[repo_id] if episodes else None,
|
||||
image_transforms=image_transforms,
|
||||
delta_timestamps=delta_timestamps,
|
||||
tolerance_s=self.tolerances_s[repo_id],
|
||||
download_videos=download_videos,
|
||||
video_backend=video_backend,
|
||||
)
|
||||
for repo_id in repo_ids
|
||||
]
|
||||
_datasets = []
|
||||
datasets_repo_ids = []
|
||||
self.sampling_weights = []
|
||||
self.training_features = training_features
|
||||
|
||||
sampling_weights = sampling_weights if sampling_weights is not None else [1] * len(repo_ids)
|
||||
assert len(sampling_weights) == len(repo_ids), (
|
||||
"The number of sampling weights must match the number of datasets. "
|
||||
f"Got {len(sampling_weights)} weights for {len(repo_ids)} datasets."
|
||||
)
|
||||
for i, repo_id in enumerate(repo_ids):
|
||||
try:
|
||||
# delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta)
|
||||
_datasets.append(
|
||||
LeRobotDataset(
|
||||
repo_id,
|
||||
root=self.root / repo_id,
|
||||
episodes=episodes.get(repo_id, None) if episodes else None,
|
||||
image_transforms=image_transforms,
|
||||
delta_timestamps=delta_timestamps.get(repo_id, None) if delta_timestamps else None,
|
||||
tolerance_s=self.tolerances_s[repo_id],
|
||||
download_videos=download_videos,
|
||||
video_backend=video_backend,
|
||||
feature_keys_mapping=feature_keys_mapping,
|
||||
training_features=training_features,
|
||||
discard_first_n_frames=discard_first_n_frames,
|
||||
discard_first_idle_frames=discard_first_idle_frames,
|
||||
motion_threshold=motion_threshold,
|
||||
motion_window_size=motion_window_size,
|
||||
motion_buffer=motion_buffer,
|
||||
)
|
||||
)
|
||||
datasets_repo_ids.append(repo_id)
|
||||
self.sampling_weights.append(float(sampling_weights[i]))
|
||||
except Exception as e:
|
||||
print(f"Failed to load dataset: {repo_id} due to Exception: {e}")
|
||||
print(
|
||||
f"Finish loading {len(_datasets)} datasets, with sampling weights: {self.sampling_weights} corresponding to: {datasets_repo_ids}"
|
||||
)
|
||||
|
||||
# Disable any data keys that are not common across all of the datasets. Note: we may relax this
|
||||
# restriction in future iterations of this class. For now, this is necessary at least for being able
|
||||
# to use PyTorch's default DataLoader collate function.
|
||||
self.disabled_features = set()
|
||||
intersection_features = set(self._datasets[0].features)
|
||||
for ds in self._datasets:
|
||||
intersection_features.intersection_update(ds.features)
|
||||
if len(intersection_features) == 0:
|
||||
raise RuntimeError(
|
||||
"Multiple datasets were provided but they had no keys common to all of them. "
|
||||
"The multi-dataset functionality currently only keeps common keys."
|
||||
)
|
||||
for repo_id, ds in zip(self.repo_ids, self._datasets, strict=True):
|
||||
extra_keys = set(ds.features).difference(intersection_features)
|
||||
logging.warning(
|
||||
f"keys {extra_keys} of {repo_id} were disabled as they are not contained in all the "
|
||||
"other datasets."
|
||||
)
|
||||
self.disabled_features.update(extra_keys)
|
||||
|
||||
# FIXME(mshukor): apply mapping to unify used keys
|
||||
# FIXME(mshukor): pad based on types in case we have more than one state?
|
||||
self.image_transforms = image_transforms
|
||||
self.delta_timestamps = delta_timestamps
|
||||
# TODO(rcadene, aliberts): We should not perform this aggregation for datasets
|
||||
# with multiple robots of different ranges. Instead we should have one normalization
|
||||
# per robot.
|
||||
self.stats = aggregate_stats([dataset.meta.stats for dataset in self._datasets])
|
||||
self.delta_timestamps = (
|
||||
delta_timestamps.get(repo_id, None) if delta_timestamps else None
|
||||
) # delta_timestamps # FIXME(mshukor): last repo?
|
||||
# In case datasets with the same robot_type have different features
|
||||
cleaner = MultiLeRobotDatasetCleaner(
|
||||
datasets=_datasets,
|
||||
repo_ids=repo_ids,
|
||||
sampling_weights=self.sampling_weights,
|
||||
datasets_repo_ids=datasets_repo_ids,
|
||||
min_fps=min_fps,
|
||||
max_fps=max_fps,
|
||||
)
|
||||
self._datasets = cleaner.cleaned_datasets
|
||||
self.sampling_weights = cleaner.cleaned_weights
|
||||
self.repo_ids = cleaner.cleaned_repo_ids
|
||||
self.datasets_repo_ids = cleaner.cleaned_datasets_repo_ids
|
||||
self.cumulative_sizes = cleaner.cumulative_sizes
|
||||
# self.meta = copy.deepcopy(self._datasets[0].meta) # FIXME(mshukor): aggregate meta from all datasets
|
||||
# self.meta.info = {
|
||||
# repo_id: ds.meta.info for repo_id, ds in zip(self.repo_ids, self._datasets, strict=False)
|
||||
# }
|
||||
# self.meta.info["features"] = self._datasets[0].meta.info["features"] # Assume all datasets have the same features
|
||||
self.meta = MultiLeRobotDatasetMeta(
|
||||
datasets=self._datasets,
|
||||
repo_ids=self.repo_ids,
|
||||
keys_to_max_dim={
|
||||
ACTION: max_action_dim,
|
||||
OBS_ENV_STATE: max_state_dim,
|
||||
OBS_STATE: max_state_dim,
|
||||
OBS_IMAGE: max_image_dim,
|
||||
OBS_IMAGE_2: max_image_dim,
|
||||
OBS_IMAGE_3: max_image_dim,
|
||||
},
|
||||
train_on_all_features=train_on_all_features,
|
||||
)
|
||||
self.disabled_features = self.meta.disabled_features
|
||||
self.stats = self.meta.stats
|
||||
|
||||
@property
|
||||
def repo_id_to_index(self):
|
||||
@@ -1156,23 +1497,14 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):
|
||||
def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
|
||||
if idx >= len(self):
|
||||
raise IndexError(f"Index {idx} out of bounds.")
|
||||
# Determine which dataset to get an item from based on the index.
|
||||
start_idx = 0
|
||||
dataset_idx = 0
|
||||
for dataset in self._datasets:
|
||||
if idx >= start_idx + dataset.num_frames:
|
||||
start_idx += dataset.num_frames
|
||||
dataset_idx += 1
|
||||
continue
|
||||
break
|
||||
else:
|
||||
raise AssertionError("We expect the loop to break out as long as the index is within bounds.")
|
||||
item = self._datasets[dataset_idx][idx - start_idx]
|
||||
dataset_idx = np.searchsorted(self.cumulative_sizes, idx, side="right").item() - 1
|
||||
local_idx = (idx - self.cumulative_sizes[dataset_idx]).item()
|
||||
item = self._datasets[dataset_idx][local_idx]
|
||||
item["dataset_index"] = torch.tensor(dataset_idx)
|
||||
for data_key in self.disabled_features:
|
||||
item = create_padded_features(item, self.meta.features)
|
||||
for data_key in self.disabled_features: # FIXME(mshukor): not in getitem?
|
||||
if data_key in item:
|
||||
del item[data_key]
|
||||
|
||||
return item
|
||||
|
||||
def __repr__(self):
|
||||
@@ -28,7 +28,7 @@ from typing import Any
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
|
||||
def _make_memmap_safe(**kwargs) -> np.memmap:
|
||||
@@ -23,7 +23,7 @@ import numpy
|
||||
import PIL
|
||||
import torch
|
||||
|
||||
from lerobot.common.datasets.video_utils import encode_video_frames
|
||||
from lerobot.datasets.video_utils import encode_video_frames
|
||||
|
||||
|
||||
def concatenate_episodes(ep_dicts):
|
||||
@@ -35,14 +35,14 @@ from huggingface_hub.errors import RevisionNotFoundError
|
||||
from PIL import Image as PILImage
|
||||
from torchvision import transforms
|
||||
|
||||
from lerobot.common.datasets.backward_compatibility import (
|
||||
from lerobot.configs.types import DictLike, FeatureType, PolicyFeature
|
||||
from lerobot.datasets.backward_compatibility import (
|
||||
V21_MESSAGE,
|
||||
BackwardCompatibilityError,
|
||||
ForwardCompatibilityError,
|
||||
)
|
||||
from lerobot.common.robots import Robot
|
||||
from lerobot.common.utils.utils import is_valid_numpy_dtype_string
|
||||
from lerobot.configs.types import DictLike, FeatureType, PolicyFeature
|
||||
from lerobot.robots import Robot
|
||||
from lerobot.utils.utils import is_valid_numpy_dtype_string
|
||||
|
||||
DEFAULT_CHUNK_SIZE = 1000 # Max number of episodes per chunk
|
||||
|
||||
@@ -664,7 +664,7 @@ def create_lerobot_dataset_card(
|
||||
**kwargs,
|
||||
) -> DatasetCard:
|
||||
"""
|
||||
Keyword arguments will be used to replace values in ./lerobot/common/datasets/card_template.md.
|
||||
Keyword arguments will be used to replace values in src/lerobot/datasets/card_template.md.
|
||||
Note: If specified, license must be one of https://huggingface.co/docs/hub/repositories-licenses.
|
||||
"""
|
||||
card_tags = ["LeRobot"]
|
||||
@@ -687,7 +687,7 @@ def create_lerobot_dataset_card(
|
||||
],
|
||||
)
|
||||
|
||||
card_template = (importlib.resources.files("lerobot.common.datasets") / "card_template.md").read_text()
|
||||
card_template = (importlib.resources.files("lerobot.datasets") / "card_template.md").read_text()
|
||||
|
||||
return DatasetCard.from_template(
|
||||
card_data=card_data,
|
||||
@@ -858,3 +858,21 @@ def validate_episode_buffer(episode_buffer: dict, total_episodes: int, features:
|
||||
f"In episode_buffer not in features: {buffer_keys - set(features)}"
|
||||
f"In features not in episode_buffer: {set(features) - buffer_keys}"
|
||||
)
|
||||
|
||||
|
||||
def map_dict_keys(
|
||||
item: dict, feature_keys_mapping: dict, training_features: list = None, pad_key: str = "is_pad"
|
||||
) -> dict:
|
||||
"""Maps feature keys from the dataset to the keys used in the model."""
|
||||
if feature_keys_mapping is None:
|
||||
return item
|
||||
features = {}
|
||||
for key in item:
|
||||
if key in feature_keys_mapping:
|
||||
if feature_keys_mapping[key] is not None:
|
||||
if training_features is None or feature_keys_mapping[key] in training_features:
|
||||
features[feature_keys_mapping[key]] = item[key]
|
||||
else:
|
||||
if training_features is None or key in training_features or pad_key in key:
|
||||
features[key] = item[key]
|
||||
return features
|
||||
416
src/lerobot/datasets/utils_must.py
Normal file
416
src/lerobot/datasets/utils_must.py
Normal file
@@ -0,0 +1,416 @@
|
||||
"""
|
||||
Utils function by Mustafa to refactor
|
||||
"""
|
||||
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch.utils.data.dataloader import default_collate
|
||||
|
||||
from lerobot.datasets.compute_stats import aggregate_stats
|
||||
|
||||
OBS_IMAGE = "observation.image"
|
||||
OBS_IMAGE_2 = "observation.image2"
|
||||
OBS_IMAGE_3 = "observation.image3"
|
||||
|
||||
|
||||
def reshape_features_to_max_dim(features: dict, reshape_dim: int = -1, keys_to_max_dim: dict = {}) -> dict:
|
||||
"""Reshape features to have a maximum dimension of `max_dim`."""
|
||||
reshaped_features = {}
|
||||
for key in features:
|
||||
if key in keys_to_max_dim and keys_to_max_dim[key] is not None:
|
||||
reshaped_features[key] = features[key]
|
||||
shape = list(features[key]["shape"])
|
||||
if any([k in key for k in [OBS_IMAGE, OBS_IMAGE_2, OBS_IMAGE_3]]): # Assume square images
|
||||
shape[-3] = keys_to_max_dim[key]
|
||||
shape[-2] = keys_to_max_dim[key]
|
||||
else:
|
||||
shape[reshape_dim] = keys_to_max_dim[key]
|
||||
reshaped_features[key]["shape"] = tuple(shape)
|
||||
else:
|
||||
reshaped_features[key] = features[key]
|
||||
return reshaped_features
|
||||
|
||||
|
||||
def keep_datasets_with_valid_fps(ls_datasets: list, min_fps: int = 1, max_fps: int = 100) -> list:
|
||||
print(
|
||||
f"Keeping datasets with fps between {min_fps} and {max_fps}. Considering {len(ls_datasets)} datasets."
|
||||
)
|
||||
for ds in ls_datasets:
|
||||
if ds.fps < min_fps or ds.fps > max_fps:
|
||||
print(f"Dataset {ds} has invalid fps: {ds.fps}. Removing it.")
|
||||
ls_datasets.remove(ds)
|
||||
print(f"Keeping {len(ls_datasets)} datasets with valid fps.")
|
||||
return ls_datasets
|
||||
|
||||
|
||||
def keep_datasets_with_the_same_features_per_robot_type(ls_datasets: list) -> list:
|
||||
"""
|
||||
Filters datasets to only keep those with consistent feature shapes per robot type.
|
||||
|
||||
Args:
|
||||
ls_datasets (List): List of datasets, each with a `meta.info['robot_type']`
|
||||
and `meta.episodes_stats` dictionary.
|
||||
|
||||
Returns:
|
||||
List: Filtered list of datasets with consistent feature shapes.
|
||||
"""
|
||||
robot_types = {ds.meta.info["robot_type"] for ds in ls_datasets}
|
||||
datasets_to_remove = set()
|
||||
|
||||
for robot_type in robot_types:
|
||||
# Collect all stats dicts for this robot type
|
||||
stats_list = [
|
||||
ep_stats
|
||||
for ds in ls_datasets
|
||||
if ds.meta.info["robot_type"] == robot_type
|
||||
for ep_stats in ds.meta.episodes_stats.values()
|
||||
if ep_stats is not None # Filter out None values
|
||||
]
|
||||
if not stats_list:
|
||||
continue
|
||||
|
||||
# Determine the most common shape for each key
|
||||
all_keys = {key for stats in stats_list for key in stats}
|
||||
for ds in ls_datasets:
|
||||
if ds.meta.info["robot_type"] != robot_type:
|
||||
continue
|
||||
for key in all_keys:
|
||||
shape_counter = defaultdict(int)
|
||||
|
||||
for stats in stats_list:
|
||||
value = stats.get(key)
|
||||
if (
|
||||
value and "mean" in value and isinstance(value["mean"], (torch.Tensor, np.ndarray))
|
||||
): # FIXME(mshukor): check all stats; min, mean, max
|
||||
shape_counter[value["mean"].shape] += 1
|
||||
if not shape_counter:
|
||||
continue
|
||||
|
||||
# Identify the most frequent shape
|
||||
main_shape = max(shape_counter, key=shape_counter.get)
|
||||
# Flag datasets that don't match the main shape
|
||||
# for ds in ls_datasets:
|
||||
first_ep_stats = next(iter(ds.meta.episodes_stats.values()), None)
|
||||
if not first_ep_stats:
|
||||
continue
|
||||
value = first_ep_stats.get(key)
|
||||
if (
|
||||
value
|
||||
and "mean" in value
|
||||
and isinstance(value["mean"], (torch.Tensor, np.ndarray))
|
||||
and value["mean"].shape != main_shape
|
||||
):
|
||||
datasets_to_remove.add(ds)
|
||||
break
|
||||
|
||||
# Filter out inconsistent datasets
|
||||
datasets_maks = [ds not in datasets_to_remove for ds in ls_datasets]
|
||||
filtered_datasets = [ds for ds in ls_datasets if ds not in datasets_to_remove]
|
||||
print(
|
||||
f"Keeping {len(filtered_datasets)} datasets. Removed {len(datasets_to_remove)} inconsistent ones. Inconsistent datasets:\n{datasets_to_remove}"
|
||||
)
|
||||
return filtered_datasets, datasets_maks
|
||||
|
||||
|
||||
def aggregate_stats_per_robot_type(ls_datasets) -> dict[str, dict[str, torch.Tensor]]:
|
||||
"""Aggregate stats of multiple LeRobot datasets into multiple set of stats per robot type.
|
||||
|
||||
The final stats will have the union of all data keys from each of the datasets.
|
||||
|
||||
The final stats will have the union of all data keys from each of the datasets. For instance:
|
||||
- new_max = max(max_dataset_0, max_dataset_1, ...)
|
||||
- new_min = min(min_dataset_0, min_dataset_1, ...)
|
||||
- new_mean = (mean of all data)
|
||||
- new_std = (std of all data)
|
||||
"""
|
||||
|
||||
robot_types = {ds.meta.info["robot_type"] for ds in ls_datasets}
|
||||
stats = {robot_type: {} for robot_type in robot_types}
|
||||
for robot_type in robot_types:
|
||||
robot_type_datasets = []
|
||||
for ds in ls_datasets:
|
||||
if ds.meta.info["robot_type"] == robot_type:
|
||||
# Filter out None values from episodes_stats to handle missing stats
|
||||
valid_episodes_stats = [stats for stats in ds.meta.episodes_stats.values() if stats is not None]
|
||||
robot_type_datasets.extend(valid_episodes_stats)
|
||||
# robot_type_datasets = [list(ds.episodes_stats.values()) for ds in ls_datasets if ds.meta.info["robot_type"] == robot_type]
|
||||
if robot_type_datasets: # Only aggregate if we have valid stats
|
||||
stat = aggregate_stats(robot_type_datasets)
|
||||
stats[robot_type] = stat
|
||||
else:
|
||||
print(f"Warning: No valid episode stats found for robot type {robot_type}, skipping aggregation")
|
||||
stats[robot_type] = {}
|
||||
return stats
|
||||
|
||||
|
||||
def str_to_torch_dtype(dtype_str):
|
||||
"""Convert a dtype string to a torch dtype."""
|
||||
mapping = {
|
||||
"float32": torch.float32,
|
||||
"int64": torch.int64,
|
||||
"int16": torch.int16,
|
||||
"bool": torch.bool,
|
||||
"video": torch.float32, # Assuming video is stored as uint8 images
|
||||
}
|
||||
return mapping.get(dtype_str, torch.float32) # Default to float32
|
||||
|
||||
|
||||
def create_padded_features(item: dict, features: dict = {}):
|
||||
for key, ft in features.items():
|
||||
if any([k in key for k in ["cam", "effort", "absolute"]]): # FIXME(mshukor): temporary hack
|
||||
continue
|
||||
shape = ft["shape"]
|
||||
if len(shape) == 3: # images to torch format (C, H, W)
|
||||
shape = (shape[2], shape[0], shape[1])
|
||||
if len(shape) == 1 and shape[0] == 1: # ft with shape are actually tensor(ele)
|
||||
shape = []
|
||||
if key not in item:
|
||||
dtype = str_to_torch_dtype(ft["dtype"])
|
||||
item[key] = torch.zeros(shape, dtype=dtype)
|
||||
item[f"{key}_padding_mask"] = torch.tensor(0, dtype=torch.int64)
|
||||
if "image" in key: # FIXME(mshukor): support other observations
|
||||
item[f"{key}_is_pad"] = torch.BoolTensor([False])
|
||||
else:
|
||||
item[f"{key}_padding_mask"] = torch.tensor(1, dtype=torch.int64)
|
||||
return item
|
||||
|
||||
|
||||
ROBOT_TYPE_KEYS_MAPPING = {
|
||||
"lerobot/stanford_hydra_dataset": "static_single_arm",
|
||||
"lerobot/iamlab_cmu_pickup_insert": "static_single_arm",
|
||||
"lerobot/berkeley_fanuc_manipulation": "static_single_arm",
|
||||
"lerobot/toto": "static_single_arm",
|
||||
"lerobot/roboturk": "static_single_arm",
|
||||
"lerobot/jaco_play": "static_single_arm",
|
||||
"lerobot/taco_play": "static_single_arm_7statedim",
|
||||
}
|
||||
|
||||
|
||||
def pad_tensor(
|
||||
tensor: torch.Tensor, max_size: int, pad_dim: int = -1, pad_value: float = 0.0
|
||||
) -> torch.Tensor:
|
||||
is_numpy = isinstance(tensor, np.ndarray)
|
||||
if is_numpy:
|
||||
tensor = torch.tensor(tensor)
|
||||
if tensor.ndim == 0:
|
||||
# Scalar — return as-is, no padding needed
|
||||
return tensor
|
||||
pad = max_size - tensor.shape[pad_dim]
|
||||
if pad > 0:
|
||||
pad_sizes = (0, pad) # pad right
|
||||
tensor = torch.nn.functional.pad(tensor, pad_sizes, value=pad_value)
|
||||
return tensor.numpy() if is_numpy else tensor
|
||||
|
||||
|
||||
def map_dict_keys(
|
||||
item: dict, feature_keys_mapping: dict, training_features: list = None, pad_key: str = "is_pad"
|
||||
) -> dict:
|
||||
"""Maps feature keys from the dataset to the keys used in the model."""
|
||||
if feature_keys_mapping is None:
|
||||
return item
|
||||
features = {}
|
||||
for key in item:
|
||||
if key in feature_keys_mapping:
|
||||
if feature_keys_mapping[key] is not None:
|
||||
if training_features is None or feature_keys_mapping[key] in training_features:
|
||||
features[feature_keys_mapping[key]] = item[key]
|
||||
else:
|
||||
if training_features is None or key in training_features or pad_key in key:
|
||||
features[key] = item[key]
|
||||
|
||||
# breakpoint()
|
||||
return features
|
||||
|
||||
|
||||
def find_start_of_motion(velocities, window_size, threshold, motion_buffer):
|
||||
for t in range(len(velocities) - window_size):
|
||||
window_mean = velocities[t : t + window_size].mean()
|
||||
if window_mean > threshold:
|
||||
return max(0, t - motion_buffer) # include slight context before motion
|
||||
return 0
|
||||
|
||||
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
|
||||
def load_yaml_mapping(name: str) -> dict:
|
||||
"""
|
||||
Loads a YAML mapping from a Hugging Face repo.
|
||||
Example: name='features' → https://huggingface.co/jadechoghari/smolvla-keys/resolve/main/features.yaml
|
||||
"""
|
||||
url = f"https://huggingface.co/jadechoghari/smolvla-keys/resolve/main/{name}.yaml"
|
||||
response = requests.get(url)
|
||||
response.raise_for_status() # raise if the download fails
|
||||
|
||||
return yaml.safe_load(response.text)
|
||||
|
||||
|
||||
# Example usage
|
||||
TASKS_KEYS_MAPPING = load_yaml_mapping("tasks")
|
||||
FEATURE_KEYS_MAPPING = load_yaml_mapping("features")
|
||||
EPISODES_DATASET_MAPPING = {
|
||||
"cadene/droid_1.0.1": list(range(50)),
|
||||
"danaaubakirova/svla_so100_task5_v3": [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36,
|
||||
37,
|
||||
38,
|
||||
39,
|
||||
40,
|
||||
41,
|
||||
42,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
47,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
],
|
||||
"danaaubakirova/svla_so100_task4_v3": [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
40,
|
||||
41,
|
||||
42,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
47,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
],
|
||||
}
|
||||
ACTION = "action"
|
||||
OBS_STATE = "observation.state"
|
||||
TASK = "task"
|
||||
ROBOT = "robot_type"
|
||||
TRAINING_FEATURES = {
|
||||
0: [ACTION, OBS_STATE, TASK, ROBOT, OBS_IMAGE],
|
||||
1: [ACTION, OBS_STATE, TASK, ROBOT, OBS_IMAGE, OBS_IMAGE_2],
|
||||
2: [ACTION, OBS_STATE, TASK, ROBOT, OBS_IMAGE, OBS_IMAGE_2, OBS_IMAGE_3],
|
||||
}
|
||||
|
||||
|
||||
def is_batch_need_padding(values: list[torch.Tensor], pad_dim: int = -1) -> int:
|
||||
return len(values[0].shape) > 0 # and len(set([v.shape[pad_dim] for v in values])) > 1
|
||||
|
||||
|
||||
def pad_tensor_to_shape(tensor: torch.Tensor, target_shape: tuple, pad_value: float = 0.0) -> torch.Tensor:
|
||||
"""Pads a tensor to the target shape (right/bottom only)."""
|
||||
pad = []
|
||||
for actual, target in zip(reversed(tensor.shape), reversed(target_shape), strict=False):
|
||||
pad.extend([0, max(target - actual, 0)])
|
||||
return F.pad(tensor, pad, value=pad_value)
|
||||
|
||||
|
||||
def multidataset_collate_fn(
|
||||
batch: List[Dict[str, torch.Tensor]],
|
||||
keys_to_max_dim: Dict[str, tuple] = {},
|
||||
pad_value: float = 0.0,
|
||||
) -> Dict[str, torch.Tensor]:
|
||||
"""
|
||||
Pads tensors to given target shape (if provided), otherwise uses per-batch max.
|
||||
Supports 1D (e.g. action), 3D (e.g. [C,H,W] images).
|
||||
"""
|
||||
collated_batch = [{} for _ in range(len(batch))]
|
||||
batch_keys = batch[0].keys()
|
||||
|
||||
for key in batch_keys:
|
||||
values = [sample[key] for sample in batch]
|
||||
sample = values[0]
|
||||
|
||||
if not isinstance(sample, torch.Tensor):
|
||||
for i in range(len(batch)):
|
||||
collated_batch[i][key] = values[i]
|
||||
continue
|
||||
|
||||
# use user-specified shape if available
|
||||
if key in keys_to_max_dim and keys_to_max_dim[key] is not None:
|
||||
target_shape = keys_to_max_dim[key]
|
||||
else:
|
||||
# compute per-batch max shape
|
||||
target_shape = tuple(max(v.shape[i] for v in values) for i in range(sample.ndim))
|
||||
|
||||
for i in range(len(batch)):
|
||||
collated_batch[i][key] = pad_tensor_to_shape(values[i], target_shape, pad_value=pad_value)
|
||||
|
||||
return default_collate(collated_batch)
|
||||
@@ -26,8 +26,8 @@ from pathlib import Path
|
||||
from textwrap import dedent
|
||||
|
||||
from lerobot import available_datasets
|
||||
from lerobot.common.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset
|
||||
from lerobot.common.robots.aloha.configuration_aloha import AlohaRobotConfig
|
||||
from lerobot.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset
|
||||
from lerobot.robots.aloha.configuration_aloha import AlohaRobotConfig
|
||||
|
||||
LOCAL_DIR = Path("data/")
|
||||
|
||||
@@ -38,7 +38,7 @@ If your dataset contains a single task, you can simply provide it directly via t
|
||||
Examples:
|
||||
|
||||
```bash
|
||||
python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \
|
||||
python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \
|
||||
--repo-id lerobot/aloha_sim_insertion_human_image \
|
||||
--single-task "Insert the peg into the socket." \
|
||||
--robot-config lerobot/configs/robot/aloha.yaml \
|
||||
@@ -46,7 +46,7 @@ python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \
|
||||
```
|
||||
|
||||
```bash
|
||||
python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \
|
||||
python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \
|
||||
--repo-id aliberts/koch_tutorial \
|
||||
--single-task "Pick the Lego block and drop it in the box on the right." \
|
||||
--robot-config lerobot/configs/robot/koch.yaml \
|
||||
@@ -63,7 +63,7 @@ If your dataset is a multi-task dataset, you have two options to provide the tas
|
||||
Example:
|
||||
|
||||
```bash
|
||||
python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \
|
||||
python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \
|
||||
--repo-id lerobot/stanford_kuka_multimodal_dataset \
|
||||
--tasks-col "language_instruction" \
|
||||
--local-dir data
|
||||
@@ -92,7 +92,7 @@ parquet file, and you must provide this column's name with the '--tasks-col' arg
|
||||
Example:
|
||||
|
||||
```bash
|
||||
python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \
|
||||
python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \
|
||||
--repo-id lerobot/stanford_kuka_multimodal_dataset \
|
||||
--tasks-col "language_instruction" \
|
||||
--local-dir data
|
||||
@@ -119,7 +119,7 @@ from huggingface_hub import HfApi
|
||||
from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
|
||||
from safetensors.torch import load_file
|
||||
|
||||
from lerobot.common.datasets.utils import (
|
||||
from lerobot.datasets.utils import (
|
||||
DEFAULT_CHUNK_SIZE,
|
||||
DEFAULT_PARQUET_PATH,
|
||||
DEFAULT_VIDEO_PATH,
|
||||
@@ -136,12 +136,12 @@ from lerobot.common.datasets.utils import (
|
||||
write_json,
|
||||
write_jsonlines,
|
||||
)
|
||||
from lerobot.common.datasets.video_utils import (
|
||||
from lerobot.datasets.video_utils import (
|
||||
VideoFrame, # noqa: F401
|
||||
get_image_pixel_channels,
|
||||
get_video_info,
|
||||
)
|
||||
from lerobot.common.robots import RobotConfig
|
||||
from lerobot.robots import RobotConfig
|
||||
|
||||
V16 = "v1.6"
|
||||
V20 = "v2.0"
|
||||
@@ -602,19 +602,19 @@ def make_robot_config(robot_type: str, **kwargs) -> RobotConfig:
|
||||
raise NotImplementedError # TODO
|
||||
|
||||
elif robot_type == "koch_follower":
|
||||
from lerobot.common.robots.koch_follower import KochFollowerConfig
|
||||
from lerobot.robots.koch_follower import KochFollowerConfig
|
||||
|
||||
return KochFollowerConfig(**kwargs)
|
||||
elif robot_type == "so100_follower":
|
||||
from lerobot.common.robots.so100_follower import SO100FollowerConfig
|
||||
from lerobot.robots.so100_follower import SO100FollowerConfig
|
||||
|
||||
return SO100FollowerConfig(**kwargs)
|
||||
elif robot_type == "stretch":
|
||||
from lerobot.common.robots.stretch3 import Stretch3RobotConfig
|
||||
from lerobot.robots.stretch3 import Stretch3RobotConfig
|
||||
|
||||
return Stretch3RobotConfig(**kwargs)
|
||||
elif robot_type == "lekiwi":
|
||||
from lerobot.common.robots.lekiwi import LeKiwiConfig
|
||||
from lerobot.robots.lekiwi import LeKiwiConfig
|
||||
|
||||
return LeKiwiConfig(**kwargs)
|
||||
else:
|
||||
@@ -20,9 +20,9 @@ from datasets import get_dataset_config_info
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
from lerobot import available_datasets
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata
|
||||
from lerobot.common.datasets.utils import INFO_PATH, write_info
|
||||
from lerobot.common.datasets.v21.convert_dataset_v20_to_v21 import V20, SuppressWarnings
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.utils import INFO_PATH, write_info
|
||||
from lerobot.datasets.v21.convert_dataset_v20_to_v21 import V20, SuppressWarnings
|
||||
|
||||
LOCAL_DIR = Path("data/")
|
||||
|
||||
@@ -24,7 +24,7 @@ from pathlib import Path
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
from lerobot import available_datasets
|
||||
from lerobot.common.datasets.v21.convert_dataset_v20_to_v21 import V21, convert_dataset
|
||||
from lerobot.datasets.v21.convert_dataset_v20_to_v21 import V21, convert_dataset
|
||||
|
||||
LOCAL_DIR = Path("data/")
|
||||
|
||||
@@ -25,7 +25,7 @@ This script will help you convert any LeRobot dataset already pushed to the hub
|
||||
Usage:
|
||||
|
||||
```bash
|
||||
python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py \
|
||||
python -m lerobot.datasets.v21.convert_dataset_v20_to_v21 \
|
||||
--repo-id=aliberts/koch_tutorial
|
||||
```
|
||||
|
||||
@@ -36,9 +36,9 @@ import logging
|
||||
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
|
||||
from lerobot.common.datasets.utils import EPISODES_STATS_PATH, STATS_PATH, load_stats, write_info
|
||||
from lerobot.common.datasets.v21.convert_stats import check_aggregate_stats, convert_stats
|
||||
from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
|
||||
from lerobot.datasets.utils import EPISODES_STATS_PATH, STATS_PATH, load_stats, write_info
|
||||
from lerobot.datasets.v21.convert_stats import check_aggregate_stats, convert_stats
|
||||
|
||||
V20 = "v2.0"
|
||||
V21 = "v2.1"
|
||||
@@ -17,9 +17,9 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
from lerobot.common.datasets.compute_stats import aggregate_stats, get_feature_stats, sample_indices
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.common.datasets.utils import write_episode_stats
|
||||
from lerobot.datasets.compute_stats import aggregate_stats, get_feature_stats, sample_indices
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.utils import write_episode_stats
|
||||
|
||||
|
||||
def sample_episode_video_frames(dataset: LeRobotDataset, episode_index: int, ft_key: str) -> np.ndarray:
|
||||
@@ -43,14 +43,32 @@ def convert_episode_stats(dataset: LeRobotDataset, ep_idx: int):
|
||||
else:
|
||||
ep_ft_data = np.array(ep_data[key])
|
||||
|
||||
axes_to_reduce = (0, 2, 3) if ft["dtype"] in ["image", "video"] else 0
|
||||
keepdims = True if ft["dtype"] in ["image", "video"] else ep_ft_data.ndim == 1
|
||||
if ft["dtype"] in ["image", "video"]:
|
||||
# Handle variable dimensions for image/video data
|
||||
# Expected formats: (frames, channels, height, width) or (channels, height, width)
|
||||
if ep_ft_data.ndim == 4:
|
||||
# Standard case: (frames, channels, height, width)
|
||||
axes_to_reduce = (0, 2, 3) # reduce over frames, height, width
|
||||
elif ep_ft_data.ndim == 3:
|
||||
# Squeezed case: (channels, height, width) - single frame
|
||||
axes_to_reduce = (1, 2) # reduce over height, width
|
||||
else:
|
||||
raise ValueError(f"Unexpected dimensions for {ft['dtype']} data: {ep_ft_data.shape}")
|
||||
keepdims = True
|
||||
else:
|
||||
axes_to_reduce = 0
|
||||
keepdims = ep_ft_data.ndim == 1
|
||||
ep_stats[key] = get_feature_stats(ep_ft_data, axis=axes_to_reduce, keepdims=keepdims)
|
||||
|
||||
if ft["dtype"] in ["image", "video"]: # remove batch dim
|
||||
ep_stats[key] = {
|
||||
k: v if k == "count" else np.squeeze(v, axis=0) for k, v in ep_stats[key].items()
|
||||
}
|
||||
if ep_ft_data.ndim == 4:
|
||||
# For 4D data, squeeze the first axis (batch/frames)
|
||||
ep_stats[key] = {
|
||||
k: v if k == "count" else np.squeeze(v, axis=0) for k, v in ep_stats[key].items()
|
||||
}
|
||||
elif ep_ft_data.ndim == 3:
|
||||
# For 3D data, the stats already have correct shape (channels,)
|
||||
pass
|
||||
|
||||
dataset.meta.episodes_stats[ep_idx] = ep_stats
|
||||
|
||||
@@ -18,10 +18,10 @@ from typing import Any, Optional
|
||||
|
||||
import draccus
|
||||
|
||||
from lerobot.common.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
|
||||
from lerobot.common.robots import RobotConfig
|
||||
from lerobot.common.teleoperators.config import TeleoperatorConfig
|
||||
from lerobot.configs.types import FeatureType, PolicyFeature
|
||||
from lerobot.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
|
||||
from lerobot.robots import RobotConfig
|
||||
from lerobot.teleoperators.config import TeleoperatorConfig
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -17,7 +17,7 @@ import importlib
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from lerobot.common.envs.configs import AlohaEnv, EnvConfig, HILEnvConfig, PushtEnv, XarmEnv
|
||||
from lerobot.envs.configs import AlohaEnv, EnvConfig, HILEnvConfig, PushtEnv, XarmEnv
|
||||
|
||||
|
||||
def make_env_config(env_type: str, **kwargs) -> EnvConfig:
|
||||
@@ -22,9 +22,9 @@ import numpy as np
|
||||
import torch
|
||||
from torch import Tensor
|
||||
|
||||
from lerobot.common.envs.configs import EnvConfig
|
||||
from lerobot.common.utils.utils import get_channel_first_image_shape
|
||||
from lerobot.configs.types import FeatureType, PolicyFeature
|
||||
from lerobot.envs.configs import EnvConfig
|
||||
from lerobot.utils.utils import get_channel_first_image_shape
|
||||
|
||||
|
||||
def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]:
|
||||
@@ -37,11 +37,11 @@ from typing import Any, Dict, List
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from lerobot.common.cameras.configs import ColorMode
|
||||
from lerobot.common.cameras.opencv.camera_opencv import OpenCVCamera
|
||||
from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.common.cameras.realsense.camera_realsense import RealSenseCamera
|
||||
from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig
|
||||
from lerobot.cameras.configs import ColorMode
|
||||
from lerobot.cameras.opencv.camera_opencv import OpenCVCamera
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.cameras.realsense.camera_realsense import RealSenseCamera
|
||||
from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
128
src/lerobot/model/kinematics.py
Normal file
128
src/lerobot/model/kinematics.py
Normal file
@@ -0,0 +1,128 @@
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class RobotKinematics:
|
||||
"""Robot kinematics using placo library for forward and inverse kinematics."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
urdf_path: str,
|
||||
target_frame_name: str = "gripper_frame_link",
|
||||
joint_names: list[str] = None,
|
||||
):
|
||||
"""
|
||||
Initialize placo-based kinematics solver.
|
||||
|
||||
Args:
|
||||
urdf_path: Path to the robot URDF file
|
||||
target_frame_name: Name of the end-effector frame in the URDF
|
||||
joint_names: List of joint names to use for the kinematics solver
|
||||
"""
|
||||
try:
|
||||
import placo
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"placo is required for RobotKinematics. "
|
||||
"Please install the optional dependencies of `kinematics` in the package."
|
||||
) from e
|
||||
|
||||
self.robot = placo.RobotWrapper(urdf_path)
|
||||
self.solver = placo.KinematicsSolver(self.robot)
|
||||
self.solver.mask_fbase(True) # Fix the base
|
||||
|
||||
self.target_frame_name = target_frame_name
|
||||
|
||||
# Set joint names
|
||||
self.joint_names = list(self.robot.joint_names()) if joint_names is None else joint_names
|
||||
|
||||
# Initialize frame task for IK
|
||||
self.tip_frame = self.solver.add_frame_task(self.target_frame_name, np.eye(4))
|
||||
|
||||
def forward_kinematics(self, joint_pos_deg):
|
||||
"""
|
||||
Compute forward kinematics for given joint configuration given the target frame name in the constructor.
|
||||
|
||||
Args:
|
||||
joint_pos_deg: Joint positions in degrees (numpy array)
|
||||
|
||||
Returns:
|
||||
4x4 transformation matrix of the end-effector pose
|
||||
"""
|
||||
|
||||
# Convert degrees to radians
|
||||
joint_pos_rad = np.deg2rad(joint_pos_deg[: len(self.joint_names)])
|
||||
|
||||
# Update joint positions in placo robot
|
||||
for i, joint_name in enumerate(self.joint_names):
|
||||
self.robot.set_joint(joint_name, joint_pos_rad[i])
|
||||
|
||||
# Update kinematics
|
||||
self.robot.update_kinematics()
|
||||
|
||||
# Get the transformation matrix
|
||||
return self.robot.get_T_world_frame(self.target_frame_name)
|
||||
|
||||
def inverse_kinematics(
|
||||
self, current_joint_pos, desired_ee_pose, position_weight=1.0, orientation_weight=0.01
|
||||
):
|
||||
"""
|
||||
Compute inverse kinematics using placo solver.
|
||||
|
||||
Args:
|
||||
current_joint_pos: Current joint positions in degrees (used as initial guess)
|
||||
desired_ee_pose: Target end-effector pose as a 4x4 transformation matrix
|
||||
position_weight: Weight for position constraint in IK
|
||||
orientation_weight: Weight for orientation constraint in IK, set to 0.0 to only constrain position
|
||||
|
||||
Returns:
|
||||
Joint positions in degrees that achieve the desired end-effector pose
|
||||
"""
|
||||
|
||||
# Convert current joint positions to radians for initial guess
|
||||
current_joint_rad = np.deg2rad(current_joint_pos[: len(self.joint_names)])
|
||||
|
||||
# Set current joint positions as initial guess
|
||||
for i, joint_name in enumerate(self.joint_names):
|
||||
self.robot.set_joint(joint_name, current_joint_rad[i])
|
||||
|
||||
# Update the target pose for the frame task
|
||||
self.tip_frame.T_world_frame = desired_ee_pose
|
||||
|
||||
# Configure the task based on position_only flag
|
||||
self.tip_frame.configure(self.target_frame_name, "soft", position_weight, orientation_weight)
|
||||
|
||||
# Solve IK
|
||||
self.solver.solve(True)
|
||||
self.robot.update_kinematics()
|
||||
|
||||
# Extract joint positions
|
||||
joint_pos_rad = []
|
||||
for joint_name in self.joint_names:
|
||||
joint = self.robot.get_joint(joint_name)
|
||||
joint_pos_rad.append(joint)
|
||||
|
||||
# Convert back to degrees
|
||||
joint_pos_deg = np.rad2deg(joint_pos_rad)
|
||||
|
||||
# Preserve gripper position if present in current_joint_pos
|
||||
if len(current_joint_pos) > len(self.joint_names):
|
||||
result = np.zeros_like(current_joint_pos)
|
||||
result[: len(self.joint_names)] = joint_pos_deg
|
||||
result[len(self.joint_names) :] = current_joint_pos[len(self.joint_names) :]
|
||||
return result
|
||||
else:
|
||||
return joint_pos_deg
|
||||
@@ -22,7 +22,7 @@ import logging
|
||||
from copy import deepcopy
|
||||
from enum import Enum
|
||||
|
||||
from lerobot.common.utils.encoding_utils import decode_twos_complement, encode_twos_complement
|
||||
from lerobot.utils.encoding_utils import decode_twos_complement, encode_twos_complement
|
||||
|
||||
from ..motors_bus import Motor, MotorCalibration, MotorsBus, NameOrID, Value, get_address
|
||||
from .tables import (
|
||||
@@ -17,7 +17,7 @@ from copy import deepcopy
|
||||
from enum import Enum
|
||||
from pprint import pformat
|
||||
|
||||
from lerobot.common.utils.encoding_utils import decode_sign_magnitude, encode_sign_magnitude
|
||||
from lerobot.utils.encoding_utils import decode_sign_magnitude, encode_sign_magnitude
|
||||
|
||||
from ..motors_bus import Motor, MotorCalibration, MotorsBus, NameOrID, Value, get_address
|
||||
from .tables import (
|
||||
@@ -32,8 +32,8 @@ import serial
|
||||
from deepdiff import DeepDiff
|
||||
from tqdm import tqdm
|
||||
|
||||
from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
|
||||
from lerobot.common.utils.utils import enter_pressed, move_cursor_up
|
||||
from lerobot.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
|
||||
from lerobot.utils.utils import enter_pressed, move_cursor_up
|
||||
|
||||
NameOrID: TypeAlias = str | int
|
||||
Value: TypeAlias = int | float
|
||||
@@ -446,7 +446,7 @@ class MotorsBus(abc.ABC):
|
||||
except (FileNotFoundError, OSError, serial.SerialException) as e:
|
||||
raise ConnectionError(
|
||||
f"\nCould not connect on port '{self.port}'. Make sure you are using the correct port."
|
||||
"\nTry running `python lerobot/find_port.py`\n"
|
||||
"\nTry running `python -m lerobot.find_port`\n"
|
||||
) from e
|
||||
|
||||
@abc.abstractmethod
|
||||
@@ -18,8 +18,8 @@
|
||||
from torch.optim import Optimizer
|
||||
from torch.optim.lr_scheduler import LRScheduler
|
||||
|
||||
from lerobot.common.policies.pretrained import PreTrainedPolicy
|
||||
from lerobot.configs.train import TrainPipelineConfig
|
||||
from lerobot.policies.pretrained import PreTrainedPolicy
|
||||
|
||||
|
||||
def make_optimizer_and_scheduler(
|
||||
@@ -22,12 +22,12 @@ import draccus
|
||||
import torch
|
||||
from safetensors.torch import load_file, save_file
|
||||
|
||||
from lerobot.common.constants import (
|
||||
from lerobot.constants import (
|
||||
OPTIMIZER_PARAM_GROUPS,
|
||||
OPTIMIZER_STATE,
|
||||
)
|
||||
from lerobot.common.datasets.utils import flatten_dict, unflatten_dict, write_json
|
||||
from lerobot.common.utils.io_utils import deserialize_json_into_object
|
||||
from lerobot.datasets.utils import flatten_dict, unflatten_dict, write_json
|
||||
from lerobot.utils.io_utils import deserialize_json_into_object
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -22,9 +22,9 @@ import draccus
|
||||
from torch.optim import Optimizer
|
||||
from torch.optim.lr_scheduler import LambdaLR, LRScheduler
|
||||
|
||||
from lerobot.common.constants import SCHEDULER_STATE
|
||||
from lerobot.common.datasets.utils import write_json
|
||||
from lerobot.common.utils.io_utils import deserialize_json_into_object
|
||||
from lerobot.constants import SCHEDULER_STATE
|
||||
from lerobot.datasets.utils import write_json
|
||||
from lerobot.utils.io_utils import deserialize_json_into_object
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -16,5 +16,6 @@ from .act.configuration_act import ACTConfig as ACTConfig
|
||||
from .diffusion.configuration_diffusion import DiffusionConfig as DiffusionConfig
|
||||
from .pi0.configuration_pi0 import PI0Config as PI0Config
|
||||
from .smolvla.configuration_smolvla import SmolVLAConfig as SmolVLAConfig
|
||||
from .smolvla2.configuration_smolvla2 import SmolVLA2Config as SmolVLA2Config
|
||||
from .tdmpc.configuration_tdmpc import TDMPCConfig as TDMPCConfig
|
||||
from .vqbet.configuration_vqbet import VQBeTConfig as VQBeTConfig
|
||||
@@ -15,9 +15,9 @@
|
||||
# limitations under the License.
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from lerobot.common.optim.optimizers import AdamWConfig
|
||||
from lerobot.configs.policies import PreTrainedConfig
|
||||
from lerobot.configs.types import NormalizationMode
|
||||
from lerobot.optim.optimizers import AdamWConfig
|
||||
|
||||
|
||||
@PreTrainedConfig.register_subclass("act")
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user