Compare commits

..

5 Commits

Author SHA1 Message Date
Pepijn
f147a4cd48 Add inference for training time rtc 2026-01-29 11:05:42 +01:00
Pepijn
c3fa269b21 Merge branch 'main' into feat/training_time_rtc 2026-01-27 17:34:56 +01:00
Pepijn
385ba8d1b7 remove wall-oss from doc links 2026-01-20 20:11:56 +01:00
Pepijn
f4ccf911fa format 2026-01-20 20:08:28 +01:00
Pepijn
0cb8c92fe4 Implement training time rtc for pi0, pi0.5 and smolvla 2026-01-20 20:02:10 +01:00
318 changed files with 6297 additions and 18093 deletions

View File

@@ -44,7 +44,7 @@ permissions:
# Sets up the environment variables # Sets up the environment variables
env: env:
UV_VERSION: "0.8.0" UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12" PYTHON_VERSION: "3.10"
# Ensures that only the latest commit for a PR or branch is built, canceling older runs. # Ensures that only the latest commit for a PR or branch is built, canceling older runs.
concurrency: concurrency:
@@ -61,7 +61,6 @@ jobs:
MUJOCO_GL: egl MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps: steps:
- uses: actions/checkout@v6 - uses: actions/checkout@v6
with: with:
@@ -90,11 +89,5 @@ jobs:
- name: Install lerobot with test extras - name: Install lerobot with test extras
run: uv sync --extra "test" run: uv sync --extra "test"
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
uv run hf auth whoami
- name: Run pytest - name: Run pytest
run: uv run pytest tests -vv --maxfail=10 run: uv run pytest tests -vv --maxfail=10

View File

@@ -37,7 +37,7 @@ permissions:
# Sets up the environment variables # Sets up the environment variables
env: env:
UV_VERSION: "0.8.0" UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12" PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu DOCKER_IMAGE_NAME: huggingface/lerobot-gpu
# Ensures that only the latest action is built, canceling older runs. # Ensures that only the latest action is built, canceling older runs.
@@ -60,7 +60,6 @@ jobs:
MUJOCO_GL: egl MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps: steps:
- uses: actions/checkout@v6 - uses: actions/checkout@v6
with: with:
@@ -88,12 +87,6 @@ jobs:
- name: Install lerobot with all extras - name: Install lerobot with all extras
run: uv sync --extra all # TODO(Steven): Make flash-attn optional run: uv sync --extra all # TODO(Steven): Make flash-attn optional
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
uv run hf auth whoami
- name: Run pytest (all extras) - name: Run pytest (all extras)
run: uv run pytest tests -vv --maxfail=10 run: uv run pytest tests -vv --maxfail=10
@@ -108,11 +101,9 @@ jobs:
runs-on: runs-on:
group: aws-general-8-plus group: aws-general-8-plus
if: | if: |
github.repository == 'huggingface/lerobot' && ( (github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) ||
(github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) || github.event_name == 'push' ||
github.event_name == 'push' || github.event_name == 'workflow_dispatch'
github.event_name == 'workflow_dispatch'
)
outputs: outputs:
image_tag: ${{ steps.set_tag.outputs.image_tag }} image_tag: ${{ steps.set_tag.outputs.image_tag }}
env: env:
@@ -169,7 +160,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container: container:
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images] image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb" options: --gpus all --shm-size "16gb"
@@ -181,13 +171,6 @@ jobs:
shell: bash shell: bash
working-directory: /lerobot working-directory: /lerobot
steps: steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Fix ptxas permissions
run: chmod +x /lerobot/.venv/lib/python3.12/site-packages/triton/backends/nvidia/bin/ptxas
- name: Run pytest on GPU - name: Run pytest on GPU
run: pytest tests -vv --maxfail=10 run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests - name: Run end-to-end tests

View File

@@ -28,7 +28,7 @@ on:
# Sets up the environment variables # Sets up the environment variables
env: env:
UV_VERSION: "0.8.0" UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12" PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME_CPU: huggingface/lerobot-cpu:latest DOCKER_IMAGE_NAME_CPU: huggingface/lerobot-cpu:latest
DOCKER_IMAGE_NAME_GPU: huggingface/lerobot-gpu:latest DOCKER_IMAGE_NAME_GPU: huggingface/lerobot-gpu:latest
@@ -119,7 +119,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container: container:
image: ${{ needs.build-docker-cpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images] image: ${{ needs.build-docker-cpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --shm-size "16gb" options: --shm-size "16gb"
@@ -131,11 +130,6 @@ jobs:
shell: bash shell: bash
working-directory: /lerobot working-directory: /lerobot
steps: steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Run pytest on CPU - name: Run pytest on CPU
run: pytest tests -vv --maxfail=10 run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests - name: Run end-to-end tests
@@ -152,7 +146,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container: container:
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images] image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb" options: --gpus all --shm-size "16gb"
@@ -164,11 +157,6 @@ jobs:
shell: bash shell: bash
working-directory: /lerobot working-directory: /lerobot
steps: steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Run pytest on GPU - name: Run pytest on GPU
run: pytest tests -vv --maxfail=10 run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests - name: Run end-to-end tests
@@ -186,7 +174,6 @@ jobs:
TORCH_HOME: /home/user_lerobot/.cache/torch TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
CUDA_VISIBLE_DEVICES: "0,1,2,3" CUDA_VISIBLE_DEVICES: "0,1,2,3"
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container: container:
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images] image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb" options: --gpus all --shm-size "16gb"
@@ -198,15 +185,12 @@ jobs:
shell: bash shell: bash
working-directory: /lerobot working-directory: /lerobot
steps: steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Verify GPU availability - name: Verify GPU availability
run: | run: |
nvidia-smi nvidia-smi
python -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}'); print(f'Number of GPUs: {torch.cuda.device_count()}')" python -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}'); print(f'Number of GPUs: {torch.cuda.device_count()}')"
- name: Run multi-GPU training tests - name: Run multi-GPU training tests
run: pytest -vv tests/training/ # TODO(Steven): Investigate why motors tests are failing in multi-GPU setup
run: pytest tests -vv --maxfail=10 --ignore=tests/motors/
timeout-minutes: 10

View File

@@ -50,7 +50,7 @@ jobs:
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v6 uses: actions/setup-python@v6
with: with:
python-version: '3.12' python-version: '3.10'
- name: Run pre-commit hooks - name: Run pre-commit hooks
uses: pre-commit/action@v3.0.1 # zizmor: ignore[unpinned-uses] uses: pre-commit/action@v3.0.1 # zizmor: ignore[unpinned-uses]

View File

@@ -22,7 +22,7 @@ on:
# Sets up the environment variables # Sets up the environment variables
env: env:
UV_VERSION: "0.8.0" UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12" PYTHON_VERSION: "3.10"
jobs: jobs:
# This job builds the Python package and publishes it to PyPI # This job builds the Python package and publishes it to PyPI
@@ -45,7 +45,7 @@ jobs:
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v6 uses: actions/setup-python@v6
with: with:
python-version: '3.12' python-version: '3.10'
- name: Extract Version - name: Extract Version
id: extract_info id: extract_info
@@ -83,6 +83,14 @@ jobs:
exit 1 exit 1
fi fi
- name: Remove Tags with Git dependencies
# TODO(Steven): Temporary patch to remove pi from PyPi 0.4.0 release due to its reliance on git dependencies.
run: |
echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
grep -E '@ git\+https|lerobot\[pi\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
sed -E -i '/@ git\+https|lerobot\[pi\]/d' pyproject.toml
echo "::info:: Git dependencies removed. Proceeding with build."
- name: Install build dependencies - name: Install build dependencies
run: python -m pip install build run: python -m pip install build

View File

@@ -29,7 +29,7 @@ permissions:
# Sets up the environment variables # Sets up the environment variables
env: env:
UV_VERSION: "0.8.0" UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12" PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu:unbound DOCKER_IMAGE_NAME: huggingface/lerobot-gpu:unbound
# Ensures that only the latest action is built, canceling older runs. # Ensures that only the latest action is built, canceling older runs.
@@ -48,7 +48,6 @@ jobs:
MUJOCO_GL: egl MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps: steps:
- uses: actions/checkout@v6 - uses: actions/checkout@v6
with: with:
@@ -80,11 +79,7 @@ jobs:
- name: Install lerobot with all extras - name: Install lerobot with all extras
run: uv sync --extra all # TODO(Steven): Make flash-attn optional run: uv sync --extra all # TODO(Steven): Make flash-attn optional
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
uv run hf auth whoami
- name: Run pytest (all extras) - name: Run pytest (all extras)
run: uv run pytest tests -vv run: uv run pytest tests -vv
@@ -96,7 +91,6 @@ jobs:
name: Build and Push Docker name: Build and Push Docker
runs-on: runs-on:
group: aws-general-8-plus group: aws-general-8-plus
if: github.repository == 'huggingface/lerobot'
outputs: outputs:
image_tag: ${{ env.DOCKER_IMAGE_NAME }} image_tag: ${{ env.DOCKER_IMAGE_NAME }}
env: env:
@@ -142,7 +136,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container: container:
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images] image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb" options: --gpus all --shm-size "16gb"
@@ -154,11 +147,6 @@ jobs:
shell: bash shell: bash
working-directory: /lerobot working-directory: /lerobot
steps: steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Run pytest on GPU - name: Run pytest on GPU
run: pytest tests -vv run: pytest tests -vv
- name: Run end-to-end tests - name: Run end-to-end tests

View File

@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
default_language_version: default_language_version:
python: python3.12 python: python3.10
exclude: "tests/artifacts/.*\\.safetensors$" exclude: "tests/artifacts/.*\\.safetensors$"
@@ -55,7 +55,7 @@ repos:
rev: v3.21.0 rev: v3.21.0
hooks: hooks:
- id: pyupgrade - id: pyupgrade
args: [--py312-plus] args: [--py310-plus]
##### Markdown Quality ##### ##### Markdown Quality #####
- repo: https://github.com/rbubley/mirrors-prettier - repo: https://github.com/rbubley/mirrors-prettier

View File

@@ -1,25 +0,0 @@
# AI Usage Policy
The LeRobot project welcomes contributions from everyone, and we have a few guidelines regarding AI usage to ensure high code quality, clear communication, and a healthy open-source ecosystem:
- **Please disclose significant AI assistance.** If you used AI tools (e.g., Copilot, Claude, Cursor, ChatGPT) to generate a substantial portion of your code or text, let us know in your PR description. Transparency helps us review your changes more effectively.
- **Own your code (The Human-in-the-Loop).** You must fully understand all the changes you are proposing. If you cannot explain what your AI-assisted code does or how it interacts with LeRobot's broader architecture, please take the time to learn and test it before submitting.
- **Keep issues and discussions focused.** You are welcome to use AI to help draft issues or PR descriptions, but please review and edit them carefully before posting. AI can often be overly verbose; trimming the noise and getting straight to the point helps our maintainers address your needs faster.
Our core maintainers also use AI tools to aid their workflows, but they do so while bringing deep contextual knowledge of the LeRobot codebase to validate the output. We ask all contributors to apply that same level of rigor.
## Remember the Human Maintainers
Please remember that LeRobot is maintained by a dedicated team of humans.
Every discussion, issue, and pull request is read and reviewed by real people. While AI tools can generate thousands of lines of code in seconds, reviewing that code still takes human time and energy. Submitting unverified or low-effort AI output puts an unfair burden on our maintainers.
Today, the quality of the AI output still heavily depends on the developer driving the tool. We ask that you respect our maintainers' time by thoroughly vetting, testing, and refining your submissions.
## AI is Welcome Here
LeRobot operates at the cutting edge of AI and robotics, and many of our maintainers actively embrace AI coding assistants as valuable productivity tools. We are a pro-AI project!
Our reason for having an AI policy is not an anti-AI stance. Rather, it exists to ensure that AI is used to enhance human contributions, not replace them with unverified noise. It's about how the tools are used, not the tools themselves.
We value the unique human insight you bring to the LeRobot community. Let AI empower your workflow, but always let your own judgment take the wheel.

View File

@@ -2,7 +2,7 @@
Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community. Answering questions, helping others, reaching out, and improving the documentation are immensely valuable. Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community. Answering questions, helping others, reaching out, and improving the documentation are immensely valuable.
Whichever way you choose to contribute, please be mindful to respect our [code of conduct](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md) and our [AI policy](https://github.com/huggingface/lerobot/blob/main/AI_POLICY.md). Whichever way you choose to contribute, please be mindful to respect our [code of conduct](./CODE_OF_CONDUCT.md).
## Ways to Contribute ## Ways to Contribute
@@ -32,7 +32,7 @@ git remote add upstream https://github.com/huggingface/lerobot.git
### 2. Environment Installation ### 2. Environment Installation
Please follow our [Installation Guide](https://huggingface.co/docs/lerobot/installation) for the environment setup & installation from source. Please follow our [Installation Guide](./docs/source/installation.mdx) for the environment setup & installation from source.
## Running Tests & Quality Checks ## Running Tests & Quality Checks
@@ -75,8 +75,8 @@ pytest -sv tests/test_specific_feature.py
Use the templates for required fields and examples. Use the templates for required fields and examples.
- **Issues:** Follow the [ticket template](https://github.com/huggingface/lerobot/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml). - **Issues:** Follow the [ticket template](./.github/ISSUE_TEMPLATE/bug-report.yml).
- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](https://github.com/huggingface/lerobot/blob/main/.github/PULL_REQUEST_TEMPLATE.md). - **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](./.github/PULL_REQUEST_TEMPLATE.md).
One member of the LeRobot team will then review your contribution. One member of the LeRobot team will then review your contribution.

View File

@@ -1,3 +1,2 @@
include src/lerobot/templates/lerobot_modelcard_template.md include src/lerobot/templates/lerobot_modelcard_template.md
include src/lerobot/datasets/card_template.md include src/lerobot/datasets/card_template.md
include src/lerobot/envs/metaworld_config.json

View File

@@ -135,7 +135,7 @@ Learn how to implement your own simulation environment or benchmark and distribu
## Citation ## Citation
If you use LeRobot in your project, please cite the GitHub repository to acknowledge the ongoing development and contributors: If you use LeRobot in your research, please cite:
```bibtex ```bibtex
@misc{cadene2024lerobot, @misc{cadene2024lerobot,
@@ -146,26 +146,9 @@ If you use LeRobot in your project, please cite the GitHub repository to acknowl
} }
``` ```
If you are referencing our research or the academic paper, please also cite our ICLR publication:
<details>
<summary><b>ICLR 2026 Paper</b></summary>
```bibtex
@inproceedings{cadenelerobot,
title={LeRobot: An Open-Source Library for End-to-End Robot Learning},
author={Cadene, Remi and Alibert, Simon and Capuano, Francesco and Aractingi, Michel and Zouitine, Adil and Kooijmans, Pepijn and Choghari, Jade and Russi, Martino and Pascal, Caroline and Palma, Steven and Shukor, Mustafa and Moss, Jess and Soare, Alexander and Aubakirova, Dana and Lhoest, Quentin and Gallou\'edec, Quentin and Wolf, Thomas},
booktitle={The Fourteenth International Conference on Learning Representations},
year={2026},
url={https://arxiv.org/abs/2602.22818}
}
```
</details>
## Contribute ## Contribute
We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support! We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](./CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support!
<p align="center"> <p align="center">
<img alt="SO101 Video" src="./media/readme/so100_video.webp" width="640px"> <img alt="SO101 Video" src="./media/readme/so100_video.webp" width="640px">

View File

@@ -28,9 +28,9 @@ We don't expect the same optimal settings for a dataset of images from a simulat
For these reasons, we run this benchmark on four representative datasets: For these reasons, we run this benchmark on four representative datasets:
- `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera. - `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera.
- `lerobot/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera. - `aliberts/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
- `lerobot/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera. - `aliberts/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
- `lerobot/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera. - `aliberts/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
Note: The datasets used for this benchmark need to be image datasets, not video datasets. Note: The datasets used for this benchmark need to be image datasets, not video datasets.
@@ -179,7 +179,7 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \ --output-dir outputs/video_benchmark \
--repo-ids \ --repo-ids \
lerobot/pusht_image \ lerobot/pusht_image \
lerobot/aloha_mobile_shrimp_image \ aliberts/aloha_mobile_shrimp_image \
--vcodec libx264 libx265 \ --vcodec libx264 libx265 \
--pix-fmt yuv444p yuv420p \ --pix-fmt yuv444p yuv420p \
--g 2 20 None \ --g 2 20 None \
@@ -203,9 +203,9 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \ --output-dir outputs/video_benchmark \
--repo-ids \ --repo-ids \
lerobot/pusht_image \ lerobot/pusht_image \
lerobot/aloha_mobile_shrimp_image \ aliberts/aloha_mobile_shrimp_image \
lerobot/paris_street \ aliberts/paris_street \
lerobot/kitchen \ aliberts/kitchen \
--vcodec libx264 libx265 \ --vcodec libx264 libx265 \
--pix-fmt yuv444p yuv420p \ --pix-fmt yuv444p yuv420p \
--g 1 2 3 4 5 6 10 15 20 40 None \ --g 1 2 3 4 5 6 10 15 20 40 None \
@@ -221,9 +221,9 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \ --output-dir outputs/video_benchmark \
--repo-ids \ --repo-ids \
lerobot/pusht_image \ lerobot/pusht_image \
lerobot/aloha_mobile_shrimp_image \ aliberts/aloha_mobile_shrimp_image \
lerobot/paris_street \ aliberts/paris_street \
lerobot/kitchen \ aliberts/kitchen \
--vcodec libsvtav1 \ --vcodec libsvtav1 \
--pix-fmt yuv420p \ --pix-fmt yuv420p \
--g 1 2 3 4 5 6 10 15 20 40 None \ --g 1 2 3 4 5 6 10 15 20 40 None \
@@ -252,37 +252,37 @@ Since we're using av1 encoding, we're choosing the `pyav` decoder as `video_read
These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav` These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav`
| video_images_size_ratio | vcodec | pix_fmt | | | | | video_images_size_ratio | vcodec | pix_fmt | | | |
| --------------------------------- | ---------- | ------- | --------- | --------- | --------- | | ---------------------------------- | ---------- | ------- | --------- | --------- | --------- |
| | libx264 | | libx265 | | libsvtav1 | | | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p | | repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% | | lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
| lerobot/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% | | aliberts/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
| lerobot/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% | | aliberts/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
| lerobot/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% | | aliberts/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
| video_images_load_time_ratio | vcodec | pix_fmt | | | | | video_images_load_time_ratio | vcodec | pix_fmt | | | |
| --------------------------------- | ------- | ------- | -------- | ------- | --------- | | ---------------------------------- | ------- | ------- | -------- | ------- | --------- |
| | libx264 | | libx265 | | libsvtav1 | | | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p | | repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 | | lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
| lerobot/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** | | aliberts/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
| lerobot/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** | | aliberts/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
| lerobot/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** | | aliberts/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
| | | vcodec | pix_fmt | | | | | | | vcodec | pix_fmt | | | |
| --------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ | | ---------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
| | | libx264 | | libx265 | | libsvtav1 | | | | libx264 | | libx265 | | libsvtav1 |
| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p | | repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 | | lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 | | | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% | | | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
| lerobot/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** | | aliberts/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** | | | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** | | | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
| lerobot/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** | | aliberts/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** | | | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** | | | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
| lerobot/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** | | aliberts/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** | | | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** | | | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |

View File

@@ -24,7 +24,7 @@ ARG OS_VERSION=22.04
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION} FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
# Define Python version argument # Define Python version argument
ARG PYTHON_VERSION=3.12 ARG PYTHON_VERSION=3.10
# Configure environment variables # Configure environment variables
ENV DEBIAN_FRONTEND=noninteractive \ ENV DEBIAN_FRONTEND=noninteractive \
@@ -85,8 +85,6 @@ RUN if [ "$UNBOUND_DEPS" = "true" ]; then \
RUN uv pip install --no-cache ".[all]" RUN uv pip install --no-cache ".[all]"
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
# Copy the rest of the application source code # Copy the rest of the application source code
# Make sure to have the git-LFS files for testing # Make sure to have the git-LFS files for testing
COPY --chown=user_lerobot:user_lerobot . . COPY --chown=user_lerobot:user_lerobot . .

View File

@@ -18,10 +18,8 @@
# docker build -f docker/Dockerfile.user -t lerobot-user . # docker build -f docker/Dockerfile.user -t lerobot-user .
# docker run -it --rm lerobot-user # docker run -it --rm lerobot-user
# With USB physical access : docker run -it --device=/dev/ -v /dev/:/dev/ --rm lerobot-user
# Configure the base image # Configure the base image
ARG PYTHON_VERSION=3.12 ARG PYTHON_VERSION=3.10
FROM python:${PYTHON_VERSION}-slim FROM python:${PYTHON_VERSION}-slim
# Configure environment variables # Configure environment variables

View File

@@ -7,6 +7,8 @@
- sections: - sections:
- local: il_robots - local: il_robots
title: Imitation Learning for Robots title: Imitation Learning for Robots
- local: cameras
title: Cameras
- local: bring_your_own_policies - local: bring_your_own_policies
title: Bring Your Own Policies title: Bring Your Own Policies
- local: integrate_hardware - local: integrate_hardware
@@ -27,10 +29,6 @@
title: Porting Large Datasets title: Porting Large Datasets
- local: using_dataset_tools - local: using_dataset_tools
title: Using the Dataset Tools title: Using the Dataset Tools
- local: dataset_subtask
title: Using Subtasks in the Dataset
- local: streaming_video_encoding
title: Streaming Video Encoding
title: "Datasets" title: "Datasets"
- sections: - sections:
- local: act - local: act
@@ -59,6 +57,8 @@
title: Use Async Inference title: Use Async Inference
- local: rtc - local: rtc
title: Real-Time Chunking (RTC) title: Real-Time Chunking (RTC)
- local: training_time_rtc
title: Training-Time RTC
title: "Inference" title: "Inference"
- sections: - sections:
- local: envhub - local: envhub
@@ -103,17 +103,11 @@
title: Earth Rover Mini title: Earth Rover Mini
- local: omx - local: omx
title: OMX title: OMX
- local: openarm
title: OpenArm
title: "Robots" title: "Robots"
- sections: - sections:
- local: phone_teleop - local: phone_teleop
title: Phone title: Phone
title: "Teleoperators" title: "Teleoperators"
- sections:
- local: cameras
title: Cameras
title: "Sensors"
- sections: - sections:
- local: torch_accelerators - local: torch_accelerators
title: PyTorch accelerators title: PyTorch accelerators

View File

@@ -88,8 +88,5 @@ lerobot-record \
--dataset.repo_id=${HF_USER}/eval_act_your_dataset \ --dataset.repo_id=${HF_USER}/eval_act_your_dataset \
--dataset.num_episodes=10 \ --dataset.num_episodes=10 \
--dataset.single_task="Your task description" \ --dataset.single_task="Your task description" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--policy.path=${HF_USER}/act_policy --policy.path=${HF_USER}/act_policy
``` ```

View File

@@ -48,7 +48,7 @@ python -m lerobot.async_inference.robot_client \
--task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act` --task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act`
--policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc) --policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc)
--pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base) --pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base)
--policy_device=mps \ # POLICY: the device to run the policy on, on the server (cuda, mps, xpu, cpu) --policy_device=mps \ # POLICY: the device to run the policy on, on the server
--actions_per_chunk=50 \ # POLICY: the number of actions to output at once --actions_per_chunk=50 \ # POLICY: the number of actions to output at once
--chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server --chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server
--aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions --aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions

View File

@@ -32,7 +32,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
# your policy-specific dependencies # your policy-specific dependencies
] ]
requires-python = ">= 3.12" requires-python = ">= 3.11"
[build-system] [build-system]
build-backend = # your-build-backend build-backend = # your-build-backend
@@ -82,7 +82,7 @@ Create your policy implementation by inheriting from LeRobot's base `PreTrainedP
# modeling_my_custom_policy.py # modeling_my_custom_policy.py
import torch import torch
import torch.nn as nn import torch.nn as nn
from typing import Any from typing import Dict, Any
from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.pretrained import PreTrainedPolicy
from .configuration_my_custom_policy import MyCustomPolicyConfig from .configuration_my_custom_policy import MyCustomPolicyConfig
@@ -91,7 +91,7 @@ class MyCustomPolicy(PreTrainedPolicy):
config_class = MyCustomPolicyConfig config_class = MyCustomPolicyConfig
name = "my_custom_policy" name = "my_custom_policy"
def __init__(self, config: MyCustomPolicyConfig, dataset_stats: dict[str, Any] = None): def __init__(self, config: MyCustomPolicyConfig, dataset_stats: Dict[str, Any] = None):
super().__init__(config, dataset_stats) super().__init__(config, dataset_stats)
... ...
``` ```
@@ -102,7 +102,7 @@ Create processor functions:
```python ```python
# processor_my_custom_policy.py # processor_my_custom_policy.py
from typing import Any from typing import Dict, Any
import torch import torch

View File

@@ -1,22 +1,12 @@
# Cameras # Cameras
LeRobot offers multiple options for video capture: LeRobot offers multiple options for video capture, including phone cameras, built-in laptop cameras, external webcams, and Intel RealSense cameras. To efficiently record frames from most cameras, you can use either the `OpenCVCamera` or `RealSenseCamera` class. For additional compatibility details on the `OpenCVCamera` class, refer to the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
| Class | Supported Cameras | ### Finding your camera
| ----------------- | ----------------------------------- |
| `OpenCVCamera` | Phone, built-in laptop, USB webcams |
| `ZMQCamera` | Network-connected cameras |
| `RealSenseCamera` | Intel RealSense (with depth) |
| `Reachy2Camera` | Reachy 2 robot cameras |
> [!TIP] To instantiate a camera, you need a camera identifier. This identifier might change if you reboot your computer or re-plug your camera, a behavior mostly dependent on your operating system.
> For `OpenCVCamera` compatibility details, see the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
### Find your camera To find the camera indices of the cameras plugged into your system, run the following script:
Every camera requires a unique identifier to be instantiated, allowing you to distinguish between multiple connected devices.
`OpenCVCamera` and `RealSenseCamera` support auto-discovery. Run the command below to list available devices and their identifiers. Note that these identifiers may change after rebooting your computer or re-plugging the camera, depending on your operating system.
```bash ```bash
lerobot-find-cameras opencv # or realsense for Intel Realsense cameras lerobot-find-cameras opencv # or realsense for Intel Realsense cameras
@@ -24,7 +14,7 @@ lerobot-find-cameras opencv # or realsense for Intel Realsense cameras
The output will look something like this if you have two cameras connected: The output will look something like this if you have two cameras connected:
```bash ```
--- Detected Cameras --- --- Detected Cameras ---
Camera #0: Camera #0:
Name: OpenCV Camera @ 0 Name: OpenCV Camera @ 0
@@ -43,37 +33,13 @@ Camera #0:
> [!WARNING] > [!WARNING]
> When using Intel RealSense cameras in `macOS`, you could get this [error](https://github.com/IntelRealSense/librealsense/issues/12307): `Error finding RealSense cameras: failed to set power state`, this can be solved by running the same command with `sudo` permissions. Note that using RealSense cameras in `macOS` is unstable. > When using Intel RealSense cameras in `macOS`, you could get this [error](https://github.com/IntelRealSense/librealsense/issues/12307): `Error finding RealSense cameras: failed to set power state`, this can be solved by running the same command with `sudo` permissions. Note that using RealSense cameras in `macOS` is unstable.
`ZMQCamera` and `Reachy2Camera` do not support auto-discovery. They must be configured manually by providing their network address and port or robot SDK settings. ## Use Cameras
## Use cameras Below are two examples, demonstrating how to work with the API.
### Frame access modes - **Asynchronous frame capture** using an OpenCV-based camera
All camera classes implement three access modes for capturing frames:
| Method | Behavior | Blocks? | Best For |
| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- | ---------------------------------------- |
| `read()` | Waits for the camera hardware to return a frame. May block for a long time depending on the camera and SDK. | Yes | Simple scripts, sequential capture |
| `async_read(timeout_ms)` | Returns the latest unconsumed frame from background thread. Blocks only if buffer is empty, up to `timeout_ms`. Raises `TimeoutError` if no frame arrives. | With a timeout | Control loops synchronized to camera FPS |
| `read_latest(max_age_ms)` | Peeks at the most recent frame in buffer (may be stale). Raises `TimeoutError` if frame is older than `max_age_ms`. | No | UI visualization, logging, monitoring |
### Usage examples
The following examples show how to use the camera API to configure and capture frames from different camera types.
- **Blocking and non-blocking frame capture** using an OpenCV-based camera
- **Color and depth capture** using an Intel RealSense camera - **Color and depth capture** using an Intel RealSense camera
> [!WARNING]
> Failing to cleanly disconnect cameras can cause resource leaks. Use the context manager protocol to ensure automatic cleanup:
>
> ```python
> with OpenCVCamera(config) as camera:
> ...
> ```
>
> You can also call `connect()` and `disconnect()` manually, but always use a `finally` block for the latter.
<hfoptions id="shell_restart"> <hfoptions id="shell_restart">
<hfoption id="Open CV Camera"> <hfoption id="Open CV Camera">
@@ -94,30 +60,16 @@ config = OpenCVCameraConfig(
) )
# Instantiate and connect an `OpenCVCamera`, performing a warm-up read (default). # Instantiate and connect an `OpenCVCamera`, performing a warm-up read (default).
with OpenCVCamera(config) as camera: camera = OpenCVCamera(config)
camera.connect()
# Read a frame synchronously — blocks until hardware delivers a new frame
frame = camera.read()
print(f"read() call returned frame with shape:", frame.shape)
# Read a frame asynchronously with a timeout — returns the latest unconsumed frame or waits up to timeout_ms for a new one
try:
for i in range(10):
frame = camera.async_read(timeout_ms=200)
print(f"async_read call returned frame {i} with shape:", frame.shape)
except TimeoutError as e:
print(f"No frame received within timeout: {e}")
# Instantly return a frame - returns the most recent frame captured by the camera
try:
initial_frame = camera.read_latest(max_age_ms=1000)
for i in range(10):
frame = camera.read_latest(max_age_ms=1000)
print(f"read_latest call returned frame {i} with shape:", frame.shape)
print(f"Was a new frame received by the camera? {not (initial_frame == frame).any()}")
except TimeoutError as e:
print(f"Frame too old: {e}")
# Read frames asynchronously in a loop via `async_read(timeout_ms)`
try:
for i in range(10):
frame = camera.async_read(timeout_ms=200)
print(f"Async frame {i} shape:", frame.shape)
finally:
camera.disconnect()
``` ```
<!-- prettier-ignore-end --> <!-- prettier-ignore-end -->
@@ -159,10 +111,10 @@ finally:
</hfoption> </hfoption>
</hfoptions> </hfoptions>
## Use your phone's camera ## Use your phone
<hfoptions id="use phone"> <hfoptions id="use phone">
<hfoption id="iPhone & macOS"> <hfoption id="Mac">
To use your iPhone as a camera on macOS, enable the Continuity Camera feature: To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
@@ -172,49 +124,83 @@ To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
For more details, visit [Apple support](https://support.apple.com/en-gb/guide/mac-help/mchl77879b8a/mac). For more details, visit [Apple support](https://support.apple.com/en-gb/guide/mac-help/mchl77879b8a/mac).
Your iPhone should be detected automatically when running the camera setup script in the next section.
</hfoption> </hfoption>
<hfoption id="OBS virtual camera"> <hfoption id="Linux">
If you want to use your phone as a camera using OBS, follow these steps to set up a virtual camera. If you want to use your phone as a camera on Linux, follow these steps to set up a virtual camera.
1. _(Linux only) Install `v4l2loopback-dkms` and `v4l-utils`_. These packages create virtual camera devices and verify their settings. Install with: 1. _Install `v4l2loopback-dkms` and `v4l-utils`_. Those packages are required to create virtual camera devices (`v4l2loopback`) and verify their settings with the `v4l2-ctl` utility from `v4l-utils`. Install them using:
```bash <!-- prettier-ignore-start -->
```python
sudo apt install v4l2loopback-dkms v4l-utils sudo apt install v4l2loopback-dkms v4l-utils
``` ```
<!-- prettier-ignore-end -->
2. _Install the [DroidCam app](https://droidcam.app) on your phone_. This app is available for both iOS and Android. 2. _Install [DroidCam](https://droidcam.app) on your phone_. This app is available for both iOS and Android.
3. _Download and install [OBS Studio](https://obsproject.com)_. 3. _Install [OBS Studio](https://obsproject.com)_. This software will help you manage the camera feed. Install it using [Flatpak](https://flatpak.org):
4. _Download and install the [DroidCam OBS plugin](https://droidcam.app/obs)_.
5. _Start OBS Studio_.
6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480` to avoid the watermarks. <!-- prettier-ignore-start -->
7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video` or `OBS > Preferences... > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it. ```python
flatpak install flathub com.obsproject.Studio
```
<!-- prettier-ignore-end -->
4. _Install the DroidCam OBS plugin_. This plugin integrates DroidCam with OBS Studio. Install it with:
<!-- prettier-ignore-start -->
```python
flatpak install flathub com.obsproject.Studio.Plugin.DroidCam
```
<!-- prettier-ignore-end -->
5. _Start OBS Studio_. Launch with:
<!-- prettier-ignore-start -->
```python
flatpak run com.obsproject.Studio
```
<!-- prettier-ignore-end -->
6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480`.
7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it in.
8. _Start virtual camera_. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide). 8. _Start virtual camera_. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide).
9. _Verify the virtual camera setup and resolution_. 9. _Verify the virtual camera setup_. Use `v4l2-ctl` to list the devices:
- **Linux**: Use `v4l2-ctl` to list devices and check resolution:
```bash
v4l2-ctl --list-devices # find VirtualCam and note its /dev/videoX path
v4l2-ctl -d /dev/videoX --get-fmt-video # replace with your VirtualCam path
```
You should see `VirtualCam` listed and resolution `640x480`.
- **macOS**: Open Photo Booth or FaceTime and select "OBS Virtual Camera" as the input.
- **Windows**: The native Camera app doesn't support virtual cameras. Use a video conferencing app (Zoom, Teams) or run `lerobot-find-cameras opencv` directly to verify.
<details> <!-- prettier-ignore-start -->
<summary><strong>Troubleshooting</strong></summary> ```python
v4l2-ctl --list-devices
```
<!-- prettier-ignore-end -->
> The virtual camera resolution is incorrect. You should see an entry like:
Delete the virtual camera source and recreate it. The resolution cannot be changed after creation. ```
VirtualCam (platform:v4l2loopback-000):
/dev/video1
```
> Error reading frame in background thread for OpenCVCamera(X): OpenCVCamera(X) frame width=640 or height=480 do not match configured width=1920 or height=1080. 10. _Check the camera resolution_. Use `v4l2-ctl` to ensure that the virtual camera output resolution is `640x480`. Change `/dev/video1` to the port of your virtual camera from the output of `v4l2-ctl --list-devices`.
This error is caused by OBS Virtual Camera advertising a `1920x1080` resolution despite rescaling. The only fix for now is to comment out the width and height check in `_postprocess_image()`. <!-- prettier-ignore-start -->
```python
v4l2-ctl -d /dev/video1 --get-fmt-video
```
<!-- prettier-ignore-end -->
</details> You should see an entry like:
```
>>> Format Video Capture:
>>> Width/Height : 640/480
>>> Pixel Format : 'YUYV' (YUYV 4:2:2)
```
Troubleshooting: If the resolution is not correct, you will have to delete the virtual camera and create it again, as its resolution cannot be changed after creation.
If everything is set up correctly, you can proceed with the rest of the tutorial.
</hfoption> </hfoption>
</hfoptions> </hfoptions>
If everything is set up correctly, your phone will appear as a standard OpenCV camera and can be used with `OpenCVCamera`.

View File

@@ -1,278 +0,0 @@
# Using Subtasks in LeRobot Datasets
Subtask support in robotics datasets has proven effective in improving robot reasoning and understanding. Subtasks are particularly useful for:
- **Hierarchical policies**: Building policies that include subtask predictions to visualize robot reasoning in real time
- **Reward modeling**: Helping reward models understand task progression (e.g., SARM-style stage-aware reward models)
- **Task decomposition**: Breaking down complex manipulation tasks into atomic, interpretable steps
LeRobotDataset now supports subtasks as part of its dataset structure, alongside tasks.
## What are Subtasks?
While a **task** describes the overall goal (e.g., "Pick up the apple and place it in the basket"), **subtasks** break down the execution into finer-grained steps:
1. "Approach the apple"
2. "Grasp the apple"
3. "Lift the apple"
4. "Move to basket"
5. "Release the apple"
Each frame in the dataset can be annotated with its corresponding subtask, enabling models to learn and predict these intermediate stages.
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/subtask-asset.png"
alt="An overview of subtask annotation showing how frames are labeled with intermediate subtask stages"
width="80%"
/>
<p>
<em>Figure: Overview of subtask annotation.</em>
</p>
**Reference:** _Subtask-learning based for robot self-assembly in flexible collaborative assembly in manufacturing_, Original Article, Published: 19 April 2022.
## Dataset Structure
Subtask information is stored in the dataset metadata:
```
my-dataset/
├── data/
│ └── ...
├── meta/
│ ├── info.json
│ ├── stats.json
│ ├── tasks.parquet
│ ├── subtasks.parquet # Subtask index → subtask string mapping
│ └── episodes/
│ └── ...
└── videos/
└── ...
```
### Subtasks Parquet File
The `meta/subtasks.parquet` file maps subtask indices to their natural language descriptions:
| subtask_index | subtask (index column) |
| ------------- | ---------------------- |
| 0 | "Approach the apple" |
| 1 | "Grasp the apple" |
| 2 | "Lift the apple" |
| ... | ... |
### Frame-Level Annotations
Each frame in the dataset can include a `subtask_index` field that references the subtasks parquet file:
```python
# Example frame data in the parquet file
{
"index": 42,
"timestamp": 1.4,
"episode_index": 0,
"task_index": 0,
"subtask_index": 2, # References "Lift the apple"
"observation.state": [...],
"action": [...],
}
```
## Annotating Datasets with Subtasks
We provide a HuggingFace Space for easily annotating any LeRobotDataset with subtasks:
**[https://huggingface.co/spaces/lerobot/annotate](https://huggingface.co/spaces/lerobot/annotate)**
After completing your annotation:
1. Click "Push to Hub" to upload your annotated dataset
2. You can also run the annotation space locally by following the instructions at [github.com/huggingface/lerobot-annotate](https://github.com/huggingface/lerobot-annotate)
## Loading Datasets with Subtasks
When you load a dataset with subtask annotations, the subtask information is automatically available:
```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset
# Load a dataset with subtask annotations
dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
# Access a sample
sample = dataset[100]
# The sample includes both task and subtask information
print(sample["task"]) # "Collect the fruit"
print(sample["subtask"]) # "Grasp the apple"
print(sample["task_index"]) # tensor(0)
print(sample["subtask_index"]) # tensor(2)
```
### Checking for Subtask Support
You can check if a dataset has subtask annotations:
```python
# Check if subtasks are available
has_subtasks = (
"subtask_index" in dataset.features
and dataset.meta.subtasks is not None
)
if has_subtasks:
print(f"Dataset has {len(dataset.meta.subtasks)} unique subtasks")
print("Subtasks:", list(dataset.meta.subtasks.index))
```
## Using Subtasks for Training
### With the Tokenizer Processor
The `TokenizerProcessor` automatically handles subtask tokenization for Vision-Language Action (VLA) models:
```python
from lerobot.processor.tokenizer_processor import TokenizerProcessor
from lerobot.processor.pipeline import ProcessorPipeline
# Create a tokenizer processor
tokenizer_processor = TokenizerProcessor(
tokenizer_name_or_path="google/paligemma-3b-pt-224",
padding="max_length",
max_length=64,
)
# The processor will automatically tokenize subtasks if present in the batch
# and add them to the observation under:
# - "observation.subtask.tokens"
# - "observation.subtask.attention_mask"
```
When subtasks are available in the batch, the tokenizer processor adds:
- `observation.subtask.tokens`: Tokenized subtask text
- `observation.subtask.attention_mask`: Attention mask for the subtask tokens
### DataLoader with Subtasks
```python
import torch
from lerobot.datasets.lerobot_dataset import LeRobotDataset
dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=16,
shuffle=True,
)
for batch in dataloader:
# Access subtask information in the batch
subtasks = batch["subtask"] # List of subtask strings
subtask_indices = batch["subtask_index"] # Tensor of subtask indices
# Use for training hierarchical policies or reward models
print(f"Batch subtasks: {set(subtasks)}")
```
## Example Datasets with Subtask Annotations
Try loading a dataset with subtask annotations:
```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset
# Example dataset with subtask annotations
dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
# Explore the subtasks
print("Available subtasks:")
for subtask_name in dataset.meta.subtasks.index:
print(f" - {subtask_name}")
# Get subtask distribution
subtask_counts = {}
for i in range(len(dataset)):
sample = dataset[i]
subtask = sample["subtask"]
subtask_counts[subtask] = subtask_counts.get(subtask, 0) + 1
print("\nSubtask distribution:")
for subtask, count in sorted(subtask_counts.items(), key=lambda x: -x[1]):
print(f" {subtask}: {count} frames")
```
## Use Cases
### 1. Hierarchical Policy Training
Train policies that predict both actions and current subtask:
```python
class HierarchicalPolicy(nn.Module):
def __init__(self, num_subtasks):
super().__init__()
self.action_head = nn.Linear(hidden_dim, action_dim)
self.subtask_head = nn.Linear(hidden_dim, num_subtasks)
def forward(self, observations):
features = self.encoder(observations)
actions = self.action_head(features)
subtask_logits = self.subtask_head(features)
return actions, subtask_logits
```
### 2. Stage-Aware Reward Modeling (SARM)
Build reward models that understand task progression:
```python
# SARM predicts:
# - Stage: Which subtask is being executed (discrete)
# - Progress: How far along the subtask (continuous 0-1)
class SARMRewardModel(nn.Module):
def forward(self, observations):
features = self.encoder(observations)
stage_logits = self.stage_classifier(features)
progress = self.progress_regressor(features)
return stage_logits, progress
```
### 3. Progress Visualization
Monitor robot execution by tracking subtask progression:
```python
def visualize_execution(model, observations):
for t, obs in enumerate(observations):
action, subtask_logits = model(obs)
predicted_subtask = subtask_names[subtask_logits.argmax()]
print(f"t={t}: Executing '{predicted_subtask}'")
```
## API Reference
### LeRobotDataset Properties
| Property | Type | Description |
| --------------------------- | ---------------------- | ------------------------------------------ |
| `meta.subtasks` | `pd.DataFrame \| None` | DataFrame mapping subtask names to indices |
| `features["subtask_index"]` | `dict` | Feature spec for subtask_index if present |
### Sample Keys
When subtasks are available, each sample includes:
| Key | Type | Description |
| --------------- | -------------- | ------------------------------------ |
| `subtask_index` | `torch.Tensor` | Integer index of the current subtask |
| `subtask` | `str` | Natural language subtask description |
## Related Resources
- [SARM Paper](https://arxiv.org/pdf/2509.25358) - Stage-Aware Reward Modeling for Long Horizon Robot Manipulation
- [LeRobot Annotate Space](https://huggingface.co/spaces/lerobot/annotate) - Interactive annotation tool
- [LeRobotDataset v3.0](./lerobot-dataset-v3) - Dataset format documentation

View File

@@ -13,7 +13,7 @@ The EarthRover Mini Plus is a fully open source mobile robot that connects throu
### Hardware ### Hardware
- EarthRover Mini robot - EarthRover Mini robot
- Computer with Python 3.12 or newer - Computer with Python 3.10 or newer
- Internet connection - Internet connection
### Setting Up the Frodobots SDK ### Setting Up the Frodobots SDK
@@ -170,13 +170,13 @@ Once you can drive the robot well, you can start recording data to train AI mode
We use Hugging Face to store your data online. First, log in with your token from [Hugging Face settings](https://huggingface.co/settings/tokens): We use Hugging Face to store your data online. First, log in with your token from [Hugging Face settings](https://huggingface.co/settings/tokens):
```bash ```bash
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
``` ```
Store your Hugging Face username: Store your Hugging Face username:
```bash ```bash
HF_USER=$(hf auth whoami | awk -F': *' 'NR==1 {print $2}') HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER echo $HF_USER
``` ```
@@ -185,16 +185,13 @@ echo $HF_USER
Use the standard recording command: Use the standard recording command:
```bash ```bash
lerobot-record \ python src/lerobot/scripts/lerobot_record.py \
--robot.type=earthrover_mini_plus \ --robot.type=earthrover_mini_plus \
--teleop.type=keyboard_rover \ --teleop.type=keyboard_rover \
--dataset.repo_id=your_username/dataset_name \ --dataset.repo_id=your_username/dataset_name \
--dataset.num_episodes=2 \ --dataset.num_episodes=2 \
--dataset.fps=10 \ --dataset.fps=10 \
--dataset.single_task="Navigate around obstacles" \ --dataset.single_task="Navigate around obstacles" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true --display_data=true
``` ```
@@ -204,26 +201,22 @@ Replace `your_username/dataset_name` with your Hugging Face username and a name
Your dataset includes: Your dataset includes:
**Your Actions (2 features)**: **Your Actions (2 things)**:
- `linear_velocity`: How much you moved forward/backward - How much you moved forward/backward
- `angular_velocity`: How much you turned left/right - How much you turned left/right
**Robot Observations (24 features)**: **Robot Observations (12 things)**:
- Front camera video - Front camera video
- Rear camera video - Rear camera video
- Current speed - Current speed
- Battery level - Battery level
- Orientation - Which way the robot is facing
- GPS (latitude, longitude, signal strength) - GPS location (latitude, longitude, signal strength)
- Network signal strength - Network signal strength
- Vibration level - Vibration level
- Lamp state (on/off) - Lamp status (on/off)
- Accelerometer (x, y, z)
- Gyroscope (x, y, z)
- Magnetometer (x, y, z)
- Wheel RPMs (4 wheels)
### Where Your Data Goes ### Where Your Data Goes

View File

@@ -155,10 +155,10 @@ Upload your repository to Hugging Face:
pip install huggingface_hub pip install huggingface_hub
# Login to Hugging Face # Login to Hugging Face
hf auth login huggingface-cli login
# Create a new repository # Create a new repository
hf repo create my-org/my-custom-env huggingface-cli repo create my-custom-env --type space --org my-org
# Initialize git and push # Initialize git and push
git init git init

View File

@@ -120,12 +120,9 @@ lerobot-record \
--display_data=true \ --display_data=true \
--dataset.repo_id=<user>/eval_groot-bimanual \ --dataset.repo_id=<user>/eval_groot-bimanual \
--dataset.num_episodes=10 \ --dataset.num_episodes=10 \
--dataset.single_task="Grab and handover the red cube to the other arm" \ --dataset.single_task="Grab and handover the red cube to the other arm"
--dataset.streaming_encoding=true \ --policy.path=<user>/groot-bimanual # your trained model
--dataset.encoder_threads=2 \ --dataset.episode_time_s=30
# --dataset.vcodec=auto \
--policy.path=<user>/groot-bimanual \ # your trained model
--dataset.episode_time_s=30 \
--dataset.reset_time_s=10 --dataset.reset_time_s=10
``` ```

View File

@@ -224,15 +224,12 @@ lerobot-record \
--teleop.port=/dev/tty.usbmodem1201 \ --teleop.port=/dev/tty.usbmodem1201 \
--teleop.id=right \ --teleop.id=right \
--teleop.side=right \ --teleop.side=right \
--dataset.repo_id=<USER>/hand_record_test_with_video_data \ --dataset.repo_id=nepyope/hand_record_test_with_video_data \
--dataset.single_task="Hand recording test with video data" \ --dataset.single_task="Hand recording test with video data" \
--dataset.num_episodes=1 \ --dataset.num_episodes=1 \
--dataset.episode_time_s=5 \ --dataset.episode_time_s=5 \
--dataset.push_to_hub=true \ --dataset.push_to_hub=true \
--dataset.private=true \ --dataset.private=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true --display_data=true
``` ```
@@ -244,7 +241,7 @@ lerobot-replay \
--robot.port=/dev/tty.usbmodem58760432281 \ --robot.port=/dev/tty.usbmodem58760432281 \
--robot.id=right \ --robot.id=right \
--robot.side=right \ --robot.side=right \
--dataset.repo_id=<USER>/hand_record_test_with_camera \ --dataset.repo_id=nepyope/hand_record_test_with_camera \
--dataset.episode=0 --dataset.episode=0
``` ```
@@ -252,13 +249,13 @@ lerobot-replay \
```bash ```bash
lerobot-train \ lerobot-train \
--dataset.repo_id=<USER>/hand_record_test_with_video_data \ --dataset.repo_id=nepyope/hand_record_test_with_video_data \
--policy.type=act \ --policy.type=act \
--output_dir=outputs/train/hopejr_hand \ --output_dir=outputs/train/hopejr_hand \
--job_name=hopejr \ --job_name=hopejr \
--policy.device=mps \ --policy.device=mps \
--wandb.enable=true \ --wandb.enable=true \
--policy.repo_id=<USER>/hand_test_policy --policy.repo_id=nepyope/hand_test_policy
``` ```
### Evaluate ### Evaluate
@@ -273,11 +270,8 @@ lerobot-record \
--robot.side=right \ --robot.side=right \
--robot.cameras='{"main": {"type": "opencv", "index_or_path": 0, "width": 640, "height": 480, "fps": 30}}' \ --robot.cameras='{"main": {"type": "opencv", "index_or_path": 0, "width": 640, "height": 480, "fps": 30}}' \
--display_data=false \ --display_data=false \
--dataset.repo_id=<USER>/eval_hopejr \ --dataset.repo_id=nepyope/eval_hopejr \
--dataset.single_task="Evaluate hopejr hand policy" \ --dataset.single_task="Evaluate hopejr hand policy" \
--dataset.num_episodes=10 \ --dataset.num_episodes=10 \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--policy.path=outputs/train/hopejr_hand/checkpoints/last/pretrained_model --policy.path=outputs/train/hopejr_hand/checkpoints/last/pretrained_model
``` ```

View File

@@ -159,13 +159,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
Add your token to the CLI by running this command: Add your token to the CLI by running this command:
```bash ```bash
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
``` ```
Then store your Hugging Face repository name in a variable: Then store your Hugging Face repository name in a variable:
```bash ```bash
HF_USER=$(NO_COLOR=1 hf auth whoami | awk -F': *' 'NR==1 {print $2}') HF_USER=$(hf auth whoami | head -n 1)
echo $HF_USER echo $HF_USER
``` ```
@@ -185,10 +185,7 @@ lerobot-record \
--display_data=true \ --display_data=true \
--dataset.repo_id=${HF_USER}/record-test \ --dataset.repo_id=${HF_USER}/record-test \
--dataset.num_episodes=5 \ --dataset.num_episodes=5 \
--dataset.single_task="Grab the black cube" \ --dataset.single_task="Grab the black cube"
--dataset.streaming_encoding=true \
# --dataset.vcodec=auto \
--dataset.encoder_threads=2
``` ```
</hfoption> </hfoption>
<hfoption id="API example"> <hfoption id="API example">
@@ -327,7 +324,7 @@ You can look for other LeRobot datasets on the hub by searching for `LeRobot` [t
You can also push your local dataset to the Hub manually, running: You can also push your local dataset to the Hub manually, running:
```bash ```bash
hf upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset huggingface-cli upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset
``` ```
#### Record function #### Record function
@@ -491,7 +488,7 @@ If your local computer doesn't have a powerful GPU you could utilize Google Cola
Once training is done, upload the latest checkpoint with: Once training is done, upload the latest checkpoint with:
```bash ```bash
hf upload ${HF_USER}/act_so101_test \ huggingface-cli upload ${HF_USER}/act_so101_test \
outputs/train/act_so101_test/checkpoints/last/pretrained_model outputs/train/act_so101_test/checkpoints/last/pretrained_model
``` ```
@@ -499,7 +496,7 @@ You can also upload intermediate checkpoints with:
```bash ```bash
CKPT=010000 CKPT=010000
hf upload ${HF_USER}/act_so101_test${CKPT} \ huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \
outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model
``` ```
@@ -518,9 +515,6 @@ lerobot-record \
--display_data=false \ --display_data=false \
--dataset.repo_id=${HF_USER}/eval_so100 \ --dataset.repo_id=${HF_USER}/eval_so100 \
--dataset.single_task="Put lego brick into the transparent box" \ --dataset.single_task="Put lego brick into the transparent box" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
# <- Teleop optional if you want to teleoperate in between episodes \ # <- Teleop optional if you want to teleoperate in between episodes \
# --teleop.type=so100_leader \ # --teleop.type=so100_leader \
# --teleop.port=/dev/ttyACM0 \ # --teleop.port=/dev/ttyACM0 \

View File

@@ -1,57 +1,30 @@
# Installation # Installation
This guide uses `conda` (via miniforge) to manage environments (recommended). If you prefer another environment manager (e.g. `uv`, `venv`), ensure you have Python >=3.12 and `ffmpeg` installed with the `libsvtav1` encoder, then skip ahead to [Environment Setup](#step-2-environment-setup). ## Install [`miniforge`](https://conda-forge.org/download/)
## Step 1 (`conda` only): Install [`miniforge`](https://conda-forge.org/download/)
```bash ```bash
wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
bash Miniforge3-$(uname)-$(uname -m).sh bash Miniforge3-$(uname)-$(uname -m).sh
``` ```
## Step 2: Environment Setup ## Environment Setup
Create a virtual environment with Python 3.12: Create a virtual environment with Python 3.10, using conda:
<!-- prettier-ignore-start -->
<hfoptions id="create_venv">
<hfoption id="conda">
```bash ```bash
conda create -y -n lerobot python=3.12 conda create -y -n lerobot python=3.10
``` ```
</hfoption>
<hfoption id="uv"> Then activate your conda environment, you have to do this each time you open a shell to use lerobot:
```bash ```bash
uv python install 3.12
uv venv --python 3.12
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
Then activate your virtual environment, you have to do this each time you open a shell to use lerobot:
<!-- prettier-ignore-start -->
<hfoptions id="activate_venv">
<hfoption id="conda">```bash
conda activate lerobot conda activate lerobot
```</hfoption>
<hfoption id="uv">
```bash
# Linux/macOS
source .venv/bin/activate
# Windows PowerShell
source .venv\Scripts\Activate.ps1
``` ```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
When using `conda`, install `ffmpeg` in your environment: When using `conda`, install `ffmpeg` in your environment:
```bash ```bash
conda install ffmpeg -c conda-forge conda install ffmpeg -c conda-forge
ffmpeg -version # ffmpeg 8.X is not yet supported !
``` ```
> [!TIP] > [!TIP]
@@ -65,17 +38,7 @@ ffmpeg -version # ffmpeg 8.X is not yet supported !
> >
> - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`. > - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`.
> [!NOTE] ## Install LeRobot 🤗
> When installing LeRobot inside WSL (Windows Subsystem for Linux), make sure to install `evdev` with the following command:
>
> ```bash
> conda install evdev -c conda-forge
> ```
> [!IMPORTANT]
> If you are using `uv` you will have to install `ffmpeg` system-wide (outside of the virtual environment). You rely on `uv` and `torchcodec` ability to dynamically link to the system `ffmpeg`.
## Step 3: Install LeRobot 🤗
### From Source ### From Source
@@ -88,45 +51,23 @@ cd lerobot
Then, install the library in editable mode. This is useful if you plan to contribute to the code. Then, install the library in editable mode. This is useful if you plan to contribute to the code.
<!-- prettier-ignore-start -->
<hfoptions id="install_lerobot_src">
<hfoption id="conda">
```bash ```bash
pip install -e . pip install -e .
``` ```
</hfoption>
<hfoption id="uv">
```bash
uv pip install -e .
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
### Installation from PyPI ### Installation from PyPI
**Core Library:** **Core Library:**
Install the base package with: Install the base package with:
<!-- prettier-ignore-start -->
<hfoptions id="install_lerobot_pypi">
<hfoption id="conda">
```bash ```bash
pip install lerobot pip install lerobot
``` ```
</hfoption>
<hfoption id="uv">
```bash
uv pip install lerobot
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
_This installs only the default dependencies._ _This installs only the default dependencies._
**Extra Features:** **Extra Features:**
To install additional functionality, use one of the following (If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.): To install additional functionality, use one of the following:
```bash ```bash
pip install 'lerobot[all]' # All available features pip install 'lerobot[all]' # All available features
@@ -140,10 +81,13 @@ _Replace `[...]` with your desired features._
For a full list of optional dependencies, see: For a full list of optional dependencies, see:
https://pypi.org/project/lerobot/ https://pypi.org/project/lerobot/
> [!NOTE]
> For lerobot 0.4.0, if you want to install pi, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`
### Troubleshooting ### Troubleshooting
If you encounter build errors, you may need to install additional dependencies: `cmake`, `build-essential`, and `ffmpeg libs`. If you encounter build errors, you may need to install additional dependencies: `cmake`, `build-essential`, and `ffmpeg libs`.
To install these for Linux run: To install these for linux run:
```bash ```bash
sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev
@@ -153,7 +97,7 @@ For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/
## Optional dependencies ## Optional dependencies
LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`. If you are using `uv`, replace `pip install` with `uv pip install` in the commands below. LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`.
### Simulations ### Simulations

View File

@@ -279,13 +279,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
Add your token to the CLI by running this command: Add your token to the CLI by running this command:
```bash ```bash
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
``` ```
Then store your Hugging Face repository name in a variable: Then store your Hugging Face repository name in a variable:
```bash ```bash
HF_USER=$(hf auth whoami | awk -F': *' 'NR==1 {print $2}') HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER echo $HF_USER
``` ```

View File

@@ -41,10 +41,7 @@ lerobot-record \
--display_data=true \ --display_data=true \
--dataset.repo_id=${HF_USER}/record-test \ --dataset.repo_id=${HF_USER}/record-test \
--dataset.num_episodes=5 \ --dataset.num_episodes=5 \
--dataset.single_task="Grab the black cube" \ --dataset.single_task="Grab the black cube"
--dataset.streaming_encoding=true \
# --dataset.vcodec=auto \
--dataset.encoder_threads=2
``` ```
See the [recording guide](./il_robots#record-a-dataset) for more details. See the [recording guide](./il_robots#record-a-dataset) for more details.

View File

@@ -1,276 +0,0 @@
# OpenArm
[OpenArm](https://openarm.dev) is an open-source 7DOF humanoid arm designed for physical AI research and deployment.
To get your OpenArm, assembled or DIY, and join the global community, browse verified and certified manufacturers worldwide at [openarm.dev](https://openarm.dev).
## What's Unique?
- **Human-Scale Design**: OpenArm is designed with human-like proportions, scaled for a person around 160-165cm tall. This provides an optimal balance between practical reach and manageable inertia for safe, responsive operation.
- **Safety-First Architecture**: Built with QDD backdrivable motors and high compliance, OpenArm prioritizes safe human-robot interaction while maintaining practical payload capabilities (6.0kg peak / 4.1kg nominal) for real-world tasks.
- **Built for Durability**: Critical structural components use aluminum and stainless steel construction, ensuring robust performance for repetitive data collection and continuous research use.
- **Fully Accessible & Buildable**: Every component, from CNC parts and 3D-printed casings to electrical wiring is designed to be purchasable and buildable by individual researchers and labs, with complete fabrication data provided.
- **Practical & Affordable**: At $6,500 USD for a complete bimanual system, OpenArm delivers research-grade capabilities at a fraction of traditional humanoid robot costs.
## Platform Requirements
<Tip warning={true}>
**Linux Only**: OpenArm currently only works on Linux. The CAN bus USB adapter
does not have macOS drivers and has not been tested on Windows.
</Tip>
## Safety Guide
Before operating OpenArm, please read the [official safety guide](https://docs.openarm.dev/getting-started/safety-guide). Key points:
- **Secure installation**: Fasten the arm to a flat, stable surface with screws or clamps
- **Safe distance**: Keep body parts and objects outside the range of motion during operation
- **Protective equipment**: Always wear safety goggles; use additional PPE as needed
- **Payload limits**: Do not exceed specified payload limits (6.0kg peak / 4.1kg nominal per arm)
- **Emergency stop**: Know the location and operation of the emergency stop device
- **Regular inspection**: Check for loose screws, damaged mechanical limits, unusual noises, and wiring damage
## Hardware Setup
Follow the official [OpenArm hardware documentation](https://docs.openarm.dev) for:
- Bill of materials and sourcing
- 3D printing instructions
- Mechanical assembly
- Electrical wiring
The hardware repositories are available at [github.com/enactic/openarm](https://github.com/enactic/openarm).
## CAN Bus Setup
OpenArm uses CAN bus communication with Damiao motors. Once you have the CAN bus USB adapter plugged into your Linux PC, follow the [Damiao Motors and CAN Bus guide](./damiao) to configure the interface.
Quick setup:
```bash
# Setup CAN interfaces
lerobot-setup-can --mode=setup --interfaces=can0,can1
# Test motor communication
lerobot-setup-can --mode=test --interfaces=can0,can1
```
## Install LeRobot 🤗
Follow our [Installation Guide](./installation), then install the Damiao motor support:
```bash
pip install -e ".[damiao]"
```
## Usage
### Follower Arm (Robot)
<hfoptions id="follower">
<hfoption id="Command">
```bash
lerobot-calibrate \
--robot.type=openarm_follower \
--robot.port=can0 \
--robot.side=right \
--robot.id=my_openarm_follower
```
</hfoption>
<hfoption id="API example">
```python
from lerobot.robots.openarm_follower import OpenArmFollower, OpenArmFollowerConfig
config = OpenArmFollowerConfig(
port="can0",
side="right", # or "left" for left arm
id="my_openarm_follower",
)
follower = OpenArmFollower(config)
follower.connect()
# Read current state
obs = follower.get_observation()
print(obs)
# Send action (position in degrees)
action = {
"joint_1.pos": 0.0,
"joint_2.pos": 0.0,
"joint_3.pos": 0.0,
"joint_4.pos": 45.0,
"joint_5.pos": 0.0,
"joint_6.pos": 0.0,
"joint_7.pos": 0.0,
"gripper.pos": 0.0,
}
follower.send_action(action)
follower.disconnect()
```
</hfoption>
</hfoptions>
### Leader Arm (Teleoperator)
The leader arm is used for teleoperation - manually moving it to control the follower arm.
<hfoptions id="leader">
<hfoption id="Command">
```bash
lerobot-calibrate \
--teleop.type=openarm_leader \
--teleop.port=can1 \
--teleop.id=my_openarm_leader
```
</hfoption>
<hfoption id="API example">
```python
from lerobot.teleoperators.openarm_leader import OpenArmLeader, OpenArmLeaderConfig
config = OpenArmLeaderConfig(
port="can1",
id="my_openarm_leader",
manual_control=True, # Disable torque for manual movement
)
leader = OpenArmLeader(config)
leader.connect()
# Read current position (as action to send to follower)
action = leader.get_action()
print(action)
leader.disconnect()
```
</hfoption>
</hfoptions>
### Teleoperation
To teleoperate OpenArm with leader-follower control:
```bash
lerobot-teleoperate \
--robot.type=openarm_follower \
--robot.port=can0 \
--robot.side=right \
--robot.id=my_follower \
--teleop.type=openarm_leader \
--teleop.port=can1 \
--teleop.id=my_leader
```
### Bimanual Teleoperation
To teleoperate a bimanual OpenArm setup with two leader and two follower arms:
```bash
lerobot-teleoperate \
--robot.type=bi_openarm_follower \
--robot.left_arm_config.port=can0 \
--robot.left_arm_config.side=left \
--robot.right_arm_config.port=can1 \
--robot.right_arm_config.side=right \
--robot.id=my_bimanual_follower \
--teleop.type=bi_openarm_leader \
--teleop.left_arm_config.port=can2 \
--teleop.right_arm_config.port=can3 \
--teleop.id=my_bimanual_leader
```
### Recording Data
To record a dataset during teleoperation:
```bash
lerobot-record \
--robot.type=openarm_follower \
--robot.port=can0 \
--robot.side=right \
--robot.id=my_follower \
--teleop.type=openarm_leader \
--teleop.port=can1 \
--teleop.id=my_leader \
--repo-id=my_hf_username/my_openarm_dataset \
--fps=30 \
--num-episodes=10
```
## Configuration Options
### Follower Configuration
| Parameter | Default | Description |
| --------------------- | --------- | ---------------------------------------------------------- |
| `port` | - | CAN interface (e.g., `can0`) |
| `side` | `None` | Arm side: `"left"`, `"right"`, or `None` for custom limits |
| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
| `can_bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
| `can_data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
| `max_relative_target` | `None` | Safety limit for relative target positions |
| `position_kp` | Per-joint | Position control proportional gains |
| `position_kd` | Per-joint | Position control derivative gains |
### Leader Configuration
| Parameter | Default | Description |
| ------------------ | --------- | ----------------------------------- |
| `port` | - | CAN interface (e.g., `can1`) |
| `manual_control` | `True` | Disable torque for manual movement |
| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
| `can_bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
| `can_data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
## Motor Configuration
OpenArm uses Damiao motors with the following default configuration:
| Joint | Motor Type | Send ID | Recv ID |
| --------------------------- | ---------- | ------- | ------- |
| joint_1 (Shoulder pan) | DM8009 | 0x01 | 0x11 |
| joint_2 (Shoulder lift) | DM8009 | 0x02 | 0x12 |
| joint_3 (Shoulder rotation) | DM4340 | 0x03 | 0x13 |
| joint_4 (Elbow flex) | DM4340 | 0x04 | 0x14 |
| joint_5 (Wrist roll) | DM4310 | 0x05 | 0x15 |
| joint_6 (Wrist pitch) | DM4310 | 0x06 | 0x16 |
| joint_7 (Wrist rotation) | DM4310 | 0x07 | 0x17 |
| gripper | DM4310 | 0x08 | 0x18 |
## Troubleshooting
### No Response from Motors
1. Check power supply connections
2. Verify CAN wiring (CAN-H, CAN-L, GND)
3. Run diagnostics: `lerobot-setup-can --mode=test --interfaces=can0`
4. See the [Damiao troubleshooting guide](./damiao#troubleshooting) for more details
### CAN Interface Not Found
Ensure the CAN interface is configured:
```bash
ip link show can0
```
## Resources
- [OpenArm Website](https://openarm.dev)
- [OpenArm Documentation](https://docs.openarm.dev)
- [OpenArm GitHub](https://github.com/enactic/openarm)
- [Safety Guide](https://docs.openarm.dev/getting-started/safety-guide)
- [Damiao Motors and CAN Bus](./damiao)

View File

@@ -66,13 +66,12 @@ Run on of the examples scripts to teleoperate, record a dataset, replay a datase
All scripts assume you configured your robot (e.g., SO-100 follower) and set the correct serial port. All scripts assume you configured your robot (e.g., SO-100 follower) and set the correct serial port.
Additionally you need to **copy the URDF of the robot into the examples folder**. For the examples in this tutorial (using SO100/SO101), copy the `SO101` folder from the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101) into the `examples/phone_to_so100/` directory, so that the URDF file path becomes `examples/phone_to_so100/SO101/so101_new_calib.urdf`. Additionally you need to **copy the urdf of the robot to the examples folder**. For the examples in this tutorial (Using SO100/SO101) it is highly recommended to use the urdf in the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf)
- Run this example to teleoperate: - Run this example to teleoperate:
```bash ```bash
cd examples/phone_to_so100 python examples/phone_to_so100/teleoperate.py
python teleoperate.py
``` ```
After running the example: After running the example:
@@ -85,22 +84,19 @@ Additionally you can customize mapping or safety limits by editing the processor
- Run this example to record a dataset, which saves absolute end effector observations and actions: - Run this example to record a dataset, which saves absolute end effector observations and actions:
```bash ```bash
cd examples/phone_to_so100 python examples/phone_to_so100/record.py
python record.py
``` ```
- Run this example to replay recorded episodes: - Run this example to replay recorded episodes:
```bash ```bash
cd examples/phone_to_so100 python examples/phone_to_so100/replay.py
python replay.py
``` ```
- Run this example to evaluate a pretrained policy: - Run this example to evaluate a pretrained policy:
```bash ```bash
cd examples/phone_to_so100 python examples/phone_to_so100/evaluate.py
python evaluate.py
``` ```
### Important pipeline steps and options ### Important pipeline steps and options

View File

@@ -34,6 +34,11 @@ As described by Physical Intelligence, while AI has achieved remarkable success
pip install -e ".[pi]" pip install -e ".[pi]"
``` ```
> [!NOTE]
> For lerobot 0.4.0, if you want to install the pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
## Training Data and Capabilities ## Training Data and Capabilities
π₀ is trained on the largest robot interaction dataset to date, combining three key data sources: π₀ is trained on the largest robot interaction dataset to date, combining three key data sources:
@@ -55,7 +60,7 @@ policy.type=pi0
For training π₀, you can use the standard LeRobot training script with the appropriate configuration: For training π₀, you can use the standard LeRobot training script with the appropriate configuration:
```bash ```bash
lerobot-train \ python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your_dataset \ --dataset.repo_id=your_dataset \
--policy.type=pi0 \ --policy.type=pi0 \
--output_dir=./outputs/pi0_training \ --output_dir=./outputs/pi0_training \

View File

@@ -36,6 +36,11 @@ This diverse training mixture creates a "curriculum" that enables generalization
pip install -e ".[pi]" pip install -e ".[pi]"
``` ```
> [!NOTE]
> For lerobot 0.4.0, if you want to install the pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
## Usage ## Usage
To use π₀.₅ in your LeRobot configuration, specify the policy type as: To use π₀.₅ in your LeRobot configuration, specify the policy type as:
@@ -51,7 +56,7 @@ policy.type=pi05
Here's a complete training command for finetuning the base π₀.₅ model on your own dataset: Here's a complete training command for finetuning the base π₀.₅ model on your own dataset:
```bash ```bash
lerobot-train \ python src/lerobot/scripts/lerobot_train.py\
--dataset.repo_id=your_dataset \ --dataset.repo_id=your_dataset \
--policy.type=pi05 \ --policy.type=pi05 \
--output_dir=./outputs/pi05_training \ --output_dir=./outputs/pi05_training \

View File

@@ -43,11 +43,16 @@ This approach can transform **any existing VLM** into a VLA by training it to pr
pip install -e ".[pi]" pip install -e ".[pi]"
``` ```
> [!NOTE]
> For lerobot 0.4.0, if you want to install the pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
## Training a Custom FAST Tokenizer ## Training a Custom FAST Tokenizer
You have two options for the FAST tokenizer: You have two options for the FAST tokenizer:
1. **Use the pre-trained tokenizer**: The `lerobot/fast-action-tokenizer` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer. 1. **Use the pre-trained tokenizer**: The `physical-intelligence/fast` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer.
2. **Train your own tokenizer**: For maximum performance on your specific dataset, you can finetune the tokenizer on your own data. 2. **Train your own tokenizer**: For maximum performance on your specific dataset, you can finetune the tokenizer on your own data.
@@ -109,15 +114,15 @@ lerobot-train \
### Key Training Parameters ### Key Training Parameters
| Parameter | Description | Default | | Parameter | Description | Default |
| -------------------------------------- | -------------------------------------------------- | ------------------------------- | | -------------------------------------- | -------------------------------------------------- | ---------------------------- |
| `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` | | `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` |
| `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` | | `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` |
| `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` | | `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` |
| `--policy.n_action_steps` | Number of action steps to execute | `50` | | `--policy.n_action_steps` | Number of action steps to execute | `50` |
| `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` | | `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` |
| `--policy.action_tokenizer_name` | FAST tokenizer to use | `lerobot/fast-action-tokenizer` | | `--policy.action_tokenizer_name` | FAST tokenizer to use | `physical-intelligence/fast` |
| `--policy.compile_model=true` | Enable torch.compile for faster training | `false` | | `--policy.compile_model=true` | Enable torch.compile for faster training | `false` |
## Inference ## Inference

View File

@@ -159,9 +159,6 @@ lerobot-record \
--dataset.fps=15 \ --dataset.fps=15 \
--dataset.push_to_hub=true \ --dataset.push_to_hub=true \
--dataset.private=true \ --dataset.private=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true --display_data=true
``` ```
@@ -201,9 +198,6 @@ lerobot-record \
--dataset.fps=15 \ --dataset.fps=15 \
--dataset.push_to_hub=true \ --dataset.push_to_hub=true \
--dataset.private=true \ --dataset.private=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true --display_data=true
``` ```

View File

@@ -269,7 +269,7 @@ This generates visualizations showing video frames with subtask boundaries overl
Train with **no annotations** - uses linear progress from 0 to 1: Train with **no annotations** - uses linear progress from 0 to 1:
```bash ```bash
lerobot-train \ python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \ --dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \ --policy.type=sarm \
--policy.annotation_mode=single_stage \ --policy.annotation_mode=single_stage \
@@ -288,7 +288,7 @@ lerobot-train \
Train with **dense annotations only** (sparse auto-generated): Train with **dense annotations only** (sparse auto-generated):
```bash ```bash
lerobot-train \ python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \ --dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \ --policy.type=sarm \
--policy.annotation_mode=dense_only \ --policy.annotation_mode=dense_only \
@@ -307,7 +307,7 @@ lerobot-train \
Train with **both sparse and dense annotations**: Train with **both sparse and dense annotations**:
```bash ```bash
lerobot-train \ python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \ --dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \ --policy.type=sarm \
--policy.annotation_mode=dual \ --policy.annotation_mode=dual \
@@ -468,7 +468,7 @@ This script:
Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`). Currently PI0, PI0.5 and SmolVLA are supported with RA-BC: Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`). Currently PI0, PI0.5 and SmolVLA are supported with RA-BC:
```bash ```bash
lerobot-train \ python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \ --dataset.repo_id=your-username/your-dataset \
--policy.type=pi0 \ --policy.type=pi0 \
--use_rabc=true \ --use_rabc=true \

View File

@@ -106,9 +106,6 @@ lerobot-record \
--dataset.repo_id=${HF_USER}/eval_DATASET_NAME_test \ # <- This will be the dataset name on HF Hub --dataset.repo_id=${HF_USER}/eval_DATASET_NAME_test \ # <- This will be the dataset name on HF Hub
--dataset.episode_time_s=50 \ --dataset.episode_time_s=50 \
--dataset.num_episodes=10 \ --dataset.num_episodes=10 \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
# <- Teleop optional if you want to teleoperate in between episodes \ # <- Teleop optional if you want to teleoperate in between episodes \
# --teleop.type=so100_leader \ # --teleop.type=so100_leader \
# --teleop.port=/dev/ttyACM0 \ # --teleop.port=/dev/ttyACM0 \

View File

@@ -1,155 +0,0 @@
# Streaming Video Encoding Guide
## 1. Overview
Streaming video encoding eliminates the traditional PNG round-trip during video dataset recording. Instead of:
1. Capture frame -> write PNG to disk -> (at episode end) read PNGs -> encode to MP4 -> delete PNGs
Frames can be encoded in real-time during capture:
1. Capture frame -> queue to encoder thread -> encode to MP4 directly
This makes `save_episode()` near-instant (the video is already encoded by the time the episode ends) and removes the blocking wait that previously occurred between episodes, especially with multiple cameras in long episodes.
## 2. Tuning Parameters
| Parameter | CLI Flag | Type | Default | Description |
| ----------------------- | --------------------------------- | ------------- | ------------- | ----------------------------------------------------------------- |
| `streaming_encoding` | `--dataset.streaming_encoding` | `bool` | `True` | Enable real-time encoding during capture |
| `vcodec` | `--dataset.vcodec` | `str` | `"libsvtav1"` | Video codec. `"auto"` detects best HW encoder |
| `encoder_threads` | `--dataset.encoder_threads` | `int \| None` | `None` (auto) | Threads per encoder instance. `None` lets the codec decide |
| `encoder_queue_maxsize` | `--dataset.encoder_queue_maxsize` | `int` | `60` | Max buffered frames per camera (~2s at 30fps). Consumes RAM |
## 3. Performance Considerations
Streaming encoding means the CPU is encoding video **during** the capture loop, not after. This creates a CPU budget that must be shared between:
- **Control loop** (reading cameras, controlling the robot, writing non-video data)
- **Encoder threads** (one pool per camera)
- **Rerun visualization** (if enabled)
- **OS and other processes**
### Resolution & Number of Cameras Impact
| Setup | Throughput (px/sec) | CPU Encoding Load | Notes |
| ------------------------- | ------------------- | ----------------- | ------------------------------ |
| 2camsx 640x480x3 @30fps | 55M | Low | Works on most systems |
| 2camsx 1280x720x3 @30fps | 165M | Moderate | Comfortable on modern systems |
| 2camsx 1920x1080x3 @30fps | 373M | High | Requires powerful high-end CPU |
### `encoder_threads` Tuning
This parameter controls how many threads each encoder instance uses internally:
- **Higher values** (e.g., 4-5): Faster encoding, but uses more CPU cores per camera. Good for high-end systems with many cores.
- **Lower values** (e.g., 1-2): Less CPU per camera, freeing cores for capture and visualization. Good for low-res images and capable CPUs.
- **`None` (default)**: Lets the codec decide. Information available in the codec logs.
### Backpressure and Frame Dropping
Each camera has a bounded queue (`encoder_queue_maxsize`, default 60 frames). When the encoder can't keep up:
1. The queue fills up (consuming RAM)
2. New frames are **dropped** (not blocked) — the capture loop continues uninterrupted
3. A warning is logged: `"Encoder queue full for {camera}, dropped N frame(s)"`
4. At episode end, total dropped frames per camera are reported
### Symptoms of Encoder Falling Behind
- **System feels laggy and freezes**: all CPUs are at 100%
- **Dropped frame warnings** in the log or lower frames/FPS than expected in the recorded dataset
- **Choppy robot movement**: If CPU is severely overloaded, even the capture loop may be affected
- **Accumulated rerun lag**: Visualization falls behind real-time
## 4. Hardware-Accelerated Encoding
### When to Use
Use HW encoding when:
- CPU is the bottleneck (dropped frames, choppy robot, rerun lag)
- You have compatible hardware (GPU or dedicated encoder)
- You're recording at high throughput (high resolution or with many cameras)
### Choosing a Codec
| Codec | CPU Usage | File Size | Quality | Notes |
| --------------------- | --------- | -------------- | ------- | ---------------------------------------------------------------- |
| `libsvtav1` (default) | High | Smallest | Best | Default. Best compression but most CPU-intensive |
| `h264` | Medium | ~30-50% larger | Good | Software H.264. Lower CPU |
| HW encoders | Very Low | Largest | Good | Offloads to dedicated hardware. Best for CPU-constrained systems |
### Available HW Encoders
| Encoder | Platform | Hardware | CLI Value |
| ------------------- | ------------- | ------------------------------------------------------------------------------------------------ | ------------------------------------ |
| `h264_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.vcodec=h264_videotoolbox` |
| `hevc_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.vcodec=hevc_videotoolbox` |
| `h264_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.vcodec=h264_nvenc` |
| `hevc_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.vcodec=hevc_nvenc` |
| `h264_vaapi` | Linux | Intel/AMD GPU | `--dataset.vcodec=h264_vaapi` |
| `h264_qsv` | Linux/Windows | Intel Quick Sync | `--dataset.vcodec=h264_qsv` |
| `auto` | Any | Probes the system for available HW encoders. Falls back to `libsvtav1` if no HW encoder is found | `--dataset.vcodec=auto` |
> [!NOTE]
> In order to use the HW accelerated encoders you might need to upgrade your GPU drivers.
> [!NOTE]
> `libsvtav1` is the default because it provides the best training performance; other vcodecs can reduce CPU usage and be faster, but they typically produce larger files and may affect training time.
## 5. Troubleshooting
| Symptom | Likely Cause | Fix |
| ------------------------------------------------------------------ | -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| System freezes or choppy robot movement or Rerun visualization lag | CPU starved (100% load usage) | Close other apps, reduce encoding throughput, lower `encoder_threads`, use `h264`, use `display_data=False`. If the CPU continues to be at 100% then it might be insufficient for your setup, consider `--dataset.streaming_encoding=false` or HW encoding (`--dataset.vcodec=auto`) |
| "Encoder queue full" warnings or dropped frames in dataset | Encoder can't keep up (Queue overflow) | If CPU is not at 100%: Increase `encoder_threads`, increase `encoder_queue_maxsize` or use HW encoding (`--dataset.vcodec=auto`). |
| High RAM usage | Queue filling faster than encoding | `encoder_threads` too low or CPU insufficient. Reduce `encoder_queue_maxsize` or use HW encoding |
| Large video files | Using HW encoder or H.264 | Expected trade-off. Switch to `libsvtav1` if CPU allows |
| `save_episode()` still slow | `streaming_encoding` is `False` | Set `--dataset.streaming_encoding=true` |
| Encoder thread crash | Codec not available or invalid settings | Check `vcodec` is installed, try `--dataset.vcodec=auto` |
| Recorded dataset is missing frames | CPU/GPU starvation or occasional load spikes | If ~5% of frames are missing, your system is likely overloaded — follow the recommendations above. If fewer frames are missing (~2%), they are probably due to occasional transient load spikes (often at startup) and can be considered expected. |
## 6. Recommended Configurations
These estimates are conservative; we recommend testing them on your setup—start with a low load and increase it gradually.
### High-End Systems: modern 12+ cores (24+ threads)
A throughput between ~250-500M px/sec should be comfortable on the CPU. For even better results, try HW encoding if available.
```bash
# 3camsx 1280x720x3 @30fps: Defaults work well. Optionally increase encoder parallelism.
# 2camsx 1920x1080x3 @30fps: Defaults work well. Optionally increase encoder parallelism.
lerobot-record --dataset.encoder_threads=5 ...
# 3camsx 1920x1080x3 @30fps: Might require some tuning.
```
### Mid-Range Systems: modern 8+ cores (16+ threads) or Apple Silicon
A throughput between ~80-300M px/sec should be possible on the CPU.
```bash
# 3camsx 640x480x3 @30fps: Defaults work well. Optionally decrease encoder parallelism.
# 2camsx 1280x720x3 @30fps: Defaults work well. Optionally decrease encoder parallelism.
lerobot-record --dataset.encoder_threads=2 ...
# 2camsx 1920x1080x3 @30fps: Might require some tuning.
```
### Low-Resource Systems: modern 4+ cores (8+ threads) or Raspberry Pi 5
On very constrained systems, streaming encoding may compete too heavily with the capture loop. Disabling it falls back to the PNG-based approach where encoding happens between episodes (blocking, but doesn't interfere with capture). Alternatively, record at a lower throughput to reduce both capture and encoding load. Consider also changing codec to `h264` and using batch encoding.
```bash
# 2camsx 640x480x3 @30fps: Requires some tuning.
# Use H.264, disable streaming, consider batching encoding
lerobot-record --dataset.vcodec=h264 --dataset.streaming_encoding=false ...
```
## 7. Closing note
Performance ultimately depends on your exact setup — frames-per-second, resolution, CPU cores and load, available memory, episode length, and the encoder you choose. Always test with your target workload, be mindful of your CPU and system capabilities, and tune `encoder_threads`, `encoder_queue_maxsize`, and
`vcodec` reasonably. That said, a common practical configuration (for many applications) is three cameras at 640x480x3 @30fps; this usually runs fine with the default streaming video encoding settings on modern systems. Always verify your recorded dataset is healthy by comparing the video duration to the CLI episode duration and confirming the row count equals FPS × CLI duration.

View File

@@ -0,0 +1,86 @@
# Training-Time RTC
Training-Time RTC teaches the model to handle inference delay during training.
It feeds the **ground-truth action prefix** to the model and trains only on the remaining postfix actions.
This keeps chunk transitions smooth without doing any inference-time inpainting.
Based on: [Training-Time Action Conditioning for Efficient Real-Time Chunking](https://arxiv.org/abs/2512.05964).
LeRobot supports this for `pi0`, `pi05` and `smolvla` without changing model parameters.
---
## How It Works
### At Training Time
- Sample a delay `d` per batch element.
- Keep the first `d` action steps as **ground truth** (no noise).
- Add noise only to the postfix actions.
- Set the flow-matching timestep to **1.0** for prefix tokens and normal timesteps for postfix tokens.
- Mask the loss to only train on the postfix.
### At Inference Time
When `rtc_training_config.enabled=true`, the model uses training-time RTC inference:
- Replace prefix positions in `x_t` with the previous chunk's leftover actions.
- Set the timestep to **1.0** for prefix positions.
---
## Quick Start (CLI)
```bash
lerobot-train \
--policy.type=pi0 \
--dataset.repo_id=your/dataset \
--policy.rtc_training_config.enabled=true \
--policy.rtc_training_config.min_delay=0 \
--policy.rtc_training_config.max_delay=6 \
--policy.rtc_training_config.delay_distribution=UNIFORM
```
---
## Inference with Training-Time RTC
After training with `rtc_training_config`, use the same config at inference. The model will automatically use training-time RTC inference:
```python
policy = PI0Policy.from_pretrained("path/to/trained/model")
# rtc_training_config is loaded from the saved config
actions = policy.predict_action_chunk(
batch,
inference_delay=5, # estimated delay in timesteps
prev_chunk_left_over=previous_actions, # from previous chunk
)
```
---
## Key Parameters
`RTCTrainingConfig` is available on the policy config (`pi0`, `pi05`, `smolvla`):
- **`enabled`**: Toggle training-time RTC (both training and inference).
- **`min_delay` / `max_delay`**: Delay range (inclusive).
- **`delay_distribution`**:
- `UNIFORM`: uniform in `[min_delay, max_delay]`
- `EXP`: exponentially decayed distribution over delays
- **`exp_decay`**: Exponential decay factor for `EXP` sampling.
---
## Notes and Recommendations
- Start with `min_delay=0` and `max_delay` around your expected worst-case inference delay.
- Use `EXP` if you want more supervision on smaller delays.
---
## Related Docs
- [Real-Time Chunking (Inference-Time RTC)](./rtc)
- [Pi0](./pi0), [Pi0.5](./pi05), [SmolVLA](./smolvla)

View File

@@ -1,72 +1,23 @@
# Unitree G1 # Unitree G1
<img This guide covers the complete setup process for the Unitree G1 humanoid, from initial connection to running gr00t_wbc locomotion.
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/unitree_thumbnail.jpg"
alt="Unitree G1 locomanipulation demo"
style={{ width: "100%" }}
/>
The Unitree G1 humanoid is now supported in LeRobot! You can teleoperate, train locomanipulation policies, test in sim, and more. Both 29 and 23 DoF variants are supported. ## About
We support both the 29 and 23 DoF G1 EDU versions. We introduce:
- **`unitree g1` robot class, handling low-level read/write from/to the humanoid**
- **ZMQ socket bridge** for remote communication and camera streaming, allowing for remote policy deployment over wlan, eth or directly on the robot
- **Locomotion policies** from NVIDIA gr00t and Amazon FAR Holosoma
- **Simulation mode** for testing policies in MuJoCo without the physical robot
--- ---
## Part 1: Getting Started ## Connection guide
### Install the Unitree SDK ### Step 1: Configure Ethernet Interface
Follow the [unitree_sdk2_python installation guide](https://github.com/unitreerobotics/unitree_sdk2_python#installation). Tested with `unitree_sdk2py==1.0.1` and `cyclonedds==0.10.2`: Set a static IP on the same subnet as the robot:
```bash
conda create -y -n lerobot python=3.12
conda activate lerobot
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python
pip install -e .
cd ..
```
### Install LeRobot
```bash
conda install ffmpeg -c conda-forge
conda install -c conda-forge "pinocchio>=3.0.0,<4.0.0"
git clone https://github.com/huggingface/lerobot.git
cd lerobot
pip install -e '.[unitree_g1]'
```
<Tip>
For now, pinocchio must be installed from conda-forge (not pip) to include the
CasADi bindings needed for arm IK.
</Tip>
### Test the Installation (Simulation)
The simulation environment has its own dependencies. Check the Simulation environment dependencies: [Unitree G1 Mujoco EnvHub](https://huggingface.co/lerobot/unitree-g1-mujoco/tree/main).
```bash
pip install mujoco loguru msgpack msgpack-numpy
```
```bash
lerobot-teleoperate \
--robot.type=unitree_g1 \
--robot.is_simulation=true \
--teleop.type=unitree_g1 \
--teleop.id=wbc_unitree \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "localhost", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30, "warmup_s": 5}}' \
--display_data=true \
--robot.controller=GrootLocomotionController
```
This will launch a [MuJoCo sim instance](https://huggingface.co/lerobot/unitree-g1-mujoco/tree/main) for the G1. You can connect a gamepad to your machine before launching in order to control the robot's locomotion in sim. We support both [HolosomaLocomotionController](https://github.com/amazon-far/holosoma) and [GrootLocomotionController](https://github.com/NVlabs/GR00T-WholeBodyControl) via `--robot.controller`.
- Press `9` to release the robot
- Press `7` / `8` to increase / decrease waist height
### Connect to the Physical Robot
The G1's Ethernet IP is fixed at `192.168.123.164`. Your machine must have a static IP on the same subnet: `192.168.123.x` where `x ≠ 164`.
```bash ```bash
# Replace 'enp131s0' with your ethernet interface name (check with `ip a`) # Replace 'enp131s0' with your ethernet interface name (check with `ip a`)
@@ -75,23 +26,47 @@ sudo ip addr add 192.168.123.200/24 dev enp131s0
sudo ip link set enp131s0 up sudo ip link set enp131s0 up
``` ```
### SSH into the Robot **Note**: The G1's Ethernet IP is fixed at `192.168.123.164`. Your computer must use `192.168.123.x` with x ≠ 164.
### Step 2: SSH into the Robot
```bash ```bash
ssh unitree@192.168.123.164 ssh unitree@192.168.123.164
# Password: 123 # Password: 123
``` ```
### Share Internet via Ethernet You should now be connected to the G1's Orin.
The G1 needs internet access to clone repos and install packages. Share your laptop's connection over Ethernet: ---
## Part 2: Enable WiFi on the Robot
Wlan0 is disabled by default on the G1. To enable it:
### Step 1: Enable WiFi Hardware
```bash
sudo rfkill unblock wifi
sudo rfkill unblock all
# Bring up wlan0
sudo ip link set wlan0 up
# Enable NetworkManager control of wlan0
sudo nmcli radio wifi on
sudo nmcli device set wlan0 managed yes
sudo systemctl restart NetworkManager
```
### Step 2: Enable Internet Forwarding
**On your laptop:** **On your laptop:**
```bash ```bash
# Enable IP forwarding
sudo sysctl -w net.ipv4.ip_forward=1 sudo sysctl -w net.ipv4.ip_forward=1
# Replace wlp132s0f0 with your WiFi interface name # Set up NAT (replace wlp132s0f0 with your WiFi interface)
sudo iptables -t nat -A POSTROUTING -o wlp132s0f0 -s 192.168.123.0/24 -j MASQUERADE sudo iptables -t nat -A POSTROUTING -o wlp132s0f0 -s 192.168.123.0/24 -j MASQUERADE
sudo iptables -A FORWARD -i wlp132s0f0 -o enp131s0 -m state --state RELATED,ESTABLISHED -j ACCEPT sudo iptables -A FORWARD -i wlp132s0f0 -o enp131s0 -m state --state RELATED,ESTABLISHED -j ACCEPT
sudo iptables -A FORWARD -i enp131s0 -o wlp132s0f0 -j ACCEPT sudo iptables -A FORWARD -i enp131s0 -o wlp132s0f0 -j ACCEPT
@@ -100,203 +75,129 @@ sudo iptables -A FORWARD -i enp131s0 -o wlp132s0f0 -j ACCEPT
**On the G1:** **On the G1:**
```bash ```bash
# Add laptop as default gateway
sudo ip route del default 2>/dev/null || true sudo ip route del default 2>/dev/null || true
sudo ip route add default via 192.168.123.200 dev eth0 sudo ip route add default via 192.168.123.200 dev eth0
echo "nameserver 8.8.8.8" | sudo tee /etc/resolv.conf echo "nameserver 8.8.8.8" | sudo tee /etc/resolv.conf
# Verify # Test connection
ping -c 3 8.8.8.8 ping -c 3 8.8.8.8
``` ```
### Install the Unitree SDK on the G1 ### Step 3: Connect to WiFi Network
Follow the [unitree_sdk2_python installation guide](https://github.com/unitreerobotics/unitree_sdk2_python#installation):
```bash
conda create -y -n lerobot python=3.12
conda activate lerobot
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python
python -m pip install -e .
cd ..
```
### Install LeRobot on the G1
```bash
git clone https://github.com/huggingface/lerobot.git
cd lerobot
conda install -c conda-forge "pinocchio>=3.0.0,<4.0.0"
python -m pip install -e '.[unitree_g1]'
```
<Tip>
For now, pinocchio must be installed from conda-forge (not pip) to include the
CasADi bindings needed for arm IK.
</Tip>
### (Optional) Enable WiFi on the Robot
For wireless SSH access, you can enable WiFi on the G1 (it's blocked by default):
```bash
sudo rfkill unblock all
sudo ip link set wlan0 up
sudo nmcli radio wifi on
sudo nmcli device set wlan0 managed yes
sudo systemctl restart NetworkManager
```
**Connect to a WiFi network:**
```bash ```bash
# List available networks
nmcli device wifi list nmcli device wifi list
# Connect to your WiFi (example)
sudo nmcli connection add type wifi ifname wlan0 con-name "YourNetwork" ssid "YourNetwork" sudo nmcli connection add type wifi ifname wlan0 con-name "YourNetwork" ssid "YourNetwork"
sudo nmcli connection modify "YourNetwork" wifi-sec.key-mgmt wpa-psk sudo nmcli connection modify "YourNetwork" wifi-sec.key-mgmt wpa-psk
sudo nmcli connection modify "YourNetwork" wifi-sec.psk "YourPassword" sudo nmcli connection modify "YourNetwork" wifi-sec.psk "YourPassword"
sudo nmcli connection modify "YourNetwork" connection.autoconnect yes sudo nmcli connection modify "YourNetwork" connection.autoconnect yes
sudo nmcli connection up "YourNetwork" sudo nmcli connection up "YourNetwork"
# Check WiFi IP address
ip a show wlan0 ip a show wlan0
``` ```
You can then SSH over WiFi instead of Ethernet: ### Step 4: SSH Over WiFi
Once connected to WiFi, note the robot's IP address and disconnect the Ethernet cable. You can now SSH over WiFi:
```bash ```bash
ssh unitree@<ROBOT_WIFI_IP> ssh unitree@<YOUR_ROBOT_IP>
# Password: 123 # Password: 123
``` ```
--- Replace `<YOUR_ROBOT_IP>` with your robot's actual WiFi IP address.
## Part 2: Teleoperation & Locomotion
### Run the Robot Server
On the robot (from `~/lerobot`):
```bash
cd ~/lerobot
python src/lerobot/robots/unitree_g1/run_g1_server.py --camera
```
### Run the Locomotion Policy
You can run the teleoperation client from your laptop over Ethernet, over WiFi (experimental), or directly on the robot itself. Mind potential latency introduced by your network.
**From your laptop:**
```bash
lerobot-teleoperate \
--robot.type=unitree_g1 \
--robot.is_simulation=false \
--robot.robot_ip=<ROBOT_IP> \
--teleop.type=unitree_g1 \
--teleop.id=wbc_unitree \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "<ROBOT_IP>", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--display_data=true \
--robot.controller=HolosomaLocomotionController
```
We support both [GrootLocomotionController](https://github.com/NVlabs/GR00T-WholeBodyControl) and [HolosomaLocomotionController](https://github.com/amazon-far/holosoma) via `--robot.controller`.
--- ---
## Part 3: Loco-Manipulation with the Homunculus Exoskeleton ## Part 3: Robot Server Setup
We provide a loco-manipulation solution via the Homunculus Exoskeleton — an open-source 7 DoF exoskeleton for whole-body control. Check it out [here](https://github.com/nepyope/hmc_exo). ### Step 1: Install LeRobot on the Orin
### Calibrate SSH into the robot and install LeRobot:
```bash ```bash
lerobot-calibrate \ ssh unitree@<YOUR_ROBOT_IP>
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \ conda create -y -n lerobot python=3.10
--teleop.right_arm_config.port=/dev/ttyACM0 \ conda activate lerobot
--teleop.id=exo git clone https://github.com/huggingface/lerobot.git
cd lerobot
pip install -e '.[unitree_g1]'
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python && pip install -e .
``` ```
During calibration, move each joint through its entire range. After fitting, move the joint to a neutral position and press `n` to advance. **Note**: The Unitree SDK requires CycloneDDS v0.10.2 to be installed. See the [Unitree SDK documentation](https://github.com/unitreerobotics/unitree_sdk2_python) for details.
### Record a Dataset ### Step 2: Run the Robot Server
On the robot:
```bash ```bash
lerobot-record \ python src/lerobot/robots/unitree_g1/run_g1_server.py
--robot.type=unitree_g1 \
--robot.is_simulation=true \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "localhost", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo \
--dataset.repo_id=your-username/dataset-name \
--dataset.single_task="Test" \
--dataset.num_episodes=2 \
--dataset.episode_time_s=5 \
--dataset.reset_time_s=5 \
--dataset.push_to_hub=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2
``` ```
> **Note:** Omit `--teleop.left_arm_config.port` and `--teleop.right_arm_config.port` if you're only using the joystick. **Important**: Keep this terminal running. The server must be active for remote control.
Example dataset: [nepyope/unitree_box_move_blue_full](https://huggingface.co/datasets/nepyope/unitree_box_move_blue_full)
--- ---
## Part 4: Training & Inference ## Part 4: Controlling the robot
### Train With the robot server running, you can now control the robot remotely. Let's launch a locomotion policy.
### Step 1: Install LeRobot on your machine
```bash ```bash
python src/lerobot/scripts/lerobot_train.py \ conda create -y -n lerobot python=3.10
--dataset.repo_id=your-username/dataset-name \ conda activate lerobot
--policy.type=pi05 \ git clone https://github.com/huggingface/lerobot.git
--output_dir=./outputs/pi05_training \ cd lerobot
--job_name=pi05_training \ pip install -e '.[unitree_g1]'
--policy.repo_id=your-username/your-repo-id \ git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
--policy.pretrained_path=lerobot/pi05_base \ cd unitree_sdk2_python && pip install -e .
--policy.compile_model=true \
--policy.gradient_checkpointing=true \
--wandb.enable=true \
--policy.dtype=bfloat16 \
--policy.freeze_vision_encoder=false \
--policy.train_expert_only=false \
--steps=3000 \
--policy.device=cuda \
--batch_size=32
``` ```
### Inference with RTC ### Step 2: Update Robot IP in Config
Once trained, we recommend deploying policies using inference-time RTC: Edit the config file to match your robot's WiFi IP:
```python
# In src/lerobot/robots/unitree_g1/config_unitree_g1.py
robot_ip: str = "<YOUR_ROBOT_IP>" # Replace with your robot's WiFi IP.
```
### Step 3: Run the Locomotion Policy
```bash ```bash
python examples/rtc/eval_with_real_robot.py \ # Run GR00T locomotion controller
--policy.path=your-username/your-repo-id \ python examples/unitree_g1/gr00t_locomotion.py --repo-id "nepyope/GR00T-WholeBodyControl_g1"
--policy.device=cuda \
--robot.type=unitree_g1 \ # Run Holosoma locomotion controller
--robot.is_simulation=false \ python examples/unitree_g1/holosoma_locomotion.py
--robot.controller=HolosomaLocomotionController \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "<ROBOT_IP>", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--task="task_description" \
--duration=1000 \
--fps=30 \
--rtc.enabled=true
``` ```
Press `Ctrl+C` to stop the policy.
--- ---
## Running in Simulation Mode (MuJoCo)
You can now test policies in MuJoCo before unleashing them on the physical robot. To do so, simply set `is_simulation=True` in the config.
## Additional Resources ## Additional Resources
- [Unitree SDK Documentation](https://github.com/unitreerobotics/unitree_sdk2_python) - [Unitree SDK Documentation](https://github.com/unitreerobotics/unitree_sdk2_python)
- [GR00T-WholeBodyControl](https://github.com/NVlabs/GR00T-WholeBodyControl) - [GR00T-WholeBodyControl](https://github.com/NVlabs/GR00T-WholeBodyControl)
- [Holosoma](https://github.com/amazon-far/holosoma) - [Holosoma](https://github.com/amazon-far/holosoma)
- [LeRobot Documentation](https://github.com/huggingface/lerobot) - [LeRobot Documentation](https://github.com/huggingface/lerobot)
- [Unitree IL LeRobot](https://github.com/unitreerobotics/unitree_IL_lerobot) - [Unitree_IL_Lerobot](https://github.com/unitreerobotics/unitree_IL_lerobot)
--- ---
_Last updated: March 2026_ _Last updated: December 2025_

View File

@@ -12,7 +12,6 @@ LeRobot provides several utilities for manipulating datasets:
4. **Add Features** - Add new features to a dataset 4. **Add Features** - Add new features to a dataset
5. **Remove Features** - Remove features from a dataset 5. **Remove Features** - Remove features from a dataset
6. **Convert to Video** - Convert image-based datasets to video format for efficient storage 6. **Convert to Video** - Convert image-based datasets to video format for efficient storage
7. **Show the Info of Datasets** - Show a summary of dataset information, such as the number of episodes.
The core implementation is in `lerobot.datasets.dataset_tools`. The core implementation is in `lerobot.datasets.dataset_tools`.
An example script detailing how to use the tools API is available in `examples/dataset/use_dataset_tools.py`. An example script detailing how to use the tools API is available in `examples/dataset/use_dataset_tools.py`.
@@ -157,30 +156,6 @@ lerobot-edit-dataset \
**Note:** The resulting dataset will be a proper LeRobotDataset with all cameras encoded as videos in the `videos/` directory, with parquet files containing only metadata (no raw image data). All episodes, stats, and tasks are preserved. **Note:** The resulting dataset will be a proper LeRobotDataset with all cameras encoded as videos in the `videos/` directory, with parquet files containing only metadata (no raw image data). All episodes, stats, and tasks are preserved.
### Show the information of datasets
Show dataset information such as the number of episodes, number of frames, file size, and so on.
No change will be made to the dataset.
```bash
# Show dataset information without feature details
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type info \
# Show dataset information with feature details
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type info \
--operation.show_features true
```
**Parameters:**
- `show_features`: Whether to include feature details in the displayed dataset information (default: `false`).
### Push to Hub ### Push to Hub
Add the `--push_to_hub true` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub: Add the `--push_to_hub true` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub:

View File

@@ -45,7 +45,7 @@ policy.type=wall_x
For training WallX, you can use the standard LeRobot training script with the appropriate configuration: For training WallX, you can use the standard LeRobot training script with the appropriate configuration:
```bash ```bash
lerobot-train \ python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your_dataset \ --dataset.repo_id=your_dataset \
--policy.type=wall_x \ --policy.type=wall_x \
--output_dir=./outputs/wallx_training \ --output_dir=./outputs/wallx_training \

View File

@@ -154,7 +154,7 @@ lerobot-train \
```bash ```bash
lerobot-train \ lerobot-train \
--dataset.repo_id=<USER>/bimanual-so100-handover-cube \ --dataset.repo_id=pepijn223/bimanual-so100-handover-cube \
--output_dir=./outputs/xvla_bimanual \ --output_dir=./outputs/xvla_bimanual \
--job_name=xvla_so101_training \ --job_name=xvla_so101_training \
--policy.path="lerobot/xvla-base" \ --policy.path="lerobot/xvla-base" \

View File

@@ -22,7 +22,7 @@ lerobot-replay \
--robot.type=so100_follower \ --robot.type=so100_follower \
--robot.port=/dev/tty.usbmodem58760431541 \ --robot.port=/dev/tty.usbmodem58760431541 \
--robot.id=black \ --robot.id=black \
--dataset.repo_id=<USER>/record-test \ --dataset.repo_id=aliberts/record-test \
--dataset.episode=2 --dataset.episode=2
``` ```
""" """
@@ -57,7 +57,7 @@ class DatasetReplayConfig:
repo_id: str repo_id: str
# Episode to replay. # Episode to replay.
episode: int episode: int
# Root directory where the dataset will be stored (e.g. 'dataset/path'). If None, defaults to $HF_LEROBOT_HOME/repo_id. # Root directory where the dataset will be stored (e.g. 'dataset/path').
root: str | Path | None = None root: str | Path | None = None
# Limit the frames per second. By default, uses the policy fps. # Limit the frames per second. By default, uses the policy fps.
fps: int = 30 fps: int = 30
@@ -81,25 +81,24 @@ def replay(cfg: ReplayConfig):
actions = dataset.hf_dataset.select_columns(ACTION) actions = dataset.hf_dataset.select_columns(ACTION)
robot.connect() robot.connect()
try: log_say("Replaying episode", cfg.play_sounds, blocking=True)
log_say("Replaying episode", cfg.play_sounds, blocking=True) for idx in range(dataset.num_frames):
for idx in range(dataset.num_frames): start_episode_t = time.perf_counter()
start_episode_t = time.perf_counter()
action_array = actions[idx][ACTION] action_array = actions[idx][ACTION]
action = {} action = {}
for i, name in enumerate(dataset.features[ACTION]["names"]): for i, name in enumerate(dataset.features[ACTION]["names"]):
key = f"{name.removeprefix('main_')}.pos" key = f"{name.removeprefix('main_')}.pos"
action[key] = action_array[i].item() action[key] = action_array[i].item()
action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90) action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90)
action["elbow_flex.pos"] -= 90 action["elbow_flex.pos"] -= 90
robot.send_action(action) robot.send_action(action)
dt_s = time.perf_counter() - start_episode_t dt_s = time.perf_counter() - start_episode_t
precise_sleep(max(1 / dataset.fps - dt_s, 0.0)) precise_sleep(max(1 / dataset.fps - dt_s, 0.0))
finally:
robot.disconnect() robot.disconnect()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -32,8 +32,7 @@ import torch
from huggingface_hub import HfApi from huggingface_hub import HfApi
import lerobot import lerobot
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDataset
def main(): def main():

View File

@@ -1,490 +0,0 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SLURM-distributed SARM RA-BC annotation pipeline.
Computes SARM progress values for all frames in a dataset, distributed across
SLURM workers, then merges the shards into a single sarm_progress.parquet.
Two subcommands, each a separate SLURM submission:
compute N workers, each computes progress for a subset of episodes
aggregate 1 worker, merges N shards into sarm_progress.parquet, pushes to hub
Usage:
python slurm_compute_rabc.py compute \\
--repo-id user/dataset --reward-model-path user/sarm_model \\
--stride 10 --device cpu --workers 50 --partition cpu
python slurm_compute_rabc.py aggregate \\
--repo-id user/dataset --reward-model-path user/sarm_model \\
--partition cpu --push-to-hub
"""
import argparse
from pathlib import Path
from datatrove.executor import LocalPipelineExecutor
from datatrove.executor.slurm import SlurmPipelineExecutor
from datatrove.pipeline.base import PipelineStep
class ComputeProgressShards(PipelineStep):
    """Each worker computes SARM progress for its assigned episodes.

    Episodes are split round-robin across ranks (``range(num_episodes)[rank::world_size]``).
    Every rank that produced at least one row writes ``shard_<rank>.parquet`` into
    ``shard_dir``; the shard carries the reward model path in its schema metadata.

    Args:
        repo_id: Dataset identifier forwarded to ``load_sarm_resources``.
        reward_model_path: Reward model identifier forwarded to ``load_sarm_resources``
            and stored in the parquet metadata.
        stride: Compute only every ``stride``-th frame of an episode (plus the last
            frame); must be >= 1.
        head_mode: One of ``"sparse"``, ``"dense"`` or ``"both"``.
        device: Device the processed features are moved to before inference.
        shard_dir: Directory receiving the per-rank parquet shards.

    Raises:
        ValueError: If ``stride`` is smaller than 1.
    """

    def __init__(
        self, repo_id, reward_model_path, stride=1, head_mode="sparse", device="cpu", shard_dir="rabc_shards"
    ):
        super().__init__()
        if stride < 1:
            raise ValueError(f"stride must be >= 1, got {stride}")
        self.repo_id = repo_id
        self.reward_model_path = reward_model_path
        self.stride = stride
        self.head_mode = head_mode
        self.device = device
        self.shard_dir = shard_dir

    def run(self, data=None, rank: int = 0, world_size: int = 1):
        """Compute progress for this rank's episodes and write one parquet shard.

        Args:
            data: Unused by this step.
            rank: Index of this worker; selects the episode subset and the shard name.
            world_size: Total number of workers used for the round-robin split.
        """
        # Heavy imports are done inside run(), not at module import time.
        import logging
        from pathlib import Path

        import numpy as np
        import pyarrow as pa
        import pyarrow.parquet as pq
        import torch
        from tqdm import tqdm

        from lerobot.policies.sarm.compute_rabc_weights import (
            generate_all_frame_indices,
            interpolate_progress,
            load_sarm_resources,
        )
        from lerobot.utils.utils import init_logging

        init_logging()
        dataset, reward_model, preprocess = load_sarm_resources(
            self.repo_id,
            self.reward_model_path,
            self.device,
        )
        # Switch the preprocessor (and each of its steps) to eval mode where supported.
        # NOTE(review): nesting reconstructed from a whitespace-stripped source; the loop
        # over `preprocess.steps` is assumed to run unconditionally — confirm.
        if hasattr(preprocess, "eval"):
            preprocess.eval()
        for step in preprocess.steps:
            if hasattr(step, "eval"):
                step.eval()
        image_key = reward_model.config.image_key
        state_key = reward_model.config.state_key
        frame_gap = reward_model.config.frame_gap
        # Position inside the returned per-frame rewards that is read out for the queried frame.
        center_idx = reward_model.config.n_obs_steps // 2
        dual_mode = reward_model.config.uses_dual_heads
        # A model without dual heads always computes the sparse value and never the dense one.
        compute_sparse = self.head_mode in ("sparse", "both") or not dual_mode
        compute_dense = self.head_mode in ("dense", "both") and dual_mode
        # Round-robin assignment of episodes to ranks.
        my_episodes = list(range(dataset.num_episodes))[rank::world_size]
        if not my_episodes:
            logging.info(f"Rank {rank}: no episodes assigned")
            return
        logging.info(f"Rank {rank}: {len(my_episodes)} / {dataset.num_episodes} episodes")
        all_rows = []
        for ep_idx in tqdm(my_episodes, desc=f"Rank {rank}"):
            ep = dataset.meta.episodes[ep_idx]
            ep_start, ep_end = ep["dataset_from_index"], ep["dataset_to_index"]
            # The task string is read once from the first frame and reused for the episode.
            task = dataset[ep_start].get("task", "perform the task")
            all_ep_indices = generate_all_frame_indices(ep_start, ep_end, frame_gap)
            if self.stride > 1:
                # Keep every stride-th frame (relative to the episode start) and always
                # include the last frame of the episode.
                compute_indices = [i for i in all_ep_indices if (i - ep_start) % self.stride == 0]
                if (ep_end - 1) not in compute_indices:
                    compute_indices.append(ep_end - 1)
                compute_indices = sorted(set(compute_indices))
            else:
                compute_indices = all_ep_indices
            # Maps global frame index -> (sparse_val, dense_val); failed frames are absent.
            frame_results = {}
            for qi in tqdm(compute_indices, desc=f" Ep {ep_idx}", leave=False):
                try:
                    sample = dataset[qi]
                    batch = {
                        image_key: sample[image_key],
                        "task": task,
                        "index": qi,
                        "episode_index": ep_idx,
                    }
                    if state_key in sample:
                        batch[state_key] = sample[state_key]
                    with torch.no_grad():
                        processed = preprocess(batch)
                        vf = processed["video_features"].to(self.device)
                        tf = processed["text_features"].to(self.device)
                        sf = processed.get("state_features")
                        if sf is not None:
                            sf = sf.to(self.device)
                        lengths = processed.get("lengths")
                        # Heads that are not computed stay NaN.
                        sparse_val = dense_val = np.nan
                        if compute_sparse:
                            r = reward_model.calculate_rewards(
                                text_embeddings=tf,
                                video_embeddings=vf,
                                state_features=sf,
                                lengths=lengths,
                                return_all_frames=True,
                                head_mode="sparse",
                            )
                            # Handles both a 2-D result (first row is used) and a 1-D result.
                            sparse_val = float(r[0, center_idx] if r.ndim == 2 else r[center_idx])
                        if compute_dense:
                            r = reward_model.calculate_rewards(
                                text_embeddings=tf,
                                video_embeddings=vf,
                                state_features=sf,
                                lengths=lengths,
                                return_all_frames=True,
                                head_mode="dense",
                            )
                            dense_val = float(r[0, center_idx] if r.ndim == 2 else r[center_idx])
                    frame_results[qi] = (sparse_val, dense_val)
                except Exception as e:
                    # Best effort: a failing frame is logged and skipped, not fatal.
                    logging.warning(f"Failed frame {qi}: {e}")
            if not frame_results:
                logging.warning(f"Episode {ep_idx}: all frames failed, skipping")
                continue
            # Interpolate to all frames in this episode
            computed_idx = np.array(sorted(frame_results.keys()))
            all_frame_arr = np.arange(ep_start, ep_end)
            sparse_vals = np.array([frame_results[i][0] for i in computed_idx]) if compute_sparse else None
            dense_vals = np.array([frame_results[i][1] for i in computed_idx]) if compute_dense else None
            if self.stride > 1 and len(computed_idx) > 1:
                # NOTE(review): interpolate_progress is defined elsewhere; it is assumed to
                # return one value per entry of all_frame_arr — confirm.
                if compute_sparse:
                    sparse_vals = interpolate_progress(computed_idx, sparse_vals, all_frame_arr)
                if compute_dense:
                    dense_vals = interpolate_progress(computed_idx, dense_vals, all_frame_arr)
                output_frames = all_frame_arr
            else:
                # Use only successfully computed frames to avoid indexing mismatch on failures
                output_frames = computed_idx
            for i, fi in enumerate(output_frames):
                row = {"index": int(fi), "episode_index": ep_idx, "frame_index": int(fi - ep_start)}
                if compute_sparse:
                    row["progress_sparse"] = float(sparse_vals[i])
                if compute_dense:
                    row["progress_dense"] = float(dense_vals[i])
                all_rows.append(row)
        # No shard file is written when this rank produced no rows.
        if all_rows:
            import pandas as pd

            df = pd.DataFrame(all_rows).sort_values("index").reset_index(drop=True)
            table = pa.Table.from_pandas(df, preserve_index=False)
            # Replaces the schema metadata with only the reward model path.
            table = table.replace_schema_metadata({b"reward_model_path": self.reward_model_path.encode()})
            shard_dir = Path(self.shard_dir)
            shard_dir.mkdir(parents=True, exist_ok=True)
            out = shard_dir / f"shard_{rank:05d}.parquet"
            pq.write_table(table, out)
            logging.info(f"Rank {rank}: saved {len(df)} rows to {out}")
class AggregateProgress(PipelineStep):
    """Concatenate every ``shard_*.parquet`` in ``shard_dir`` into ``sarm_progress.parquet``.

    The merged table is written next to the dataset (``LeRobotDataset(repo_id).root``)
    and, when ``push_to_hub`` is set, uploaded to the dataset repository as well.
    """

    def __init__(self, repo_id, reward_model_path, shard_dir="rabc_shards", push_to_hub=False):
        super().__init__()
        self.push_to_hub = push_to_hub
        self.shard_dir = shard_dir
        self.reward_model_path = reward_model_path
        self.repo_id = repo_id

    def run(self, data=None, rank: int = 0, world_size: int = 1):
        """Merge the shards; only rank 0 does any work, other ranks return immediately."""
        import datetime
        import logging
        import os
        from pathlib import Path

        import pandas as pd
        import pyarrow as pa
        import pyarrow.parquet as pq

        from lerobot.datasets.lerobot_dataset import LeRobotDataset
        from lerobot.utils.utils import init_logging

        init_logging()
        if rank != 0:
            return

        shard_root = Path(self.shard_dir)
        shard_paths = sorted(shard_root.glob("shard_*.parquet"))
        if len(shard_paths) == 0:
            raise FileNotFoundError(f"No shards found in {shard_root}")

        def _as_iso(timestamp):
            return datetime.datetime.fromtimestamp(timestamp).isoformat(timespec="seconds")

        # The modification-time range is logged so stale shards from earlier runs stand out.
        modified = [os.path.getmtime(path) for path in shard_paths]
        oldest, newest = _as_iso(min(modified)), _as_iso(max(modified))
        logging.info(f"Aggregating {len(shard_paths)} shards (oldest: {oldest}, newest: {newest})")

        merged = pd.concat([pd.read_parquet(path) for path in shard_paths], ignore_index=True)
        merged = merged.sort_values("index").reset_index(drop=True)

        metadata = {b"reward_model_path": self.reward_model_path.encode()}
        table = pa.Table.from_pandas(merged, preserve_index=False).replace_schema_metadata(metadata)

        # The dataset object is only needed to locate the local dataset root.
        dataset_root = Path(LeRobotDataset(self.repo_id, download_videos=False).root)
        out_path = dataset_root / "sarm_progress.parquet"
        out_path.parent.mkdir(parents=True, exist_ok=True)
        pq.write_table(table, out_path)
        logging.info(f"Saved {len(merged)} rows to {out_path}")

        present = [name for name in ("progress_sparse", "progress_dense") if name in merged.columns]
        for col in present:
            v = merged[col].dropna()
            logging.info(f"{col}: mean={v.mean():.4f} std={v.std():.4f} min={v.min():.4f} max={v.max():.4f}")

        if not self.push_to_hub:
            return

        from huggingface_hub import HfApi

        hub_path = "sarm_progress.parquet"
        logging.info(f"Uploading to {self.repo_id}/{hub_path}")
        HfApi().upload_file(
            path_or_fileobj=str(out_path),
            path_in_repo=hub_path,
            repo_id=self.repo_id,
            repo_type="dataset",
        )
        logging.info(f"Uploaded: https://huggingface.co/datasets/{self.repo_id}/blob/main/{hub_path}")
def make_compute_executor(
    repo_id,
    reward_model_path,
    stride,
    head_mode,
    device,
    shard_dir,
    logs_dir,
    job_name,
    slurm,
    workers,
    partition,
    cpus_per_task,
    mem_per_cpu,
):
    """Build the executor that runs ``ComputeProgressShards`` over ``workers`` tasks.

    With ``slurm`` truthy a ``SlurmPipelineExecutor`` is returned (one task per worker,
    24 h time limit); otherwise a ``LocalPipelineExecutor`` that runs the same number
    of tasks with a single local worker.
    """
    step = ComputeProgressShards(repo_id, reward_model_path, stride, head_mode, device, str(shard_dir))
    common = {"pipeline": [step], "logging_dir": str(logs_dir / job_name)}

    if not slurm:
        return LocalPipelineExecutor(**common, tasks=workers, workers=1)

    return SlurmPipelineExecutor(
        **common,
        job_name=job_name,
        tasks=workers,
        workers=workers,
        time="24:00:00",
        partition=partition,
        cpus_per_task=cpus_per_task,
        sbatch_args={"mem-per-cpu": mem_per_cpu},
    )
def make_aggregate_executor(
    repo_id,
    reward_model_path,
    shard_dir,
    logs_dir,
    job_name,
    slurm,
    partition,
    cpus_per_task,
    mem_per_cpu,
    push_to_hub,
):
    """Build the single-task executor that runs ``AggregateProgress``.

    With ``slurm`` truthy a ``SlurmPipelineExecutor`` with a 2 h time limit is returned;
    otherwise a ``LocalPipelineExecutor``. Both run exactly one task on one worker.
    """
    step = AggregateProgress(repo_id, reward_model_path, str(shard_dir), push_to_hub)
    common = {"pipeline": [step], "logging_dir": str(logs_dir / job_name)}

    if not slurm:
        return LocalPipelineExecutor(**common, tasks=1, workers=1)

    return SlurmPipelineExecutor(
        **common,
        job_name=job_name,
        tasks=1,
        workers=1,
        time="02:00:00",
        partition=partition,
        cpus_per_task=cpus_per_task,
        sbatch_args={"mem-per-cpu": mem_per_cpu},
    )
def _add_shared_args(p):
p.add_argument(
"--repo-id",
type=str,
required=True,
help="Hugging Face repository identifier, e.g. 'user/dataset'.",
)
p.add_argument(
"--shard-dir",
type=Path,
default=Path("rabc_shards"),
help="Directory to read/write per-rank parquet shards.",
)
p.add_argument(
"--logs-dir",
type=Path,
default=Path("logs"),
help="Directory for datatrove logs.",
)
p.add_argument(
"--job-name",
type=str,
default=None,
help="SLURM job name (defaults to rabc_<subcommand>).",
)
p.add_argument(
"--slurm",
type=int,
default=1,
help="1 = submit via SLURM; 0 = run locally (useful for debugging).",
)
p.add_argument(
"--partition",
type=str,
default=None,
help="SLURM partition to submit to.",
)
p.add_argument(
"--cpus-per-task",
type=int,
default=4,
help="Number of CPUs per SLURM task.",
)
p.add_argument(
"--mem-per-cpu",
type=str,
default="4G",
help="Memory per CPU, e.g. '4G' or '1950M'.",
)
def main():
    """Parse the command line and run the executor for the chosen subcommand."""
    parser = argparse.ArgumentParser(
        description="SLURM-distributed SARM RA-BC annotation pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # compute subcommand
    compute_parser = subparsers.add_parser(
        "compute",
        help="Distribute progress computation across SLURM workers.",
    )
    _add_shared_args(compute_parser)
    compute_parser.add_argument(
        "--reward-model-path",
        type=str,
        required=True,
        help="Path or HF repo id of the SARM reward model.",
    )
    compute_parser.add_argument(
        "--stride",
        type=int,
        default=1,
        help="Compute every Nth frame; intermediate frames are interpolated (must be >= 1).",
    )
    compute_parser.add_argument(
        "--head-mode",
        type=str,
        default="sparse",
        choices=["sparse", "dense", "both"],
        help="Which reward head(s) to compute.",
    )
    compute_parser.add_argument(
        "--device",
        type=str,
        default="cpu",
        help="Device for reward model inference, e.g. 'cpu' or 'cuda'.",
    )
    compute_parser.add_argument(
        "--workers",
        type=int,
        default=50,
        help="Number of parallel SLURM tasks (one shard per worker).",
    )

    # aggregate subcommand
    aggregate_parser = subparsers.add_parser(
        "aggregate",
        help="Merge per-rank shards into a single sarm_progress.parquet.",
    )
    _add_shared_args(aggregate_parser)
    aggregate_parser.add_argument(
        "--reward-model-path",
        type=str,
        required=True,
        help="Path or HF repo id of the SARM reward model (stored in parquet metadata).",
    )
    aggregate_parser.add_argument(
        "--push-to-hub",
        action="store_true",
        help="Upload sarm_progress.parquet to the Hugging Face Hub after aggregation.",
    )

    args = parser.parse_args()

    # The parsed namespace doubles as the keyword arguments of the executor factories.
    options = dict(vars(args))
    command = options.pop("command")
    options["job_name"] = args.job_name or f"rabc_{command}"
    options["slurm"] = options["slurm"] == 1

    factory = make_compute_executor if command == "compute" else make_aggregate_executor
    factory(**options).run()


if __name__ == "__main__":
    main()

View File

@@ -1,717 +0,0 @@
"""
Action consistency analysis for imitation learning datasets.
Two parallel analyses per dataset:
1. State-based: KNN in joint-state space → action chunk variance
2. Image-based: KNN in SigLIP embedding space → action chunk variance
Comparing them reveals whether visual similarity and proprioceptive similarity
agree on where the data is inconsistent — and images are what the policy
primarily sees.
"""
import json
from pathlib import Path
import av
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from huggingface_hub import snapshot_download
from matplotlib.colors import LinearSegmentedColormap
from PIL import Image
from scipy.spatial import cKDTree
from transformers import AutoImageProcessor, AutoModel
# Datasets to analyse; each entry gives the Hub repo id and the label used in plot titles.
DATASETS = [
    {"repo_id": "lerobot-data-collection/level2_final_quality3", "label": "HQ curated"},
    {"repo_id": "lerobot-data-collection/level12_rac_2_2026-02-08_1", "label": "Full collection"},
]
# Output directory next to this script; created at import time.
OUTPUT_DIR = Path(__file__).resolve().parent / "outputs"
OUTPUT_DIR.mkdir(exist_ok=True)
# Upper bound on the number of frames subsampled per dataset.
MAX_FRAMES = 100_000
# Number of cross-episode neighbours whose action chunks are compared per frame.
K_NEIGHBORS = 50
# Number of consecutive actions concatenated into one action chunk.
ACTION_CHUNK_SIZE = 30
# Camera stream whose videos are downloaded and embedded.
CAMERA_KEY = "observation.images.base"
# Vision encoder checkpoint and the batch size used when embedding frames.
ENCODER_MODEL = "google/siglip-base-patch16-224"
ENCODE_BATCH_SIZE = 512
# Seed for the NumPy random generator that subsamples frames.
SEED = 42
# Resolution of the saved figure.
DPI = 150
# Colormap used to colour scatter points by action variance.
CONSISTENCY_CMAP = LinearSegmentedColormap.from_list(
    "consistency", ["#0a2e0a", "#1a8e1a", "#88cc22", "#ffaa22", "#ff2222"]
)
# FK chains from OpenArm bimanual URDF (same as workspace_density.py).
# Each entry is (rpy, xyz, axis): a fixed transform given by roll/pitch/yaw and a
# translation, followed by a rotation about `axis` by the next joint angle.
# Entries with axis=None are fixed links and consume no joint angle (see batch_fk).
LEFT_CHAIN = [
    ((-np.pi / 2, 0, 0), (0, 0.031, 0.698), None),
    ((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)),
    ((-np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)),
    ((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)),
    ((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)),
    ((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)),
    ((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)),
    ((0, 0, 0), (-0.0375, 0, 0), (0, -1, 0)),
    ((0, 0, 0), (0, 0, 0.1001), None),
    ((0, 0, 0), (0, 0, 0.08), None),
]
# Same layout as LEFT_CHAIN; differs in the base roll/offset sign and the last joint axis.
RIGHT_CHAIN = [
    ((np.pi / 2, 0, 0), (0, -0.031, 0.698), None),
    ((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)),
    ((np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)),
    ((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)),
    ((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)),
    ((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)),
    ((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)),
    ((0, 0, 0), (-0.0375, 0, 0), (0, 1, 0)),
    ((0, 0, 0), (0, 0, 0.1001), None),
    ((0, 0, 0), (0, 0, 0.08), None),
]
# ── FK math ─────────────────────────────────────────────
def _rot_x(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
def _rot_y(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
def _rot_z(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
def _tf(rpy: tuple, xyz: tuple) -> np.ndarray:
    """Build a 4x4 homogeneous transform from roll/pitch/yaw angles and a translation.

    The rotation part is composed as ``Rz(yaw) @ Ry(pitch) @ Rx(roll)``.
    """
    roll, pitch, yaw = rpy
    rotation = _rot_z(yaw) @ _rot_y(pitch) @ _rot_x(roll)
    transform = np.eye(4)
    transform[:3, 3] = xyz
    transform[:3, :3] = rotation
    return transform
def _batch_axis_rot(axis: tuple, angles: np.ndarray) -> np.ndarray:
n = len(angles)
ax = np.asarray(axis, dtype=np.float64)
ax = ax / np.linalg.norm(ax)
x, y, z = ax
c = np.cos(angles)
s = np.sin(angles)
t = 1 - c
rot = np.zeros((n, 4, 4))
rot[:, 0, 0] = t * x * x + c
rot[:, 0, 1] = t * x * y - s * z
rot[:, 0, 2] = t * x * z + s * y
rot[:, 1, 0] = t * x * y + s * z
rot[:, 1, 1] = t * y * y + c
rot[:, 1, 2] = t * y * z - s * x
rot[:, 2, 0] = t * x * z - s * y
rot[:, 2, 1] = t * y * z + s * x
rot[:, 2, 2] = t * z * z + c
rot[:, 3, 3] = 1.0
return rot
def batch_fk(chain: list, joint_angles: np.ndarray) -> np.ndarray:
    """Run forward kinematics for a batch of joint configurations.

    Args:
        chain: Sequence of ``(rpy, xyz, axis)`` entries; entries whose ``axis`` is
            ``None`` are fixed transforms and consume no joint angle.
        joint_angles: Array indexed as ``[sample, joint]``; columns are consumed in
            order, one per entry with a non-``None`` axis.

    Returns:
        Array of shape ``(n_samples, 3)`` holding the translation of the final frame.
    """
    n_samples = joint_angles.shape[0]
    pose = np.tile(np.eye(4), (n_samples, 1, 1))
    joint_col = 0
    for rpy, xyz, axis in chain:
        pose = pose @ _tf(rpy, xyz)
        if axis is None:
            continue
        joint_rot = _batch_axis_rot(axis, joint_angles[:, joint_col])
        pose = np.einsum("nij,njk->nik", pose, joint_rot)
        joint_col += 1
    return pose[:, :3, 3]
# ── Data helpers ────────────────────────────────────────
def _flatten_names(obj: object) -> list[str]:
if isinstance(obj, dict):
out: list[str] = []
for v in obj.values():
out.extend(_flatten_names(v))
return out
if isinstance(obj, (list, tuple)):
out = []
for item in obj:
if isinstance(item, (list, tuple, dict)):
out.extend(_flatten_names(item))
else:
out.append(str(item))
return out
return [str(obj)]
def _detect_and_convert(vals: np.ndarray) -> np.ndarray:
mx = np.max(np.abs(vals))
if mx > 360:
print(f" Unit detection: servo ticks (max={mx:.0f})")
return (vals - 2048) / 2048 * np.pi
if mx > 6.3:
print(f" Unit detection: degrees (max={mx:.1f})")
return np.deg2rad(vals)
print(f" Unit detection: radians (max={mx:.3f})")
return vals.astype(np.float64)
def _find_joint_indices(features: dict, state_col: str, n_dim: int) -> tuple[list[int], list[int]]:
    """Locate the state-vector columns of the 7 left-arm and 7 right-arm joints.

    Feature names are tried first (``left_joint_<j>`` / ``right_joint_<j>`` for j = 1..7,
    case-insensitive substring match). If that does not yield 7 + 7 columns the function
    falls back to fixed positions based on the state dimension.

    Raises:
        RuntimeError: If the state has fewer than 14 dimensions and names did not match.
    """
    feature = features.get("observation.state", features.get(state_col, {}))
    names = _flatten_names(feature.get("names", []))
    left_idx: list[int] = []
    right_idx: list[int] = []

    if names and len(names) == n_dim:
        lowered = [n.lower() for n in names]
        print(f" Feature names: {names[:4]}{names[-4:]}")

        def _claim(token: str, taken: list[int]) -> None:
            # Take the first column containing the token that is not already assigned.
            match = next((i for i, nm in enumerate(lowered) if token in nm and i not in taken), None)
            if match is not None:
                taken.append(match)

        for joint in range(1, 8):
            _claim(f"left_joint_{joint}", left_idx)
            _claim(f"right_joint_{joint}", right_idx)

    if len(left_idx) == 7 and len(right_idx) == 7:
        print(f" Matched by name: left={left_idx} right={right_idx}")
        return left_idx, right_idx

    if n_dim >= 16:
        print(" Falling back to positional: [0:7]=left, [8:15]=right")
        return list(range(7)), list(range(8, 15))
    if n_dim >= 14:
        print(" Falling back to positional: [0:7]=left, [7:14]=right")
        return list(range(7)), list(range(7, 14))
    raise RuntimeError(f"State dim {n_dim} too small for bimanual 7-DOF robot")
def download_data(repo_id: str, camera_key: str) -> Path:
    """Download the metadata, data files and one camera's videos of a Hub dataset.

    Returns:
        Local path of the downloaded snapshot.
    """
    print(f" Downloading {repo_id} (parquet + {camera_key} videos) …")
    wanted = ["meta/**", "data/**", f"videos/{camera_key}/**"]
    snapshot_dir = snapshot_download(repo_id=repo_id, repo_type="dataset", allow_patterns=wanted)
    return Path(snapshot_dir)
# ── Data loading ────────────────────────────────────────
def _build_action_chunks(
actions: np.ndarray, episode_ids: np.ndarray, chunk_size: int
) -> tuple[np.ndarray, np.ndarray]:
"""
For each frame, concatenate the next chunk_size actions from the same episode.
Returns (action_chunks, valid_mask).
"""
n = len(actions)
act_dim = actions.shape[1]
chunks = np.zeros((n, chunk_size * act_dim), dtype=np.float64)
valid = np.zeros(n, dtype=bool)
for i in range(n):
end = i + chunk_size
if end > n:
continue
if episode_ids[i] != episode_ids[end - 1]:
continue
chunks[i] = actions[i:end].ravel()
valid[i] = True
return chunks, valid
def load_state_action_data(local: Path, max_frames: int, chunk_size: int, rng: np.random.Generator) -> dict:
    """
    Read every data parquet under ``local``, build action chunks, subsample and z-score.

    At most ``max_frames`` frames with a full action chunk are kept (drawn without
    replacement via ``rng`` and sorted). States and action chunks are standardized per
    dimension using statistics of the kept frames; near-constant dimensions
    (std < 1e-8) are only mean-centred.

    Returns:
        Dict with the raw and normalized states/action chunks of the kept frames,
        their episode ids, the episode ids of all rows (``episode_all``), the joint
        column indices, the total row count, the original row indices of the kept
        frames (``chosen_idx``, for video frame mapping) and the full dataframe.

    Raises:
        RuntimeError: If the state, action or episode_index column is missing.
    """
    info = json.loads((local / "meta" / "info.json").read_text())
    features = info.get("features", {})

    parquet_files = sorted((local / "data").glob("**/*.parquet"))
    df = pd.concat([pd.read_parquet(path) for path in parquet_files], ignore_index=True)
    n_total = len(df)
    print(f" Total frames: {n_total:,}")

    columns = list(df.columns)
    state_col = next((c for c in columns if "observation.state" in c), None)
    action_col = "action" if "action" in columns else None
    if state_col is None:
        raise RuntimeError(f"No observation.state column. Available: {list(df.columns)}")
    if action_col is None:
        raise RuntimeError(f"No action column. Available: {list(df.columns)}")
    ep_col = "episode_index" if "episode_index" in columns else None
    if ep_col is None:
        raise RuntimeError(f"No episode_index column. Available: {list(df.columns)}")

    state_all = np.stack(df[state_col].values).astype(np.float64)
    action_all = np.stack(df[action_col].values).astype(np.float64)
    episode_all = df[ep_col].values.astype(np.int64)

    n_dim = state_all.shape[1]
    act_dim = action_all.shape[1]
    print(f" State dim: {n_dim} Action dim: {act_dim} Chunk size: {chunk_size}")
    print(f" Action chunk dim: {chunk_size * act_dim}")

    left_idx, right_idx = _find_joint_indices(features, state_col, n_dim)

    print(" Building action chunks …")
    action_chunks, valid = _build_action_chunks(action_all, episode_all, chunk_size)
    valid_idx = np.where(valid)[0]
    print(f" Valid frames (with full action chunk): {len(valid_idx):,} / {n_total:,}")

    chosen = valid_idx
    if len(valid_idx) > max_frames:
        chosen = np.sort(rng.choice(valid_idx, max_frames, replace=False))
    print(f" Using {len(chosen):,} frames")

    def _standardize(values: np.ndarray) -> np.ndarray:
        # Per-dimension z-score; near-constant dimensions get a unit divisor.
        mean = values.mean(axis=0)
        std = values.std(axis=0)
        std[std < 1e-8] = 1.0
        return (values - mean) / std

    state_raw = state_all[chosen]
    action_raw = action_chunks[chosen]

    return {
        "state_raw": state_raw,
        "state_norm": _standardize(state_raw),
        "action_raw": action_raw,
        "action_norm": _standardize(action_raw),
        "episode_ids": episode_all[chosen],
        "episode_all": episode_all,
        "left_joint_idx": left_idx,
        "right_joint_idx": right_idx,
        "n_total": n_total,
        "chosen_idx": chosen,
        "df": df,
    }
# ── Video → frame extraction ──────────────────────────────
def build_video_lookup(local: Path, camera_key: str) -> dict:
    """
    Map every episode index to the video file that holds it.

    Reads ``meta/info.json`` for the fps and the video path template, and the episode
    parquet files under ``meta/episodes`` for the per-episode chunk index, file index
    and start timestamp of ``camera_key``.

    Returns:
        ``{episode_index: {"video_path": Path, "from_ts": float, "fps": ...}}``.
    """
    info = json.loads((local / "meta" / "info.json").read_text())
    fps = info["fps"]
    video_template = info.get(
        "video_path",
        "videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4",
    )

    episode_files = sorted((local / "meta" / "episodes").glob("**/*.parquet"))
    ep_df = pd.concat([pd.read_parquet(path) for path in episode_files], ignore_index=True)

    # Column names come with or without the "videos/" prefix; prefer the prefixed form.
    prefix = f"videos/{camera_key}"
    if f"{prefix}/chunk_index" not in ep_df.columns:
        prefix = camera_key
    chunk_col = f"{prefix}/chunk_index"
    file_col = f"{prefix}/file_index"
    ts_from = f"{prefix}/from_timestamp"

    lookup: dict[int, dict] = {}
    rows = zip(
        ep_df["episode_index"].tolist(),
        ep_df[chunk_col].tolist(),
        ep_df[file_col].tolist(),
        ep_df[ts_from].tolist(),
    )
    for episode_index, chunk_index, file_index, start_ts in rows:
        video_rel = video_template.format(
            video_key=camera_key, chunk_index=int(chunk_index), file_index=int(file_index)
        )
        lookup[int(episode_index)] = {
            "video_path": local / video_rel,
            "from_ts": float(start_ts),
            "fps": fps,
        }
    return lookup
def _decode_video_frames(video_path: str) -> list[np.ndarray]:
    """Decode all frames from a video file using PyAV. Returns list of RGB arrays.

    The container is closed even when decoding fails part-way, so a corrupt file
    does not leak an open file handle.
    """
    container = av.open(video_path)
    try:
        stream = container.streams.video[0]
        stream.thread_type = "AUTO"
        return [frame.to_ndarray(format="rgb24") for frame in container.decode(stream)]
    finally:
        container.close()
def extract_frames(
chosen_idx: np.ndarray,
episode_all: np.ndarray,
video_lookup: dict,
) -> list[np.ndarray | None]:
"""
Extract RGB frames for each chosen global index using PyAV.
Returns list of (H, W, 3) RGB arrays (or None on failure).
"""
unique_eps = np.unique(episode_all)
ep_start: dict[int, int] = {}
for ep in unique_eps:
ep_start[int(ep)] = int(np.where(episode_all == ep)[0][0])
# Build jobs: (output_index, video_path, local_frame_number)
jobs: list[tuple[int, str, int]] = []
for out_i, global_i in enumerate(chosen_idx):
ep = int(episode_all[global_i])
info = video_lookup.get(ep)
if info is None:
continue
local_frame = global_i - ep_start[ep]
jobs.append((out_i, str(info["video_path"]), local_frame))
# Group by video file, decode each video once
from collections import defaultdict
video_jobs: dict[str, list[tuple[int, int]]] = defaultdict(list)
for out_i, vpath, local_frame in jobs:
video_jobs[vpath].append((out_i, local_frame))
frames: list[np.ndarray | None] = [None] * len(chosen_idx)
extracted = 0
n_videos = len(video_jobs)
for vi, (vpath, frame_requests) in enumerate(video_jobs.items()):
if not Path(vpath).exists():
continue
try:
decoded = _decode_video_frames(vpath)
except Exception as exc:
print(f" Warning: failed to decode {Path(vpath).name}: {exc}")
continue
for out_i, local_frame in frame_requests:
if 0 <= local_frame < len(decoded):
frames[out_i] = decoded[local_frame]
extracted += 1
if (vi + 1) % 50 == 0 or (vi + 1) == n_videos:
print(f" Decoded {vi + 1}/{n_videos} videos ({extracted:,} frames so far)")
del decoded
print(f" Extracted {extracted:,} / {len(chosen_idx):,} frames from video")
return frames
# ── SigLIP encoding ─────────────────────────────────────
def encode_frames_siglip(
    frames: list[np.ndarray | None],
    model_name: str,
    batch_size: int,
    device: torch.device,
) -> np.ndarray:
    """
    Embed RGB frames with the image tower of a SigLIP checkpoint.

    Embeddings are L2-normalized. Entries of ``frames`` that are None keep an
    all-zero row.

    Returns:
        float32 array of shape ``(len(frames), embed_dim)``.
    """
    print(f" Loading SigLIP model: {model_name}")
    processor = AutoImageProcessor.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name).to(device).eval()
    embed_dim = model.config.vision_config.hidden_size

    embeddings = np.zeros((len(frames), embed_dim), dtype=np.float32)
    valid_indices = [idx for idx, frame in enumerate(frames) if frame is not None]
    n_valid = len(valid_indices)
    print(f" Encoding {n_valid:,} valid frames in batches of {batch_size}")

    for offset in range(0, n_valid, batch_size):
        batch_idx = valid_indices[offset : offset + batch_size]
        images = [Image.fromarray(frames[idx]) for idx in batch_idx]
        inputs = processor(images=images, return_tensors="pt").to(device)
        with torch.no_grad():
            feats = model.get_image_features(**inputs)
            feats = torch.nn.functional.normalize(feats, dim=-1)
        embeddings[batch_idx] = feats.cpu().numpy()

        done = min(offset + batch_size, n_valid)
        if done == n_valid or done % (batch_size * 10) == 0:
            print(f" {done:,} / {n_valid:,} encoded")

    # Drop the model before the next stage and release cached GPU memory.
    del model, processor
    torch.cuda.empty_cache()
    return embeddings
# ── KNN consistency ─────────────────────────────────────
def compute_consistency(
    features: np.ndarray,
    action_norm: np.ndarray,
    episode_ids: np.ndarray,
    k: int,
    label: str = "",
) -> np.ndarray:
    """
    For each frame, find K nearest neighbors in feature space from other episodes.
    Return per-frame action variance (mean across action dims).

    Up to ``3 * k`` nearest neighbours are queried per frame so that enough candidates
    remain after neighbours from the frame's own episode are removed. The frame itself
    is removed by that same episode filter rather than by position, so a different
    frame with an identical feature vector is not discarded by mistake.

    Args:
        features: ``(n, d)`` feature vectors used for the neighbour search.
        action_norm: ``(n, a)`` normalized action chunks.
        episode_ids: Episode id of every frame (length ``n``).
        k: Maximum number of cross-episode neighbours used per frame.
        label: Tag used in progress messages.

    Returns:
        Array of length ``n``; 0.0 where fewer than two cross-episode neighbours exist
        (including when ``n < 2`` or ``k < 1``).
    """
    n = len(features)
    variance = np.zeros(n)
    # With fewer than two points (or no neighbours requested) nothing can be compared.
    if n < 2 or k < 1:
        return variance

    episode_ids = np.asarray(episode_ids)

    print(f" Building KD-tree on {n:,} vectors ({label}) …")
    tree = cKDTree(features)
    k_query = min(k * 3, n - 1)
    print(f" Querying {k_query} neighbors per frame …")
    # k_query + 1 >= 2, so the result is always 2-D: (n, k_query + 1).
    _dists, indices = tree.query(features, k=k_query + 1)

    print(f" Computing cross-episode action variance ({label}) …")
    for i in range(n):
        neighbors = indices[i]
        # The frame itself shares its own episode id and is dropped here.
        cross_ep = neighbors[episode_ids[neighbors] != episode_ids[i]][:k]
        if len(cross_ep) < 2:
            continue
        variance[i] = np.mean(np.var(action_norm[cross_ep], axis=0))
    return variance
# ── Visualization ───────────────────────────────────────
def _style_ax(ax: plt.Axes) -> None:
    """Apply the dark theme (background, tick and spine colours) to one axes."""
    ax.set_facecolor("#0d1117")
    ax.tick_params(colors="#555", labelsize=8)
    for side in ax.spines:
        ax.spines[side].set_color("#333")
def _plot_histogram(ax: plt.Axes, variance: np.ndarray, title: str, color: str) -> None:
    """Draw a log-binned histogram of the non-zero variances with median and mean markers.

    The median and mean lines are computed over all values, including zeros.
    """
    _style_ax(ax)
    positive = variance[variance > 0]
    if len(positive) > 0:
        # 60 logarithmically spaced bin edges; the lower edge is floored at 1e-6.
        low = np.log10(positive.min().clip(1e-6))
        high = np.log10(positive.max())
        ax.hist(positive, bins=np.logspace(low, high, 60), color=color, alpha=0.8, edgecolor="#222")
        ax.set_xscale("log")

    median_var = np.median(variance)
    mean_var = np.mean(variance)
    ax.axvline(median_var, color="#ff6600", linewidth=2, label=f"median={median_var:.3f}")
    ax.axvline(mean_var, color="#ff2222", linewidth=2, linestyle="--", label=f"mean={mean_var:.3f}")

    ax.set_xlabel("Action variance (log scale)", color="#888", fontsize=10)
    ax.set_ylabel("Frame count", color="#888", fontsize=10)
    ax.set_title(title, color="white", fontsize=11, pad=10)
    ax.legend(fontsize=8, facecolor="#1a1a2e", edgecolor="#333", labelcolor="white")
def _plot_episode_curves(
    ax: plt.Axes,
    var_state: np.ndarray,
    var_image: np.ndarray,
    episode_ids: np.ndarray,
    title: str,
) -> None:
    """Plot per-episode mean variance, sorted from highest to lowest, for both signals.

    The state-based and image-based curves are sorted independently, so the same
    x position does not necessarily refer to the same episode on both curves.
    """
    _style_ax(ax)
    unique_eps = np.unique(episode_ids)
    ep_x = np.arange(len(unique_eps))

    def _episode_means(values: np.ndarray) -> np.ndarray:
        return np.array([values[episode_ids == ep].mean() for ep in unique_eps])

    curves = (
        ("State", _episode_means(var_state), "#4363d8"),
        ("Image", _episode_means(var_image), "#e6194b"),
    )
    for name, ep_means, colour in curves:
        ranked = np.sort(ep_means)[::-1]
        ax.fill_between(ep_x, ranked, alpha=0.2, color=colour)
        ax.plot(ep_x, ranked, color=colour, linewidth=1.2, label=f"{name} (med={np.median(ep_means):.3f})")

    ax.set_xlabel("Episode rank (worst → best)", color="#888", fontsize=10)
    ax.set_ylabel("Mean action variance", color="#888", fontsize=10)
    ax.set_title(title, color="white", fontsize=11, pad=10)
    ax.legend(fontsize=8, facecolor="#1a1a2e", edgecolor="#333", labelcolor="white")
def _plot_heatmap(
    ax: plt.Axes, fig: plt.Figure, tcp_xz: np.ndarray, variance: np.ndarray, title: str
) -> None:
    """Scatter the TCP positions (X vs Z) coloured by action variance, with a colour bar.

    Points are drawn in ascending variance order so high-variance points end up on top.
    The colour range spans the 5th to 95th percentile of the non-zero variances
    (0 to 1 when every variance is zero).
    """
    _style_ax(ax)
    draw_order = np.argsort(variance)

    positive = variance[variance > 0]
    if np.any(variance > 0):
        vmin = np.percentile(positive, 5)
        vmax = np.percentile(positive, 95)
    else:
        vmin, vmax = 0, 1

    sc = ax.scatter(
        tcp_xz[draw_order][:, 0],
        tcp_xz[draw_order][:, 1],
        c=variance[draw_order],
        cmap=CONSISTENCY_CMAP,
        s=0.5,
        alpha=0.6,
        vmin=vmin,
        vmax=vmax,
        rasterized=True,
    )
    ax.set_xlabel("X (m)", color="#888", fontsize=10)
    ax.set_ylabel("Z (m)", color="#888", fontsize=10)
    ax.set_title(title, color="white", fontsize=11, pad=10)
    ax.set_aspect("equal")

    cbar = fig.colorbar(sc, ax=ax, shrink=0.8, pad=0.02)
    cbar.set_label("Action variance", color="white", fontsize=9)
    cbar.ax.tick_params(colors="#aaa", labelsize=7)
def render(results: list[dict], out_path: Path) -> None:
    """
    Draw the comparison figure (one column per dataset) and save it to ``out_path``.

    Rows, top to bottom:
      0. State-based variance histogram
      1. Image-based variance histogram
      2. Per-episode curves (both overlaid)
      3. Spatial heatmap (image-based variance)
    """
    n_ds = len(results)
    # squeeze=False keeps `axes` two-dimensional even for a single dataset.
    fig, axes = plt.subplots(4, n_ds, figsize=(9 * n_ds, 24), facecolor="#0d1117", squeeze=False)

    headline_parts = []
    for col, result in enumerate(results):
        label = result["label"]
        var_s = result["var_state"]
        var_i = result["var_image"]
        episode_ids = result["episode_ids"]

        headline_parts.append(f"{label}: state={np.median(var_s):.3f}, image={np.median(var_i):.3f}")

        _plot_histogram(axes[0, col], var_s, f"{label}\nState-based variance (K={K_NEIGHBORS})", "#4363d8")
        _plot_histogram(
            axes[1, col], var_i, f"{label}\nImage-based variance (SigLIP, K={K_NEIGHBORS})", "#e6194b"
        )
        n_episodes = len(np.unique(episode_ids))
        _plot_episode_curves(
            axes[2, col],
            var_s,
            var_i,
            episode_ids,
            f"{label}\nPer-episode inconsistency ({n_episodes:,} episodes)",
        )
        _plot_heatmap(
            axes[3, col],
            fig,
            result["tcp_xz"],
            var_i,
            f"{label}\nImage-based variance by TCP position (XZ)",
        )

    headline = " | ".join(headline_parts)
    fig.suptitle(
        f"Action Consistency: State vs Image (chunk={ACTION_CHUNK_SIZE}, K={K_NEIGHBORS})\n" + headline,
        color="white",
        fontsize=15,
        y=0.99,
    )
    # Leave room at the top for the two-line super-title.
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(out_path, dpi=DPI, bbox_inches="tight", facecolor=fig.get_facecolor())
    plt.close(fig)
    print(f"\n✓ Saved: {out_path}")
# ── Main ────────────────────────────────────────────────
def main() -> None:
    """
    Run the state-based and image-based consistency analysis for every entry of
    DATASETS, render the comparison figure, and write a JSON summary of the
    episodes with the highest mean image-based variance.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")
    rng = np.random.default_rng(SEED)

    def report(kind: str, values) -> None:
        # One-line summary (median / mean / 90th percentile) of a variance array.
        print(
            f" {kind} variance: median={np.median(values):.4f} "
            f"mean={np.mean(values):.4f} p90={np.percentile(values, 90):.4f}"
        )

    banner = "=" * 60
    results = []
    for entry in DATASETS:
        repo_id = entry["repo_id"]
        label = entry["label"]
        print(f"\n{banner}")
        print(f" {label}: {repo_id}")
        print(f"{banner}")
        local = download_data(repo_id, CAMERA_KEY)
        data = load_state_action_data(local, MAX_FRAMES, ACTION_CHUNK_SIZE, rng)

        # --- State-based KNN ---
        var_state = compute_consistency(
            data["state_norm"], data["action_norm"], data["episode_ids"], K_NEIGHBORS, "state"
        )
        report("State", var_state)

        # --- Image-based KNN ---
        print("\n Preparing image embeddings …")
        video_lookup = build_video_lookup(local, CAMERA_KEY)
        frames = extract_frames(data["chosen_idx"], data["episode_all"], video_lookup)
        embeddings = encode_frames_siglip(frames, ENCODER_MODEL, ENCODE_BATCH_SIZE, device)
        del frames  # free memory
        var_image = compute_consistency(
            embeddings, data["action_norm"], data["episode_ids"], K_NEIGHBORS, "image"
        )
        report("Image", var_image)

        # FK of the left arm gives the TCP positions for the spatial heatmap.
        print(" Computing FK for spatial heatmap …")
        left_joints = data["state_raw"][:, data["left_joint_idx"]]
        left_tcp = batch_fk(LEFT_CHAIN, _detect_and_convert(left_joints))
        results.append(
            {
                "label": label,
                "var_state": var_state,
                "var_image": var_image,
                "episode_ids": data["episode_ids"],
                "tcp_xz": left_tcp[:, [0, 2]],
                "n_total": data["n_total"],
            }
        )

    out = OUTPUT_DIR / "action_consistency_comparison.jpg"
    render(results, out)

    # Worst-episodes summary uses the image-based variance (the stronger signal):
    # the 50 episodes with the highest mean variance per dataset.
    worst_summary = {}
    for res in results:
        ids = res["episode_ids"]
        image_var = res["var_image"]
        per_episode = []
        for ep in np.unique(ids):
            per_episode.append((int(ep), float(image_var[ids == ep].mean())))
        per_episode.sort(key=lambda pair: pair[1], reverse=True)
        worst_summary[res["label"]] = [
            {"episode": ep, "mean_variance": mean_var} for ep, mean_var in per_episode[:50]
        ]
    worst_path = OUTPUT_DIR / "action_consistency_worst_episodes.json"
    worst_path.write_text(json.dumps(worst_summary, indent=2))
    print(f"✓ Saved worst episodes: {worst_path}")


if __name__ == "__main__":
    main()

View File

@@ -1,178 +0,0 @@
"""
Create a JPG grid of random frames sampled from a LeRobot video dataset.
Downloads metadata + video chunks from HuggingFace, picks random frames,
decodes them, and tiles into a single image.
"""
import json
import random
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
from huggingface_hub import snapshot_download
# Hugging Face dataset repository the frames are sampled from.
REPO_ID = "lerobot-data-collection/level2_final_quality3"
# Video feature key of the camera to use; load_video_info falls back to the
# first video key of the dataset when this is None.
CAMERA_KEY = "observation.images.base"
# Grid layout: main() samples GRID_COLS * GRID_ROWS frames.
GRID_COLS = 15
GRID_ROWS = 10
# Thumbnail width in pixels; build_grid derives the height from the first frame's aspect ratio.
THUMB_WIDTH = 160
# Output directory next to this script; created as a side effect of importing the module.
OUTPUT_DIR = Path(__file__).resolve().parent / "outputs"
OUTPUT_DIR.mkdir(exist_ok=True)
# Seed of the random.Random instance used to pick frames.
SEED = 1
def download_metadata(repo_id: str) -> Path:
    """Fetch only the ``meta/`` tree of the dataset (no videos) and return the local snapshot root."""
    print(f"[1/3] Downloading metadata for {repo_id}")
    snapshot_dir = snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        allow_patterns=["meta/**"],
        ignore_patterns=["*.mp4"],
    )
    return Path(snapshot_dir)
def load_video_info(local: Path) -> tuple[str, list[dict], int]:
    """
    Parse ``meta/info.json`` and the episode parquet files.

    Args:
        local: Root of the downloaded dataset snapshot.

    Returns:
        ``(camera_key, episodes, fps)`` where ``episodes`` holds one dict per
        episode with its index, video chunk/file index, start/end timestamps
        inside the video file, and the relative path of that video file.

    Raises:
        RuntimeError: If the dataset has no video feature, CAMERA_KEY is not one
            of them, no episode parquet file exists, or the per-camera metadata
            columns cannot be found.
    """
    info = json.loads((local / "meta" / "info.json").read_text())
    fps = info["fps"]
    features = info["features"]
    video_keys = [k for k, v in features.items() if v.get("dtype") == "video"]
    if not video_keys:
        raise RuntimeError("No video keys found in dataset features")
    if CAMERA_KEY is not None:
        if CAMERA_KEY not in video_keys:
            raise RuntimeError(f"CAMERA_KEY='{CAMERA_KEY}' not found. Available: {video_keys}")
        cam = CAMERA_KEY
    else:
        cam = video_keys[0]
    print(f" camera='{cam}' all_cams={video_keys} fps={fps}")

    episodes_dir = local / "meta" / "episodes"
    parquet_files = sorted(episodes_dir.glob("**/*.parquet"))
    if not parquet_files:
        # pd.concat([]) would otherwise fail with an unrelated-looking ValueError.
        raise RuntimeError(f"No episode parquet files found under {episodes_dir}")
    ep_df = pd.concat([pd.read_parquet(pq) for pq in parquet_files], ignore_index=True)

    video_template = info.get(
        "video_path",
        "videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4",
    )

    # Columns are named "videos/<camera>/<field>"; some datasets omit the "videos/" prefix.
    prefix = f"videos/{cam}"
    if f"{prefix}/chunk_index" not in ep_df.columns:
        prefix = cam
    chunk_col = f"{prefix}/chunk_index"
    file_col = f"{prefix}/file_index"
    ts_from = f"{prefix}/from_timestamp"
    ts_to = f"{prefix}/to_timestamp"
    required = ["episode_index", chunk_col, file_col, ts_from, ts_to]
    missing = [col for col in required if col not in ep_df.columns]
    if missing:
        raise RuntimeError(
            f"Cannot find video metadata columns for {cam}: missing {missing}.\n"
            f"Available: {list(ep_df.columns)}"
        )

    episodes = []
    rows = zip(
        ep_df["episode_index"].tolist(),
        ep_df[chunk_col].tolist(),
        ep_df[file_col].tolist(),
        ep_df[ts_from].tolist(),
        ep_df[ts_to].tolist(),
    )
    for ep_index, chunk, file_idx, start, end in rows:
        ci = int(chunk)
        fi = int(file_idx)
        episodes.append(
            {
                "episode_index": int(ep_index),
                "chunk_index": ci,
                "file_index": fi,
                "from_ts": float(start),
                "to_ts": float(end),
                "video_rel": video_template.format(video_key=cam, chunk_index=ci, file_index=fi),
            }
        )
    return cam, episodes, fps
def pick_random_frames(episodes: list[dict], fps: int, n: int, rng: random.Random) -> list[dict]:
    """
    Pick ``n`` random (episode, timestamp) pairs.

    Episodes whose ``to_ts - from_ts`` is not strictly positive are excluded
    before sampling, so exactly ``n`` picks are returned whenever at least one
    usable episode exists (previously such episodes silently consumed a draw).

    Args:
        episodes: Episode dicts with at least ``from_ts``, ``to_ts`` and ``video_rel``.
        fps: Unused; kept for interface compatibility.
        n: Number of frames to pick.
        rng: Random source, so results are reproducible for a given seed.

    Returns:
        Copies of the chosen episode dicts with an added ``seek_ts`` key, sorted
        by ``(video_rel, seek_ts)`` so frames of one file are decoded together.
        Empty when no episode has a positive duration.
    """
    usable = [ep for ep in episodes if ep["to_ts"] - ep["from_ts"] > 0]
    if not usable:
        return []
    picks = []
    for _ in range(n):
        ep = rng.choice(usable)
        duration = ep["to_ts"] - ep["from_ts"]
        picks.append({**ep, "seek_ts": ep["from_ts"] + rng.random() * duration})
    picks.sort(key=lambda p: (p["video_rel"], p["seek_ts"]))
    return picks
def download_video_files(repo_id: str, local: Path, picks: list[dict]) -> None:
    """Download into ``local`` exactly the video files referenced by ``picks``."""
    unique_paths = {pick["video_rel"] for pick in picks}
    needed = sorted(unique_paths)
    print(f"[2/3] Downloading {len(needed)} video file(s) …")
    snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=str(local),
        allow_patterns=needed,
    )
def extract_frame(video_path: Path, seek_ts: float) -> np.ndarray | None:
    """
    Decode a single frame at the given timestamp.

    Args:
        video_path: Video file to read.
        seek_ts: Position in seconds, converted to milliseconds for the seek.

    Returns:
        The decoded frame, or None when the file cannot be opened or no frame
        can be read at that position.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            return None
        cap.set(cv2.CAP_PROP_POS_MSEC, seek_ts * 1000.0)
        ok, frame = cap.read()
    finally:
        # Release the decoder even if seeking or reading raises.
        cap.release()
    return frame if ok else None
def build_grid(frames: list[np.ndarray], cols: int, thumb_w: int) -> np.ndarray:
    """
    Scale every frame to a common thumbnail size and tile them row by row.

    The thumbnail height follows the aspect ratio of the first frame; an
    incomplete last row is padded with black tiles. Raises RuntimeError when
    ``frames`` is empty.
    """
    if not frames:
        raise RuntimeError("No frames decoded")
    first_h, first_w = frames[0].shape[:2]
    size = (thumb_w, int(thumb_w * first_h / first_w))
    tiles = [cv2.resize(frame, size, interpolation=cv2.INTER_AREA) for frame in frames]

    strips = []
    for start in range(0, len(tiles), cols):
        chunk = tiles[start : start + cols]
        shortfall = cols - len(chunk)
        if shortfall > 0:
            chunk = chunk + [np.zeros_like(chunk[0]) for _ in range(shortfall)]
        strips.append(np.hstack(chunk))
    return np.vstack(strips)
def main() -> None:
    """Sample GRID_COLS * GRID_ROWS random frames from REPO_ID and save them as one JPG grid."""
    rng = random.Random(SEED)
    n_frames = GRID_COLS * GRID_ROWS

    local = download_metadata(REPO_ID)
    cam, episodes, fps = load_video_info(local)
    picks = pick_random_frames(episodes, fps, n_frames, rng)
    download_video_files(REPO_ID, local, picks)

    print(f"[3/3] Decoding {n_frames} frames …")
    frames: list[np.ndarray] = []
    for pick in picks:
        video_file = local / pick["video_rel"]
        if video_file.exists():
            decoded = extract_frame(video_file, pick["seek_ts"])
            if decoded is not None:
                frames.append(decoded)
        else:
            print(f" SKIP: {pick['video_rel']} not found")
    print(f" Decoded {len(frames)}/{n_frames} frames")

    grid = build_grid(frames, GRID_COLS, THUMB_WIDTH)
    safe_name = REPO_ID.replace("/", "_")
    out_path = OUTPUT_DIR / f"{safe_name}_grid_{GRID_COLS}x{GRID_ROWS}.jpg"
    cv2.imwrite(str(out_path), grid, [cv2.IMWRITE_JPEG_QUALITY, 92])
    height, width = grid.shape[0], grid.shape[1]
    print(f"\n✓ Saved: {out_path} ({width}×{height})")


if __name__ == "__main__":
    main()

View File

@@ -1,526 +0,0 @@
"""
Create GIF animations (rendered via a temporary MP4) with a sarm_progress overlay for specified episodes.
Downloads datasets from HuggingFace, extracts episode video + progress data,
and draws the progress line directly on each frame (no panel, no axes).
"""
import json
import subprocess
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
from huggingface_hub import snapshot_download
# Episodes to render: one entry per (dataset repository, episode index) pair.
DATASETS = [
{"repo_id": "lerobot-data-collection/level2_final_quality3", "episode": 250},
]
# Video feature key of the camera whose footage is used.
CAMERA_KEY = (
"observation.images.base" # None = auto-select first camera, or set e.g. "observation.images.top"
)
# Output directory next to this script; created as a side effect of importing the module.
OUTPUT_DIR = Path(__file__).resolve().parent / "outputs"
OUTPUT_DIR.mkdir(exist_ok=True)
# Progress line spans the full video height
# Both values are fractions of the frame height: progress 1.0 is drawn at the
# TOP fraction and progress 0.0 at the BOT fraction (see precompute_pixels).
GRAPH_Y_TOP_FRAC = 0.01
GRAPH_Y_BOT_FRAC = 0.99
# Thickness in pixels of the coloured progress polyline.
LINE_THICKNESS = 3
SHADOW_THICKNESS = 6 # white edge thickness
REF_ALPHA = 0.45 # opacity of the 1.0 reference line
FILL_ALPHA = 0.55 # opacity of the grey fill under the line
# OpenCV font scales for the score text and the task caption.
SCORE_FONT_SCALE = 0.8
TASK_FONT_SCALE = 0.55
def download_episode(repo_id: str, episode: int) -> Path:
    """
    Download the dataset metadata and ``sarm_progress.parquet`` (no videos).

    ``episode`` is not used here; the video file for the episode is fetched
    separately once its location has been read from the metadata.
    """
    print(f"\n[1/5] Downloading metadata for {repo_id}")
    wanted = ["meta/**", "sarm_progress.parquet"]
    snapshot_dir = snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        allow_patterns=wanted,
        ignore_patterns=["*.mp4"],
    )
    return Path(snapshot_dir)
def load_episode_meta(local: Path, episode: int) -> dict:
    """
    Collect fps, camera key, video location, timestamps and task name for one episode.

    Reads ``meta/info.json`` and the episode parquet files under ``meta/episodes``.
    Raises RuntimeError when no video feature exists, CAMERA_KEY is unknown, the
    episode is missing, or the per-camera video columns cannot be found. Failure
    to resolve the task name only prints a warning.
    """
    info = json.loads((local / "meta" / "info.json").read_text())
    fps = info["fps"]
    video_keys = [name for name, spec in info["features"].items() if spec.get("dtype") == "video"]
    if not video_keys:
        raise RuntimeError("No video keys found in dataset features")
    if CAMERA_KEY is None:
        first_cam = video_keys[0]
    elif CAMERA_KEY in video_keys:
        first_cam = CAMERA_KEY
    else:
        raise RuntimeError(f"CAMERA_KEY='{CAMERA_KEY}' not found. Available: {video_keys}")
    print(f" fps={fps} camera='{first_cam}' all_cams={video_keys}")

    # Concatenate every episode-metadata shard and pick the row of our episode.
    shards = sorted((local / "meta" / "episodes").glob("**/*.parquet"))
    ep_df = pd.concat([pd.read_parquet(shard) for shard in shards], ignore_index=True)
    matches = ep_df[ep_df["episode_index"] == episode]
    if matches.empty:
        raise RuntimeError(f"Episode {episode} not found in episode metadata")
    row = matches.iloc[0]

    # Columns are named "videos/<camera>/<field>"; some datasets drop the "videos/" prefix.
    for prefix in (f"videos/{first_cam}", f"{first_cam}"):
        if f"{prefix}/chunk_index" in row.index:
            break
    else:
        raise RuntimeError(
            f"Cannot find video metadata columns for {first_cam}.\nAvailable: {list(row.index)}"
        )
    chunk_idx = int(row[f"{prefix}/chunk_index"])
    file_idx = int(row[f"{prefix}/file_index"])
    from_ts = float(row[f"{prefix}/from_timestamp"])
    to_ts = float(row[f"{prefix}/to_timestamp"])

    video_template = info.get(
        "video_path", "videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4"
    )
    video_rel = video_template.format(
        video_key=first_cam,
        chunk_index=chunk_idx,
        file_index=file_idx,
    )

    # Task name: prefer the "tasks" value on the episode row, otherwise look the
    # task_index up in tasks.parquet (whose row index is the task string).
    task_name = ""
    try:
        has_tasks = "tasks" in row.index and row["tasks"] is not None
        if has_tasks:
            tasks_val = row["tasks"]
            is_sequence = isinstance(tasks_val, (list, tuple, np.ndarray))
            if is_sequence and len(tasks_val) > 0:
                task_name = str(tasks_val[0])
            else:
                task_name = str(tasks_val).strip("[]'")
        else:
            tasks_pq = local / "meta" / "tasks.parquet"
            if tasks_pq.exists():
                tasks_df = pd.read_parquet(tasks_pq)
                task_idx = int(row.get("task_index", 0)) if "task_index" in row.index else 0
                match = tasks_df[tasks_df["task_index"] == task_idx]
                if not match.empty:
                    task_name = str(match.index[0])
        print(f" Task name: '{task_name}'")
    except Exception as e:
        # Best effort: the overlay still works without a caption.
        print(f" WARNING: could not load task name: {e}")

    return {
        "fps": fps,
        "first_cam": first_cam,
        "video_rel": video_rel,
        "chunk_index": chunk_idx,
        "file_index": file_idx,
        "from_ts": from_ts,
        "to_ts": to_ts,
        "task_name": task_name,
    }
def download_video(repo_id: str, local: Path, video_rel: str) -> Path:
    """
    Return the local path of ``video_rel``, downloading it first when it is missing.

    Raises RuntimeError if the file still does not exist after the download.
    """
    video_path = local / video_rel
    if not video_path.exists():
        print(f"[2/5] Downloading video file {video_rel}")
        snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            local_dir=str(local),
            allow_patterns=[video_rel],
        )
        if not video_path.exists():
            raise RuntimeError(f"Video not found after download: {video_path}")
        return video_path
    print(f" Video already cached: {video_path}")
    return video_path
def load_progress(local: Path, episode: int) -> np.ndarray | None:
"""Load sarm_progress values for this episode. Returns sorted array of (frame_index, progress)."""
pq_path = local / "sarm_progress.parquet"
if not pq_path.exists():
print(" WARNING: sarm_progress.parquet not found, trying data parquet …")
return None
df = pd.read_parquet(pq_path)
print(f" sarm_progress.parquet columns: {list(df.columns)}")
ep_df = df[df["episode_index"] == episode].copy()
if ep_df.empty:
print(f" WARNING: No sarm_progress rows for episode {episode}")
return None
ep_df = ep_df.sort_values("frame_index")
# Prefer dense, fall back to sparse
if "progress_dense" in ep_df.columns and ep_df["progress_dense"].notna().any():
prog_col = "progress_dense"
elif "progress_sparse" in ep_df.columns:
prog_col = "progress_sparse"
else:
# Last resort: any column with 'progress' in the name
prog_cols = [c for c in ep_df.columns if "progress" in c.lower()]
if not prog_cols:
return None
prog_col = prog_cols[0]
print(f" Using progress column: '{prog_col}'")
return ep_df[["frame_index", prog_col]].rename(columns={prog_col: "progress"}).values
def extract_episode_clip(video_path: Path, from_ts: float, to_ts: float, out_path: Path) -> Path:
    """
    Cut ``[from_ts, to_ts]`` out of the combined video with ffmpeg (re-encoded, no audio).

    Returns ``out_path``; raises RuntimeError with ffmpeg's stderr on failure.
    """
    duration = to_ts - from_ts
    print(f"[3/5] Extracting clip [{from_ts:.3f}s → {to_ts:.3f}s] ({duration:.2f}s) …")
    seek_args = ["-ss", str(from_ts), "-i", str(video_path), "-t", str(duration)]
    encode_args = ["-c:v", "libx264", "-preset", "fast", "-crf", "18", "-an"]
    cmd = ["ffmpeg", "-y", *seek_args, *encode_args, str(out_path)]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg clip extraction failed:\n{result.stderr}")
    return out_path
def precompute_pixels(
    progress_data: np.ndarray,
    n_frames: int,
    frame_w: int,
    frame_h: int,
) -> np.ndarray:
    """
    Map each progress sample to pixel coordinates.

    Args:
        progress_data: Array whose column 0 is the frame index and column 1 the
            progress value; progress is clipped to [0, 1].
        n_frames: Number of frames in the clip; frame ``n_frames - 1`` maps to
            the right edge of the frame.
        frame_w: Frame width in pixels.
        frame_h: Frame height in pixels.

    Returns:
        Integer array of shape (N, 2) with (x, y) per sample. x spans the full
        video width; progress 1 maps to the top of the graph band and progress 0
        to its bottom.
    """
    frame_indices = progress_data[:, 0].astype(float)
    progress_vals = np.clip(progress_data[:, 1].astype(float), 0.0, 1.0)
    y_top = int(frame_h * GRAPH_Y_TOP_FRAC)
    y_bot = int(frame_h * GRAPH_Y_BOT_FRAC)
    graph_h = y_bot - y_top
    # A single-frame clip (or an unknown frame count reported as 0) would make
    # the divisor zero or negative; fall back to 1 to keep x finite and >= 0.
    last_index = max(n_frames - 1, 1)
    xs = (frame_indices / last_index * (frame_w - 1)).astype(int)
    # progress=1 → y_top, progress=0 → y_bot
    ys = (y_bot - progress_vals * graph_h).astype(int)
    return np.stack([xs, ys], axis=1)  # (N, 2)
def progress_color(t: float) -> tuple[int, int, int]:
    """Blend from red (t=0) to green (t=1) and return the colour in BGR channel order."""
    green = int(255 * t)
    red = int(255 * (1.0 - t))
    blue = 0
    return (blue, green, red)
def prerender_fill(
    pixels: np.ndarray,
    frame_w: int,
    frame_h: int,
) -> np.ndarray:
    """
    Render the grey area under the whole progress curve once, as a BGRA image.

    The polygon follows ``pixels`` and is closed along the bottom of the graph
    band; its alpha channel carries FILL_ALPHA.
    """
    baseline_y = int(frame_h * GRAPH_Y_BOT_FRAC)
    canvas = np.zeros((frame_h, frame_w, 4), dtype=np.uint8)
    # Drop from the last sample to the baseline, then run back to the first x.
    closing_edge = [[pixels[-1][0], baseline_y], [pixels[0][0], baseline_y]]
    outline = np.concatenate([pixels, closing_edge], axis=0).astype(np.int32)
    fill_color = (128, 128, 128, int(255 * FILL_ALPHA))
    cv2.fillPoly(canvas, [outline], color=fill_color)
    return canvas
def alpha_composite(base: np.ndarray, overlay_bgra: np.ndarray, x_max: int) -> None:
    """
    Alpha-blend the BGRA overlay onto ``base`` in place for columns ``x < x_max``.

    Does nothing when ``x_max`` is zero or negative.
    """
    if x_max <= 0:
        return
    target = base[:, :x_max]
    patch = overlay_bgra[:, :x_max]
    # Alpha keeps a trailing axis of length 1 so it broadcasts over the colour channels.
    opacity = patch[:, :, 3:4].astype(np.float32) / 255.0
    foreground = patch[:, :, :3].astype(np.float32) * opacity
    background = target.astype(np.float32) * (1.0 - opacity)
    blended = np.clip(foreground + background, 0, 255)
    target[:] = blended.astype(np.uint8)
def draw_text_outlined(
    frame: np.ndarray,
    text: str,
    pos: tuple[int, int],
    font_scale: float,
    thickness: int = 1,
) -> None:
    """Draw white text over a thicker black copy of itself so it stays readable on any background."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    # Outline pass first (black, 2 px thicker), then the white text on top.
    passes = (
        ((0, 0, 0), thickness + 2),
        ((255, 255, 255), thickness),
    )
    for color, stroke in passes:
        cv2.putText(frame, text, pos, font, font_scale, color, stroke, cv2.LINE_AA)
def composite_video(
    clip_path: Path,
    progress_data: np.ndarray,
    out_path: Path,
    fps: float,
    frame_h: int,
    frame_w: int,
    task_name: str = "",
) -> Path:
    """
    Draw the progress overlay on every frame of the clip and export it as a GIF.

    Frames are first written to a temporary ``<stem>_tmp.mp4`` next to
    ``out_path``; ffmpeg then turns that file into a palette-based GIF and the
    temporary MP4 and palette are removed.

    Args:
        clip_path: Episode clip to read frames from.
        progress_data: Array whose column 0 is the frame index and column 1 the
            progress value. Column 0 must be sorted ascending because it is
            searched with ``np.searchsorted``.
        out_path: Only its directory and stem are used; the result is written
            to ``out_path.with_suffix(".gif")``.
        fps: Frame rate of the intermediate MP4.
        frame_h: Frame height in pixels.
        frame_w: Frame width in pixels.
        task_name: Optional caption drawn at the top centre of every frame.

    Returns:
        Path of the GIF. ffmpeg failures are only printed as warnings, so the
        file may be missing when a conversion step failed.
    """
    # Probe the frame count with a capture that is released again right away.
    probe = cv2.VideoCapture(str(clip_path))
    try:
        n_total = int(probe.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        probe.release()

    pixels = precompute_pixels(progress_data, n_total, frame_w, frame_h)
    y_ref = int(frame_h * GRAPH_Y_TOP_FRAC)
    # Pre-render fill polygon (line is drawn per-frame with live color)
    fill_img = prerender_fill(pixels, frame_w, frame_h)
    # 1.0 reference line overlay (full width, drawn once)
    ref_img = np.zeros((frame_h, frame_w, 4), dtype=np.uint8)
    cv2.line(ref_img, (0, y_ref), (frame_w - 1, y_ref), (200, 200, 200, int(255 * REF_ALPHA)), 1, cv2.LINE_AA)
    frame_indices = progress_data[:, 0].astype(int)
    progress_vals = progress_data[:, 1].astype(float)
    print(f"[4/4] Compositing {n_total} frames …")

    tmp_path = out_path.parent / (out_path.stem + "_tmp.mp4")
    cap = cv2.VideoCapture(str(clip_path))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(str(tmp_path), fourcc, fps, (frame_w, frame_h))
    try:
        fi = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Number of progress samples whose frame index is <= the current frame.
            n_drawn = int(np.searchsorted(frame_indices, fi, side="right"))
            x_cur = int(pixels[min(n_drawn, len(pixels)) - 1][0]) + 1 if n_drawn > 0 else 0
            # Normalised position in the episode; drives the red→green colour.
            t_cur = (n_drawn - 1) / max(len(progress_vals) - 1, 1)

            # 1. reference line (full width, always)
            alpha_composite(frame, ref_img, frame_w)
            # 2. grey fill under curve up to current x
            alpha_composite(frame, fill_img, x_cur)
            # 3. progress line — single color that transitions red→green over time
            if n_drawn >= 2:
                line_col = progress_color(t_cur)
                pts = pixels[:n_drawn].reshape(-1, 1, 2).astype(np.int32)
                cv2.polylines(
                    frame,
                    [pts],
                    isClosed=False,
                    color=(255, 255, 255),
                    thickness=SHADOW_THICKNESS,
                    lineType=cv2.LINE_AA,
                )
                cv2.polylines(
                    frame, [pts], isClosed=False, color=line_col, thickness=LINE_THICKNESS, lineType=cv2.LINE_AA
                )
            # 4. score — bottom right, coloured like the line, with a black outline
            if n_drawn > 0:
                score = float(progress_vals[min(n_drawn, len(progress_vals)) - 1])
                score_text = f"{score:.2f}"
                (tw, th), _ = cv2.getTextSize(score_text, cv2.FONT_HERSHEY_SIMPLEX, SCORE_FONT_SCALE, 2)
                sx = frame_w - tw - 12
                sy = frame_h - 12
                score_col = progress_color(t_cur)
                cv2.putText(
                    frame,
                    score_text,
                    (sx, sy),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    SCORE_FONT_SCALE,
                    (0, 0, 0),
                    4,
                    cv2.LINE_AA,
                )
                cv2.putText(
                    frame,
                    score_text,
                    (sx, sy),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    SCORE_FONT_SCALE,
                    score_col,
                    2,
                    cv2.LINE_AA,
                )
            # 5. task name — top centre
            if task_name:
                (tw, _), _ = cv2.getTextSize(task_name, cv2.FONT_HERSHEY_SIMPLEX, TASK_FONT_SCALE, 1)
                tx = max((frame_w - tw) // 2, 4)
                draw_text_outlined(frame, task_name, (tx, 22), TASK_FONT_SCALE)
            writer.write(frame)
            fi += 1
            if fi % 100 == 0:
                print(f" Frame {fi}/{n_total}", end="\r")
    finally:
        # Release reader and writer even if drawing fails part-way through.
        cap.release()
        writer.release()
    print()

    # Convert to GIF: full resolution, 10 fps, 128-color diff palette.
    gif_path = out_path.with_suffix(".gif")
    palette = out_path.parent / "_palette.png"
    r1 = subprocess.run(  # nosec B607
        [
            "ffmpeg",
            "-y",
            "-i",
            str(tmp_path),
            "-vf",
            f"fps=10,scale={frame_w}:-1:flags=lanczos,palettegen=max_colors=128:stats_mode=diff",
            "-update",
            "1",
            str(palette),
        ],
        capture_output=True,
        text=True,
    )
    if r1.returncode != 0:
        print(f" WARNING: palettegen failed:\n{r1.stderr[-500:]}")
    r2 = subprocess.run(  # nosec B607
        [
            "ffmpeg",
            "-y",
            "-i",
            str(tmp_path),
            "-i",
            str(palette),
            "-filter_complex",
            f"fps=10,scale={frame_w}:-1:flags=lanczos[v];[v][1:v]paletteuse=dither=bayer:bayer_scale=3",
            str(gif_path),
        ],
        capture_output=True,
        text=True,
    )
    if r2.returncode != 0:
        print(f" WARNING: gif encode failed:\n{r2.stderr[-500:]}")
    tmp_path.unlink(missing_ok=True)
    palette.unlink(missing_ok=True)
    return gif_path
def process_dataset(repo_id: str, episode: int):
    """
    Run the whole pipeline for one episode: download, cut the clip, load the
    progress curve, and composite the overlay.

    Returns the path produced by composite_video, or None when no progress data
    could be loaded for the episode.
    """
    safe_name = repo_id.replace("/", "_")
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"Processing: {repo_id} | episode {episode}")
    print(f"{rule}")

    # 1. Download metadata
    local = download_episode(repo_id, episode)
    print(f" Local cache: {local}")

    # 2. Read episode metadata
    ep_meta = load_episode_meta(local, episode)
    print(f" Episode meta: {ep_meta}")

    # 3. Download video file
    video_path = download_video(repo_id, local, ep_meta["video_rel"])

    # 4. Extract clip
    clip_path = OUTPUT_DIR / f"{safe_name}_ep{episode}_clip.mp4"
    extract_episode_clip(video_path, ep_meta["from_ts"], ep_meta["to_ts"], clip_path)

    # 5. Load progress data
    progress_data = load_progress(local, episode)
    if progress_data is None:
        print(" ERROR: Could not load sarm_progress data. Skipping overlay.")
        return
    print(f" Progress frames: {len(progress_data)}")

    # 6. Get clip dimensions (fall back to the dataset fps when OpenCV reports 0)
    cap = cv2.VideoCapture(str(clip_path))
    frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    actual_fps = cap.get(cv2.CAP_PROP_FPS) or ep_meta["fps"]
    cap.release()
    print(f" Clip: {frame_w}×{frame_h} {n_frames} frames @ {actual_fps:.1f}fps")

    # 7. Composite (draw line directly on frames), then drop the intermediate clip
    out_path = OUTPUT_DIR / f"{safe_name}_ep{episode}_progress.mp4"
    final = composite_video(
        clip_path,
        progress_data,
        out_path,
        actual_fps,
        frame_h,
        frame_w,
        task_name=ep_meta.get("task_name", ""),
    )
    clip_path.unlink(missing_ok=True)
    print(f"\n✓ Done: {final}")
    return final
if __name__ == "__main__":
    import traceback

    # Process every configured episode; one failure must not stop the others.
    results = []
    for cfg in DATASETS:
        try:
            out = process_dataset(cfg["repo_id"], cfg["episode"])
        except Exception as e:
            print(f"\nERROR processing {cfg['repo_id']}: {e}")
            traceback.print_exc()
        else:
            if out:
                results.append(out)

    print("\n" + "=" * 60)
    print("Output files:")
    for r in results:
        print(f" {r}")

View File

@@ -1,496 +0,0 @@
"""
Visualize end-effector workspace density and trajectory clusters for OpenArm datasets.
Downloads joint position data (no videos) from HuggingFace, computes forward
kinematics per episode, clusters trajectories with K-means, and renders
2D projections comparing dataset coverage and multimodality.
"""
import json
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from huggingface_hub import snapshot_download
from sklearn.cluster import KMeans
# Datasets to compare: Hugging Face repository id plus the label used for it in plots.
DATASETS = [
{"repo_id": "lerobot-data-collection/level2_final_quality3", "label": "HQ curated"},
{"repo_id": "lerobot-data-collection/level12_rac_2_2026-02-08_1", "label": "Full collection"},
]
# Output directory next to this script; created as a side effect of importing the module.
OUTPUT_DIR = Path(__file__).resolve().parent / "outputs"
OUTPUT_DIR.mkdir(exist_ok=True)
# NOTE(review): presumably passed to cluster_trajectories as n_clusters / n_waypoints — confirm in main().
N_CLUSTERS = 10
WAYPOINTS = 50
# random_state of the KMeans instance in cluster_trajectories.
SEED = 42
# NOTE(review): presumably the resolution used when saving the figure — confirm in render().
DPI = 180
# Colour per cluster; render indexes this list modulo its length.
CLUSTER_COLORS = [
"#e6194b",
"#3cb44b",
"#4363d8",
"#f58231",
"#911eb4",
"#42d4f4",
"#f032e6",
"#bfef45",
"#fabed4",
"#dcbeff",
"#9a6324",
"#fffac8",
"#800000",
"#aaffc3",
"#808000",
"#ffd8b1",
"#000075",
"#a9a9a9",
]
# FK chains extracted from OpenArm bimanual URDF.
# Each entry: (rpy, xyz, revolute_axis_or_None).
# batch_fk applies the fixed rpy/xyz transform of every entry in order and, for
# entries whose axis is not None, an additional rotation about that axis by the
# next joint-angle column. Each chain has 7 entries with an axis.
LEFT_CHAIN = [
((-np.pi / 2, 0, 0), (0, 0.031, 0.698), None),
((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)),
((-np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)),
((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)),
((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)),
((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)),
((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)),
((0, 0, 0), (-0.0375, 0, 0), (0, -1, 0)),
((0, 0, 0), (0, 0, 0.1001), None),
((0, 0, 0), (0, 0, 0.08), None),
]
# Same structure as LEFT_CHAIN; the entries differ in the sign of the base
# offset/roll, the roll of the third entry, and the axis of the last revolute entry.
RIGHT_CHAIN = [
((np.pi / 2, 0, 0), (0, -0.031, 0.698), None),
((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)),
((np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)),
((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)),
((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)),
((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)),
((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)),
((0, 0, 0), (-0.0375, 0, 0), (0, 1, 0)),
((0, 0, 0), (0, 0, 0.1001), None),
((0, 0, 0), (0, 0, 0.08), None),
]
# ── FK math ─────────────────────────────────────────────
def _rot_x(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
def _rot_y(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
def _rot_z(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
def _tf(rpy: tuple, xyz: tuple) -> np.ndarray:
    """
    Build the 4x4 homogeneous transform for a URDF origin.

    The rotation is composed as Rz(yaw) @ Ry(pitch) @ Rx(roll) and ``xyz`` is
    the translation column.
    """
    roll, pitch, yaw = rpy
    rotation = _rot_z(yaw) @ _rot_y(pitch) @ _rot_x(roll)
    transform = np.eye(4)
    transform[:3, :3] = rotation
    transform[:3, 3] = xyz
    return transform
def _batch_axis_rot(axis: tuple, angles: np.ndarray) -> np.ndarray:
"""Batched Rodrigues rotation: (n,) angles around a fixed axis → (n, 4, 4)."""
n = len(angles)
ax = np.asarray(axis, dtype=np.float64)
ax = ax / np.linalg.norm(ax)
x, y, z = ax
c = np.cos(angles)
s = np.sin(angles)
t = 1 - c
rot = np.zeros((n, 4, 4))
rot[:, 0, 0] = t * x * x + c
rot[:, 0, 1] = t * x * y - s * z
rot[:, 0, 2] = t * x * z + s * y
rot[:, 1, 0] = t * x * y + s * z
rot[:, 1, 1] = t * y * y + c
rot[:, 1, 2] = t * y * z - s * x
rot[:, 2, 0] = t * x * z - s * y
rot[:, 2, 1] = t * y * z + s * x
rot[:, 2, 2] = t * z * z + c
rot[:, 3, 3] = 1.0
return rot
def batch_fk(chain: list, joint_angles: np.ndarray) -> np.ndarray:
    """
    Vectorised forward kinematics.

    Walks ``chain`` in order, applying each entry's fixed transform and, for
    entries with an axis, a rotation by the next column of ``joint_angles``
    (radians, one row per sample). Returns the (n, 3) translation part of the
    accumulated transforms.
    """
    sample_count = joint_angles.shape[0]
    pose = np.tile(np.eye(4), (sample_count, 1, 1))
    next_joint = 0
    for rpy, xyz, axis in chain:
        pose = pose @ _tf(rpy, xyz)
        if axis is None:
            # Fixed link: no joint angle is consumed.
            continue
        joint_rot = _batch_axis_rot(axis, joint_angles[:, next_joint])
        pose = np.einsum("nij,njk->nik", pose, joint_rot)
        next_joint += 1
    return pose[:, :3, 3]
# ── Data loading ────────────────────────────────────────
def _flatten_names(obj: object) -> list[str]:
"""Recursively flatten a names structure (list, dict, or nested) into a flat string list."""
if isinstance(obj, dict):
out: list[str] = []
for v in obj.values():
out.extend(_flatten_names(v))
return out
if isinstance(obj, (list, tuple)):
out = []
for item in obj:
if isinstance(item, (list, tuple, dict)):
out.extend(_flatten_names(item))
else:
out.append(str(item))
return out
return [str(obj)]
def _detect_and_convert(vals: np.ndarray) -> np.ndarray:
"""Auto-detect servo ticks / degrees / radians and convert to radians."""
mx = np.max(np.abs(vals))
if mx > 360:
print(f" Unit detection: servo ticks (max={mx:.0f})")
return (vals - 2048) / 2048 * np.pi
if mx > 6.3:
print(f" Unit detection: degrees (max={mx:.1f})")
return np.deg2rad(vals)
print(f" Unit detection: radians (max={mx:.3f})")
return vals.astype(np.float64)
def _find_joint_indices(features: dict, state_col: str, n_dim: int) -> tuple[list[int], list[int]]:
"""Try to find left/right joint indices from info.json feature names."""
feat = features.get("observation.state", features.get(state_col, {}))
names = _flatten_names(feat.get("names", []))
left_idx: list[int] = []
right_idx: list[int] = []
if names and len(names) == n_dim:
names_l = [n.lower() for n in names]
print(f" Feature names: {names[:4]}{names[-4:]}")
for j in range(1, 8):
for i, nm in enumerate(names_l):
if f"left_joint_{j}" in nm and i not in left_idx:
left_idx.append(i)
break
for i, nm in enumerate(names_l):
if f"right_joint_{j}" in nm and i not in right_idx:
right_idx.append(i)
break
if len(left_idx) == 7 and len(right_idx) == 7:
print(f" Matched by name: left={left_idx} right={right_idx}")
return left_idx, right_idx
if n_dim >= 16:
print(" Falling back to positional: [0:7]=left, [8:15]=right")
return list(range(7)), list(range(8, 15))
if n_dim >= 14:
print(" Falling back to positional: [0:7]=left, [7:14]=right")
return list(range(7)), list(range(7, 14))
raise RuntimeError(f"State dim {n_dim} too small for bimanual 7-DOF robot")
def download_data(repo_id: str) -> Path:
    """Download the ``meta/`` and ``data/`` trees of the dataset (videos excluded) and return the local root."""
    print(f" Downloading {repo_id} (parquet only) …")
    snapshot_dir = snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        allow_patterns=["meta/**", "data/**"],
        ignore_patterns=["*.mp4", "videos/**"],
    )
    return Path(snapshot_dir)
def resample_trajectory(traj: np.ndarray, n_waypoints: int) -> np.ndarray:
    """
    Linearly resample a (F, 3) trajectory to ``n_waypoints`` points.

    The input array itself is returned when it already has that many rows.
    """
    frame_count = traj.shape[0]
    if frame_count == n_waypoints:
        return traj
    # Both trajectories are parameterised over [0, 1].
    source_t = np.linspace(0, 1, frame_count)
    target_t = np.linspace(0, 1, n_waypoints)
    columns = []
    for axis in range(3):
        columns.append(np.interp(target_t, source_t, traj[:, axis]))
    return np.column_stack(columns)
def load_episode_trajectories(local: Path) -> list[dict]:
    """
    Load the joint states of every episode and convert them to TCP trajectories.

    Returns one dict per episode with at least three frames:
    {"left_tcp": (F,3), "right_tcp": (F,3), "episode_index": int}.
    Raises RuntimeError when the data has no array-valued observation.state
    column or no episode_index column.
    """
    info = json.loads((local / "meta" / "info.json").read_text())
    features = info.get("features", {})

    shards = sorted((local / "data").glob("**/*.parquet"))
    df = pd.concat([pd.read_parquet(shard) for shard in shards], ignore_index=True)
    print(f" Total frames: {len(df):,}")

    state_col = None
    for column in df.columns:
        if "observation.state" in column:
            state_col = column
            break
    if state_col is None:
        raise RuntimeError(f"No observation.state column. Available: {list(df.columns)}")
    first = df[state_col].iloc[0]
    if not hasattr(first, "__len__"):
        raise RuntimeError(f"observation.state is scalar ({type(first)}), expected array")

    state = np.stack(df[state_col].values).astype(np.float64)
    n_dim = state.shape[1]
    print(f" State dim: {n_dim} max|val|: {np.max(np.abs(state)):.1f}")
    left_idx, right_idx = _find_joint_indices(features, state_col, n_dim)

    if "episode_index" not in df.columns:
        raise RuntimeError(f"No episode_index column. Available: {list(df.columns)}")
    episode_ids = df["episode_index"].values
    unique_eps = np.unique(episode_ids)
    print(f" Episodes: {len(unique_eps):,}")

    # Unit detection runs once per arm over all frames, not per episode.
    left_all = _detect_and_convert(state[:, left_idx])
    right_all = _detect_and_convert(state[:, right_idx])

    print(" Computing FK per episode …")
    trajectories = []
    for ep_id in unique_eps:
        in_episode = episode_ids == ep_id
        left_tcp = batch_fk(LEFT_CHAIN, left_all[in_episode])
        right_tcp = batch_fk(RIGHT_CHAIN, right_all[in_episode])
        if len(left_tcp) >= 3:
            trajectories.append(
                {"left_tcp": left_tcp, "right_tcp": right_tcp, "episode_index": int(ep_id)}
            )
    print(f" Valid trajectories: {len(trajectories):,}")
    return trajectories
# ── Clustering ──────────────────────────────────────────
def cluster_trajectories(
    trajectories: list[dict], n_clusters: int, n_waypoints: int
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    K-means on resampled trajectory features.

    Left and right TCP trajectories are resampled to ``n_waypoints`` points and
    concatenated into one feature vector per episode.

    Args:
        trajectories: Dicts with ``left_tcp`` and ``right_tcp`` arrays of shape (F, 3).
        n_clusters: Requested cluster count; capped at the number of trajectories.
        n_waypoints: Number of points each trajectory is resampled to.

    Returns:
        ``(labels, centroid_trajs, spread)``: the cluster label per trajectory,
        the centroids as a (k, n_waypoints, 6) array (left xyz then right xyz),
        and a (k,) array with the mean per-waypoint Euclidean distance of the
        members to their centroid, averaged over both arms.

    Raises:
        ValueError: If ``trajectories`` is empty.
    """
    if not trajectories:
        raise ValueError("cluster_trajectories needs at least one trajectory")

    # Resample once and reuse the result for both the features and the spread.
    resampled = [
        (
            resample_trajectory(t["left_tcp"], n_waypoints),
            resample_trajectory(t["right_tcp"], n_waypoints),
        )
        for t in trajectories
    ]
    feat_matrix = np.array(
        [np.concatenate([left_rs.ravel(), right_rs.ravel()]) for left_rs, right_rs in resampled]
    )

    k = min(n_clusters, len(resampled))
    km = KMeans(n_clusters=k, n_init=10, random_state=SEED)
    labels = km.fit_predict(feat_matrix)
    centroids_flat = km.cluster_centers_

    # The first n_waypoints * 3 features belong to the left arm, the rest to the right.
    split = n_waypoints * 3
    centroid_trajs = np.zeros((k, n_waypoints, 6))
    centroid_trajs[:, :, :3] = centroids_flat[:, :split].reshape(k, n_waypoints, 3)
    centroid_trajs[:, :, 3:] = centroids_flat[:, split:].reshape(k, n_waypoints, 3)

    # Mean per-waypoint distance to centroid for each cluster
    spread = np.zeros(k)
    for ci in range(k):
        members = np.where(labels == ci)[0]
        if len(members) == 0:
            continue
        centroid_left = centroid_trajs[ci, :, :3]
        centroid_right = centroid_trajs[ci, :, 3:]
        dists = []
        for mi in members:
            left_rs, right_rs = resampled[mi]
            d_left = np.linalg.norm(left_rs - centroid_left, axis=1).mean()
            d_right = np.linalg.norm(right_rs - centroid_right, axis=1).mean()
            dists.append((d_left + d_right) / 2)
        spread[ci] = np.mean(dists)
    return labels, centroid_trajs, spread
# ── Visualization ───────────────────────────────────────
# 2D projections drawn by render. Each entry is
# (view title, index of the horizontal coordinate, index of the vertical coordinate,
#  x-axis label, y-axis label), with coordinate indices 0/1/2 selecting columns of the TCP positions.
PROJ_VIEWS = [
("XZ (side)", 0, 2, "X (m)", "Z (m)"),
("XY (top)", 0, 1, "X (m)", "Y (m)"),
("YZ (front)", 1, 2, "Y (m)", "Z (m)"),
]
def render(results: list[dict], out_path: Path) -> None:
    """
    Draw one row of projection panels per dataset and save the figure.

    The grid has ``len(results)`` rows and ``len(PROJ_VIEWS)`` columns. In each
    panel every trajectory is drawn as a thin line colored by its cluster, and
    each cluster centroid is drawn as a thick line (width scaled by cluster
    size) with a circle at its first waypoint and a square at its last.

    Args:
        results: One dict per dataset with the keys ``"label"``,
            ``"trajectories"``, ``"labels"``, ``"centroids"``, ``"spread"`` and
            ``"n_episodes"``.
        out_path: Destination image path passed to ``Figure.savefig``.

    Raises:
        ValueError: If ``results`` is empty.
    """
    if len(results) == 0:
        # A zero-row subplot grid is invalid; report it in this function's terms.
        raise ValueError("render() requires at least one dataset result")

    n_ds = len(results)
    n_proj = len(PROJ_VIEWS)
    # squeeze=False keeps `axes` two-dimensional even with a single dataset row,
    # so axes[row, col] indexing works without a special case.
    fig, axes = plt.subplots(
        n_ds,
        n_proj,
        figsize=(7 * n_proj, 7 * n_ds),
        facecolor="#0d1117",
        squeeze=False,
    )

    for row, r in enumerate(results):
        trajectories = r["trajectories"]
        labels = r["labels"]
        centroids = r["centroids"]
        spread = r["spread"]
        k = centroids.shape[0]
        cluster_sizes = np.bincount(labels, minlength=k)
        size_order = np.argsort(-cluster_sizes)
        pcts = cluster_sizes / len(labels) * 100
        # Size-weighted mean spread, converted to cm by the factor 100.
        # It does not depend on the projection, so compute it once per dataset.
        mean_spread_cm = np.average(spread, weights=cluster_sizes) * 100

        for col, (view_name, dim_a, dim_b, xlabel, ylabel) in enumerate(PROJ_VIEWS):
            ax = axes[row, col]
            ax.set_facecolor("#0d1117")

            # Faint background: every raw trajectory, both arms.
            for ti, traj in enumerate(trajectories):
                color = CLUSTER_COLORS[labels[ti] % len(CLUSTER_COLORS)]
                for tcp_key in ("left_tcp", "right_tcp"):
                    pts = traj[tcp_key]
                    ax.plot(pts[:, dim_a], pts[:, dim_b], color=color, alpha=0.12, linewidth=0.4)

            # Foreground: centroid paths, thicker for larger clusters.
            for ci in range(k):
                color = CLUSTER_COLORS[ci % len(CLUSTER_COLORS)]
                left_c = centroids[ci, :, :3]
                right_c = centroids[ci, :, 3:]
                lw = 1.5 + 2.0 * cluster_sizes[ci] / cluster_sizes.max()
                for c_pts in (left_c, right_c):
                    ax.plot(
                        c_pts[:, dim_a],
                        c_pts[:, dim_b],
                        color=color,
                        linewidth=lw,
                        alpha=0.95,
                        zorder=10,
                    )
                    # Circle marks the first waypoint of the centroid path.
                    ax.plot(
                        c_pts[0, dim_a],
                        c_pts[0, dim_b],
                        "o",
                        color=color,
                        markersize=4,
                        zorder=11,
                    )
                    # Square marks the last waypoint of the centroid path.
                    ax.plot(
                        c_pts[-1, dim_a],
                        c_pts[-1, dim_b],
                        "s",
                        color=color,
                        markersize=4,
                        zorder=11,
                    )

            ax.set_xlabel(xlabel, color="#888", fontsize=9)
            ax.set_ylabel(ylabel, color="#888", fontsize=9)
            ax.tick_params(colors="#555", labelsize=7)
            for spine in ax.spines.values():
                spine.set_color("#333")
            ax.set_aspect("equal")

            if col == 0:
                # The first panel of each row carries the dataset summary.
                ax.set_title(
                    f"{r['label']} ({r['n_episodes']:,} episodes, {k} clusters, "
                    f"avg spread {mean_spread_cm:.1f}cm)",
                    color="white",
                    fontsize=11,
                    pad=10,
                )
            else:
                ax.set_title(view_name, color="#aaa", fontsize=10, pad=8)

        # Cluster size + spread legend on the rightmost panel, largest cluster first.
        legend_ax = axes[row, -1]
        for ci in size_order:
            color = CLUSTER_COLORS[ci % len(CLUSTER_COLORS)]
            spread_cm = spread[ci] * 100
            label = f"C{ci}: {cluster_sizes[ci]} eps ({pcts[ci]:.0f}%) ±{spread_cm:.1f}cm"
            # Empty plot: contributes a legend handle without drawing anything.
            legend_ax.plot([], [], color=color, linewidth=3, label=label)
        legend_ax.legend(
            loc="upper right",
            fontsize=7,
            frameon=True,
            facecolor="#1a1a2e",
            edgecolor="#333",
            labelcolor="white",
            handlelength=1.5,
        )

    fig.suptitle(
        "End-Effector Trajectory Clusters (FK · K-means)",
        color="white",
        fontsize=16,
        y=0.98,
    )
    # Leave the top 5% of the canvas free for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    # Operate on `fig` explicitly rather than on pyplot's implicit current
    # figure, so the right figure is saved and released.
    fig.savefig(out_path, dpi=DPI, bbox_inches="tight", facecolor=fig.get_facecolor())
    plt.close(fig)
    print(f"\n✓ Saved: {out_path}")
# ── Main ────────────────────────────────────────────────
def main() -> None:
    """
    Cluster the end-effector trajectories of every dataset and render them.

    For each entry in ``DATASETS`` this downloads the data, loads the
    per-episode trajectories, clusters them into at most ``N_CLUSTERS`` groups
    over ``WAYPOINTS`` resampled waypoints, and prints the cluster sizes and
    spreads. All datasets are then drawn into a single image written to
    ``OUTPUT_DIR / "workspace_trajectory_clusters.jpg"``.
    """
    results = []
    for ds in DATASETS:
        repo_id, label = ds["repo_id"], ds["label"]
        print(f"\n{'=' * 60}")
        print(f" {label}: {repo_id}")
        print(f"{'=' * 60}")

        local = download_data(repo_id)
        trajectories = load_episode_trajectories(local)
        labels, centroids, spread = cluster_trajectories(trajectories, N_CLUSTERS, WAYPOINTS)

        cluster_sizes = np.bincount(labels, minlength=centroids.shape[0])
        # .tolist() yields plain Python ints; sorting the array directly gives a
        # list of NumPy scalars, which NumPy >= 2 prints as "np.int64(12)".
        print(f" Cluster sizes: {sorted(cluster_sizes.tolist(), reverse=True)}")
        # Report clusters from largest to smallest; spread is shown in cm.
        for ci in np.argsort(-cluster_sizes):
            print(
                f" C{ci}: {cluster_sizes[ci]} eps ({cluster_sizes[ci] / len(labels) * 100:.0f}%) "
                f"spread ±{spread[ci] * 100:.1f}cm"
            )

        results.append(
            {
                "label": label,
                "trajectories": trajectories,
                "labels": labels,
                "centroids": centroids,
                "spread": spread,
                "n_episodes": len(trajectories),
            }
        )

    out = OUTPUT_DIR / "workspace_trajectory_clusters.jpg"
    render(results, out)


if __name__ == "__main__":
    main()

View File

@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors
from lerobot.processor import make_default_processors from lerobot.processor import make_default_processors
@@ -78,24 +78,40 @@ def main():
listener, events = init_keyboard_listener() listener, events = init_keyboard_listener()
init_rerun(session_name="lekiwi_evaluate") init_rerun(session_name="lekiwi_evaluate")
try: if not robot.is_connected:
if not robot.is_connected: raise ValueError("Robot is not connected!")
raise ValueError("Robot is not connected!")
print("Starting evaluate loop...") print("Starting evaluate loop...")
recorded_episodes = 0 recorded_episodes = 0
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]: while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}") log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}")
# Main record loop # Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=teleop_action_processor,
robot_action_processor=robot_action_processor,
robot_observation_processor=robot_observation_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop( record_loop(
robot=robot, robot=robot,
events=events, events=events,
fps=FPS, fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC, control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION, single_task=TASK_DESCRIPTION,
display_data=True, display_data=True,
@@ -104,42 +120,24 @@ def main():
robot_observation_processor=robot_observation_processor, robot_observation_processor=robot_observation_processor,
) )
# Reset the environment if not stopping or re-recording if events["rerecord_episode"]:
if not events["stop_recording"] and ( log_say("Re-record episode")
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"] events["rerecord_episode"] = False
): events["exit_early"] = False
log_say("Reset the environment") dataset.clear_episode_buffer()
record_loop( continue
robot=robot,
events=events,
fps=FPS,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=teleop_action_processor,
robot_action_processor=robot_action_processor,
robot_observation_processor=robot_observation_processor,
)
if events["rerecord_episode"]: # Save episode
log_say("Re-record episode") dataset.save_episode()
events["rerecord_episode"] = False recorded_episodes += 1
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode # Clean up
dataset.save_episode() log_say("Stop recording")
recorded_episodes += 1 robot.disconnect()
listener.stop()
finally: dataset.finalize()
# Clean up dataset.push_to_hub()
log_say("Stop recording")
robot.disconnect()
listener.stop()
dataset.finalize()
dataset.push_to_hub()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.processor import make_default_processors from lerobot.processor import make_default_processors
from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
@@ -74,23 +74,40 @@ def main():
listener, events = init_keyboard_listener() listener, events = init_keyboard_listener()
init_rerun(session_name="lekiwi_record") init_rerun(session_name="lekiwi_record")
try: if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected: raise ValueError("Robot or teleop is not connected!")
raise ValueError("Robot or teleop is not connected!")
print("Starting record loop...") print("Starting record loop...")
recorded_episodes = 0 recorded_episodes = 0
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]: while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Recording episode {recorded_episodes}") log_say(f"Recording episode {recorded_episodes}")
# Main record loop # Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
dataset=dataset,
teleop=[leader_arm, keyboard],
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=teleop_action_processor,
robot_action_processor=robot_action_processor,
robot_observation_processor=robot_observation_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop( record_loop(
robot=robot, robot=robot,
events=events, events=events,
fps=FPS, fps=FPS,
dataset=dataset,
teleop=[leader_arm, keyboard], teleop=[leader_arm, keyboard],
control_time_s=EPISODE_TIME_SEC, control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION, single_task=TASK_DESCRIPTION,
display_data=True, display_data=True,
teleop_action_processor=teleop_action_processor, teleop_action_processor=teleop_action_processor,
@@ -98,44 +115,26 @@ def main():
robot_observation_processor=robot_observation_processor, robot_observation_processor=robot_observation_processor,
) )
# Reset the environment if not stopping or re-recording if events["rerecord_episode"]:
if not events["stop_recording"] and ( log_say("Re-record episode")
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"] events["rerecord_episode"] = False
): events["exit_early"] = False
log_say("Reset the environment") dataset.clear_episode_buffer()
record_loop( continue
robot=robot,
events=events,
fps=FPS,
teleop=[leader_arm, keyboard],
control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=teleop_action_processor,
robot_action_processor=robot_action_processor,
robot_observation_processor=robot_observation_processor,
)
if events["rerecord_episode"]: # Save episode
log_say("Re-record episode") dataset.save_episode()
events["rerecord_episode"] = False recorded_episodes += 1
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode # Clean up
dataset.save_episode() log_say("Stop recording")
recorded_episodes += 1 robot.disconnect()
finally: leader_arm.disconnect()
# Clean up keyboard.disconnect()
log_say("Stop recording") listener.stop()
robot.disconnect()
leader_arm.disconnect()
keyboard.disconnect()
listener.stop()
dataset.finalize() dataset.finalize()
dataset.push_to_hub() dataset.push_to_hub()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -42,27 +42,25 @@ def main():
# Connect to the robot # Connect to the robot
robot.connect() robot.connect()
try: if not robot.is_connected:
if not robot.is_connected: raise ValueError("Robot is not connected!")
raise ValueError("Robot is not connected!")
print("Starting replay loop...") print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}") log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(len(episode_frames)): for idx in range(len(episode_frames)):
t0 = time.perf_counter() t0 = time.perf_counter()
# Get recorded action from dataset # Get recorded action from dataset
action = { action = {
name: float(actions[idx][ACTION][i]) name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"])
for i, name in enumerate(dataset.features[ACTION]["names"]) }
}
# Send action to robot # Send action to robot
_ = robot.send_action(action) _ = robot.send_action(action)
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0)) precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
finally:
robot.disconnect() robot.disconnect()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -16,13 +16,15 @@
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics from lerobot.model.kinematics import RobotKinematics
from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors
from lerobot.processor import ( from lerobot.processor import (
RobotAction,
RobotObservation,
RobotProcessorPipeline, RobotProcessorPipeline,
make_default_teleop_action_processor, make_default_teleop_action_processor,
) )
@@ -38,7 +40,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints, InverseKinematicsEEToJoints,
) )
from lerobot.scripts.lerobot_record import record_loop from lerobot.scripts.lerobot_record import record_loop
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun from lerobot.utils.visualization_utils import init_rerun
@@ -141,24 +142,38 @@ def main():
listener, events = init_keyboard_listener() listener, events = init_keyboard_listener()
init_rerun(session_name="phone_so100_evaluate") init_rerun(session_name="phone_so100_evaluate")
try: if not robot.is_connected:
if not robot.is_connected: raise ValueError("Robot is not connected!")
raise ValueError("Robot is not connected!")
print("Starting evaluate loop...") print("Starting evaluate loop...")
episode_idx = 0 episode_idx = 0
for episode_idx in range(NUM_EPISODES): for episode_idx in range(NUM_EPISODES):
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}") log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
# Main record loop # Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=make_default_teleop_action_processor(),
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]):
log_say("Reset the environment")
record_loop( record_loop(
robot=robot, robot=robot,
events=events, events=events,
fps=FPS, fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC, control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION, single_task=TASK_DESCRIPTION,
display_data=True, display_data=True,
@@ -167,41 +182,24 @@ def main():
robot_observation_processor=robot_joints_to_ee_pose_processor, robot_observation_processor=robot_joints_to_ee_pose_processor,
) )
# Reset the environment if not stopping or re-recording if events["rerecord_episode"]:
if not events["stop_recording"] and ( log_say("Re-record episode")
(episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"] events["rerecord_episode"] = False
): events["exit_early"] = False
log_say("Reset the environment") dataset.clear_episode_buffer()
record_loop( continue
robot=robot,
events=events,
fps=FPS,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=make_default_teleop_action_processor(),
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
if events["rerecord_episode"]: # Save episode
log_say("Re-record episode") dataset.save_episode()
events["rerecord_episode"] = False episode_idx += 1
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode # Clean up
dataset.save_episode() log_say("Stop recording")
episode_idx += 1 robot.disconnect()
finally: listener.stop()
# Clean up
log_say("Stop recording")
robot.disconnect()
listener.stop()
dataset.finalize() dataset.finalize()
dataset.push_to_hub() dataset.push_to_hub()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -15,11 +15,11 @@
# limitations under the License. # limitations under the License.
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import ( from lerobot.processor.converters import (
observation_to_transition, observation_to_transition,
robot_action_observation_to_transition, robot_action_observation_to_transition,
@@ -38,7 +38,6 @@ from lerobot.scripts.lerobot_record import record_loop
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
from lerobot.teleoperators.phone.teleop_phone import Phone from lerobot.teleoperators.phone.teleop_phone import Phone
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun from lerobot.utils.visualization_utils import init_rerun
@@ -150,23 +149,38 @@ def main():
listener, events = init_keyboard_listener() listener, events = init_keyboard_listener()
init_rerun(session_name="phone_so100_record") init_rerun(session_name="phone_so100_record")
try: if not robot.is_connected or not phone.is_connected:
if not robot.is_connected or not phone.is_connected: raise ValueError("Robot or teleop is not connected!")
raise ValueError("Robot or teleop is not connected!")
print("Starting record loop. Move your phone to teleoperate the robot...") print("Starting record loop. Move your phone to teleoperate the robot...")
episode_idx = 0 episode_idx = 0
while episode_idx < NUM_EPISODES and not events["stop_recording"]: while episode_idx < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}") log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
# Main record loop # Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
teleop=phone,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=phone_to_robot_ee_pose_processor,
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]):
log_say("Reset the environment")
record_loop( record_loop(
robot=robot, robot=robot,
events=events, events=events,
fps=FPS, fps=FPS,
teleop=phone, teleop=phone,
dataset=dataset, control_time_s=RESET_TIME_SEC,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION, single_task=TASK_DESCRIPTION,
display_data=True, display_data=True,
teleop_action_processor=phone_to_robot_ee_pose_processor, teleop_action_processor=phone_to_robot_ee_pose_processor,
@@ -174,43 +188,25 @@ def main():
robot_observation_processor=robot_joints_to_ee_pose, robot_observation_processor=robot_joints_to_ee_pose,
) )
# Reset the environment if not stopping or re-recording if events["rerecord_episode"]:
if not events["stop_recording"] and ( log_say("Re-recording episode")
episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"] events["rerecord_episode"] = False
): events["exit_early"] = False
log_say("Reset the environment") dataset.clear_episode_buffer()
record_loop( continue
robot=robot,
events=events,
fps=FPS,
teleop=phone,
control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=phone_to_robot_ee_pose_processor,
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose,
)
if events["rerecord_episode"]: # Save episode
log_say("Re-recording episode") dataset.save_episode()
events["rerecord_episode"] = False episode_idx += 1
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode # Clean up
dataset.save_episode() log_say("Stop recording")
episode_idx += 1 robot.disconnect()
finally: phone.disconnect()
# Clean up listener.stop()
log_say("Stop recording")
robot.disconnect()
phone.disconnect()
listener.stop()
dataset.finalize() dataset.finalize()
dataset.push_to_hub() dataset.push_to_hub()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -18,7 +18,7 @@ import time
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.model.kinematics import RobotKinematics from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import ( from lerobot.processor.converters import (
robot_action_observation_to_transition, robot_action_observation_to_transition,
transition_to_robot_action, transition_to_robot_action,
@@ -27,7 +27,6 @@ from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
from lerobot.robots.so_follower.robot_kinematic_processor import ( from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints, InverseKinematicsEEToJoints,
) )
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.constants import ACTION from lerobot.utils.constants import ACTION
from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.utils import log_say from lerobot.utils.utils import log_say
@@ -74,34 +73,32 @@ def main():
# Connect to the robot # Connect to the robot
robot.connect() robot.connect()
try: if not robot.is_connected:
if not robot.is_connected: raise ValueError("Robot is not connected!")
raise ValueError("Robot is not connected!")
print("Starting replay loop...") print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}") log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(len(episode_frames)): for idx in range(len(episode_frames)):
t0 = time.perf_counter() t0 = time.perf_counter()
# Get recorded action from dataset # Get recorded action from dataset
ee_action = { ee_action = {
name: float(actions[idx][ACTION][i]) name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"])
for i, name in enumerate(dataset.features[ACTION]["names"]) }
}
# Get robot observation # Get robot observation
robot_obs = robot.get_observation() robot_obs = robot.get_observation()
# Dataset EE -> robot joints # Dataset EE -> robot joints
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs)) joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
# Send action to robot # Send action to robot
_ = robot.send_action(joint_action) _ = robot.send_action(joint_action)
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0)) precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
finally:
# Clean up # Clean up
robot.disconnect() robot.disconnect()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -16,7 +16,7 @@
import time import time
from lerobot.model.kinematics import RobotKinematics from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import ( from lerobot.processor.converters import (
robot_action_observation_to_transition, robot_action_observation_to_transition,
transition_to_robot_action, transition_to_robot_action,
@@ -31,7 +31,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
from lerobot.teleoperators.phone.teleop_phone import Phone from lerobot.teleoperators.phone.teleop_phone import Phone
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data from lerobot.utils.visualization_utils import init_rerun, log_rerun_data

View File

@@ -22,8 +22,7 @@ from pathlib import Path
import numpy as np import numpy as np
import tensorflow_datasets as tfds import tensorflow_datasets as tfds
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds from lerobot.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds
DROID_SHARDS = 2048 DROID_SHARDS = 2048

View File

@@ -26,7 +26,7 @@ from huggingface_hub import HfApi
from huggingface_hub.constants import REPOCARD_NAME from huggingface_hub.constants import REPOCARD_NAME
from port_droid import DROID_SHARDS from port_droid import DROID_SHARDS
from lerobot.datasets.dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDatasetMetadata
from lerobot.datasets.utils import create_lerobot_dataset_card from lerobot.datasets.utils import create_lerobot_dataset_card
from lerobot.utils.utils import init_logging from lerobot.utils.utils import init_logging
@@ -155,7 +155,7 @@ class UploadDataset(PipelineStep):
from datasets.utils.tqdm import disable_progress_bars from datasets.utils.tqdm import disable_progress_bars
from huggingface_hub import CommitOperationAdd, preupload_lfs_files from huggingface_hub import CommitOperationAdd, preupload_lfs_files
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.utils.utils import init_logging from lerobot.utils.utils import init_logging
init_logging() init_logging()

View File

@@ -27,8 +27,8 @@ measuring consistency and ground truth alignment.
Usage: Usage:
# Basic usage with smolvla policy # Basic usage with smolvla policy
uv run python examples/rtc/eval_dataset.py \ uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \ --policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \ --dataset.repo_id=helper2424/check_rtc \
--rtc.execution_horizon=8 \ --rtc.execution_horizon=8 \
--device=mps \ --device=mps \
--rtc.max_guidance_weight=10.0 \ --rtc.max_guidance_weight=10.0 \
@@ -58,16 +58,16 @@ Usage:
--device=cuda --device=cuda
uv run python examples/rtc/eval_dataset.py \ uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/reuben_pi0 \ --policy.path=lipsop/reuben_pi0 \
--dataset.repo_id=<USER>/so101_cube_in_cup \ --dataset.repo_id=ReubenLim/so101_cube_in_cup \
--rtc.execution_horizon=8 \ --rtc.execution_horizon=8 \
--device=cuda --device=cuda
# With torch.compile for faster inference (PyTorch 2.0+) # With torch.compile for faster inference (PyTorch 2.0+)
# Note: CUDA graphs disabled by default due to in-place ops in denoising loop # Note: CUDA graphs disabled by default due to in-place ops in denoising loop
uv run python examples/rtc/eval_dataset.py \ uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \ --policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \ --dataset.repo_id=helper2424/check_rtc \
--rtc.execution_horizon=8 \ --rtc.execution_horizon=8 \
--device=mps \ --device=mps \
--use_torch_compile=true \ --use_torch_compile=true \
@@ -75,8 +75,8 @@ Usage:
# With torch.compile on CUDA (CUDA graphs disabled by default) # With torch.compile on CUDA (CUDA graphs disabled by default)
uv run python examples/rtc/eval_dataset.py \ uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \ --policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \ --dataset.repo_id=helper2424/check_rtc \
--rtc.execution_horizon=8 \ --rtc.execution_horizon=8 \
--device=cuda \ --device=cuda \
--use_torch_compile=true \ --use_torch_compile=true \
@@ -84,8 +84,8 @@ Usage:
# Enable CUDA graphs (advanced - may cause tensor aliasing errors) # Enable CUDA graphs (advanced - may cause tensor aliasing errors)
uv run python examples/rtc/eval_dataset.py \ uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \ --policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \ --dataset.repo_id=helper2424/check_rtc \
--use_torch_compile=true \ --use_torch_compile=true \
--torch_compile_backend=inductor \ --torch_compile_backend=inductor \
--torch_compile_mode=max-autotune \ --torch_compile_mode=max-autotune \
@@ -113,9 +113,8 @@ from lerobot.configs import parser
from lerobot.configs.default import DatasetConfig from lerobot.configs.default import DatasetConfig
from lerobot.configs.policies import PreTrainedConfig from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.types import RTCAttentionSchedule from lerobot.configs.types import RTCAttentionSchedule
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.factory import resolve_delta_timestamps from lerobot.datasets.factory import resolve_delta_timestamps
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.policies.factory import get_policy_class, make_pre_post_processors from lerobot.policies.factory import get_policy_class, make_pre_post_processors
from lerobot.policies.rtc.configuration_rtc import RTCConfig from lerobot.policies.rtc.configuration_rtc import RTCConfig
from lerobot.policies.rtc.debug_visualizer import RTCDebugVisualizer from lerobot.policies.rtc.debug_visualizer import RTCDebugVisualizer

View File

@@ -28,7 +28,7 @@ For simulation environments, see eval_with_simulation.py
Usage: Usage:
# Run RTC with Real robot with RTC # Run RTC with Real robot with RTC
uv run examples/rtc/eval_with_real_robot.py \ uv run examples/rtc/eval_with_real_robot.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \ --policy.path=helper2424/smolvla_check_rtc_last3 \
--policy.device=mps \ --policy.device=mps \
--rtc.enabled=true \ --rtc.enabled=true \
--rtc.execution_horizon=20 \ --rtc.execution_horizon=20 \
@@ -41,7 +41,7 @@ Usage:
# Run RTC with Real robot without RTC # Run RTC with Real robot without RTC
uv run examples/rtc/eval_with_real_robot.py \ uv run examples/rtc/eval_with_real_robot.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \ --policy.path=helper2424/smolvla_check_rtc_last3 \
--policy.device=mps \ --policy.device=mps \
--rtc.enabled=false \ --rtc.enabled=false \
--robot.type=so100_follower \ --robot.type=so100_follower \
@@ -53,7 +53,7 @@ Usage:
# Run RTC with Real robot with pi0.5 policy # Run RTC with Real robot with pi0.5 policy
uv run examples/rtc/eval_with_real_robot.py \ uv run examples/rtc/eval_with_real_robot.py \
--policy.path=<USER>/pi05_check_rtc \ --policy.path=helper2424/pi05_check_rtc \
--policy.device=mps \ --policy.device=mps \
--rtc.enabled=true \ --rtc.enabled=true \
--rtc.execution_horizon=20 \ --rtc.execution_horizon=20 \
@@ -78,11 +78,10 @@ from torch import Tensor
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401 from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401
from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401 from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401
from lerobot.cameras.zmq.configuration_zmq import ZMQCameraConfig # noqa: F401
from lerobot.configs import parser from lerobot.configs import parser
from lerobot.configs.policies import PreTrainedConfig from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.types import RTCAttentionSchedule from lerobot.configs.types import RTCAttentionSchedule
from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features
from lerobot.policies.factory import get_policy_class, make_pre_post_processors from lerobot.policies.factory import get_policy_class, make_pre_post_processors
from lerobot.policies.rtc.action_queue import ActionQueue from lerobot.policies.rtc.action_queue import ActionQueue
from lerobot.policies.rtc.configuration_rtc import RTCConfig from lerobot.policies.rtc.configuration_rtc import RTCConfig
@@ -98,7 +97,6 @@ from lerobot.robots import ( # noqa: F401
bi_so_follower, bi_so_follower,
koch_follower, koch_follower,
so_follower, so_follower,
unitree_g1,
) )
from lerobot.robots.utils import make_robot_from_config from lerobot.robots.utils import make_robot_from_config
from lerobot.utils.constants import OBS_IMAGES from lerobot.utils.constants import OBS_IMAGES

View File

@@ -16,13 +16,15 @@
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics from lerobot.model.kinematics import RobotKinematics
from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors
from lerobot.processor import ( from lerobot.processor import (
RobotAction,
RobotObservation,
RobotProcessorPipeline, RobotProcessorPipeline,
make_default_teleop_action_processor, make_default_teleop_action_processor,
) )
@@ -38,7 +40,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints, InverseKinematicsEEToJoints,
) )
from lerobot.scripts.lerobot_record import record_loop from lerobot.scripts.lerobot_record import record_loop
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun from lerobot.utils.visualization_utils import init_rerun
@@ -141,24 +142,38 @@ def main():
listener, events = init_keyboard_listener() listener, events = init_keyboard_listener()
init_rerun(session_name="so100_so100_evaluate") init_rerun(session_name="so100_so100_evaluate")
try: if not robot.is_connected:
if not robot.is_connected: raise ValueError("Robot is not connected!")
raise ValueError("Robot is not connected!")
print("Starting evaluate loop...") print("Starting evaluate loop...")
episode_idx = 0 episode_idx = 0
for episode_idx in range(NUM_EPISODES): for episode_idx in range(NUM_EPISODES):
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}") log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
# Main record loop # Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=make_default_teleop_action_processor(),
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]):
log_say("Reset the environment")
record_loop( record_loop(
robot=robot, robot=robot,
events=events, events=events,
fps=FPS, fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC, control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION, single_task=TASK_DESCRIPTION,
display_data=True, display_data=True,
@@ -167,41 +182,24 @@ def main():
robot_observation_processor=robot_joints_to_ee_pose_processor, robot_observation_processor=robot_joints_to_ee_pose_processor,
) )
# Reset the environment if not stopping or re-recording if events["rerecord_episode"]:
if not events["stop_recording"] and ( log_say("Re-record episode")
(episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"] events["rerecord_episode"] = False
): events["exit_early"] = False
log_say("Reset the environment") dataset.clear_episode_buffer()
record_loop( continue
robot=robot,
events=events,
fps=FPS,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=make_default_teleop_action_processor(),
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
if events["rerecord_episode"]: # Save episode
log_say("Re-record episode") dataset.save_episode()
events["rerecord_episode"] = False episode_idx += 1
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode # Clean up
dataset.save_episode() log_say("Stop recording")
episode_idx += 1 robot.disconnect()
finally: listener.stop()
# Clean up
log_say("Stop recording")
robot.disconnect()
listener.stop()
dataset.finalize() dataset.finalize()
dataset.push_to_hub() dataset.push_to_hub()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -16,11 +16,11 @@
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import ( from lerobot.processor.converters import (
observation_to_transition, observation_to_transition,
robot_action_observation_to_transition, robot_action_observation_to_transition,
@@ -35,7 +35,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
) )
from lerobot.scripts.lerobot_record import record_loop from lerobot.scripts.lerobot_record import record_loop
from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun from lerobot.utils.visualization_utils import init_rerun
@@ -147,23 +146,38 @@ def main():
listener, events = init_keyboard_listener() listener, events = init_keyboard_listener()
init_rerun(session_name="recording_phone") init_rerun(session_name="recording_phone")
try: if not leader.is_connected or not follower.is_connected:
if not leader.is_connected or not follower.is_connected: raise ValueError("Robot or teleop is not connected!")
raise ValueError("Robot or teleop is not connected!")
print("Starting record loop...") print("Starting record loop...")
episode_idx = 0 episode_idx = 0
while episode_idx < NUM_EPISODES and not events["stop_recording"]: while episode_idx < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}") log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
# Main record loop # Main record loop
record_loop(
robot=follower,
events=events,
fps=FPS,
teleop=leader,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=leader_joints_to_ee,
robot_action_processor=ee_to_follower_joints,
robot_observation_processor=follower_joints_to_ee,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]):
log_say("Reset the environment")
record_loop( record_loop(
robot=follower, robot=follower,
events=events, events=events,
fps=FPS, fps=FPS,
teleop=leader, teleop=leader,
dataset=dataset, control_time_s=RESET_TIME_SEC,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION, single_task=TASK_DESCRIPTION,
display_data=True, display_data=True,
teleop_action_processor=leader_joints_to_ee, teleop_action_processor=leader_joints_to_ee,
@@ -171,44 +185,25 @@ def main():
robot_observation_processor=follower_joints_to_ee, robot_observation_processor=follower_joints_to_ee,
) )
# Reset the environment if not stopping or re-recording if events["rerecord_episode"]:
if not events["stop_recording"] and ( log_say("Re-recording episode")
episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"] events["rerecord_episode"] = False
): events["exit_early"] = False
log_say("Reset the environment") dataset.clear_episode_buffer()
record_loop( continue
robot=follower,
events=events,
fps=FPS,
teleop=leader,
control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=leader_joints_to_ee,
robot_action_processor=ee_to_follower_joints,
robot_observation_processor=follower_joints_to_ee,
)
if events["rerecord_episode"]: # Save episode
log_say("Re-recording episode") dataset.save_episode()
events["rerecord_episode"] = False episode_idx += 1
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode # Clean up
dataset.save_episode() log_say("Stop recording")
episode_idx += 1 leader.disconnect()
follower.disconnect()
listener.stop()
finally: dataset.finalize()
# Clean up dataset.push_to_hub()
log_say("Stop recording")
leader.disconnect()
follower.disconnect()
listener.stop()
dataset.finalize()
dataset.push_to_hub()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -19,7 +19,7 @@ import time
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.model.kinematics import RobotKinematics from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import ( from lerobot.processor.converters import (
robot_action_observation_to_transition, robot_action_observation_to_transition,
transition_to_robot_action, transition_to_robot_action,
@@ -28,7 +28,6 @@ from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
from lerobot.robots.so_follower.robot_kinematic_processor import ( from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints, InverseKinematicsEEToJoints,
) )
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.constants import ACTION from lerobot.utils.constants import ACTION
from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.utils import log_say from lerobot.utils.utils import log_say
@@ -75,35 +74,32 @@ def main():
# Connect to the robot # Connect to the robot
robot.connect() robot.connect()
try: if not robot.is_connected:
if not robot.is_connected: raise ValueError("Robot is not connected!")
raise ValueError("Robot is not connected!")
print("Starting replay loop...") print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}") log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(len(episode_frames)): for idx in range(len(episode_frames)):
t0 = time.perf_counter() t0 = time.perf_counter()
# Get recorded action from dataset # Get recorded action from dataset
ee_action = { ee_action = {
name: float(actions[idx][ACTION][i]) name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"])
for i, name in enumerate(dataset.features[ACTION]["names"]) }
}
# Get robot observation # Get robot observation
robot_obs = robot.get_observation() robot_obs = robot.get_observation()
# Dataset EE -> robot joints # Dataset EE -> robot joints
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs)) joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
# Send action to robot # Send action to robot
_ = robot.send_action(joint_action) _ = robot.send_action(joint_action)
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0)) precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
finally: # Clean up
# Clean up robot.disconnect()
robot.disconnect()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -17,7 +17,7 @@
import time import time
from lerobot.model.kinematics import RobotKinematics from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import ( from lerobot.processor.converters import (
robot_action_observation_to_transition, robot_action_observation_to_transition,
robot_action_to_transition, robot_action_to_transition,
@@ -30,7 +30,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints, InverseKinematicsEEToJoints,
) )
from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data from lerobot.utils.visualization_utils import init_rerun, log_rerun_data

View File

@@ -19,9 +19,8 @@ from pathlib import Path
import torch import torch
from lerobot.configs.types import FeatureType from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors

View File

@@ -20,9 +20,9 @@ from pathlib import Path
import torch import torch
from lerobot.configs.types import FeatureType from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features
from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset
from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.policies.act.configuration_act import ACTConfig from lerobot.policies.act.configuration_act import ACTConfig
from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors

View File

@@ -5,9 +5,8 @@ from pathlib import Path
import torch import torch
from lerobot.configs.types import FeatureType from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.policies.act.configuration_act import ACTConfig from lerobot.policies.act.configuration_act import ACTConfig
from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors

View File

@@ -1,7 +1,7 @@
import torch import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.utils import build_inference_frame, make_robot_action from lerobot.policies.utils import build_inference_frame, make_robot_action

View File

@@ -5,9 +5,8 @@ from pathlib import Path
import torch import torch
from lerobot.configs.types import FeatureType from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors

View File

@@ -1,7 +1,7 @@
import torch import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.utils import build_inference_frame, make_robot_action from lerobot.policies.utils import build_inference_frame, make_robot_action

View File

@@ -1,7 +1,7 @@
import torch import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import hw_to_dataset_features from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.pi0.modeling_pi0 import PI0Policy from lerobot.policies.pi0.modeling_pi0 import PI0Policy
from lerobot.policies.utils import build_inference_frame, make_robot_action from lerobot.policies.utils import build_inference_frame, make_robot_action

View File

@@ -6,8 +6,8 @@ from queue import Empty, Full
import torch import torch
import torch.optim as optim import torch.optim as optim
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig
from lerobot.policies.sac.configuration_sac import SACConfig from lerobot.policies.sac.configuration_sac import SACConfig
from lerobot.policies.sac.modeling_sac import SACPolicy from lerobot.policies.sac.modeling_sac import SACPolicy

View File

@@ -1,7 +1,7 @@
import torch import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import hw_to_dataset_features from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy
from lerobot.policies.utils import build_inference_frame, make_robot_action from lerobot.policies.utils import build_inference_frame, make_robot_action

View File

@@ -14,20 +14,20 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import argparse
import logging import logging
import time
from collections import deque from collections import deque
import numpy as np import numpy as np
import onnxruntime as ort import onnxruntime as ort
from huggingface_hub import hf_hub_download from huggingface_hub import hf_hub_download
from lerobot.robots.unitree_g1.g1_utils import ( from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
REMOTE_AXES, from lerobot.robots.unitree_g1.g1_utils import G1_29_JointIndex
REMOTE_BUTTONS, from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
G1_29_JointIndex,
get_gravity_orientation,
)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -36,13 +36,18 @@ GROOT_DEFAULT_ANGLES[[0, 6]] = -0.1 # Hip pitch
GROOT_DEFAULT_ANGLES[[3, 9]] = 0.3 # Knee GROOT_DEFAULT_ANGLES[[3, 9]] = 0.3 # Knee
GROOT_DEFAULT_ANGLES[[4, 10]] = -0.2 # Ankle pitch GROOT_DEFAULT_ANGLES[[4, 10]] = -0.2 # Ankle pitch
MISSING_JOINTS = []
G1_MODEL = "g1_23" # Or "g1_29"
if G1_MODEL == "g1_23":
MISSING_JOINTS = [12, 14, 20, 21, 27, 28] # Waist yaw/pitch, wrist pitch/yaw
# Control parameters # Control parameters
ACTION_SCALE = 0.25 ACTION_SCALE = 0.25
CONTROL_DT = 0.02 # 50Hz CONTROL_DT = 0.02 # 50Hz
ANG_VEL_SCALE: float = 0.25 ANG_VEL_SCALE: float = 0.25
DOF_POS_SCALE: float = 1.0 DOF_POS_SCALE: float = 1.0
DOF_VEL_SCALE: float = 0.05 DOF_VEL_SCALE: float = 0.05
CMD_SCALE: list[float] = [2.0, 2.0, 0.25] CMD_SCALE: list = [2.0, 2.0, 0.25]
DEFAULT_GROOT_REPO_ID = "nepyope/GR00T-WholeBodyControl_g1" DEFAULT_GROOT_REPO_ID = "nepyope/GR00T-WholeBodyControl_g1"
@@ -80,11 +85,11 @@ def load_groot_policies(
class GrootLocomotionController: class GrootLocomotionController:
"""GR00T lower-body locomotion controller for the Unitree G1.""" """GR00T lower-body locomotion controller for the Unitree G1."""
control_dt = CONTROL_DT # Expose for unitree_g1.py def __init__(self, policy_balance, policy_walk, robot, config):
self.policy_balance = policy_balance
def __init__(self): self.policy_walk = policy_walk
# Load policies self.robot = robot
self.policy_balance, self.policy_walk = load_groot_policies() self.config = config
self.cmd = np.array([0.0, 0.0, 0.0], dtype=np.float32) # vx, vy, theta_dot self.cmd = np.array([0.0, 0.0, 0.0], dtype=np.float32) # vx, vy, theta_dot
@@ -104,60 +109,45 @@ class GrootLocomotionController:
logger.info("GrootLocomotionController initialized") logger.info("GrootLocomotionController initialized")
def reset(self) -> None: def run_step(self):
"""Reset internal state for a new episode.""" # Get current observation
self.cmd[:] = 0.0 obs = self.robot.get_observation()
self.groot_qj_all[:] = 0.0
self.groot_dqj_all[:] = 0.0
self.groot_action[:] = 0.0
self.groot_obs_single[:] = 0.0
self.groot_obs_stacked[:] = 0.0
self.groot_height_cmd = 0.74
self.groot_orientation_cmd[:] = 0.0
self.groot_obs_history.clear()
for _ in range(6):
self.groot_obs_history.append(np.zeros(86, dtype=np.float32))
def run_step(self, action: dict, lowstate) -> dict: if not obs:
"""Run one step of the locomotion controller. return
Args: # Get command from remote controller
action: Action dict containing remote.lx/ly/rx/ry and buttons if obs["remote.buttons"][0]: # R1 - raise waist
lowstate: Robot lowstate containing motor positions/velocities and IMU
Returns:
Action dict for lower body joints (0-14)
"""
if lowstate is None:
return {}
buttons = [int(action.get(k, 0)) for k in REMOTE_BUTTONS]
if buttons[0]: # R1 - raise waist
self.groot_height_cmd += 0.001 self.groot_height_cmd += 0.001
self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00) self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00)
if buttons[4]: # R2 - lower waist if obs["remote.buttons"][4]: # R2 - lower waist
self.groot_height_cmd -= 0.001 self.groot_height_cmd -= 0.001
self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00) self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00)
lx, ly, rx, _ry = (action.get(k, 0.0) for k in REMOTE_AXES) self.cmd[0] = obs["remote.ly"] # Forward/backward
self.cmd[0] = ly # Forward/backward self.cmd[1] = obs["remote.lx"] * -1 # Left/right
self.cmd[1] = -lx # Left/right (negated) self.cmd[2] = obs["remote.rx"] * -1 # Rotation rate
self.cmd[2] = -rx # Rotation rate (negated)
# Get joint positions and velocities from lowstate # Get joint positions and velocities from flat dict
for motor in G1_29_JointIndex: for motor in G1_29_JointIndex:
name = motor.name
idx = motor.value idx = motor.value
self.groot_qj_all[idx] = lowstate.motor_state[idx].q self.groot_qj_all[idx] = obs[f"{name}.q"]
self.groot_dqj_all[idx] = lowstate.motor_state[idx].dq self.groot_dqj_all[idx] = obs[f"{name}.dq"]
# Adapt observation for g1_23dof
for idx in MISSING_JOINTS:
self.groot_qj_all[idx] = 0.0
self.groot_dqj_all[idx] = 0.0
# Scale joint positions and velocities # Scale joint positions and velocities
qj_obs = self.groot_qj_all.copy() qj_obs = self.groot_qj_all.copy()
dqj_obs = self.groot_dqj_all.copy() dqj_obs = self.groot_dqj_all.copy()
# Express IMU data in gravity frame of reference # Express IMU data in gravity frame of reference
quat = lowstate.imu_state.quaternion quat = [obs["imu.quat.w"], obs["imu.quat.x"], obs["imu.quat.y"], obs["imu.quat.z"]]
ang_vel = np.array(lowstate.imu_state.gyroscope, dtype=np.float32) ang_vel = np.array([obs["imu.gyro.x"], obs["imu.gyro.y"], obs["imu.gyro.z"]], dtype=np.float32)
gravity_orientation = get_gravity_orientation(quat) gravity_orientation = self.robot.get_gravity_orientation(quat)
# Scale joint positions and velocities before policy inference # Scale joint positions and velocities before policy inference
qj_obs = (qj_obs - GROOT_DEFAULT_ANGLES) * DOF_POS_SCALE qj_obs = (qj_obs - GROOT_DEFAULT_ANGLES) * DOF_POS_SCALE
@@ -196,10 +186,73 @@ class GrootLocomotionController:
# Transform action back to target joint positions # Transform action back to target joint positions
target_dof_pos_15 = GROOT_DEFAULT_ANGLES[:15] + self.groot_action * ACTION_SCALE target_dof_pos_15 = GROOT_DEFAULT_ANGLES[:15] + self.groot_action * ACTION_SCALE
# Build action dict # Build action dict (only first 15 joints for GR00T)
action_dict = {} action_dict = {}
for i in range(15): for i in range(15):
motor_name = G1_29_JointIndex(i).name motor_name = G1_29_JointIndex(i).name
action_dict[f"{motor_name}.q"] = float(target_dof_pos_15[i]) action_dict[f"{motor_name}.q"] = float(target_dof_pos_15[i])
return action_dict # Zero out missing joints for g1_23dof
for joint_idx in MISSING_JOINTS:
motor_name = G1_29_JointIndex(joint_idx).name
action_dict[f"{motor_name}.q"] = 0.0
# Send action to robot
self.robot.send_action(action_dict)
def run(repo_id: str = DEFAULT_GROOT_REPO_ID) -> None:
    """Main function to run the GR00T locomotion controller.

    Loads the balance/walk policies, connects to the robot, then calls
    ``run_step`` once per ``CONTROL_DT`` until the robot's shutdown event is
    set or the user presses Ctrl+C. The robot is disconnected on the way out,
    including when controller construction or the reset fails.

    Args:
        repo_id: Hugging Face Hub repository ID for GR00T policies.
    """
    # Load policies
    policy_balance, policy_walk = load_groot_policies(repo_id=repo_id)

    # Initialize robot
    config = UnitreeG1Config()
    robot = UnitreeG1(config)
    robot.connect()

    try:
        # Initialize gr00T locomotion controller.
        # Built inside the try block so that a failure here still reaches the
        # finally clause and disconnects the already-connected robot.
        groot_controller = GrootLocomotionController(
            policy_balance=policy_balance,
            policy_walk=policy_walk,
            robot=robot,
            config=config,
        )

        robot.reset(CONTROL_DT, GROOT_DEFAULT_ANGLES)

        logger.info("Use joystick: LY=fwd/back, LX=left/right, RX=rotate, R1=raise waist, R2=lower waist")
        logger.info("Press Ctrl+C to stop")

        # Run step
        while not robot._shutdown_event.is_set():
            # perf_counter is monotonic, so the measured step duration cannot be
            # skewed (or go negative) if the system wall clock is adjusted.
            start_time = time.perf_counter()
            groot_controller.run_step()
            elapsed = time.perf_counter() - start_time
            # Sleep only for what is left of the control period; never negative.
            sleep_time = max(0, CONTROL_DT - elapsed)
            time.sleep(sleep_time)
    except KeyboardInterrupt:
        logger.info("Stopping locomotion...")
    finally:
        if robot.is_connected:
            robot.disconnect()
        logger.info("Done!")
if __name__ == "__main__":
    # Command-line entry point: the single option selects the Hub repo that
    # the policies are loaded from, then control is handed to run().
    cli = argparse.ArgumentParser(description="GR00T Locomotion Controller for Unitree G1")
    repo_help = f"Hugging Face Hub repo ID for GR00T policies (default: {DEFAULT_GROOT_REPO_ID})"
    cli.add_argument("--repo-id", type=str, default=DEFAULT_GROOT_REPO_ID, help=repo_help)
    options = cli.parse_args()

    run(repo_id=options.repo_id)

View File

@@ -14,21 +14,21 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import argparse
import json import json
import logging import logging
import time
import numpy as np import numpy as np
import onnx import onnx
import onnxruntime as ort import onnxruntime as ort
from huggingface_hub import hf_hub_download from huggingface_hub import hf_hub_download
from lerobot.robots.unitree_g1.g1_utils import ( from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
REMOTE_AXES, from lerobot.robots.unitree_g1.g1_utils import G1_29_JointIndex
G1_29_JointArmIndex, from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
G1_29_JointIndex,
get_gravity_orientation,
)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
DEFAULT_ANGLES = np.zeros(29, dtype=np.float32) DEFAULT_ANGLES = np.zeros(29, dtype=np.float32)
@@ -40,13 +40,18 @@ DEFAULT_ANGLES[16] = 0.2 # Left shoulder roll
DEFAULT_ANGLES[23] = -0.2 # Right shoulder roll DEFAULT_ANGLES[23] = -0.2 # Right shoulder roll
DEFAULT_ANGLES[[18, 25]] = 0.6 # Elbow DEFAULT_ANGLES[[18, 25]] = 0.6 # Elbow
MISSING_JOINTS = []
G1_MODEL = "g1_23" # Or "g1_29"
if G1_MODEL == "g1_23":
MISSING_JOINTS = [12, 14, 20, 21, 27, 28] # Waist yaw/pitch, wrist pitch/yaw
# Control parameters # Control parameters
ACTION_SCALE = 0.25 ACTION_SCALE = 0.25
CONTROL_DT = 0.005 # 200Hz CONTROL_DT = 0.02 # 50Hz
ANG_VEL_SCALE = 0.25 ANG_VEL_SCALE = 0.25
DOF_POS_SCALE = 1.0 DOF_POS_SCALE = 1.0
DOF_VEL_SCALE = 0.05 DOF_VEL_SCALE = 0.05
GAIT_PERIOD = 0.5 GAIT_PERIOD = 1.0
DEFAULT_HOLOSOMA_REPO_ID = "nepyope/holosoma_locomotion" DEFAULT_HOLOSOMA_REPO_ID = "nepyope/holosoma_locomotion"
@@ -82,7 +87,7 @@ def load_policy(
logger.info(f"Policy loaded: {policy.get_inputs()[0].shape}{policy.get_outputs()[0].shape}") logger.info(f"Policy loaded: {policy.get_inputs()[0].shape}{policy.get_outputs()[0].shape}")
# Extract KP/KD from ONNX metadata # Extract KP/KD from ONNX metadata
model = onnx.load(policy_path, load_external_data=False) model = onnx.load(policy_path)
metadata = {prop.key: prop.value for prop in model.metadata_props} metadata = {prop.key: prop.value for prop in model.metadata_props}
if "kp" not in metadata or "kd" not in metadata: if "kp" not in metadata or "kd" not in metadata:
@@ -96,13 +101,15 @@ def load_policy(
class HolosomaLocomotionController: class HolosomaLocomotionController:
"""Holosoma lower-body locomotion controller for Unitree G1.""" """Holosoma whole-body locomotion controller for Unitree G1."""
control_dt = CONTROL_DT # Expose for unitree_g1.py def __init__(self, policy, robot, kp: np.ndarray, kd: np.ndarray):
self.policy = policy
self.robot = robot
def __init__(self): # Override robot's PD gains with policy gains
# Load policy and gains self.robot.kp = kp
self.policy, self.kp, self.kd = load_policy() self.robot.kd = kd
self.cmd = np.zeros(3, dtype=np.float32) self.cmd = np.zeros(3, dtype=np.float32)
@@ -117,55 +124,35 @@ class HolosomaLocomotionController:
self.phase_dt = 2 * np.pi / ((1.0 / CONTROL_DT) * GAIT_PERIOD) self.phase_dt = 2 * np.pi / ((1.0 / CONTROL_DT) * GAIT_PERIOD)
self.is_standing = True self.is_standing = True
logger.info("HolosomaLocomotionController initialized") def run_step(self):
# Get current observation
obs = self.robot.get_observation()
def reset(self) -> None: if not obs:
"""Reset internal state for a new episode.""" return
self.cmd[:] = 0.0
self.qj[:] = 0.0
self.dqj[:] = 0.0
self.obs[:] = 0.0
self.last_action[:] = 0.0
self.phase = np.array([[0.0, np.pi]], dtype=np.float32)
self.is_standing = True
def run_step(self, action: dict, lowstate) -> dict: # Get command from remote controller
"""Run one step of the locomotion controller. ly = obs["remote.ly"] if abs(obs["remote.ly"]) > 0.1 else 0.0
lx = obs["remote.lx"] if abs(obs["remote.lx"]) > 0.1 else 0.0
Args: rx = obs["remote.rx"] if abs(obs["remote.rx"]) > 0.1 else 0.0
action: Action dict containing remote.lx/ly/rx/ry
lowstate: Robot lowstate containing motor positions/velocities and IMU
Returns:
Action dict for lower body joints (0-14)
"""
if lowstate is None:
return {}
lx, ly, rx, _ry = (action.get(k, 0.0) for k in REMOTE_AXES)
ly = ly if abs(ly) > 0.1 else 0.0
lx = lx if abs(lx) > 0.1 else 0.0
rx = rx if abs(rx) > 0.1 else 0.0
ly = np.clip(ly, -0.3, 0.3)
lx = np.clip(lx, -0.3, 0.3)
self.cmd[:] = [ly, -lx, -rx] self.cmd[:] = [ly, -lx, -rx]
# Get joint positions and velocities from lowstate # Get joint positions and velocities
for motor in G1_29_JointIndex: for motor in G1_29_JointIndex:
name = motor.name
idx = motor.value idx = motor.value
self.qj[idx] = lowstate.motor_state[idx].q self.qj[idx] = obs[f"{name}.q"]
self.dqj[idx] = lowstate.motor_state[idx].dq self.dqj[idx] = obs[f"{name}.dq"]
# Hide arm positions from policy (show DEFAULT_ANGLES instead) # Adapt observation for g1_23dof
# This prevents policy from reacting to teleop arm movements for idx in MISSING_JOINTS:
for arm_joint in G1_29_JointArmIndex: self.qj[idx] = 0.0
self.qj[arm_joint.value] = DEFAULT_ANGLES[arm_joint.value] self.dqj[idx] = 0.0
self.dqj[arm_joint.value] = 0.0
# Express IMU data in gravity frame of reference # Express IMU data in gravity frame of reference
quat = lowstate.imu_state.quaternion quat = [obs["imu.quat.w"], obs["imu.quat.x"], obs["imu.quat.y"], obs["imu.quat.z"]]
ang_vel = np.array(lowstate.imu_state.gyroscope, dtype=np.float32) ang_vel = np.array([obs["imu.gyro.x"], obs["imu.gyro.y"], obs["imu.gyro.z"]], dtype=np.float32)
gravity = get_gravity_orientation(quat) gravity = self.robot.get_gravity_orientation(quat)
# Scale joint positions and velocities before policy inference # Scale joint positions and velocities before policy inference
qj_obs = (self.qj - DEFAULT_ANGLES) * DOF_POS_SCALE qj_obs = (self.qj - DEFAULT_ANGLES) * DOF_POS_SCALE
@@ -199,16 +186,79 @@ class HolosomaLocomotionController:
# Run policy inference # Run policy inference
ort_in = {self.policy.get_inputs()[0].name: self.obs.reshape(1, -1).astype(np.float32)} ort_in = {self.policy.get_inputs()[0].name: self.obs.reshape(1, -1).astype(np.float32)}
raw_action = self.policy.run(None, ort_in)[0].squeeze() raw_action = self.policy.run(None, ort_in)[0].squeeze()
policy_action = np.clip(raw_action, -100.0, 100.0) action = np.clip(raw_action, -100.0, 100.0)
self.last_action = policy_action.copy() self.last_action = action.copy()
# Transform action back to target joint positions # Transform action back to target joint positions
target = DEFAULT_ANGLES + policy_action * ACTION_SCALE target = DEFAULT_ANGLES + action * ACTION_SCALE
# Build action dict (first 15 joints only) # Build action dict
action_dict = {} action_dict = {}
for i in range(15): for motor in G1_29_JointIndex:
motor_name = G1_29_JointIndex(i).name action_dict[f"{motor.name}.q"] = float(target[motor.value])
action_dict[f"{motor_name}.q"] = float(target[i])
return action_dict # Zero out missing joints for g1_23dof
for joint_idx in MISSING_JOINTS:
motor_name = G1_29_JointIndex(joint_idx).name
action_dict[f"{motor_name}.q"] = 0.0
# Send action to robot
self.robot.send_action(action_dict)
def run(repo_id: str = DEFAULT_HOLOSOMA_REPO_ID, policy_type: str = "fastsac") -> None:
    """Main function to run the Holosoma locomotion controller.

    Loads the policy and its PD gains, connects to the robot, then calls
    ``run_step`` once per ``CONTROL_DT`` until the robot's shutdown event is
    set or the user presses Ctrl+C. The robot is disconnected on the way out,
    including when controller construction or the reset fails.

    Args:
        repo_id: Hugging Face Hub repository ID for Holosoma policies.
        policy_type: Policy type to use ('fastsac' or 'ppo').
    """
    # Load policy and gains
    policy, kp, kd = load_policy(repo_id=repo_id, policy_type=policy_type)

    # Initialize robot
    config = UnitreeG1Config()
    robot = UnitreeG1(config)
    robot.connect()

    try:
        # Built inside the try block so that a failure here still reaches the
        # finally clause and disconnects the already-connected robot.
        holosoma_controller = HolosomaLocomotionController(policy, robot, kp, kd)

        robot.reset(CONTROL_DT, DEFAULT_ANGLES)

        logger.info("Use joystick: LY=fwd/back, LX=left/right, RX=rotate")
        logger.info("Press Ctrl+C to stop")

        # Run step
        while not robot._shutdown_event.is_set():
            # perf_counter is monotonic, so the measured step duration cannot be
            # skewed (or go negative) if the system wall clock is adjusted.
            start_time = time.perf_counter()
            holosoma_controller.run_step()
            elapsed = time.perf_counter() - start_time
            # Sleep only for what is left of the control period; never negative.
            sleep_time = max(0, CONTROL_DT - elapsed)
            time.sleep(sleep_time)
    except KeyboardInterrupt:
        logger.info("Stopping locomotion...")
    finally:
        if robot.is_connected:
            robot.disconnect()
        logger.info("Done!")
if __name__ == "__main__":
    # Command-line entry point: pick the Hub repo and the policy variant,
    # then hand control to run().
    cli = argparse.ArgumentParser(description="Holosoma Locomotion Controller for Unitree G1")

    repo_help = f"Hugging Face Hub repo ID for Holosoma policies (default: {DEFAULT_HOLOSOMA_REPO_ID})"
    cli.add_argument("--repo-id", type=str, default=DEFAULT_HOLOSOMA_REPO_ID, help=repo_help)

    policy_help = "Policy type to use: 'fastsac' (default) or 'ppo'"
    cli.add_argument("--policy", type=str, choices=["fastsac", "ppo"], default="fastsac", help=policy_help)

    options = cli.parse_args()

    run(repo_id=options.repo_id, policy_type=options.policy)

View File

@@ -25,11 +25,11 @@ discord = "https://discord.gg/s3KuuzsPFb"
[project] [project]
name = "lerobot" name = "lerobot"
version = "0.5.1" version = "0.4.4"
description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch" description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch"
dynamic = ["readme"] dynamic = ["readme"]
license = { text = "Apache-2.0" } license = { text = "Apache-2.0" }
requires-python = ">=3.12" requires-python = ">=3.10"
authors = [ authors = [
{ name = "Rémi Cadène", email = "re.cadene@gmail.com" }, { name = "Rémi Cadène", email = "re.cadene@gmail.com" },
{ name = "Simon Alibert", email = "alibert.sim@gmail.com" }, { name = "Simon Alibert", email = "alibert.sim@gmail.com" },
@@ -50,8 +50,7 @@ classifiers = [
"Intended Audience :: Education", "Intended Audience :: Education",
"Intended Audience :: Science/Research", "Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License", "License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.13",
"Topic :: Software Development :: Build Tools", "Topic :: Software Development :: Build Tools",
"Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Artificial Intelligence",
] ]
@@ -60,30 +59,28 @@ keywords = ["lerobot", "huggingface", "robotics", "machine learning", "artifici
dependencies = [ dependencies = [
# Hugging Face dependencies # Hugging Face dependencies
"datasets>=4.0.0,<5.0.0", "datasets>=4.0.0,<4.2.0",
"diffusers>=0.27.2,<0.36.0", "diffusers>=0.27.2,<0.36.0",
"huggingface-hub>=1.0.0,<2.0.0", "huggingface-hub[hf-transfer,cli]>=0.34.2,<0.36.0",
"accelerate>=1.10.0,<2.0.0", "accelerate>=1.10.0,<2.0.0",
# Core dependencies # Core dependencies
"numpy>=2.0.0,<2.3.0", # NOTE: Explicitly listing numpy helps the resolver converge faster. Upper bound imposed by opencv-python-headless.
"setuptools>=71.0.0,<81.0.0", "setuptools>=71.0.0,<81.0.0",
"cmake>=3.29.0.1,<4.2.0", "cmake>=3.29.0.1,<4.2.0",
"packaging>=24.2,<26.0",
"torch>=2.2.1,<2.11.0",
"torchcodec>=0.2.1,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')",
"torchvision>=0.21.0,<0.26.0",
"einops>=0.8.0,<0.9.0", "einops>=0.8.0,<0.9.0",
"opencv-python-headless>=4.9.0,<4.14.0", "opencv-python-headless>=4.9.0,<4.13.0",
"av>=15.0.0,<16.0.0", "av>=15.0.0,<16.0.0",
"jsonlines>=4.0.0,<5.0.0", "jsonlines>=4.0.0,<5.0.0",
"pynput>=1.7.8,<1.9.0", "packaging>=24.2,<26.0",
"pynput>=1.7.7,<1.9.0",
"pyserial>=3.5,<4.0", "pyserial>=3.5,<4.0",
"wandb>=0.24.0,<0.25.0", "wandb>=0.24.0,<0.25.0",
"draccus==0.10.0", # TODO: Relax version constraint
"torch>=2.2.1,<2.8.0", # TODO: Bumb dependency
"torchcodec>=0.2.1,<0.6.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # TODO: Bumb dependency
"torchvision>=0.21.0,<0.23.0", # TODO: Bumb dependency
"draccus==0.10.0", # TODO: Remove ==
"gymnasium>=1.1.1,<2.0.0", "gymnasium>=1.1.1,<2.0.0",
"rerun-sdk>=0.24.0,<0.27.0", "rerun-sdk>=0.24.0,<0.27.0",
@@ -98,56 +95,44 @@ dependencies = [
# Common # Common
pygame-dep = ["pygame>=2.5.1,<2.7.0"] pygame-dep = ["pygame>=2.5.1,<2.7.0"]
placo-dep = ["placo>=0.9.6,<0.9.17"] placo-dep = ["placo>=0.9.6,<0.10.0"]
transformers-dep = ["transformers>=5.3.0,<6.0.0"] transformers-dep = ["transformers>=4.57.1,<5.0.0"]
grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"] grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"]
can-dep = ["python-can>=4.2.0,<5.0.0"]
peft-dep = ["peft>=0.18.0,<1.0.0"]
scipy-dep = ["scipy>=1.14.0,<2.0.0"]
qwen-vl-utils-dep = ["qwen-vl-utils>=0.0.11,<0.1.0"]
matplotlib-dep = ["matplotlib>=3.10.3,<4.0.0", "contourpy>=1.3.0,<2.0.0"] # NOTE: Explicitly listing contourpy helps the resolver converge faster.
# Motors # Motors
feetech = ["feetech-servo-sdk>=1.0.0,<2.0.0"] feetech = ["feetech-servo-sdk>=1.0.0,<2.0.0"]
dynamixel = ["dynamixel-sdk>=3.7.31,<3.9.0"] dynamixel = ["dynamixel-sdk>=3.7.31,<3.9.0"]
damiao = ["lerobot[can-dep]"] damiao = ["python-can>=4.2.0,<5.0.0"]
robstride = ["lerobot[can-dep]"]
# Robots # Robots
openarms = ["lerobot[damiao]"]
gamepad = ["lerobot[pygame-dep]", "hidapi>=0.14.0,<0.15.0"] gamepad = ["lerobot[pygame-dep]", "hidapi>=0.14.0,<0.15.0"]
hopejr = ["lerobot[feetech]", "lerobot[pygame-dep]"] hopejr = ["lerobot[feetech]", "lerobot[pygame-dep]"]
lekiwi = ["lerobot[feetech]", "pyzmq>=26.2.1,<28.0.0"] lekiwi = ["lerobot[feetech]", "pyzmq>=26.2.1,<28.0.0"]
unitree_g1 = [ unitree_g1 = [
# "unitree-sdk2==1.0.1",
"pyzmq>=26.2.1,<28.0.0", "pyzmq>=26.2.1,<28.0.0",
"onnxruntime>=1.16.0,<2.0.0", "onnxruntime>=1.16.0,<2.0.0"
"onnx>=1.16.0,<2.0.0",
"meshcat>=0.3.0,<0.4.0",
"lerobot[matplotlib-dep]",
"lerobot[pygame-dep]",
] ]
reachy2 = ["reachy2_sdk>=1.0.15,<1.1.0"] reachy2 = ["reachy2_sdk>=1.0.15,<1.1.0"]
kinematics = ["lerobot[placo-dep]"] kinematics = ["lerobot[placo-dep]"]
intelrealsense = [ intelrealsense = [
"pyrealsense2>=2.55.1.6486,<2.57.0 ; sys_platform != 'darwin'", "pyrealsense2>=2.55.1.6486,<2.57.0 ; sys_platform != 'darwin'",
"pyrealsense2-macosx>=2.54,<2.57.0 ; sys_platform == 'darwin'", "pyrealsense2-macosx>=2.54,<2.55.0 ; sys_platform == 'darwin'",
] ]
phone = ["hebi-py>=2.8.0,<2.12.0", "teleop>=0.1.0,<0.2.0", "fastapi<1.0", "lerobot[scipy-dep]"] phone = ["hebi-py>=2.8.0,<2.12.0", "teleop>=0.1.0,<0.2.0", "fastapi<1.0"]
# Policies # Policies
wallx = [ wallx = [
"lerobot[transformers-dep]", "transformers==4.49.0",
"lerobot[peft]", "peft==0.17.1",
"lerobot[scipy-dep]", "scipy==1.15.3",
"torchdiffeq>=0.2.4,<0.3.0", "torchdiffeq==0.2.5",
"lerobot[qwen-vl-utils-dep]", "qwen_vl_utils==0.0.11"
] ]
pi = ["lerobot[transformers-dep]", "lerobot[scipy-dep]"] pi = ["transformers @ git+https://github.com/huggingface/transformers.git@fix/lerobot_openpi", "scipy>=1.10.1,<1.15"]
smolvla = ["lerobot[transformers-dep]", "num2words>=0.5.14,<0.6.0", "accelerate>=1.7.0,<2.0.0", "safetensors>=0.4.3,<1.0.0"] smolvla = ["lerobot[transformers-dep]", "num2words>=0.5.14,<0.6.0", "accelerate>=1.7.0,<2.0.0", "safetensors>=0.4.3,<1.0.0"]
groot = [ groot = [
"lerobot[transformers-dep]", "lerobot[transformers-dep]",
"lerobot[peft]", "peft>=0.13.0,<1.0.0",
"dm-tree>=0.1.8,<1.0.0", "dm-tree>=0.1.8,<1.0.0",
"timm>=1.0.0,<1.1.0", "timm>=1.0.0,<1.1.0",
"safetensors>=0.4.3,<1.0.0", "safetensors>=0.4.3,<1.0.0",
@@ -156,13 +141,13 @@ groot = [
"ninja>=1.11.1,<2.0.0", "ninja>=1.11.1,<2.0.0",
"flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'" "flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
] ]
sarm = ["lerobot[transformers-dep]", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"] sarm = ["lerobot[transformers-dep]", "faker>=33.0.0,<35.0.0", "matplotlib>=3.10.3,<4.0.0", "qwen-vl-utils>=0.0.14,<0.1.0"]
xvla = ["lerobot[transformers-dep]"] xvla = ["lerobot[transformers-dep]"]
hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"] hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
# Features # Features
async = ["lerobot[grpcio-dep]", "lerobot[matplotlib-dep]"] async = ["lerobot[grpcio-dep]", "matplotlib>=3.10.3,<4.0.0"]
peft = ["lerobot[transformers-dep]", "lerobot[peft-dep]"] peft = ["lerobot[transformers-dep]", "peft>=0.18.0,<1.0.0"]
# Development # Development
dev = ["pre-commit>=3.7.0,<5.0.0", "debugpy>=1.8.1,<1.9.0", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1", "mypy>=1.19.1"] dev = ["pre-commit>=3.7.0,<5.0.0", "debugpy>=1.8.1,<1.9.0", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1", "mypy>=1.19.1"]
@@ -170,19 +155,13 @@ test = ["pytest>=8.1.0,<9.0.0", "pytest-timeout>=2.4.0,<3.0.0", "pytest-cov>=5.0
video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"] video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"]
# Simulation # Simulation
# NOTE: Explicitly listing scipy helps flatten the dependency tree. aloha = ["gym-aloha>=0.1.2,<0.2.0"]
aloha = ["gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"] libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0"]
metaworld = ["metaworld==3.0.0", "lerobot[scipy-dep]"] metaworld = ["metaworld==3.0.0"]
# All # All
all = [ all = [
# NOTE(resolver hint): scipy is pulled in transitively via lerobot[scipy-dep] through
# multiple extras (aloha, metaworld, pi, wallx, phone). Listing it explicitly
# helps pip's resolver converge by constraining scipy early, before it encounters
# the loose scipy requirements from transitive deps like dm-control and metaworld.
"scipy>=1.14.0,<2.0.0",
"lerobot[dynamixel]", "lerobot[dynamixel]",
"lerobot[gamepad]", "lerobot[gamepad]",
"lerobot[hopejr]", "lerobot[hopejr]",
@@ -190,8 +169,8 @@ all = [
"lerobot[reachy2]", "lerobot[reachy2]",
"lerobot[kinematics]", "lerobot[kinematics]",
"lerobot[intelrealsense]", "lerobot[intelrealsense]",
"lerobot[wallx]", # "lerobot[wallx]",
"lerobot[pi]", # "lerobot[pi]", TODO(Pepijn): Update pi to transformers v5
"lerobot[smolvla]", "lerobot[smolvla]",
# "lerobot[groot]", TODO(Steven): Gr00t requires specific installation instructions for flash-attn # "lerobot[groot]", TODO(Steven): Gr00t requires specific installation instructions for flash-attn
"lerobot[xvla]", "lerobot[xvla]",
@@ -203,11 +182,10 @@ all = [
"lerobot[aloha]", "lerobot[aloha]",
"lerobot[pusht]", "lerobot[pusht]",
"lerobot[phone]", "lerobot[phone]",
"lerobot[libero]; sys_platform == 'linux'", "lerobot[libero]",
"lerobot[metaworld]", "lerobot[metaworld]",
"lerobot[sarm]", "lerobot[sarm]",
"lerobot[peft]", "lerobot[peft]",
# "lerobot[unitree_g1]", TODO: Unitree requires specific installation instructions for unitree_sdk2
] ]
[project.scripts] [project.scripts]
@@ -229,14 +207,11 @@ lerobot-edit-dataset="lerobot.scripts.lerobot_edit_dataset:main"
lerobot-setup-can="lerobot.scripts.lerobot_setup_can:main" lerobot-setup-can="lerobot.scripts.lerobot_setup_can:main"
# ---------------- Tool Configurations ---------------- # ---------------- Tool Configurations ----------------
[tool.setuptools.package-data]
lerobot = ["envs/*.json"]
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
where = ["src"] where = ["src"]
[tool.ruff] [tool.ruff]
target-version = "py312" target-version = "py310"
line-length = 110 line-length = 110
exclude = ["tests/artifacts/**/*.safetensors", "*_pb2.py", "*_pb2_grpc.py"] exclude = ["tests/artifacts/**/*.safetensors", "*_pb2.py", "*_pb2_grpc.py"]
@@ -328,7 +303,7 @@ default.extend-ignore-identifiers-re = [
# Uncomment [tool.mypy] first, then uncomment individual module overrides as they get proper type annotations # Uncomment [tool.mypy] first, then uncomment individual module overrides as they get proper type annotations
[tool.mypy] [tool.mypy]
python_version = "3.12" python_version = "3.10"
ignore_missing_imports = true ignore_missing_imports = true
follow_imports = "skip" follow_imports = "skip"
# warn_return_any = true # warn_return_any = true
@@ -380,9 +355,9 @@ ignore_errors = false
module = "lerobot.cameras.*" module = "lerobot.cameras.*"
ignore_errors = false ignore_errors = false
[[tool.mypy.overrides]] # [[tool.mypy.overrides]]
module = "lerobot.motors.*" # module = "lerobot.motors.*"
ignore_errors = false # ignore_errors = false
# [[tool.mypy.overrides]] # [[tool.mypy.overrides]]
# module = "lerobot.robots.*" # module = "lerobot.robots.*"
@@ -412,3 +387,85 @@ ignore_errors = false
# [[tool.mypy.overrides]] # [[tool.mypy.overrides]]
# module = "lerobot.scripts.*" # module = "lerobot.scripts.*"
# ignore_errors = false # ignore_errors = false
[tool.uv]
# wallx requires transformers==4.49.0 which conflicts with other extras that need >=4.53.0
conflicts = [
[
{ extra = "wallx" },
{ extra = "transformers-dep" },
],
[
{ extra = "wallx" },
{ extra = "pi" },
],
[
{ extra = "wallx" },
{ extra = "smolvla" },
],
[
{ extra = "wallx" },
{ extra = "groot" },
],
[
{ extra = "wallx" },
{ extra = "xvla" },
],
[
{ extra = "wallx" },
{ extra = "sarm" },
],
[
{ extra = "wallx" },
{ extra = "hilserl" },
],
[
{ extra = "wallx" },
{ extra = "libero" },
],
[
{ extra = "wallx" },
{ extra = "peft" },
],
[
{ extra = "wallx" },
{ extra = "all" },
],
# pi uses custom branch which conflicts with transformers-dep
[
{ extra = "pi" },
{ extra = "transformers-dep" },
],
[
{ extra = "pi" },
{ extra = "smolvla" },
],
[
{ extra = "pi" },
{ extra = "groot" },
],
[
{ extra = "pi" },
{ extra = "xvla" },
],
[
{ extra = "pi" },
{ extra = "sarm" },
],
[
{ extra = "pi" },
{ extra = "hilserl" },
],
[
{ extra = "pi" },
{ extra = "libero" },
],
[
{ extra = "pi" },
{ extra = "peft" },
],
[
{ extra = "pi" },
{ extra = "all" },
],
]

View File

@@ -1,73 +1,76 @@
# #
# This file is autogenerated by pip-compile with Python 3.12 # This file is autogenerated by pip-compile with Python 3.10
# by the following command: # by the following command:
# #
# pip-compile --output-file=requirements-macos.txt requirements.in # pip-compile --output-file=requirements-macos.txt requirements.in
# #
-e .[all] -e .[all]
# via -[all] # via -[all]
absl-py==2.4.0 absl-py==2.3.1
# via # via
# dm-control # dm-control
# dm-env # dm-env
# dm-tree # dm-tree
# labmaze # labmaze
# mujoco # mujoco
accelerate==1.13.0 # tensorboard
accelerate==1.11.0
# via # via
# lerobot # lerobot
# peft # peft
aiohappyeyeballs==2.6.1 aiohappyeyeballs==2.6.1
# via aiohttp # via aiohttp
aiohttp==3.13.3 aiohttp==3.13.1
# via fsspec # via fsspec
aiosignal==1.4.0 aiosignal==1.4.0
# via aiohttp # via aiohttp
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0 annotated-types==0.7.0
# via pydantic # via pydantic
anyio==4.12.1 antlr4-python3-runtime==4.9.3
# via
# hydra-core
# omegaconf
anyio==4.11.0
# via # via
# httpx
# starlette # starlette
# watchfiles # watchfiles
asttokens==3.0.1 asttokens==3.0.0
# via stack-data # via stack-data
async-timeout==5.0.1
# via aiohttp
attrs==25.4.0 attrs==25.4.0
# via # via
# aiohttp # aiohttp
# dm-tree # dm-tree
# jsonlines # jsonlines
# jsonschema
# referencing
# rerun-sdk # rerun-sdk
av==15.1.0 av==15.1.0
# via lerobot
bddl==1.0.1
# via libero
certifi==2025.10.5
# via # via
# lerobot
# qwen-vl-utils
certifi==2026.2.25
# via
# httpcore
# httpx
# requests # requests
# sentry-sdk # sentry-sdk
cffi==2.0.0 cffi==2.0.0
# via pymunk # via pymunk
cfgv==3.5.0 cfgv==3.4.0
# via pre-commit # via pre-commit
charset-normalizer==3.4.5 charset-normalizer==3.4.4
# via requests # via requests
click==8.3.1 click==8.3.0
# via # via
# typer
# uvicorn # uvicorn
# wandb # wandb
cloudpickle==3.1.2 cloudpickle==3.1.1
# via gymnasium # via
cmake==4.1.3 # gymnasium
# libero
cmake==4.1.0
# via lerobot # via lerobot
cmeel==0.59.0 cmeel==0.57.3
# via # via
# cmeel-assimp # cmeel-assimp
# cmeel-boost # cmeel-boost
@@ -105,17 +108,15 @@ cmeel-zlib==1.3.1
# via cmeel-assimp # via cmeel-assimp
coal-library==3.0.1 coal-library==3.0.1
# via pin # via pin
contourpy==1.3.3 contourpy==1.3.2
# via # via matplotlib
# lerobot coverage[toml]==7.11.0
# matplotlib
coverage[toml]==7.13.4
# via pytest-cov # via pytest-cov
cycler==0.12.1 cycler==0.12.1
# via matplotlib # via matplotlib
datasets==4.6.1 datasets==4.1.1
# via lerobot # via lerobot
debugpy==1.8.20 debugpy==1.8.17
# via lerobot # via lerobot
decorator==5.2.1 decorator==5.2.1
# via ipython # via ipython
@@ -129,7 +130,7 @@ dill==0.4.0
# multiprocess # multiprocess
distlib==0.4.0 distlib==0.4.0
# via virtualenv # via virtualenv
dm-control==1.0.37 dm-control==1.0.34
# via gym-aloha # via gym-aloha
dm-env==1.6 dm-env==1.6
# via dm-control # via dm-control
@@ -137,55 +138,69 @@ dm-tree==0.1.9
# via # via
# dm-control # dm-control
# dm-env # dm-env
# lerobot
docopt==0.6.2 docopt==0.6.2
# via num2words # via num2words
draccus==0.10.0 draccus==0.10.0
# via lerobot # via lerobot
dynamixel-sdk==3.8.4 dynamixel-sdk==3.8.4
# via lerobot # via lerobot
easydict==1.13
# via libero
egl-probe @ git+https://github.com/huggingface/egl_probe.git
# via
# libero
# robomimic
eigenpy==3.10.3 eigenpy==3.10.3
# via coal-library # via coal-library
einops==0.8.2 einops==0.8.1
# via lerobot
eiquadprog==1.2.9
# via placo
etils[epath,epy]==1.14.0
# via mujoco
executing==2.2.1
# via stack-data
faker==34.0.2
# via lerobot
farama-notifications==0.0.4
# via gymnasium
fastapi==0.135.1
# via # via
# lerobot # lerobot
# teleop # libero
eiquadprog==1.2.9
# via placo
etils[epath,epy]==1.13.0
# via mujoco
exceptiongroup==1.3.0
# via
# anyio
# ipython
# pytest
executing==2.2.1
# via stack-data
farama-notifications==0.0.4
# via gymnasium
fastapi==0.119.1
# via teleop
fastjsonschema==2.21.2
# via nbformat
feetech-servo-sdk==1.0.0 feetech-servo-sdk==1.0.0
# via lerobot # via lerobot
filelock==3.25.0 filelock==3.20.0
# via # via
# datasets # datasets
# diffusers # diffusers
# huggingface-hub # huggingface-hub
# python-discovery
# torch # torch
# transformers
# virtualenv # virtualenv
fonttools==4.61.1 fonttools==4.60.1
# via matplotlib # via matplotlib
frozenlist==1.8.0 frozenlist==1.8.0
# via # via
# aiohttp # aiohttp
# aiosignal # aiosignal
fsspec[http]==2026.2.0 fsspec[http]==2025.9.0
# via # via
# datasets # datasets
# etils # etils
# huggingface-hub # huggingface-hub
# torch # torch
future==1.0.0
# via libero
gitdb==4.0.12 gitdb==4.0.12
# via gitpython # via gitpython
gitpython==3.1.46 gitpython==3.1.45
# via wandb # via wandb
glfw==2.10.0 glfw==2.10.0
# via # via
@@ -197,6 +212,7 @@ grpcio==1.73.1
# lerobot # lerobot
# reachy2-sdk # reachy2-sdk
# reachy2-sdk-api # reachy2-sdk-api
# tensorboard
grpcio-tools==1.73.1 grpcio-tools==1.73.1
# via # via
# lerobot # lerobot
@@ -207,67 +223,71 @@ gym-hil==0.1.13
# via lerobot # via lerobot
gym-pusht==0.1.6 gym-pusht==0.1.6
# via lerobot # via lerobot
gymnasium==1.2.3 gymnasium==1.2.1
# via # via
# gym-aloha # gym-aloha
# gym-hil # gym-hil
# gym-pusht # gym-pusht
# lerobot # lerobot
# libero
# metaworld # metaworld
h11==0.16.0 h11==0.16.0
# via # via uvicorn
# httpcore h5py==3.15.1
# uvicorn # via robomimic
hebi-py==2.11.0 hebi-py==2.11.0
# via lerobot # via lerobot
hf-xet==1.3.2 hf-transfer==0.1.9
# via huggingface-hub
hf-xet==1.1.10
# via huggingface-hub # via huggingface-hub
hidapi==0.14.0.post4 hidapi==0.14.0.post4
# via # via
# gym-hil # gym-hil
# lerobot # lerobot
httpcore==1.0.9
# via httpx
httptools==0.7.1 httptools==0.7.1
# via uvicorn # via uvicorn
httpx==0.28.1 huggingface-hub[cli,hf-transfer]==0.35.3
# via
# datasets
# huggingface-hub
huggingface-hub==1.6.0
# via # via
# accelerate # accelerate
# datasets # datasets
# diffusers # diffusers
# lerobot # lerobot
# peft # peft
# timm
# tokenizers # tokenizers
# transformers # transformers
identify==2.6.17 hydra-core==1.3.2
# via libero
identify==2.6.15
# via pre-commit # via pre-commit
idna==3.11 idna==3.11
# via # via
# anyio # anyio
# httpx
# requests # requests
# yarl # yarl
imageio[ffmpeg]==2.37.2 imageio[ffmpeg]==2.37.0
# via # via
# gym-aloha # gym-aloha
# gym-hil # gym-hil
# lerobot # lerobot
# metaworld # metaworld
# robomimic
# scikit-image # scikit-image
imageio-ffmpeg==0.6.0 imageio-ffmpeg==0.6.0
# via imageio # via
importlib-metadata==8.7.1 # imageio
# robomimic
importlib-metadata==8.7.0
# via diffusers # via diffusers
importlib-resources==6.5.2
# via etils
iniconfig==2.3.0 iniconfig==2.3.0
# via pytest # via pytest
ipython==9.11.0 inquirerpy==0.3.4
# via huggingface-hub
ipython==8.37.0
# via meshcat # via meshcat
ipython-pygments-lexers==1.1.1
# via ipython
ischedule==1.2.7 ischedule==1.2.7
# via placo # via placo
jedi==0.19.2 jedi==0.19.2
@@ -276,24 +296,44 @@ jinja2==3.1.6
# via torch # via torch
jsonlines==4.0.0 jsonlines==4.0.0
# via lerobot # via lerobot
jsonschema==4.25.1
# via nbformat
jsonschema-specifications==2025.9.1
# via jsonschema
jupyter-core==5.9.1
# via nbformat
jupytext==1.18.1
# via bddl
kiwisolver==1.4.9 kiwisolver==1.4.9
# via matplotlib # via matplotlib
labmaze==1.0.6 labmaze==1.0.6
# via dm-control # via dm-control
lazy-loader==0.5 lazy-loader==0.4
# via scikit-image # via scikit-image
librt==0.8.1 libero @ git+https://github.com/huggingface/lerobot-libero.git@main
# via mypy # via lerobot
llvmlite==0.45.1
# via numba
lxml==6.0.2 lxml==6.0.2
# via dm-control # via dm-control
markdown==3.9
# via tensorboard
markdown-it-py==4.0.0 markdown-it-py==4.0.0
# via rich # via
# jupytext
# mdit-py-plugins
markupsafe==3.0.3 markupsafe==3.0.3
# via jinja2 # via
matplotlib==3.10.8 # jinja2
# via lerobot # werkzeug
matplotlib==3.10.7
# via
# lerobot
# libero
matplotlib-inline==0.2.1 matplotlib-inline==0.2.1
# via ipython # via ipython
mdit-py-plugins==0.5.0
# via jupytext
mdurl==0.1.2 mdurl==0.1.2
# via markdown-it-py # via markdown-it-py
mergedeep==1.3.4 mergedeep==1.3.4
@@ -306,35 +346,41 @@ mock-serial==0.0.1
# via lerobot # via lerobot
mpmath==1.3.0 mpmath==1.3.0
# via sympy # via sympy
mujoco==3.5.0 mujoco==3.3.7
# via # via
# dm-control # dm-control
# gym-aloha # gym-aloha
# gym-hil # gym-hil
# libero
# metaworld # metaworld
multidict==6.7.1 # robosuite
multidict==6.7.0
# via # via
# aiohttp # aiohttp
# yarl # yarl
multiprocess==0.70.18 multiprocess==0.70.16
# via datasets # via datasets
mypy==1.19.1
# via lerobot
mypy-extensions==1.1.0 mypy-extensions==1.1.0
# via typing-inspect
nbformat==5.10.4
# via jupytext
networkx==3.4.2
# via # via
# mypy # bddl
# typing-inspect
networkx==3.6.1
# via
# scikit-image # scikit-image
# torch # torch
nodeenv==1.10.0 ninja==1.13.0
# via lerobot
nodeenv==1.9.1
# via pre-commit # via pre-commit
num2words==0.5.14 num2words==0.5.14
# via lerobot # via lerobot
numba==0.62.1
# via robosuite
numpy==2.2.6 numpy==2.2.6
# via # via
# accelerate # accelerate
# bddl
# cmeel-boost # cmeel-boost
# contourpy # contourpy
# datasets # datasets
@@ -343,14 +389,16 @@ numpy==2.2.6
# dm-env # dm-env
# dm-tree # dm-tree
# gymnasium # gymnasium
# h5py
# hebi-py # hebi-py
# imageio # imageio
# labmaze # labmaze
# lerobot # libero
# matplotlib # matplotlib
# meshcat # meshcat
# metaworld # metaworld
# mujoco # mujoco
# numba
# opencv-python # opencv-python
# opencv-python-headless # opencv-python-headless
# pandas # pandas
@@ -358,18 +406,26 @@ numpy==2.2.6
# pyquaternion # pyquaternion
# reachy2-sdk # reachy2-sdk
# rerun-sdk # rerun-sdk
# robomimic
# robosuite
# scikit-image # scikit-image
# scipy # scipy
# shapely # shapely
# teleop # teleop
# tensorboard
# tensorboardx
# tifffile # tifffile
# torchvision # torchvision
# transformers # transformers
# transforms3d # transforms3d
opencv-python==4.13.0.92 omegaconf==2.3.0
# via hydra-core
opencv-python==4.12.0.88
# via # via
# gym-pusht # gym-pusht
# libero
# reachy2-sdk # reachy2-sdk
# robosuite
opencv-python-headless==4.12.0.88 opencv-python-headless==4.12.0.88
# via lerobot # via lerobot
orderly-set==5.5.0 orderly-set==5.5.0
@@ -379,87 +435,97 @@ packaging==25.0
# accelerate # accelerate
# datasets # datasets
# huggingface-hub # huggingface-hub
# hydra-core
# jupytext
# lazy-loader # lazy-loader
# lerobot # lerobot
# matplotlib # matplotlib
# peft # peft
# pytest # pytest
# qwen-vl-utils
# reachy2-sdk # reachy2-sdk
# scikit-image # scikit-image
# tensorboard
# tensorboardx
# transformers # transformers
# wandb # wandb
pandas==2.3.3 pandas==2.3.3
# via # via
# datasets # datasets
# lerobot # lerobot
parso==0.8.6 parso==0.8.5
# via jedi # via jedi
pathspec==1.0.4 peft==0.17.1
# via mypy
peft==0.18.1
# via lerobot # via lerobot
pexpect==4.9.0 pexpect==4.9.0
# via ipython # via ipython
pillow==12.1.1 pfzy==0.3.4
# via inquirerpy
pillow==12.0.0
# via # via
# diffusers # diffusers
# imageio # imageio
# lerobot
# matplotlib # matplotlib
# meshcat # meshcat
# qwen-vl-utils
# rerun-sdk # rerun-sdk
# robosuite
# scikit-image # scikit-image
# tensorboard
# torchvision # torchvision
pin==3.4.0 pin==3.4.0
# via placo # via placo
placo==0.9.16 placo==0.9.14
# via lerobot # via lerobot
platformdirs==4.9.4 platformdirs==4.5.0
# via # via
# python-discovery # jupyter-core
# virtualenv # virtualenv
# wandb # wandb
pluggy==1.6.0 pluggy==1.6.0
# via # via
# pytest # pytest
# pytest-cov # pytest-cov
pre-commit==4.5.1 pre-commit==4.3.0
# via lerobot # via lerobot
prompt-toolkit==3.0.52 prompt-toolkit==3.0.52
# via ipython # via
# inquirerpy
# ipython
propcache==0.4.1 propcache==0.4.1
# via # via
# aiohttp # aiohttp
# yarl # yarl
protobuf==6.31.1 protobuf==6.31.0
# via # via
# dm-control # dm-control
# grpcio-tools # grpcio-tools
# lerobot # lerobot
# reachy2-sdk # reachy2-sdk
# reachy2-sdk-api # reachy2-sdk-api
# tensorboard
# tensorboardx
# wandb # wandb
psutil==7.2.2 psutil==7.1.1
# via # via
# accelerate # accelerate
# imageio # imageio
# peft # peft
# robomimic
ptyprocess==0.7.0 ptyprocess==0.7.0
# via pexpect # via pexpect
pure-eval==0.2.3 pure-eval==0.2.3
# via stack-data # via stack-data
pyarrow==23.0.1 pyarrow==21.0.0
# via # via
# datasets # datasets
# rerun-sdk # rerun-sdk
pycparser==3.0 pycparser==2.23
# via cffi # via cffi
pydantic==2.12.5 pydantic==2.12.3
# via # via
# fastapi # fastapi
# wandb # wandb
pydantic-core==2.41.5 pydantic-core==2.41.4
# via pydantic # via pydantic
pygame==2.6.1 pygame==2.6.1
# via # via
@@ -469,35 +535,33 @@ pygame==2.6.1
pygments==2.19.2 pygments==2.19.2
# via # via
# ipython # ipython
# ipython-pygments-lexers
# pytest # pytest
# rich
pymunk==6.11.1 pymunk==6.11.1
# via # via
# gym-pusht # gym-pusht
# lerobot # lerobot
pyngrok==7.5.1 pyngrok==7.4.1
# via meshcat # via meshcat
pynput==1.8.1 pynput==1.8.1
# via # via
# gym-hil # gym-hil
# lerobot # lerobot
pyobjc-core==12.1 pyobjc-core==12.0
# via # via
# pyobjc-framework-applicationservices # pyobjc-framework-applicationservices
# pyobjc-framework-cocoa # pyobjc-framework-cocoa
# pyobjc-framework-coretext # pyobjc-framework-coretext
# pyobjc-framework-quartz # pyobjc-framework-quartz
pyobjc-framework-applicationservices==12.1 pyobjc-framework-applicationservices==12.0
# via pynput # via pynput
pyobjc-framework-cocoa==12.1 pyobjc-framework-cocoa==12.0
# via # via
# pyobjc-framework-applicationservices # pyobjc-framework-applicationservices
# pyobjc-framework-coretext # pyobjc-framework-coretext
# pyobjc-framework-quartz # pyobjc-framework-quartz
pyobjc-framework-coretext==12.1 pyobjc-framework-coretext==12.0
# via pyobjc-framework-applicationservices # via pyobjc-framework-applicationservices
pyobjc-framework-quartz==12.1 pyobjc-framework-quartz==12.0
# via # via
# pynput # pynput
# pyobjc-framework-applicationservices # pyobjc-framework-applicationservices
@@ -506,13 +570,13 @@ pyopengl==3.1.10
# via # via
# dm-control # dm-control
# mujoco # mujoco
pyparsing==3.3.2 pyparsing==3.2.5
# via # via
# dm-control # dm-control
# matplotlib # matplotlib
pyquaternion==0.9.9 pyquaternion==0.9.9
# via reachy2-sdk # via reachy2-sdk
pyrealsense2-macosx==2.56.5 pyrealsense2-macosx==2.54.2
# via lerobot # via lerobot
pyserial==3.5 pyserial==3.5
# via # via
@@ -521,6 +585,7 @@ pyserial==3.5
# lerobot # lerobot
pytest==8.4.2 pytest==8.4.2
# via # via
# bddl
# lerobot # lerobot
# pytest-cov # pytest-cov
# pytest-timeout # pytest-timeout
@@ -531,14 +596,11 @@ pytest-timeout==2.4.0
# via lerobot # via lerobot
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via # via
# faker
# matplotlib # matplotlib
# pandas # pandas
python-discovery==1.1.1 python-dotenv==1.1.1
# via virtualenv
python-dotenv==1.2.2
# via uvicorn # via uvicorn
pytz==2026.1.post1 pytz==2025.2
# via pandas # via pandas
pyyaml==6.0.3 pyyaml==6.0.3
# via # via
@@ -547,10 +609,13 @@ pyyaml==6.0.3
# draccus # draccus
# hebi-py # hebi-py
# huggingface-hub # huggingface-hub
# jupytext
# omegaconf
# peft # peft
# pre-commit # pre-commit
# pyngrok # pyngrok
# pyyaml-include # pyyaml-include
# timm
# transformers # transformers
# uvicorn # uvicorn
# wandb # wandb
@@ -560,13 +625,15 @@ pyzmq==27.1.0
# via # via
# lerobot # lerobot
# meshcat # meshcat
qwen-vl-utils==0.0.14 reachy2-sdk==1.0.14
# via lerobot
reachy2-sdk==1.0.15
# via lerobot # via lerobot
reachy2-sdk-api==1.0.21 reachy2-sdk-api==1.0.21
# via reachy2-sdk # via reachy2-sdk
regex==2026.2.28 referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2025.10.23
# via # via
# diffusers # diffusers
# transformers # transformers
@@ -575,150 +642,184 @@ requests==2.32.5
# datasets # datasets
# diffusers # diffusers
# dm-control # dm-control
# qwen-vl-utils # huggingface-hub
# teleop # teleop
# transformers
# wandb # wandb
rerun-sdk==0.26.2 rerun-sdk==0.26.1
# via lerobot # via lerobot
rhoban-cmeel-jsoncpp==1.9.4.9 rhoban-cmeel-jsoncpp==1.9.4.9
# via placo # via placo
rich==14.3.3 robomimic==0.2.0
# via typer # via libero
safetensors==0.7.0 robosuite==1.4.0
# via libero
rpds-py==0.28.0
# via
# jsonschema
# referencing
safetensors==0.6.2
# via # via
# accelerate # accelerate
# diffusers # diffusers
# lerobot # lerobot
# peft # peft
# timm
# transformers # transformers
scikit-image==0.25.2 scikit-image==0.25.2
# via # via
# gym-pusht # gym-pusht
# lerobot # lerobot
scipy==1.17.1 scipy==1.15.3
# via # via
# dm-control # dm-control
# lerobot
# metaworld # metaworld
# robosuite
# scikit-image # scikit-image
# torchdiffeq sentry-sdk==2.42.1
sentry-sdk==2.54.0
# via wandb # via wandb
shapely==2.1.2 shapely==2.1.2
# via gym-pusht # via gym-pusht
shellingham==1.5.4
# via typer
six==1.17.0 six==1.17.0
# via # via
# pynput # pynput
# python-dateutil # python-dateutil
smmap==5.0.3 smmap==5.0.2
# via gitdb # via gitdb
sniffio==1.3.1
# via anyio
stack-data==0.6.3 stack-data==0.6.3
# via ipython # via ipython
starlette==0.52.1 starlette==0.48.0
# via fastapi # via fastapi
sympy==1.14.0 sympy==1.14.0
# via torch # via torch
teleop==0.1.4 teleop==0.1.2
# via lerobot # via lerobot
termcolor==3.3.0 tensorboard==2.20.0
# via lerobot # via robomimic
tifffile==2026.3.3 tensorboard-data-server==0.7.2
# via tensorboard
tensorboardx==2.6.4
# via robomimic
termcolor==3.1.0
# via
# lerobot
# robomimic
thop==0.1.1.post2209072238
# via libero
tifffile==2025.5.10
# via scikit-image # via scikit-image
tokenizers==0.22.2 timm==1.0.20
# via lerobot
tokenizers==0.22.1
# via transformers # via transformers
toml==0.10.2 toml==0.10.2
# via draccus # via draccus
torch==2.10.0 tomli==2.3.0
# via
# cmeel
# coverage
# jupytext
# pytest
torch==2.7.1
# via # via
# accelerate # accelerate
# lerobot # lerobot
# peft # peft
# torchdiffeq # robomimic
# thop
# timm
# torchvision # torchvision
torchcodec==0.10.0 torchcodec==0.5
# via lerobot # via lerobot
torchdiffeq==0.2.5 torchvision==0.22.1
# via lerobot # via
torchvision==0.25.0 # lerobot
# via lerobot # robomimic
tornado==6.5.4 # timm
tornado==6.5.2
# via meshcat # via meshcat
tqdm==4.67.3 tqdm==4.67.1
# via # via
# datasets # datasets
# dm-control # dm-control
# huggingface-hub # huggingface-hub
# peft # peft
# robomimic
# transformers # transformers
traitlets==5.14.3 traitlets==5.14.3
# via # via
# ipython # ipython
# jupyter-core
# matplotlib-inline # matplotlib-inline
transformers==5.3.0 # nbformat
transformers==4.57.1
# via # via
# lerobot # lerobot
# libero
# peft # peft
transforms3d==0.4.2 transforms3d==0.4.2
# via teleop # via teleop
typer==0.24.1
# via
# huggingface-hub
# transformers
typing-extensions==4.15.0 typing-extensions==4.15.0
# via # via
# aiosignal # aiosignal
# anyio # anyio
# etils # etils
# faker # exceptiongroup
# fastapi # fastapi
# gymnasium # gymnasium
# huggingface-hub # huggingface-hub
# mypy # ipython
# multidict
# pydantic # pydantic
# pydantic-core # pydantic-core
# referencing
# rerun-sdk # rerun-sdk
# starlette # starlette
# torch # torch
# typing-inspect # typing-inspect
# typing-inspection # typing-inspection
# uvicorn
# virtualenv
# wandb # wandb
typing-inspect==0.9.0 typing-inspect==0.9.0
# via draccus # via draccus
typing-inspection==0.4.2 typing-inspection==0.4.2
# via # via pydantic
# fastapi tzdata==2025.2
# pydantic
tzdata==2025.3
# via pandas # via pandas
u-msgpack-python==2.8.0 u-msgpack-python==2.8.0
# via meshcat # via meshcat
urllib3==2.6.3 urllib3==2.5.0
# via # via
# requests # requests
# sentry-sdk # sentry-sdk
uvicorn[standard]==0.41.0 uvicorn[standard]==0.38.0
# via teleop # via teleop
uvloop==0.22.1 uvloop==0.22.1
# via uvicorn # via uvicorn
virtualenv==21.1.0 virtualenv==20.35.3
# via pre-commit # via pre-commit
wandb==0.24.2 wandb==0.21.4
# via lerobot # via
# lerobot
# libero
watchfiles==1.1.1 watchfiles==1.1.1
# via uvicorn # via uvicorn
wcwidth==0.6.0 wcwidth==0.2.14
# via prompt-toolkit # via prompt-toolkit
websocket-client==1.9.0 websocket-client==1.9.0
# via teleop # via teleop
websockets==16.0 websockets==15.0.1
# via uvicorn # via uvicorn
wrapt==2.1.2 werkzeug==3.1.3
# via tensorboard
wrapt==2.0.0
# via dm-tree # via dm-tree
xxhash==3.6.0 xxhash==3.6.0
# via datasets # via datasets
yarl==1.23.0 yarl==1.22.0
# via aiohttp # via aiohttp
zipp==3.23.0 zipp==3.23.0
# via # via

View File

@@ -1,12 +1,12 @@
# #
# This file is autogenerated by pip-compile with Python 3.12 # This file is autogenerated by pip-compile with Python 3.10
# by the following command: # by the following command:
# #
# pip-compile --output-file=requirements-ubuntu.txt requirements.in # pip-compile --output-file=requirements-ubuntu.txt requirements.in
# #
-e .[all] -e .[all]
# via -[all] # via -[all]
absl-py==2.4.0 absl-py==2.3.1
# via # via
# dm-control # dm-control
# dm-env # dm-env
@@ -14,33 +14,30 @@ absl-py==2.4.0
# labmaze # labmaze
# mujoco # mujoco
# tensorboard # tensorboard
accelerate==1.13.0 accelerate==1.11.0
# via # via
# lerobot # lerobot
# peft # peft
aiohappyeyeballs==2.6.1 aiohappyeyeballs==2.6.1
# via aiohttp # via aiohttp
aiohttp==3.13.3 aiohttp==3.13.1
# via fsspec # via fsspec
aiosignal==1.4.0 aiosignal==1.4.0
# via aiohttp # via aiohttp
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0 annotated-types==0.7.0
# via pydantic # via pydantic
antlr4-python3-runtime==4.9.3 antlr4-python3-runtime==4.9.3
# via # via
# hydra-core # hydra-core
# omegaconf # omegaconf
anyio==4.12.1 anyio==4.11.0
# via # via
# httpx
# starlette # starlette
# watchfiles # watchfiles
asttokens==3.0.1 asttokens==3.0.0
# via stack-data # via stack-data
async-timeout==5.0.1
# via aiohttp
attrs==25.4.0 attrs==25.4.0
# via # via
# aiohttp # aiohttp
@@ -50,35 +47,30 @@ attrs==25.4.0
# referencing # referencing
# rerun-sdk # rerun-sdk
av==15.1.0 av==15.1.0
# via # via lerobot
# lerobot
# qwen-vl-utils
bddl==1.0.1 bddl==1.0.1
# via hf-libero # via libero
certifi==2026.2.25 certifi==2025.10.5
# via # via
# httpcore
# httpx
# requests # requests
# sentry-sdk # sentry-sdk
cffi==2.0.0 cffi==2.0.0
# via pymunk # via pymunk
cfgv==3.5.0 cfgv==3.4.0
# via pre-commit # via pre-commit
charset-normalizer==3.4.5 charset-normalizer==3.4.4
# via requests # via requests
click==8.3.1 click==8.3.0
# via # via
# typer
# uvicorn # uvicorn
# wandb # wandb
cloudpickle==3.1.2 cloudpickle==3.1.1
# via # via
# gymnasium # gymnasium
# hf-libero # libero
cmake==4.1.3 cmake==4.1.0
# via lerobot # via lerobot
cmeel==0.59.0 cmeel==0.57.3
# via # via
# cmeel-assimp # cmeel-assimp
# cmeel-boost # cmeel-boost
@@ -116,24 +108,20 @@ cmeel-zlib==1.3.1
# via cmeel-assimp # via cmeel-assimp
coal-library==3.0.1 coal-library==3.0.1
# via pin # via pin
contourpy==1.3.3 contourpy==1.3.2
# via # via matplotlib
# lerobot coverage[toml]==7.11.0
# matplotlib
coverage[toml]==7.13.4
# via pytest-cov # via pytest-cov
cuda-bindings==12.9.4
# via torch
cuda-pathfinder==1.4.1
# via cuda-bindings
cycler==0.12.1 cycler==0.12.1
# via matplotlib # via matplotlib
datasets==4.6.1 datasets==4.1.1
# via lerobot # via lerobot
debugpy==1.8.20 debugpy==1.8.17
# via lerobot # via lerobot
decorator==5.2.1 decorator==5.2.1
# via ipython # via ipython
decord==0.6.0
# via lerobot
deepdiff==8.6.1 deepdiff==8.6.1
# via lerobot # via lerobot
diffusers==0.35.2 diffusers==0.35.2
@@ -144,7 +132,7 @@ dill==0.4.0
# multiprocess # multiprocess
distlib==0.4.0 distlib==0.4.0
# via virtualenv # via virtualenv
dm-control==1.0.37 dm-control==1.0.34
# via gym-aloha # via gym-aloha
dm-env==1.6 dm-env==1.6
# via dm-control # via dm-control
@@ -152,6 +140,7 @@ dm-tree==0.1.9
# via # via
# dm-control # dm-control
# dm-env # dm-env
# lerobot
docopt==0.6.2 docopt==0.6.2
# via num2words # via num2words
draccus==0.10.0 draccus==0.10.0
@@ -159,60 +148,66 @@ draccus==0.10.0
dynamixel-sdk==3.8.4 dynamixel-sdk==3.8.4
# via lerobot # via lerobot
easydict==1.13 easydict==1.13
# via hf-libero # via libero
egl-probe==1.0.2 egl-probe @ git+https://github.com/huggingface/egl_probe.git
# via robomimic # via
# libero
# robomimic
eigenpy==3.10.3 eigenpy==3.10.3
# via coal-library # via coal-library
einops==0.8.2 einops==0.8.1
# via # via
# hf-libero # flash-attn
# lerobot # lerobot
# libero
eiquadprog==1.2.9 eiquadprog==1.2.9
# via placo # via placo
etils[epath,epy]==1.14.0 etils[epath,epy]==1.13.0
# via mujoco # via mujoco
evdev==1.9.3 evdev==1.9.2
# via pynput # via pynput
exceptiongroup==1.3.0
# via
# anyio
# ipython
# pytest
executing==2.2.1 executing==2.2.1
# via stack-data # via stack-data
faker==34.0.2
# via lerobot
farama-notifications==0.0.4 farama-notifications==0.0.4
# via gymnasium # via gymnasium
fastapi==0.135.1 fastapi==0.119.1
# via # via teleop
# lerobot
# teleop
fastjsonschema==2.21.2 fastjsonschema==2.21.2
# via nbformat # via nbformat
feetech-servo-sdk==1.0.0 feetech-servo-sdk==1.0.0
# via lerobot # via lerobot
filelock==3.25.0 filelock==3.20.0
# via # via
# datasets # datasets
# diffusers # diffusers
# huggingface-hub # huggingface-hub
# python-discovery
# torch # torch
# transformers
# virtualenv # virtualenv
fonttools==4.61.1 flash-attn==2.8.3
# via lerobot
fonttools==4.60.1
# via matplotlib # via matplotlib
frozenlist==1.8.0 frozenlist==1.8.0
# via # via
# aiohttp # aiohttp
# aiosignal # aiosignal
fsspec[http]==2026.2.0 fsspec[http]==2025.9.0
# via # via
# datasets # datasets
# etils # etils
# huggingface-hub # huggingface-hub
# torch # torch
future==1.0.0 future==1.0.0
# via hf-libero # via libero
gitdb==4.0.12 gitdb==4.0.12
# via gitpython # via gitpython
gitpython==3.1.46 gitpython==3.1.45
# via wandb # via wandb
glfw==2.10.0 glfw==2.10.0
# via # via
@@ -235,60 +230,50 @@ gym-hil==0.1.13
# via lerobot # via lerobot
gym-pusht==0.1.6 gym-pusht==0.1.6
# via lerobot # via lerobot
gymnasium==1.2.3 gymnasium==1.2.1
# via # via
# gym-aloha # gym-aloha
# gym-hil # gym-hil
# gym-pusht # gym-pusht
# hf-libero
# lerobot # lerobot
# libero
# metaworld # metaworld
h11==0.16.0 h11==0.16.0
# via # via uvicorn
# httpcore h5py==3.15.1
# uvicorn
h5py==3.16.0
# via robomimic # via robomimic
hebi-py==2.11.0 hebi-py==2.11.0
# via lerobot # via lerobot
hf-egl-probe==1.0.2 hf-transfer==0.1.9
# via hf-libero # via huggingface-hub
hf-libero==0.1.3 hf-xet==1.1.10
# via lerobot
hf-xet==1.3.2
# via huggingface-hub # via huggingface-hub
hidapi==0.14.0.post4 hidapi==0.14.0.post4
# via # via
# gym-hil # gym-hil
# lerobot # lerobot
httpcore==1.0.9
# via httpx
httptools==0.7.1 httptools==0.7.1
# via uvicorn # via uvicorn
httpx==0.28.1 huggingface-hub[cli,hf-transfer]==0.35.3
# via
# datasets
# huggingface-hub
huggingface-hub==1.6.0
# via # via
# accelerate # accelerate
# datasets # datasets
# diffusers # diffusers
# lerobot # lerobot
# peft # peft
# timm
# tokenizers # tokenizers
# transformers # transformers
hydra-core==1.3.2 hydra-core==1.3.2
# via hf-libero # via libero
identify==2.6.17 identify==2.6.15
# via pre-commit # via pre-commit
idna==3.11 idna==3.11
# via # via
# anyio # anyio
# httpx
# requests # requests
# yarl # yarl
imageio[ffmpeg]==2.37.2 imageio[ffmpeg]==2.37.0
# via # via
# gym-aloha # gym-aloha
# gym-hil # gym-hil
@@ -300,14 +285,16 @@ imageio-ffmpeg==0.6.0
# via # via
# imageio # imageio
# robomimic # robomimic
importlib-metadata==8.7.1 importlib-metadata==8.7.0
# via diffusers # via diffusers
importlib-resources==6.5.2
# via etils
iniconfig==2.3.0 iniconfig==2.3.0
# via pytest # via pytest
ipython==9.11.0 inquirerpy==0.3.4
# via huggingface-hub
ipython==8.37.0
# via meshcat # via meshcat
ipython-pygments-lexers==1.1.1
# via ipython
ischedule==1.2.7 ischedule==1.2.7
# via placo # via placo
jedi==0.19.2 jedi==0.19.2
@@ -316,41 +303,40 @@ jinja2==3.1.6
# via torch # via torch
jsonlines==4.0.0 jsonlines==4.0.0
# via lerobot # via lerobot
jsonschema==4.26.0 jsonschema==4.25.1
# via nbformat # via nbformat
jsonschema-specifications==2025.9.1 jsonschema-specifications==2025.9.1
# via jsonschema # via jsonschema
jupyter-core==5.9.1 jupyter-core==5.9.1
# via nbformat # via nbformat
jupytext==1.19.1 jupytext==1.18.1
# via bddl # via bddl
kiwisolver==1.4.9 kiwisolver==1.4.9
# via matplotlib # via matplotlib
labmaze==1.0.6 labmaze==1.0.6
# via dm-control # via dm-control
lazy-loader==0.5 lazy-loader==0.4
# via scikit-image # via scikit-image
librt==0.8.1 libero @ git+https://github.com/huggingface/lerobot-libero.git@main
# via mypy # via lerobot
llvmlite==0.46.0 llvmlite==0.45.1
# via numba # via numba
lxml==6.0.2 lxml==6.0.2
# via dm-control # via dm-control
markdown==3.10.2 markdown==3.9
# via tensorboard # via tensorboard
markdown-it-py==4.0.0 markdown-it-py==4.0.0
# via # via
# jupytext # jupytext
# mdit-py-plugins # mdit-py-plugins
# rich
markupsafe==3.0.3 markupsafe==3.0.3
# via # via
# jinja2 # jinja2
# werkzeug # werkzeug
matplotlib==3.10.8 matplotlib==3.10.7
# via # via
# hf-libero
# lerobot # lerobot
# libero
matplotlib-inline==0.2.1 matplotlib-inline==0.2.1
# via ipython # via ipython
mdit-py-plugins==0.5.0 mdit-py-plugins==0.5.0
@@ -367,38 +353,36 @@ mock-serial==0.0.1
# via lerobot # via lerobot
mpmath==1.3.0 mpmath==1.3.0
# via sympy # via sympy
mujoco==3.5.0 mujoco==3.3.7
# via # via
# dm-control # dm-control
# gym-aloha # gym-aloha
# gym-hil # gym-hil
# hf-libero # libero
# metaworld # metaworld
# robosuite # robosuite
multidict==6.7.1 multidict==6.7.0
# via # via
# aiohttp # aiohttp
# yarl # yarl
multiprocess==0.70.18 multiprocess==0.70.16
# via datasets # via datasets
mypy==1.19.1
# via lerobot
mypy-extensions==1.1.0 mypy-extensions==1.1.0
# via # via typing-inspect
# mypy
# typing-inspect
nbformat==5.10.4 nbformat==5.10.4
# via jupytext # via jupytext
networkx==3.6.1 networkx==3.4.2
# via # via
# bddl # bddl
# scikit-image # scikit-image
# torch # torch
nodeenv==1.10.0 ninja==1.13.0
# via lerobot
nodeenv==1.9.1
# via pre-commit # via pre-commit
num2words==0.5.14 num2words==0.5.14
# via lerobot # via lerobot
numba==0.64.0 numba==0.62.1
# via robosuite # via robosuite
numpy==2.2.6 numpy==2.2.6
# via # via
@@ -407,6 +391,7 @@ numpy==2.2.6
# cmeel-boost # cmeel-boost
# contourpy # contourpy
# datasets # datasets
# decord
# diffusers # diffusers
# dm-control # dm-control
# dm-env # dm-env
@@ -414,10 +399,9 @@ numpy==2.2.6
# gymnasium # gymnasium
# h5py # h5py
# hebi-py # hebi-py
# hf-libero
# imageio # imageio
# labmaze # labmaze
# lerobot # libero
# matplotlib # matplotlib
# meshcat # meshcat
# metaworld # metaworld
@@ -442,51 +426,49 @@ numpy==2.2.6
# torchvision # torchvision
# transformers # transformers
# transforms3d # transforms3d
nvidia-cublas-cu12==12.8.4.1 nvidia-cublas-cu12==12.6.4.1
# via # via
# nvidia-cudnn-cu12 # nvidia-cudnn-cu12
# nvidia-cusolver-cu12 # nvidia-cusolver-cu12
# torch # torch
nvidia-cuda-cupti-cu12==12.8.90 nvidia-cuda-cupti-cu12==12.6.80
# via torch # via torch
nvidia-cuda-nvrtc-cu12==12.8.93 nvidia-cuda-nvrtc-cu12==12.6.77
# via torch # via torch
nvidia-cuda-runtime-cu12==12.8.90 nvidia-cuda-runtime-cu12==12.6.77
# via torch # via torch
nvidia-cudnn-cu12==9.10.2.21 nvidia-cudnn-cu12==9.5.1.17
# via torch # via torch
nvidia-cufft-cu12==11.3.3.83 nvidia-cufft-cu12==11.3.0.4
# via torch # via torch
nvidia-cufile-cu12==1.13.1.3 nvidia-cufile-cu12==1.11.1.6
# via torch # via torch
nvidia-curand-cu12==10.3.9.90 nvidia-curand-cu12==10.3.7.77
# via torch # via torch
nvidia-cusolver-cu12==11.7.3.90 nvidia-cusolver-cu12==11.7.1.2
# via torch # via torch
nvidia-cusparse-cu12==12.5.8.93 nvidia-cusparse-cu12==12.5.4.2
# via # via
# nvidia-cusolver-cu12 # nvidia-cusolver-cu12
# torch # torch
nvidia-cusparselt-cu12==0.7.1 nvidia-cusparselt-cu12==0.6.3
# via torch # via torch
nvidia-nccl-cu12==2.27.5 nvidia-nccl-cu12==2.26.2
# via torch # via torch
nvidia-nvjitlink-cu12==12.8.93 nvidia-nvjitlink-cu12==12.6.85
# via # via
# nvidia-cufft-cu12 # nvidia-cufft-cu12
# nvidia-cusolver-cu12 # nvidia-cusolver-cu12
# nvidia-cusparse-cu12 # nvidia-cusparse-cu12
# torch # torch
nvidia-nvshmem-cu12==3.4.5 nvidia-nvtx-cu12==12.6.77
# via torch
nvidia-nvtx-cu12==12.8.90
# via torch # via torch
omegaconf==2.3.0 omegaconf==2.3.0
# via hydra-core # via hydra-core
opencv-python==4.13.0.92 opencv-python==4.12.0.88
# via # via
# gym-pusht # gym-pusht
# hf-libero # libero
# reachy2-sdk # reachy2-sdk
# robosuite # robosuite
opencv-python-headless==4.12.0.88 opencv-python-headless==4.12.0.88
@@ -505,7 +487,6 @@ packaging==25.0
# matplotlib # matplotlib
# peft # peft
# pytest # pytest
# qwen-vl-utils
# reachy2-sdk # reachy2-sdk
# scikit-image # scikit-image
# tensorboard # tensorboard
@@ -516,21 +497,21 @@ pandas==2.3.3
# via # via
# datasets # datasets
# lerobot # lerobot
parso==0.8.6 parso==0.8.5
# via jedi # via jedi
pathspec==1.0.4 peft==0.17.1
# via mypy
peft==0.18.1
# via lerobot # via lerobot
pexpect==4.9.0 pexpect==4.9.0
# via ipython # via ipython
pillow==12.1.1 pfzy==0.3.4
# via inquirerpy
pillow==12.0.0
# via # via
# diffusers # diffusers
# imageio # imageio
# lerobot
# matplotlib # matplotlib
# meshcat # meshcat
# qwen-vl-utils
# rerun-sdk # rerun-sdk
# robosuite # robosuite
# scikit-image # scikit-image
@@ -538,27 +519,28 @@ pillow==12.1.1
# torchvision # torchvision
pin==3.4.0 pin==3.4.0
# via placo # via placo
placo==0.9.16 placo==0.9.14
# via lerobot # via lerobot
platformdirs==4.9.4 platformdirs==4.5.0
# via # via
# jupyter-core # jupyter-core
# python-discovery
# virtualenv # virtualenv
# wandb # wandb
pluggy==1.6.0 pluggy==1.6.0
# via # via
# pytest # pytest
# pytest-cov # pytest-cov
pre-commit==4.5.1 pre-commit==4.3.0
# via lerobot # via lerobot
prompt-toolkit==3.0.52 prompt-toolkit==3.0.52
# via ipython # via
# inquirerpy
# ipython
propcache==0.4.1 propcache==0.4.1
# via # via
# aiohttp # aiohttp
# yarl # yarl
protobuf==6.31.1 protobuf==6.31.0
# via # via
# dm-control # dm-control
# grpcio-tools # grpcio-tools
@@ -568,7 +550,7 @@ protobuf==6.31.1
# tensorboard # tensorboard
# tensorboardx # tensorboardx
# wandb # wandb
psutil==7.2.2 psutil==7.1.1
# via # via
# accelerate # accelerate
# imageio # imageio
@@ -578,17 +560,17 @@ ptyprocess==0.7.0
# via pexpect # via pexpect
pure-eval==0.2.3 pure-eval==0.2.3
# via stack-data # via stack-data
pyarrow==23.0.1 pyarrow==21.0.0
# via # via
# datasets # datasets
# rerun-sdk # rerun-sdk
pycparser==3.0 pycparser==2.23
# via cffi # via cffi
pydantic==2.12.5 pydantic==2.12.3
# via # via
# fastapi # fastapi
# wandb # wandb
pydantic-core==2.41.5 pydantic-core==2.41.4
# via pydantic # via pydantic
pygame==2.6.1 pygame==2.6.1
# via # via
@@ -598,14 +580,12 @@ pygame==2.6.1
pygments==2.19.2 pygments==2.19.2
# via # via
# ipython # ipython
# ipython-pygments-lexers
# pytest # pytest
# rich
pymunk==6.11.1 pymunk==6.11.1
# via # via
# gym-pusht # gym-pusht
# lerobot # lerobot
pyngrok==7.5.1 pyngrok==7.4.1
# via meshcat # via meshcat
pynput==1.8.1 pynput==1.8.1
# via # via
@@ -615,7 +595,7 @@ pyopengl==3.1.10
# via # via
# dm-control # dm-control
# mujoco # mujoco
pyparsing==3.3.2 pyparsing==3.2.5
# via # via
# dm-control # dm-control
# matplotlib # matplotlib
@@ -641,16 +621,13 @@ pytest-timeout==2.4.0
# via lerobot # via lerobot
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via # via
# faker
# matplotlib # matplotlib
# pandas # pandas
python-discovery==1.1.1 python-dotenv==1.1.1
# via virtualenv
python-dotenv==1.2.2
# via uvicorn # via uvicorn
python-xlib==0.33 python-xlib==0.33
# via pynput # via pynput
pytz==2026.1.post1 pytz==2025.2
# via pandas # via pandas
pyyaml==6.0.3 pyyaml==6.0.3
# via # via
@@ -665,6 +642,7 @@ pyyaml==6.0.3
# pre-commit # pre-commit
# pyngrok # pyngrok
# pyyaml-include # pyyaml-include
# timm
# transformers # transformers
# uvicorn # uvicorn
# wandb # wandb
@@ -674,9 +652,7 @@ pyzmq==27.1.0
# via # via
# lerobot # lerobot
# meshcat # meshcat
qwen-vl-utils==0.0.14 reachy2-sdk==1.0.14
# via lerobot
reachy2-sdk==1.0.15
# via lerobot # via lerobot
reachy2-sdk-api==1.0.21 reachy2-sdk-api==1.0.21
# via reachy2-sdk # via reachy2-sdk
@@ -684,7 +660,7 @@ referencing==0.37.0
# via # via
# jsonschema # jsonschema
# jsonschema-specifications # jsonschema-specifications
regex==2026.2.28 regex==2025.10.23
# via # via
# diffusers # diffusers
# transformers # transformers
@@ -693,62 +669,60 @@ requests==2.32.5
# datasets # datasets
# diffusers # diffusers
# dm-control # dm-control
# qwen-vl-utils # huggingface-hub
# teleop # teleop
# transformers
# wandb # wandb
rerun-sdk==0.26.2 rerun-sdk==0.26.1
# via lerobot # via lerobot
rhoban-cmeel-jsoncpp==1.9.4.9 rhoban-cmeel-jsoncpp==1.9.4.9
# via placo # via placo
rich==14.3.3
# via typer
robomimic==0.2.0 robomimic==0.2.0
# via hf-libero # via libero
robosuite==1.4.0 robosuite==1.4.0
# via hf-libero # via libero
rpds-py==0.30.0 rpds-py==0.28.0
# via # via
# jsonschema # jsonschema
# referencing # referencing
safetensors==0.7.0 safetensors==0.6.2
# via # via
# accelerate # accelerate
# diffusers # diffusers
# lerobot # lerobot
# peft # peft
# timm
# transformers # transformers
scikit-image==0.25.2 scikit-image==0.25.2
# via # via
# gym-pusht # gym-pusht
# lerobot # lerobot
scipy==1.17.1 scipy==1.15.3
# via # via
# dm-control # dm-control
# lerobot
# metaworld # metaworld
# robosuite # robosuite
# scikit-image # scikit-image
# torchdiffeq sentry-sdk==2.42.1
sentry-sdk==2.54.0
# via wandb # via wandb
shapely==2.1.2 shapely==2.1.2
# via gym-pusht # via gym-pusht
shellingham==1.5.4
# via typer
six==1.17.0 six==1.17.0
# via # via
# pynput # pynput
# python-dateutil # python-dateutil
# python-xlib # python-xlib
smmap==5.0.3 smmap==5.0.2
# via gitdb # via gitdb
sniffio==1.3.1
# via anyio
stack-data==0.6.3 stack-data==0.6.3
# via ipython # via ipython
starlette==0.52.1 starlette==0.48.0
# via fastapi # via fastapi
sympy==1.14.0 sympy==1.14.0
# via torch # via torch
teleop==0.1.4 teleop==0.1.2
# via lerobot # via lerobot
tensorboard==2.20.0 tensorboard==2.20.0
# via robomimic # via robomimic
@@ -756,38 +730,46 @@ tensorboard-data-server==0.7.2
# via tensorboard # via tensorboard
tensorboardx==2.6.4 tensorboardx==2.6.4
# via robomimic # via robomimic
termcolor==3.3.0 termcolor==3.1.0
# via # via
# lerobot # lerobot
# robomimic # robomimic
thop==0.1.1.post2209072238 thop==0.1.1.post2209072238
# via hf-libero # via libero
tifffile==2026.3.3 tifffile==2025.5.10
# via scikit-image # via scikit-image
tokenizers==0.22.2 timm==1.0.20
# via lerobot
tokenizers==0.22.1
# via transformers # via transformers
toml==0.10.2 toml==0.10.2
# via draccus # via draccus
torch==2.10.0 tomli==2.3.0
# via
# cmeel
# coverage
# jupytext
# pytest
torch==2.7.1
# via # via
# accelerate # accelerate
# flash-attn
# lerobot # lerobot
# peft # peft
# robomimic # robomimic
# thop # thop
# torchdiffeq # timm
# torchvision # torchvision
torchcodec==0.10.0 torchcodec==0.5
# via lerobot # via lerobot
torchdiffeq==0.2.5 torchvision==0.22.1
# via lerobot
torchvision==0.25.0
# via # via
# lerobot # lerobot
# robomimic # robomimic
tornado==6.5.4 # timm
tornado==6.5.2
# via meshcat # via meshcat
tqdm==4.67.3 tqdm==4.67.1
# via # via
# datasets # datasets
# dm-control # dm-control
@@ -801,29 +783,26 @@ traitlets==5.14.3
# jupyter-core # jupyter-core
# matplotlib-inline # matplotlib-inline
# nbformat # nbformat
transformers==5.3.0 transformers==4.57.1
# via # via
# hf-libero
# lerobot # lerobot
# libero
# peft # peft
transforms3d==0.4.2 transforms3d==0.4.2
# via teleop # via teleop
triton==3.6.0 triton==3.3.1
# via torch # via torch
typer==0.24.1
# via
# huggingface-hub
# transformers
typing-extensions==4.15.0 typing-extensions==4.15.0
# via # via
# aiosignal # aiosignal
# anyio # anyio
# etils # etils
# faker # exceptiongroup
# fastapi # fastapi
# gymnasium # gymnasium
# huggingface-hub # huggingface-hub
# mypy # ipython
# multidict
# pydantic # pydantic
# pydantic-core # pydantic-core
# referencing # referencing
@@ -832,46 +811,46 @@ typing-extensions==4.15.0
# torch # torch
# typing-inspect # typing-inspect
# typing-inspection # typing-inspection
# uvicorn
# virtualenv
# wandb # wandb
typing-inspect==0.9.0 typing-inspect==0.9.0
# via draccus # via draccus
typing-inspection==0.4.2 typing-inspection==0.4.2
# via # via pydantic
# fastapi tzdata==2025.2
# pydantic
tzdata==2025.3
# via pandas # via pandas
u-msgpack-python==2.8.0 u-msgpack-python==2.8.0
# via meshcat # via meshcat
urllib3==2.6.3 urllib3==2.5.0
# via # via
# requests # requests
# sentry-sdk # sentry-sdk
uvicorn[standard]==0.41.0 uvicorn[standard]==0.38.0
# via teleop # via teleop
uvloop==0.22.1 uvloop==0.22.1
# via uvicorn # via uvicorn
virtualenv==21.1.0 virtualenv==20.35.3
# via pre-commit # via pre-commit
wandb==0.24.2 wandb==0.21.4
# via # via
# hf-libero
# lerobot # lerobot
# libero
watchfiles==1.1.1 watchfiles==1.1.1
# via uvicorn # via uvicorn
wcwidth==0.6.0 wcwidth==0.2.14
# via prompt-toolkit # via prompt-toolkit
websocket-client==1.9.0 websocket-client==1.9.0
# via teleop # via teleop
websockets==16.0 websockets==15.0.1
# via uvicorn # via uvicorn
werkzeug==3.1.6 werkzeug==3.1.3
# via tensorboard # via tensorboard
wrapt==2.1.2 wrapt==2.0.0
# via dm-tree # via dm-tree
xxhash==3.6.0 xxhash==3.6.0
# via datasets # via datasets
yarl==1.23.0 yarl==1.22.0
# via aiohttp # via aiohttp
zipp==3.23.0 zipp==3.23.0
# via # via

View File

@@ -1,9 +1,9 @@
# requirements.in # requirements.in
# requirements-macos.txt was generated on macOS and is platform-specific (macOS 26.3.1 25D2128 arm64). # requirements-macos.txt was generated on macOS and is platform-specific (macOS 26.0.1 25A362 arm64).
# Darwin MacBook-Pro.local 25.3.0 Darwin Kernel Version 25.3.0: Wed Jan 28 20:54:55 PST 2026; root:xnu-12377.91.3~2/RELEASE_ARM64_T8132 arm64 # Darwin MacBook-Pro.local 25.0.0 Darwin Kernel Version 25.0.0: Wed Sep 17 21:42:08 PDT 2025; root:xnu-12377.1.9~141/RELEASE_ARM64_T8132 arm64
# requirements-ubuntu.txt was generated on Linux and is platform-specific (Ubuntu 24.04.4 LTS x86_64). # requirements-ubuntu.txt was generated on Linux and is platform-specific (Ubuntu 24.04.3 LTS x86_64).
# Linux lerobot-linux 6.17.0-14-generic #14~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Jan 15 15:52:10 UTC 2 x86_64 x86_64 x86_64 GNU/Linux # Linux mlerobot-linux 6.14.0-33-generic #33~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 19 17:02:30 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
-e .[all] -e .[all]

View File

@@ -23,7 +23,7 @@ from typing import Any
import torch import torch
from lerobot.configs.types import PolicyFeature from lerobot.configs.types import PolicyFeature
from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features
# NOTE: Configs need to be loaded for the client to be able to instantiate the policy config # NOTE: Configs need to be loaded for the client to be able to instantiate the policy config
from lerobot.policies import ( # noqa: F401 from lerobot.policies import ( # noqa: F401

View File

@@ -39,13 +39,15 @@ import grpc
import torch import torch
from lerobot.policies.factory import get_policy_class, make_pre_post_processors from lerobot.policies.factory import get_policy_class, make_pre_post_processors
from lerobot.processor import PolicyProcessorPipeline from lerobot.processor import (
PolicyAction,
PolicyProcessorPipeline,
)
from lerobot.transport import ( from lerobot.transport import (
services_pb2, # type: ignore services_pb2, # type: ignore
services_pb2_grpc, # type: ignore services_pb2_grpc, # type: ignore
) )
from lerobot.transport.utils import receive_bytes_in_chunks from lerobot.transport.utils import receive_bytes_in_chunks
from lerobot.types import PolicyAction
from .configs import PolicyServerConfig from .configs import PolicyServerConfig
from .constants import SUPPORTED_POLICIES from .constants import SUPPORTED_POLICIES

View File

@@ -63,9 +63,9 @@ from lerobot.transport import (
services_pb2_grpc, # type: ignore services_pb2_grpc, # type: ignore
) )
from lerobot.transport.utils import grpc_channel_options, send_bytes_in_chunks from lerobot.transport.utils import grpc_channel_options, send_bytes_in_chunks
from lerobot.utils.import_utils import register_third_party_plugins
from .configs import RobotClientConfig from .configs import RobotClientConfig
from .constants import SUPPORTED_ROBOTS
from .helpers import ( from .helpers import (
Action, Action,
FPSTracker, FPSTracker,
@@ -485,9 +485,8 @@ class RobotClient:
def async_client(cfg: RobotClientConfig): def async_client(cfg: RobotClientConfig):
logging.info(pformat(asdict(cfg))) logging.info(pformat(asdict(cfg)))
# TODO: Assert if checking robot support is still needed with the plugin system if cfg.robot.type not in SUPPORTED_ROBOTS:
# if cfg.robot.type not in SUPPORTED_ROBOTS: raise ValueError(f"Robot {cfg.robot.type} not yet supported!")
# raise ValueError(f"Robot {cfg.robot.type} not yet supported!")
client = RobotClient(cfg) client = RobotClient(cfg)
@@ -513,5 +512,4 @@ def async_client(cfg: RobotClientConfig):
if __name__ == "__main__": if __name__ == "__main__":
register_third_party_plugins()
async_client() # run the client async_client() # run the client

View File

@@ -13,5 +13,5 @@
# limitations under the License. # limitations under the License.
from .camera import Camera from .camera import Camera
from .configs import CameraConfig, ColorMode, Cv2Backends, Cv2Rotation from .configs import CameraConfig, ColorMode, Cv2Rotation
from .utils import make_cameras_from_configs from .utils import make_cameras_from_configs

View File

@@ -15,12 +15,11 @@
# limitations under the License. # limitations under the License.
import abc import abc
import warnings
from typing import Any from typing import Any
from numpy.typing import NDArray # type: ignore # TODO: add type stubs for numpy.typing from numpy.typing import NDArray # type: ignore # TODO: add type stubs for numpy.typing
from .configs import CameraConfig from .configs import CameraConfig, ColorMode
class Camera(abc.ABC): class Camera(abc.ABC):
@@ -31,12 +30,20 @@ class Camera(abc.ABC):
Manages basic camera properties (FPS, resolution) and core operations: Manages basic camera properties (FPS, resolution) and core operations:
- Connection/disconnection - Connection/disconnection
- Frame capture (sync/async/latest) - Frame capture (sync/async)
Attributes: Attributes:
fps (int | None): Configured frames per second fps (int | None): Configured frames per second
width (int | None): Frame width in pixels width (int | None): Frame width in pixels
height (int | None): Frame height in pixels height (int | None): Frame height in pixels
Example:
class MyCamera(Camera):
def __init__(self, config): ...
@property
def is_connected(self) -> bool: ...
def connect(self, warmup=True): ...
# Plus other required methods
""" """
def __init__(self, config: CameraConfig): def __init__(self, config: CameraConfig):
@@ -49,32 +56,6 @@ class Camera(abc.ABC):
self.width: int | None = config.width self.width: int | None = config.width
self.height: int | None = config.height self.height: int | None = config.height
def __enter__(self):
"""
Context manager entry.
Automatically connects to the camera.
"""
self.connect()
return self
def __exit__(self, exc_type, exc_value, traceback) -> None:
"""
Context manager exit.
Automatically disconnects, ensuring resources are released even on error.
"""
self.disconnect()
def __del__(self) -> None:
"""
Destructor safety net.
Attempts to disconnect if the object is garbage collected without cleanup.
"""
try:
if self.is_connected:
self.disconnect()
except Exception: # nosec B110
pass
@property @property
@abc.abstractmethod @abc.abstractmethod
def is_connected(self) -> bool: def is_connected(self) -> bool:
@@ -108,10 +89,12 @@ class Camera(abc.ABC):
pass pass
@abc.abstractmethod @abc.abstractmethod
def read(self) -> NDArray[Any]: def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
"""Capture and return a single frame from the camera synchronously. """Capture and return a single frame from the camera.
This is a blocking call that will wait for the hardware and its SDK. Args:
color_mode: Desired color mode for the output frame. If None,
uses the camera's default color mode.
Returns: Returns:
np.ndarray: Captured frame as a numpy array. np.ndarray: Captured frame as a numpy array.
@@ -120,64 +103,17 @@ class Camera(abc.ABC):
@abc.abstractmethod @abc.abstractmethod
def async_read(self, timeout_ms: float = ...) -> NDArray[Any]: def async_read(self, timeout_ms: float = ...) -> NDArray[Any]:
"""Return the most recent new frame. """Asynchronously capture and return a single frame from the camera.
This method retrieves the latest frame captured by the background thread.
If a new frame is already available in the buffer (captured since the last call),
it returns it immediately.
It blocks up to `timeout_ms` only if the buffer is empty or if the latest frame
was already consumed by a previous `async_read` call.
Essentially, this method return the latest unconsumed frame, waiting if necessary
for a new one to arrive within the specified timeout.
Usage:
- Ideal for control loops where you want to ensure every processed frame
is fresh, effectively synchronizing your loop to the camera's FPS.
- Causes of a timeout usually include: very low camera FPS, heavy processing load,
or if the camera is disconnected.
Args: Args:
timeout_ms: Maximum time to wait for a new frame in milliseconds. timeout_ms: Maximum time to wait for a frame in milliseconds.
Defaults to 200ms (0.2s). Defaults to implementation-specific timeout.
Returns: Returns:
np.ndarray: Captured frame as a numpy array. np.ndarray: Captured frame as a numpy array.
Raises:
TimeoutError: If no new frame arrives within `timeout_ms`.
""" """
pass pass
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
"""Return the most recent frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
memory buffer. The frame may be stale,
meaning it could have been captured a while ago (hanging camera scenario e.g.).
Usage:
Ideal for scenarios requiring zero latency or decoupled frequencies & when
we want a guaranteed frame, such as UI visualization, logging, or
non-critical monitoring.
Returns:
NDArray[Any]: The frame image (numpy array).
Raises:
TimeoutError: If the latest frame is older than `max_age_ms`.
NotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
warnings.warn(
f"{self.__class__.__name__}.read_latest() is not implemented. "
"Please override read_latest(); it will be required in future releases.",
FutureWarning,
stacklevel=2,
)
return self.async_read()
@abc.abstractmethod @abc.abstractmethod
def disconnect(self) -> None: def disconnect(self) -> None:
"""Disconnect from the camera and release resources.""" """Disconnect from the camera and release resources."""

View File

@@ -25,10 +25,6 @@ class ColorMode(str, Enum):
RGB = "rgb" RGB = "rgb"
BGR = "bgr" BGR = "bgr"
@classmethod
def _missing_(cls, value: object) -> None:
raise ValueError(f"`color_mode` is expected to be in {list(cls)}, but {value} is provided.")
class Cv2Rotation(int, Enum): class Cv2Rotation(int, Enum):
NO_ROTATION = 0 NO_ROTATION = 0
@@ -36,25 +32,6 @@ class Cv2Rotation(int, Enum):
ROTATE_180 = 180 ROTATE_180 = 180
ROTATE_270 = -90 ROTATE_270 = -90
@classmethod
def _missing_(cls, value: object) -> None:
raise ValueError(f"`rotation` is expected to be in {list(cls)}, but {value} is provided.")
# Subset from https://docs.opencv.org/3.4/d4/d15/group__videoio__flags__base.html
class Cv2Backends(int, Enum):
ANY = 0
V4L2 = 200
DSHOW = 700
PVAPI = 800
ANDROID = 1000
AVFOUNDATION = 1200
MSMF = 1400
@classmethod
def _missing_(cls, value: object) -> None:
raise ValueError(f"`backend` is expected to be in {list(cls)}, but {value} is provided.")
@dataclass(kw_only=True) @dataclass(kw_only=True)
class CameraConfig(draccus.ChoiceRegistry, abc.ABC): # type: ignore # TODO: add type stubs for draccus class CameraConfig(draccus.ChoiceRegistry, abc.ABC): # type: ignore # TODO: add type stubs for draccus

View File

@@ -32,11 +32,10 @@ if platform.system() == "Windows" and "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"
os.environ["OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"] = "0" os.environ["OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"] = "0"
import cv2 # type: ignore # TODO: add type stubs for OpenCV import cv2 # type: ignore # TODO: add type stubs for OpenCV
from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected from lerobot.utils.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
from lerobot.utils.errors import DeviceNotConnectedError
from ..camera import Camera from ..camera import Camera
from ..utils import get_cv2_rotation from ..utils import get_cv2_backend, get_cv2_rotation
from .configuration_opencv import ColorMode, OpenCVCameraConfig from .configuration_opencv import ColorMode, OpenCVCameraConfig
# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance, # NOTE(Steven): The maximum opencv device index depends on your operating system. For instance,
@@ -71,24 +70,34 @@ class OpenCVCamera(Camera):
Example: Example:
```python ```python
from lerobot.cameras.opencv import OpenCVCamera from lerobot.cameras.opencv import OpenCVCamera
from lerobot.cameras.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode, Cv2Rotation
# Basic usage with camera index 0 # Basic usage with camera index 0
config = OpenCVCameraConfig(index_or_path=0) config = OpenCVCameraConfig(index_or_path=0)
camera = OpenCVCamera(config) camera = OpenCVCamera(config)
camera.connect() camera.connect()
# Read 1 frame synchronously (blocking) # Read 1 frame synchronously
color_image = camera.read() color_image = camera.read()
print(color_image.shape)
# Read 1 frame asynchronously (waits for new frame with a timeout) # Read 1 frame asynchronously
async_image = camera.async_read() async_image = camera.async_read()
# Get the latest frame immediately (no wait, returns timestamp)
latest_image, timestamp = camera.read_latest()
# When done, properly disconnect the camera using # When done, properly disconnect the camera using
camera.disconnect() camera.disconnect()
# Example with custom settings
custom_config = OpenCVCameraConfig(
index_or_path='/dev/video0', # Or use an index
fps=30,
width=1280,
height=720,
color_mode=ColorMode.RGB,
rotation=Cv2Rotation.ROTATE_90
)
custom_camera = OpenCVCamera(custom_config)
# ... connect, read, disconnect ...
``` ```
""" """
@@ -114,11 +123,10 @@ class OpenCVCamera(Camera):
self.stop_event: Event | None = None self.stop_event: Event | None = None
self.frame_lock: Lock = Lock() self.frame_lock: Lock = Lock()
self.latest_frame: NDArray[Any] | None = None self.latest_frame: NDArray[Any] | None = None
self.latest_timestamp: float | None = None
self.new_frame_event: Event = Event() self.new_frame_event: Event = Event()
self.rotation: int | None = get_cv2_rotation(config.rotation) self.rotation: int | None = get_cv2_rotation(config.rotation)
self.backend: int = config.backend self.backend: int = get_cv2_backend()
if self.height and self.width: if self.height and self.width:
self.capture_width, self.capture_height = self.width, self.height self.capture_width, self.capture_height = self.width, self.height
@@ -133,23 +141,20 @@ class OpenCVCamera(Camera):
"""Checks if the camera is currently connected and opened.""" """Checks if the camera is currently connected and opened."""
return isinstance(self.videocapture, cv2.VideoCapture) and self.videocapture.isOpened() return isinstance(self.videocapture, cv2.VideoCapture) and self.videocapture.isOpened()
@check_if_already_connected
def connect(self, warmup: bool = True) -> None: def connect(self, warmup: bool = True) -> None:
""" """
Connects to the OpenCV camera specified in the configuration. Connects to the OpenCV camera specified in the configuration.
Initializes the OpenCV VideoCapture object, sets desired camera properties Initializes the OpenCV VideoCapture object, sets desired camera properties
(FPS, width, height), starts the background reading thread and performs initial checks. (FPS, width, height), and performs initial checks.
Args:
warmup (bool): If True, waits at connect() time until at least one valid frame
has been captured by the background thread. Defaults to True.
Raises: Raises:
DeviceAlreadyConnectedError: If the camera is already connected. DeviceAlreadyConnectedError: If the camera is already connected.
ConnectionError: If the specified camera index/path is not found or fails to open. ConnectionError: If the specified camera index/path is not found or the camera is found but fails to open.
RuntimeError: If the camera opens but fails to apply requested settings. RuntimeError: If the camera opens but fails to apply requested FPS/resolution settings.
""" """
if self.is_connected:
raise DeviceAlreadyConnectedError(f"{self} is already connected.")
# Use 1 thread for OpenCV operations to avoid potential conflicts or # Use 1 thread for OpenCV operations to avoid potential conflicts or
# blocking in multi-threaded applications, especially during data collection. # blocking in multi-threaded applications, especially during data collection.
@@ -165,20 +170,15 @@ class OpenCVCamera(Camera):
) )
self._configure_capture_settings() self._configure_capture_settings()
self._start_read_thread()
if warmup and self.warmup_s > 0: if warmup:
start_time = time.time() start_time = time.time()
while time.time() - start_time < self.warmup_s: while time.time() - start_time < self.warmup_s:
self.async_read(timeout_ms=self.warmup_s * 1000) self.read()
time.sleep(0.1) time.sleep(0.1)
with self.frame_lock:
if self.latest_frame is None:
raise ConnectionError(f"{self} failed to capture frames during warmup.")
logger.info(f"{self} connected.") logger.info(f"{self} connected.")
@check_if_not_connected
def _configure_capture_settings(self) -> None: def _configure_capture_settings(self) -> None:
""" """
Applies the specified FOURCC, FPS, width, and height settings to the connected camera. Applies the specified FOURCC, FPS, width, and height settings to the connected camera.
@@ -196,8 +196,11 @@ class OpenCVCamera(Camera):
Raises: Raises:
RuntimeError: If the camera fails to set any of the specified properties RuntimeError: If the camera fails to set any of the specified properties
to the requested value. to the requested value.
DeviceNotConnectedError: If the camera is not connected. DeviceNotConnectedError: If the camera is not connected when attempting
to configure settings.
""" """
if not self.is_connected:
raise DeviceNotConnectedError(f"Cannot configure settings for {self} as it is not connected.")
# Set FOURCC first (if specified) as it can affect available FPS/resolution options # Set FOURCC first (if specified) as it can affect available FPS/resolution options
if self.config.fourcc is not None: if self.config.fourcc is not None:
@@ -336,18 +339,6 @@ class OpenCVCamera(Camera):
return found_cameras_info return found_cameras_info
def _read_from_hardware(self) -> NDArray[Any]:
if self.videocapture is None:
raise DeviceNotConnectedError(f"{self} videocapture is not initialized")
ret, frame = self.videocapture.read()
if not ret:
raise RuntimeError(f"{self} read failed (status={ret}).")
return frame
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]: def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
""" """
Reads a single frame synchronously from the camera. Reads a single frame synchronously from the camera.
@@ -355,6 +346,11 @@ class OpenCVCamera(Camera):
This is a blocking call. It waits for the next available frame from the This is a blocking call. It waits for the next available frame from the
camera hardware via OpenCV. camera hardware via OpenCV.
Args:
color_mode (Optional[ColorMode]): If specified, overrides the default
color mode (`self.color_mode`) for this read operation (e.g.,
request RGB even if default is BGR).
Returns: Returns:
np.ndarray: The captured frame as a NumPy array in the format np.ndarray: The captured frame as a NumPy array in the format
(height, width, channels), using the specified or default (height, width, channels), using the specified or default
@@ -366,31 +362,34 @@ class OpenCVCamera(Camera):
received frame dimensions don't match expectations before rotation. received frame dimensions don't match expectations before rotation.
ValueError: If an invalid `color_mode` is requested. ValueError: If an invalid `color_mode` is requested.
""" """
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
start_time = time.perf_counter() start_time = time.perf_counter()
if color_mode is not None: if self.videocapture is None:
logger.warning( raise DeviceNotConnectedError(f"{self} videocapture is not initialized")
f"{self} read() color_mode parameter is deprecated and will be removed in future versions."
)
if self.thread is None or not self.thread.is_alive(): ret, frame = self.videocapture.read()
raise RuntimeError(f"{self} read thread is not running.")
self.new_frame_event.clear() if not ret or frame is None:
frame = self.async_read(timeout_ms=10000) raise RuntimeError(f"{self} read failed (status={ret}).")
processed_frame = self._postprocess_image(frame, color_mode)
read_duration_ms = (time.perf_counter() - start_time) * 1e3 read_duration_ms = (time.perf_counter() - start_time) * 1e3
logger.debug(f"{self} read took: {read_duration_ms:.1f}ms") logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
return frame return processed_frame
def _postprocess_image(self, image: NDArray[Any]) -> NDArray[Any]: def _postprocess_image(self, image: NDArray[Any], color_mode: ColorMode | None = None) -> NDArray[Any]:
""" """
Applies color conversion, dimension validation, and rotation to a raw frame. Applies color conversion, dimension validation, and rotation to a raw frame.
Args: Args:
image (np.ndarray): The raw image frame (expected BGR format from OpenCV). image (np.ndarray): The raw image frame (expected BGR format from OpenCV).
color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None,
uses the instance's default `self.color_mode`.
Returns: Returns:
np.ndarray: The processed image frame. np.ndarray: The processed image frame.
@@ -400,10 +399,11 @@ class OpenCVCamera(Camera):
RuntimeError: If the raw frame dimensions do not match the configured RuntimeError: If the raw frame dimensions do not match the configured
`width` and `height`. `width` and `height`.
""" """
requested_color_mode = self.color_mode if color_mode is None else color_mode
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR): if requested_color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError( raise ValueError(
f"Invalid color mode '{self.color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}." f"Invalid color mode '{requested_color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
) )
h, w, c = image.shape h, w, c = image.shape
@@ -417,7 +417,7 @@ class OpenCVCamera(Camera):
raise RuntimeError(f"{self} frame channels={c} do not match expected 3 channels (RGB/BGR).") raise RuntimeError(f"{self} frame channels={c} do not match expected 3 channels (RGB/BGR).")
processed_image = image processed_image = image
if self.color_mode == ColorMode.RGB: if requested_color_mode == ColorMode.RGB:
processed_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) processed_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE, cv2.ROTATE_180]: if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE, cv2.ROTATE_180]:
@@ -431,7 +431,7 @@ class OpenCVCamera(Camera):
On each iteration: On each iteration:
1. Reads a color frame 1. Reads a color frame
2. Stores result in latest_frame and updates timestamp (thread-safe) 2. Stores result in latest_frame (thread-safe)
3. Sets new_frame_event to notify listeners 3. Sets new_frame_event to notify listeners
Stops on DeviceNotConnectedError, logs other errors and continues. Stops on DeviceNotConnectedError, logs other errors and continues.
@@ -439,37 +439,30 @@ class OpenCVCamera(Camera):
if self.stop_event is None: if self.stop_event is None:
raise RuntimeError(f"{self}: stop_event is not initialized before starting read loop.") raise RuntimeError(f"{self}: stop_event is not initialized before starting read loop.")
failure_count = 0
while not self.stop_event.is_set(): while not self.stop_event.is_set():
try: try:
raw_frame = self._read_from_hardware() color_image = self.read()
processed_frame = self._postprocess_image(raw_frame)
capture_time = time.perf_counter()
with self.frame_lock: with self.frame_lock:
self.latest_frame = processed_frame self.latest_frame = color_image
self.latest_timestamp = capture_time
self.new_frame_event.set() self.new_frame_event.set()
failure_count = 0
except DeviceNotConnectedError: except DeviceNotConnectedError:
break break
except Exception as e: except Exception as e:
if failure_count <= 10: logger.warning(f"Error reading frame in background thread for {self}: {e}")
failure_count += 1
logger.warning(f"Error reading frame in background thread for {self}: {e}")
else:
raise RuntimeError(f"{self} exceeded maximum consecutive read failures.") from e
def _start_read_thread(self) -> None: def _start_read_thread(self) -> None:
"""Starts or restarts the background read thread if it's not running.""" """Starts or restarts the background read thread if it's not running."""
self._stop_read_thread() if self.thread is not None and self.thread.is_alive():
self.thread.join(timeout=0.1)
if self.stop_event is not None:
self.stop_event.set()
self.stop_event = Event() self.stop_event = Event()
self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop") self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop")
self.thread.daemon = True self.thread.daemon = True
self.thread.start() self.thread.start()
time.sleep(0.1)
def _stop_read_thread(self) -> None: def _stop_read_thread(self) -> None:
"""Signals the background read thread to stop and waits for it to join.""" """Signals the background read thread to stop and waits for it to join."""
@@ -482,12 +475,6 @@ class OpenCVCamera(Camera):
self.thread = None self.thread = None
self.stop_event = None self.stop_event = None
with self.frame_lock:
self.latest_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]: def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
""" """
Reads the latest available frame asynchronously. Reads the latest available frame asynchronously.
@@ -495,7 +482,6 @@ class OpenCVCamera(Camera):
This method retrieves the most recent frame captured by the background This method retrieves the most recent frame captured by the background
read thread. It does not block waiting for the camera hardware directly, read thread. It does not block waiting for the camera hardware directly,
but may wait up to timeout_ms for the background thread to provide a frame. but may wait up to timeout_ms for the background thread to provide a frame.
It is “best effort” under high FPS.
Args: Args:
timeout_ms (float): Maximum time in milliseconds to wait for a frame timeout_ms (float): Maximum time in milliseconds to wait for a frame
@@ -510,14 +496,17 @@ class OpenCVCamera(Camera):
TimeoutError: If no frame becomes available within the specified timeout. TimeoutError: If no frame becomes available within the specified timeout.
RuntimeError: If an unexpected error occurs. RuntimeError: If an unexpected error occurs.
""" """
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive(): if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.") self._start_read_thread()
if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0): if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0):
thread_alive = self.thread is not None and self.thread.is_alive()
raise TimeoutError( raise TimeoutError(
f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. " f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. "
f"Read thread alive: {self.thread.is_alive()}." f"Read thread alive: {thread_alive}."
) )
with self.frame_lock: with self.frame_lock:
@@ -529,41 +518,6 @@ class OpenCVCamera(Camera):
return frame return frame
@check_if_not_connected
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
"""Return the most recent frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
memory buffer. The frame may be stale,
meaning it could have been captured a while ago (hanging camera scenario e.g.).
Returns:
NDArray[Any]: The frame image (numpy array).
Raises:
TimeoutError: If the latest frame is older than `max_age_ms`.
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
with self.frame_lock:
frame = self.latest_frame
timestamp = self.latest_timestamp
if frame is None or timestamp is None:
raise RuntimeError(f"{self} has not captured any frames yet.")
age_ms = (time.perf_counter() - timestamp) * 1e3
if age_ms > max_age_ms:
raise TimeoutError(
f"{self} latest frame is too old: {age_ms:.1f} ms (max allowed: {max_age_ms} ms)."
)
return frame
def disconnect(self) -> None: def disconnect(self) -> None:
""" """
Disconnects from the camera and cleans up resources. Disconnects from the camera and cleans up resources.
@@ -584,9 +538,4 @@ class OpenCVCamera(Camera):
self.videocapture.release() self.videocapture.release()
self.videocapture = None self.videocapture = None
with self.frame_lock:
self.latest_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
logger.info(f"{self} disconnected.") logger.info(f"{self} disconnected.")

View File

@@ -15,9 +15,9 @@
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from ..configs import CameraConfig, ColorMode, Cv2Backends, Cv2Rotation from ..configs import CameraConfig, ColorMode, Cv2Rotation
__all__ = ["OpenCVCameraConfig", "ColorMode", "Cv2Rotation", "Cv2Backends"] __all__ = ["OpenCVCameraConfig", "ColorMode", "Cv2Rotation"]
@CameraConfig.register_subclass("opencv") @CameraConfig.register_subclass("opencv")
@@ -50,7 +50,6 @@ class OpenCVCameraConfig(CameraConfig):
rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation. rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
warmup_s: Time reading frames before returning from connect (in seconds) warmup_s: Time reading frames before returning from connect (in seconds)
fourcc: FOURCC code for video format (e.g., "MJPG", "YUYV", "I420"). Defaults to None (auto-detect). fourcc: FOURCC code for video format (e.g., "MJPG", "YUYV", "I420"). Defaults to None (auto-detect).
backend: OpenCV backend identifier (https://docs.opencv.org/3.4/d4/d15/group__videoio__flags__base.html). Defaults to ANY.
Note: Note:
- Only 3-channel color output (RGB/BGR) is currently supported. - Only 3-channel color output (RGB/BGR) is currently supported.
@@ -63,12 +62,22 @@ class OpenCVCameraConfig(CameraConfig):
rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
warmup_s: int = 1 warmup_s: int = 1
fourcc: str | None = None fourcc: str | None = None
backend: Cv2Backends = Cv2Backends.ANY
def __post_init__(self) -> None: def __post_init__(self) -> None:
self.color_mode = ColorMode(self.color_mode) if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
self.rotation = Cv2Rotation(self.rotation) raise ValueError(
self.backend = Cv2Backends(self.backend) f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
)
if self.rotation not in (
Cv2Rotation.NO_ROTATION,
Cv2Rotation.ROTATE_90,
Cv2Rotation.ROTATE_180,
Cv2Rotation.ROTATE_270,
):
raise ValueError(
f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
)
if self.fourcc is not None and (not isinstance(self.fourcc, str) or len(self.fourcc) != 4): if self.fourcc is not None and (not isinstance(self.fourcc, str) or len(self.fourcc) != 4):
raise ValueError( raise ValueError(

View File

@@ -74,4 +74,7 @@ class Reachy2CameraConfig(CameraConfig):
f"`image_type` is expected to be 'left' or 'right' for teleop camera, and 'rgb' or 'depth' for depth camera, but {self.image_type} is provided." f"`image_type` is expected to be 'left' or 'right' for teleop camera, and 'rgb' or 'depth' for depth camera, but {self.image_type} is provided."
) )
self.color_mode = ColorMode(self.color_mode) if self.color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)

View File

@@ -32,7 +32,6 @@ if platform.system() == "Windows" and "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"
import cv2 # type: ignore # TODO: add type stubs for OpenCV import cv2 # type: ignore # TODO: add type stubs for OpenCV
import numpy as np # type: ignore # TODO: add type stubs for numpy import numpy as np # type: ignore # TODO: add type stubs for numpy
from lerobot.utils.decorators import check_if_not_connected
from lerobot.utils.import_utils import _reachy2_sdk_available from lerobot.utils.import_utils import _reachy2_sdk_available
if TYPE_CHECKING or _reachy2_sdk_available: if TYPE_CHECKING or _reachy2_sdk_available:
@@ -81,8 +80,6 @@ class Reachy2Camera(Camera):
self.config = config self.config = config
self.color_mode = config.color_mode self.color_mode = config.color_mode
self.latest_frame: NDArray[Any] | None = None
self.latest_timestamp: float | None = None
self.cam_manager: CameraManager | None = None self.cam_manager: CameraManager | None = None
@@ -124,12 +121,16 @@ class Reachy2Camera(Camera):
""" """
raise NotImplementedError("Camera detection is not implemented for Reachy2 cameras.") raise NotImplementedError("Camera detection is not implemented for Reachy2 cameras.")
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]: def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
""" """
Reads a single frame synchronously from the camera. Reads a single frame synchronously from the camera.
This method retrieves the most recent frame available in Reachy 2's low-level software. This is a blocking call.
Args:
color_mode (Optional[ColorMode]): If specified, overrides the default
color mode (`self.color_mode`) for this read operation (e.g.,
request RGB even if default is BGR).
Returns: Returns:
np.ndarray: The captured frame as a NumPy array in the format np.ndarray: The captured frame as a NumPy array in the format
@@ -138,13 +139,11 @@ class Reachy2Camera(Camera):
""" """
start_time = time.perf_counter() start_time = time.perf_counter()
if self.cam_manager is None: if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.") raise DeviceNotConnectedError(f"{self} is not connected.")
if color_mode is not None: if self.cam_manager is None:
logger.warning( raise DeviceNotConnectedError(f"{self} is not connected.")
f"{self} read() color_mode parameter is deprecated and will be removed in future versions."
)
frame: NDArray[Any] = np.empty((0, 0, 3), dtype=np.uint8) frame: NDArray[Any] = np.empty((0, 0, 3), dtype=np.uint8)
@@ -166,27 +165,25 @@ class Reachy2Camera(Camera):
raise ValueError(f"Invalid camera name '{self.config.name}'. Expected 'teleop' or 'depth'.") raise ValueError(f"Invalid camera name '{self.config.name}'. Expected 'teleop' or 'depth'.")
if frame is None: if frame is None:
raise RuntimeError(f"Internal error: No frame available for {self}.") return np.empty((0, 0, 3), dtype=np.uint8)
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR): if self.config.color_mode == "rgb":
raise ValueError(
f"Invalid color mode '{self.color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
)
if self.color_mode == ColorMode.RGB:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
self.latest_frame = frame
self.latest_timestamp = time.perf_counter()
read_duration_ms = (time.perf_counter() - start_time) * 1e3 read_duration_ms = (time.perf_counter() - start_time) * 1e3
logger.debug(f"{self} read took: {read_duration_ms:.1f}ms") logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
return frame return frame
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]: def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
""" """
Same as read() Reads the latest available frame.
This method retrieves the most recent frame available in Reachy 2's low-level software.
Args:
timeout_ms (float): Maximum time in milliseconds to wait for a frame
to become available. Defaults to 200ms (0.2 seconds).
Returns: Returns:
np.ndarray: The latest captured frame as a NumPy array in the format np.ndarray: The latest captured frame as a NumPy array in the format
@@ -197,40 +194,16 @@ class Reachy2Camera(Camera):
TimeoutError: If no frame becomes available within the specified timeout. TimeoutError: If no frame becomes available within the specified timeout.
RuntimeError: If an unexpected error occurs. RuntimeError: If an unexpected error occurs.
""" """
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
return self.read() frame = self.read()
@check_if_not_connected if frame is None:
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]: raise RuntimeError(f"Internal error: No frame available for {self}.")
"""Return the most recent frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the return frame
memory buffer. The frame may be stale,
meaning it could have been captured a while ago (hanging camera scenario e.g.).
Returns:
tuple[NDArray, float]:
- The frame image (numpy array).
- The timestamp (time.perf_counter) when this frame was captured.
Raises:
TimeoutError: If the latest frame is older than `max_age_ms`.
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if self.latest_frame is None or self.latest_timestamp is None:
raise RuntimeError(f"{self} has not captured any frames yet.")
age_ms = (time.perf_counter() - self.latest_timestamp) * 1e3
if age_ms > max_age_ms:
raise TimeoutError(
f"{self} latest frame is too old: {age_ms:.1f} ms (max allowed: {max_age_ms} ms)."
)
return self.latest_frame
@check_if_not_connected
def disconnect(self) -> None: def disconnect(self) -> None:
""" """
Stops the background read thread (if running). Stops the background read thread (if running).
@@ -238,6 +211,8 @@ class Reachy2Camera(Camera):
Raises: Raises:
DeviceNotConnectedError: If the camera is already disconnected. DeviceNotConnectedError: If the camera is already disconnected.
""" """
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} not connected.")
if self.cam_manager is not None: if self.cam_manager is not None:
self.cam_manager.disconnect() self.cam_manager.disconnect()

View File

@@ -30,8 +30,7 @@ try:
except Exception as e: except Exception as e:
logging.info(f"Could not import realsense: {e}") logging.info(f"Could not import realsense: {e}")
from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected from lerobot.utils.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
from lerobot.utils.errors import DeviceNotConnectedError
from ..camera import Camera from ..camera import Camera
from ..configs import ColorMode from ..configs import ColorMode
@@ -73,14 +72,15 @@ class RealSenseCamera(Camera):
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect() camera.connect()
# Read 1 frame synchronously (blocking) # Read 1 frame synchronously
color_image = camera.read() color_image = camera.read()
print(color_image.shape)
# Read 1 frame asynchronously (waits for new frame with a timeout) # Read 1 frame asynchronously
async_image = camera.async_read() async_image = camera.async_read()
# Get the latest frame immediately (no wait, returns timestamp) # When done, properly disconnect the camera using
latest_image, timestamp = camera.read_latest() camera.disconnect()
# Example with depth capture and custom settings # Example with depth capture and custom settings
custom_config = RealSenseCameraConfig( custom_config = RealSenseCameraConfig(
@@ -133,9 +133,7 @@ class RealSenseCamera(Camera):
self.thread: Thread | None = None self.thread: Thread | None = None
self.stop_event: Event | None = None self.stop_event: Event | None = None
self.frame_lock: Lock = Lock() self.frame_lock: Lock = Lock()
self.latest_color_frame: NDArray[Any] | None = None self.latest_frame: NDArray[Any] | None = None
self.latest_depth_frame: NDArray[Any] | None = None
self.latest_timestamp: float | None = None
self.new_frame_event: Event = Event() self.new_frame_event: Event = Event()
self.rotation: int | None = get_cv2_rotation(config.rotation) self.rotation: int | None = get_cv2_rotation(config.rotation)
@@ -153,7 +151,6 @@ class RealSenseCamera(Camera):
"""Checks if the camera pipeline is started and streams are active.""" """Checks if the camera pipeline is started and streams are active."""
return self.rs_pipeline is not None and self.rs_profile is not None return self.rs_pipeline is not None and self.rs_profile is not None
@check_if_already_connected
def connect(self, warmup: bool = True) -> None: def connect(self, warmup: bool = True) -> None:
""" """
Connects to the RealSense camera specified in the configuration. Connects to the RealSense camera specified in the configuration.
@@ -161,16 +158,14 @@ class RealSenseCamera(Camera):
Initializes the RealSense pipeline, configures the required streams (color Initializes the RealSense pipeline, configures the required streams (color
and optionally depth), starts the pipeline, and validates the actual stream settings. and optionally depth), starts the pipeline, and validates the actual stream settings.
Args:
warmup (bool): If True, waits at connect() time until at least one valid frame
has been captured by the background thread. Defaults to True.
Raises: Raises:
DeviceAlreadyConnectedError: If the camera is already connected. DeviceAlreadyConnectedError: If the camera is already connected.
ValueError: If the configuration is invalid (e.g., missing serial/name, name not unique). ValueError: If the configuration is invalid (e.g., missing serial/name, name not unique).
ConnectionError: If the camera is found but fails to start the pipeline or no RealSense devices are detected at all. ConnectionError: If the camera is found but fails to start the pipeline or no RealSense devices are detected at all.
RuntimeError: If the pipeline starts but fails to apply requested settings. RuntimeError: If the pipeline starts but fails to apply requested settings.
""" """
if self.is_connected:
raise DeviceAlreadyConnectedError(f"{self} is already connected.")
self.rs_pipeline = rs.pipeline() self.rs_pipeline = rs.pipeline()
rs_config = rs.config() rs_config = rs.config()
@@ -186,18 +181,15 @@ class RealSenseCamera(Camera):
) from e ) from e
self._configure_capture_settings() self._configure_capture_settings()
self._start_read_thread()
# NOTE(Steven/Caroline): Enforcing at least one second of warmup as RS cameras need a bit of time before the first read. If we don't wait, the first read from the warmup will raise. if warmup:
self.warmup_s = max(self.warmup_s, 1) time.sleep(
1
start_time = time.time() ) # NOTE(Steven): RS cameras need a bit of time to warm up before the first read. If we don't wait, the first read from the warmup will raise.
while time.time() - start_time < self.warmup_s: start_time = time.time()
self.async_read(timeout_ms=self.warmup_s * 1000) while time.time() - start_time < self.warmup_s:
time.sleep(0.1) self.read()
with self.frame_lock: time.sleep(0.1)
if self.latest_color_frame is None or self.use_depth and self.latest_depth_frame is None:
raise ConnectionError(f"{self} failed to capture frames during warmup.")
logger.info(f"{self} connected.") logger.info(f"{self} connected.")
@@ -290,7 +282,6 @@ class RealSenseCamera(Camera):
if self.use_depth: if self.use_depth:
rs_config.enable_stream(rs.stream.depth) rs_config.enable_stream(rs.stream.depth)
@check_if_not_connected
def _configure_capture_settings(self) -> None: def _configure_capture_settings(self) -> None:
"""Sets fps, width, and height from device stream if not already configured. """Sets fps, width, and height from device stream if not already configured.
@@ -300,6 +291,8 @@ class RealSenseCamera(Camera):
Raises: Raises:
DeviceNotConnectedError: If device is not connected. DeviceNotConnectedError: If device is not connected.
""" """
if not self.is_connected:
raise DeviceNotConnectedError(f"Cannot validate settings for {self} as it is not connected.")
if self.rs_profile is None: if self.rs_profile is None:
raise RuntimeError(f"{self}: rs_profile must be initialized before use.") raise RuntimeError(f"{self}: rs_profile must be initialized before use.")
@@ -319,7 +312,6 @@ class RealSenseCamera(Camera):
self.width, self.height = actual_width, actual_height self.width, self.height = actual_width, actual_height
self.capture_width, self.capture_height = actual_width, actual_height self.capture_width, self.capture_height = actual_width, actual_height
@check_if_not_connected
def read_depth(self, timeout_ms: int = 200) -> NDArray[Any]: def read_depth(self, timeout_ms: int = 200) -> NDArray[Any]:
""" """
Reads a single frame (depth) synchronously from the camera. Reads a single frame (depth) synchronously from the camera.
@@ -327,6 +319,9 @@ class RealSenseCamera(Camera):
This is a blocking call. It waits for a coherent set of frames (depth) This is a blocking call. It waits for a coherent set of frames (depth)
from the camera hardware via the RealSense pipeline. from the camera hardware via the RealSense pipeline.
Args:
timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.
Returns: Returns:
np.ndarray: The depth map as a NumPy array (height, width) np.ndarray: The depth map as a NumPy array (height, width)
of type `np.uint16` (raw depth values in millimeters) and rotation. of type `np.uint16` (raw depth values in millimeters) and rotation.
@@ -335,50 +330,44 @@ class RealSenseCamera(Camera):
DeviceNotConnectedError: If the camera is not connected. DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If reading frames from the pipeline fails or frames are invalid. RuntimeError: If reading frames from the pipeline fails or frames are invalid.
""" """
if timeout_ms:
logger.warning(
f"{self} read() timeout_ms parameter is deprecated and will be removed in future versions."
)
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if not self.use_depth: if not self.use_depth:
raise RuntimeError( raise RuntimeError(
f"Failed to capture depth frame '.read_depth()'. Depth stream is not enabled for {self}." f"Failed to capture depth frame '.read_depth()'. Depth stream is not enabled for {self}."
) )
if self.thread is None or not self.thread.is_alive(): start_time = time.perf_counter()
raise RuntimeError(f"{self} read thread is not running.")
self.new_frame_event.clear()
_ = self.async_read(timeout_ms=10000)
with self.frame_lock:
depth_map = self.latest_depth_frame
if depth_map is None:
raise RuntimeError("No depth frame available. Ensure camera is streaming.")
return depth_map
def _read_from_hardware(self):
if self.rs_pipeline is None: if self.rs_pipeline is None:
raise RuntimeError(f"{self}: rs_pipeline must be initialized before use.") raise RuntimeError(f"{self}: rs_pipeline must be initialized before use.")
ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=10000) ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms)
if not ret or frame is None: if not ret or frame is None:
raise RuntimeError(f"{self} read failed (status={ret}).") raise RuntimeError(f"{self} read_depth failed (status={ret}).")
return frame depth_frame = frame.get_depth_frame()
depth_map = np.asanyarray(depth_frame.get_data())
@check_if_not_connected depth_map_processed = self._postprocess_image(depth_map, depth_frame=True)
def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 0) -> NDArray[Any]:
read_duration_ms = (time.perf_counter() - start_time) * 1e3
logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
return depth_map_processed
def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 200) -> NDArray[Any]:
""" """
Reads a single frame (color) synchronously from the camera. Reads a single frame (color) synchronously from the camera.
This is a blocking call. It waits for a coherent set of frames (color) This is a blocking call. It waits for a coherent set of frames (color)
from the camera hardware via the RealSense pipeline. from the camera hardware via the RealSense pipeline.
Args:
timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.
Returns: Returns:
np.ndarray: The captured color frame as a NumPy array np.ndarray: The captured color frame as a NumPy array
(height, width, channels), processed according to `color_mode` and rotation. (height, width, channels), processed according to `color_mode` and rotation.
@@ -389,36 +378,39 @@ class RealSenseCamera(Camera):
ValueError: If an invalid `color_mode` is requested. ValueError: If an invalid `color_mode` is requested.
""" """
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
start_time = time.perf_counter() start_time = time.perf_counter()
if color_mode is not None: if self.rs_pipeline is None:
logger.warning( raise RuntimeError(f"{self}: rs_pipeline must be initialized before use.")
f"{self} read() color_mode parameter is deprecated and will be removed in future versions."
)
if timeout_ms: ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms)
logger.warning(
f"{self} read() timeout_ms parameter is deprecated and will be removed in future versions."
)
if self.thread is None or not self.thread.is_alive(): if not ret or frame is None:
raise RuntimeError(f"{self} read thread is not running.") raise RuntimeError(f"{self} read failed (status={ret}).")
self.new_frame_event.clear() color_frame = frame.get_color_frame()
color_image_raw = np.asanyarray(color_frame.get_data())
frame = self.async_read(timeout_ms=10000) color_image_processed = self._postprocess_image(color_image_raw, color_mode)
read_duration_ms = (time.perf_counter() - start_time) * 1e3 read_duration_ms = (time.perf_counter() - start_time) * 1e3
logger.debug(f"{self} read took: {read_duration_ms:.1f}ms") logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
return frame return color_image_processed
def _postprocess_image(self, image: NDArray[Any], depth_frame: bool = False) -> NDArray[Any]: def _postprocess_image(
self, image: NDArray[Any], color_mode: ColorMode | None = None, depth_frame: bool = False
) -> NDArray[Any]:
""" """
Applies color conversion, dimension validation, and rotation to a raw color frame. Applies color conversion, dimension validation, and rotation to a raw color frame.
Args: Args:
image (np.ndarray): The raw image frame (expected RGB format from RealSense). image (np.ndarray): The raw image frame (expected RGB format from RealSense).
color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None,
uses the instance's default `self.color_mode`.
Returns: Returns:
np.ndarray: The processed image frame according to `self.color_mode` and `self.rotation`. np.ndarray: The processed image frame according to `self.color_mode` and `self.rotation`.
@@ -429,9 +421,9 @@ class RealSenseCamera(Camera):
`width` and `height`. `width` and `height`.
""" """
if self.color_mode and self.color_mode not in (ColorMode.RGB, ColorMode.BGR): if color_mode and color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError( raise ValueError(
f"Invalid requested color mode '{self.color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}." f"Invalid requested color mode '{color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
) )
if depth_frame: if depth_frame:
@@ -462,7 +454,7 @@ class RealSenseCamera(Camera):
On each iteration: On each iteration:
1. Reads a color frame with 500ms timeout 1. Reads a color frame with 500ms timeout
2. Stores result in latest_frame and updates timestamp (thread-safe) 2. Stores result in latest_frame (thread-safe)
3. Sets new_frame_event to notify listeners 3. Sets new_frame_event to notify listeners
Stops on DeviceNotConnectedError, logs other errors and continues. Stops on DeviceNotConnectedError, logs other errors and continues.
@@ -470,41 +462,25 @@ class RealSenseCamera(Camera):
if self.stop_event is None: if self.stop_event is None:
raise RuntimeError(f"{self}: stop_event is not initialized before starting read loop.") raise RuntimeError(f"{self}: stop_event is not initialized before starting read loop.")
failure_count = 0
while not self.stop_event.is_set(): while not self.stop_event.is_set():
try: try:
frame = self._read_from_hardware() color_image = self.read(timeout_ms=500)
color_frame_raw = frame.get_color_frame()
color_frame = np.asanyarray(color_frame_raw.get_data())
processed_color_frame = self._postprocess_image(color_frame)
if self.use_depth:
depth_frame_raw = frame.get_depth_frame()
depth_frame = np.asanyarray(depth_frame_raw.get_data())
processed_depth_frame = self._postprocess_image(depth_frame, depth_frame=True)
capture_time = time.perf_counter()
with self.frame_lock: with self.frame_lock:
self.latest_color_frame = processed_color_frame self.latest_frame = color_image
if self.use_depth:
self.latest_depth_frame = processed_depth_frame
self.latest_timestamp = capture_time
self.new_frame_event.set() self.new_frame_event.set()
failure_count = 0
except DeviceNotConnectedError: except DeviceNotConnectedError:
break break
except Exception as e: except Exception as e:
if failure_count <= 10: logger.warning(f"Error reading frame in background thread for {self}: {e}")
failure_count += 1
logger.warning(f"Error reading frame in background thread for {self}: {e}")
else:
raise RuntimeError(f"{self} exceeded maximum consecutive read failures.") from e
def _start_read_thread(self) -> None: def _start_read_thread(self) -> None:
"""Starts or restarts the background read thread if it's not running.""" """Starts or restarts the background read thread if it's not running."""
self._stop_read_thread() if self.thread is not None and self.thread.is_alive():
self.thread.join(timeout=0.1)
if self.stop_event is not None:
self.stop_event.set()
self.stop_event = Event() self.stop_event = Event()
self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop") self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop")
@@ -522,14 +498,7 @@ class RealSenseCamera(Camera):
self.thread = None self.thread = None
self.stop_event = None self.stop_event = None
with self.frame_lock:
self.latest_color_frame = None
self.latest_depth_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
# NOTE(Steven): Missing implementation for depth for now # NOTE(Steven): Missing implementation for depth for now
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]: def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
""" """
Reads the latest available frame data (color) asynchronously. Reads the latest available frame data (color) asynchronously.
@@ -537,7 +506,6 @@ class RealSenseCamera(Camera):
This method retrieves the most recent color frame captured by the background This method retrieves the most recent color frame captured by the background
read thread. It does not block waiting for the camera hardware directly, read thread. It does not block waiting for the camera hardware directly,
but may wait up to timeout_ms for the background thread to provide a frame. but may wait up to timeout_ms for the background thread to provide a frame.
It is “best effort” under high FPS.
Args: Args:
timeout_ms (float): Maximum time in milliseconds to wait for a frame timeout_ms (float): Maximum time in milliseconds to wait for a frame
@@ -552,18 +520,21 @@ class RealSenseCamera(Camera):
TimeoutError: If no frame data becomes available within the specified timeout. TimeoutError: If no frame data becomes available within the specified timeout.
RuntimeError: If the background thread died unexpectedly or another error occurs. RuntimeError: If the background thread died unexpectedly or another error occurs.
""" """
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive(): if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.") self._start_read_thread()
if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0): if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0):
thread_alive = self.thread is not None and self.thread.is_alive()
raise TimeoutError( raise TimeoutError(
f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. " f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. "
f"Read thread alive: {self.thread.is_alive()}." f"Read thread alive: {thread_alive}."
) )
with self.frame_lock: with self.frame_lock:
frame = self.latest_color_frame frame = self.latest_frame
self.new_frame_event.clear() self.new_frame_event.clear()
if frame is None: if frame is None:
@@ -571,42 +542,6 @@ class RealSenseCamera(Camera):
return frame return frame
# NOTE(Steven): Missing implementation for depth for now
@check_if_not_connected
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
"""Return the most recent (color) frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
memory buffer. The frame may be stale,
meaning it could have been captured a while ago (hanging camera scenario e.g.).
Returns:
NDArray[Any]: The frame image (numpy array).
Raises:
TimeoutError: If the latest frame is older than `max_age_ms`.
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
with self.frame_lock:
frame = self.latest_color_frame
timestamp = self.latest_timestamp
if frame is None or timestamp is None:
raise RuntimeError(f"{self} has not captured any frames yet.")
age_ms = (time.perf_counter() - timestamp) * 1e3
if age_ms > max_age_ms:
raise TimeoutError(
f"{self} latest frame is too old: {age_ms:.1f} ms (max allowed: {max_age_ms} ms)."
)
return frame
def disconnect(self) -> None: def disconnect(self) -> None:
""" """
Disconnects from the camera, stops the pipeline, and cleans up resources. Disconnects from the camera, stops the pipeline, and cleans up resources.
@@ -630,10 +565,4 @@ class RealSenseCamera(Camera):
self.rs_pipeline = None self.rs_pipeline = None
self.rs_profile = None self.rs_profile = None
with self.frame_lock:
self.latest_color_frame = None
self.latest_depth_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
logger.info(f"{self} disconnected.") logger.info(f"{self} disconnected.")

View File

@@ -60,8 +60,20 @@ class RealSenseCameraConfig(CameraConfig):
warmup_s: int = 1 warmup_s: int = 1
def __post_init__(self) -> None: def __post_init__(self) -> None:
self.color_mode = ColorMode(self.color_mode) if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
self.rotation = Cv2Rotation(self.rotation) raise ValueError(
f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
)
if self.rotation not in (
Cv2Rotation.NO_ROTATION,
Cv2Rotation.ROTATE_90,
Cv2Rotation.ROTATE_180,
Cv2Rotation.ROTATE_270,
):
raise ValueError(
f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
)
values = (self.fps, self.width, self.height) values = (self.fps, self.width, self.height)
if any(v is not None for v in values) and any(v is None for v in values): if any(v is not None for v in values) and any(v is None for v in values):

View File

@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import platform
from typing import cast from typing import cast
from lerobot.utils.import_utils import make_device_from_device_class from lerobot.utils.import_utils import make_device_from_device_class
@@ -67,3 +68,14 @@ def get_cv2_rotation(rotation: Cv2Rotation) -> int | None:
return int(cv2.ROTATE_90_COUNTERCLOCKWISE) return int(cv2.ROTATE_90_COUNTERCLOCKWISE)
else: else:
return None return None
def get_cv2_backend() -> int:
import cv2
if platform.system() == "Windows":
return int(cv2.CAP_MSMF) # Use MSMF for Windows instead of AVFOUNDATION
# elif platform.system() == "Darwin": # macOS
# return cv2.CAP_AVFOUNDATION
else: # Linux and others
return int(cv2.CAP_ANY)

View File

@@ -34,8 +34,7 @@ import cv2
import numpy as np import numpy as np
from numpy.typing import NDArray from numpy.typing import NDArray
from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected from lerobot.utils.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
from lerobot.utils.errors import DeviceNotConnectedError
from ..camera import Camera from ..camera import Camera
from ..configs import ColorMode from ..configs import ColorMode
@@ -46,12 +45,6 @@ logger = logging.getLogger(__name__)
class ZMQCamera(Camera): class ZMQCamera(Camera):
""" """
Manages camera interactions via ZeroMQ for receiving frames from a remote server.
This class connects to a ZMQ Publisher, subscribes to frame topics, and decodes
incoming JSON messages containing Base64 encoded images. It supports both
synchronous and asynchronous frame reading patterns.
Example usage: Example usage:
```python ```python
from lerobot.cameras.zmq import ZMQCamera, ZMQCameraConfig from lerobot.cameras.zmq import ZMQCamera, ZMQCameraConfig
@@ -59,16 +52,7 @@ class ZMQCamera(Camera):
config = ZMQCameraConfig(server_address="192.168.123.164", port=5555, camera_name="head_camera") config = ZMQCameraConfig(server_address="192.168.123.164", port=5555, camera_name="head_camera")
camera = ZMQCamera(config) camera = ZMQCamera(config)
camera.connect() camera.connect()
frame = camera.read()
# Read 1 frame synchronously (blocking)
color_image = camera.read()
# Read 1 frame asynchronously (waits for new frame with a timeout)
async_image = camera.async_read()
# Get the latest frame immediately (no wait, returns timestamp)
latest_image, timestamp = camera.read_latest()
camera.disconnect() camera.disconnect()
``` ```
""" """
@@ -84,17 +68,14 @@ class ZMQCamera(Camera):
self.color_mode = config.color_mode self.color_mode = config.color_mode
self.timeout_ms = config.timeout_ms self.timeout_ms = config.timeout_ms
# ZMQ Context and Socket
self.context: zmq.Context | None = None self.context: zmq.Context | None = None
self.socket: zmq.Socket | None = None self.socket: zmq.Socket | None = None
self._connected = False self._connected = False
# Threading resources
self.thread: Thread | None = None self.thread: Thread | None = None
self.stop_event: Event | None = None self.stop_event: Event | None = None
self.frame_lock: Lock = Lock() self.frame_lock: Lock = Lock()
self.latest_frame: NDArray[Any] | None = None self.latest_frame: NDArray[Any] | None = None
self.latest_timestamp: float | None = None
self.new_frame_event: Event = Event() self.new_frame_event: Event = Event()
def __str__(self) -> str: def __str__(self) -> str:
@@ -102,17 +83,12 @@ class ZMQCamera(Camera):
@property @property
def is_connected(self) -> bool: def is_connected(self) -> bool:
"""Checks if the ZMQ socket is initialized and connected."""
return self._connected and self.context is not None and self.socket is not None return self._connected and self.context is not None and self.socket is not None
@check_if_already_connected
def connect(self, warmup: bool = True) -> None: def connect(self, warmup: bool = True) -> None:
"""Connect to ZMQ camera server. """Connect to ZMQ camera server."""
if self.is_connected:
Args: raise DeviceAlreadyConnectedError(f"{self} is already connected.")
warmup (bool): If True, waits for the camera to provide at least one
valid frame before returning. Defaults to True.
"""
logger.info(f"Connecting to {self}...") logger.info(f"Connecting to {self}...")
@@ -127,28 +103,17 @@ class ZMQCamera(Camera):
self.socket.connect(f"tcp://{self.server_address}:{self.port}") self.socket.connect(f"tcp://{self.server_address}:{self.port}")
self._connected = True self._connected = True
# Auto-detect resolution if not provided # Auto-detect resolution
if self.width is None or self.height is None: if self.width is None or self.height is None:
# Read directly from hardware because the thread isn't running yet h, w = self.read().shape[:2]
temp_frame = self._read_from_hardware()
h, w = temp_frame.shape[:2]
self.height = h self.height = h
self.width = w self.width = w
logger.info(f"{self} resolution detected: {w}x{h}") logger.info(f"{self} resolution: {w}x{h}")
self._start_read_thread()
logger.info(f"{self} connected.") logger.info(f"{self} connected.")
if warmup: if warmup:
# Ensure we have captured at least one frame via the thread time.sleep(0.1)
start_time = time.time()
while time.time() - start_time < (self.config.warmup_s): # Wait a bit more than timeout
self.async_read(timeout_ms=self.config.warmup_s * 1000)
time.sleep(0.1)
with self.frame_lock:
if self.latest_frame is None:
raise ConnectionError(f"{self} failed to capture frames during warmup.")
except Exception as e: except Exception as e:
self._cleanup() self._cleanup()
@@ -166,14 +131,15 @@ class ZMQCamera(Camera):
@staticmethod @staticmethod
def find_cameras() -> list[dict[str, Any]]: def find_cameras() -> list[dict[str, Any]]:
""" """ZMQ cameras require manual configuration (server address/port)."""
Detection not implemented for ZMQ cameras. These cameras require manual configuration (server address/port). return []
"""
raise NotImplementedError("Camera detection is not implemented for ZMQ cameras.")
def _read_from_hardware(self) -> NDArray[Any]: def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
""" """
Reads a single frame directly from the ZMQ socket. Read a single frame from the ZMQ camera.
Returns:
np.ndarray: Decoded frame (height, width, 3)
""" """
if not self.is_connected or self.socket is None: if not self.is_connected or self.socket is None:
raise DeviceNotConnectedError(f"{self} is not connected.") raise DeviceNotConnectedError(f"{self} is not connected.")
@@ -181,7 +147,6 @@ class ZMQCamera(Camera):
try: try:
message = self.socket.recv_string() message = self.socket.recv_string()
except Exception as e: except Exception as e:
# zmq is lazy-imported in connect(), so check by name to avoid a top-level import
if type(e).__name__ == "Again": if type(e).__name__ == "Again":
raise TimeoutError(f"{self} timeout after {self.timeout_ms}ms") from e raise TimeoutError(f"{self} timeout after {self.timeout_ms}ms") from e
raise raise
@@ -211,114 +176,42 @@ class ZMQCamera(Camera):
return frame return frame
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
    """
    Reads a single frame synchronously from the camera.

    This is a blocking call. It clears the new-frame event and then waits, through
    `async_read` with a fixed 10 000 ms timeout, for the background read thread to
    signal another frame.

    Args:
        color_mode: Deprecated. The value is not used; passing anything other than
            None only logs a warning.

    Returns:
        np.ndarray: Decoded frame (height, width, 3)

    Raises:
        RuntimeError: If the background read thread is not running.
        TimeoutError: If `async_read` gets no frame within its timeout.
        DeviceNotConnectedError: Presumably raised by `check_if_not_connected`
            when the camera is not connected (decorator not visible here, confirm).
    """
    start_time = time.perf_counter()

    if color_mode is not None:
        logger.warning(
            f"{self} read() color_mode parameter is deprecated and will be removed in future versions."
        )

    if self.thread is None or not self.thread.is_alive():
        raise RuntimeError(f"{self} read thread is not running.")

    # Drop any pending "new frame" signal so the wait below is only satisfied by a
    # frame signalled after this point.
    self.new_frame_event.clear()
    frame = self.async_read(timeout_ms=10000)

    read_duration_ms = (time.perf_counter() - start_time) * 1e3
    logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")

    return frame
def _read_loop(self) -> None: def _read_loop(self) -> None:
""" while self.stop_event and not self.stop_event.is_set():
Internal loop run by the background thread for asynchronous reading.
"""
if self.stop_event is None:
raise RuntimeError(f"{self}: stop_event is not initialized.")
failure_count = 0
while not self.stop_event.is_set():
try: try:
frame = self._read_from_hardware() frame = self.read()
capture_time = time.perf_counter()
with self.frame_lock: with self.frame_lock:
self.latest_frame = frame self.latest_frame = frame
self.latest_timestamp = capture_time
self.new_frame_event.set() self.new_frame_event.set()
failure_count = 0
except DeviceNotConnectedError: except DeviceNotConnectedError:
break break
except (TimeoutError, Exception) as e: except TimeoutError:
if failure_count <= 10: pass
failure_count += 1 except Exception as e:
logger.warning(f"Read error: {e}") logger.warning(f"Read error: {e}")
else:
raise RuntimeError(f"{self} exceeded maximum consecutive read failures.") from e
def _start_read_thread(self) -> None: def _start_read_thread(self) -> None:
if self.stop_event is not None: if self.thread and self.thread.is_alive():
self.stop_event.set() return
if self.thread is not None and self.thread.is_alive():
self.thread.join(timeout=2.0)
with self.frame_lock:
self.latest_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
self.stop_event = Event() self.stop_event = Event()
self.thread = Thread(target=self._read_loop, daemon=True, name=f"{self}_read_loop") self.thread = Thread(target=self._read_loop, daemon=True)
self.thread.start() self.thread.start()
time.sleep(0.1)
def _stop_read_thread(self) -> None: def _stop_read_thread(self) -> None:
if self.stop_event is not None: if self.stop_event:
self.stop_event.set() self.stop_event.set()
if self.thread and self.thread.is_alive():
if self.thread is not None and self.thread.is_alive():
self.thread.join(timeout=2.0) self.thread.join(timeout=2.0)
self.thread = None self.thread = None
self.stop_event = None self.stop_event = None
with self.frame_lock: def async_read(self, timeout_ms: float = 10000) -> NDArray[Any]:
self.latest_frame = None """Read latest frame asynchronously (non-blocking)."""
self.latest_timestamp = None if not self.is_connected:
self.new_frame_event.clear() raise DeviceNotConnectedError(f"{self} is not connected.")
@check_if_not_connected if not self.thread or not self.thread.is_alive():
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]: self._start_read_thread()
"""
Reads the latest available frame asynchronously.
Args:
timeout_ms (float): Maximum time in milliseconds to wait for a frame
to become available. Defaults to 200ms.
Returns:
np.ndarray: The latest captured frame.
Raises:
DeviceNotConnectedError: If the camera is not connected.
TimeoutError: If no frame data becomes available within the specified timeout.
RuntimeError: If the background thread is not running.
"""
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0): if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0):
raise TimeoutError(f"{self} async_read timeout after {timeout_ms}ms") raise TimeoutError(f"{self} async_read timeout after {timeout_ms}ms")
@@ -332,54 +225,11 @@ class ZMQCamera(Camera):
return frame return frame
@check_if_not_connected
def read_latest(self, max_age_ms: int = 1000) -> NDArray[Any]:
    """Return the most recent frame captured immediately (Peeking).

    This method is non-blocking and returns whatever is currently in the
    memory buffer. The frame may be stale, meaning it could have been captured
    a while ago (e.g. a hanging camera); frames older than `max_age_ms` are rejected.

    Args:
        max_age_ms: Maximum accepted age of the buffered frame, in milliseconds,
            measured with `time.perf_counter()` against the stored capture timestamp.

    Returns:
        NDArray[Any]: The frame image (numpy array).

    Raises:
        TimeoutError: If the latest frame is older than `max_age_ms`.
        RuntimeError: If the background read thread is not running, or if no frame
            has been captured yet.
        DeviceNotConnectedError: Presumably raised by `check_if_not_connected`
            when the camera is not connected (decorator not visible here, confirm).
    """
    if self.thread is None or not self.thread.is_alive():
        raise RuntimeError(f"{self} read thread is not running.")

    # Snapshot frame and timestamp together so they belong to the same capture.
    with self.frame_lock:
        frame = self.latest_frame
        timestamp = self.latest_timestamp

    if frame is None or timestamp is None:
        raise RuntimeError(f"{self} has not captured any frames yet.")

    age_ms = (time.perf_counter() - timestamp) * 1e3
    if age_ms > max_age_ms:
        raise TimeoutError(
            f"{self} latest frame is too old: {age_ms:.1f} ms (max allowed: {max_age_ms} ms)."
        )

    return frame
def disconnect(self) -> None: def disconnect(self) -> None:
"""Disconnect from ZMQ camera.""" """Disconnect from ZMQ camera."""
if not self.is_connected and self.thread is None: if not self.is_connected and not self.thread:
raise DeviceNotConnectedError(f"{self} not connected.") raise DeviceNotConnectedError(f"{self} not connected.")
if self.thread is not None: self._stop_read_thread()
self._stop_read_thread()
self._cleanup() self._cleanup()
with self.frame_lock:
self.latest_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
logger.info(f"{self} disconnected.") logger.info(f"{self} disconnected.")

View File

@@ -29,10 +29,12 @@ class ZMQCameraConfig(CameraConfig):
camera_name: str = "zmq_camera" camera_name: str = "zmq_camera"
color_mode: ColorMode = ColorMode.RGB color_mode: ColorMode = ColorMode.RGB
timeout_ms: int = 5000 timeout_ms: int = 5000
warmup_s: int = 1
def __post_init__(self) -> None: def __post_init__(self) -> None:
self.color_mode = ColorMode(self.color_mode) if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
)
if self.timeout_ms <= 0: if self.timeout_ms <= 0:
raise ValueError(f"`timeout_ms` must be positive, but {self.timeout_ms} is provided.") raise ValueError(f"`timeout_ms` must be positive, but {self.timeout_ms} is provided.")

View File

@@ -23,7 +23,6 @@ import base64
import contextlib import contextlib
import json import json
import logging import logging
import threading
import time import time
from collections import deque from collections import deque
@@ -43,57 +42,10 @@ def encode_image(image: np.ndarray, quality: int = 80) -> str:
return base64.b64encode(buffer).decode("utf-8") return base64.b64encode(buffer).decode("utf-8")
class CameraCaptureThread:
    """Background thread that continuously captures and encodes frames from a camera.

    The capture loop reads a frame, encodes it with `encode_image`, and stores the
    encoded string together with its `time.time()` timestamp under `frame_lock`.
    Consumers poll `get_latest()`.
    """

    def __init__(self, camera: OpenCVCamera, name: str):
        self.camera = camera
        self.name = name  # Only used in log messages.
        self.latest_encoded: str | None = None  # Pre-encoded JPEG as base64
        self.latest_timestamp: float = 0.0  # 0.0 until the first frame is stored
        self.frame_lock = threading.Lock()  # Guards latest_encoded / latest_timestamp
        self.running = False
        self.thread: threading.Thread | None = None

    def start(self):
        """Start the capture thread (no-op if it is already running)."""
        # Calling start() twice used to spawn a second loop on the same camera.
        if self.running and self.thread is not None and self.thread.is_alive():
            return
        self.running = True
        self.thread = threading.Thread(target=self._capture_loop, daemon=True)
        self.thread.start()

    def stop(self):
        """Stop the capture thread, waiting at most one second for it to exit."""
        self.running = False
        if self.thread:
            self.thread.join(timeout=1.0)

    def _capture_loop(self):
        """Continuously capture and encode frames at the camera's native rate."""
        while self.running:
            try:
                frame = self.camera.read()  # Blocks at camera's native rate
                timestamp = time.time()
                # Encode immediately in capture thread (this is the slow part)
                encoded = encode_image(frame)
                with self.frame_lock:
                    self.latest_encoded = encoded
                    self.latest_timestamp = timestamp
            except Exception as e:
                # Best effort: log, back off briefly, and keep the loop alive.
                logger.warning(f"Camera {self.name} capture error: {e}")
                time.sleep(0.01)

    def get_latest(self) -> tuple[str | None, float]:
        """Get the latest encoded frame and its timestamp.

        Returns:
            tuple[str | None, float]: `(encoded, timestamp)`; `encoded` is None
            until the first frame has been captured and encoded.
        """
        with self.frame_lock:
            return self.latest_encoded, self.latest_timestamp
class ImageServer: class ImageServer:
def __init__(self, config: dict, port: int = 5555): def __init__(self, config: dict, port: int = 5555):
# fps controls the publish loop rate (how often frames are sent over ZMQ), not the camera capture rate
self.fps = config.get("fps", 30) self.fps = config.get("fps", 30)
self.cameras: dict[str, OpenCVCamera] = {} self.cameras: dict[str, OpenCVCamera] = {}
self.capture_threads: dict[str, CameraCaptureThread] = {}
for name, cfg in config.get("cameras", {}).items(): for name, cfg in config.get("cameras", {}).items():
shape = cfg.get("shape", [480, 640]) shape = cfg.get("shape", [480, 640])
@@ -109,10 +61,6 @@ class ImageServer:
self.cameras[name] = camera self.cameras[name] = camera
logger.info(f"Camera {name}: {shape[1]}x{shape[0]}") logger.info(f"Camera {name}: {shape[1]}x{shape[0]}")
# Create capture thread for this camera
capture_thread = CameraCaptureThread(camera, name)
self.capture_threads[name] = capture_thread
# ZMQ PUB socket # ZMQ PUB socket
self.context = zmq.Context() self.context = zmq.Context()
self.socket = self.context.socket(zmq.PUB) self.socket = self.context.socket(zmq.PUB)
@@ -125,18 +73,6 @@ class ImageServer:
def run(self): def run(self):
frame_count = 0 frame_count = 0
frame_times = deque(maxlen=60) frame_times = deque(maxlen=60)
last_published_ts: dict[str, float] = {}
# Start all capture threads
for capture_thread in self.capture_threads.values():
capture_thread.start()
# Wait for first frames to be captured and encoded
logger.info("Waiting for cameras to start capturing...")
for name, capture_thread in self.capture_threads.items():
while capture_thread.get_latest()[0] is None:
time.sleep(0.01)
logger.info(f"Camera {name} ready (capture + encode in background)")
try: try:
while True: while True:
@@ -144,12 +80,10 @@ class ImageServer:
# Build message # Build message
message = {"timestamps": {}, "images": {}} message = {"timestamps": {}, "images": {}}
for name, capture_thread in self.capture_threads.items(): for name, cam in self.cameras.items():
encoded, timestamp = capture_thread.get_latest() frame = cam.read() # Returns RGB
if encoded is not None and timestamp > last_published_ts.get(name, 0.0): message["timestamps"][name] = time.time()
message["timestamps"][name] = timestamp message["images"][name] = encode_image(frame)
message["images"][name] = encoded
last_published_ts[name] = timestamp
# Send as JSON string (suppress if buffer full) # Send as JSON string (suppress if buffer full)
with contextlib.suppress(zmq.Again): with contextlib.suppress(zmq.Again):
@@ -168,8 +102,6 @@ class ImageServer:
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
finally: finally:
for capture_thread in self.capture_threads.values():
capture_thread.stop()
for cam in self.cameras.values(): for cam in self.cameras.values():
cam.disconnect() cam.disconnect()
self.socket.close() self.socket.close()

View File

@@ -27,7 +27,7 @@ class DatasetConfig:
# "dataset_index" into the returned item. The index mapping is made according to the order in which the # "dataset_index" into the returned item. The index mapping is made according to the order in which the
# datasets are provided. # datasets are provided.
repo_id: str repo_id: str
# Root directory where the dataset will be stored (e.g. 'dataset/path'). If None, defaults to $HF_LEROBOT_HOME/repo_id. # Root directory where the dataset will be stored (e.g. 'dataset/path').
root: str | None = None root: str | None = None
episodes: list[int] | None = None episodes: list[int] | None = None
image_transforms: ImageTransformsConfig = field(default_factory=ImageTransformsConfig) image_transforms: ImageTransformsConfig = field(default_factory=ImageTransformsConfig)
@@ -36,16 +36,6 @@ class DatasetConfig:
video_backend: str = field(default_factory=get_safe_default_codec) video_backend: str = field(default_factory=get_safe_default_codec)
streaming: bool = False streaming: bool = False
def __post_init__(self) -> None:
if self.episodes is not None:
if any(ep < 0 for ep in self.episodes):
raise ValueError(
f"Episode indices must be non-negative, got: {[ep for ep in self.episodes if ep < 0]}"
)
if len(self.episodes) != len(set(self.episodes)):
duplicates = sorted({ep for ep in self.episodes if self.episodes.count(ep) > 1})
raise ValueError(f"Episode indices contain duplicates: {duplicates}")
@dataclass @dataclass
class WandBConfig: class WandBConfig:
@@ -57,7 +47,6 @@ class WandBConfig:
notes: str | None = None notes: str | None = None
run_id: str | None = None run_id: str | None = None
mode: str | None = None # Allowed values: 'online', 'offline' 'disabled'. Defaults to 'online' mode: str | None = None # Allowed values: 'online', 'offline' 'disabled'. Defaults to 'online'
add_tags: bool = True # If True, save configuration as tags in the WandB run.
@dataclass @dataclass

View File

@@ -30,8 +30,8 @@ from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.optim.optimizers import OptimizerConfig from lerobot.optim.optimizers import OptimizerConfig
from lerobot.optim.schedulers import LRSchedulerConfig from lerobot.optim.schedulers import LRSchedulerConfig
from lerobot.utils.constants import ACTION, OBS_STATE from lerobot.utils.constants import ACTION, OBS_STATE
from lerobot.utils.device_utils import auto_select_torch_device, is_amp_available, is_torch_device_available
from lerobot.utils.hub import HubMixin from lerobot.utils.hub import HubMixin
from lerobot.utils.utils import auto_select_torch_device, is_amp_available, is_torch_device_available
T = TypeVar("T", bound="PreTrainedConfig") T = TypeVar("T", bound="PreTrainedConfig")
logger = getLogger(__name__) logger = getLogger(__name__)
@@ -45,12 +45,12 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): # type: igno
Args: Args:
n_obs_steps: Number of environment steps worth of observations to pass to the policy (takes the n_obs_steps: Number of environment steps worth of observations to pass to the policy (takes the
current step and additional steps going back). current step and additional steps going back).
input_features: A dictionary defining the PolicyFeature of the input data for the policy. The key represents input_shapes: A dictionary defining the shapes of the input data for the policy.
the input data name, and the value is PolicyFeature, which consists of FeatureType and shape attributes. output_shapes: A dictionary defining the shapes of the output data for the policy.
output_features: A dictionary defining the PolicyFeature of the output data for the policy. The key represents input_normalization_modes: A dictionary with key representing the modality and the value specifies the
the output data name, and the value is PolicyFeature, which consists of FeatureType and shape attributes. normalization mode to apply.
normalization_mapping: A dictionary that maps from a str value of FeatureType (e.g., "STATE", "VISUAL") to output_normalization_modes: Similar dictionary as `input_normalization_modes`, but to unnormalize to
a corresponding NormalizationMode (e.g., NormalizationMode.MIN_MAX) the original scale.
""" """
n_obs_steps: int = 1 n_obs_steps: int = 1

View File

@@ -50,9 +50,6 @@ class TrainPipelineConfig(HubMixin):
# `seed` is used for training (eg: model initialization, dataset shuffling) # `seed` is used for training (eg: model initialization, dataset shuffling)
# AND for the evaluation environments. # AND for the evaluation environments.
seed: int | None = 1000 seed: int | None = 1000
# Set to True to use deterministic cuDNN algorithms for reproducibility.
# This disables cudnn.benchmark and may reduce training speed by ~10-20 percent.
cudnn_deterministic: bool = False
# Number of workers for the dataloader. # Number of workers for the dataloader.
num_workers: int = 4 num_workers: int = 4
batch_size: int = 8 batch_size: int = 8

View File

@@ -50,3 +50,8 @@ class RTCAttentionSchedule(str, Enum):
ONES = "ONES" ONES = "ONES"
LINEAR = "LINEAR" LINEAR = "LINEAR"
EXP = "EXP" EXP = "EXP"
class RTCTrainingDelayDistribution(str, Enum):
    """Names of the delay distributions that can be selected for RTC training.

    Members are plain strings (the class mixes in `str`), so they compare equal to
    their literal values.
    NOTE(review): how each option is sampled is decided by the code that consumes
    this enum, which is not visible here.
    """

    UNIFORM = "UNIFORM"
    EXP = "EXP"

View File

@@ -746,8 +746,7 @@ def save_annotations_to_dataset(
dataset_path: Path, annotations: dict[int, SubtaskAnnotation], fps: int, prefix: str = "sparse" dataset_path: Path, annotations: dict[int, SubtaskAnnotation], fps: int, prefix: str = "sparse"
): ):
"""Save annotations to LeRobot dataset parquet format.""" """Save annotations to LeRobot dataset parquet format."""
from lerobot.datasets.io_utils import load_episodes from lerobot.datasets.utils import DEFAULT_EPISODES_PATH, load_episodes
from lerobot.datasets.utils import DEFAULT_EPISODES_PATH
episodes_dataset = load_episodes(dataset_path) episodes_dataset = load_episodes(dataset_path)
if not episodes_dataset or len(episodes_dataset) == 0: if not episodes_dataset or len(episodes_dataset) == 0:
@@ -841,7 +840,7 @@ def generate_auto_sparse_annotations(
def load_annotations_from_dataset(dataset_path: Path, prefix: str = "sparse") -> dict[int, SubtaskAnnotation]: def load_annotations_from_dataset(dataset_path: Path, prefix: str = "sparse") -> dict[int, SubtaskAnnotation]:
"""Load annotations from LeRobot dataset parquet files.""" """Load annotations from LeRobot dataset parquet files."""
from lerobot.datasets.io_utils import load_episodes from lerobot.datasets.utils import load_episodes
episodes_dataset = load_episodes(dataset_path) episodes_dataset = load_episodes(dataset_path)
if not episodes_dataset or len(episodes_dataset) == 0: if not episodes_dataset or len(episodes_dataset) == 0:

Some files were not shown because too many files have changed in this diff Show More