mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-31 02:41:24 +00:00
Compare commits
15 Commits
feat/eval-
...
add-claude
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
055f20f658 | ||
|
|
30d2fe3bb3 | ||
|
|
4eecbad32b | ||
|
|
1396b9fab7 | ||
|
|
7c032f19fc | ||
|
|
e2f27bf71b | ||
|
|
ea36a4a176 | ||
|
|
399b3c9ba5 | ||
|
|
913041e753 | ||
|
|
2b541ddd4c | ||
|
|
50a1e67e94 | ||
|
|
d60a700d2b | ||
|
|
8c3d4cf900 | ||
|
|
b6e60a6e30 | ||
|
|
3596681d94 |
44
.github/workflows/claude-code-review.yml
vendored
Normal file
44
.github/workflows/claude-code-review.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
name: Claude Code Review
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, ready_for_review, reopened]
|
||||
# Optional: Only run on specific file changes
|
||||
# paths:
|
||||
# - "src/**/*.ts"
|
||||
# - "src/**/*.tsx"
|
||||
# - "src/**/*.js"
|
||||
# - "src/**/*.jsx"
|
||||
|
||||
jobs:
|
||||
claude-review:
|
||||
# Optional: Filter by PR author
|
||||
# if: |
|
||||
# github.event.pull_request.user.login == 'external-contributor' ||
|
||||
# github.event.pull_request.user.login == 'new-developer' ||
|
||||
# github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
issues: read
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Claude Code Review
|
||||
id: claude-review
|
||||
uses: anthropics/claude-code-action@v1
|
||||
with:
|
||||
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
|
||||
plugins: 'code-review@claude-code-plugins'
|
||||
prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
|
||||
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
||||
# or https://code.claude.com/docs/en/cli-reference for available options
|
||||
|
||||
50
.github/workflows/claude.yml
vendored
Normal file
50
.github/workflows/claude.yml
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
name: Claude Code
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
pull_request_review_comment:
|
||||
types: [created]
|
||||
issues:
|
||||
types: [opened, assigned]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
|
||||
jobs:
|
||||
claude:
|
||||
if: |
|
||||
(github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
|
||||
(github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
issues: read
|
||||
id-token: write
|
||||
actions: read # Required for Claude to read CI results on PRs
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Claude Code
|
||||
id: claude
|
||||
uses: anthropics/claude-code-action@v1
|
||||
with:
|
||||
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
|
||||
# This is an optional setting that allows Claude to read CI results on PRs
|
||||
additional_permissions: |
|
||||
actions: read
|
||||
|
||||
# Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
|
||||
# prompt: 'Update the pull request description to include a summary of changes.'
|
||||
|
||||
# Optional: Add claude_args to customize behavior and configuration
|
||||
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
||||
# or https://code.claude.com/docs/en/cli-reference for available options
|
||||
# claude_args: '--allowed-tools Bash(gh pr:*)'
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This workflow handles nightly testing & docker images publishing.
|
||||
name: Nightly
|
||||
# This workflow handles Docker image publishing & testing.
|
||||
name: Docker Publish & Test
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -39,8 +39,8 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
# This job builds a CPU image for testing & distribution
|
||||
build-docker-cpu-nightly:
|
||||
name: Build CPU Docker for Nightly
|
||||
build-docker-cpu:
|
||||
name: Build CPU Docker
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
if: github.repository == 'huggingface/lerobot'
|
||||
@@ -74,8 +74,8 @@ jobs:
|
||||
tags: ${{ env.DOCKER_IMAGE_NAME_CPU }}
|
||||
|
||||
# This job builds a GPU image for testing & distribution
|
||||
build-docker-gpu-nightly:
|
||||
name: Build GPU Docker for Nightly
|
||||
build-docker-gpu:
|
||||
name: Build GPU Docker
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
if: github.repository == 'huggingface/lerobot'
|
||||
@@ -109,9 +109,9 @@ jobs:
|
||||
tags: ${{ env.DOCKER_IMAGE_NAME_GPU }}
|
||||
|
||||
# This job runs the E2E tests + pytest with all extras in the CPU image
|
||||
nightly-cpu-tests:
|
||||
name: Nightly CPU Tests
|
||||
needs: [build-docker-cpu-nightly]
|
||||
cpu-tests:
|
||||
name: CPU Tests
|
||||
needs: [build-docker-cpu]
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
container:
|
||||
image: ${{ needs.build-docker-cpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
image: ${{ needs.build-docker-cpu.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
options: --shm-size "16gb"
|
||||
credentials:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
@@ -142,9 +142,9 @@ jobs:
|
||||
run: make test-end-to-end
|
||||
|
||||
# This job runs the E2E tests + pytest with all extras in the GPU image
|
||||
nightly-gpu-tests:
|
||||
name: Nightly GPU Tests
|
||||
needs: [build-docker-gpu-nightly]
|
||||
gpu-tests:
|
||||
name: GPU Tests
|
||||
needs: [build-docker-gpu]
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
@@ -154,7 +154,7 @@ jobs:
|
||||
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
container:
|
||||
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
image: ${{ needs.build-docker-gpu.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
options: --gpus all --shm-size "16gb"
|
||||
credentials:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
@@ -175,9 +175,9 @@ jobs:
|
||||
run: make test-end-to-end
|
||||
|
||||
# This job runs multi-GPU training tests with 4 GPUs
|
||||
nightly-multi-gpu-tests:
|
||||
name: Nightly Multi-GPU Tests
|
||||
needs: [build-docker-gpu-nightly]
|
||||
multi-gpu-tests:
|
||||
name: Multi-GPU Tests
|
||||
needs: [build-docker-gpu]
|
||||
runs-on:
|
||||
group: aws-g4dn-12xlarge # Instance with 4 GPUs
|
||||
env:
|
||||
@@ -188,7 +188,7 @@ jobs:
|
||||
CUDA_VISIBLE_DEVICES: "0,1,2,3"
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
container:
|
||||
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
image: ${{ needs.build-docker-gpu.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
options: --gpus all --shm-size "16gb"
|
||||
credentials:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
@@ -33,7 +33,7 @@ jobs:
|
||||
github.event.workflow_run.event == 'pull_request' &&
|
||||
github.event.workflow_run.conclusion == 'success' &&
|
||||
github.repository == 'huggingface/lerobot'
|
||||
uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
|
||||
uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@90b4ee2c10b81b5c1a6367c4e6fc9e2fb510a7e3 # main
|
||||
with:
|
||||
package_name: lerobot
|
||||
secrets:
|
||||
|
||||
4
.github/workflows/documentation.yml
vendored
4
.github/workflows/documentation.yml
vendored
@@ -55,7 +55,7 @@ jobs:
|
||||
github.repository == 'huggingface/lerobot'
|
||||
permissions:
|
||||
contents: read
|
||||
uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
|
||||
uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@90b4ee2c10b81b5c1a6367c4e6fc9e2fb510a7e3 # main
|
||||
with:
|
||||
commit_sha: ${{ github.sha }}
|
||||
package: lerobot
|
||||
@@ -78,7 +78,7 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
|
||||
uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@90b4ee2c10b81b5c1a6367c4e6fc9e2fb510a7e3 # main
|
||||
with:
|
||||
commit_sha: ${{ github.event.pull_request.head.sha }}
|
||||
pr_number: ${{ github.event.number }}
|
||||
|
||||
8
.github/workflows/fast_tests.yml
vendored
8
.github/workflows/fast_tests.yml
vendored
@@ -27,6 +27,7 @@ on:
|
||||
- "tests/**"
|
||||
- ".github/workflows/**"
|
||||
- "pyproject.toml"
|
||||
- "uv.lock"
|
||||
- "Makefile"
|
||||
push:
|
||||
branches:
|
||||
@@ -36,6 +37,7 @@ on:
|
||||
- "tests/**"
|
||||
- ".github/workflows/**"
|
||||
- "pyproject.toml"
|
||||
- "uv.lock"
|
||||
- "Makefile"
|
||||
|
||||
permissions:
|
||||
@@ -63,7 +65,7 @@ jobs:
|
||||
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
lfs: true
|
||||
@@ -81,14 +83,14 @@ jobs:
|
||||
libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev
|
||||
|
||||
- name: Setup uv and Python
|
||||
uses: astral-sh/setup-uv@v6 # zizmor: ignore[unpinned-uses]
|
||||
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
|
||||
with:
|
||||
enable-cache: true
|
||||
version: ${{ env.UV_VERSION }}
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install lerobot with test extras
|
||||
run: uv sync --extra "test"
|
||||
run: uv sync --locked --extra "test"
|
||||
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
|
||||
15
.github/workflows/full_tests.yml
vendored
15
.github/workflows/full_tests.yml
vendored
@@ -29,6 +29,7 @@ on:
|
||||
- "tests/**"
|
||||
- ".github/workflows/**"
|
||||
- "pyproject.toml"
|
||||
- "uv.lock"
|
||||
- "Makefile"
|
||||
|
||||
permissions:
|
||||
@@ -62,7 +63,7 @@ jobs:
|
||||
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
lfs: true
|
||||
persist-credentials: false
|
||||
@@ -79,14 +80,14 @@ jobs:
|
||||
speech-dispatcher libgeos-dev portaudio19-dev
|
||||
|
||||
- name: Setup uv and Python
|
||||
uses: astral-sh/setup-uv@v6 # zizmor: ignore[unpinned-uses]
|
||||
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
|
||||
with:
|
||||
enable-cache: true
|
||||
version: ${{ env.UV_VERSION }}
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install lerobot with all extras
|
||||
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
|
||||
run: uv sync --locked --extra all # TODO(Steven): Make flash-attn optional
|
||||
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
@@ -136,21 +137,21 @@ jobs:
|
||||
sudo apt-get update
|
||||
sudo apt-get install git-lfs
|
||||
git lfs install
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
lfs: true
|
||||
persist-credentials: false
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
with:
|
||||
cache-binary: false
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
context: .
|
||||
file: ./docker/Dockerfile.internal
|
||||
|
||||
@@ -12,38 +12,81 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This workflow handles full testing with unboud dependencies versions.
|
||||
name: Unbound Dependency Tests
|
||||
# This workflow tests the project against the latest upstream dependencies
|
||||
# (within pyproject.toml constraints) and opens a PR to update uv.lock
|
||||
# if the tests pass and the lockfile has changed.
|
||||
name: Latest Dependency Tests
|
||||
|
||||
on:
|
||||
# Allows running this workflow manually from the Actions tab
|
||||
workflow_dispatch:
|
||||
|
||||
# Run on the 1st and 15th of every month at 09:00 UTC
|
||||
# schedule:
|
||||
# - cron: '0 2 1,15 * *'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
# Runs at 03:00 UTC
|
||||
schedule:
|
||||
- cron: "0 3 * * *"
|
||||
|
||||
# Sets up the environment variables
|
||||
env:
|
||||
UV_VERSION: "0.8.0"
|
||||
PYTHON_VERSION: "3.12"
|
||||
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu:unbound
|
||||
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu:latest-deps
|
||||
|
||||
# Ensures that only the latest action is built, canceling older runs.
|
||||
# Ensures that only the latest run is active, canceling older runs.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
group: ${{ github.workflow }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
# This job runs the E2E tests + pytest with all unbound extras
|
||||
full-tests:
|
||||
name: Full Unbound Tests
|
||||
# This job upgrades the lockfile and checks if dependencies have changed
|
||||
upgrade-lock:
|
||||
name: Upgrade Lockfile
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'huggingface/lerobot'
|
||||
permissions:
|
||||
contents: read
|
||||
outputs:
|
||||
changed: ${{ steps.diff.outputs.changed }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup uv and Python
|
||||
uses: astral-sh/setup-uv@v6 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Upgrade uv.lock
|
||||
run: uv lock --upgrade
|
||||
|
||||
- name: Check for changes
|
||||
id: diff
|
||||
run: |
|
||||
if git diff --quiet uv.lock; then
|
||||
echo "changed=false" >> "$GITHUB_OUTPUT"
|
||||
echo "uv.lock is up to date — no dependency changes."
|
||||
else
|
||||
echo "changed=true" >> "$GITHUB_OUTPUT"
|
||||
echo "uv.lock has changed — running tests."
|
||||
fi
|
||||
|
||||
- name: Upload updated lockfile
|
||||
if: steps.diff.outputs.changed == 'true'
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: uv-lock
|
||||
path: uv.lock
|
||||
|
||||
# This job runs the full test suite with the upgraded dependencies
|
||||
cpu-tests:
|
||||
name: CPU Tests (Latest Deps)
|
||||
needs: [upgrade-lock]
|
||||
if: needs.upgrade-lock.outputs.changed == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
env:
|
||||
MUJOCO_GL: egl
|
||||
HF_HOME: /mnt/cache/.cache/huggingface
|
||||
@@ -55,6 +98,11 @@ jobs:
|
||||
lfs: true
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download updated lockfile
|
||||
uses: actions/download-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: uv-lock
|
||||
|
||||
# NOTE(Steven): Mount to `/mnt` to avoid the limited storage on `/home`. Consider cleaning default SDKs or using self-hosted runners for more space.
|
||||
# (As of 2024-06-10, the runner's `/home` has only 6.2 GB free—8% of its 72 GB total.)
|
||||
- name: Setup /mnt storage
|
||||
@@ -73,34 +121,32 @@ jobs:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Unbound dependencies
|
||||
run: |
|
||||
sed -i 's/,[[:space:]]*<[0-9\.]*//g' pyproject.toml
|
||||
echo "Dependencies unbound:" && cat pyproject.toml
|
||||
|
||||
- name: Install lerobot with all extras
|
||||
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
|
||||
run: uv sync --locked --extra all # TODO(Steven): Make flash-attn optional
|
||||
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
uv run hf auth whoami
|
||||
|
||||
- name: Run pytest (all extras)
|
||||
run: uv run pytest tests -vv
|
||||
run: uv run pytest tests -vv --maxfail=10
|
||||
|
||||
- name: Run end-to-end tests
|
||||
run: uv run make test-end-to-end
|
||||
|
||||
# This job builds a GPU enabled image for testing
|
||||
# This job builds a GPU-enabled Docker image with the upgraded dependencies
|
||||
build-and-push-docker:
|
||||
name: Build and Push Docker
|
||||
needs: [upgrade-lock]
|
||||
if: needs.upgrade-lock.outputs.changed == 'true'
|
||||
permissions:
|
||||
contents: read
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
if: github.repository == 'huggingface/lerobot'
|
||||
outputs:
|
||||
image_tag: ${{ env.DOCKER_IMAGE_NAME }}
|
||||
env:
|
||||
GITHUB_REF: ${{ github.ref }}
|
||||
steps:
|
||||
- name: Install Git LFS
|
||||
run: |
|
||||
@@ -111,6 +157,12 @@ jobs:
|
||||
with:
|
||||
lfs: true
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download updated lockfile
|
||||
uses: actions/download-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: uv-lock
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
@@ -127,14 +179,13 @@ jobs:
|
||||
file: ./docker/Dockerfile.internal
|
||||
push: true
|
||||
tags: ${{ env.DOCKER_IMAGE_NAME }}
|
||||
build-args: |
|
||||
UNBOUND_DEPS=true
|
||||
|
||||
# This job runs pytest with all unbound extras in a GPU enabled host
|
||||
# It runs everytime a test image is created
|
||||
# This job runs pytest with all extras on a GPU-enabled host
|
||||
gpu-tests:
|
||||
name: GPU Unbound Tests
|
||||
name: GPU Tests (Latest Deps)
|
||||
needs: [build-and-push-docker]
|
||||
permissions:
|
||||
contents: read
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
@@ -159,17 +210,69 @@ jobs:
|
||||
run: |
|
||||
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
hf auth whoami
|
||||
- name: Fix ptxas permissions
|
||||
run: chmod +x /lerobot/.venv/lib/python3.12/site-packages/triton/backends/nvidia/bin/ptxas
|
||||
- name: Run pytest on GPU
|
||||
run: pytest tests -vv
|
||||
run: pytest tests -vv --maxfail=10
|
||||
- name: Run end-to-end tests
|
||||
run: make test-end-to-end
|
||||
|
||||
# This job deletes the test image recently created
|
||||
# It runs everytime after the gpu-tests have finished
|
||||
delete-unbound-image:
|
||||
name: Delete Unbound Image
|
||||
# This job creates or updates a PR with the upgraded lockfile
|
||||
open-pr:
|
||||
name: Open PR
|
||||
needs: [cpu-tests, gpu-tests, upgrade-lock]
|
||||
if: success() && needs.upgrade-lock.outputs.changed == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.UPDATE_LOCK_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download updated lockfile
|
||||
uses: actions/download-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: uv-lock
|
||||
|
||||
- name: Create or update PR
|
||||
run: |
|
||||
set -euo pipefail
|
||||
BRANCH="auto/update-uv-lock"
|
||||
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/${{ github.repository }}.git"
|
||||
|
||||
git checkout -B "$BRANCH"
|
||||
git add uv.lock
|
||||
git commit -m "chore(dependencies): update uv.lock"
|
||||
git push --force origin "$BRANCH"
|
||||
|
||||
# Create PR only if one doesn't already exist for this branch
|
||||
EXISTING_PR=$(gh pr list --head "$BRANCH" --state open --json number --jq '.[0].number')
|
||||
if [ -z "$EXISTING_PR" ]; then
|
||||
gh pr create \
|
||||
--title "chore(dependencies): update uv.lock" \
|
||||
--body "Automated update of \`uv.lock\` after successful latest dependency tests (CPU + GPU).
|
||||
|
||||
This PR upgrades all dependencies to their latest versions within the ranges specified in \`pyproject.toml\`." \
|
||||
--head "$BRANCH" \
|
||||
--base main
|
||||
else
|
||||
echo "PR #$EXISTING_PR already exists, branch has been updated."
|
||||
fi
|
||||
|
||||
# This job deletes the temporary Docker image after tests complete
|
||||
cleanup-docker:
|
||||
name: Cleanup Docker Image
|
||||
needs: [gpu-tests, build-and-push-docker]
|
||||
if: always() && needs.build-and-push-docker.result == 'success'
|
||||
permissions:
|
||||
contents: read
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Get Docker Hub Token and Delete Image
|
||||
@@ -180,8 +283,7 @@ jobs:
|
||||
IMAGE_FULL: ${{ needs.build-and-push-docker.outputs.image_tag }}
|
||||
run: |
|
||||
IMAGE_NAME=$(echo "$IMAGE_FULL" | cut -d':' -f1)
|
||||
IMAGE_TAG=$(echo "$IMAGE_FULL" | cut -d':' -f2)
|
||||
|
||||
IMAGE_TAG=$(echo "$IMAGE_FULL" | cut -d':' -f2-)
|
||||
echo "Attempting to delete image: $IMAGE_NAME:$IMAGE_TAG"
|
||||
|
||||
TOKEN=$(curl -s -H "Content-Type: application/json" \
|
||||
6
.github/workflows/quality.yml
vendored
6
.github/workflows/quality.yml
vendored
@@ -43,16 +43,16 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Run pre-commit hooks
|
||||
uses: pre-commit/action@v3.0.1 # zizmor: ignore[unpinned-uses]
|
||||
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
|
||||
with:
|
||||
extra_args: --all-files --show-diff-on-failure --color=always
|
||||
|
||||
12
.github/workflows/release.yml
vendored
12
.github/workflows/release.yml
vendored
@@ -38,12 +38,12 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
@@ -104,7 +104,7 @@ jobs:
|
||||
- name: Publish to TestPyPI for pre-releases
|
||||
# True for tags like 'v0.2.0-rc1'
|
||||
if: startsWith(github.ref, 'refs/tags/v') && contains(github.ref, '-')
|
||||
uses: pypa/gh-action-pypi-publish@v1.13.0 # zizmor: ignore[unpinned-uses, use-trusted-publishing]
|
||||
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
|
||||
with:
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
verbose: true
|
||||
@@ -112,7 +112,7 @@ jobs:
|
||||
|
||||
- name: Publish to PyPI
|
||||
if: startsWith(github.ref, 'refs/tags/v') && !contains(github.ref, '-')
|
||||
uses: pypa/gh-action-pypi-publish@v1.13.0 # zizmor: ignore[unpinned-uses, use-trusted-publishing]
|
||||
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
|
||||
with:
|
||||
verbose: true
|
||||
print-hash: true
|
||||
@@ -127,7 +127,7 @@ jobs:
|
||||
env:
|
||||
MUJOCO_GL: egl
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
lfs: true
|
||||
persist-credentials: false
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
git curl libglib2.0-0 libegl1-mesa-dev ffmpeg libusb-1.0-0-dev \
|
||||
speech-dispatcher libgeos-dev portaudio19-dev
|
||||
- name: Setup uv and Python
|
||||
uses: astral-sh/setup-uv@v6 # zizmor: ignore[unpinned-uses]
|
||||
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
|
||||
with:
|
||||
enable-cache: true # zizmor: ignore[cache-poisoning]
|
||||
version: ${{ env.UV_VERSION }}
|
||||
|
||||
4
.github/workflows/security.yml
vendored
4
.github/workflows/security.yml
vendored
@@ -43,12 +43,12 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6 # zizmor: ignore[unpinned-uses]
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Secret Scanning
|
||||
uses: trufflesecurity/trufflehog@v3.90.0 # zizmor: ignore[unpinned-uses]
|
||||
uses: trufflesecurity/trufflehog@eafb8c5f6a06175141c27f17bcc17941853d0047 # v3.90.0
|
||||
with:
|
||||
extra_args: --only-verified
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -25,7 +25,6 @@ node_modules/
|
||||
|
||||
# Lock files
|
||||
poetry.lock
|
||||
uv.lock
|
||||
Pipfile.lock
|
||||
|
||||
### Build & Distribution ###
|
||||
|
||||
@@ -4,7 +4,8 @@
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://github.com/huggingface/lerobot/actions/workflows/nightly.yml?query=branch%3Amain)
|
||||
[](https://github.com/huggingface/lerobot/actions/workflows/latest_deps_tests.yml?query=branch%3Amain)
|
||||
[](https://github.com/huggingface/lerobot/actions/workflows/docker_publish.yml?query=branch%3Amain)
|
||||
[](https://www.python.org/downloads/)
|
||||
[](https://github.com/huggingface/lerobot/blob/main/LICENSE)
|
||||
[](https://pypi.org/project/lerobot/)
|
||||
|
||||
@@ -73,17 +73,10 @@ ENV HOME=/home/user_lerobot \
|
||||
RUN uv venv --python python${PYTHON_VERSION}
|
||||
|
||||
# Install Python dependencies for caching
|
||||
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml README.md MANIFEST.in ./
|
||||
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
|
||||
COPY --chown=user_lerobot:user_lerobot src/ src/
|
||||
|
||||
ARG UNBOUND_DEPS=false
|
||||
|
||||
RUN if [ "$UNBOUND_DEPS" = "true" ]; then \
|
||||
sed -i 's/,[[:space:]]*<[0-9\.]*//g' pyproject.toml; \
|
||||
echo "Dependencies unbound:" && cat pyproject.toml; \
|
||||
fi
|
||||
|
||||
RUN uv pip install --no-cache ".[all]"
|
||||
RUN uv sync --locked --extra all --no-cache
|
||||
|
||||
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
|
||||
|
||||
|
||||
@@ -61,17 +61,10 @@ ENV HOME=/home/user_lerobot \
|
||||
RUN uv venv
|
||||
|
||||
# Install Python dependencies for caching
|
||||
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml README.md MANIFEST.in ./
|
||||
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
|
||||
COPY --chown=user_lerobot:user_lerobot src/ src/
|
||||
|
||||
ARG UNBOUND_DEPS=false
|
||||
|
||||
RUN if [ "$UNBOUND_DEPS" = "true" ]; then \
|
||||
sed -i 's/,[[:space:]]*<[0-9\.]*//g' pyproject.toml; \
|
||||
echo "Dependencies unbound:" && cat pyproject.toml; \
|
||||
fi
|
||||
|
||||
RUN uv pip install --no-cache ".[all]"
|
||||
RUN uv sync --locked --extra all --no-cache
|
||||
|
||||
# Copy the rest of the application code
|
||||
# Make sure to have the git-LFS files for testing
|
||||
|
||||
77
docker/README.md
Normal file
77
docker/README.md
Normal file
@@ -0,0 +1,77 @@
|
||||
# Docker
|
||||
|
||||
This directory contains Dockerfiles for running LeRobot in containerized environments. Both images are **built nightly from `main`** and published to Docker Hub with the full environment pre-baked — no dependency setup required.
|
||||
|
||||
## Pre-built Images
|
||||
|
||||
```bash
|
||||
# CPU-only image (based on Dockerfile.user)
|
||||
docker pull huggingface/lerobot-cpu:latest
|
||||
|
||||
# GPU image with CUDA support (based on Dockerfile.internal)
|
||||
docker pull huggingface/lerobot-gpu:latest
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
The fastest way to start training is to pull the GPU image and run `lerobot-train` directly. This is the same environment used for all of our CI, so it is a well-tested, batteries-included setup.
|
||||
|
||||
```bash
|
||||
docker run -it --rm --gpus all --shm-size 16gb huggingface/lerobot-gpu:latest
|
||||
|
||||
# inside the container:
|
||||
lerobot-train --policy.type=act --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human
|
||||
```
|
||||
|
||||
## Dockerfiles
|
||||
|
||||
### `Dockerfile.user` (CPU)
|
||||
|
||||
A lightweight image based on `python:3.12-slim`. Includes all Python dependencies and system libraries but does not include CUDA — there is no GPU support. Useful for exploring the codebase, running scripts, or working with robots, but not practical for training.
|
||||
|
||||
### `Dockerfile.internal` (GPU)
|
||||
|
||||
A CUDA-enabled image based on `nvidia/cuda`. This is the image for training — mostly used for internal interactions with the GPU cluster.
|
||||
|
||||
## Usage
|
||||
|
||||
### Running a pre-built image
|
||||
|
||||
```bash
|
||||
# CPU
|
||||
docker run -it --rm huggingface/lerobot-cpu:latest
|
||||
|
||||
# GPU
|
||||
docker run -it --rm --gpus all --shm-size 16gb huggingface/lerobot-gpu:latest
|
||||
```
|
||||
|
||||
### Building locally
|
||||
|
||||
From the repo root:
|
||||
|
||||
```bash
|
||||
# CPU
|
||||
docker build -f docker/Dockerfile.user -t lerobot-user .
|
||||
docker run -it --rm lerobot-user
|
||||
|
||||
# GPU
|
||||
docker build -f docker/Dockerfile.internal -t lerobot-internal .
|
||||
docker run -it --rm --gpus all --shm-size 16gb lerobot-internal
|
||||
```
|
||||
|
||||
### Multi-GPU training
|
||||
|
||||
To select specific GPUs, set `CUDA_VISIBLE_DEVICES` when launching the container:
|
||||
|
||||
```bash
|
||||
# Use 4 GPUs
|
||||
docker run -it --rm --gpus all --shm-size 16gb \
|
||||
-e CUDA_VISIBLE_DEVICES=0,1,2,3 \
|
||||
huggingface/lerobot-gpu:latest
|
||||
```
|
||||
|
||||
### USB device access (e.g. robots, cameras)
|
||||
|
||||
```bash
|
||||
docker run -it --device=/dev/ -v /dev/:/dev/ --rm huggingface/lerobot-cpu:latest
|
||||
```
|
||||
@@ -131,4 +131,4 @@ lerobot-record \
|
||||
|
||||
## License
|
||||
|
||||
This model follows the **Apache 2.0 License**, consistent with the original [GR00T repository](https://github.com/NVIDIA/Isaac-GR00T).
|
||||
This model follows NVIDIA's proprietary license, consistent with the original [GR00T repository](https://github.com/NVIDIA/Isaac-GR00T). Future versions (starting from N1.7) will follow **Apache 2.0 License**.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Installation
|
||||
|
||||
This guide uses `conda` (via miniforge) to manage environments (recommended). If you prefer another environment manager (e.g. `uv`, `venv`), ensure you have Python >=3.12 and `ffmpeg` installed with the `libsvtav1` encoder, then skip ahead to [Environment Setup](#step-2-environment-setup).
|
||||
This guide uses `conda` (via miniforge) to manage environments (recommended). If you prefer another environment manager (e.g. `uv`, `venv`), ensure you have Python >=3.12 and support PyTorch >= 2.10, then skip ahead to [Environment Setup](#step-2-environment-setup).
|
||||
|
||||
## Step 1 (`conda` only): Install [`miniforge`](https://conda-forge.org/download/)
|
||||
|
||||
@@ -20,7 +20,7 @@ Create a virtual environment with Python 3.12:
|
||||
conda create -y -n lerobot python=3.12
|
||||
```
|
||||
</hfoption>
|
||||
<hfoption id="uv">
|
||||
<hfoption id="uv (PyTorch >= 2.10 only)">
|
||||
```bash
|
||||
uv python install 3.12
|
||||
uv venv --python 3.12
|
||||
@@ -32,48 +32,87 @@ uv venv --python 3.12
|
||||
Then activate your virtual environment, you have to do this each time you open a shell to use lerobot:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
|
||||
<hfoptions id="activate_venv">
|
||||
<hfoption id="conda">```bash
|
||||
<hfoption id="conda">
|
||||
```bash
|
||||
conda activate lerobot
|
||||
```</hfoption>
|
||||
<hfoption id="uv">
|
||||
```bash
|
||||
# Linux/macOSsource
|
||||
source .venv/bin/activate
|
||||
# Windows PowerShell
|
||||
source .venv\Scripts\Activate.ps1
|
||||
```
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
When using `conda`, install `ffmpeg` in your environment:
|
||||
|
||||
```bash
|
||||
conda install ffmpeg -c conda-forge
|
||||
ffmpeg -version # ffmpeg 8.X is not yet supported !
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> This usually installs `ffmpeg 7.X` for your platform compiled with the `libsvtav1` encoder. If `libsvtav1` is not supported (check supported encoders with `ffmpeg -encoders`), you can:
|
||||
>
|
||||
> - _[On any platform]_ Explicitly install `ffmpeg 7.X` using:
|
||||
>
|
||||
> ```bash
|
||||
> conda install ffmpeg=7.1.1 -c conda-forge
|
||||
> ```
|
||||
>
|
||||
> - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`.
|
||||
|
||||
> [!NOTE]
|
||||
> When installing LeRobot inside WSL (Windows Subsystem for Linux), make sure to install `evdev` with the following command:
|
||||
> When installing LeRobot inside WSL (Windows Subsystem for Linux), make sure to also install `evdev`:
|
||||
>
|
||||
> ```bash
|
||||
> conda install evdev -c conda-forge
|
||||
> ```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="uv (PyTorch >= 2.10 only)">
|
||||
```bash
|
||||
# Linux/macOS
|
||||
source .venv/bin/activate
|
||||
# Windows PowerShell
|
||||
.venv\Scripts\activate
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> When installing LeRobot inside WSL (Windows Subsystem for Linux), make sure to also install `evdev`:
|
||||
>
|
||||
> ```bash
|
||||
> sudo apt install libevdev-dev
|
||||
> uv pip install evdev
|
||||
> ```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
### Install `ffmpeg` (for video decoding)
|
||||
|
||||
LeRobot uses [TorchCodec](https://github.com/meta-pytorch/torchcodec) for video decoding by default, which requires `ffmpeg`.
|
||||
|
||||
> [!NOTE]
|
||||
> **Platform support:** TorchCodec is **not available** on macOS Intel (x86_64), Linux ARM (aarch64, arm64, armv7l), or Windows with PyTorch < 2.8. On these platforms, LeRobot automatically falls back to `pyav` — so you do not need to install `ffmpeg` and can skip to Step 3.
|
||||
|
||||
If your platform supports TorchCodec, install `ffmpeg` using one of the methods below:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
|
||||
<hfoptions id="install_ffmpeg">
|
||||
<hfoption id="conda (any PyTorch version)">
|
||||
|
||||
Install `ffmpeg` in your conda environment. This works with **all PyTorch versions** and is **required for PyTorch < 2.10**:
|
||||
|
||||
```bash
|
||||
conda install ffmpeg -c conda-forge
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> This usually installs `ffmpeg 8.X` with the `libsvtav1` encoder. If you run into issues (e.g. `libsvtav1` missing — check with `ffmpeg -encoders` — or a version mismatch with `torchcodec`), you can explicitly install `ffmpeg 7.1.1` using:
|
||||
>
|
||||
> ```bash
|
||||
> conda install ffmpeg=7.1.1 -c conda-forge
|
||||
> ```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="uv (PyTorch >= 2.10 only)">
|
||||
|
||||
Starting with **PyTorch >= 2.10** (TorchCodec ≥ 0.10), TorchCodec can dynamically link to a system-wide `ffmpeg` installation. This is useful when using `uv` or other non-`conda` environment managers:
|
||||
|
||||
```bash
|
||||
# Ubuntu/Debian
|
||||
sudo apt install ffmpeg
|
||||
|
||||
# macOS (Apple Silicon)
|
||||
brew install ffmpeg
|
||||
```
|
||||
|
||||
> [!IMPORTANT]
|
||||
> If you are using `uv` you will have to install `ffmpeg` system-wide (outside of the virtual environment). You rely on `uv` and `torchcodec` ability to dynamically link to the system `ffmpeg`.
|
||||
> System-wide `ffmpeg` is **only supported with PyTorch >= 2.10** (TorchCodec ≥ 0.10). For older PyTorch versions, you **must** use `conda install ffmpeg -c conda-forge` instead.
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
## Step 3: Install LeRobot 🤗
|
||||
|
||||
|
||||
@@ -331,6 +331,54 @@ lerobot-train \
|
||||
--wandb.project=multitask_dit
|
||||
```
|
||||
|
||||
## Libero Results
|
||||
|
||||
```
|
||||
python -m lerobot.scripts.lerobot_train \
|
||||
--dataset.repo_id=HuggingFaceVLA/libero \
|
||||
--policy.type=multi_task_dit \
|
||||
--policy.push_to_hub=false \
|
||||
--output_dir="./outputs/multitask_dit_libero" \
|
||||
--job_name="multitask-dit-libero" \
|
||||
--wandb.enable=true \
|
||||
--wandb.project=multitask_dit_libero \
|
||||
--dataset.image_transforms.enable=true \
|
||||
--dataset.image_transforms.max_num_transforms=4 \
|
||||
--dataset.image_transforms.tfs='{"brightness":{"type":"ColorJitter","kwargs":{"brightness":[0.75,1.25]}},"contrast":{"type":"ColorJitter","kwargs":{"contrast":[0.6,1.4]}},"saturation":{"type":"ColorJitter","kwargs":{"saturation":[0.8,1.2]}},"hue":{"type":"ColorJitter","kwargs":{"hue":[-0.05,0.05]}},"sharpness":{"type":"SharpnessJitter","kwargs":{"sharpness":[0.6,1.4]}},"rotation":{"type":"RandomRotation","kwargs":{"degrees":[-5,5]}},"translation":{"type":"RandomAffine","kwargs":{"degrees":0,"translate":[0.1,0.1]}}}' \
|
||||
--dataset.video_backend=torchcodec \
|
||||
--policy.use_amp=true \
|
||||
--policy.horizon=48 \
|
||||
--policy.n_obs_steps=2 \
|
||||
--policy.use_rope=true \
|
||||
--policy.use_positional_encoding=false \
|
||||
--policy.hidden_dim=768 \
|
||||
--policy.num_layers=8 \
|
||||
--policy.num_heads=12 \
|
||||
--policy.dropout=0.1 \
|
||||
--policy.timestep_embed_dim=256 \
|
||||
--policy.objective=diffusion \
|
||||
--policy.optimizer_lr=3e-4 \
|
||||
--policy.optimizer_weight_decay=0 \
|
||||
--policy.scheduler_warmup_steps=0 \
|
||||
--policy.vision_encoder_name=openai/clip-vit-base-patch16 \
|
||||
--policy.image_resize_shape=[256,256] \
|
||||
--policy.image_crop_is_random=true \
|
||||
--policy.text_encoder_name=openai/clip-vit-base-patch16 \
|
||||
--policy.vision_encoder_lr_multiplier=0.1 \
|
||||
--policy.device=cuda \
|
||||
--num_workers=8 \
|
||||
--save_freq=4000 \
|
||||
--log_freq=100 \
|
||||
--steps=100000 \
|
||||
--batch_size=320
|
||||
```
|
||||
|
||||
Results:
|
||||
|
||||
| LIBERO Spatial | LIBERO Object | LIBERO Goal | LIBERO 10 | Average |
|
||||
| -------------- | ------------- | ----------- | --------- | ------- |
|
||||
| 87.0 | 98.2 | 93.8 | 83.2 | 90.6 |
|
||||
|
||||
## References
|
||||
|
||||
For more details on the technical implementation and architecture, see:
|
||||
|
||||
91
docs/source/policy_pi05_README.md
Normal file
91
docs/source/policy_pi05_README.md
Normal file
@@ -0,0 +1,91 @@
|
||||
# π₀.₅ (pi05)
|
||||
|
||||
This repository contains the Hugging Face port of **π₀.₅**, adapted from [OpenPI](https://github.com/Physical-Intelligence/openpi) by the Physical Intelligence.
|
||||
It is designed as a **Vision-Language-Action model with open-world generalization**.
|
||||
|
||||
---
|
||||
|
||||
## Model Overview
|
||||
|
||||
| Feature | π₀ | π₀.₅ |
|
||||
| -------------------- | ------------------------------------------------------ | ----------------------------------------- |
|
||||
| Time Conditioning | Concatenates time with actions via `action_time_mlp_*` | Uses `time_mlp_*` for AdaRMS conditioning |
|
||||
| AdaRMS | Not used | Used in action expert |
|
||||
| Tokenizer Length | 48 tokens | 200 tokens |
|
||||
| Discrete State Input | False (Uses `state_proj` layer) | True |
|
||||
| Parameter Count | Higher (includes state embedding) | Lower (no state embedding) |
|
||||
|
||||
---
|
||||
|
||||
## Relative Actions
|
||||
|
||||
π₀.₅ supports training with **relative actions**, where the model learns relative offsets
|
||||
from the current robot state instead of absolute joint positions. This mirrors the
|
||||
relative-action transform in OpenPI (`DeltaActions`) and can improve performance.
|
||||
|
||||
### How it works
|
||||
|
||||
1. **During preprocessing**, absolute actions are converted to relative offsets:
|
||||
`relative = action - state` (for selected joints).
|
||||
2. The relative actions are normalized using statistics computed from the relative distribution.
|
||||
3. **During postprocessing**, predicted relative actions are converted back to absolute:
|
||||
`absolute = relative + state`.
|
||||
|
||||
Joints listed in `relative_exclude_joints` (e.g., gripper) are kept absolute.
|
||||
|
||||
### Configuration
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
| ------------------------- | ----------- | ------------- | ---------------------------------------------------------------- |
|
||||
| `use_relative_actions` | `bool` | `False` | Enable relative-action training |
|
||||
| `relative_exclude_joints` | `list[str]` | `["gripper"]` | Joint names to keep absolute (matched by substring) |
|
||||
| `action_feature_names` | `list[str]` | `None` | Auto-populated from dataset metadata at runtime by `make_policy` |
|
||||
|
||||
### Training example
|
||||
|
||||
```bash
|
||||
python -m lerobot.scripts.lerobot_train \
|
||||
--policy.type=pi05 \
|
||||
--dataset.repo_id=your_org/your_dataset \
|
||||
--policy.use_relative_actions=true \
|
||||
--policy.relative_exclude_joints='["gripper"]'
|
||||
```
|
||||
|
||||
When `use_relative_actions=true`, the training script automatically:
|
||||
|
||||
- Computes relative action statistics from the dataset (sampled chunk-level relative actions)
|
||||
- Replaces the standard action stats with relative stats for normalization
|
||||
- Broadcasts these stats across all ranks in distributed training
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this work, please cite both **OpenPI** and the π₀.₅ paper:
|
||||
|
||||
```bibtex
|
||||
@misc{openpi2024,
|
||||
author = {Physical Intelligence Lab},
|
||||
title = {OpenPI: PyTorch Implementation of π0 and π0.5 Policies},
|
||||
year = {2024},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/Physical-Intelligence/openpi}},
|
||||
license = {Apache-2.0}
|
||||
}
|
||||
|
||||
@misc{intelligence2025pi05visionlanguageactionmodelopenworld,
|
||||
title = {π₀.₅: a Vision-Language-Action Model with Open-World Generalization},
|
||||
author = {Physical Intelligence and Kevin Black and Noah Brown and James Darpinian and Karan Dhabalia and Danny Driess and Adnan Esmail and Michael Equi and Chelsea Finn and Niccolo Fusai and Manuel Y. Galliker and Dibya Ghosh and Lachy Groom and Karol Hausman and Brian Ichter and Szymon Jakubczak and Tim Jones and Liyiming Ke and Devin LeBlanc and Sergey Levine and Adrian Li-Bell and Mohith Mothukuri and Suraj Nair and Karl Pertsch and Allen Z. Ren and Lucy Xiaoyang Shi and Laura Smith and Jost Tobias Springenberg and Kyle Stachowicz and James Tanner and Quan Vuong and Homer Walke and Anna Walling and Haohuan Wang and Lili Yu and Ury Zhilinsky},
|
||||
year = {2025},
|
||||
eprint = {2504.16054},
|
||||
archivePrefix= {arXiv},
|
||||
primaryClass = {cs.LG},
|
||||
url = {https://arxiv.org/abs/2504.16054},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This port follows the **Apache 2.0 License**, consistent with the original [OpenPI repository](https://github.com/Physical-Intelligence/openpi).
|
||||
108
docs/source/policy_pi0_README.md
Normal file
108
docs/source/policy_pi0_README.md
Normal file
@@ -0,0 +1,108 @@
|
||||
# π₀ (pi0)
|
||||
|
||||
This repository contains the Hugging Face port of **π₀**, adapted from [OpenPI](https://github.com/Physical-Intelligence/openpi) by the Physical Intelligence.
|
||||
It is designed as a **Vision-Language-Action model for general robot control**.
|
||||
|
||||
---
|
||||
|
||||
## Model Overview
|
||||
|
||||
| Feature | π₀ | π₀.₅ |
|
||||
| -------------------- | ------------------------------------------------------ | ----------------------------------------- |
|
||||
| Time Conditioning | Concatenates time with actions via `action_time_mlp_*` | Uses `time_mlp_*` for AdaRMS conditioning |
|
||||
| AdaRMS | Not used | Used in action expert |
|
||||
| Tokenizer Length | 48 tokens | 200 tokens |
|
||||
| Discrete State Input | False (Uses `state_proj` layer) | True |
|
||||
| Parameter Count | Higher (includes state embedding) | Lower (no state embedding) |
|
||||
|
||||
---
|
||||
|
||||
## Relative Actions
|
||||
|
||||
π₀ supports training with **relative actions**, where the model learns relative offsets
|
||||
from the current robot state instead of absolute joint positions. This mirrors the
|
||||
relative-action transform in OpenPI (`DeltaActions`) and can improve performance.
|
||||
|
||||
### How it works
|
||||
|
||||
1. **During preprocessing**, absolute actions are converted to relative offsets:
|
||||
`relative = action - state` (for selected joints).
|
||||
2. The relative actions are normalized using statistics computed from the relative distribution.
|
||||
3. **During postprocessing**, predicted relative actions are converted back to absolute:
|
||||
`absolute = relative + state`.
|
||||
|
||||
Joints listed in `relative_exclude_joints` (e.g., gripper) are kept absolute.
|
||||
|
||||
### Configuration
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
| ------------------------- | ----------- | ------------- | ---------------------------------------------------------------- |
|
||||
| `use_relative_actions` | `bool` | `False` | Enable relative-action training |
|
||||
| `relative_exclude_joints` | `list[str]` | `["gripper"]` | Joint names to keep absolute (matched by substring) |
|
||||
| `action_feature_names` | `list[str]` | `None` | Auto-populated from dataset metadata at runtime by `make_policy` |
|
||||
|
||||
### Training example
|
||||
|
||||
```bash
|
||||
python -m lerobot.scripts.lerobot_train \
|
||||
--policy.type=pi0 \
|
||||
--dataset.repo_id=your_org/your_dataset \
|
||||
--policy.use_relative_actions=true \
|
||||
--policy.relative_exclude_joints='["gripper"]'
|
||||
```
|
||||
|
||||
When `use_relative_actions=true`, the training script automatically:
|
||||
|
||||
- Computes relative action statistics from the dataset (sampled chunk-level relative actions)
|
||||
- Replaces the standard action stats with relative stats for normalization
|
||||
- Broadcasts these stats across all ranks in distributed training
|
||||
|
||||
### Recomputing stats for an existing dataset
|
||||
|
||||
If you want to precompute relative action stats offline, use `recompute_stats` from
|
||||
`lerobot.datasets.dataset_tools`:
|
||||
|
||||
```python
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.dataset_tools import recompute_stats
|
||||
|
||||
dataset = LeRobotDataset("your_org/your_dataset")
|
||||
dataset = recompute_stats(
|
||||
dataset,
|
||||
relative_action=True,
|
||||
relative_exclude_joints=["gripper"],
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this work, please cite both **OpenPI** and the π₀ paper:
|
||||
|
||||
```bibtex
|
||||
@misc{openpi2024,
|
||||
author = {Physical Intelligence Lab},
|
||||
title = {OpenPI: PyTorch Implementation of π0 and π0.5 Policies},
|
||||
year = {2024},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/Physical-Intelligence/openpi}},
|
||||
license = {Apache-2.0}
|
||||
}
|
||||
|
||||
@misc{black2024pi0visionlanguageactionflowmodel,
|
||||
title = {π₀: A Vision-Language-Action Flow Model for General Robot Control},
|
||||
author = {Kevin Black and Noah Brown and Danny Driess and Adnan Esmail and Michael Equi and Chelsea Finn and Niccolo Fusai and Lachy Groom and Karol Hausman and Brian Ichter and Szymon Jakubczak and Tim Jones and Liyiming Ke and Sergey Levine and Adrian Li-Bell and Mohith Mothukuri and Suraj Nair and Karl Pertsch and Lucy Xiaoyang Shi and James Tanner and Quan Vuong and Anna Walling and Haohuan Wang and Ury Zhilinsky},
|
||||
year = {2024},
|
||||
eprint = {2410.24164},
|
||||
archivePrefix= {arXiv},
|
||||
primaryClass = {cs.LG},
|
||||
url = {https://arxiv.org/abs/2410.24164},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This port follows the **Apache 2.0 License**, consistent with the original [OpenPI repository](https://github.com/Physical-Intelligence/openpi).
|
||||
38
docs/source/policy_rtc_README.md
Normal file
38
docs/source/policy_rtc_README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# Real-Time Chunking (RTC)
|
||||
|
||||
This module contains the LeRobot implementation of **Real-Time Chunking (RTC)**, an inference-time technique for flow-matching based policies.
|
||||
|
||||
**Note**: RTC is not a policy itself, but rather an inference enhancement that works with flow-matching based policies including [π₀](../pi0/), [π₀.₅](../pi05/), and [SmolVLA](../smolvla/).
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
If you use Real-Time Chunking in your work, please cite:
|
||||
|
||||
```bibtex
|
||||
@misc{openpi2024,
|
||||
author = {Physical Intelligence Lab},
|
||||
title = {OpenPI: PyTorch Implementation of π0 and π0.5 Policies},
|
||||
year = {2024},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/Physical-Intelligence/openpi}},
|
||||
license = {Apache-2.0}
|
||||
}
|
||||
|
||||
@misc{black2025realtimeexecutionactionchunking,
|
||||
title={Real-Time Execution of Action Chunking Flow Policies},
|
||||
author={Kevin Black and Manuel Y. Galliker and Sergey Levine},
|
||||
year={2025},
|
||||
eprint={2506.07339},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.RO},
|
||||
url={https://arxiv.org/abs/2506.07339},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This implementation follows the **Apache 2.0 License**, consistent with the LeRobot project.
|
||||
14
docs/source/policy_sarm_README.md
Normal file
14
docs/source/policy_sarm_README.md
Normal file
@@ -0,0 +1,14 @@
|
||||
## Paper
|
||||
|
||||
https://arxiv.org/abs/2509.25358
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
@article{chen2025sarm,
|
||||
title={SARM: Stage-Aware Reward Modeling for Long Horizon Robot Manipulation},
|
||||
author={Chen, Qianzhong and Yu, Justin and Schwager, Mac and Abbeel, Pieter and Shentu, Yide and Wu, Philipp},
|
||||
journal={arXiv preprint arXiv:2509.25358},
|
||||
year={2025}
|
||||
}
|
||||
```
|
||||
@@ -25,7 +25,7 @@ discord = "https://discord.gg/s3KuuzsPFb"
|
||||
|
||||
[project]
|
||||
name = "lerobot"
|
||||
version = "0.5.1"
|
||||
version = "0.5.2"
|
||||
description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch"
|
||||
dynamic = ["readme"]
|
||||
license = { text = "Apache-2.0" }
|
||||
@@ -71,9 +71,9 @@ dependencies = [
|
||||
"cmake>=3.29.0.1,<4.2.0",
|
||||
"packaging>=24.2,<26.0",
|
||||
|
||||
"torch>=2.2.1,<2.11.0",
|
||||
"torchcodec>=0.2.1,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')",
|
||||
"torchvision>=0.21.0,<0.26.0",
|
||||
"torch>=2.7,<2.11.0",
|
||||
"torchcodec>=0.3.0,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # NOTE: Windows support starts at version 0.7 (needs torch==2.8), ffmpeg>=8 support starts at version 0.8.1 (needs torch==2.9), system-wide ffmpeg support starts at version 0.10 (needs torch==2.10).
|
||||
"torchvision>=0.22.0,<0.26.0",
|
||||
|
||||
"einops>=0.8.0,<0.9.0",
|
||||
"opencv-python-headless>=4.9.0,<4.14.0",
|
||||
|
||||
@@ -151,9 +151,11 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
``$HF_LEROBOT_HOME/hub``.
|
||||
episodes (list[int] | None, optional): If specified, this will only load episodes specified by
|
||||
their episode_index in this list. Defaults to None.
|
||||
image_transforms (Callable | None, optional): You can pass standard v2 image transforms from
|
||||
torchvision.transforms.v2 here which will be applied to visual modalities (whether they come
|
||||
from videos or images). Defaults to None.
|
||||
image_transforms (Callable | None, optional):
|
||||
Transform applied to visual modalities inside `__getitem__` after image decoding / tensor
|
||||
conversion. This works for both image-backed and video-backed observations and can later be
|
||||
updated with `set_image_transforms()` or cleared with `clear_image_transforms()`.
|
||||
Defaults to None.
|
||||
delta_timestamps (dict[list[float]] | None, optional): _description_. Defaults to None.
|
||||
tolerance_s (float, optional): Tolerance in seconds used to ensure data timestamps are actually in
|
||||
sync with the fps value. It is used at the init of the dataset to make sure that each
|
||||
@@ -192,7 +194,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
super().__init__()
|
||||
self.repo_id = repo_id
|
||||
self._requested_root = Path(root) if root else None
|
||||
self.image_transforms = image_transforms
|
||||
self.reader = None
|
||||
self.set_image_transforms(image_transforms)
|
||||
self.delta_timestamps = delta_timestamps
|
||||
self.episodes = episodes
|
||||
self.tolerance_s = tolerance_s
|
||||
@@ -475,6 +478,18 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
f"}})"
|
||||
)
|
||||
|
||||
def set_image_transforms(self, image_transforms: Callable | None) -> None:
|
||||
"""Replace the transform applied to visual observations."""
|
||||
if image_transforms is not None and not callable(image_transforms):
|
||||
raise TypeError("image_transforms must be callable or None.")
|
||||
self.image_transforms = image_transforms
|
||||
if self.reader is not None:
|
||||
self.reader._image_transforms = image_transforms
|
||||
|
||||
def clear_image_transforms(self) -> None:
|
||||
"""Remove the transform applied to visual observations."""
|
||||
self.set_image_transforms(None)
|
||||
|
||||
# ── Hub methods (stay on facade) ──────────────────────────────────
|
||||
|
||||
def push_to_hub(
|
||||
|
||||
@@ -89,12 +89,24 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):
|
||||
)
|
||||
self.disabled_features.update(extra_keys)
|
||||
|
||||
self.image_transforms = image_transforms
|
||||
self.delta_timestamps = delta_timestamps
|
||||
# TODO(rcadene, aliberts): We should not perform this aggregation for datasets
|
||||
# with multiple robots of different ranges. Instead we should have one normalization
|
||||
# per robot.
|
||||
self.stats = aggregate_stats([dataset.meta.stats for dataset in self._datasets])
|
||||
self.set_image_transforms(image_transforms)
|
||||
|
||||
def set_image_transforms(self, image_transforms: Callable | None) -> None:
|
||||
"""Replace the transform for this dataset and its children."""
|
||||
if image_transforms is not None and not callable(image_transforms):
|
||||
raise TypeError("image_transforms must be callable or None.")
|
||||
self.image_transforms = image_transforms
|
||||
for dataset in getattr(self, "_datasets", []):
|
||||
dataset.set_image_transforms(self.image_transforms)
|
||||
|
||||
def clear_image_transforms(self) -> None:
|
||||
"""Remove the transform from this dataset and its children."""
|
||||
self.set_image_transforms(None)
|
||||
|
||||
@property
|
||||
def repo_id_to_index(self):
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
# Multitask DiT Policy
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this work, please cite the following works:
|
||||
|
||||
```bibtex
|
||||
@misc{jones2025multitaskditpolicy,
|
||||
author = {Bryson Jones},
|
||||
title = {Dissecting and Open-Sourcing Multitask Diffusion Transformer Policy},
|
||||
year = {2025},
|
||||
url = {https://brysonkjones.substack.com/p/dissecting-and-open-sourcing-multitask-diffusion-transformer-policy},
|
||||
note = {Blog post}
|
||||
}
|
||||
```
|
||||
|
||||
```bibtex
|
||||
@misc{trilbmteam2025carefulexaminationlargebehaviormodels,
|
||||
author = {TRI LBM Team},
|
||||
title = {A Careful Examination of Large Behavior Models for Multitask Dexterous Manipulation},
|
||||
year = {2025},
|
||||
eprint = {arXiv:2507.05331},
|
||||
archivePrefix = {arXiv},
|
||||
primaryClass = {cs.RO},
|
||||
url = {https://arxiv.org/abs/2507.05331}
|
||||
}
|
||||
```
|
||||
|
||||
```bibtex
|
||||
@misc{bostondynamics2025largebehaviormodelsatlas,
|
||||
author = {Boston Dynamics and TRI Research Team},
|
||||
title = {Large Behavior Models and Atlas Find New Footing},
|
||||
year = {2025},
|
||||
url = {https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/},
|
||||
note = {Blog post}
|
||||
}
|
||||
```
|
||||
1
src/lerobot/policies/multi_task_dit/README.md
Symbolic link
1
src/lerobot/policies/multi_task_dit/README.md
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../docs/source/policy_multi_task_dit_README.md
|
||||
@@ -1,108 +0,0 @@
|
||||
# π₀ (pi0)
|
||||
|
||||
This repository contains the Hugging Face port of **π₀**, adapted from [OpenPI](https://github.com/Physical-Intelligence/openpi) by the Physical Intelligence.
|
||||
It is designed as a **Vision-Language-Action model for general robot control**.
|
||||
|
||||
---
|
||||
|
||||
## Model Overview
|
||||
|
||||
| Feature | π₀ | π₀.₅ |
|
||||
| -------------------- | ------------------------------------------------------ | ----------------------------------------- |
|
||||
| Time Conditioning | Concatenates time with actions via `action_time_mlp_*` | Uses `time_mlp_*` for AdaRMS conditioning |
|
||||
| AdaRMS | Not used | Used in action expert |
|
||||
| Tokenizer Length | 48 tokens | 200 tokens |
|
||||
| Discrete State Input | False (Uses `state_proj` layer) | True |
|
||||
| Parameter Count | Higher (includes state embedding) | Lower (no state embedding) |
|
||||
|
||||
---
|
||||
|
||||
## Relative Actions
|
||||
|
||||
π₀ supports training with **relative actions**, where the model learns relative offsets
|
||||
from the current robot state instead of absolute joint positions. This mirrors the
|
||||
relative-action transform in OpenPI (`DeltaActions`) and can improve performance.
|
||||
|
||||
### How it works
|
||||
|
||||
1. **During preprocessing**, absolute actions are converted to relative offsets:
|
||||
`relative = action - state` (for selected joints).
|
||||
2. The relative actions are normalized using statistics computed from the relative distribution.
|
||||
3. **During postprocessing**, predicted relative actions are converted back to absolute:
|
||||
`absolute = relative + state`.
|
||||
|
||||
Joints listed in `relative_exclude_joints` (e.g., gripper) are kept absolute.
|
||||
|
||||
### Configuration
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
| ------------------------- | ----------- | ------------- | ---------------------------------------------------------------- |
|
||||
| `use_relative_actions` | `bool` | `False` | Enable relative-action training |
|
||||
| `relative_exclude_joints` | `list[str]` | `["gripper"]` | Joint names to keep absolute (matched by substring) |
|
||||
| `action_feature_names` | `list[str]` | `None` | Auto-populated from dataset metadata at runtime by `make_policy` |
|
||||
|
||||
### Training example
|
||||
|
||||
```bash
|
||||
python -m lerobot.scripts.lerobot_train \
|
||||
--policy.type=pi0 \
|
||||
--dataset.repo_id=your_org/your_dataset \
|
||||
--policy.use_relative_actions=true \
|
||||
--policy.relative_exclude_joints='["gripper"]'
|
||||
```
|
||||
|
||||
When `use_relative_actions=true`, the training script automatically:
|
||||
|
||||
- Computes relative action statistics from the dataset (sampled chunk-level relative actions)
|
||||
- Replaces the standard action stats with relative stats for normalization
|
||||
- Broadcasts these stats across all ranks in distributed training
|
||||
|
||||
### Recomputing stats for an existing dataset
|
||||
|
||||
If you want to precompute relative action stats offline, use `recompute_stats` from
|
||||
`lerobot.datasets.dataset_tools`:
|
||||
|
||||
```python
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.dataset_tools import recompute_stats
|
||||
|
||||
dataset = LeRobotDataset("your_org/your_dataset")
|
||||
dataset = recompute_stats(
|
||||
dataset,
|
||||
relative_action=True,
|
||||
relative_exclude_joints=["gripper"],
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this work, please cite both **OpenPI** and the π₀ paper:
|
||||
|
||||
```bibtex
|
||||
@misc{openpi2024,
|
||||
author = {Physical Intelligence Lab},
|
||||
title = {OpenPI: PyTorch Implementation of π0 and π0.5 Policies},
|
||||
year = {2024},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/Physical-Intelligence/openpi}},
|
||||
license = {Apache-2.0}
|
||||
}
|
||||
|
||||
@misc{black2024pi0visionlanguageactionflowmodel,
|
||||
title = {π₀: A Vision-Language-Action Flow Model for General Robot Control},
|
||||
author = {Kevin Black and Noah Brown and Danny Driess and Adnan Esmail and Michael Equi and Chelsea Finn and Niccolo Fusai and Lachy Groom and Karol Hausman and Brian Ichter and Szymon Jakubczak and Tim Jones and Liyiming Ke and Sergey Levine and Adrian Li-Bell and Mohith Mothukuri and Suraj Nair and Karl Pertsch and Lucy Xiaoyang Shi and James Tanner and Quan Vuong and Anna Walling and Haohuan Wang and Ury Zhilinsky},
|
||||
year = {2024},
|
||||
eprint = {2410.24164},
|
||||
archivePrefix= {arXiv},
|
||||
primaryClass = {cs.LG},
|
||||
url = {https://arxiv.org/abs/2410.24164},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This port follows the **Apache 2.0 License**, consistent with the original [OpenPI repository](https://github.com/Physical-Intelligence/openpi).
|
||||
1
src/lerobot/policies/pi0/README.md
Symbolic link
1
src/lerobot/policies/pi0/README.md
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../docs/source/policy_pi0_README.md
|
||||
@@ -1,91 +0,0 @@
|
||||
# π₀.₅ (pi05)
|
||||
|
||||
This repository contains the Hugging Face port of **π₀.₅**, adapted from [OpenPI](https://github.com/Physical-Intelligence/openpi) by the Physical Intelligence.
|
||||
It is designed as a **Vision-Language-Action model with open-world generalization**.
|
||||
|
||||
---
|
||||
|
||||
## Model Overview
|
||||
|
||||
| Feature | π₀ | π₀.₅ |
|
||||
| -------------------- | ------------------------------------------------------ | ----------------------------------------- |
|
||||
| Time Conditioning | Concatenates time with actions via `action_time_mlp_*` | Uses `time_mlp_*` for AdaRMS conditioning |
|
||||
| AdaRMS | Not used | Used in action expert |
|
||||
| Tokenizer Length | 48 tokens | 200 tokens |
|
||||
| Discrete State Input | False (Uses `state_proj` layer) | True |
|
||||
| Parameter Count | Higher (includes state embedding) | Lower (no state embedding) |
|
||||
|
||||
---
|
||||
|
||||
## Relative Actions
|
||||
|
||||
π₀.₅ supports training with **relative actions**, where the model learns relative offsets
|
||||
from the current robot state instead of absolute joint positions. This mirrors the
|
||||
relative-action transform in OpenPI (`DeltaActions`) and can improve performance.
|
||||
|
||||
### How it works
|
||||
|
||||
1. **During preprocessing**, absolute actions are converted to relative offsets:
|
||||
`relative = action - state` (for selected joints).
|
||||
2. The relative actions are normalized using statistics computed from the relative distribution.
|
||||
3. **During postprocessing**, predicted relative actions are converted back to absolute:
|
||||
`absolute = relative + state`.
|
||||
|
||||
Joints listed in `relative_exclude_joints` (e.g., gripper) are kept absolute.
|
||||
|
||||
### Configuration
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
| ------------------------- | ----------- | ------------- | ---------------------------------------------------------------- |
|
||||
| `use_relative_actions` | `bool` | `False` | Enable relative-action training |
|
||||
| `relative_exclude_joints` | `list[str]` | `["gripper"]` | Joint names to keep absolute (matched by substring) |
|
||||
| `action_feature_names` | `list[str]` | `None` | Auto-populated from dataset metadata at runtime by `make_policy` |
|
||||
|
||||
### Training example
|
||||
|
||||
```bash
|
||||
python -m lerobot.scripts.lerobot_train \
|
||||
--policy.type=pi05 \
|
||||
--dataset.repo_id=your_org/your_dataset \
|
||||
--policy.use_relative_actions=true \
|
||||
--policy.relative_exclude_joints='["gripper"]'
|
||||
```
|
||||
|
||||
When `use_relative_actions=true`, the training script automatically:
|
||||
|
||||
- Computes relative action statistics from the dataset (sampled chunk-level relative actions)
|
||||
- Replaces the standard action stats with relative stats for normalization
|
||||
- Broadcasts these stats across all ranks in distributed training
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this work, please cite both **OpenPI** and the π₀.₅ paper:
|
||||
|
||||
```bibtex
|
||||
@misc{openpi2024,
|
||||
author = {Physical Intelligence Lab},
|
||||
title = {OpenPI: PyTorch Implementation of π0 and π0.5 Policies},
|
||||
year = {2024},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/Physical-Intelligence/openpi}},
|
||||
license = {Apache-2.0}
|
||||
}
|
||||
|
||||
@misc{intelligence2025pi05visionlanguageactionmodelopenworld,
|
||||
title = {π₀.₅: a Vision-Language-Action Model with Open-World Generalization},
|
||||
author = {Physical Intelligence and Kevin Black and Noah Brown and James Darpinian and Karan Dhabalia and Danny Driess and Adnan Esmail and Michael Equi and Chelsea Finn and Niccolo Fusai and Manuel Y. Galliker and Dibya Ghosh and Lachy Groom and Karol Hausman and Brian Ichter and Szymon Jakubczak and Tim Jones and Liyiming Ke and Devin LeBlanc and Sergey Levine and Adrian Li-Bell and Mohith Mothukuri and Suraj Nair and Karl Pertsch and Allen Z. Ren and Lucy Xiaoyang Shi and Laura Smith and Jost Tobias Springenberg and Kyle Stachowicz and James Tanner and Quan Vuong and Homer Walke and Anna Walling and Haohuan Wang and Lili Yu and Ury Zhilinsky},
|
||||
year = {2025},
|
||||
eprint = {2504.16054},
|
||||
archivePrefix= {arXiv},
|
||||
primaryClass = {cs.LG},
|
||||
url = {https://arxiv.org/abs/2504.16054},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This port follows the **Apache 2.0 License**, consistent with the original [OpenPI repository](https://github.com/Physical-Intelligence/openpi).
|
||||
1
src/lerobot/policies/pi05/README.md
Symbolic link
1
src/lerobot/policies/pi05/README.md
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../docs/source/policy_pi05_README.md
|
||||
@@ -1,38 +0,0 @@
|
||||
# Real-Time Chunking (RTC)
|
||||
|
||||
This module contains the LeRobot implementation of **Real-Time Chunking (RTC)**, an inference-time technique for flow-matching based policies.
|
||||
|
||||
**Note**: RTC is not a policy itself, but rather an inference enhancement that works with flow-matching based policies including [π₀](../pi0/), [π₀.₅](../pi05/), and [SmolVLA](../smolvla/).
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
If you use Real-Time Chunking in your work, please cite:
|
||||
|
||||
```bibtex
|
||||
@misc{openpi2024,
|
||||
author = {Physical Intelligence Lab},
|
||||
title = {OpenPI: PyTorch Implementation of π0 and π0.5 Policies},
|
||||
year = {2024},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/Physical-Intelligence/openpi}},
|
||||
license = {Apache-2.0}
|
||||
}
|
||||
|
||||
@misc{black2025realtimeexecutionactionchunking,
|
||||
title={Real-Time Execution of Action Chunking Flow Policies},
|
||||
author={Kevin Black and Manuel Y. Galliker and Sergey Levine},
|
||||
year={2025},
|
||||
eprint={2506.07339},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.RO},
|
||||
url={https://arxiv.org/abs/2506.07339},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This implementation follows the **Apache 2.0 License**, consistent with the LeRobot project.
|
||||
1
src/lerobot/policies/rtc/README.md
Symbolic link
1
src/lerobot/policies/rtc/README.md
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../docs/source/policy_rtc_README.md
|
||||
@@ -1,14 +0,0 @@
|
||||
## Paper
|
||||
|
||||
https://arxiv.org/abs/2509.25358
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
@article{chen2025sarm,
|
||||
title={SARM: Stage-Aware Reward Modeling for Long Horizon Robot Manipulation},
|
||||
author={Chen, Qianzhong and Yu, Justin and Schwager, Mac and Abbeel, Pieter and Shentu, Yide and Wu, Philipp},
|
||||
journal={arXiv preprint arXiv:2509.25358},
|
||||
year={2025}
|
||||
}
|
||||
```
|
||||
1
src/lerobot/policies/sarm/README.md
Symbolic link
1
src/lerobot/policies/sarm/README.md
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../docs/source/policy_sarm_README.md
|
||||
@@ -421,6 +421,7 @@ def record_loop(
|
||||
act_processed_policy: RobotAction = make_robot_action(action_values, dataset.features)
|
||||
# Applies a pipeline to the action, default is IdentityProcessor
|
||||
robot_action_to_send = robot_action_processor((act_processed_policy, obs))
|
||||
action_values = robot_action_to_send
|
||||
|
||||
elif policy is None and isinstance(teleop, Teleoperator):
|
||||
act = teleop.get_action()
|
||||
|
||||
@@ -24,6 +24,7 @@ import torch
|
||||
from huggingface_hub import HfApi
|
||||
from PIL import Image
|
||||
from safetensors.torch import load_file
|
||||
from torchvision.transforms import v2
|
||||
|
||||
import lerobot
|
||||
from lerobot.configs.default import DatasetConfig
|
||||
@@ -34,6 +35,7 @@ from lerobot.datasets.image_writer import image_array_to_pil_image
|
||||
from lerobot.datasets.io_utils import hf_transform_to_torch
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.multi_dataset import MultiLeRobotDataset
|
||||
from lerobot.datasets.transforms import ImageTransforms, ImageTransformsConfig
|
||||
from lerobot.datasets.utils import (
|
||||
DEFAULT_CHUNK_SIZE,
|
||||
DEFAULT_DATA_FILE_SIZE_IN_MB,
|
||||
@@ -355,6 +357,62 @@ def test_add_frame_image_pil(image_dataset):
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_set_image_transforms_applies_transparently(image_dataset):
|
||||
dataset = image_dataset
|
||||
dataset.add_frame({"image": np.random.rand(*DUMMY_CHW), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
dataset.finalize()
|
||||
|
||||
dataset.set_image_transforms(v2.Resize((224, 224)))
|
||||
assert dataset[0]["image"].shape == torch.Size((3, 224, 224))
|
||||
|
||||
dataset.set_image_transforms(v2.Resize((128, 128)))
|
||||
assert dataset[0]["image"].shape == torch.Size((3, 128, 128))
|
||||
|
||||
dataset.clear_image_transforms()
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_set_image_transforms_supports_lerobot_image_transforms(image_dataset):
|
||||
dataset = image_dataset
|
||||
dataset.add_frame({"image": np.random.rand(*DUMMY_CHW), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
dataset.finalize()
|
||||
|
||||
image_transforms = ImageTransforms(ImageTransformsConfig(enable=False))
|
||||
dataset.set_image_transforms(image_transforms)
|
||||
|
||||
assert dataset.image_transforms is image_transforms
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_set_image_transforms_supports_loaded_dataset(tmp_path, lerobot_dataset_factory):
|
||||
dataset = lerobot_dataset_factory(root=tmp_path / "test", use_videos=False)
|
||||
dataset.set_image_transforms(v2.Compose([v2.Resize((224, 224)), v2.Resize((112, 112))]))
|
||||
|
||||
camera_key = dataset.meta.camera_keys[0]
|
||||
assert dataset[0][camera_key].shape == torch.Size((3, 112, 112))
|
||||
|
||||
|
||||
def test_multilerobot_dataset_set_image_transforms_propagates(tmp_path, lerobot_dataset_factory):
|
||||
root = tmp_path / "multi"
|
||||
repo_ids = ["lerobot/test_multi_a", "lerobot/test_multi_b"]
|
||||
|
||||
for repo_id in repo_ids:
|
||||
lerobot_dataset_factory(root=root / repo_id, repo_id=repo_id, use_videos=False)
|
||||
|
||||
dataset = MultiLeRobotDataset(repo_ids, root=root, download_videos=False)
|
||||
dataset.set_image_transforms(v2.Resize((96, 96)))
|
||||
|
||||
camera_key = dataset.camera_keys[0]
|
||||
assert dataset[0][camera_key].shape == torch.Size((3, 96, 96))
|
||||
assert all(child.image_transforms is dataset.image_transforms for child in dataset._datasets)
|
||||
|
||||
dataset.clear_image_transforms()
|
||||
assert dataset.image_transforms is None
|
||||
assert all(child.image_transforms is None for child in dataset._datasets)
|
||||
|
||||
|
||||
def test_image_array_to_pil_image_wrong_range_float_0_255():
|
||||
image = np.random.rand(*DUMMY_HWC) * 255
|
||||
with pytest.raises(ValueError):
|
||||
|
||||
Reference in New Issue
Block a user