mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-01 11:21:27 +00:00
Merge branch 'main' into codex/model-profiling
This commit is contained in:
541
.github/workflows/benchmark_tests.yml
vendored
541
.github/workflows/benchmark_tests.yml
vendored
@@ -83,10 +83,13 @@ jobs:
|
||||
cache-binary: false
|
||||
|
||||
- name: Login to Docker Hub
|
||||
if: ${{ env.DOCKERHUB_USERNAME != '' }}
|
||||
uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
|
||||
env:
|
||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
|
||||
# Build the benchmark-specific image. The Dockerfile separates dep-install
|
||||
# from source-copy, so code-only changes skip the slow uv-sync layer
|
||||
@@ -115,7 +118,7 @@ jobs:
|
||||
bash -c "
|
||||
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
|
||||
lerobot-eval \
|
||||
--policy.path=pepijn223/smolvla_libero \
|
||||
--policy.path=lerobot/smolvla_libero \
|
||||
--env.type=libero \
|
||||
--env.task=libero_spatial \
|
||||
--eval.batch_size=1 \
|
||||
@@ -144,7 +147,7 @@ jobs:
|
||||
--artifacts-dir /tmp/libero-artifacts \
|
||||
--env libero \
|
||||
--task libero_spatial \
|
||||
--policy pepijn223/smolvla_libero
|
||||
--policy lerobot/smolvla_libero
|
||||
|
||||
- name: Upload Libero rollout video
|
||||
if: always()
|
||||
@@ -238,10 +241,13 @@ jobs:
|
||||
cache-binary: false
|
||||
|
||||
- name: Login to Docker Hub
|
||||
if: ${{ env.DOCKERHUB_USERNAME != '' }}
|
||||
uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
|
||||
env:
|
||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
|
||||
- name: Build MetaWorld benchmark image
|
||||
uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
|
||||
@@ -264,7 +270,7 @@ jobs:
|
||||
bash -c "
|
||||
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
|
||||
lerobot-eval \
|
||||
--policy.path=pepijn223/smolvla_metaworld \
|
||||
--policy.path=lerobot/smolvla_metaworld \
|
||||
--env.type=metaworld \
|
||||
--env.task=metaworld-push-v3 \
|
||||
--eval.batch_size=1 \
|
||||
@@ -293,7 +299,7 @@ jobs:
|
||||
--artifacts-dir /tmp/metaworld-artifacts \
|
||||
--env metaworld \
|
||||
--task metaworld-push-v3 \
|
||||
--policy pepijn223/smolvla_metaworld
|
||||
--policy lerobot/smolvla_metaworld
|
||||
|
||||
- name: Upload MetaWorld rollout video
|
||||
if: always()
|
||||
@@ -310,3 +316,530 @@ jobs:
|
||||
name: metaworld-metrics
|
||||
path: /tmp/metaworld-artifacts/metrics.json
|
||||
if-no-files-found: warn
|
||||
|
||||
# ── ROBOTWIN 2.0 ──────────────────────────────────────────────────────────
|
||||
# Isolated image: full RoboTwin 2.0 stack — SAPIEN, mplib, CuRobo,
|
||||
# pytorch3d, + simulation assets (~4 GB).
|
||||
# Build takes ~20 min on first run; subsequent runs hit the layer cache.
|
||||
# Requires an NVIDIA GPU runner with CUDA 12.1 drivers.
|
||||
robotwin-integration-test:
|
||||
name: RoboTwin 2.0 — build image + 1-episode eval
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
ROBOTWIN_POLICY: lerobot/smolvla_robotwin
|
||||
ROBOTWIN_TASKS: beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
lfs: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
cache-binary: false
|
||||
|
||||
- name: Login to Docker Hub
|
||||
if: ${{ env.DOCKERHUB_USERNAME != '' }}
|
||||
uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
|
||||
env:
|
||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
|
||||
# Build the full-install image: SAPIEN, mplib, CuRobo, pytorch3d +
|
||||
# simulation assets (~4 GB). Layer cache lives in the runner's local
|
||||
# Docker daemon — reused across re-runs on the same machine.
|
||||
- name: Build RoboTwin 2.0 benchmark image
|
||||
uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
context: .
|
||||
file: docker/Dockerfile.benchmark.robotwin
|
||||
push: false
|
||||
load: true
|
||||
tags: lerobot-benchmark-robotwin:ci
|
||||
cache-from: type=local,src=/tmp/.buildx-cache-robotwin
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache-robotwin,mode=max
|
||||
|
||||
- name: Run RoboTwin 2.0 smoke eval (10 tasks, 1 episode each)
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
# Named container (no --rm) so we can docker cp artifacts out.
|
||||
docker run --name robotwin-eval --gpus all \
|
||||
--shm-size=4g \
|
||||
-e HF_HOME=/tmp/hf \
|
||||
-e HF_USER_TOKEN="${HF_USER_TOKEN}" \
|
||||
-e ROBOTWIN_POLICY="${ROBOTWIN_POLICY}" \
|
||||
-e ROBOTWIN_TASKS="${ROBOTWIN_TASKS}" \
|
||||
lerobot-benchmark-robotwin:ci \
|
||||
bash -c "
|
||||
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
|
||||
cd /opt/robotwin && lerobot-eval \
|
||||
--policy.path=\"\$ROBOTWIN_POLICY\" \
|
||||
--env.type=robotwin \
|
||||
--env.task=\"\$ROBOTWIN_TASKS\" \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.use_async_envs=false \
|
||||
--policy.device=cuda \
|
||||
'--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.left_camera\": \"observation.images.camera2\", \"observation.images.right_camera\": \"observation.images.camera3\"}' \
|
||||
--output_dir=/tmp/eval-artifacts
|
||||
python /lerobot/scripts/ci/extract_task_descriptions.py \
|
||||
--env robotwin \
|
||||
--task \"\$ROBOTWIN_TASKS\" \
|
||||
--output /tmp/eval-artifacts/task_descriptions.json
|
||||
"
|
||||
|
||||
- name: Copy RoboTwin artifacts from container
|
||||
if: always()
|
||||
run: |
|
||||
mkdir -p /tmp/robotwin-artifacts
|
||||
docker cp robotwin-eval:/tmp/eval-artifacts/. /tmp/robotwin-artifacts/ 2>/dev/null || true
|
||||
docker rm -f robotwin-eval || true
|
||||
|
||||
- name: Parse RoboTwin eval metrics
|
||||
if: always()
|
||||
run: |
|
||||
python3 scripts/ci/parse_eval_metrics.py \
|
||||
--artifacts-dir /tmp/robotwin-artifacts \
|
||||
--env robotwin \
|
||||
--task "${ROBOTWIN_TASKS}" \
|
||||
--policy "${ROBOTWIN_POLICY}"
|
||||
|
||||
- name: Upload RoboTwin rollout video
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: robotwin-rollout-video
|
||||
path: /tmp/robotwin-artifacts/videos/
|
||||
if-no-files-found: warn
|
||||
|
||||
- name: Upload RoboTwin eval metrics
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: robotwin-metrics
|
||||
path: /tmp/robotwin-artifacts/metrics.json
|
||||
if-no-files-found: warn
|
||||
|
||||
# ── ROBOCASA365 ──────────────────────────────────────────────────────────
|
||||
# Isolated image: robocasa + robosuite installed manually as editable
|
||||
# clones (no `lerobot[robocasa]` extra — robocasa's setup.py pins
|
||||
# `lerobot==0.3.3`, which would shadow this repo's lerobot).
|
||||
robocasa-integration-test:
|
||||
name: RoboCasa365 — build image + 1-episode eval
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
lfs: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
cache-binary: false
|
||||
|
||||
- name: Login to Docker Hub
|
||||
if: ${{ env.DOCKERHUB_USERNAME != '' }}
|
||||
uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
|
||||
env:
|
||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
|
||||
- name: Build RoboCasa365 benchmark image
|
||||
uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
context: .
|
||||
file: docker/Dockerfile.benchmark.robocasa
|
||||
push: false
|
||||
load: true
|
||||
tags: lerobot-benchmark-robocasa:ci
|
||||
|
||||
- name: Run RoboCasa365 smoke eval (10 atomic tasks, 1 episode each)
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
docker run --name robocasa-eval --gpus all \
|
||||
--shm-size=4g \
|
||||
-e HF_HOME=/tmp/hf \
|
||||
-e HF_USER_TOKEN="${HF_USER_TOKEN}" \
|
||||
-e HF_HUB_DOWNLOAD_TIMEOUT=300 \
|
||||
-e MUJOCO_GL=egl \
|
||||
lerobot-benchmark-robocasa:ci \
|
||||
bash -c "
|
||||
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
|
||||
lerobot-eval \
|
||||
--policy.path=lerobot/smolvla_robocasa \
|
||||
--env.type=robocasa \
|
||||
--env.task=CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove,CloseToasterOvenDoor,SlideDishwasherRack,TurnOnSinkFaucet,NavigateKitchen,TurnOnElectricKettle \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.use_async_envs=false \
|
||||
--policy.device=cuda \
|
||||
'--rename_map={\"observation.images.robot0_agentview_left\": \"observation.images.camera1\", \"observation.images.robot0_eye_in_hand\": \"observation.images.camera2\", \"observation.images.robot0_agentview_right\": \"observation.images.camera3\"}' \
|
||||
--output_dir=/tmp/eval-artifacts
|
||||
python scripts/ci/extract_task_descriptions.py \
|
||||
--env robocasa \
|
||||
--task CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove,CloseToasterOvenDoor,SlideDishwasherRack,TurnOnSinkFaucet,NavigateKitchen,TurnOnElectricKettle \
|
||||
--output /tmp/eval-artifacts/task_descriptions.json
|
||||
"
|
||||
|
||||
- name: Copy RoboCasa365 artifacts from container
|
||||
if: always()
|
||||
run: |
|
||||
mkdir -p /tmp/robocasa-artifacts
|
||||
docker cp robocasa-eval:/tmp/eval-artifacts/. /tmp/robocasa-artifacts/ 2>/dev/null || true
|
||||
docker rm -f robocasa-eval || true
|
||||
|
||||
- name: Parse RoboCasa365 eval metrics
|
||||
if: always()
|
||||
run: |
|
||||
python3 scripts/ci/parse_eval_metrics.py \
|
||||
--artifacts-dir /tmp/robocasa-artifacts \
|
||||
--env robocasa \
|
||||
--task atomic_smoke_10 \
|
||||
--policy lerobot/smolvla_robocasa
|
||||
|
||||
- name: Upload RoboCasa365 rollout video
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: robocasa-rollout-video
|
||||
path: /tmp/robocasa-artifacts/videos/
|
||||
if-no-files-found: warn
|
||||
|
||||
- name: Upload RoboCasa365 eval metrics
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: robocasa-metrics
|
||||
path: /tmp/robocasa-artifacts/metrics.json
|
||||
if-no-files-found: warn
|
||||
|
||||
# ── ROBOCEREBRA ───────────────────────────────────────────────────────────
|
||||
# Reuses the LIBERO simulator (libero_10 suite) with RoboCerebra camera
|
||||
# defaults (image/wrist_image). The image is layered on
|
||||
# huggingface/lerobot-gpu, which already ships [libero] as part of [all].
|
||||
robocerebra-integration-test:
|
||||
name: RoboCerebra — build image + 1-episode eval
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
lfs: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
cache-binary: false
|
||||
|
||||
- name: Login to Docker Hub
|
||||
if: ${{ env.DOCKERHUB_USERNAME != '' }}
|
||||
uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
|
||||
env:
|
||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
|
||||
- name: Build RoboCerebra benchmark image
|
||||
uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
context: .
|
||||
file: docker/Dockerfile.benchmark.robocerebra
|
||||
push: false
|
||||
load: true
|
||||
tags: lerobot-benchmark-robocerebra:ci
|
||||
cache-from: type=local,src=/tmp/.buildx-cache-robocerebra
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache-robocerebra,mode=max
|
||||
|
||||
- name: Run RoboCerebra smoke eval (1 episode)
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
docker run --name robocerebra-eval --gpus all \
|
||||
--shm-size=4g \
|
||||
-e HF_HOME=/tmp/hf \
|
||||
-e HF_USER_TOKEN="${HF_USER_TOKEN}" \
|
||||
-e HF_HUB_DOWNLOAD_TIMEOUT=300 \
|
||||
-e LIBERO_DATA_FOLDER=/tmp/libero_data \
|
||||
lerobot-benchmark-robocerebra:ci \
|
||||
bash -c "
|
||||
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
|
||||
lerobot-eval \
|
||||
--policy.path=lerobot/smolvla_robocerebra \
|
||||
--env.type=libero \
|
||||
--env.task=libero_10 \
|
||||
--env.fps=20 \
|
||||
--env.obs_type=pixels_agent_pos \
|
||||
--env.observation_height=256 \
|
||||
--env.observation_width=256 \
|
||||
'--env.camera_name_mapping={\"agentview_image\": \"image\", \"robot0_eye_in_hand_image\": \"wrist_image\"}' \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.use_async_envs=false \
|
||||
--policy.device=cuda \
|
||||
'--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.wrist_image\": \"observation.images.camera2\"}' \
|
||||
--policy.empty_cameras=1 \
|
||||
--output_dir=/tmp/eval-artifacts
|
||||
python scripts/ci/extract_task_descriptions.py \
|
||||
--env libero --task libero_10 \
|
||||
--output /tmp/eval-artifacts/task_descriptions.json
|
||||
"
|
||||
|
||||
- name: Copy RoboCerebra artifacts from container
|
||||
if: always()
|
||||
run: |
|
||||
mkdir -p /tmp/robocerebra-artifacts
|
||||
docker cp robocerebra-eval:/tmp/eval-artifacts/. /tmp/robocerebra-artifacts/ 2>/dev/null || true
|
||||
docker rm -f robocerebra-eval || true
|
||||
|
||||
- name: Parse RoboCerebra eval metrics
|
||||
if: always()
|
||||
run: |
|
||||
python3 scripts/ci/parse_eval_metrics.py \
|
||||
--artifacts-dir /tmp/robocerebra-artifacts \
|
||||
--env robocerebra \
|
||||
--task libero_10 \
|
||||
--policy lerobot/smolvla_robocerebra
|
||||
|
||||
- name: Upload RoboCerebra rollout video
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: robocerebra-rollout-video
|
||||
path: /tmp/robocerebra-artifacts/videos/
|
||||
if-no-files-found: warn
|
||||
|
||||
- name: Upload RoboCerebra eval metrics
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: robocerebra-metrics
|
||||
path: /tmp/robocerebra-artifacts/metrics.json
|
||||
if-no-files-found: warn
|
||||
|
||||
# ── ROBOMME ───────────────────────────────────────────────────────────────
|
||||
# Isolated image: mani-skill/SAPIEN/Vulkan chain with gymnasium and numpy
|
||||
# overrides (robomme can't be a pyproject extra due to numpy<2 pin).
|
||||
robomme-integration-test:
|
||||
name: RoboMME — build image + 1-episode eval
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
ROBOMME_POLICY: lerobot/smolvla_robomme
|
||||
ROBOMME_TASKS: PickXtimes,BinFill,StopCube,MoveCube,InsertPeg,SwingXtimes,VideoUnmask,ButtonUnmask,PickHighlight,PatternLock
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
lfs: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
cache-binary: false
|
||||
|
||||
- name: Login to Docker Hub
|
||||
if: ${{ env.DOCKERHUB_USERNAME != '' }}
|
||||
uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
|
||||
env:
|
||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
|
||||
- name: Build RoboMME benchmark image
|
||||
uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
context: .
|
||||
file: docker/Dockerfile.benchmark.robomme
|
||||
push: false
|
||||
load: true
|
||||
tags: lerobot-benchmark-robomme:ci
|
||||
|
||||
- name: Run RoboMME smoke eval (10 tasks, 1 episode each)
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
docker run --name robomme-eval --gpus all \
|
||||
--shm-size=4g \
|
||||
-e HF_HOME=/tmp/hf \
|
||||
-e HF_USER_TOKEN="${HF_USER_TOKEN}" \
|
||||
-e HF_HUB_DOWNLOAD_TIMEOUT=300 \
|
||||
-e ROBOMME_POLICY="${ROBOMME_POLICY}" \
|
||||
-e ROBOMME_TASKS="${ROBOMME_TASKS}" \
|
||||
lerobot-benchmark-robomme:ci \
|
||||
bash -c "
|
||||
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
|
||||
lerobot-eval \
|
||||
--policy.path=\"\$ROBOMME_POLICY\" \
|
||||
--env.type=robomme \
|
||||
--env.task=\"\$ROBOMME_TASKS\" \
|
||||
--env.dataset_split=test \
|
||||
--env.task_ids=[0] \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.use_async_envs=false \
|
||||
--policy.device=cuda \
|
||||
'--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.wrist_image\": \"observation.images.camera2\"}' \
|
||||
--policy.empty_cameras=3 \
|
||||
--output_dir=/tmp/eval-artifacts
|
||||
python scripts/ci/extract_task_descriptions.py \
|
||||
--env robomme --task \"\$ROBOMME_TASKS\" \
|
||||
--output /tmp/eval-artifacts/task_descriptions.json
|
||||
"
|
||||
|
||||
- name: Copy RoboMME artifacts from container
|
||||
if: always()
|
||||
run: |
|
||||
mkdir -p /tmp/robomme-artifacts
|
||||
docker cp robomme-eval:/tmp/eval-artifacts/. /tmp/robomme-artifacts/ 2>/dev/null || true
|
||||
docker rm -f robomme-eval || true
|
||||
|
||||
- name: Parse RoboMME eval metrics
|
||||
if: always()
|
||||
run: |
|
||||
python3 scripts/ci/parse_eval_metrics.py \
|
||||
--artifacts-dir /tmp/robomme-artifacts \
|
||||
--env robomme \
|
||||
--task "${ROBOMME_TASKS}" \
|
||||
--policy "${ROBOMME_POLICY}"
|
||||
|
||||
- name: Upload RoboMME rollout video
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: robomme-rollout-video
|
||||
path: /tmp/robomme-artifacts/videos/
|
||||
if-no-files-found: warn
|
||||
|
||||
- name: Upload RoboMME eval metrics
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: robomme-metrics
|
||||
path: /tmp/robomme-artifacts/metrics.json
|
||||
if-no-files-found: warn
|
||||
|
||||
# ── LIBERO-plus ───────────────────────────────────────────────────────────
|
||||
# Isolated image: LIBERO-plus fork cloned into /home/user_lerobot on top of
|
||||
# huggingface/lerobot-gpu (see docker/Dockerfile.benchmark.libero_plus).
|
||||
libero-plus-integration-test:
|
||||
name: LIBERO-plus — build image + 1-episode eval
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
LIBERO_PLUS_SUITE: libero_spatial
|
||||
LIBERO_PLUS_POLICY: lerobot/smolvla_libero_plus
|
||||
LIBERO_PLUS_TASK_IDS: "[0,100,260,500,1000,1500,2000,2400]"
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
lfs: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
cache-binary: false
|
||||
|
||||
- name: Login to Docker Hub
|
||||
if: ${{ env.DOCKERHUB_USERNAME != '' }}
|
||||
uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
|
||||
env:
|
||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
|
||||
- name: Build LIBERO-plus benchmark image
|
||||
uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
context: .
|
||||
file: docker/Dockerfile.benchmark.libero_plus
|
||||
push: false
|
||||
load: true
|
||||
tags: lerobot-benchmark-libero-plus:ci
|
||||
cache-from: type=local,src=/tmp/.buildx-cache-libero-plus
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache-libero-plus,mode=max
|
||||
|
||||
- name: Run LIBERO-plus smoke eval (1 episode)
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
docker run --name libero-plus-eval --gpus all \
|
||||
--shm-size=4g \
|
||||
-e HF_HOME=/tmp/hf \
|
||||
-e HF_USER_TOKEN="${HF_USER_TOKEN}" \
|
||||
-e HF_HUB_DOWNLOAD_TIMEOUT=300 \
|
||||
-e LIBERO_PLUS_SUITE="${LIBERO_PLUS_SUITE}" \
|
||||
-e LIBERO_PLUS_POLICY="${LIBERO_PLUS_POLICY}" \
|
||||
-e LIBERO_PLUS_TASK_IDS="${LIBERO_PLUS_TASK_IDS}" \
|
||||
lerobot-benchmark-libero-plus:ci \
|
||||
bash -c "
|
||||
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
|
||||
lerobot-eval \
|
||||
--policy.path=\"\$LIBERO_PLUS_POLICY\" \
|
||||
--env.type=libero_plus \
|
||||
--env.task=\"\$LIBERO_PLUS_SUITE\" \
|
||||
--env.task_ids=\"\$LIBERO_PLUS_TASK_IDS\" \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.use_async_envs=false \
|
||||
--policy.device=cuda \
|
||||
'--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
|
||||
--policy.empty_cameras=1 \
|
||||
--output_dir=/tmp/eval-artifacts
|
||||
python scripts/ci/extract_task_descriptions.py \
|
||||
--env libero_plus --task \"\$LIBERO_PLUS_SUITE\" \
|
||||
--output /tmp/eval-artifacts/task_descriptions.json
|
||||
"
|
||||
|
||||
- name: Copy LIBERO-plus artifacts from container
|
||||
if: always()
|
||||
run: |
|
||||
mkdir -p /tmp/libero-plus-artifacts
|
||||
docker cp libero-plus-eval:/tmp/eval-artifacts/. /tmp/libero-plus-artifacts/ 2>/dev/null || true
|
||||
docker rm -f libero-plus-eval || true
|
||||
|
||||
- name: Parse LIBERO-plus eval metrics
|
||||
if: always()
|
||||
run: |
|
||||
python3 scripts/ci/parse_eval_metrics.py \
|
||||
--artifacts-dir /tmp/libero-plus-artifacts \
|
||||
--env libero_plus \
|
||||
--task "${LIBERO_PLUS_SUITE}" \
|
||||
--policy "${LIBERO_PLUS_POLICY}"
|
||||
|
||||
- name: Upload LIBERO-plus rollout video
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: libero-plus-rollout-video
|
||||
path: /tmp/libero-plus-artifacts/videos/
|
||||
if-no-files-found: warn
|
||||
|
||||
- name: Upload LIBERO-plus eval metrics
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: libero-plus-metrics
|
||||
path: /tmp/libero-plus-artifacts/metrics.json
|
||||
if-no-files-found: warn
|
||||
|
||||
Reference in New Issue
Block a user