Compare commits

..

3 Commits

Author SHA1 Message Date
Michel Aractingi
8008cb357d remove bad typing 2025-11-06 09:13:26 +01:00
Michel Aractingi
ca5a4a7ae5 add typing hints 2025-11-06 09:12:09 +01:00
Michel Aractingi
b5dcd70d2c add embed images in conversion to v3 script; add parquet writer in conversion script 2025-11-05 23:41:38 +01:00
9 changed files with 43 additions and 193 deletions

View File

@@ -83,11 +83,11 @@ jobs:
fi
- name: Remove Tags with Git dependencies
# TODO(Steven): Temporary patch to remove pi from PyPi 0.4.0 release due to its reliance on git dependencies.
# TODO(Steven): Temporary patch to remove libero and pi from PyPi 0.4.0 release due to its reliance on git dependencies.
run: |
echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
grep -E '@ git\+https|lerobot\[pi\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
sed -E -i '/@ git\+https|lerobot\[pi\]/d' pyproject.toml
grep -E '@ git\+https|lerobot\[pi\]|lerobot\[libero\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
sed -E -i '/@ git\+https|lerobot\[pi\]|lerobot\[libero\]/d' pyproject.toml
echo "::info:: Git dependencies removed. Proceeding with build."
- name: Install build dependencies

View File

@@ -70,7 +70,7 @@ jobs:
echo "Dependencies unbound:" && cat pyproject.toml
- name: Install lerobot with all extras
run: uv sync --all-extras --no-extra groot # TODO(Steven): Make flash-attn optional
run: uv sync --all-extras
- name: Run pytest (all extras)
run: uv run pytest tests -vv

View File

@@ -186,7 +186,7 @@ For a full list of optional dependencies, see:
https://pypi.org/project/lerobot/
> [!NOTE]
> For lerobot 0.4.0, if you want to install pi tags, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
> For lerobot 0.4.0, if you want to install libero or pi tags, you will have to do: `pip install "lerobot[pi,libero]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release

View File

@@ -82,7 +82,7 @@ For a full list of optional dependencies, see:
https://pypi.org/project/lerobot/
> [!NOTE]
> For lerobot 0.4.0, if you want to install pi, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`
> For lerobot 0.4.0, if you want to install libero or pi, you will have to do: `pip install "lerobot[pi,libero]@git+https://github.com/huggingface/lerobot.git"`
### Troubleshooting

View File

@@ -28,6 +28,11 @@ LIBERO is now part of our **multi-eval supported simulation**, meaning you can b
To Install LIBERO, after following LeRobot official instructions, just do:
`pip install -e ".[libero]"`
> [!NOTE]
> For lerobot 0.4.0, if you want to install libero tag, you will have to do: `pip install "lerobot[libero]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
### Single-suite evaluation
Evaluate a policy on one LIBERO suite:

View File

@@ -940,26 +940,11 @@ class LeRobotDataset(torch.utils.data.Dataset):
return query_timestamps
def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict:
"""
Query dataset for indices across keys, skipping video keys.
Tries column-first [key][indices] for speed, falls back to row-first.
Args:
query_indices: Dict mapping keys to index lists to retrieve
Returns:
Dict with stacked tensors of queried data (video keys excluded)
"""
result: dict = {}
for key, q_idx in query_indices.items():
if key in self.meta.video_keys:
continue
try:
result[key] = torch.stack(self.hf_dataset[key][q_idx])
except (KeyError, TypeError, IndexError):
result[key] = torch.stack(self.hf_dataset[q_idx][key])
return result
return {
key: torch.stack(self.hf_dataset[q_idx][key])
for key, q_idx in query_indices.items()
if key not in self.meta.video_keys
}
def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
"""Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function

View File

@@ -50,9 +50,9 @@ from typing import Any
import jsonlines
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import tqdm
from datasets import Dataset, Features, Image
from datasets import Dataset, concatenate_datasets
from huggingface_hub import HfApi, snapshot_download
from requests import HTTPError
@@ -68,6 +68,7 @@ from lerobot.datasets.utils import (
LEGACY_EPISODES_STATS_PATH,
LEGACY_TASKS_PATH,
cast_stats_to_numpy,
embed_images,
flatten_dict,
get_file_size_in_mb,
get_parquet_file_size_in_mb,
@@ -174,25 +175,33 @@ def convert_tasks(root, new_root):
write_tasks(df_tasks, new_root)
def concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys):
# TODO(rcadene): to save RAM use Dataset.from_parquet(file) and concatenate_datasets
dataframes = [pd.read_parquet(file) for file in paths_to_cat]
# Concatenate all DataFrames along rows
concatenated_df = pd.concat(dataframes, ignore_index=True)
def concat_data_files(
paths_to_cat: list[Path], new_root: Path, chunk_idx: int, file_idx: int, image_keys: list[str]
):
"""Concatenate multiple parquet data files into a single file.
Args:
paths_to_cat: List of parquet file paths to concatenate
new_root: Root directory for the new dataset
chunk_idx: Chunk index for the output file
file_idx: File index within the chunk
image_keys: List of feature keys that contain images
"""
datasets_list: list[Dataset] = [Dataset.from_parquet(str(file)) for file in paths_to_cat]
concatenated_ds: Dataset = concatenate_datasets(datasets_list)
if len(image_keys) > 0:
logging.debug(f"Embedding {len(image_keys)} image features for optimal training performance")
concatenated_ds = embed_images(concatenated_ds)
path = new_root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
path.parent.mkdir(parents=True, exist_ok=True)
if len(image_keys) > 0:
schema = pa.Schema.from_pandas(concatenated_df)
features = Features.from_arrow_schema(schema)
for key in image_keys:
features[key] = Image()
schema = features.arrow_schema
else:
schema = None
concatenated_df.to_parquet(path, index=False, schema=schema)
table = concatenated_ds.with_format("arrow")[:]
writer = pq.ParquetWriter(path, schema=table.schema, compression="snappy", use_dictionary=True)
writer.write_table(table)
writer.close()
def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int):

View File

@@ -237,10 +237,9 @@ class LiberoEnv(gym.Env):
def reset(self, seed=None, **kwargs):
super().reset(seed=seed)
self._env.seed(seed)
raw_obs = self._env.reset()
if self.init_states and self._init_states is not None:
self._env.set_init_state(self._init_states[self._init_state_id])
raw_obs = self._env.env._get_observations()
raw_obs = self._env.reset()
# After reset, objects may be unstable (slightly floating, intersecting, etc.).
# Step the simulator with a no-op action for a few frames so everything settles.

View File

@@ -1,148 +0,0 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pytest
from lerobot.envs.factory import make_env, make_env_config
# Set MuJoCo rendering backend before importing environment
os.environ["MUJOCO_GL"] = "egl"
def assert_observations_equal(obs1, obs2, path="", atol=1e-8):
"""
Recursively compare two observations and assert they are equal.
Args:
obs1: First observation (dict or numpy array)
obs2: Second observation (dict or numpy array)
path: Current path in nested structure (for error messages)
atol: Absolute tolerance for numpy array comparisons
"""
if isinstance(obs1, dict) and isinstance(obs2, dict):
assert obs1.keys() == obs2.keys(), f"Keys differ at {path}: {obs1.keys()} != {obs2.keys()}"
for key in obs1:
assert_observations_equal(obs1[key], obs2[key], path=f"{path}.{key}" if path else key, atol=atol)
elif isinstance(obs1, np.ndarray) and isinstance(obs2, np.ndarray):
assert obs1.shape == obs2.shape, f"Shape mismatch at {path}: {obs1.shape} != {obs2.shape}"
assert obs1.dtype == obs2.dtype, f"Dtype mismatch at {path}: {obs1.dtype} != {obs2.dtype}"
assert np.allclose(obs1, obs2, atol=atol), (
f"Array values differ at {path}: max abs diff = {np.abs(obs1 - obs2).max()}"
)
else:
assert type(obs1) is type(obs2), f"Type mismatch at {path}: {type(obs1)} != {type(obs2)}"
assert obs1 == obs2, f"Values differ at {path}: {obs1} != {obs2}"
def test_libero_env_creation():
"""Test that the libero environment can be created successfully."""
config = make_env_config("libero", task="libero_spatial")
envs_dict = make_env(config)
assert "libero_spatial" in envs_dict
assert 0 in envs_dict["libero_spatial"]
env = envs_dict["libero_spatial"][0]
assert env is not None
# Test basic reset
observation, info = env.reset(seed=42)
assert observation is not None
assert info is not None
env.close()
def test_libero_reset_determinism():
"""Test that resetting with the same seed produces identical observations."""
config = make_env_config("libero", task="libero_spatial")
envs_dict = make_env(config)
env = envs_dict["libero_spatial"][0]
# Reset multiple times with the same seed
obs1, info1 = env.reset(seed=42)
obs2, info2 = env.reset(seed=42)
obs3, info3 = env.reset(seed=42)
# All observations should be identical
assert_observations_equal(obs1, obs2)
assert_observations_equal(obs1, obs3)
assert_observations_equal(obs2, obs3)
env.close()
def test_libero_step_determinism():
"""Test that step() is deterministic when resetting with the same seed."""
config = make_env_config("libero", task="libero_spatial")
envs_dict = make_env(config)
env = envs_dict["libero_spatial"][0]
seed = 42
# First rollout
obs1, info1 = env.reset(seed=seed)
action = env.action_space.sample()
obs_after_step1, reward1, terminated1, truncated1, info_step1 = env.step(action)
# Second rollout with identical seed and action
obs2, info2 = env.reset(seed=seed)
obs_after_step2, reward2, terminated2, truncated2, info_step2 = env.step(action)
# Initial observations should be identical
assert_observations_equal(obs1, obs2)
# Post-step observations should be identical
assert_observations_equal(obs_after_step1, obs_after_step2)
# Rewards and termination flags should be identical
assert np.allclose(reward1, reward2), f"Rewards differ: {reward1} != {reward2}"
assert np.array_equal(terminated1, terminated2), (
f"Terminated flags differ: {terminated1} != {terminated2}"
)
assert np.array_equal(truncated1, truncated2), f"Truncated flags differ: {truncated1} != {truncated2}"
env.close()
@pytest.mark.parametrize("task", ["libero_spatial", "libero_object", "libero_goal", "libero_10"])
def test_libero_tasks(task):
"""Test that different libero tasks can be created and used."""
config = make_env_config("libero", task=task)
envs_dict = make_env(config)
assert task in envs_dict
assert 0 in envs_dict[task]
env = envs_dict[task][0]
observation, info = env.reset(seed=42)
assert observation is not None
assert info is not None
# Take a step
action = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(action)
assert obs is not None
assert reward is not None
assert isinstance(terminated, (bool, np.ndarray))
assert isinstance(truncated, (bool, np.ndarray))
env.close()