remove bad typing

add typing hints
add embed images in conversion to v3 script; add parquet writer in conversion script
2026-06-01 03:11:29 +00:00 · 2025-11-06 09:13:26 +01:00 · 2025-11-06 09:12:09 +01:00 · 2025-11-05 23:41:38 +01:00
9 changed files with 43 additions and 193 deletions
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -83,11 +83,11 @@ jobs:
          fi

      - name: Remove Tags with Git dependencies
-        # TODO(Steven): Temporary patch to remove pi from PyPi 0.4.0 release due to its reliance on git dependencies.
+        # TODO(Steven): Temporary patch to remove libero and pi from the PyPI 0.4.0 release due to their reliance on git dependencies.
        run: |
          echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
-          grep -E '@ git\+https|lerobot\[pi\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
-          sed -E -i '/@ git\+https|lerobot\[pi\]/d' pyproject.toml
+          grep -E '@ git\+https|lerobot\[pi\]|lerobot\[libero\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
+          sed -E -i '/@ git\+https|lerobot\[pi\]|lerobot\[libero\]/d' pyproject.toml
          echo "::info:: Git dependencies removed. Proceeding with build."

      - name: Install build dependencies
--- a/.github/workflows/unbound_deps_tests.yml
+++ b/.github/workflows/unbound_deps_tests.yml
@@ -70,7 +70,7 @@ jobs:
          echo "Dependencies unbound:" && cat pyproject.toml

      - name: Install lerobot with all extras
-        run: uv sync --all-extras --no-extra groot # TODO(Steven): Make flash-attn optional
+        run: uv sync --all-extras

      - name: Run pytest (all extras)
        run: uv run pytest tests -vv
--- a/README.md
+++ b/README.md
@@ -186,7 +186,7 @@ For a full list of optional dependencies, see:
 https://pypi.org/project/lerobot/

 > [!NOTE]
-> For lerobot 0.4.0, if you want to install pi tags, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
+> For lerobot 0.4.0, if you want to install libero or pi tags, you will have to do: `pip install "lerobot[pi,libero]@git+https://github.com/huggingface/lerobot.git"`.
 >
 > This will be solved in the next patch release

--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -82,7 +82,7 @@ For a full list of optional dependencies, see:
 https://pypi.org/project/lerobot/

 > [!NOTE]
-> For lerobot 0.4.0, if you want to install pi, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`
+> For lerobot 0.4.0, if you want to install libero or pi, you will have to do: `pip install "lerobot[pi,libero]@git+https://github.com/huggingface/lerobot.git"`

 ### Troubleshooting

--- a/docs/source/libero.mdx
+++ b/docs/source/libero.mdx
@@ -28,6 +28,11 @@ LIBERO is now part of our **multi-eval supported simulation**, meaning you can b
 To Install LIBERO, after following LeRobot official instructions, just do:
 `pip install -e ".[libero]"`

+> [!NOTE]
+> For lerobot 0.4.0, if you want to install the libero tag, you will have to do: `pip install "lerobot[libero]@git+https://github.com/huggingface/lerobot.git"`.
+>
+> This will be solved in the next patch release
+
 ### Single-suite evaluation

 Evaluate a policy on one LIBERO suite:
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -940,26 +940,11 @@ class LeRobotDataset(torch.utils.data.Dataset):
        return query_timestamps

    def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict:
-        """
-        Query dataset for indices across keys, skipping video keys.
-
-        Tries column-first [key][indices] for speed, falls back to row-first.
-
-        Args:
-            query_indices: Dict mapping keys to index lists to retrieve
-
-        Returns:
-            Dict with stacked tensors of queried data (video keys excluded)
-        """
-        result: dict = {}
-        for key, q_idx in query_indices.items():
-            if key in self.meta.video_keys:
-                continue
-            try:
-                result[key] = torch.stack(self.hf_dataset[key][q_idx])
-            except (KeyError, TypeError, IndexError):
-                result[key] = torch.stack(self.hf_dataset[q_idx][key])
-        return result
+        return {
+            key: torch.stack(self.hf_dataset[q_idx][key])
+            for key, q_idx in query_indices.items()
+            if key not in self.meta.video_keys
+        }

    def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
        """Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function
--- a/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py
+++ b/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py
@@ -50,9 +50,9 @@ from typing import Any

 import jsonlines
 import pandas as pd
-import pyarrow as pa
+import pyarrow.parquet as pq
 import tqdm
-from datasets import Dataset, Features, Image
+from datasets import Dataset, concatenate_datasets
 from huggingface_hub import HfApi, snapshot_download
 from requests import HTTPError

@@ -68,6 +68,7 @@ from lerobot.datasets.utils import (
    LEGACY_EPISODES_STATS_PATH,
    LEGACY_TASKS_PATH,
    cast_stats_to_numpy,
+    embed_images,
    flatten_dict,
    get_file_size_in_mb,
    get_parquet_file_size_in_mb,
@@ -174,25 +175,33 @@ def convert_tasks(root, new_root):
    write_tasks(df_tasks, new_root)


-def concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys):
-    # TODO(rcadene): to save RAM use Dataset.from_parquet(file) and concatenate_datasets
-    dataframes = [pd.read_parquet(file) for file in paths_to_cat]
-    # Concatenate all DataFrames along rows
-    concatenated_df = pd.concat(dataframes, ignore_index=True)
+def concat_data_files(
+    paths_to_cat: list[Path], new_root: Path, chunk_idx: int, file_idx: int, image_keys: list[str]
+):
+    """Concatenate multiple parquet data files into a single file.
+
+    Args:
+        paths_to_cat: List of parquet file paths to concatenate
+        new_root: Root directory for the new dataset
+        chunk_idx: Chunk index for the output file
+        file_idx: File index within the chunk
+        image_keys: List of feature keys that contain images
+    """
+
+    datasets_list: list[Dataset] = [Dataset.from_parquet(str(file)) for file in paths_to_cat]
+    concatenated_ds: Dataset = concatenate_datasets(datasets_list)
+
+    if len(image_keys) > 0:
+        logging.debug(f"Embedding {len(image_keys)} image features for optimal training performance")
+        concatenated_ds = embed_images(concatenated_ds)

    path = new_root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
    path.parent.mkdir(parents=True, exist_ok=True)

-    if len(image_keys) > 0:
-        schema = pa.Schema.from_pandas(concatenated_df)
-        features = Features.from_arrow_schema(schema)
-        for key in image_keys:
-            features[key] = Image()
-        schema = features.arrow_schema
-    else:
-        schema = None
-
-    concatenated_df.to_parquet(path, index=False, schema=schema)
+    table = concatenated_ds.with_format("arrow")[:]
+    writer = pq.ParquetWriter(path, schema=table.schema, compression="snappy", use_dictionary=True)
+    writer.write_table(table)
+    writer.close()


 def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int):
--- a/src/lerobot/envs/libero.py
+++ b/src/lerobot/envs/libero.py
@@ -237,10 +237,9 @@ class LiberoEnv(gym.Env):
    def reset(self, seed=None, **kwargs):
        super().reset(seed=seed)
        self._env.seed(seed)
-        raw_obs = self._env.reset()
        if self.init_states and self._init_states is not None:
            self._env.set_init_state(self._init_states[self._init_state_id])
-            raw_obs = self._env.env._get_observations()
+        raw_obs = self._env.reset()

        # After reset, objects may be unstable (slightly floating, intersecting, etc.).
        # Step the simulator with a no-op action for a few frames so everything settles.
--- a/tests/envs/test_libero.py
+++ b/tests/envs/test_libero.py
@@ -1,148 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import numpy as np
-import pytest
-
-from lerobot.envs.factory import make_env, make_env_config
-
-# Set MuJoCo rendering backend before importing environment
-os.environ["MUJOCO_GL"] = "egl"
-
-
-def assert_observations_equal(obs1, obs2, path="", atol=1e-8):
-    """
-    Recursively compare two observations and assert they are equal.
-
-    Args:
-        obs1: First observation (dict or numpy array)
-        obs2: Second observation (dict or numpy array)
-        path: Current path in nested structure (for error messages)
-        atol: Absolute tolerance for numpy array comparisons
-    """
-    if isinstance(obs1, dict) and isinstance(obs2, dict):
-        assert obs1.keys() == obs2.keys(), f"Keys differ at {path}: {obs1.keys()} != {obs2.keys()}"
-        for key in obs1:
-            assert_observations_equal(obs1[key], obs2[key], path=f"{path}.{key}" if path else key, atol=atol)
-    elif isinstance(obs1, np.ndarray) and isinstance(obs2, np.ndarray):
-        assert obs1.shape == obs2.shape, f"Shape mismatch at {path}: {obs1.shape} != {obs2.shape}"
-        assert obs1.dtype == obs2.dtype, f"Dtype mismatch at {path}: {obs1.dtype} != {obs2.dtype}"
-        assert np.allclose(obs1, obs2, atol=atol), (
-            f"Array values differ at {path}: max abs diff = {np.abs(obs1 - obs2).max()}"
-        )
-    else:
-        assert type(obs1) is type(obs2), f"Type mismatch at {path}: {type(obs1)} != {type(obs2)}"
-        assert obs1 == obs2, f"Values differ at {path}: {obs1} != {obs2}"
-
-
-def test_libero_env_creation():
-    """Test that the libero environment can be created successfully."""
-    config = make_env_config("libero", task="libero_spatial")
-    envs_dict = make_env(config)
-
-    assert "libero_spatial" in envs_dict
-    assert 0 in envs_dict["libero_spatial"]
-
-    env = envs_dict["libero_spatial"][0]
-    assert env is not None
-
-    # Test basic reset
-    observation, info = env.reset(seed=42)
-    assert observation is not None
-    assert info is not None
-
-    env.close()
-
-
-def test_libero_reset_determinism():
-    """Test that resetting with the same seed produces identical observations."""
-    config = make_env_config("libero", task="libero_spatial")
-    envs_dict = make_env(config)
-    env = envs_dict["libero_spatial"][0]
-
-    # Reset multiple times with the same seed
-    obs1, info1 = env.reset(seed=42)
-    obs2, info2 = env.reset(seed=42)
-    obs3, info3 = env.reset(seed=42)
-
-    # All observations should be identical
-    assert_observations_equal(obs1, obs2)
-    assert_observations_equal(obs1, obs3)
-    assert_observations_equal(obs2, obs3)
-
-    env.close()
-
-
-def test_libero_step_determinism():
-    """Test that step() is deterministic when resetting with the same seed."""
-    config = make_env_config("libero", task="libero_spatial")
-    envs_dict = make_env(config)
-    env = envs_dict["libero_spatial"][0]
-
-    seed = 42
-
-    # First rollout
-    obs1, info1 = env.reset(seed=seed)
-    action = env.action_space.sample()
-    obs_after_step1, reward1, terminated1, truncated1, info_step1 = env.step(action)
-
-    # Second rollout with identical seed and action
-    obs2, info2 = env.reset(seed=seed)
-    obs_after_step2, reward2, terminated2, truncated2, info_step2 = env.step(action)
-
-    # Initial observations should be identical
-    assert_observations_equal(obs1, obs2)
-
-    # Post-step observations should be identical
-    assert_observations_equal(obs_after_step1, obs_after_step2)
-
-    # Rewards and termination flags should be identical
-    assert np.allclose(reward1, reward2), f"Rewards differ: {reward1} != {reward2}"
-    assert np.array_equal(terminated1, terminated2), (
-        f"Terminated flags differ: {terminated1} != {terminated2}"
-    )
-    assert np.array_equal(truncated1, truncated2), f"Truncated flags differ: {truncated1} != {truncated2}"
-
-    env.close()
-
-
-@pytest.mark.parametrize("task", ["libero_spatial", "libero_object", "libero_goal", "libero_10"])
-def test_libero_tasks(task):
-    """Test that different libero tasks can be created and used."""
-    config = make_env_config("libero", task=task)
-    envs_dict = make_env(config)
-
-    assert task in envs_dict
-    assert 0 in envs_dict[task]
-
-    env = envs_dict[task][0]
-    observation, info = env.reset(seed=42)
-
-    assert observation is not None
-    assert info is not None
-
-    # Take a step
-    action = env.action_space.sample()
-    obs, reward, terminated, truncated, info = env.step(action)
-
-    assert obs is not None
-    assert reward is not None
-    assert isinstance(terminated, (bool, np.ndarray))
-    assert isinstance(truncated, (bool, np.ndarray))
-
-    env.close()
Author	SHA1	Message	Date
Michel Aractingi	8008cb357d	remove bad typing	2025-11-06 09:13:26 +01:00
Michel Aractingi	ca5a4a7ae5	add typing hints	2025-11-06 09:12:09 +01:00
Michel Aractingi	b5dcd70d2c	add embed images in conversion to v3 script; add parquet writer in conversion script	2025-11-05 23:41:38 +01:00