Try fix: load parquet files with datasets.load_dataset instead of Dataset.from_parquet

2026-06-04 12:51:27 +00:00 · 2025-11-05 20:24:47 +01:00
parent f6b16f6d97
commit 3ec4e4ce37
1 changed files with 7 additions and 2 deletions
--- a/src/lerobot/datasets/utils.py
+++ b/src/lerobot/datasets/utils.py
@@ -116,10 +116,15 @@ def load_nested_dataset(pq_dir: Path, features: datasets.Features | None = None)
    if len(paths) == 0:
        raise FileNotFoundError(f"Provided directory does not contain any parquet file: {pq_dir}")

+    # Convert Path objects to a list of strings
+    file_paths = [str(path) for path in paths]
+
+    # Use datasets.load_dataset to force creation of an efficient cache
+    # NOTE(review): this is meant to pre-decode the images and avoid the on-the-fly bottleneck; confirm.
    # TODO(rcadene): set num_proc to accelerate conversion to pyarrow
    with SuppressProgressBars():
-        datasets = Dataset.from_parquet([str(path) for path in paths], features=features)
-    return datasets
+        dataset = datasets.load_dataset("parquet", data_files=file_paths, features=features, split="train")
+    return dataset


 def get_parquet_num_frames(parquet_path: str | Path) -> int: