mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-31 19:01:28 +00:00
Compare commits
35 Commits
feat/rtc-d
...
feat/fraca
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1c8f922379 | ||
|
|
2b2ff19366 | ||
|
|
c912b1dd03 | ||
|
|
ca1841f5fc | ||
|
|
f6755dbf20 | ||
|
|
0846b5704c | ||
|
|
f386591be7 | ||
|
|
f875566e1d | ||
|
|
eaea3806e8 | ||
|
|
1ef0f0bb86 | ||
|
|
e70dd620f3 | ||
|
|
31274975f0 | ||
|
|
edbfa3d3e6 | ||
|
|
09e2a55901 | ||
|
|
413c5e01be | ||
|
|
91a0a4fe7a | ||
|
|
7710411d3a | ||
|
|
4a153825ee | ||
|
|
46606359fc | ||
|
|
1d0eb922bd | ||
|
|
1612aa7ac7 | ||
|
|
c1f5d8f48f | ||
|
|
14743b896e | ||
|
|
624939c71c | ||
|
|
a276f5b8ac | ||
|
|
33ff386dbc | ||
|
|
50f8cbc392 | ||
|
|
23999ba40d | ||
|
|
dd4837f06e | ||
|
|
9f00d2c3a2 | ||
|
|
950a6fb83d | ||
|
|
0f551df8f4 | ||
|
|
6e86a69dcd | ||
|
|
8a915c6b6f | ||
|
|
b464d9f8bc |
@@ -63,6 +63,8 @@
|
||||
title: Implement your own processor
|
||||
- local: processors_robots_teleop
|
||||
title: Processors for Robots and Teleoperators
|
||||
- local: env_processor
|
||||
title: Environment Processors
|
||||
title: "Robot Processors"
|
||||
- sections:
|
||||
- local: so101
|
||||
|
||||
418
docs/source/env_processor.mdx
Normal file
418
docs/source/env_processor.mdx
Normal file
@@ -0,0 +1,418 @@
|
||||
# Environment Processors
|
||||
|
||||
Environment processors are a critical layer in LeRobot's data processing architecture that handle **environment-specific** transformations, separate from policy-specific processing. This separation of concerns enables cleaner code, better modularity, and easier experimentation with different environments and policies.
|
||||
|
||||
## Why Environment Processors?
|
||||
|
||||
When working with different robot environments (LIBERO, MetaWorld, Aloha, etc.), each environment often has unique data formats, coordinate systems, and conventions that need standardization **before** policy processing. Without environment processors, these transformations would be:
|
||||
|
||||
1. **Hardcoded in environment code** - Making it difficult to experiment with different state representations
|
||||
2. **Duplicated across policies** - Each policy would need to handle environment-specific quirks
|
||||
3. **Mixed with policy logic** - Violating separation of concerns and making debugging harder
|
||||
|
||||
Environment processors solve this by providing a **dedicated processing layer** between raw environment observations and policy inputs.
|
||||
|
||||
## The Processing Pipeline
|
||||
|
||||
Here's how data flows through the complete processing pipeline during evaluation:
|
||||
|
||||
```python
|
||||
# In lerobot_eval.py rollout() function:
|
||||
|
||||
# 1. Raw environment observation (numpy arrays, various formats)
|
||||
raw_observation = env.step(action)
|
||||
|
||||
# 2. Convert numpy to torch, normalize images [0,1]
|
||||
observation = preprocess_observation(raw_observation)
|
||||
|
||||
# 3. Add task metadata (for multi-task environments)
|
||||
observation = add_envs_task(env, observation)
|
||||
|
||||
# 4. ENVIRONMENT-SPECIFIC preprocessing (NEW!)
|
||||
# - Flatten robot states
|
||||
# - Rotate images to match dataset conventions
|
||||
# - Handle environment-specific coordinate systems
|
||||
observation = env_preprocessor(observation)
|
||||
|
||||
# 5. POLICY-SPECIFIC preprocessing
|
||||
# - Normalize with dataset statistics
|
||||
# - Add batch dimensions
|
||||
# - Move to GPU
|
||||
# - Tokenize language instructions
|
||||
observation = preprocessor(observation)
|
||||
|
||||
# 6. Policy inference
|
||||
action = policy.select_action(observation)
|
||||
|
||||
# 7. POLICY-SPECIFIC postprocessing
|
||||
# - Unnormalize actions
|
||||
# - Remove batch dimensions
|
||||
action = postprocessor(action)
|
||||
|
||||
# 8. ENVIRONMENT-SPECIFIC postprocessing (NEW!)
|
||||
# - Convert action formats if needed
|
||||
# - Apply environment-specific constraints
|
||||
action_transition = {"action": action}
|
||||
action_transition = env_postprocessor(action_transition)
|
||||
action = action_transition["action"]
|
||||
|
||||
# 9. Execute in environment
|
||||
env.step(action)
|
||||
```
|
||||
|
||||
## The Benefits
|
||||
|
||||
### 1. **Separation of Concerns**
|
||||
|
||||
Environment processors handle transformations specific to the **environment's data format**, while policy processors handle transformations specific to the **model's requirements**.
|
||||
|
||||
```python
|
||||
# ❌ Before: Mixed concerns
|
||||
class LiberoVLAPolicy:
|
||||
def preprocess(self, obs):
|
||||
# Environment-specific: Flatten robot state (shouldn't be in policy!)
|
||||
state = self._flatten_robot_state(obs["robot_state"])
|
||||
# Policy-specific: Normalize with dataset stats
|
||||
state = self.normalizer(state)
|
||||
return state
|
||||
|
||||
# ✅ After: Clear separation
|
||||
# Environment processor: Handles LIBERO's nested robot state
|
||||
env_preprocessor = LiberoProcessorStep() # Flattens robot_state
|
||||
|
||||
# Policy processor: Handles model requirements
|
||||
policy_preprocessor = NormalizerProcessorStep(stats=dataset_stats)
|
||||
```
|
||||
|
||||
### 2. **Flexibility and Reusability**
|
||||
|
||||
The same policy can work with different environment processors, and the same environment processor can work with different policies:
|
||||
|
||||
```python
|
||||
# Use SmolVLA policy with LIBERO environment
|
||||
libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(libero_cfg)
|
||||
smolvla_preprocessor, smolvla_postprocessor = make_pre_post_processors(smolvla_cfg)
|
||||
|
||||
# Or use ACT policy with the same LIBERO environment
|
||||
libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(libero_cfg)
|
||||
act_preprocessor, act_postprocessor = make_pre_post_processors(act_cfg)
|
||||
```
|
||||
|
||||
### 3. **Easier Experimentation**
|
||||
|
||||
Want to try different state representations for LIBERO? Just create a new processor:
|
||||
|
||||
```python
|
||||
# Original: 8D state (pos + quat→axisangle + gripper)
|
||||
@ProcessorStepRegistry.register("libero_processor")
|
||||
class LiberoProcessorStep(ObservationProcessorStep):
|
||||
def _process_observation(self, obs):
|
||||
eef_pos = robot_state["eef"]["pos"] # 3D
|
||||
eef_axisangle = quat2axisangle(quat) # 3D
|
||||
gripper = robot_state["gripper"]["qpos"] # 2D
|
||||
state = torch.cat([eef_pos, eef_axisangle, gripper], dim=-1) # 8D
|
||||
return state
|
||||
|
||||
# Experiment: Add velocity for better control
|
||||
@ProcessorStepRegistry.register("libero_velocity_processor")
|
||||
class LiberoVelocityProcessorStep(ObservationProcessorStep):
|
||||
def _process_observation(self, obs):
|
||||
# Include velocities for 14D state
|
||||
eef_pos = robot_state["eef"]["pos"] # 3D
|
||||
eef_axisangle = quat2axisangle(quat) # 3D
|
||||
eef_vel = robot_state["eef"]["vel"] # 3D (NEW)
|
||||
gripper_pos = robot_state["gripper"]["qpos"] # 2D
|
||||
gripper_vel = robot_state["gripper"]["qvel"] # 3D (NEW)
|
||||
state = torch.cat([eef_pos, eef_axisangle, eef_vel,
|
||||
gripper_pos, gripper_vel], dim=-1) # 14D
|
||||
return state
|
||||
```
|
||||
|
||||
### 4. **Cleaner Environment Code**
|
||||
|
||||
Environments expose **all available data** without needing to know what downstream models will use:
|
||||
|
||||
```python
|
||||
# LIBERO environment exposes full robot state
|
||||
observation = {
|
||||
"pixels": {"image": img, "image2": img2},
|
||||
"robot_state": {
|
||||
"eef": {"pos": ..., "quat": ..., "vel": ..., "mat": ..., "axisangle": ...},
|
||||
"gripper": {"qpos": ..., "qvel": ...},
|
||||
"joints": {"pos": ..., "vel": ...}
|
||||
}
|
||||
}
|
||||
|
||||
# Environment processor decides what to use
|
||||
# Policy processor handles model-specific transformations
|
||||
```
|
||||
|
||||
## Using Environment Processors
|
||||
|
||||
### Factory Function
|
||||
|
||||
The `make_env_pre_post_processors` function follows the same pattern as `make_pre_post_processors` for policies:
|
||||
|
||||
```python
|
||||
from lerobot.envs.factory import make_env_pre_post_processors
|
||||
from lerobot.envs.configs import LiberoEnv, PushtEnv
|
||||
|
||||
# For LIBERO: Returns LiberoProcessorStep in preprocessor
|
||||
libero_cfg = LiberoEnv(task="libero_spatial", camera_name=["agentview"])
|
||||
env_preprocessor, env_postprocessor = make_env_pre_post_processors(libero_cfg)
|
||||
|
||||
# For other environments: Returns identity processors (no-op)
|
||||
pusht_cfg = PushtEnv()
|
||||
env_preprocessor, env_postprocessor = make_env_pre_post_processors(pusht_cfg)
|
||||
```
|
||||
|
||||
### Implementation in `envs/factory.py`
|
||||
|
||||
```python
|
||||
def make_env_pre_post_processors(
|
||||
env_cfg: EnvConfig,
|
||||
) -> tuple[
|
||||
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
]:
|
||||
"""
|
||||
Create preprocessor and postprocessor pipelines for environment observations.
|
||||
|
||||
Args:
|
||||
env_cfg: The configuration of the environment.
|
||||
|
||||
Returns:
|
||||
A tuple containing:
|
||||
- preprocessor: Pipeline that processes environment observations
|
||||
- postprocessor: Pipeline that processes environment outputs
|
||||
"""
|
||||
# For LIBERO environments, add the LiberoProcessorStep to preprocessor
|
||||
if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
|
||||
preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
|
||||
else:
|
||||
# For all other environments, return an identity preprocessor
|
||||
preprocessor = PolicyProcessorPipeline(steps=[])
|
||||
|
||||
# Postprocessor is currently identity for all environments
|
||||
# Future: Could add environment-specific action transformations
|
||||
postprocessor = PolicyProcessorPipeline(steps=[])
|
||||
|
||||
return preprocessor, postprocessor
|
||||
```
|
||||
|
||||
### Integration in Evaluation
|
||||
|
||||
In `lerobot_eval.py`, the environment processors are created once and used throughout:
|
||||
|
||||
```python
|
||||
def eval_main(cfg: EvalPipelineConfig):
|
||||
# Create environment
|
||||
envs = make_env(cfg.env, n_envs=cfg.eval.batch_size)
|
||||
|
||||
# Create policy
|
||||
policy = make_policy(cfg=cfg.policy, env_cfg=cfg.env)
|
||||
|
||||
# Create policy processors
|
||||
preprocessor, postprocessor = make_pre_post_processors(
|
||||
policy_cfg=cfg.policy,
|
||||
pretrained_path=cfg.policy.pretrained_path,
|
||||
)
|
||||
|
||||
# Create environment processors (NEW!)
|
||||
env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env)
|
||||
|
||||
# Run evaluation with both processor types
|
||||
eval_policy_all(
|
||||
envs=envs,
|
||||
policy=policy,
|
||||
env_preprocessor=env_preprocessor, # Environment-specific
|
||||
env_postprocessor=env_postprocessor, # Environment-specific
|
||||
preprocessor=preprocessor, # Policy-specific
|
||||
postprocessor=postprocessor, # Policy-specific
|
||||
n_episodes=cfg.eval.n_episodes,
|
||||
)
|
||||
```
|
||||
|
||||
## Example: LIBERO Environment Processor
|
||||
|
||||
The `LiberoProcessorStep` demonstrates a real-world environment processor:
|
||||
|
||||
```python
|
||||
from lerobot.processor.pipeline import ObservationProcessorStep
|
||||
|
||||
@dataclass
|
||||
@ProcessorStepRegistry.register(name="libero_processor")
|
||||
class LiberoProcessorStep(ObservationProcessorStep):
|
||||
"""
|
||||
Processes LIBERO observations into the LeRobot format.
|
||||
|
||||
**State Processing:**
|
||||
- Extracts end-effector position (3D)
|
||||
- Converts quaternion to axis-angle representation (3D)
|
||||
- Extracts gripper joint positions (2D)
|
||||
- Concatenates into 8D state vector
|
||||
|
||||
**Image Processing:**
|
||||
- Rotates images 180° to match HuggingFaceVLA/libero convention
|
||||
"""
|
||||
|
||||
def _process_observation(self, observation):
|
||||
processed_obs = observation.copy()
|
||||
|
||||
# Process images: Flip 180° for camera convention
|
||||
for key in list(processed_obs.keys()):
|
||||
if key.startswith("observation.images."):
|
||||
img = processed_obs[key]
|
||||
img = torch.flip(img, dims=[2, 3]) # Flip H and W
|
||||
processed_obs[key] = img
|
||||
|
||||
# Process robot_state: Flatten to 8D vector
|
||||
if "observation.robot_state" in processed_obs:
|
||||
robot_state = processed_obs.pop("observation.robot_state")
|
||||
|
||||
eef_pos = robot_state["eef"]["pos"] # (B, 3)
|
||||
eef_quat = robot_state["eef"]["quat"] # (B, 4)
|
||||
gripper_qpos = robot_state["gripper"]["qpos"] # (B, 2)
|
||||
|
||||
# Convert quaternion to axis-angle
|
||||
eef_axisangle = self._quat2axisangle(eef_quat) # (B, 3)
|
||||
|
||||
# Concatenate into single state vector
|
||||
state = torch.cat((eef_pos, eef_axisangle, gripper_qpos), dim=-1)
|
||||
state = state.float()
|
||||
|
||||
processed_obs["observation.state"] = state
|
||||
|
||||
return processed_obs
|
||||
```
|
||||
|
||||
### Why These Transformations?
|
||||
|
||||
1. **Image Rotation**: The HuggingFaceVLA/libero dataset has images rotated 180° from the raw LIBERO simulator. The processor handles this convention mismatch so policies trained on the dataset work seamlessly.
|
||||
|
||||
2. **State Flattening**: The raw LIBERO environment exposes nested dictionaries with all available state information (position, quaternion, velocity, matrix representation, etc.). The processor:
|
||||
- Selects the relevant components (pos, quat, gripper)
|
||||
- Converts quaternion to axis-angle (more suitable for learning)
|
||||
- Flattens to a single 8D vector that policies expect
|
||||
|
||||
3. **Flexibility**: The environment still exposes **all** raw data. If you want to try different state representations (e.g., including velocities, using matrix representation instead of axis-angle), you can create a new processor without modifying the environment code.
|
||||
|
||||
## Adding Environment Processors for New Environments
|
||||
|
||||
To add environment processors for a new environment:
|
||||
|
||||
### 1. Create the Processor Step
|
||||
|
||||
```python
|
||||
# In src/lerobot/processor/env_processor.py
|
||||
|
||||
@dataclass
|
||||
@ProcessorStepRegistry.register(name="myenv_processor")
|
||||
class MyEnvProcessorStep(ObservationProcessorStep):
|
||||
"""Process observations from MyEnv."""
|
||||
|
||||
def _process_observation(self, observation):
|
||||
processed = observation.copy()
|
||||
|
||||
# Your environment-specific transformations
|
||||
if "myenv.specific.state" in processed:
|
||||
state = processed.pop("myenv.specific.state")
|
||||
# Transform to standard format
|
||||
processed["observation.state"] = self._transform_state(state)
|
||||
|
||||
return processed
|
||||
```
|
||||
|
||||
### 2. Update the Factory
|
||||
|
||||
```python
|
||||
# In src/lerobot/envs/factory.py
|
||||
|
||||
def make_env_pre_post_processors(env_cfg: EnvConfig):
|
||||
if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
|
||||
preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
|
||||
elif isinstance(env_cfg, MyEnvConfig) or "myenv" in env_cfg.type:
|
||||
preprocessor = PolicyProcessorPipeline(steps=[MyEnvProcessorStep()])
|
||||
else:
|
||||
preprocessor = PolicyProcessorPipeline(steps=[])
|
||||
|
||||
postprocessor = PolicyProcessorPipeline(steps=[])
|
||||
return preprocessor, postprocessor
|
||||
```
|
||||
|
||||
### 3. Use in Evaluation
|
||||
|
||||
No changes needed! The evaluation script automatically uses the appropriate processor:
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
--policy.path=lerobot/my_policy \
|
||||
--env.type=myenv \ # Automatically uses MyEnvProcessorStep
|
||||
--eval.n_episodes=10
|
||||
```
|
||||
|
||||
## Future: Environment Postprocessors
|
||||
|
||||
Currently, postprocessors are identity (no-op) for all environments. Future use cases include:
|
||||
|
||||
### Action Space Transformations
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class MyEnvActionPostprocessor(ProcessorStep):
|
||||
"""Convert policy actions to environment-specific format."""
|
||||
|
||||
def __call__(self, transition: EnvTransition) -> EnvTransition:
|
||||
action = transition["action"]
|
||||
|
||||
# Example: Convert from Cartesian to joint space
|
||||
if self.action_space == "joint":
|
||||
action = self.ik_solver(action)
|
||||
|
||||
# Example: Apply environment-specific safety limits
|
||||
action = torch.clamp(action, self.min_action, self.max_action)
|
||||
|
||||
transition["action"] = action
|
||||
return transition
|
||||
```
|
||||
|
||||
### Coordinate System Conversions
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class CoordinateTransformPostprocessor(ProcessorStep):
|
||||
"""Transform actions between coordinate systems."""
|
||||
|
||||
def __call__(self, transition: EnvTransition) -> EnvTransition:
|
||||
action = transition["action"]
|
||||
|
||||
# Example: Policy outputs in world frame, env expects base frame
|
||||
action = self.world_to_base_transform(action)
|
||||
|
||||
transition["action"] = action
|
||||
return transition
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Keep environment processors simple**: They should only handle environment-specific data format issues, not complex learning-related transformations.
|
||||
|
||||
2. **Use policy processors for model requirements**: Normalization, batching, device placement, and tokenization belong in policy processors.
|
||||
|
||||
3. **Expose all data from environments**: Let processors decide what to use rather than hardcoding choices in the environment.
|
||||
|
||||
4. **Document conventions**: Clearly document any coordinate system conventions, camera orientations, or data formats that your processor handles.
|
||||
|
||||
5. **Test independently**: Environment processors should be testable without loading full policies or environments.
|
||||
|
||||
## Summary
|
||||
|
||||
Environment processors provide a **clean separation** between environment-specific data transformations and policy-specific model requirements. This architecture:
|
||||
|
||||
- ✅ Enables easy experimentation with different state representations
|
||||
- ✅ Allows policies to work seamlessly across different environments
|
||||
- ✅ Keeps environment code focused on simulation/hardware interface
|
||||
- ✅ Makes processor pipelines more maintainable and debuggable
|
||||
- ✅ Follows the single responsibility principle
|
||||
|
||||
The key insight: **Environments define data formats, processors standardize them, policies consume standardized data.** Each layer has a clear, focused responsibility.
|
||||
29
examples/behavior_1k/aggregate.slurm
Normal file
29
examples/behavior_1k/aggregate.slurm
Normal file
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
|
||||
#SBATCH -J b1k-aggregate
|
||||
#SBATCH -p hopper-cpu
|
||||
#SBATCH --qos=high
|
||||
#SBATCH -c 2
|
||||
#SBATCH -t 20:00:00
|
||||
#SBATCH --mem=4G
|
||||
#SBATCH -D /admin/home/francesco_capuano/lerobot
|
||||
#SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%j.out
|
||||
#SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%j.err
|
||||
|
||||
set -euo pipefail
|
||||
set -x
|
||||
export PYTHONUNBUFFERED=1
|
||||
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}
|
||||
|
||||
source "$HOME/.bashrc" 2>/dev/null || true
|
||||
if ! command -v conda >/dev/null 2>&1; then
|
||||
source "$HOME/miniconda3/etc/profile.d/conda.sh" 2>/dev/null || true
|
||||
source "$HOME/anaconda3/etc/profile.d/conda.sh" 2>/dev/null || true
|
||||
fi
|
||||
conda activate lerobot
|
||||
|
||||
python examples/behavior_1k/aggregate_tasks_datasets.py \
|
||||
--task-datasets-dir /fsx/francesco_capuano/behavior1k-v3 \
|
||||
--aggregated-root /fsx/francesco_capuano/behavior1k-v3/behavior1k \
|
||||
--num-tasks 50 \
|
||||
--hf-user fracapuano \
|
||||
--push-to-hub
|
||||
100
examples/behavior_1k/aggregate_tasks_datasets.py
Normal file
100
examples/behavior_1k/aggregate_tasks_datasets.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""Aggregate multiple task-specific LeRobot datasets into a single combined dataset."""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from lerobot.datasets.aggregate import aggregate_datasets
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Aggregate multiple task-specific datasets into a single LeRobot dataset"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--task-datasets-dir",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Directory containing individual task datasets (e.g., /path/to/behavior1k/)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--aggregated-root",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path where the aggregated dataset will be written",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num-tasks",
|
||||
type=int,
|
||||
default=50,
|
||||
help="Number of tasks to aggregate (default: 50)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--task-start-idx",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Starting task index (default: 0)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hf-user",
|
||||
type=str,
|
||||
default=None,
|
||||
help="HuggingFace username for repo IDs (defaults to HF_USER env var or 'lerobot')",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--aggregated-repo-id",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Repository ID for the aggregated dataset (defaults to {hf_user}/behavior1k)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--push-to-hub",
|
||||
action="store_true",
|
||||
help="Push the aggregated dataset to the Hugging Face Hub",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Determine HF user
|
||||
hf_user = args.hf_user or os.environ.get("HF_USER", "lerobot")
|
||||
|
||||
# Set default aggregated repo ID if not provided
|
||||
aggregated_repo_id = args.aggregated_repo_id or f"{hf_user}/behavior1k"
|
||||
|
||||
# Generate task indices
|
||||
task_indices = range(args.task_start_idx, args.task_start_idx + args.num_tasks)
|
||||
|
||||
# Generate repo IDs for individual tasks
|
||||
repo_ids = [f"{hf_user}/behavior1k-task{i:04d}" for i in task_indices]
|
||||
|
||||
# Generate local paths for individual task datasets
|
||||
task_datasets_dir = Path(args.task_datasets_dir)
|
||||
roots = [task_datasets_dir / f"behavior1k-task{i:04d}" for i in task_indices]
|
||||
|
||||
# Aggregated dataset path
|
||||
aggregated_root = Path(args.aggregated_root)
|
||||
|
||||
print(f"🔹 Aggregating {args.num_tasks} task datasets")
|
||||
print(f"Task datasets directory: {task_datasets_dir}")
|
||||
print(f"Aggregated output: {aggregated_root}")
|
||||
print(f"Aggregated repo ID: {aggregated_repo_id}")
|
||||
|
||||
aggregate_datasets(
|
||||
repo_ids=repo_ids,
|
||||
roots=roots,
|
||||
aggr_repo_id=aggregated_repo_id,
|
||||
aggr_root=aggregated_root,
|
||||
)
|
||||
|
||||
print("✅ Aggregation complete")
|
||||
|
||||
if args.push_to_hub:
|
||||
print(f"📤 Pushing aggregated dataset to {aggregated_repo_id}")
|
||||
ds = LeRobotDataset(repo_id=aggregated_repo_id, root=aggregated_root)
|
||||
ds.push_to_hub()
|
||||
print("✅ Successfully pushed to hub")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
38
examples/behavior_1k/convert.slurm
Normal file
38
examples/behavior_1k/convert.slurm
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
|
||||
#SBATCH -J b1k-convert
|
||||
#SBATCH -p hopper-cpu # pick your partition
|
||||
#SBATCH --qos=high
|
||||
#SBATCH --array=0-49%8 # 50 tasks, max 8 running concurrently (conversion is I/O bound)
|
||||
#SBATCH -c 1 # CPUs per conversion (tune as needed)
|
||||
#SBATCH -t 2:00:00 # Time per conversion
|
||||
#SBATCH --mem=3G # ~1.75GB for task 0, ~doubled for safety
|
||||
#SBATCH -D /admin/home/francesco_capuano/lerobot
|
||||
#SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.out
|
||||
#SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.err
|
||||
|
||||
set -euo pipefail
|
||||
set -x
|
||||
export PYTHONUNBUFFERED=1
|
||||
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1} # avoid BLAS oversubscription
|
||||
|
||||
DATA_PATH="/fsx/francesco_capuano/behavior1k-2025-v21"
|
||||
BASE_OUT="/fsx/francesco_capuano/behavior1k-v3"
|
||||
mkdir -p "$BASE_OUT" logs
|
||||
|
||||
i="${SLURM_ARRAY_TASK_ID}"
|
||||
OUT_DIR="$(printf "%s/behavior1k-task%04d" "$BASE_OUT" "$i")"
|
||||
|
||||
# activate your env if needed
|
||||
source "$HOME/.bashrc" 2>/dev/null || true
|
||||
if ! command -v conda >/dev/null 2>&1; then
|
||||
source "$HOME/miniconda3/etc/profile.d/conda.sh" 2>/dev/null || true
|
||||
source "$HOME/anaconda3/etc/profile.d/conda.sh" 2>/dev/null || true
|
||||
fi
|
||||
conda activate lerobot
|
||||
|
||||
python examples/behavior_1k/convert_to_lerobot_v3.py \
|
||||
--data-path "$DATA_PATH" \
|
||||
--new-repo "$OUT_DIR" \
|
||||
--task-id "$i" \
|
||||
--force-conversion \
|
||||
--push-to-hub
|
||||
667
examples/behavior_1k/convert_to_lerobot_v3.py
Executable file
667
examples/behavior_1k/convert_to_lerobot_v3.py
Executable file
@@ -0,0 +1,667 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Convert Behavior Dataset to LeRobotDataset v3.0 format"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import jsonlines
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import tqdm
|
||||
from datasets import Dataset, Features, Image
|
||||
|
||||
from lerobot.datasets.compute_stats import aggregate_stats
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.utils import (
|
||||
DEFAULT_CHUNK_SIZE,
|
||||
DEFAULT_DATA_FILE_SIZE_IN_MB,
|
||||
DEFAULT_DATA_PATH,
|
||||
DEFAULT_FEATURES,
|
||||
DEFAULT_VIDEO_FILE_SIZE_IN_MB,
|
||||
DEFAULT_VIDEO_PATH,
|
||||
LEGACY_EPISODES_PATH,
|
||||
LEGACY_EPISODES_STATS_PATH,
|
||||
LEGACY_TASKS_PATH,
|
||||
cast_stats_to_numpy,
|
||||
flatten_dict,
|
||||
get_file_size_in_mb,
|
||||
get_parquet_file_size_in_mb,
|
||||
get_parquet_num_frames,
|
||||
load_info,
|
||||
update_chunk_file_indices,
|
||||
write_episodes,
|
||||
write_info,
|
||||
write_stats,
|
||||
write_tasks,
|
||||
)
|
||||
from lerobot.datasets.video_utils import concatenate_video_files, get_video_duration_in_s
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
# script to convert one single task to v3.1
|
||||
# TASK = 1
|
||||
NEW_ROOT = Path("/fsx/jade_choghari/tmp/bb")
|
||||
|
||||
|
||||
def fix_episode_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Performs several fixes to an underlying dataframe to make it LeRobotDataset-v3 compatible"""
|
||||
# Inject per-episode frame_index if missing (0..N-1 within each episode)
|
||||
if "frame_index" not in df.columns:
|
||||
df["frame_index"] = range(len(df))
|
||||
|
||||
# Remove variable-length task_info feature (NOTE(fracapuano): change to padding at some point?)
|
||||
if "observation.task_info" in df.columns:
|
||||
df = df.drop(columns=["observation.task_info"])
|
||||
|
||||
# NOTE(fracapuano): tasks are ordered (and there is one task per file/dataset)
|
||||
if "task_index" in df.columns:
|
||||
df["task_index"] = 0
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def get_total_episodes_task(local_dir: Path, task_id: int, task_ranges: dict, step) -> int:
|
||||
"""
|
||||
Calculates the total number of episodes for a single, specified task.
|
||||
"""
|
||||
# Simply load the episodes for the task and count them.
|
||||
episodes = legacy_load_episodes_task(
|
||||
local_dir=local_dir, task_id=task_id, task_ranges=task_ranges, step=step
|
||||
)
|
||||
return len(episodes)
|
||||
|
||||
|
||||
NUM_CAMERAS = 9
|
||||
|
||||
|
||||
def get_total_frames_task(local_dir, meta_path, task_id: int, task_ranges: dict, step: int) -> int:
|
||||
episodes_metadata = legacy_load_episodes_task(
|
||||
local_dir=local_dir, task_id=task_id, task_ranges=task_ranges, step=step
|
||||
)
|
||||
total_frames = 0
|
||||
# like 'duration'
|
||||
for ep in episodes_metadata.values():
|
||||
duration_s = ep["length"]
|
||||
total_frames += int(duration_s)
|
||||
return total_frames
|
||||
|
||||
|
||||
def convert_info(
|
||||
root, new_root, data_file_size_in_mb, video_file_size_in_mb, meta_path, task_id: int, task_ranges, step
|
||||
):
|
||||
info = load_info(root)
|
||||
features = {**info["features"], **DEFAULT_FEATURES}
|
||||
del features[
|
||||
"observation.task_info"
|
||||
] # variable-length task_info is not supported in LeRobotDataset v3.0!
|
||||
|
||||
info["codebase_version"] = "v3.0"
|
||||
info["features"] = features
|
||||
del info["total_videos"]
|
||||
info["data_files_size_in_mb"] = data_file_size_in_mb
|
||||
info["video_files_size_in_mb"] = video_file_size_in_mb
|
||||
info["data_path"] = DEFAULT_DATA_PATH
|
||||
info["video_path"] = DEFAULT_VIDEO_PATH if info["video_path"] is not None else None
|
||||
info["fps"] = int(info["fps"])
|
||||
for key in info["features"]:
|
||||
if info["features"][key]["dtype"] == "video":
|
||||
# already has fps in video_info
|
||||
continue
|
||||
info["features"][key]["fps"] = info["fps"]
|
||||
|
||||
info["total_episodes"] = get_total_episodes_task(root, task_id, task_ranges, step)
|
||||
info["total_videos"] = info["total_episodes"] * NUM_CAMERAS
|
||||
info["total_frames"] = get_total_frames_task(root, meta_path, task_id, task_ranges, step)
|
||||
info["total_tasks"] = 1
|
||||
write_info(info, new_root)
|
||||
|
||||
|
||||
def load_jsonlines(fpath: Path) -> list[any]:
|
||||
with jsonlines.open(fpath, "r") as reader:
|
||||
return list(reader)
|
||||
|
||||
|
||||
def legacy_load_tasks(local_dir: Path) -> tuple[dict, dict]:
|
||||
tasks = load_jsonlines(local_dir / LEGACY_TASKS_PATH)
|
||||
# return tasks dict such that
|
||||
tasks = {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])}
|
||||
task_to_task_index = {task: task_index for task_index, task in tasks.items()}
|
||||
return tasks, task_to_task_index
|
||||
|
||||
|
||||
def convert_tasks(root, new_root, task_id: int):
|
||||
tasks, _ = legacy_load_tasks(root)
|
||||
if task_id not in tasks:
|
||||
raise ValueError(f"Task ID {task_id} not found in tasks (available: {list(tasks.keys())})")
|
||||
tasks = {task_id: tasks[task_id]}
|
||||
# Tasks are ordered with 0..ntasks-1 in the converted dataset
|
||||
task_indices = range(len(tasks.keys()))
|
||||
task_strings = tasks.values()
|
||||
df_tasks = pd.DataFrame({"task_index": task_indices}, index=task_strings)
|
||||
write_tasks(df_tasks, new_root)
|
||||
|
||||
|
||||
def concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys):
|
||||
# TODO(rcadene): to save RAM use Dataset.from_parquet(file) and concatenate_datasets
|
||||
dataframes = []
|
||||
for file in paths_to_cat:
|
||||
df = pd.read_parquet(file)
|
||||
df = fix_episode_dataframe(df)
|
||||
dataframes.append(df)
|
||||
|
||||
# Concatenate all DataFrames along rows
|
||||
concatenated_df = pd.concat(dataframes, ignore_index=True)
|
||||
|
||||
path = new_root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
if len(image_keys) > 0:
|
||||
schema = pa.Schema.from_pandas(concatenated_df)
|
||||
features = Features.from_arrow_schema(schema)
|
||||
for key in image_keys:
|
||||
features[key] = Image()
|
||||
schema = features.arrow_schema
|
||||
else:
|
||||
schema = None
|
||||
|
||||
concatenated_df.to_parquet(path, index=False, schema=schema)
|
||||
|
||||
|
||||
def get_image_keys(root):
|
||||
info = load_info(root)
|
||||
features = info["features"]
|
||||
image_keys = [key for key, ft in features.items() if ft["dtype"] == "image"]
|
||||
return image_keys
|
||||
|
||||
|
||||
def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int, task_index: int):
|
||||
task_dir_name = f"task-{task_index:04d}"
|
||||
data_dir = root / "data" / task_dir_name
|
||||
ep_paths = sorted(data_dir.glob("*.parquet"))
|
||||
image_keys = get_image_keys(root)
|
||||
|
||||
ep_idx = 0
|
||||
chunk_idx = 0
|
||||
file_idx = 0
|
||||
size_in_mb = 0
|
||||
num_frames = 0
|
||||
paths_to_cat = []
|
||||
episodes_metadata = []
|
||||
|
||||
logging.info(f"Converting data files from {len(ep_paths)} episodes")
|
||||
|
||||
for ep_path in tqdm.tqdm(ep_paths, desc="convert data files"):
|
||||
ep_size_in_mb = get_parquet_file_size_in_mb(ep_path)
|
||||
ep_num_frames = get_parquet_num_frames(ep_path)
|
||||
ep_metadata = {
|
||||
"episode_index": ep_idx,
|
||||
"data/chunk_index": chunk_idx,
|
||||
"data/file_index": file_idx,
|
||||
"dataset_from_index": num_frames,
|
||||
"dataset_to_index": num_frames + ep_num_frames,
|
||||
}
|
||||
size_in_mb += ep_size_in_mb
|
||||
num_frames += ep_num_frames
|
||||
episodes_metadata.append(ep_metadata)
|
||||
|
||||
# write 0-based episode index instead of custom episode index (otherwise breaks compatibility with LeRobotDataset)
|
||||
tmp_df = pd.read_parquet(ep_path)
|
||||
tmp_df["episode_index"] = ep_idx
|
||||
tmp_df.to_parquet(ep_path)
|
||||
|
||||
ep_idx += 1
|
||||
|
||||
if size_in_mb < data_file_size_in_mb:
|
||||
paths_to_cat.append(ep_path)
|
||||
continue
|
||||
|
||||
if paths_to_cat:
|
||||
concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys)
|
||||
|
||||
# Reset for the next file
|
||||
size_in_mb = ep_size_in_mb
|
||||
paths_to_cat = [ep_path]
|
||||
|
||||
chunk_idx, file_idx = update_chunk_file_indices(chunk_idx, file_idx, DEFAULT_CHUNK_SIZE)
|
||||
|
||||
# Write remaining data if any
|
||||
if paths_to_cat:
|
||||
concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys)
|
||||
|
||||
return episodes_metadata
|
||||
|
||||
|
||||
def convert_videos_of_camera(
|
||||
root: Path, new_root: Path, video_key: str, video_file_size_in_mb: int, task_index: int
|
||||
):
|
||||
# Access old paths to mp4
|
||||
# videos_dir = root / "videos"
|
||||
# ep_paths = sorted(videos_dir.glob(f"*/{video_key}/*.mp4"))
|
||||
task_dir_name = f"task-{task_index:04d}"
|
||||
videos_dir = root / "videos" / task_dir_name / video_key
|
||||
ep_paths = sorted(videos_dir.glob("*.mp4"))
|
||||
ep_idx = 0
|
||||
chunk_idx = 0
|
||||
file_idx = 0
|
||||
size_in_mb = 0
|
||||
duration_in_s = 0.0
|
||||
paths_to_cat = []
|
||||
episodes_metadata = []
|
||||
|
||||
for ep_path in tqdm.tqdm(ep_paths, desc=f"convert videos of {video_key}"):
|
||||
ep_size_in_mb = get_file_size_in_mb(ep_path)
|
||||
ep_duration_in_s = get_video_duration_in_s(ep_path)
|
||||
|
||||
# Check if adding this episode would exceed the limit
|
||||
if size_in_mb + ep_size_in_mb >= video_file_size_in_mb and len(paths_to_cat) > 0:
|
||||
# Size limit would be exceeded, save current accumulation WITHOUT this episode
|
||||
concatenate_video_files(
|
||||
paths_to_cat,
|
||||
new_root
|
||||
/ DEFAULT_VIDEO_PATH.format(video_key=video_key, chunk_index=chunk_idx, file_index=file_idx),
|
||||
)
|
||||
|
||||
# Update episodes metadata for the file we just saved
|
||||
for i, _ in enumerate(paths_to_cat):
|
||||
past_ep_idx = ep_idx - len(paths_to_cat) + i
|
||||
episodes_metadata[past_ep_idx][f"videos/{video_key}/chunk_index"] = chunk_idx
|
||||
episodes_metadata[past_ep_idx][f"videos/{video_key}/file_index"] = file_idx
|
||||
|
||||
# Move to next file and start fresh with current episode
|
||||
chunk_idx, file_idx = update_chunk_file_indices(chunk_idx, file_idx, DEFAULT_CHUNK_SIZE)
|
||||
size_in_mb = 0
|
||||
duration_in_s = 0.0
|
||||
paths_to_cat = []
|
||||
|
||||
# Add current episode metadata
|
||||
ep_metadata = {
|
||||
"episode_index": ep_idx,
|
||||
f"videos/{video_key}/chunk_index": chunk_idx, # Will be updated when file is saved
|
||||
f"videos/{video_key}/file_index": file_idx, # Will be updated when file is saved
|
||||
f"videos/{video_key}/from_timestamp": duration_in_s,
|
||||
f"videos/{video_key}/to_timestamp": duration_in_s + ep_duration_in_s,
|
||||
}
|
||||
episodes_metadata.append(ep_metadata)
|
||||
|
||||
# Add current episode to accumulation
|
||||
paths_to_cat.append(ep_path)
|
||||
size_in_mb += ep_size_in_mb
|
||||
duration_in_s += ep_duration_in_s
|
||||
ep_idx += 1
|
||||
|
||||
# Write remaining videos if any
|
||||
if paths_to_cat:
|
||||
concatenate_video_files(
|
||||
paths_to_cat,
|
||||
new_root
|
||||
/ DEFAULT_VIDEO_PATH.format(video_key=video_key, chunk_index=chunk_idx, file_index=file_idx),
|
||||
)
|
||||
|
||||
# Update episodes metadata for the final file
|
||||
for i, _ in enumerate(paths_to_cat):
|
||||
past_ep_idx = ep_idx - len(paths_to_cat) + i
|
||||
episodes_metadata[past_ep_idx][f"videos/{video_key}/chunk_index"] = chunk_idx
|
||||
episodes_metadata[past_ep_idx][f"videos/{video_key}/file_index"] = file_idx
|
||||
|
||||
return episodes_metadata
|
||||
|
||||
|
||||
def get_video_keys(root):
|
||||
info = load_info(root)
|
||||
features = info["features"]
|
||||
video_keys = [key for key, ft in features.items() if ft["dtype"] == "video"]
|
||||
return video_keys
|
||||
|
||||
|
||||
def convert_videos(root: Path, new_root: Path, video_file_size_in_mb: int, task_id: int):
|
||||
logging.info(f"Converting videos from {root} to {new_root}")
|
||||
|
||||
video_keys = get_video_keys(root)
|
||||
if len(video_keys) == 0:
|
||||
return None
|
||||
|
||||
video_keys = sorted(video_keys)
|
||||
|
||||
eps_metadata_per_cam = []
|
||||
for camera in video_keys:
|
||||
eps_metadata = convert_videos_of_camera(root, new_root, camera, video_file_size_in_mb, task_id)
|
||||
eps_metadata_per_cam.append(eps_metadata)
|
||||
|
||||
num_eps_per_cam = [len(eps_cam_map) for eps_cam_map in eps_metadata_per_cam]
|
||||
if len(set(num_eps_per_cam)) != 1:
|
||||
raise ValueError(f"All cams dont have same number of episodes ({num_eps_per_cam}).")
|
||||
|
||||
episodes_metadata = []
|
||||
num_cameras = len(video_keys)
|
||||
num_episodes = num_eps_per_cam[0]
|
||||
for ep_idx in tqdm.tqdm(range(num_episodes), desc="convert videos"):
|
||||
# Sanity check
|
||||
ep_ids = [eps_metadata_per_cam[cam_idx][ep_idx]["episode_index"] for cam_idx in range(num_cameras)]
|
||||
ep_ids += [ep_idx]
|
||||
if len(set(ep_ids)) != 1:
|
||||
raise ValueError(f"All episode indices need to match ({ep_ids}).")
|
||||
|
||||
ep_dict = {}
|
||||
for cam_idx in range(num_cameras):
|
||||
ep_dict.update(eps_metadata_per_cam[cam_idx][ep_idx])
|
||||
episodes_metadata.append(ep_dict)
|
||||
|
||||
return episodes_metadata
|
||||
|
||||
|
||||
def infer_task_episode_ranges(episodes_jsonl_path: Path) -> dict:
|
||||
"""
|
||||
Parse the Behavior-1K episodes.jsonl metadata and infer contiguous episode ranges per unique task.
|
||||
Returns a dict:
|
||||
{ task_id: { "task_string": ..., "ep_start": ..., "ep_end": ... } }
|
||||
"""
|
||||
task_ranges = {}
|
||||
task_id = 0
|
||||
current_task_str = None
|
||||
ep_start = None
|
||||
ep_end = None
|
||||
|
||||
with open(episodes_jsonl_path) as f:
|
||||
for line in f:
|
||||
if not line.strip():
|
||||
continue
|
||||
ep = json.loads(line)
|
||||
ep_idx = ep["episode_index"]
|
||||
task_str = ep["tasks"][0] if ep["tasks"] else "UNKNOWN"
|
||||
|
||||
if current_task_str is None:
|
||||
current_task_str = task_str
|
||||
ep_start = ep_idx
|
||||
ep_end = ep_idx
|
||||
elif task_str == current_task_str:
|
||||
ep_end = ep_idx
|
||||
else:
|
||||
# close previous task group
|
||||
task_ranges[task_id] = {
|
||||
"task_string": current_task_str,
|
||||
"ep_start": ep_start,
|
||||
"ep_end": ep_end,
|
||||
}
|
||||
task_id += 1
|
||||
# start new one
|
||||
current_task_str = task_str
|
||||
ep_start = ep_idx
|
||||
ep_end = ep_idx
|
||||
|
||||
# store last task
|
||||
if current_task_str is not None:
|
||||
task_ranges[task_id] = {
|
||||
"task_string": current_task_str,
|
||||
"ep_start": ep_start,
|
||||
"ep_end": ep_end,
|
||||
}
|
||||
|
||||
return task_ranges
|
||||
|
||||
|
||||
def legacy_load_episodes_task(local_dir: Path, task_id: int, task_ranges: dict, step: int = 10) -> dict:
|
||||
"""
|
||||
Load only the episodes belonging to a specific task, inferred automatically from episode ranges.
|
||||
|
||||
Args:
|
||||
local_dir (Path): Root path containing legacy meta/episodes.jsonl
|
||||
task_id (int): Which task to load (key from the inferred task_ranges dict)
|
||||
task_ranges (dict): Mapping from infer_task_episode_ranges()
|
||||
step (int): Episode index step (Behavior-1K = 10)
|
||||
"""
|
||||
all_episodes = legacy_load_episodes(local_dir)
|
||||
|
||||
# get the range for this task
|
||||
if task_id not in task_ranges:
|
||||
raise ValueError(f"Task id {task_id} not found in task_ranges")
|
||||
|
||||
ep_start = task_ranges[task_id]["ep_start"]
|
||||
ep_end = task_ranges[task_id]["ep_end"]
|
||||
|
||||
task_episode_indices = range(ep_start, ep_end + step, step)
|
||||
return {i: all_episodes[i] for i in task_episode_indices if i in all_episodes}
|
||||
|
||||
|
||||
def legacy_load_episodes(local_dir: Path) -> dict:
|
||||
episodes = load_jsonlines(local_dir / LEGACY_EPISODES_PATH)
|
||||
return {item["episode_index"]: item for item in sorted(episodes, key=lambda x: x["episode_index"])}
|
||||
|
||||
|
||||
def legacy_load_episodes_stats(local_dir: Path) -> dict:
|
||||
episodes_stats = load_jsonlines(local_dir / LEGACY_EPISODES_STATS_PATH)
|
||||
return {
|
||||
item["episode_index"]: cast_stats_to_numpy(item["stats"])
|
||||
for item in sorted(episodes_stats, key=lambda x: x["episode_index"])
|
||||
}
|
||||
|
||||
|
||||
def legacy_load_episodes_stats_task(local_dir: Path, task_id: int, task_ranges: dict, step: int = 10) -> dict:
|
||||
all_stats = legacy_load_episodes_stats(local_dir)
|
||||
|
||||
if task_id not in task_ranges:
|
||||
raise ValueError(f"Task id {task_id} not found in task_ranges")
|
||||
|
||||
ep_start = task_ranges[task_id]["ep_start"]
|
||||
ep_end = task_ranges[task_id]["ep_end"]
|
||||
|
||||
task_episode_indices = range(ep_start, ep_end + step, step)
|
||||
return {i: all_stats[i] for i in task_episode_indices if i in all_stats}
|
||||
|
||||
|
||||
def generate_episode_metadata_dict(
|
||||
episodes_legacy_metadata, episodes_metadata, episodes_stats, episodes_videos=None
|
||||
):
|
||||
num_episodes = len(episodes_metadata)
|
||||
episodes_legacy_metadata_vals = list(episodes_legacy_metadata.values())
|
||||
episodes_stats_vals = list(episodes_stats.values())
|
||||
episodes_stats_keys = list(episodes_stats.keys())
|
||||
|
||||
for i in range(num_episodes):
|
||||
ep_legacy_metadata = episodes_legacy_metadata_vals[i]
|
||||
ep_metadata = episodes_metadata[i]
|
||||
ep_stats = episodes_stats_vals[i]
|
||||
|
||||
ep_ids_set = {
|
||||
ep_legacy_metadata["episode_index"],
|
||||
ep_metadata["episode_index"],
|
||||
episodes_stats_keys[i],
|
||||
}
|
||||
|
||||
if episodes_videos is None:
|
||||
ep_video = {}
|
||||
else:
|
||||
ep_video = episodes_videos[i]
|
||||
ep_ids_set.add(ep_video["episode_index"])
|
||||
|
||||
ep_dict = {
|
||||
**ep_legacy_metadata,
|
||||
**ep_video,
|
||||
**ep_metadata,
|
||||
**flatten_dict({"stats": ep_stats}),
|
||||
}
|
||||
|
||||
# enforce contiguous indexing 0..n-1, but also stores the legacy episode index
|
||||
ep_dict["episode_index"] = i
|
||||
|
||||
yield ep_dict
|
||||
|
||||
|
||||
def convert_episodes_metadata(
|
||||
root, new_root, episodes_metadata, task_id: int, task_ranges, episodes_video_metadata=None
|
||||
):
|
||||
logging.info(f"Converting episodes metadata from {root} to {new_root}")
|
||||
|
||||
# filter by task
|
||||
episodes_legacy_metadata = legacy_load_episodes_task(root, task_id=task_id, task_ranges=task_ranges)
|
||||
episodes_stats = legacy_load_episodes_stats_task(root, task_id=task_id, task_ranges=task_ranges)
|
||||
|
||||
num_eps_set = {len(episodes_legacy_metadata), len(episodes_metadata)}
|
||||
if episodes_video_metadata is not None:
|
||||
num_eps_set.add(len(episodes_video_metadata))
|
||||
|
||||
if len(num_eps_set) != 1:
|
||||
raise ValueError(f"Number of episodes is not the same ({num_eps_set}).")
|
||||
|
||||
# Single file approach: set meta indices to 0 for all rows and write once
|
||||
ds_episodes = Dataset.from_generator(
|
||||
lambda: generate_episode_metadata_dict(
|
||||
episodes_legacy_metadata, episodes_metadata, episodes_stats, episodes_video_metadata
|
||||
)
|
||||
)
|
||||
num_eps = len(ds_episodes)
|
||||
# NOTE(fracapuano): for the size of the average dataset this is fine!
|
||||
ds_episodes = ds_episodes.add_column("meta/episodes/chunk_index", [0] * num_eps)
|
||||
ds_episodes = ds_episodes.add_column("meta/episodes/file_index", [0] * num_eps)
|
||||
write_episodes(ds_episodes, new_root)
|
||||
|
||||
stats = aggregate_stats(list(episodes_stats.values()))
|
||||
write_stats(stats, new_root)
|
||||
|
||||
|
||||
def convert_dataset_local(
|
||||
data_path: Path,
|
||||
new_repo: Path,
|
||||
task_id: int,
|
||||
data_file_size_in_mb: int = DEFAULT_DATA_FILE_SIZE_IN_MB,
|
||||
video_file_size_in_mb: int = DEFAULT_VIDEO_FILE_SIZE_IN_MB,
|
||||
force_conversion: bool = False,
|
||||
):
|
||||
"""
|
||||
Convert a local dataset to v3.x format, task-by-task, without using the Hugging Face Hub.
|
||||
|
||||
Args:
|
||||
data_path (Path): path to local dataset root (e.g. /fsx/.../2025-challenge-demos)
|
||||
new_repo (Path): path where converted dataset will be written (e.g. /fsx/.../behavior1k_v3)
|
||||
task_id (int): which task to convert (index)
|
||||
data_file_size_in_mb (int): max size per data chunk
|
||||
video_file_size_in_mb (int): max size per video chunk
|
||||
force_conversion (bool): overwrite existing conversion if True
|
||||
"""
|
||||
|
||||
root = Path(data_path)
|
||||
new_root = Path(new_repo)
|
||||
|
||||
# Clean up if needed
|
||||
if new_root.exists() and force_conversion:
|
||||
shutil.rmtree(new_root)
|
||||
new_root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print(f"🔹 Starting conversion for task {task_id}")
|
||||
print(f"Input root: {root}")
|
||||
print(f"Output root: {new_root}")
|
||||
# Infer task episode ranges
|
||||
episodes_meta_path = root / "meta" / "episodes.jsonl"
|
||||
task_ranges = infer_task_episode_ranges(episodes_meta_path)
|
||||
convert_info(
|
||||
root,
|
||||
new_root,
|
||||
data_file_size_in_mb,
|
||||
video_file_size_in_mb,
|
||||
episodes_meta_path,
|
||||
task_id,
|
||||
task_ranges,
|
||||
step=10,
|
||||
)
|
||||
convert_tasks(root, new_root, task_id)
|
||||
episodes_metadata = convert_data(root, new_root, data_file_size_in_mb, task_index=task_id)
|
||||
episodes_videos_metadata = convert_videos(root, new_root, video_file_size_in_mb, task_id=task_id)
|
||||
convert_episodes_metadata(
|
||||
root,
|
||||
new_root,
|
||||
episodes_metadata,
|
||||
task_id=task_id,
|
||||
task_ranges=task_ranges,
|
||||
episodes_video_metadata=episodes_videos_metadata,
|
||||
)
|
||||
|
||||
print(f"✅ Conversion complete for task {task_id}")
|
||||
print(f"Converted dataset written to: {new_root}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
init_logging()
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Convert Behavior-1K tasks to LeRobot v3 format (local only)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--data-path",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path to the local Behavior-1K dataset (e.g. /fsx/francesco_capuano/.cache/behavior-1k/2025-challenge-demos)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--new-repo",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path to the output directory for the converted dataset",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--task-id",
|
||||
type=int,
|
||||
required=True,
|
||||
help="Task index to convert (e.g. 0, 1, 2, ...)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--data-file-size-in-mb",
|
||||
type=int,
|
||||
default=DEFAULT_DATA_FILE_SIZE_IN_MB,
|
||||
help=f"Maximum size per data chunk (default: {DEFAULT_DATA_FILE_SIZE_IN_MB})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--video-file-size-in-mb",
|
||||
type=int,
|
||||
default=DEFAULT_VIDEO_FILE_SIZE_IN_MB,
|
||||
help=f"Maximum size per video chunk (default: {DEFAULT_VIDEO_FILE_SIZE_IN_MB})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--force-conversion",
|
||||
action="store_true",
|
||||
help="Force overwrite of existing conversion output if present.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--push-to-hub",
|
||||
action="store_true",
|
||||
help="Push the (converted) dataset to the hub.",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.push_to_hub:
|
||||
HF_USER = os.environ.get("HF_USER", "fracapuano")
|
||||
if HF_USER is None:
|
||||
raise ValueError(
|
||||
"HF_USER environment variable is not set! Set before converting and pushing to hub."
|
||||
)
|
||||
|
||||
convert_dataset_local(
|
||||
data_path=Path(args.data_path),
|
||||
new_repo=Path(args.new_repo),
|
||||
task_id=args.task_id,
|
||||
data_file_size_in_mb=args.data_file_size_in_mb,
|
||||
video_file_size_in_mb=args.video_file_size_in_mb,
|
||||
force_conversion=args.force_conversion,
|
||||
)
|
||||
|
||||
if args.push_to_hub:
|
||||
ds = LeRobotDataset(repo_id=f"{HF_USER}/behavior1k-task{args.task_id:04d}", root=args.new_repo)
|
||||
ds.push_to_hub()
|
||||
27
examples/behavior_1k/download.slurm
Normal file
27
examples/behavior_1k/download.slurm
Normal file
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
#SBATCH -J b1k-download
|
||||
#SBATCH -p hopper-cpu
|
||||
#SBATCH --qos=high
|
||||
#SBATCH -c 32 # CPUs per conversion (tune as needed)
|
||||
#SBATCH -t 20:00:00 # Time per conversion
|
||||
#SBATCH -D /admin/home/francesco_capuano/lerobot
|
||||
#SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A.out
|
||||
#SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A.err
|
||||
|
||||
set -euo pipefail
|
||||
set -x
|
||||
export PYTHONUNBUFFERED=1
|
||||
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}
|
||||
|
||||
# activate your env if needed
|
||||
source "$HOME/.bashrc" 2>/dev/null || true
|
||||
if ! command -v conda >/dev/null 2>&1; then
|
||||
source "$HOME/miniconda3/etc/profile.d/conda.sh" 2>/dev/null || true
|
||||
source "$HOME/anaconda3/etc/profile.d/conda.sh" 2>/dev/null || true
|
||||
fi
|
||||
conda activate lerobot
|
||||
|
||||
python examples/behavior_1k/download_data.py \
|
||||
--repo-id "behavior-1k/2025-challenge-demos" \
|
||||
--local-dir "/fsx/francesco_capuano/behavior1k-2025-v21" \
|
||||
--max-workers 32
|
||||
26
examples/behavior_1k/download_data.py
Normal file
26
examples/behavior_1k/download_data.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import shutil
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--repo-id", type=str, required=True)
|
||||
parser.add_argument("--max-workers", type=int, default=8)
|
||||
parser.add_argument("--local-dir", type=str, required=True)
|
||||
parser.add_argument("--force-download", action="store_true")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.force_download:
|
||||
shutil.rmtree(args.local_dir, ignore_errors=True)
|
||||
|
||||
snapshot_download(
|
||||
repo_id=args.repo_id,
|
||||
repo_type="dataset",
|
||||
force_download=args.force_download,
|
||||
max_workers=args.max_workers,
|
||||
local_dir=args.local_dir,
|
||||
ignore_patterns=["annotations/*"], # NOTE(fracapuano): Dropping textual annotations right now
|
||||
)
|
||||
41
examples/behavior_1k/upload.slurm
Normal file
41
examples/behavior_1k/upload.slurm
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
|
||||
#SBATCH -J b1k-upload
|
||||
#SBATCH -p hopper-cpu
|
||||
#SBATCH --qos=high
|
||||
#SBATCH -c 1
|
||||
#SBATCH -t 48:00:00
|
||||
#SBATCH --mem=4G
|
||||
#SBATCH --array=0-49%2
|
||||
#SBATCH -D /admin/home/francesco_capuano/lerobot
|
||||
#SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.out
|
||||
#SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.err
|
||||
|
||||
set -euo pipefail
|
||||
set -x
|
||||
export PYTHONUNBUFFERED=1
|
||||
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}
|
||||
|
||||
source "$HOME/.bashrc" 2>/dev/null || true
|
||||
if ! command -v conda >/dev/null 2>&1; then
|
||||
source "$HOME/miniconda3/etc/profile.d/conda.sh" 2>/dev/null || true
|
||||
source "$HOME/anaconda3/etc/profile.d/conda.sh" 2>/dev/null || true
|
||||
fi
|
||||
conda activate lerobot
|
||||
|
||||
# The SLURM_ARRAY_TASK_ID will be used as the task-id
|
||||
TASK_ID=${SLURM_ARRAY_TASK_ID}
|
||||
|
||||
# Configuration
|
||||
ROOT_PATH="/fsx/francesco_capuano/behavior1k-v3"
|
||||
HF_USER="fracapuano"
|
||||
# Limit upload workers to reduce network contention (default in HF Hub is 4)
|
||||
# For I/O-bound uploads, 2-4 workers per task is optimal
|
||||
NUM_WORKERS=2
|
||||
|
||||
echo "Task ${TASK_ID}: uploading with ${NUM_WORKERS} workers from ${ROOT_PATH}"
|
||||
|
||||
python examples/behavior_1k/upload_folders.py \
|
||||
--task-id ${TASK_ID} \
|
||||
--root-path ${ROOT_PATH} \
|
||||
--hf-user ${HF_USER} \
|
||||
--num-workers ${NUM_WORKERS}
|
||||
108
examples/behavior_1k/upload_folders.py
Normal file
108
examples/behavior_1k/upload_folders.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from huggingface_hub import HfApi, upload_large_folder
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Upload a folder to Hugging Face Hub using upload_large_folder"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--folder-path",
|
||||
type=str,
|
||||
required=False,
|
||||
help="Path to the folder to upload (used if task-id is not provided)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--repo-id",
|
||||
type=str,
|
||||
required=False,
|
||||
help="Repository ID on Hugging Face Hub (e.g., 'username/repo-name'). If task-id is provided, will be constructed as '{hf-user}/behavior1k-task{task_id:04d}'",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--task-id",
|
||||
type=int,
|
||||
required=False,
|
||||
help="Task index to upload (e.g., 0, 1, 2, ...). When provided, folder-path is constructed from root-path.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--root-path",
|
||||
type=str,
|
||||
required=False,
|
||||
help="Root path containing task folders (e.g., /fsx/user/behavior1k-v3). Used with --task-id to construct folder path.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hf-user",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Hugging Face username for constructing repo-id with task-id (default: from HF_USER env var or 'fracapuano')",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--create-repo", action="store_true", help="Create the repository if it doesn't exist"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num-workers",
|
||||
type=int,
|
||||
default=2,
|
||||
help="Number of parallel workers for upload (default: 2). For I/O-bound uploads, use 1-4 to avoid network contention.",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Construct folder path and repo ID based on task-id or use provided values
|
||||
if args.task_id is not None:
|
||||
if not args.root_path:
|
||||
raise ValueError("--root-path is required when --task-id is provided")
|
||||
|
||||
task_folder_name = f"behavior1k-task{args.task_id:04d}"
|
||||
folder_path = Path(args.root_path) / task_folder_name
|
||||
repo_id = f"{args.hf_user}/{task_folder_name}"
|
||||
|
||||
print(f"Task mode: uploading task {args.task_id}")
|
||||
else:
|
||||
if not args.folder_path or not args.repo_id:
|
||||
raise ValueError(
|
||||
"Either --task-id with --root-path, or both --folder-path and --repo-id must be provided"
|
||||
)
|
||||
|
||||
folder_path = Path(args.folder_path)
|
||||
repo_id = args.repo_id
|
||||
|
||||
# Validate folder path
|
||||
if not folder_path.exists():
|
||||
raise ValueError(f"Folder path does not exist: {folder_path}")
|
||||
if not folder_path.is_dir():
|
||||
raise ValueError(f"Path is not a directory: {folder_path}")
|
||||
|
||||
print(f"Uploading folder: {folder_path}")
|
||||
print(f"Repository: {repo_id}")
|
||||
|
||||
# Create repository if requested
|
||||
if args.create_repo:
|
||||
api = HfApi()
|
||||
print(f"Creating repository {repo_id}...")
|
||||
try:
|
||||
api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
|
||||
print("Repository created or already exists. Updating its contents")
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not create repository: {e}")
|
||||
|
||||
# Upload the folder
|
||||
print(f"Starting upload with {args.num_workers} parallel workers...")
|
||||
try:
|
||||
result = upload_large_folder(
|
||||
folder_path=str(folder_path),
|
||||
repo_id=repo_id,
|
||||
repo_type="dataset",
|
||||
num_workers=args.num_workers,
|
||||
)
|
||||
print("✓ Upload completed successfully!")
|
||||
print(f"Commit URL: {result}")
|
||||
except Exception as e:
|
||||
print(f"✗ Upload failed: {e}")
|
||||
raise
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -142,7 +142,7 @@ def _check_matplotlib_available():
|
||||
raise ImportError(
|
||||
"matplotlib is required for RTC debug visualizations. "
|
||||
"Please install it by running:\n"
|
||||
" uv pip install -e '.[matplotlib-dep]'"
|
||||
" uv pip install matplotlib"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -712,6 +712,15 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
self.download(download_videos)
|
||||
self.hf_dataset = self.load_hf_dataset()
|
||||
|
||||
# Create mapping from absolute indices to relative indices when only a subset of the episodes are loaded
|
||||
# Build a mapping: absolute_index -> relative_index_in_filtered_dataset
|
||||
self._absolute_to_relative_idx = None
|
||||
if self.episodes is not None:
|
||||
self._absolute_to_relative_idx = {
|
||||
abs_idx.item() if isinstance(abs_idx, torch.Tensor) else abs_idx: rel_idx
|
||||
for rel_idx, abs_idx in enumerate(self.hf_dataset["index"])
|
||||
}
|
||||
|
||||
# Setup delta_indices
|
||||
if self.delta_timestamps is not None:
|
||||
check_delta_timestamps(self.delta_timestamps, self.fps, self.tolerance_s)
|
||||
@@ -830,7 +839,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
def load_hf_dataset(self) -> datasets.Dataset:
|
||||
"""hf_dataset contains all the observations, states, actions, rewards, etc."""
|
||||
features = get_hf_features_from_features(self.features)
|
||||
hf_dataset = load_nested_dataset(self.root / "data", features=features)
|
||||
hf_dataset = load_nested_dataset(self.root / "data", features=features, episodes=self.episodes)
|
||||
hf_dataset.set_transform(hf_transform_to_torch)
|
||||
return hf_dataset
|
||||
|
||||
@@ -847,10 +856,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
|
||||
# Determine requested episodes
|
||||
if self.episodes is None:
|
||||
# Requesting all episodes - check if we have all episodes from metadata
|
||||
requested_episodes = set(range(self.meta.total_episodes))
|
||||
else:
|
||||
# Requesting specific episodes
|
||||
requested_episodes = set(self.episodes)
|
||||
|
||||
# Check if all requested episodes are available in cached data
|
||||
@@ -932,7 +939,11 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
query_timestamps = {}
|
||||
for key in self.meta.video_keys:
|
||||
if query_indices is not None and key in query_indices:
|
||||
timestamps = self.hf_dataset[query_indices[key]]["timestamp"]
|
||||
if self._absolute_to_relative_idx is not None:
|
||||
relative_indices = [self._absolute_to_relative_idx[idx] for idx in query_indices[key]]
|
||||
timestamps = self.hf_dataset[relative_indices]["timestamp"]
|
||||
else:
|
||||
timestamps = self.hf_dataset[query_indices[key]]["timestamp"]
|
||||
query_timestamps[key] = torch.stack(timestamps).tolist()
|
||||
else:
|
||||
query_timestamps[key] = [current_ts]
|
||||
@@ -955,10 +966,16 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
for key, q_idx in query_indices.items():
|
||||
if key in self.meta.video_keys:
|
||||
continue
|
||||
# Map absolute indices to relative indices if needed
|
||||
relative_indices = (
|
||||
q_idx
|
||||
if self._absolute_to_relative_idx is None
|
||||
else [self._absolute_to_relative_idx[idx] for idx in q_idx]
|
||||
)
|
||||
try:
|
||||
result[key] = torch.stack(self.hf_dataset[key][q_idx])
|
||||
result[key] = torch.stack(self.hf_dataset[key][relative_indices])
|
||||
except (KeyError, TypeError, IndexError):
|
||||
result[key] = torch.stack(self.hf_dataset[q_idx][key])
|
||||
result[key] = torch.stack(self.hf_dataset[relative_indices][key])
|
||||
return result
|
||||
|
||||
def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
|
||||
@@ -1498,6 +1515,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
obj.image_transforms = None
|
||||
obj.delta_timestamps = None
|
||||
obj.delta_indices = None
|
||||
obj._absolute_to_relative_idx = None
|
||||
obj.video_backend = video_backend if video_backend is not None else get_safe_default_codec()
|
||||
obj.writer = None
|
||||
obj.latest_episode = None
|
||||
|
||||
@@ -28,6 +28,7 @@ import numpy as np
|
||||
import packaging.version
|
||||
import pandas
|
||||
import pandas as pd
|
||||
import pyarrow.dataset as pa_ds
|
||||
import pyarrow.parquet as pq
|
||||
import torch
|
||||
from datasets import Dataset
|
||||
@@ -103,7 +104,9 @@ def update_chunk_file_indices(chunk_idx: int, file_idx: int, chunks_size: int) -
|
||||
return chunk_idx, file_idx
|
||||
|
||||
|
||||
def load_nested_dataset(pq_dir: Path, features: datasets.Features | None = None) -> Dataset:
|
||||
def load_nested_dataset(
|
||||
pq_dir: Path, features: datasets.Features | None = None, episodes: list[int] | None = None
|
||||
) -> Dataset:
|
||||
"""Find parquet files in provided directory {pq_dir}/chunk-xxx/file-xxx.parquet
|
||||
Convert parquet files to pyarrow memory mapped in a cache folder for efficient RAM usage
|
||||
Concatenate all pyarrow references to return HF Dataset format
|
||||
@@ -111,15 +114,26 @@ def load_nested_dataset(pq_dir: Path, features: datasets.Features | None = None)
|
||||
Args:
|
||||
pq_dir: Directory containing parquet files
|
||||
features: Optional features schema to ensure consistent loading of complex types like images
|
||||
episodes: Optional list of episode indices to filter. Uses PyArrow predicate pushdown for efficiency.
|
||||
"""
|
||||
paths = sorted(pq_dir.glob("*/*.parquet"))
|
||||
if len(paths) == 0:
|
||||
raise FileNotFoundError(f"Provided directory does not contain any parquet file: {pq_dir}")
|
||||
|
||||
# TODO(rcadene): set num_proc to accelerate conversion to pyarrow
|
||||
with SuppressProgressBars():
|
||||
datasets = Dataset.from_parquet([str(path) for path in paths], features=features)
|
||||
return datasets
|
||||
# When no filtering needed, Dataset uses memory-mapped loading for efficiency
|
||||
# PyArrow loads the entire dataset into memory
|
||||
if episodes is None:
|
||||
return Dataset.from_parquet([str(path) for path in paths], features=features)
|
||||
|
||||
arrow_dataset = pa_ds.dataset(paths, format="parquet")
|
||||
filter_expr = pa_ds.field("episode_index").isin(episodes)
|
||||
table = arrow_dataset.to_table(filter=filter_expr)
|
||||
|
||||
if features is not None:
|
||||
table = table.cast(features.arrow_schema)
|
||||
|
||||
return Dataset(table)
|
||||
|
||||
|
||||
def get_parquet_num_frames(parquet_path: str | Path) -> int:
|
||||
|
||||
@@ -21,7 +21,22 @@ import draccus
|
||||
from lerobot.configs.types import FeatureType, PolicyFeature
|
||||
from lerobot.robots import RobotConfig
|
||||
from lerobot.teleoperators.config import TeleoperatorConfig
|
||||
from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
|
||||
from lerobot.utils.constants import (
|
||||
ACTION,
|
||||
LIBERO_KEY_EEF_MAT,
|
||||
LIBERO_KEY_EEF_POS,
|
||||
LIBERO_KEY_EEF_QUAT,
|
||||
LIBERO_KEY_GRIPPER_QPOS,
|
||||
LIBERO_KEY_GRIPPER_QVEL,
|
||||
LIBERO_KEY_JOINTS_POS,
|
||||
LIBERO_KEY_JOINTS_VEL,
|
||||
LIBERO_KEY_PIXELS_AGENTVIEW,
|
||||
LIBERO_KEY_PIXELS_EYE_IN_HAND,
|
||||
OBS_ENV_STATE,
|
||||
OBS_IMAGE,
|
||||
OBS_IMAGES,
|
||||
OBS_STATE,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -246,28 +261,61 @@ class LiberoEnv(EnvConfig):
|
||||
features_map: dict[str, str] = field(
|
||||
default_factory=lambda: {
|
||||
ACTION: ACTION,
|
||||
"agent_pos": OBS_STATE,
|
||||
"pixels/agentview_image": f"{OBS_IMAGES}.image",
|
||||
"pixels/robot0_eye_in_hand_image": f"{OBS_IMAGES}.image2",
|
||||
LIBERO_KEY_EEF_POS: f"{OBS_STATE}.eef_pos",
|
||||
LIBERO_KEY_EEF_QUAT: f"{OBS_STATE}.eef_quat",
|
||||
LIBERO_KEY_EEF_MAT: f"{OBS_STATE}.eef_mat",
|
||||
LIBERO_KEY_GRIPPER_QPOS: f"{OBS_STATE}.gripper_qpos",
|
||||
LIBERO_KEY_GRIPPER_QVEL: f"{OBS_STATE}.gripper_qvel",
|
||||
LIBERO_KEY_JOINTS_POS: f"{OBS_STATE}.joint_pos",
|
||||
LIBERO_KEY_JOINTS_VEL: f"{OBS_STATE}.joint_vel",
|
||||
LIBERO_KEY_PIXELS_AGENTVIEW: f"{OBS_IMAGES}.image",
|
||||
LIBERO_KEY_PIXELS_EYE_IN_HAND: f"{OBS_IMAGES}.image2",
|
||||
}
|
||||
)
|
||||
|
||||
def __post_init__(self):
|
||||
if self.obs_type == "pixels":
|
||||
self.features["pixels/agentview_image"] = PolicyFeature(
|
||||
self.features[LIBERO_KEY_PIXELS_AGENTVIEW] = PolicyFeature(
|
||||
type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
|
||||
)
|
||||
self.features["pixels/robot0_eye_in_hand_image"] = PolicyFeature(
|
||||
self.features[LIBERO_KEY_PIXELS_EYE_IN_HAND] = PolicyFeature(
|
||||
type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
|
||||
)
|
||||
elif self.obs_type == "pixels_agent_pos":
|
||||
self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(8,))
|
||||
self.features["pixels/agentview_image"] = PolicyFeature(
|
||||
self.features[LIBERO_KEY_PIXELS_AGENTVIEW] = PolicyFeature(
|
||||
type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
|
||||
)
|
||||
self.features["pixels/robot0_eye_in_hand_image"] = PolicyFeature(
|
||||
self.features[LIBERO_KEY_PIXELS_EYE_IN_HAND] = PolicyFeature(
|
||||
type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
|
||||
)
|
||||
self.features[LIBERO_KEY_EEF_POS] = PolicyFeature(
|
||||
type=FeatureType.STATE,
|
||||
shape=(3,),
|
||||
)
|
||||
self.features[LIBERO_KEY_EEF_QUAT] = PolicyFeature(
|
||||
type=FeatureType.STATE,
|
||||
shape=(4,),
|
||||
)
|
||||
self.features[LIBERO_KEY_EEF_MAT] = PolicyFeature(
|
||||
type=FeatureType.STATE,
|
||||
shape=(3, 3),
|
||||
)
|
||||
self.features[LIBERO_KEY_GRIPPER_QPOS] = PolicyFeature(
|
||||
type=FeatureType.STATE,
|
||||
shape=(2,),
|
||||
)
|
||||
self.features[LIBERO_KEY_GRIPPER_QVEL] = PolicyFeature(
|
||||
type=FeatureType.STATE,
|
||||
shape=(2,),
|
||||
)
|
||||
self.features[LIBERO_KEY_JOINTS_POS] = PolicyFeature(
|
||||
type=FeatureType.STATE,
|
||||
shape=(7,),
|
||||
)
|
||||
self.features[LIBERO_KEY_JOINTS_VEL] = PolicyFeature(
|
||||
type=FeatureType.STATE,
|
||||
shape=(7,),
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported obs_type: {self.obs_type}")
|
||||
|
||||
|
||||
@@ -14,12 +14,16 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import importlib
|
||||
from typing import Any
|
||||
|
||||
import gymnasium as gym
|
||||
from gymnasium.envs.registration import registry as gym_registry
|
||||
|
||||
from lerobot.envs.configs import AlohaEnv, EnvConfig, LiberoEnv, PushtEnv
|
||||
from lerobot.envs.utils import _call_make_env, _download_hub_file, _import_hub_module, _normalize_hub_result
|
||||
from lerobot.processor import ProcessorStep
|
||||
from lerobot.processor.env_processor import LiberoProcessorStep
|
||||
from lerobot.processor.pipeline import PolicyProcessorPipeline
|
||||
|
||||
|
||||
def make_env_config(env_type: str, **kwargs) -> EnvConfig:
|
||||
@@ -33,6 +37,41 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:
|
||||
raise ValueError(f"Policy type '{env_type}' is not available.")
|
||||
|
||||
|
||||
def make_env_pre_post_processors(
|
||||
env_cfg: EnvConfig,
|
||||
) -> tuple[
|
||||
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
]:
|
||||
"""
|
||||
Create preprocessor and postprocessor pipelines for environment observations.
|
||||
|
||||
This function creates processor pipelines that transform raw environment
|
||||
observations and actions. By default, it returns identity processors that do nothing.
|
||||
For specific environments like LIBERO, it adds environment-specific processing steps.
|
||||
|
||||
Args:
|
||||
env_cfg: The configuration of the environment.
|
||||
|
||||
Returns:
|
||||
A tuple containing:
|
||||
- preprocessor: Pipeline that processes environment observations
|
||||
- postprocessor: Pipeline that processes environment outputs (currently identity)
|
||||
"""
|
||||
# Preprocessor and Postprocessor steps are Identity for most environments
|
||||
preprocessor_steps: list[ProcessorStep] = []
|
||||
postprocessor_steps: list[ProcessorStep] = []
|
||||
|
||||
# For LIBERO environments, add the LiberoProcessorStep to preprocessor
|
||||
if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
|
||||
preprocessor_steps.append(LiberoProcessorStep())
|
||||
|
||||
preprocessor = PolicyProcessorPipeline(steps=preprocessor_steps)
|
||||
postprocessor = PolicyProcessorPipeline(steps=postprocessor_steps)
|
||||
|
||||
return preprocessor, postprocessor
|
||||
|
||||
|
||||
def make_env(
|
||||
cfg: EnvConfig | str,
|
||||
n_envs: int = 1,
|
||||
|
||||
@@ -28,7 +28,6 @@ import torch
|
||||
from gymnasium import spaces
|
||||
from libero.libero import benchmark, get_libero_path
|
||||
from libero.libero.envs import OffScreenRenderEnv
|
||||
from robosuite.utils.transform_utils import quat2axisangle
|
||||
|
||||
|
||||
def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
|
||||
@@ -175,11 +174,36 @@ class LiberoEnv(gym.Env):
|
||||
self.observation_space = spaces.Dict(
|
||||
{
|
||||
"pixels": spaces.Dict(images),
|
||||
"agent_pos": spaces.Box(
|
||||
low=AGENT_POS_LOW,
|
||||
high=AGENT_POS_HIGH,
|
||||
shape=(OBS_STATE_DIM,),
|
||||
dtype=np.float64,
|
||||
"robot_state": spaces.Dict(
|
||||
{
|
||||
"eef": spaces.Dict(
|
||||
{
|
||||
"pos": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64),
|
||||
"quat": spaces.Box(
|
||||
low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64
|
||||
),
|
||||
"mat": spaces.Box(
|
||||
low=-np.inf, high=np.inf, shape=(3, 3), dtype=np.float64
|
||||
),
|
||||
}
|
||||
),
|
||||
"gripper": spaces.Dict(
|
||||
{
|
||||
"qpos": spaces.Box(
|
||||
low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64
|
||||
),
|
||||
"qvel": spaces.Box(
|
||||
low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64
|
||||
),
|
||||
}
|
||||
),
|
||||
"joints": spaces.Dict(
|
||||
{
|
||||
"pos": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
|
||||
"vel": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
|
||||
}
|
||||
),
|
||||
}
|
||||
),
|
||||
}
|
||||
)
|
||||
@@ -191,6 +215,7 @@ class LiberoEnv(gym.Env):
|
||||
def render(self):
|
||||
raw_obs = self._env.env._get_observations()
|
||||
image = self._format_raw_obs(raw_obs)["pixels"]["image"]
|
||||
image = image[::-1, ::-1] # flip both H and W for visualization
|
||||
return image
|
||||
|
||||
def _make_envs_task(self, task_suite: Any, task_id: int = 0):
|
||||
@@ -212,23 +237,48 @@ class LiberoEnv(gym.Env):
|
||||
images = {}
|
||||
for camera_name in self.camera_name:
|
||||
image = raw_obs[camera_name]
|
||||
image = image[::-1, ::-1] # rotate 180 degrees
|
||||
images[self.camera_name_mapping[camera_name]] = image
|
||||
state = np.concatenate(
|
||||
(
|
||||
raw_obs["robot0_eef_pos"],
|
||||
quat2axisangle(raw_obs["robot0_eef_quat"]),
|
||||
raw_obs["robot0_gripper_qpos"],
|
||||
)
|
||||
)
|
||||
agent_pos = state
|
||||
|
||||
eef_pos = raw_obs.get("robot0_eef_pos")
|
||||
eef_quat = raw_obs.get("robot0_eef_quat")
|
||||
|
||||
# rotation matrix from controller
|
||||
eef_mat = self._env.robots[0].controller.ee_ori_mat if eef_pos is not None else None
|
||||
gripper_qpos = raw_obs.get("robot0_gripper_qpos")
|
||||
gripper_qvel = raw_obs.get("robot0_gripper_qvel")
|
||||
joint_pos = raw_obs.get("robot0_joint_pos")
|
||||
joint_vel = raw_obs.get("robot0_joint_vel")
|
||||
obs = {
|
||||
"pixels": images,
|
||||
"robot_state": {
|
||||
"eef": {
|
||||
"pos": eef_pos, # (3,)
|
||||
"quat": eef_quat, # (4,)
|
||||
"mat": eef_mat, # (3, 3)
|
||||
},
|
||||
"gripper": {
|
||||
"qpos": gripper_qpos, # (2,)
|
||||
"qvel": gripper_qvel, # (2,)
|
||||
},
|
||||
"joints": {
|
||||
"pos": joint_pos, # (7,)
|
||||
"vel": joint_vel, # (7,)
|
||||
},
|
||||
},
|
||||
}
|
||||
if self.obs_type == "pixels":
|
||||
return {"pixels": images.copy()}
|
||||
|
||||
if self.obs_type == "pixels_agent_pos":
|
||||
return {
|
||||
"pixels": images.copy(),
|
||||
"agent_pos": agent_pos,
|
||||
}
|
||||
# Validate required fields are present
|
||||
if eef_pos is None or eef_quat is None or gripper_qpos is None:
|
||||
raise ValueError(
|
||||
f"Missing required robot state fields in raw observation. "
|
||||
f"Got eef_pos={eef_pos is not None}, eef_quat={eef_quat is not None}, "
|
||||
f"gripper_qpos={gripper_qpos is not None}"
|
||||
)
|
||||
return obs
|
||||
|
||||
raise NotImplementedError(
|
||||
f"The observation type '{self.obs_type}' is not supported in LiberoEnv. "
|
||||
"Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')."
|
||||
@@ -355,12 +405,10 @@ def create_libero_envs(
|
||||
print(f"Restricting to task_ids={task_ids_filter}")
|
||||
|
||||
out: dict[str, dict[int, Any]] = defaultdict(dict)
|
||||
|
||||
for suite_name in suite_names:
|
||||
suite = _get_suite(suite_name)
|
||||
total = len(suite.tasks)
|
||||
selected = _select_task_ids(total, task_ids_filter)
|
||||
|
||||
if not selected:
|
||||
raise ValueError(f"No tasks selected for suite '{suite_name}' (available: {total}).")
|
||||
|
||||
|
||||
@@ -29,10 +29,22 @@ from torch import Tensor
|
||||
|
||||
from lerobot.configs.types import FeatureType, PolicyFeature
|
||||
from lerobot.envs.configs import EnvConfig
|
||||
from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
|
||||
from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE, OBS_STR
|
||||
from lerobot.utils.utils import get_channel_first_image_shape
|
||||
|
||||
|
||||
def _convert_nested_dict(d):
|
||||
result = {}
|
||||
for k, v in d.items():
|
||||
if isinstance(v, dict):
|
||||
result[k] = _convert_nested_dict(v)
|
||||
elif isinstance(v, np.ndarray):
|
||||
result[k] = torch.from_numpy(v)
|
||||
else:
|
||||
result[k] = v
|
||||
return result
|
||||
|
||||
|
||||
def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]:
|
||||
# TODO(aliberts, rcadene): refactor this to use features from the environment (no hardcoding)
|
||||
"""Convert environment observation to LeRobot format observation.
|
||||
@@ -78,12 +90,14 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten
|
||||
|
||||
return_observations[OBS_ENV_STATE] = env_state
|
||||
|
||||
# TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
|
||||
agent_pos = torch.from_numpy(observations["agent_pos"]).float()
|
||||
if agent_pos.dim() == 1:
|
||||
agent_pos = agent_pos.unsqueeze(0)
|
||||
return_observations[OBS_STATE] = agent_pos
|
||||
if "agent_pos" in observations:
|
||||
agent_pos = torch.from_numpy(observations["agent_pos"]).float()
|
||||
if agent_pos.dim() == 1:
|
||||
agent_pos = agent_pos.unsqueeze(0)
|
||||
return_observations[OBS_STATE] = agent_pos
|
||||
|
||||
if "robot_state" in observations:
|
||||
return_observations[f"{OBS_STR}.robot_state"] = _convert_nested_dict(observations["robot_state"])
|
||||
return return_observations
|
||||
|
||||
|
||||
|
||||
154
src/lerobot/processor/env_processor.py
Normal file
154
src/lerobot/processor/env_processor.py
Normal file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from dataclasses import dataclass
|
||||
|
||||
import torch
|
||||
|
||||
from lerobot.configs.types import PipelineFeatureType, PolicyFeature
|
||||
from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
|
||||
|
||||
from .pipeline import ObservationProcessorStep, ProcessorStepRegistry
|
||||
|
||||
|
||||
@dataclass
|
||||
@ProcessorStepRegistry.register(name="libero_processor")
|
||||
class LiberoProcessorStep(ObservationProcessorStep):
|
||||
"""
|
||||
Processes LIBERO observations into the LeRobot format.
|
||||
|
||||
This step handles the specific observation structure from LIBERO environments,
|
||||
which includes nested robot_state dictionaries and image observations.
|
||||
|
||||
**State Processing:**
|
||||
- Processes the `robot_state` dictionary which contains nested end-effector,
|
||||
gripper, and joint information.
|
||||
- Extracts and concatenates:
|
||||
- End-effector position (3D)
|
||||
- End-effector quaternion converted to axis-angle (3D)
|
||||
- Gripper joint positions (2D)
|
||||
- Maps the concatenated state to `"observation.state"`.
|
||||
|
||||
**Image Processing:**
|
||||
- Rotates images by 180 degrees by flipping both height and width dimensions.
|
||||
- This accounts for the HuggingFaceVLA/libero camera orientation convention.
|
||||
"""
|
||||
|
||||
def _process_observation(self, observation):
|
||||
"""
|
||||
Processes both image and robot_state observations from LIBERO.
|
||||
"""
|
||||
processed_obs = observation.copy()
|
||||
for key in list(processed_obs.keys()):
|
||||
if key.startswith(f"{OBS_IMAGES}."):
|
||||
img = processed_obs[key]
|
||||
|
||||
# Flip both H and W
|
||||
img = torch.flip(img, dims=[2, 3])
|
||||
|
||||
processed_obs[key] = img
|
||||
# Process robot_state into a flat state vector
|
||||
if "observation.robot_state" in processed_obs:
|
||||
robot_state = processed_obs.pop("observation.robot_state")
|
||||
|
||||
# Extract components
|
||||
eef_pos = robot_state["eef"]["pos"] # (B, 3,)
|
||||
eef_quat = robot_state["eef"]["quat"] # (B, 4,)
|
||||
gripper_qpos = robot_state["gripper"]["qpos"] # (B, 2,)
|
||||
|
||||
# Convert quaternion to axis-angle
|
||||
eef_axisangle = self._quat2axisangle(eef_quat) # (B, 3)
|
||||
# Concatenate into a single state vector
|
||||
state = torch.cat((eef_pos, eef_axisangle, gripper_qpos), dim=-1)
|
||||
|
||||
# ensure float32
|
||||
state = state.float()
|
||||
if state.dim() == 1:
|
||||
state = state.unsqueeze(0)
|
||||
|
||||
processed_obs[OBS_STATE] = state
|
||||
return processed_obs
|
||||
|
||||
def transform_features(
|
||||
self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
|
||||
) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
|
||||
"""
|
||||
Transforms feature keys from the LIBERO format to the LeRobot standard.
|
||||
"""
|
||||
new_features: dict[PipelineFeatureType, dict[str, PolicyFeature]] = {}
|
||||
|
||||
# copy over non-STATE features
|
||||
for ft, feats in features.items():
|
||||
if ft != PipelineFeatureType.STATE:
|
||||
new_features[ft] = feats.copy()
|
||||
|
||||
# rebuild STATE features
|
||||
state_feats = {}
|
||||
|
||||
# add our new flattened state
|
||||
state_feats["observation.state"] = PolicyFeature(
|
||||
key="observation.state",
|
||||
shape=(8,), # [eef_pos(3), axis_angle(3), gripper(2)]
|
||||
dtype="float32",
|
||||
description=("Concatenated end-effector position (3), axis-angle (3), and gripper qpos (2)."),
|
||||
)
|
||||
|
||||
new_features[PipelineFeatureType.STATE] = state_feats
|
||||
|
||||
return new_features
|
||||
|
||||
def observation(self, observation):
|
||||
return self._process_observation(observation)
|
||||
|
||||
def _quat2axisangle(self, quat: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Convert batched quaternions to axis-angle format.
|
||||
Only accepts torch tensors of shape (B, 4).
|
||||
|
||||
Args:
|
||||
quat (Tensor): (B, 4) tensor of quaternions in (x, y, z, w) format
|
||||
|
||||
Returns:
|
||||
Tensor: (B, 3) axis-angle vectors
|
||||
|
||||
Raises:
|
||||
TypeError: if input is not a torch tensor
|
||||
ValueError: if shape is not (B, 4)
|
||||
"""
|
||||
|
||||
if not isinstance(quat, torch.Tensor):
|
||||
raise TypeError(f"_quat2axisangle expected a torch.Tensor, got {type(quat)}")
|
||||
|
||||
if quat.ndim != 2 or quat.shape[1] != 4:
|
||||
raise ValueError(f"_quat2axisangle expected shape (B, 4), got {tuple(quat.shape)}")
|
||||
|
||||
quat = quat.to(dtype=torch.float32)
|
||||
device = quat.device
|
||||
batch_size = quat.shape[0]
|
||||
|
||||
w = quat[:, 3].clamp(-1.0, 1.0)
|
||||
|
||||
den = torch.sqrt(torch.clamp(1.0 - w * w, min=0.0))
|
||||
|
||||
result = torch.zeros((batch_size, 3), device=device)
|
||||
|
||||
mask = den > 1e-10
|
||||
|
||||
if mask.any():
|
||||
angle = 2.0 * torch.acos(w[mask]) # (M,)
|
||||
axis = quat[mask, :3] / den[mask].unsqueeze(1)
|
||||
result[mask] = axis * angle.unsqueeze(1)
|
||||
|
||||
return result
|
||||
@@ -71,7 +71,7 @@ from tqdm import trange
|
||||
|
||||
from lerobot.configs import parser
|
||||
from lerobot.configs.eval import EvalPipelineConfig
|
||||
from lerobot.envs.factory import make_env
|
||||
from lerobot.envs.factory import make_env, make_env_pre_post_processors
|
||||
from lerobot.envs.utils import (
|
||||
add_envs_task,
|
||||
check_env_attributes_and_types,
|
||||
@@ -94,6 +94,8 @@ from lerobot.utils.utils import (
|
||||
def rollout(
|
||||
env: gym.vector.VectorEnv,
|
||||
policy: PreTrainedPolicy,
|
||||
env_preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
env_postprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
|
||||
seeds: list[int] | None = None,
|
||||
@@ -165,11 +167,19 @@ def rollout(
|
||||
# Infer "task" from attributes of environments.
|
||||
# TODO: works with SyncVectorEnv but not AsyncVectorEnv
|
||||
observation = add_envs_task(env, observation)
|
||||
|
||||
# Apply environment-specific preprocessing (e.g., LiberoProcessorStep for LIBERO)
|
||||
observation = env_preprocessor(observation)
|
||||
|
||||
observation = preprocessor(observation)
|
||||
with torch.inference_mode():
|
||||
action = policy.select_action(observation)
|
||||
action = postprocessor(action)
|
||||
|
||||
action_transition = {"action": action}
|
||||
action_transition = env_postprocessor(action_transition)
|
||||
action = action_transition["action"]
|
||||
|
||||
# Convert to CPU / numpy.
|
||||
action_numpy: np.ndarray = action.to("cpu").numpy()
|
||||
assert action_numpy.ndim == 2, "Action dimensions should be (batch, action_dim)"
|
||||
@@ -239,6 +249,8 @@ def rollout(
|
||||
def eval_policy(
|
||||
env: gym.vector.VectorEnv,
|
||||
policy: PreTrainedPolicy,
|
||||
env_preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
env_postprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
|
||||
n_episodes: int,
|
||||
@@ -319,6 +331,8 @@ def eval_policy(
|
||||
rollout_data = rollout(
|
||||
env=env,
|
||||
policy=policy,
|
||||
env_preprocessor=env_preprocessor,
|
||||
env_postprocessor=env_postprocessor,
|
||||
preprocessor=preprocessor,
|
||||
postprocessor=postprocessor,
|
||||
seeds=list(seeds) if seeds else None,
|
||||
@@ -517,10 +531,16 @@ def eval_main(cfg: EvalPipelineConfig):
|
||||
pretrained_path=cfg.policy.pretrained_path,
|
||||
preprocessor_overrides=preprocessor_overrides,
|
||||
)
|
||||
|
||||
# Create environment-specific preprocessor and postprocessor (e.g., for LIBERO environments)
|
||||
env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env)
|
||||
|
||||
with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
|
||||
info = eval_policy_all(
|
||||
envs=envs,
|
||||
policy=policy,
|
||||
env_preprocessor=env_preprocessor,
|
||||
env_postprocessor=env_postprocessor,
|
||||
preprocessor=preprocessor,
|
||||
postprocessor=postprocessor,
|
||||
n_episodes=cfg.eval.n_episodes,
|
||||
@@ -561,6 +581,8 @@ def eval_one(
|
||||
env: gym.vector.VectorEnv,
|
||||
*,
|
||||
policy: PreTrainedPolicy,
|
||||
env_preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
env_postprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
|
||||
n_episodes: int,
|
||||
@@ -576,6 +598,8 @@ def eval_one(
|
||||
task_result = eval_policy(
|
||||
env=env,
|
||||
policy=policy,
|
||||
env_preprocessor=env_preprocessor,
|
||||
env_postprocessor=env_postprocessor,
|
||||
preprocessor=preprocessor,
|
||||
postprocessor=postprocessor,
|
||||
n_episodes=n_episodes,
|
||||
@@ -600,6 +624,8 @@ def run_one(
|
||||
env,
|
||||
*,
|
||||
policy,
|
||||
env_preprocessor,
|
||||
env_postprocessor,
|
||||
preprocessor,
|
||||
postprocessor,
|
||||
n_episodes: int,
|
||||
@@ -622,6 +648,8 @@ def run_one(
|
||||
metrics = eval_one(
|
||||
env,
|
||||
policy=policy,
|
||||
env_preprocessor=env_preprocessor,
|
||||
env_postprocessor=env_postprocessor,
|
||||
preprocessor=preprocessor,
|
||||
postprocessor=postprocessor,
|
||||
n_episodes=n_episodes,
|
||||
@@ -639,6 +667,8 @@ def run_one(
|
||||
def eval_policy_all(
|
||||
envs: dict[str, dict[int, gym.vector.VectorEnv]],
|
||||
policy,
|
||||
env_preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
env_postprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
|
||||
n_episodes: int,
|
||||
@@ -694,6 +724,8 @@ def eval_policy_all(
|
||||
task_runner = partial(
|
||||
run_one,
|
||||
policy=policy,
|
||||
env_preprocessor=env_preprocessor,
|
||||
env_postprocessor=env_postprocessor,
|
||||
preprocessor=preprocessor,
|
||||
postprocessor=postprocessor,
|
||||
n_episodes=n_episodes,
|
||||
|
||||
@@ -29,7 +29,7 @@ from lerobot.configs.train import TrainPipelineConfig
|
||||
from lerobot.datasets.factory import make_dataset
|
||||
from lerobot.datasets.sampler import EpisodeAwareSampler
|
||||
from lerobot.datasets.utils import cycle
|
||||
from lerobot.envs.factory import make_env
|
||||
from lerobot.envs.factory import make_env, make_env_pre_post_processors
|
||||
from lerobot.envs.utils import close_envs
|
||||
from lerobot.optim.factory import make_optimizer_and_scheduler
|
||||
from lerobot.policies.factory import make_policy, make_pre_post_processors
|
||||
@@ -259,6 +259,8 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
|
||||
logging.info(colored("Output dir:", "yellow", attrs=["bold"]) + f" {cfg.output_dir}")
|
||||
if cfg.env is not None:
|
||||
logging.info(f"{cfg.env.task=}")
|
||||
logging.info("Creating environment processors")
|
||||
env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env)
|
||||
logging.info(f"{cfg.steps=} ({format_big_number(cfg.steps)})")
|
||||
logging.info(f"{dataset.num_frames=} ({format_big_number(dataset.num_frames)})")
|
||||
logging.info(f"{dataset.num_episodes=}")
|
||||
@@ -274,6 +276,7 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
|
||||
sampler = EpisodeAwareSampler(
|
||||
dataset.meta.episodes["dataset_from_index"],
|
||||
dataset.meta.episodes["dataset_to_index"],
|
||||
episode_indices_to_use=dataset.episodes,
|
||||
drop_n_last_frames=cfg.policy.drop_n_last_frames,
|
||||
shuffle=True,
|
||||
)
|
||||
@@ -384,6 +387,8 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
|
||||
eval_info = eval_policy_all(
|
||||
envs=eval_env, # dict[suite][task_id] -> vec_env
|
||||
policy=accelerator.unwrap_model(policy),
|
||||
env_preprocessor=env_preprocessor,
|
||||
env_postprocessor=env_postprocessor,
|
||||
preprocessor=preprocessor,
|
||||
postprocessor=postprocessor,
|
||||
n_episodes=cfg.eval.n_episodes,
|
||||
|
||||
@@ -70,3 +70,15 @@ LOOKAHEAD_BACKTRACKTABLE = 100
|
||||
|
||||
# openpi
|
||||
OPENPI_ATTENTION_MASK_VALUE = -2.3819763e38 # TODO(pepijn): Modify this when extending support to fp8 models
|
||||
|
||||
# Constants for LIBERO observation keys
|
||||
LIBERO_KEY_EEF_POS = "robot_state/eef/pos"
|
||||
LIBERO_KEY_EEF_QUAT = "robot_state/eef/quat"
|
||||
LIBERO_KEY_EEF_MAT = "robot_state/eef/mat"
|
||||
LIBERO_KEY_EEF_AXISANGLE = "robot_state/eef/axisangle"
|
||||
LIBERO_KEY_GRIPPER_QPOS = "robot_state/gripper/qpos"
|
||||
LIBERO_KEY_GRIPPER_QVEL = "robot_state/gripper/qvel"
|
||||
LIBERO_KEY_JOINTS_POS = "robot_state/joints/pos"
|
||||
LIBERO_KEY_JOINTS_VEL = "robot_state/joints/vel"
|
||||
LIBERO_KEY_PIXELS_AGENTVIEW = "pixels/agentview_image"
|
||||
LIBERO_KEY_PIXELS_EYE_IN_HAND = "pixels/robot0_eye_in_hand_image"
|
||||
|
||||
72
tests/processor/test_libero_processor.py
Normal file
72
tests/processor/test_libero_processor.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from lerobot.envs.utils import preprocess_observation
|
||||
from lerobot.processor.env_processor import LiberoProcessorStep
|
||||
from lerobot.processor.pipeline import PolicyProcessorPipeline
|
||||
|
||||
seed = 42
|
||||
np.random.seed(seed)
|
||||
|
||||
B = 5
|
||||
obs1 = {
|
||||
"pixels": {
|
||||
"image": (np.random.rand(B, 256, 256, 3) * 255).astype(np.uint8),
|
||||
"image2": (np.random.rand(B, 256, 256, 3) * 255).astype(np.uint8),
|
||||
},
|
||||
"robot_state": {
|
||||
"eef": {
|
||||
"pos": np.random.randn(B, 3),
|
||||
"quat": np.random.randn(B, 4),
|
||||
"mat": np.random.randn(B, 3, 3),
|
||||
},
|
||||
"gripper": {
|
||||
"qpos": np.random.randn(B, 2),
|
||||
"qvel": np.random.randn(B, 2),
|
||||
},
|
||||
"joints": {
|
||||
"pos": np.random.randn(B, 7),
|
||||
"vel": np.random.randn(B, 7),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
observation = preprocess_observation(obs1)
|
||||
libero_preprocessor = PolicyProcessorPipeline(
|
||||
steps=[
|
||||
LiberoProcessorStep(),
|
||||
]
|
||||
)
|
||||
processed_obs = libero_preprocessor(observation)
|
||||
assert "observation.state" in processed_obs
|
||||
state = processed_obs["observation.state"]
|
||||
assert isinstance(state, torch.Tensor)
|
||||
assert state.dtype == torch.float32
|
||||
|
||||
assert state.shape[0] == B
|
||||
assert state.shape[1] == 8
|
||||
|
||||
assert "observation.images.image" in processed_obs
|
||||
assert "observation.images.image2" in processed_obs
|
||||
|
||||
assert isinstance(processed_obs["observation.images.image"], torch.Tensor)
|
||||
assert isinstance(processed_obs["observation.images.image2"], torch.Tensor)
|
||||
|
||||
assert processed_obs["observation.images.image"].shape == (B, 3, 256, 256)
|
||||
assert processed_obs["observation.images.image2"].shape == (B, 3, 256, 256)
|
||||
Reference in New Issue
Block a user