src/lerobot/envs/utils.py

#!/usr/bin/env python

# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from typing import Any

import einops
import gymnasium as gym
import numpy as np
import torch
from torch import Tensor

from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.envs.configs import EnvConfig
from lerobot.utils.utils import get_channel_first_image_shape


def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]:
    """Convert environment observation to LeRobot format observation.

    Every recognized input key is optional; only the keys present in ``observations`` are converted:

    - ``"pixels"``: either a single array (stored under ``"observation.image"``) or a dict of arrays
      (each stored under ``"observation.images.<key>"``).
    - ``"environment_state"``: stored under ``"observation.environment_state"``.
    - ``"agent_pos"``: stored under ``"observation.state"``.

    Args:
        observations: Dictionary of observation batches from a Gym vector environment. Un-batched inputs
            (3-dim images, 1-dim state vectors) get a leading batch dimension of size 1.
    Returns:
        Dictionary of observation batches with keys renamed to LeRobot format and values as tensors.
        Images are float32, channel first, scaled to [0, 1]; state vectors are float32.
    Raises:
        AssertionError: If an image does not look channel last or is not of dtype uint8.
    """
    # TODO(aliberts, rcadene): refactor this to use features from the environment (no hardcoding)
    # map to expected inputs for the policy
    return_observations = {}
    if "pixels" in observations:
        if isinstance(observations["pixels"], dict):
            imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()}
        else:
            imgs = {"observation.image": observations["pixels"]}

        for imgkey, img in imgs.items():
            # TODO(aliberts, rcadene): use transforms.ToTensor()?
            img = torch.from_numpy(img)

            # When preprocessing observations in a non-vectorized environment, we need to add a batch dimension.
            # This is the case for human-in-the-loop RL where there is only one environment.
            if img.ndim == 3:
                img = img.unsqueeze(0)
            # sanity check that images are channel last
            _, h, w, c = img.shape
            assert c < h and c < w, f"expect channel last images, but instead got {img.shape=}"

            # sanity check that images are uint8
            assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}"

            # convert to channel first of type float32 in range [0,1]
            img = einops.rearrange(img, "b h w c -> b c h w").contiguous()
            img = img.type(torch.float32)
            img /= 255

            return_observations[imgkey] = img

    # Vector-valued entries share the same conversion. Both are optional so that pixels-only
    # (`obs_type="pixels"`) or state-only observations do not fail with a KeyError.
    vector_keys = (
        ("environment_state", "observation.environment_state"),
        ("agent_pos", "observation.state"),
    )
    for env_key, policy_key in vector_keys:
        if env_key not in observations:
            continue
        vector = torch.from_numpy(observations[env_key]).float()
        # Single (non-vectorized) environment: add the missing batch dimension.
        if vector.dim() == 1:
            vector = vector.unsqueeze(0)
        return_observations[policy_key] = vector

    return return_observations


def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]:
    """Build the policy feature dictionary from an environment config.

    Each entry of ``env_cfg.features`` is stored under the key given by ``env_cfg.features_map``.
    Visual features are replaced by a new ``PolicyFeature`` whose shape is the result of
    ``get_channel_first_image_shape``; every other feature is reused as is.

    Args:
        env_cfg: Environment config providing ``features`` and ``features_map``.
    Returns:
        Dictionary mapping policy keys to policy features.
    Raises:
        ValueError: If a visual feature does not have exactly 3 dimensions.
        KeyError: If a feature key has no entry in ``env_cfg.features_map``.
    """
    # TODO(aliberts, rcadene): remove this hardcoding of keys and just use the nested keys as is
    # (need to also refactor preprocess_observation and externalize normalization from policies)
    converted = {}
    for env_key, env_feature in env_cfg.features.items():
        # Non-visual features are passed through untouched.
        policy_feature = env_feature
        if env_feature.type is FeatureType.VISUAL:
            if len(env_feature.shape) != 3:
                raise ValueError(f"Number of dimensions of {env_key} != 3 (shape={env_feature.shape})")

            policy_feature = PolicyFeature(
                type=env_feature.type,
                shape=get_channel_first_image_shape(env_feature.shape),
            )

        converted[env_cfg.features_map[env_key]] = policy_feature

    return converted


def are_all_envs_same_type(env: gym.vector.VectorEnv) -> bool:
    """Tell whether every sub-environment in ``env.envs`` has exactly the type of the first one.

    Types are compared by identity, so an instance of a subclass is not considered the same type
    as an instance of its parent class.
    """
    reference_type = type(env.envs[0])
    for sub_env in env.envs:
        if type(sub_env) is not reference_type:
            return False
    return True


def check_env_attributes_and_types(env: gym.vector.VectorEnv) -> None:
    """Emit warnings about missing task attributes and mixed sub-environment types.

    Two independent checks are performed, each producing a ``UserWarning``:

    - the first sub-environment lacks ``task_description`` or ``task``;
    - the sub-environments are not all of the same type.

    Args:
        env: Vector environment whose ``envs`` are inspected.
    """
    with warnings.catch_warnings():
        # The "once" filter is installed inside the context manager, so it only applies here.
        warnings.simplefilter("once", UserWarning)

        first_env = env.envs[0]
        missing_task_attr = not hasattr(first_env, "task_description") or not hasattr(first_env, "task")
        if missing_task_attr:
            warnings.warn(
                "The environment does not have 'task_description' and 'task'. Some policies require these features.",
                UserWarning,
                stacklevel=2,
            )

        mixed_types = not are_all_envs_same_type(env)
        if mixed_types:
            warnings.warn(
                "The environments have different types. Make sure you infer the right task from each environment. Empty task will be passed instead.",
                UserWarning,
                stacklevel=2,
            )


def add_envs_task(env: gym.vector.VectorEnv, observation: dict[str, Any]) -> dict[str, Any]:
    """Store a ``"task"`` entry in the observation dict, based on the first environment's attributes.

    ``task_description`` takes precedence over ``task``; the chosen attribute is fetched with
    ``env.call``. When the first environment has neither, one empty string per batch element is
    used, the batch size being read from the first entry of ``observation``.

    Args:
        env: Vector environment providing ``envs`` and ``call``.
        observation: Observation dict; it is modified in place.
    Returns:
        The same ``observation`` dict, with the ``"task"`` key set.
    """
    first_env = env.envs[0]
    for attr_name in ("task_description", "task"):
        if hasattr(first_env, attr_name):
            observation["task"] = env.call(attr_name)
            return observation

    # For envs without language instructions, e.g. aloha transfer cube and etc.
    first_key = list(observation)[0]
    batch_size = observation[first_key].shape[0]
    observation["task"] = [""] * batch_size
    return observation
Add copyrights (#157) 2024-05-15 12:13:09 +02:00			`#!/usr/bin/env python`

			`# Copyright 2024 The HuggingFace Inc. team. All rights reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
Support for PI0+FAST (#921) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <re.cadene@gmail.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> 2025-04-04 11:51:11 +02:00			`import warnings`
			`from typing import Any`

refactor(observation): Streamline observation preprocessing and remove unused processor methods - Updated the `preprocess_observation` function to enhance image handling and ensure proper tensor formatting. - Removed the `RobotProcessor` and associated transition handling from the `rollout` function, simplifying the observation processing flow. - Integrated direct calls to `preprocess_observation` for improved clarity and efficiency in the evaluation script. 2025-08-05 10:32:56 +02:00			`import einops`
Support for PI0+FAST (#921) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <re.cadene@gmail.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> 2025-04-04 11:51:11 +02:00			`import gymnasium as gym`
Refactor eval.py (#127) 2024-05-03 17:33:16 +01:00			`import numpy as np`
refactor(observation): Streamline observation preprocessing and remove unused processor methods - Updated the `preprocess_observation` function to enhance image handling and ensure proper tensor formatting. - Removed the `RobotProcessor` and associated transition handling from the `rollout` function, simplifying the observation processing flow. - Integrated direct calls to `preprocess_observation` for improved clarity and efficiency in the evaluation script. 2025-08-05 10:32:56 +02:00			`import torch`
Refactor eval.py (#127) 2024-05-03 17:33:16 +01:00			`from torch import Tensor`
test_envs.py are passing, remove simxarm and pusht directories 2024-04-05 16:21:07 +00:00
Simplify configs (#550) Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: HUANG TZU-CHUN <137322177+tc-huang@users.noreply.github.com> 2025-01-31 13:57:37 +01:00			`from lerobot.configs.types import FeatureType, PolicyFeature`
Package folder structure (#1417) * Move files * Replace imports & paths * Update relative paths * Update doc symlinks * Update instructions paths * Fix imports * Update grpc files * Update more instructions * Downgrade grpc-tools * Update manifest * Update more paths * Update config paths * Update CI paths * Update bandit exclusions * Remove walkthrough section 2025-07-01 16:34:46 +02:00			`from lerobot.envs.configs import EnvConfig`
			`from lerobot.utils.utils import get_channel_first_image_shape`
Simplify configs (#550) Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: HUANG TZU-CHUN <137322177+tc-huang@users.noreply.github.com> 2025-01-31 13:57:37 +01:00
test_envs.py are passing, remove simxarm and pusht directories 2024-04-05 16:21:07 +00:00
Refactor eval.py (#127) 2024-05-03 17:33:16 +01:00			`def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]:`
refactor(observation): Streamline observation preprocessing and remove unused processor methods - Updated the `preprocess_observation` function to enhance image handling and ensure proper tensor formatting. - Removed the `RobotProcessor` and associated transition handling from the `rollout` function, simplifying the observation processing flow. - Integrated direct calls to `preprocess_observation` for improved clarity and efficiency in the evaluation script. 2025-08-05 10:32:56 +02:00			`# TODO(aliberts, rcadene): refactor this to use features from the environment (no hardcoding)`
Refactor eval.py (#127) 2024-05-03 17:33:16 +01:00			`"""Convert environment observation to LeRobot format observation.`
			`Args:`
			`observation: Dictionary of observation batches from a Gym vector environment.`
			`Returns:`
			`Dictionary of observation batches with keys renamed to LeRobot format and values as tensors.`
			`"""`
refactor(observation): Streamline observation preprocessing and remove unused processor methods - Updated the `preprocess_observation` function to enhance image handling and ensure proper tensor formatting. - Removed the `RobotProcessor` and associated transition handling from the `rollout` function, simplifying the observation processing flow. - Integrated direct calls to `preprocess_observation` for improved clarity and efficiency in the evaluation script. 2025-08-05 10:32:56 +02:00			`# map to expected inputs for the policy`
			`return_observations = {}`
			`if "pixels" in observations:`
			`if isinstance(observations["pixels"], dict):`
			`imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()}`
			`else:`
			`imgs = {"observation.image": observations["pixels"]}`

			`for imgkey, img in imgs.items():`
			`# TODO(aliberts, rcadene): use transforms.ToTensor()?`
			`img = torch.from_numpy(img)`

			`# When preprocessing observations in a non-vectorized environment, we need to add a batch dimension.`
			`# This is the case for human-in-the-loop RL where there is only one environment.`
			`if img.ndim == 3:`
			`img = img.unsqueeze(0)`
			`# sanity check that images are channel last`
			`_, h, w, c = img.shape`
			`assert c < h and c < w, f"expect channel last images, but instead got {img.shape=}"`

			`# sanity check that images are uint8`
			`assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}"`

			`# convert to channel first of type float32 in range [0,1]`
			`img = einops.rearrange(img, "b h w c -> b c h w").contiguous()`
			`img = img.type(torch.float32)`
			`img /= 255`

			`return_observations[imgkey] = img`

			`if "environment_state" in observations:`
			`env_state = torch.from_numpy(observations["environment_state"]).float()`
			`if env_state.dim() == 1:`
			`env_state = env_state.unsqueeze(0)`
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci 2025-07-02 15:31:15 +00:00
refactor(observation): Streamline observation preprocessing and remove unused processor methods - Updated the `preprocess_observation` function to enhance image handling and ensure proper tensor formatting. - Removed the `RobotProcessor` and associated transition handling from the `rollout` function, simplifying the observation processing flow. - Integrated direct calls to `preprocess_observation` for improved clarity and efficiency in the evaluation script. 2025-08-05 10:32:56 +02:00			`return_observations["observation.environment_state"] = env_state`
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci 2025-07-02 15:31:15 +00:00
refactor(observation): Streamline observation preprocessing and remove unused processor methods - Updated the `preprocess_observation` function to enhance image handling and ensure proper tensor formatting. - Removed the `RobotProcessor` and associated transition handling from the `rollout` function, simplifying the observation processing flow. - Integrated direct calls to `preprocess_observation` for improved clarity and efficiency in the evaluation script. 2025-08-05 10:32:56 +02:00			# TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
			`agent_pos = torch.from_numpy(observations["agent_pos"]).float()`
			`if agent_pos.dim() == 1:`
			`agent_pos = agent_pos.unsqueeze(0)`
			`return_observations["observation.state"] = agent_pos`
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci 2025-07-02 15:31:15 +00:00
refactor(observation): Streamline observation preprocessing and remove unused processor methods - Updated the `preprocess_observation` function to enhance image handling and ensure proper tensor formatting. - Removed the `RobotProcessor` and associated transition handling from the `rollout` function, simplifying the observation processing flow. - Integrated direct calls to `preprocess_observation` for improved clarity and efficiency in the evaluation script. 2025-08-05 10:32:56 +02:00			`return return_observations`
test_envs are passing 2024-04-05 23:27:12 +00:00

Simplify configs (#550) Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: HUANG TZU-CHUN <137322177+tc-huang@users.noreply.github.com> 2025-01-31 13:57:37 +01:00			`def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]:`
			`# TODO(aliberts, rcadene): remove this hardcoding of keys and just use the nested keys as is`
refactor(observation): Streamline observation preprocessing and remove unused processor methods - Updated the `preprocess_observation` function to enhance image handling and ensure proper tensor formatting. - Removed the `RobotProcessor` and associated transition handling from the `rollout` function, simplifying the observation processing flow. - Integrated direct calls to `preprocess_observation` for improved clarity and efficiency in the evaluation script. 2025-08-05 10:32:56 +02:00			`# (need to also refactor preprocess_observation and externalize normalization from policies)`
Simplify configs (#550) Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: HUANG TZU-CHUN <137322177+tc-huang@users.noreply.github.com> 2025-01-31 13:57:37 +01:00			`policy_features = {}`
			`for key, ft in env_cfg.features.items():`
			`if ft.type is FeatureType.VISUAL:`
			`if len(ft.shape) != 3:`
			`raise ValueError(f"Number of dimensions of {key} != 3 (shape={ft.shape})")`

			`shape = get_channel_first_image_shape(ft.shape)`
			`feature = PolicyFeature(type=ft.type, shape=shape)`
			`else:`
			`feature = ft`

			`policy_key = env_cfg.features_map[key]`
			`policy_features[policy_key] = feature`

			`return policy_features`
Support for PI0+FAST (#921) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi <re.cadene@gmail.com> Co-authored-by: Steven Palma <imstevenpmwork@ieee.org> 2025-04-04 11:51:11 +02:00

			`def are_all_envs_same_type(env: gym.vector.VectorEnv) -> bool:`
			`first_type = type(env.envs[0]) # Get type of first env`
			`return all(type(e) is first_type for e in env.envs) # Fast type check`


			`def check_env_attributes_and_types(env: gym.vector.VectorEnv) -> None:`
			`with warnings.catch_warnings():`
			`warnings.simplefilter("once", UserWarning) # Apply filter only in this function`

			`if not (hasattr(env.envs[0], "task_description") and hasattr(env.envs[0], "task")):`
			`warnings.warn(`
			`"The environment does not have 'task_description' and 'task'. Some policies require these features.",`
			`UserWarning,`
			`stacklevel=2,`
			`)`
			`if not are_all_envs_same_type(env):`
			`warnings.warn(`
			`"The environments have different types. Make sure you infer the right task from each environment. Empty task will be passed instead.",`
			`UserWarning,`
			`stacklevel=2,`
			`)`


			`def add_envs_task(env: gym.vector.VectorEnv, observation: dict[str, Any]) -> dict[str, Any]:`
			`"""Adds task feature to the observation dict with respect to the first environment attribute."""`
			`if hasattr(env.envs[0], "task_description"):`
			`observation["task"] = env.call("task_description")`
			`elif hasattr(env.envs[0], "task"):`
			`observation["task"] = env.call("task")`
			`else: # For envs without language instructions, e.g. aloha transfer cube and etc.`
			`num_envs = observation[list(observation.keys())[0]].shape[0]`
			`observation["task"] = ["" for _ in range(num_envs)]`
			`return observation`