wip

2026-05-31 19:01:28 +00:00 · 2025-06-15 00:24:03 +02:00
280 changed files with 2099 additions and 8928 deletions
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -24,7 +24,7 @@ Examples:
 pytest -sx tests/test_stuff.py::test_something
 ```
 ```bash
-python -m lerobot.scripts.train --some.option=true
+python lerobot/scripts/train.py --some.option=true
 ```

 ## SECTION TO REMOVE BEFORE SUBMITTING YOUR PR
--- a/.github/workflows/nightly-tests.yml
+++ b/.github/workflows/nightly-tests.yml
@@ -44,7 +44,7 @@ jobs:
        working-directory: /lerobot
    steps:
      - name: Tests
-        run: pytest -v --cov=./src/lerobot --disable-warnings tests
+        run: pytest -v --cov=./lerobot --disable-warnings tests

      - name: Tests end-to-end
        run: make test-end-to-end
@@ -74,7 +74,7 @@ jobs:
        run: nvidia-smi

      - name: Test
-        run: pytest -v --cov=./src/lerobot --cov-report=xml --disable-warnings tests
+        run: pytest -v --cov=./lerobot --cov-report=xml --disable-warnings tests
      #   TODO(aliberts): Link with HF Codecov account
      # - name: Upload coverage reports to Codecov with GitHub Action
      #   uses: codecov/codecov-action@v4
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -17,7 +17,7 @@ name: Tests
 on:
  pull_request:
    paths:
-      - "src/**"
+      - "lerobot/**"
      - "tests/**"
      - "examples/**"
      - ".github/**"
@@ -29,7 +29,7 @@ on:
    branches:
      - main
    paths:
-      - "src/**"
+      - "lerobot/**"
      - "tests/**"
      - "examples/**"
      - ".github/**"
@@ -73,7 +73,7 @@ jobs:

      - name: Test with pytest
        run: |
-          uv run pytest tests -v --cov=./src/lerobot --durations=0 \
+          uv run pytest tests -v --cov=./lerobot --durations=0 \
            -W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
            -W ignore::UserWarning:torch.utils.data.dataloader:558 \
            -W ignore::UserWarning:gymnasium.utils.env_checker:247 \
@@ -105,7 +105,7 @@ jobs:

      - name: Test with pytest
        run: |
-          uv run pytest tests -v --cov=./src/lerobot --durations=0 \
+          uv run pytest tests -v --cov=./lerobot --durations=0 \
            -W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
            -W ignore::UserWarning:torch.utils.data.dataloader:558 \
            -W ignore::UserWarning:gymnasium.utils.env_checker:247 \
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -67,7 +67,7 @@ post it.

 ## Adding new policies, datasets or environments

-Look at our implementations for [datasets](./src/lerobot/datasets/), [policies](./src/lerobot/policies/),
+Look at our implementations for [datasets](./lerobot/common/datasets/), [policies](./lerobot/common/policies/),
 environments ([aloha](https://github.com/huggingface/gym-aloha),
 [xarm](https://github.com/huggingface/gym-xarm),
 [pusht](https://github.com/huggingface/gym-pusht))
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +0,0 @@
-include src/lerobot/templates/lerobot_modelcard_template.md
-include src/lerobot/datasets/card_template.md
--- a/52
+++ b/52
@@ -40,17 +40,14 @@ test-end-to-end:
 	${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval
 	${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train
 	${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval
-	${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-train
-	${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-eval

 test-act-ete-train:
-	python -m lerobot.scripts.train \
+	python lerobot/scripts/train.py \
 		--policy.type=act \
 		--policy.dim_model=64 \
 		--policy.n_action_steps=20 \
 		--policy.chunk_size=20 \
 		--policy.device=$(DEVICE) \
-		--policy.push_to_hub=false \
 		--env.type=aloha \
 		--env.episode_length=5 \
 		--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
@@ -68,12 +65,12 @@ test-act-ete-train:
 		--output_dir=tests/outputs/act/

 test-act-ete-train-resume:
-	python -m lerobot.scripts.train \
+	python lerobot/scripts/train.py \
 		--config_path=tests/outputs/act/checkpoints/000002/pretrained_model/train_config.json \
 		--resume=true

 test-act-ete-eval:
-	python -m lerobot.scripts.eval \
+	python lerobot/scripts/eval.py \
 		--policy.path=tests/outputs/act/checkpoints/000004/pretrained_model \
 		--policy.device=$(DEVICE) \
 		--env.type=aloha \
@@ -82,13 +79,12 @@ test-act-ete-eval:
 		--eval.batch_size=1

 test-diffusion-ete-train:
-	python -m lerobot.scripts.train \
+	python lerobot/scripts/train.py \
 		--policy.type=diffusion \
 		--policy.down_dims='[64,128,256]' \
 		--policy.diffusion_step_embed_dim=32 \
 		--policy.num_inference_steps=10 \
 		--policy.device=$(DEVICE) \
-		--policy.push_to_hub=false \
 		--env.type=pusht \
 		--env.episode_length=5 \
 		--dataset.repo_id=lerobot/pusht \
@@ -106,7 +102,7 @@ test-diffusion-ete-train:
 		--output_dir=tests/outputs/diffusion/

 test-diffusion-ete-eval:
-	python -m lerobot.scripts.eval \
+	python lerobot/scripts/eval.py \
 		--policy.path=tests/outputs/diffusion/checkpoints/000002/pretrained_model \
 		--policy.device=$(DEVICE) \
 		--env.type=pusht \
@@ -115,10 +111,9 @@ test-diffusion-ete-eval:
 		--eval.batch_size=1

 test-tdmpc-ete-train:
-	python -m lerobot.scripts.train \
+	python lerobot/scripts/train.py \
 		--policy.type=tdmpc \
 		--policy.device=$(DEVICE) \
-		--policy.push_to_hub=false \
 		--env.type=xarm \
 		--env.task=XarmLift-v0 \
 		--env.episode_length=5 \
@@ -137,7 +132,7 @@ test-tdmpc-ete-train:
 		--output_dir=tests/outputs/tdmpc/

 test-tdmpc-ete-eval:
-	python -m lerobot.scripts.eval \
+	python lerobot/scripts/eval.py \
 		--policy.path=tests/outputs/tdmpc/checkpoints/000002/pretrained_model \
 		--policy.device=$(DEVICE) \
 		--env.type=xarm \
@@ -145,36 +140,3 @@ test-tdmpc-ete-eval:
 		--env.task=XarmLift-v0 \
 		--eval.n_episodes=1 \
 		--eval.batch_size=1
-
-
-test-smolvla-ete-train:
-	python -m lerobot.scripts.train \
-		--policy.type=smolvla \
-		--policy.n_action_steps=20 \
-		--policy.chunk_size=20 \
-		--policy.device=$(DEVICE) \
-		--policy.push_to_hub=false \
-		--env.type=aloha \
-		--env.episode_length=5 \
-		--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
-		--dataset.image_transforms.enable=true \
-		--dataset.episodes="[0]" \
-		--batch_size=2 \
-		--steps=4 \
-		--eval_freq=2 \
-		--eval.n_episodes=1 \
-		--eval.batch_size=1 \
-		--save_freq=2 \
-		--save_checkpoint=true \
-		--log_freq=1 \
-		--wandb.enable=false \
-		--output_dir=tests/outputs/smolvla/
-
-test-smolvla-ete-eval:
-	python -m lerobot.scripts.eval \
-		--policy.path=tests/outputs/smolvla/checkpoints/000004/pretrained_model \
-		--policy.device=$(DEVICE) \
-		--env.type=aloha \
-		--env.episode_length=5 \
-		--eval.n_episodes=1 \
-		--eval.batch_size=1
--- a/README.md
+++ b/README.md
@@ -130,7 +130,7 @@ pip install -e .
 ```

 > **NOTE:** If you encounter build errors, you may need to install additional dependencies (`cmake`, `build-essential`, and `ffmpeg libs`). On Linux, run:
-`sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev`. For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg)
+`sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev`. For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg)

 For simulations, 🤗 LeRobot comes with gymnasium environments that can be installed as extras:
 - [aloha](https://github.com/huggingface/gym-aloha)
@@ -149,20 +149,44 @@ wandb login

 (note: you will also need to enable WandB in the configuration. See below.)

+## Walkthrough
+
+```
+.
+├── examples             # contains demonstration examples, start here to learn about LeRobot
+|   └── advanced         # contains even more examples for those who have mastered the basics
+├── lerobot
+|   ├── configs          # contains config classes with all options that you can override in the command line
+|   ├── common           # contains classes and utilities
+|   |   ├── datasets       # various datasets of human demonstrations: aloha, pusht, xarm
+|   |   ├── envs           # various sim environments: aloha, pusht, xarm
+|   |   ├── policies       # various policies: act, diffusion, tdmpc
+|   |   ├── robot_devices  # various real devices: dynamixel motors, opencv cameras, koch robots
+|   |   └── utils          # various utilities
+|   └── scripts          # contains functions to execute via command line
+|       ├── eval.py                 # load policy and evaluate it on an environment
+|       ├── train.py                # train a policy via imitation learning and/or reinforcement learning
+|       ├── control_robot.py        # teleoperate a real robot, record data, run a policy
+|       ├── push_dataset_to_hub.py  # convert your dataset into LeRobot dataset format and upload it to the Hugging Face hub
+|       └── visualize_dataset.py    # load a dataset and render its demonstrations
+├── outputs               # contains results of scripts execution: logs, videos, model checkpoints
+└── tests                 # contains pytest utilities for continuous integration
+```
+
 ### Visualize datasets

 Check out [example 1](./examples/1_load_lerobot_dataset.py) that illustrates how to use our dataset class which automatically downloads data from the Hugging Face hub.

 You can also locally visualize episodes from a dataset on the hub by executing our script from the command line:
 ```bash
-python -m lerobot.scripts.visualize_dataset \
+python lerobot/scripts/visualize_dataset.py \
    --repo-id lerobot/pusht \
    --episode-index 0
 ```

 or from a dataset in a local folder with the `root` option and the `--local-files-only` (in the following case the dataset will be searched for in `./my_local_data_dir/lerobot/pusht`)
 ```bash
-python -m lerobot.scripts.visualize_dataset \
+python lerobot/scripts/visualize_dataset.py \
    --repo-id lerobot/pusht \
    --root ./my_local_data_dir \
    --local-files-only 1 \
@@ -175,7 +199,7 @@ It will open `rerun.io` and display the camera streams, robot states and actions
 https://github-production-user-asset-6210df.s3.amazonaws.com/4681518/328035972-fd46b787-b532-47e2-bb6f-fd536a55a7ed.mov?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20240505%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240505T172924Z&X-Amz-Expires=300&X-Amz-Signature=d680b26c532eeaf80740f08af3320d22ad0b8a4e4da1bcc4f33142c15b509eda&X-Amz-SignedHeaders=host&actor_id=24889239&key_id=0&repo_id=748713144


-Our script can also visualize datasets stored on a distant server. See `python -m lerobot.scripts.visualize_dataset --help` for more instructions.
+Our script can also visualize datasets stored on a distant server. See `python lerobot/scripts/visualize_dataset.py --help` for more instructions.

 ### The `LeRobotDataset` format

@@ -228,7 +252,7 @@ Check out [example 2](./examples/2_evaluate_pretrained_policy.py) that illustrat

 We also provide a more capable script to parallelize the evaluation over multiple environments during the same rollout. Here is an example with a pretrained model hosted on [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht):
 ```bash
-python -m lerobot.scripts.eval \
+python lerobot/scripts/eval.py \
    --policy.path=lerobot/diffusion_pusht \
    --env.type=pusht \
    --eval.batch_size=10 \
@@ -240,10 +264,10 @@ python -m lerobot.scripts.eval \
 Note: After training your own policy, you can re-evaluate the checkpoints with:

 ```bash
-python -m lerobot.scripts.eval --policy.path={OUTPUT_DIR}/checkpoints/last/pretrained_model
+python lerobot/scripts/eval.py --policy.path={OUTPUT_DIR}/checkpoints/last/pretrained_model
 ```

-See `python -m lerobot.scripts.eval --help` for more instructions.
+See `python lerobot/scripts/eval.py --help` for more instructions.

 ### Train your own policy

@@ -255,14 +279,14 @@ A link to the wandb logs for the run will also show up in yellow in your termina

 ![](media/wandb.png)

-Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `--eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python -m lerobot.scripts.eval --help` for more instructions.
+Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `--eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python lerobot/scripts/eval.py --help` for more instructions.

 #### Reproduce state-of-the-art (SOTA)

 We provide some pretrained policies on our [hub page](https://huggingface.co/lerobot) that can achieve state-of-the-art performances.
 You can reproduce their training by loading the config from their run. Simply running:
 ```bash
-python -m lerobot.scripts.train --config_path=lerobot/diffusion_pusht
+python lerobot/scripts/train.py --config_path=lerobot/diffusion_pusht
 ```
 reproduces SOTA results for Diffusion Policy on the PushT task.

@@ -288,7 +312,7 @@ python lerobot/scripts/push_dataset_to_hub.py \

 See `python lerobot/scripts/push_dataset_to_hub.py --help` for more instructions.

-If your dataset format is not supported, implement your own in `lerobot/datasets/push_dataset_to_hub/${raw_format}_format.py` by copying examples like [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/aloha_hdf5_format.py), or [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/xarm_pkl_format.py). -->
+If your dataset format is not supported, implement your own in `lerobot/common/datasets/push_dataset_to_hub/${raw_format}_format.py` by copying examples like [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py), or [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py). -->


 ### Add a pretrained policy
--- a/benchmarks/video/capture_camera_feed.py
+++ b/benchmarks/video/capture_camera_feed.py
@@ -55,7 +55,7 @@ def display_and_save_video_stream(output_dir: Path, fps: int, width: int, height
        if not ret:
            print("Error: Could not read frame.")
            break
-        rr.log("video/stream", rr.Image(frame), static=True)
+        rr.log("video/stream", rr.Image(frame.numpy()), static=True)
        cv2.imwrite(str(capture_dir / f"frame_{frame_index:06d}.png"), frame)
        frame_index += 1

--- a/benchmarks/video/run_video_benchmark.py
+++ b/benchmarks/video/run_video_benchmark.py
@@ -35,12 +35,12 @@ import torch
 from skimage.metrics import mean_squared_error, peak_signal_noise_ratio, structural_similarity
 from tqdm import tqdm

-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.datasets.video_utils import (
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.datasets.video_utils import (
    decode_video_frames_torchvision,
    encode_video_frames,
 )
-from lerobot.utils.benchmark import TimeBenchmark
+from lerobot.common.utils.benchmark import TimeBenchmark

 BASE_ENCODING = OrderedDict(
    [
--- a/docker/lerobot-cpu/Dockerfile
+++ b/docker/lerobot-cpu/Dockerfile
@@ -22,7 +22,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 COPY . /lerobot
 WORKDIR /lerobot
 RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \
-    && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, smolvla]" \
+    && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht]" \
        --extra-index-url https://download.pytorch.org/whl/cpu

 # Execute in bash shell rather than python
--- a/docker/lerobot-gpu/Dockerfile
+++ b/docker/lerobot-gpu/Dockerfile
@@ -21,4 +21,4 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 COPY . /lerobot
 WORKDIR /lerobot
 RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \
-    && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel, smolvla]"
+    && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]"
--- a/docs/source/cameras.mdx
+++ b/docs/source/cameras.mdx
@@ -8,7 +8,7 @@ To instantiate a camera, you need a camera identifier. This identifier might cha

 To find the camera indices of the cameras plugged into your system, run the following script:
 ```bash
-python -m lerobot.find_cameras opencv # or realsense for Intel Realsense cameras
+python lerobot/find_cameras.py opencv # or realsense for Intel Realsense cameras
 ```

 The output will look something like this if you have two cameras connected:
@@ -44,9 +44,9 @@ Below are two examples, demonstrating how to work with the API.
 <hfoption id="Open CV Camera">

 ```python
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
-from lerobot.cameras.opencv.camera_opencv import OpenCVCamera
-from lerobot.cameras.configs import ColorMode, Cv2Rotation
+from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.common.cameras.opencv.camera_opencv import OpenCVCamera
+from lerobot.common.cameras.configs import ColorMode, Cv2Rotation

 # Construct an `OpenCVCameraConfig` with your desired FPS, resolution, color mode, and rotation.
 config = OpenCVCameraConfig(
@@ -75,9 +75,9 @@ finally:
 <hfoption id="Intel Realsense Camera">

 ```python
-from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig
-from lerobot.cameras.realsense.camera_realsense import RealSenseCamera
-from lerobot.cameras.configs import ColorMode, Cv2Rotation
+from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig
+from lerobot.common.cameras.realsense.camera_realsense import RealSenseCamera
+from lerobot.common.cameras.configs import ColorMode, Cv2Rotation

 # Create a `RealSenseCameraConfig` specifying your camera’s serial number and enabling depth.
 config = RealSenseCameraConfig(
--- a/docs/source/hilserl.mdx
+++ b/docs/source/hilserl.mdx
@@ -24,7 +24,6 @@ This guide provides step-by-step instructions for training a robot policy using
 - A gamepad (recommended) or keyboard to control the robot
 - A Nvidia GPU
 - A real robot with a follower and leader arm (optional if you use the keyboard or the gamepad)
- A URDF file for the robot for the kinematics package (check `lerobot/common/model/kinematics.py`)

 ## What kind of tasks can I train?

@@ -51,12 +50,12 @@ pip install -e ".[hilserl]"

 ### Understanding Configuration

-The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/envs/configs.py`. Which is defined as:
+The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/common/envs/configs.py`. Which is defined as:

 ```python
 class HILSerlRobotEnvConfig(EnvConfig):
-    robot: RobotConfig | None = None    # Main robot agent (defined in `lerobot/robots`)
-    teleop: TeleoperatorConfig | None = None    # Teleoperator agent, e.g., gamepad or leader arm, (defined in `lerobot/teleoperators`)
+    robot: RobotConfig | None = None    # Main robot agent (defined in `lerobot/common/robots`)
+    teleop: TeleoperatorConfig | None = None    # Teleoperator agent, e.g., gamepad or leader arm, (defined in `lerobot/common/teleoperators`)
    wrapper: EnvTransformConfig | None = None    # Environment wrapper settings; check `lerobot/scripts/server/gym_manipulator.py`
    fps: int = 10    # Control frequency
    name: str = "real_robot"    # Environment name
@@ -173,7 +172,7 @@ class SO100FollowerEndEffectorConfig(SO100FollowerConfig):
    )
 ```

-The `Teleoperator` defines the teleoperation device. You can check the list of available teleoperators in `lerobot/teleoperators`.
+The `Teleoperator` defines the teleoperation device. You can check the list of available teleoperators in `lerobot/common/teleoperators`.

 **Setting up the Gamepad**

@@ -227,7 +226,7 @@ During the online training, press `space` to take over the policy and `space` ag
 Start the recording process, an example of the config file can be found [here](https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/env_config_so100.json):

 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config_so100.json
+python lerobot/scripts/rl/gym_manipulator.py --config_path lerobot/configs/env_config_so100.json
 ```

 During recording:
@@ -257,7 +256,7 @@ Note: If you already know the crop parameters, you can skip this step and just s
 Use the `crop_dataset_roi.py` script to interactively select regions of interest in your camera images:

 ```bash
-python -m lerobot.scripts.rl.crop_dataset_roi --repo-id username/pick_lift_cube
+python lerobot/scripts/rl/crop_dataset_roi.py --repo-id username/pick_lift_cube
 ```

 1. For each camera view, the script will display the first frame
@@ -314,7 +313,7 @@ Before training, you need to collect a dataset with labeled examples. The `recor
 To collect a dataset, you need to modify some parameters in the environment configuration based on HILSerlRobotEnvConfig.

 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/reward_classifier_train_config.json
+python lerobot/scripts/rl/gym_manipulator.py --config_path lerobot/configs/reward_classifier_train_config.json
 ```

 **Key Parameters for Data Collection**
@@ -388,7 +387,7 @@ Example configuration for training the [reward classifier](https://huggingface.c
 To train the classifier, use the `train.py` script with your configuration:

 ```bash
-python -m lerobot.scripts.train --config_path path/to/reward_classifier_train_config.json
+python lerobot/scripts/train.py --config_path path/to/reward_classifier_train_config.json
 ```

 **Deploying and Testing the Model**
@@ -411,7 +410,7 @@ or set the argument in the json config file.

 Run `gym_manipulator.py` to test the model.
 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config.json
+python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/env_config.json
 ```

 The reward classifier will automatically provide rewards based on the visual input from the robot's cameras.
@@ -423,17 +422,17 @@ The reward classifier will automatically provide rewards based on the visual inp

 2. **Collect a dataset**:
   ```bash
-   python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config.json
+   python lerobot/scripts/rl/gym_manipulator.py --config_path lerobot/configs/env_config.json
   ```

 3. **Train the classifier**:
   ```bash
-   python -m lerobot.scripts.train --config_path src/lerobot/configs/reward_classifier_train_config.json
+   python lerobot/scripts/train.py --config_path lerobot/configs/reward_classifier_train_config.json
   ```

 4. **Test the classifier**:
   ```bash
-   python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config.json
+   python lerobot/scripts/rl/gym_manipulator.py --config_path lerobot/configs/env_config.json
   ```

 ### Training with Actor-Learner
@@ -447,7 +446,7 @@ Create a training configuration file (example available [here](https://huggingfa
 1. Configure the policy settings (`type="sac"`, `device`, etc.)
 2. Set `dataset` to your cropped dataset
 3. Configure environment settings with crop parameters
-4. Check the other parameters related to SAC in [configuration_sac.py](https://github.com/huggingface/lerobot/blob/19bb621a7d0a31c20cd3cc08b1dbab68d3031454/lerobot/policies/sac/configuration_sac.py#L79).
+4. Check the other parameters related to SAC in [configuration_sac.py](https://github.com/huggingface/lerobot/blob/19bb621a7d0a31c20cd3cc08b1dbab68d3031454/lerobot/common/policies/sac/configuration_sac.py#L79).
 5. Verify that the `policy` config is correct with the right `input_features` and `output_features` for your task.

 **Starting the Learner**
@@ -455,7 +454,7 @@ Create a training configuration file (example available [here](https://huggingfa
 First, start the learner server process:

 ```bash
-python -m lerobot.scripts.rl.learner --config_path src/lerobot/configs/train_config_hilserl_so100.json
+python lerobot/scripts/rl/learner.py --config_path lerobot/configs/train_config_hilserl_so100.json
 ```

 The learner:
@@ -469,7 +468,7 @@ The learner:
 In a separate terminal, start the actor process with the same configuration:

 ```bash
-python -m lerobot.scripts.rl.actor --config_path src/lerobot/configs/train_config_hilserl_so100.json
+python lerobot/scripts/rl/actor.py --config_path lerobot/configs/train_config_hilserl_so100.json
 ```

 The actor:
--- a/docs/source/hilserl_sim.mdx
+++ b/docs/source/hilserl_sim.mdx
@@ -77,7 +77,7 @@ Important parameters:
 To run the environment, set mode to null:

 ```python
-python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.json
+python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/gym_hil_env.json
 ```

 ### Recording a Dataset
@@ -85,7 +85,7 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.j
 To collect a dataset, set the mode to `record` whilst defining the repo_id and number of episodes to record:

 ```python
-python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.json
+python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/gym_hil_env.json
 ```

 ### Training a Policy
@@ -93,13 +93,13 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.j
 To train a policy, checkout the configuration example available [here](https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/train_gym_hil_env.json) and run the actor and learner servers:

 ```python
-python -m lerobot.scripts.rl.actor --config_path path/to/train_gym_hil_env.json
+python lerobot/scripts/rl/actor.py --config_path path/to/train_gym_hil_env.json
 ```

 In a different terminal, run the learner server:

 ```python
-python -m lerobot.scripts.rl.learner --config_path path/to/train_gym_hil_env.json
+python lerobot/scripts/rl/learner.py --config_path path/to/train_gym_hil_env.json
 ```

 The simulation environment provides a safe and repeatable way to develop and test your Human-In-the-Loop reinforcement learning components before deploying to real robots.
--- a/docs/source/il_robots.mdx
+++ b/docs/source/il_robots.mdx
@@ -52,8 +52,8 @@ python -m lerobot.teleoperate \
 </hfoption>
 <hfoption id="API example">
 ```python
-from lerobot.teleoperators.so101_leader import SO101LeaderConfig, SO101Leader
-from lerobot.robots.so101_follower import SO101FollowerConfig, SO101Follower
+from lerobot.common.teleoperators.so101_leader import SO101LeaderConfig, SO101Leader
+from lerobot.common.robots.so101_follower import SO101FollowerConfig, SO101Follower

 robot_config = SO101FollowerConfig(
    port="/dev/tty.usbmodem58760431541",
@@ -105,9 +105,9 @@ python -m lerobot.teleoperate \
 </hfoption>
 <hfoption id="API example">
 ```python
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
-from lerobot.teleoperators.koch_leader import KochLeaderConfig, KochLeader
-from lerobot.robots.koch_follower import KochFollowerConfig, KochFollower
+from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.common.teleoperators.koch_leader import KochLeaderConfig, KochLeader
+from lerobot.common.robots.koch_follower import KochFollowerConfig, KochFollower

 camera_config = {
    "front": OpenCVCameraConfig(index_or_path=0, width=1920, height=1080, fps=30)
@@ -154,10 +154,7 @@ HF_USER=$(huggingface-cli whoami | head -n 1)
 echo $HF_USER
 ```

-Now you can record a dataset. To record 5 episodes and upload your dataset to the hub, adapt the code below for your robot and execute the command or API example.
-
-<hfoptions id="record">
-<hfoption id="Command">
+Now you can record a dataset. To record 2 episodes and upload your dataset to the hub, execute this command tailored to the SO101.
 ```bash
 python -m lerobot.record \
    --robot.type=so101_follower \
@@ -169,109 +166,9 @@ python -m lerobot.record \
    --teleop.id=my_awesome_leader_arm \
    --display_data=true \
    --dataset.repo_id=${HF_USER}/record-test \
-    --dataset.num_episodes=5 \
+    --dataset.num_episodes=2 \
    --dataset.single_task="Grab the black cube"
 ```
-</hfoption>
-<hfoption id="API example">
-```python
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.datasets.utils import hw_to_dataset_features
-from lerobot.robots.so100_follower import SO100Follower, SO100FollowerConfig
-from lerobot.teleoperators.so100_leader.config_so100_leader import SO100LeaderConfig
-from lerobot.teleoperators.so100_leader.so100_leader import SO100Leader
-from lerobot.utils.control_utils import init_keyboard_listener
-from lerobot.utils.utils import log_say
-from lerobot.utils.visualization_utils import _init_rerun
-from lerobot.record import record_loop
-
-NUM_EPISODES = 5
-FPS = 30
-EPISODE_TIME_SEC = 60
-RESET_TIME_SEC = 10
-TASK_DESCRIPTION = "My task description"
-
-# Create the robot and teleoperator configurations
-camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
-robot_config = SO100FollowerConfig(
-    port="/dev/tty.usbmodem58760434471", id="my_awesome_follower_arm", cameras=camera_config
-)
-teleop_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm")
-
-# Initialize the robot and teleoperator
-robot = SO100Follower(robot_config)
-teleop = SO100Leader(teleop_config)
-
-# Configure the dataset features
-action_features = hw_to_dataset_features(robot.action_features, "action")
-obs_features = hw_to_dataset_features(robot.observation_features, "observation")
-dataset_features = {**action_features, **obs_features}
-
-# Create the dataset
-dataset = LeRobotDataset.create(
-    repo_id="<hf_username>/<dataset_repo_id>",
-    fps=FPS,
-    features=dataset_features,
-    robot_type=robot.name,
-    use_videos=True,
-    image_writer_threads=4,
-)
-
-# Initialize the keyboard listener and rerun visualization
-_, events = init_keyboard_listener()
-_init_rerun(session_name="recording")
-
-# Connect the robot and teleoperator
-robot.connect()
-teleop.connect()
-
-episode_idx = 0
-while episode_idx < NUM_EPISODES and not events["stop_recording"]:
-    log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
-
-    record_loop(
-        robot=robot,
-        events=events,
-        fps=FPS,
-        teleop=teleop,
-        dataset=dataset,
-        control_time_s=EPISODE_TIME_SEC,
-        single_task=TASK_DESCRIPTION,
-        display_data=True,
-    )
-
-    # Reset the environment if not stopping or re-recording
-    if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]):
-        log_say("Reset the environment")
-        record_loop(
-            robot=robot,
-            events=events,
-            fps=FPS,
-            teleop=teleop,
-            control_time_s=RESET_TIME_SEC,
-            single_task=TASK_DESCRIPTION,
-            display_data=True,
-        )
-
-    if events["rerecord_episode"]:
-        log_say("Re-recording episode")
-        events["rerecord_episode"] = False
-        events["exit_early"] = False
-        dataset.clear_episode_buffer()
-        continue
-
-    dataset.save_episode()
-    episode_idx += 1
-
-# Clean up
-log_say("Stop recording")
-robot.disconnect()
-teleop.disconnect()
-dataset.push_to_hub()
-```
-</hfoption>
-</hfoptions>

 #### Dataset upload
 Locally, your dataset is stored in this folder: `~/.cache/huggingface/lerobot/{repo-id}`. At the end of data recording, your dataset will be uploaded on your Hugging Face page (e.g. https://huggingface.co/datasets/cadene/so101_test) that you can obtain by running:
@@ -293,7 +190,7 @@ The `record` function provides a suite of tools for capturing and managing data

 ##### 2. Checkpointing and Resuming
 - Checkpoints are automatically created during recording.
- If an issue occurs, you can resume by re-running the same command with `--resume=true`.
+- If an issue occurs, you can resume by re-running the same command with `--control.resume=true`.
 - To start recording from scratch, **manually delete** the dataset directory.

 ##### 3. Recording Parameters
@@ -336,10 +233,7 @@ echo ${HF_USER}/so101_test

 A useful feature is the `replay` function, which allows you to replay any episode that you've recorded or episodes from any dataset out there. This function helps you test the repeatability of your robot's actions and assess transferability across robots of the same model.

-You can replay the first episode on your robot with either the command below or with the API example:
-
-<hfoptions id="replay">
-<hfoption id="Command">
+You can replay the first episode on your robot with:
 ```bash
 python -m lerobot.replay \
    --robot.type=so101_follower \
@@ -348,62 +242,25 @@ python -m lerobot.replay \
    --dataset.repo_id=${HF_USER}/record-test \
    --dataset.episode=0 # choose the episode you want to replay
 ```
-</hfoption>
-<hfoption id="API example">
-```python
-import time
-
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig
-from lerobot.robots.so100_follower.so100_follower import SO100Follower
-from lerobot.utils.robot_utils import busy_wait
-from lerobot.utils.utils import log_say
-
-episode_idx = 0
-
-robot_config = SO100FollowerConfig(port="/dev/tty.usbmodem58760434471", id="my_awesome_follower_arm")
-
-robot = SO100Follower(robot_config)
-robot.connect()
-
-dataset = LeRobotDataset("<hf_username>/<dataset_repo_id>", episodes=[episode_idx])
-actions = dataset.hf_dataset.select_columns("action")
-
-log_say(f"Replaying episode {episode_idx}")
-for idx in range(dataset.num_frames):
-    t0 = time.perf_counter()
-
-    action = {
-        name: float(actions[idx]["action"][i]) for i, name in enumerate(dataset.features["action"]["names"])
-    }
-    robot.send_action(action)
-
-    busy_wait(1.0 / dataset.fps - (time.perf_counter() - t0))
-
-robot.disconnect()
-```
-</hfoption>
-</hfoptions>

 Your robot should replicate movements similar to those you recorded. For example, check out [this video](https://x.com/RemiCadene/status/1793654950905680090) where we use `replay` on an Aloha robot from [Trossen Robotics](https://www.trossenrobotics.com).

 ## Train a policy

-To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](../src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
+To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
  --dataset.repo_id=${HF_USER}/so101_test \
  --policy.type=act \
  --output_dir=outputs/train/act_so101_test \
  --job_name=act_so101_test \
  --policy.device=cuda \
-  --wandb.enable=true \
-  --policy.repo_id=${HF_USER}/my_policy
+  --wandb.enable=true
 ```

 Let's explain the command:
 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/so101_test`.
-2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../src/lerobot/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
+2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
 4. We provided `policy.device=cuda` since we are training on an NVIDIA GPU, but you could use `policy.device=mps` to train on Apple silicon.
 5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.

@@ -411,15 +268,11 @@ Training should take several hours. You will find checkpoints in `outputs/train/

 To resume training from a checkpoint, below is an example command to resume from `last` checkpoint of the `act_so101_test` policy:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
  --config_path=outputs/train/act_so101_test/checkpoints/last/pretrained_model/train_config.json \
  --resume=true
 ```

-If you do not want to push your model to the hub after training use `--policy.push_to_hub=false`.
-
-Additionally you can provide extra `tags` or specify a `license` for your model or make the model repo `private` by adding this: `--policy.private=true --policy.tags=\[ppo,rl\] --policy.license=mit`
-
 #### Train using Colab
 If your local computer doesn't have a powerful GPU, you can use Google Colab to train your model by following the [ACT training notebook](./notebooks#training-act).

@@ -438,12 +291,9 @@ huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \
  outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model
 ```

-## Run inference and evaluate your policy
+## Evaluate your policy

-You can use the `record` script from [`lerobot/record.py`](https://github.com/huggingface/lerobot/blob/main/lerobot/record.py) with a policy checkpoint as input, to run inference and evaluate your policy. For instance, run this command or API example to run inference and record 10 evaluation episodes:
-
-<hfoptions id="eval">
-<hfoption id="Command">
+You can use the `record` script from [`lerobot/record.py`](https://github.com/huggingface/lerobot/blob/main/lerobot/record.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
 ```bash
 python -m lerobot.record  \
  --robot.type=so100_follower \
@@ -451,7 +301,7 @@ python -m lerobot.record  \
  --robot.cameras="{ up: {type: opencv, index_or_path: /dev/video10, width: 640, height: 480, fps: 30}, side: {type: intelrealsense, serial_number_or_name: 233522074606, width: 640, height: 480, fps: 30}}" \
  --robot.id=my_awesome_follower_arm \
  --display_data=false \
-  --dataset.repo_id=${HF_USER}/eval_so100 \
+  --dataset.repo_id=$HF_USER/eval_so100 \
  --dataset.single_task="Put lego brick into the transparent box" \
  # <- Teleop optional if you want to teleoperate in between episodes \
  # --teleop.type=so100_leader \
@@ -459,82 +309,6 @@ python -m lerobot.record  \
  # --teleop.id=my_awesome_leader_arm \
  --policy.path=${HF_USER}/my_policy
 ```
-</hfoption>
-<hfoption id="API example">
-```python
-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.datasets.utils import hw_to_dataset_features
-from lerobot.policies.act.modeling_act import ACTPolicy
-from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig
-from lerobot.robots.so100_follower.so100_follower import SO100Follower
-from lerobot.utils.control_utils import init_keyboard_listener
-from lerobot.utils.utils import log_say
-from lerobot.utils.visualization_utils import _init_rerun
-from lerobot.record import record_loop
-
-NUM_EPISODES = 5
-FPS = 30
-EPISODE_TIME_SEC = 60
-TASK_DESCRIPTION = "My task description"
-
-# Create the robot configuration
-camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
-robot_config = SO100FollowerConfig(
-    port="/dev/tty.usbmodem58760434471", id="my_awesome_follower_arm", cameras=camera_config
-)
-
-# Initialize the robot
-robot = SO100Follower(robot_config)
-
-# Initialize the policy
-policy = ACTPolicy.from_pretrained("<hf_username>/<my_policy_repo_id>")
-
-# Configure the dataset features
-action_features = hw_to_dataset_features(robot.action_features, "action")
-obs_features = hw_to_dataset_features(robot.observation_features, "observation")
-dataset_features = {**action_features, **obs_features}
-
-# Create the dataset
-dataset = LeRobotDataset.create(
-    repo_id="<hf_username>/eval_<dataset_repo_id>",
-    fps=FPS,
-    features=dataset_features,
-    robot_type=robot.name,
-    use_videos=True,
-    image_writer_threads=4,
-)
-
-# Initialize the keyboard listener and rerun visualization
-_, events = init_keyboard_listener()
-_init_rerun(session_name="recording")
-
-# Connect the robot
-robot.connect()
-
-for episode_idx in range(NUM_EPISODES):
-    log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
-
-    # Run the policy inference loop
-    record_loop(
-        robot=robot,
-        events=events,
-        fps=FPS,
-        policy=policy,
-        dataset=dataset,
-        control_time_s=EPISODE_TIME_SEC,
-        single_task=TASK_DESCRIPTION,
-        display_data=True,
-    )
-
-    dataset.save_episode()
-
-# Clean up
-robot.disconnect()
-dataset.push_to_hub()
-```
-</hfoption>
-</hfoptions>

 As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
 1. There is an additional `--policy.path` argument, which indicates the path to your policy checkpoint (e.g. `outputs/train/eval_act_so101_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_so101_test`).
--- a/docs/source/il_sim.mdx
+++ b/docs/source/il_sim.mdx
@@ -35,14 +35,14 @@ Then we can run this command to start:
 <hfoption id="Linux">

 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
+python lerobot/scripts/rl/gym_manipulator.py --config_path path/to/env_config_gym_hil_il.json
 ```

 </hfoption>
 <hfoption id="MacOS">

 ```bash
-mjpython -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
+mjpython lerobot/scripts/rl/gym_manipulator.py --config_path path/to/env_config_gym_hil_il.json
 ```

 </hfoption>
@@ -81,9 +81,9 @@ If you uploaded your dataset to the hub you can [visualize your dataset online](

 ## Train a policy

-To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](../src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
+To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
  --dataset.repo_id=${HF_USER}/il_gym \
  --policy.type=act \
  --output_dir=outputs/train/il_sim_test \
@@ -94,7 +94,7 @@ python -m lerobot.scripts.train \

 Let's explain the command:
 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/il_gym`.
-2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../src/lerobot/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
+2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
 4. We provided `policy.device=cuda` since we are training on an NVIDIA GPU, but you could use `policy.device=mps` to train on Apple silicon.
 5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.

@@ -130,14 +130,14 @@ Then you can run this command to visualize your trained policy
 <hfoption id="Linux">

 ```bash
-python -m lerobot.scripts.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
+python lerobot/scripts/rl/eval_policy.py --config_path=path/to/eval_config_gym_hil.json
 ```

 </hfoption>
 <hfoption id="MacOS">

 ```bash
-mjpython -m lerobot.scripts.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
+mjpython lerobot/scripts/rl/eval_policy.py --config_path=path/to/eval_config_gym_hil.json
 ```

 </hfoption>
--- a/docs/source/integrate_hardware.mdx
+++ b/docs/source/integrate_hardware.mdx
@@ -2,7 +2,7 @@

 This tutorial will explain how to integrate your own robot design into the LeRobot ecosystem and have it access all of our tools (data collection, control pipelines, policy training and inference).

-To that end, we provide the [`Robot`](https://github.com/huggingface/lerobot/blob/main/lerobot/robots/robot.py) base class in the LeRobot which specifies a standard interface for physical robot integration. Let's see how to implement it.
+To that end, we provide the [`Robot`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/robots/robot.py) base class in LeRobot, which specifies a standard interface for physical robot integration. Let's see how to implement it.

 ## Prerequisites

@@ -14,11 +14,11 @@ To that end, we provide the [`Robot`](https://github.com/huggingface/lerobot/blo

 If you're using Feetech or Dynamixel motors, LeRobot provides built-in bus interfaces:

- [`FeetechMotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/feetech/feetech.py) – for controlling Feetech servos
- [`DynamixelMotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/dynamixel/dynamixel.py) – for controlling Dynamixel servos
+- [`FeetechMotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/feetech/feetech.py) – for controlling Feetech servos
+- [`DynamixelMotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/dynamixel/dynamixel.py) – for controlling Dynamixel servos

-Please refer to the [`MotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/motors_bus.py) abstract class to learn about its API.
-For a good example of how it can be used, you can have a look at our own [SO101 follower implementation](https://github.com/huggingface/lerobot/blob/main/lerobot/robots/so101_follower/so101_follower.py)
+Please refer to the [`MotorsBus`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/motors_bus.py) abstract class to learn about its API.
+For a good example of how it can be used, you can have a look at our own [SO101 follower implementation](https://github.com/huggingface/lerobot/blob/main/lerobot/common/robots/so101_follower/so101_follower.py)

 Use these if compatible. Otherwise, you'll need to find or write a Python interface (not covered in this tutorial):
 - Find an existing SDK in Python (or use bindings to C/C++)
@@ -32,7 +32,7 @@ For Feetech and Dynamixel, we currently support these servos:
        - SCS series (protocol 1): `scs0009`
    - Dynamixel (protocol 2.0 only): `xl330-m077`, `xl330-m288`, `xl430-w250`, `xm430-w350`, `xm540-w270`, `xc430-w150`

-If you are using Feetech or Dynamixel servos that are not in this list, you can add those in the [Feetech table](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/feetech/tables.py) or [Dynamixel table](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/dynamixel/tables.py). Depending on the model, this will require you to add model-specific information. In most cases though, there shouldn't be a lot of additions to do.
+If you are using Feetech or Dynamixel servos that are not in this list, you can add those in the [Feetech table](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/feetech/tables.py) or [Dynamixel table](https://github.com/huggingface/lerobot/blob/main/lerobot/common/motors/dynamixel/tables.py). Depending on the model, this will require you to add model-specific information. In most cases though, there shouldn't be a lot of additions to do.

 In the next sections, we'll use a `FeetechMotorsBus` as the motors interface for the examples. Replace it and adapt to your motors if necessary.

@@ -44,9 +44,9 @@ Here, we'll add the port name and one camera by default for our robot:
 ```python
 from dataclasses import dataclass, field

-from lerobot.cameras import CameraConfig
-from lerobot.cameras.opencv import OpenCVCameraConfig
-from lerobot.robots import RobotConfig
+from lerobot.common.cameras import CameraConfig
+from lerobot.common.cameras.opencv import OpenCVCameraConfig
+from lerobot.common.robots import RobotConfig


@RobotConfig.register_subclass("my_cool_robot")
@@ -72,10 +72,10 @@ Next, we'll create our actual robot class which inherits from `Robot`. This abst
 Here we'll create a simple 5-DoF robot with one camera. It could be a simple arm, but notice that the `Robot` abstract class does not assume anything about your robot's form factor. You can let your imagination run wild when designing new robots!

 ```python
-from lerobot.cameras import make_cameras_from_configs
-from lerobot.motors import Motor, MotorNormMode
-from lerobot.motors.feetech import FeetechMotorsBus
-from lerobot.robots import Robot
+from lerobot.common.cameras import make_cameras_from_configs
+from lerobot.common.motors import Motor, MotorNormMode
+from lerobot.common.motors.feetech import FeetechMotorsBus
+from lerobot.common.robots import Robot

 class MyCoolRobot(Robot):
    config_class = MyCoolRobotConfig
@@ -303,7 +303,7 @@ def send_action(self, action: dict[str, Any]) -> dict[str, Any]:

 ## Adding a Teleoperator

-For implementing teleoperation devices, we also provide a [`Teleoperator`](https://github.com/huggingface/lerobot/blob/main/lerobot/teleoperators/teleoperator.py) base class. This class is very similar to the `Robot` base class and also doesn't assume anything on form factor.
+For implementing teleoperation devices, we also provide a [`Teleoperator`](https://github.com/huggingface/lerobot/blob/main/lerobot/common/teleoperators/teleoperator.py) base class. This class is very similar to the `Robot` base class and also doesn't assume anything on form factor.

 The main differences are in the I/O functions: a teleoperator allows you to produce action via `get_action` and can receive feedback actions via `send_feedback`. Feedback could be anything controllable on the teleoperation device that could help the person controlling it understand the consequences of the actions sent. Think motion/force feedback on a leader arm, vibrations on a gamepad controller for example. To implement a teleoperator, you can follow this same tutorial and adapt it for these two methods.

--- a/docs/source/koch.mdx
+++ b/docs/source/koch.mdx
@@ -1 +1 @@
-../../src/lerobot/robots/koch_follower/koch.mdx
+../../lerobot/common/robots/koch_follower/koch.mdx
--- a/docs/source/lekiwi.mdx
+++ b/docs/source/lekiwi.mdx
@@ -1 +1 @@
-../../src/lerobot/robots/lekiwi/lekiwi.mdx
+../../lerobot/common/robots/lekiwi/lekiwi.mdx
--- a/docs/source/smolvla.mdx
+++ b/docs/source/smolvla.mdx
@@ -44,7 +44,7 @@ If you don't have a gpu device, you can train using our notebook on [![Google Co
 Pass your dataset to the training script using `--dataset.repo_id`. If you want to test your installation, run the following command where we use one of the datasets we collected for the [SmolVLA Paper](https://huggingface.co/papers/2506.01844).

 ```bash
-cd lerobot && python -m lerobot.scripts.train \
+cd lerobot && python lerobot/scripts/train.py \
  --policy.path=lerobot/smolvla_base \
  --dataset.repo_id=${HF_USER}/mydataset \
  --batch_size=64 \
@@ -62,7 +62,7 @@ You can start with a small batch size and increase it incrementally, if the GPU
 Fine-tuning is an art. For a complete overview of the options for finetuning, run

 ```bash
-python -m lerobot.scripts.train --help
+python lerobot/scripts/train.py --help
 ```

 <p align="center">
--- a/docs/source/so100.mdx
+++ b/docs/source/so100.mdx
@@ -1 +1 @@
-../../src/lerobot/robots/so100_follower/so100.mdx
+../../lerobot/common/robots/so100_follower/so100.mdx
--- a/docs/source/so101.mdx
+++ b/docs/source/so101.mdx
@@ -1 +1 @@
-../../src/lerobot/robots/so101_follower/so101.mdx
+../../lerobot/common/robots/so101_follower/so101.mdx
--- a/examples/1_load_lerobot_dataset.py
+++ b/examples/1_load_lerobot_dataset.py
@@ -32,7 +32,7 @@ import torch
 from huggingface_hub import HfApi

 import lerobot
-from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata

 # We ported a number of existing datasets ourselves, use this to see the list:
 print("List of available datasets:")
--- a/examples/2_evaluate_pretrained_policy.py
+++ b/examples/2_evaluate_pretrained_policy.py
@@ -30,7 +30,7 @@ import imageio
 import numpy
 import torch

-from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
+from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy

 # Create a directory to store the video of the evaluation
 output_directory = Path("outputs/eval/example_pusht_diffusion")
--- a/examples/3_train_policy.py
+++ b/examples/3_train_policy.py
@@ -22,74 +22,80 @@ from pathlib import Path

 import torch

+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
+from lerobot.common.datasets.utils import dataset_to_policy_features
+from lerobot.common.policies.smolvla.configuration_smolvla import SmolVLAConfig
+from lerobot.common.policies.smolvla.modeling_smolvla import SmolVLAPolicy
 from lerobot.configs.types import FeatureType
-from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
-from lerobot.datasets.utils import dataset_to_policy_features
-from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
-from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy

+def inject_normalization_stats(policy, dataset):
+    """Manually loads normalization stats from the dataset into the policy's state dictionary."""
+    stats = dataset.meta.stats
+    pol_state_dict = policy.state_dict()
+
+    keys_to_update = {
+        "normalize_inputs.buffer_observation_state.mean": ("observation.state", "mean"),
+        "normalize_inputs.buffer_observation_state.std": ("observation.state", "std"),
+        "normalize_targets.buffer_action.mean": ("action", "mean"),
+        "normalize_targets.buffer_action.std": ("action", "std"),
+        "unnormalize_outputs.buffer_action.mean": ("action", "mean"),
+        "unnormalize_outputs.buffer_action.std": ("action", "std"),
+    }
+
+    for pol_key, (stat_key, stat_type) in keys_to_update.items():
+        pol_state_dict[pol_key] = torch.from_numpy(stats[stat_key][stat_type])
+
+    policy.load_state_dict(pol_state_dict)
+    print("Normalization stats injected into the policy.")
+
+def prepare_batch(batch, device):
+    """
+    Prepares a batch of samples from the dataset for inference.
+    This involves moving tensors to the correct device,
+    and remapping image keys to match the policy's expectations.
+    """
+    batch = {
+        "observation.state": batch["observation.state"].to(device),
+        "observation.image": batch["observation.images.top"].to(device),
+        "observation.image2": batch["observation.images.wrist"].to(device),
+        "action": batch["action"].to(device),
+        "task": batch["task"],
+    }
+    return batch

 def main():
    # Create a directory to store the training checkpoint.
-    output_directory = Path("outputs/train/example_pusht_diffusion")
+    output_directory = Path("outputs/train/smolvlaplus_training")
    output_directory.mkdir(parents=True, exist_ok=True)

    # # Select your device
-    device = torch.device("cuda")
+    device = torch.device("mps")

    # Number of offline training steps (we'll only do offline training for this example.)
    # Adjust as you prefer. 5000 steps are needed to get something worth evaluating.
-    training_steps = 5000
+    training_steps = 10_000
    log_freq = 1
+    batch_size = 32

    # When starting from scratch (i.e. not from a pretrained policy), we need to specify 2 things before
    # creating the policy:
    #   - input/output shapes: to properly size the policy
    #   - dataset stats: for normalization and denormalization of input/outputs
-    dataset_metadata = LeRobotDatasetMetadata("lerobot/pusht")
-    features = dataset_to_policy_features(dataset_metadata.features)
-    output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
-    input_features = {key: ft for key, ft in features.items() if key not in output_features}
+    dataset = LeRobotDataset("lerobot/svla_so100_stacking")
+    policy = SmolVLAPolicy.from_pretrained("lerobot/smolvla_base")

-    # Policies are initialized with a configuration class, in this case `DiffusionConfig`. For this example,
-    # we'll just use the defaults and so no arguments other than input/output features need to be passed.
-    cfg = DiffusionConfig(input_features=input_features, output_features=output_features)
-
-    # We can now instantiate our policy with this config and the dataset stats.
-    policy = DiffusionPolicy(cfg, dataset_stats=dataset_metadata.stats)
+    # fix absence of normalization stats in the policy
+    inject_normalization_stats(policy, dataset)
    policy.train()
    policy.to(device)

-    # Another policy-dataset interaction is with the delta_timestamps. Each policy expects a given number frames
-    # which can differ for inputs, outputs and rewards (if there are some).
-    delta_timestamps = {
-        "observation.image": [i / dataset_metadata.fps for i in cfg.observation_delta_indices],
-        "observation.state": [i / dataset_metadata.fps for i in cfg.observation_delta_indices],
-        "action": [i / dataset_metadata.fps for i in cfg.action_delta_indices],
-    }
-
-    # In this case with the standard configuration for Diffusion Policy, it is equivalent to this:
-    delta_timestamps = {
-        # Load the previous image and state at -0.1 seconds before current frame,
-        # then load current image and state corresponding to 0.0 second.
-        "observation.image": [-0.1, 0.0],
-        "observation.state": [-0.1, 0.0],
-        # Load the previous action (-0.1), the next action to be executed (0.0),
-        # and 14 future actions with a 0.1 seconds spacing. All these actions will be
-        # used to supervise the policy.
-        "action": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4],
-    }
-
-    # We can then instantiate the dataset with these delta_timestamps configuration.
-    dataset = LeRobotDataset("lerobot/pusht", delta_timestamps=delta_timestamps)
-
    # Then we create our optimizer and dataloader for offline training.
-    optimizer = torch.optim.Adam(policy.parameters(), lr=1e-4)
+    optimizer = torch.optim.AdamW(policy.parameters(), lr=3e-4)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        num_workers=4,
-        batch_size=64,
-        shuffle=True,
+        batch_size=batch_size,
+        shuffle=False,
        pin_memory=device.type != "cpu",
        drop_last=True,
    )
@@ -99,7 +105,7 @@ def main():
    done = False
    while not done:
        for batch in dataloader:
-            batch = {k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in batch.items()}
+            batch = prepare_batch(batch, device)
            loss, _ = policy.forward(batch)
            loss.backward()
            optimizer.step()
--- a/examples/4_train_policy_with_script.md
+++ b/examples/4_train_policy_with_script.md
@@ -4,7 +4,7 @@ This tutorial will explain the training script, how to use it, and particularly

 ## The training script

-LeRobot offers a training script at [`lerobot/scripts/train.py`](../src/lerobot/scripts/train.py). At a high level it does the following:
+LeRobot offers a training script at [`lerobot/scripts/train.py`](../lerobot/scripts/train.py). At a high level it does the following:

 - Initialize/load a configuration for the following steps.
 - Instantiates a dataset.
@@ -21,7 +21,7 @@ In the training script, the main function `train` expects a `TrainPipelineConfig
 def train(cfg: TrainPipelineConfig):
 ```

-You can inspect the `TrainPipelineConfig` defined in [`lerobot/configs/train.py`](../src/lerobot/configs/train.py) (which is heavily commented and meant to be a reference to understand any option)
+You can inspect the `TrainPipelineConfig` defined in [`lerobot/configs/train.py`](../lerobot/configs/train.py) (which is heavily commented and meant to be a reference to understand any option)

 When running the script, inputs for the command line are parsed thanks to the `@parser.wrap()` decorator and an instance of this class is automatically generated. Under the hood, this is done with [Draccus](https://github.com/dlwh/draccus) which is a tool dedicated to this purpose. If you're familiar with Hydra, Draccus can similarly load configurations from config files (.json, .yaml) and also override their values through command line inputs. Unlike Hydra, these configurations are pre-defined in the code through dataclasses rather than being defined entirely in config files. This allows for more rigorous serialization/deserialization, typing, and to manipulate configuration as objects directly in the code and not as dictionaries or namespaces (which enables nice features in an IDE such as autocomplete, jump-to-def, etc.)

@@ -50,9 +50,9 @@ By default, every field takes its default value specified in the dataclass. If a

 ## Specifying values from the CLI

-Let's say that we want to train [Diffusion Policy](../src/lerobot/policies/diffusion) on the [pusht](https://huggingface.co/datasets/lerobot/pusht) dataset, using the [gym_pusht](https://github.com/huggingface/gym-pusht) environment for evaluation. The command to do so would look like this:
+Let's say that we want to train [Diffusion Policy](../lerobot/common/policies/diffusion) on the [pusht](https://huggingface.co/datasets/lerobot/pusht) dataset, using the [gym_pusht](https://github.com/huggingface/gym-pusht) environment for evaluation. The command to do so would look like this:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --dataset.repo_id=lerobot/pusht \
    --policy.type=diffusion \
    --env.type=pusht
@@ -60,12 +60,12 @@ python -m lerobot.scripts.train \

 Let's break this down:
 - To specify the dataset, we just need to specify its `repo_id` on the hub which is the only required argument in the `DatasetConfig`. The rest of the fields have default values and in this case we are fine with those so we can just add the option `--dataset.repo_id=lerobot/pusht`.
-- To specify the policy, we can just select diffusion policy using `--policy` appended with `.type`. Here, `.type` is a special argument which allows us to select config classes inheriting from `draccus.ChoiceRegistry` and that have been decorated with the `register_subclass()` method. To have a better explanation of this feature, have a look at this [Draccus demo](https://github.com/dlwh/draccus?tab=readme-ov-file#more-flexible-configuration-with-choice-types). In our code, we use this mechanism mainly to select policies, environments, robots, and some other components like optimizers. The policies available to select are located in [lerobot/policies](../src/lerobot/policies)
-- Similarly, we select the environment with `--env.type=pusht`. The different environment configs are available in [`lerobot/envs/configs.py`](../src/lerobot/envs/configs.py)
+- To specify the policy, we can just select diffusion policy using `--policy` appended with `.type`. Here, `.type` is a special argument which allows us to select config classes inheriting from `draccus.ChoiceRegistry` and that have been decorated with the `register_subclass()` method. To have a better explanation of this feature, have a look at this [Draccus demo](https://github.com/dlwh/draccus?tab=readme-ov-file#more-flexible-configuration-with-choice-types). In our code, we use this mechanism mainly to select policies, environments, robots, and some other components like optimizers. The policies available to select are located in [lerobot/common/policies](../lerobot/common/policies)
+- Similarly, we select the environment with `--env.type=pusht`. The different environment configs are available in [`lerobot/common/envs/configs.py`](../lerobot/common/envs/configs.py)

-Let's see another example. Let's say you've been training [ACT](../src/lerobot/policies/act) on [lerobot/aloha_sim_insertion_human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human) using the [gym-aloha](https://github.com/huggingface/gym-aloha) environment for evaluation with:
+Let's see another example. Let's say you've been training [ACT](../lerobot/common/policies/act) on [lerobot/aloha_sim_insertion_human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human) using the [gym-aloha](https://github.com/huggingface/gym-aloha) environment for evaluation with:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --policy.type=act \
    --dataset.repo_id=lerobot/aloha_sim_insertion_human \
    --env.type=aloha \
@@ -74,9 +74,9 @@ python -m lerobot.scripts.train \
 > Notice we added `--output_dir` to explicitly tell where to write outputs from this run (checkpoints, training state, configs etc.). This is not mandatory and if you don't specify it, a default directory will be created from the current date and time, env.type and policy.type. This will typically look like `outputs/train/2025-01-24/16-10-05_aloha_act`.

 We now want to train a different policy for aloha on another task. We'll change the dataset and use [lerobot/aloha_sim_transfer_cube_human](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_human) instead. Of course, we also need to change the task of the environment as well to match this other task.
-Looking at the [`AlohaEnv`](../src/lerobot/envs/configs.py) config, the task is `"AlohaInsertion-v0"` by default, which corresponds to the task we trained on in the command above. The [gym-aloha](https://github.com/huggingface/gym-aloha?tab=readme-ov-file#description) environment also has the `AlohaTransferCube-v0` task which corresponds to this other task we want to train on. Putting this together, we can train this new policy on this different task using:
+Looking at the [`AlohaEnv`](../lerobot/common/envs/configs.py) config, the task is `"AlohaInsertion-v0"` by default, which corresponds to the task we trained on in the command above. The [gym-aloha](https://github.com/huggingface/gym-aloha?tab=readme-ov-file#description) environment also has the `AlohaTransferCube-v0` task which corresponds to this other task we want to train on. Putting this together, we can train this new policy on this different task using:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --policy.type=act \
    --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
    --env.type=aloha \
@@ -111,7 +111,7 @@ Now, let's assume that we want to reproduce the run just above. That run has pro

 We can then simply load the config values from this file using:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
    --output_dir=outputs/train/act_aloha_transfer_2
 ```
@@ -119,7 +119,7 @@ python -m lerobot.scripts.train \

 Similarly to Hydra, we can still override some parameters in the CLI if we want to, e.g.:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
    --output_dir=outputs/train/act_aloha_transfer_2
    --policy.n_action_steps=80
@@ -128,7 +128,7 @@ python -m lerobot.scripts.train \

 `--config_path` can also accept the repo_id of a repo on the hub that contains a `train_config.json` file, e.g. running:
 ```bash
-python -m lerobot.scripts.train --config_path=lerobot/diffusion_pusht
+python lerobot/scripts/train.py --config_path=lerobot/diffusion_pusht
 ```
 will start a training run with the same configuration used for training [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht)

@@ -139,7 +139,7 @@ Being able to resume a training run is important in case it crashed or aborted f

 Let's reuse the command from the previous run and add a few more options:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --policy.type=act \
    --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
    --env.type=aloha \
@@ -155,7 +155,7 @@ INFO 2025-01-24 16:10:56 ts/train.py:263 Checkpoint policy after step 100
 ```
 Now let's simulate a crash by killing the process (hit `ctrl`+`c`). We can then simply resume this run from the last checkpoint available with:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
    --resume=true
 ```
@@ -164,7 +164,7 @@ You should see from the logging that your training picks up from where it left o
 Another reason for which you might want to resume a run is simply to extend training and add more training steps. The number of training steps is set by the option `--steps`, which is 100 000 by default.
 You could double the number of steps of the previous run with:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
    --resume=true \
    --steps=200000
@@ -195,7 +195,7 @@ In addition to the features currently in Draccus, we've added a special `.path`

 For example, we could fine-tune a [policy pre-trained on the aloha transfer task](https://huggingface.co/lerobot/act_aloha_sim_transfer_cube_human) on the aloha insertion task. We can achieve this with:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --policy.path=lerobot/act_aloha_sim_transfer_cube_human \
    --dataset.repo_id=lerobot/aloha_sim_insertion_human \
    --env.type=aloha \
@@ -236,7 +236,7 @@ We'll summarize here the main use cases to remember from this tutorial.

 #### Train a policy from scratch – CLI
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --policy.type=act \  # <- select 'act' policy
    --env.type=pusht \  # <- select 'pusht' environment
    --dataset.repo_id=lerobot/pusht  # <- train on this dataset
@@ -244,14 +244,14 @@ python -m lerobot.scripts.train \

 #### Train a policy from scratch - config file + CLI
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --config_path=path/to/pretrained_model \  # <- can also be a repo_id
    --policy.n_action_steps=80  # <- you may still override values
 ```

 #### Resume/continue a training run
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --config_path=checkpoint/pretrained_model/ \
    --resume=true \
    --steps=200000  # <- you can change some training parameters
@@ -259,7 +259,7 @@ python -m lerobot.scripts.train \

 #### Fine-tuning
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
    --policy.path=lerobot/act_aloha_sim_transfer_cube_human \  # <- can also be a local path to a checkpoint
    --dataset.repo_id=lerobot/aloha_sim_insertion_human \
    --env.type=aloha \
--- a/examples/advanced/1_add_image_transforms.py
+++ b/examples/advanced/1_add_image_transforms.py
@@ -22,7 +22,7 @@ from pathlib import Path

 from torchvision.transforms import ToPILImage, v2

-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

 dataset_repo_id = "lerobot/aloha_static_screw_driver"

--- a/examples/advanced/2_calculate_validation_loss.py
+++ b/examples/advanced/2_calculate_validation_loss.py
@@ -26,8 +26,8 @@ import math

 import torch

-from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
-from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
+from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy


 def main():
--- a/examples/backward_compatibility/replay.py
+++ b/examples/backward_compatibility/replay.py
@@ -35,8 +35,8 @@ from pprint import pformat

 import draccus

-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.robots import (  # noqa: F401
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.robots import (  # noqa: F401
    Robot,
    RobotConfig,
    koch_follower,
@@ -44,8 +44,8 @@ from lerobot.robots import (  # noqa: F401
    so100_follower,
    so101_follower,
 )
-from lerobot.utils.robot_utils import busy_wait
-from lerobot.utils.utils import (
+from lerobot.common.utils.robot_utils import busy_wait
+from lerobot.common.utils.utils import (
    init_logging,
    log_say,
 )
--- a/examples/lekiwi/evaluate.py
+++ b/examples/lekiwi/evaluate.py
@@ -1,90 +1,32 @@
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.datasets.utils import hw_to_dataset_features
-from lerobot.policies.act.modeling_act import ACTPolicy
-from lerobot.record import record_loop
-from lerobot.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
-from lerobot.utils.control_utils import init_keyboard_listener
-from lerobot.utils.utils import log_say
-from lerobot.utils.visualization_utils import _init_rerun
+from lerobot.common.datasets.utils import build_dataset_frame, hw_to_dataset_features
+from lerobot.common.policies.act.modeling_act import ACTPolicy
+from lerobot.common.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
+from lerobot.common.utils.control_utils import predict_action
+from lerobot.common.utils.utils import get_safe_torch_device

-NUM_EPISODES = 2
-FPS = 30
-EPISODE_TIME_SEC = 60
-TASK_DESCRIPTION = "My task description"
+NB_CYCLES_CLIENT_CONNECTION = 1000

-# Create the robot and teleoperator configurations
 robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
 robot = LeKiwiClient(robot_config)

-policy = ACTPolicy.from_pretrained("<hf_username>/<policy_repo_id>")
-
-# Configure the dataset features
-action_features = hw_to_dataset_features(robot.action_features, "action")
-obs_features = hw_to_dataset_features(robot.observation_features, "observation")
-dataset_features = {**action_features, **obs_features}
-
-# Create the dataset
-dataset = LeRobotDataset.create(
-    repo_id="<hf_username>/<eval_dataset_repo_id>",
-    fps=FPS,
-    features=dataset_features,
-    robot_type=robot.name,
-    use_videos=True,
-    image_writer_threads=4,
-)
-
-# To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi`
 robot.connect()

-_init_rerun(session_name="recording")
+policy = ACTPolicy.from_pretrained("pepijn223/act_lekiwi_circle")
+policy.reset()

-listener, events = init_keyboard_listener()
+obs_features = hw_to_dataset_features(robot.observation_features, "observation")

-if not robot.is_connected:
-    raise ValueError("Robot is not connected!")
+print("Running inference")
+i = 0
+while i < NB_CYCLES_CLIENT_CONNECTION:
+    obs = robot.get_observation()

-recorded_episodes = 0
-while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
-    log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}")
-
-    # Run the policy inference loop
-    record_loop(
-        robot=robot,
-        events=events,
-        fps=FPS,
-        policy=policy,
-        dataset=dataset,
-        control_time_s=EPISODE_TIME_SEC,
-        single_task=TASK_DESCRIPTION,
-        display_data=True,
+    observation_frame = build_dataset_frame(obs_features, obs, prefix="observation")
+    action_values = predict_action(
+        observation_frame, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp
    )
-
-    # Logic for reset env
-    if not events["stop_recording"] and (
-        (recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
-    ):
-        log_say("Reset the environment")
-        record_loop(
-            robot=robot,
-            events=events,
-            fps=FPS,
-            control_time_s=EPISODE_TIME_SEC,
-            single_task=TASK_DESCRIPTION,
-            display_data=True,
-        )
-
-    if events["rerecord_episode"]:
-        log_say("Re-record episode")
-        events["rerecord_episode"] = False
-        events["exit_early"] = False
-        dataset.clear_episode_buffer()
-        continue
-
-    dataset.save_episode()
-    recorded_episodes += 1
-
-# Upload to hub and clean up
-dataset.push_to_hub()
+    action = {key: action_values[i].item() for i, key in enumerate(robot.action_features)}
+    robot.send_action(action)
+    i += 1

 robot.disconnect()
-listener.stop()
--- a/examples/lekiwi/record.py
+++ b/examples/lekiwi/record.py
@@ -1,101 +1,67 @@
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.datasets.utils import hw_to_dataset_features
-from lerobot.record import record_loop
-from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
-from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
-from lerobot.teleoperators.keyboard import KeyboardTeleop, KeyboardTeleopConfig
-from lerobot.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig
-from lerobot.utils.control_utils import init_keyboard_listener
-from lerobot.utils.utils import log_say
-from lerobot.utils.visualization_utils import _init_rerun
+import time

-NUM_EPISODES = 3
-FPS = 30
-EPISODE_TIME_SEC = 30
-RESET_TIME_SEC = 10
-TASK_DESCRIPTION = "My task description"
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.datasets.utils import hw_to_dataset_features
+from lerobot.common.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
+from lerobot.common.robots.lekiwi.lekiwi_client import LeKiwiClient
+from lerobot.common.teleoperators.keyboard import KeyboardTeleop, KeyboardTeleopConfig
+from lerobot.common.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig

-# Create the robot and teleoperator configurations
-robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
-leader_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm")
-keyboard_config = KeyboardTeleopConfig()
+NB_CYCLES_CLIENT_CONNECTION = 250

-robot = LeKiwiClient(robot_config)
+leader_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem58760431551")
 leader_arm = SO100Leader(leader_arm_config)
+
+keyboard_config = KeyboardTeleopConfig()
 keyboard = KeyboardTeleop(keyboard_config)

-# Configure the dataset features
+robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
+robot = LeKiwiClient(robot_config)
+
 action_features = hw_to_dataset_features(robot.action_features, "action")
 obs_features = hw_to_dataset_features(robot.observation_features, "observation")
 dataset_features = {**action_features, **obs_features}

-# Create the dataset
 dataset = LeRobotDataset.create(
-    repo_id="<hf_username>/<dataset_repo_id>",
-    fps=FPS,
+    repo_id="pepijn223/lekiwi" + str(int(time.time())),
+    fps=10,
    features=dataset_features,
    robot_type=robot.name,
-    use_videos=True,
-    image_writer_threads=4,
 )

-# To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi`
-robot.connect()
 leader_arm.connect()
 keyboard.connect()
-
-_init_rerun(session_name="lekiwi_record")
-
-listener, events = init_keyboard_listener()
+robot.connect()

 if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
-    raise ValueError("Robot, leader arm of keyboard is not connected!")
+    exit()

-recorded_episodes = 0
-while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
-    log_say(f"Recording episode {recorded_episodes}")
+print("Starting LeKiwi recording")
+i = 0
+while i < NB_CYCLES_CLIENT_CONNECTION:
+    arm_action = leader_arm.get_action()
+    arm_action = {f"arm_{k}": v for k, v in arm_action.items()}

-    # Run the record loop
-    record_loop(
-        robot=robot,
-        events=events,
-        fps=FPS,
-        dataset=dataset,
-        teleop=[leader_arm, keyboard],
-        control_time_s=EPISODE_TIME_SEC,
-        single_task=TASK_DESCRIPTION,
-        display_data=True,
-    )
+    keyboard_keys = keyboard.get_action()

-    # Logic for reset env
-    if not events["stop_recording"] and (
-        (recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
-    ):
-        log_say("Reset the environment")
-        record_loop(
-            robot=robot,
-            events=events,
-            fps=FPS,
-            teleop=[leader_arm, keyboard],
-            control_time_s=RESET_TIME_SEC,
-            single_task=TASK_DESCRIPTION,
-            display_data=True,
-        )
+    base_action = robot._from_keyboard_to_base_action(keyboard_keys)

-    if events["rerecord_episode"]:
-        log_say("Re-record episode")
-        events["rerecord_episode"] = False
-        events["exit_early"] = False
-        dataset.clear_episode_buffer()
-        continue
+    action = {**arm_action, **base_action} if len(base_action) > 0 else arm_action

-    dataset.save_episode()
-    recorded_episodes += 1
+    action_sent = robot.send_action(action)
+    observation = robot.get_observation()

-# Upload to hub and clean up
-dataset.push_to_hub()
+    frame = {**action_sent, **observation}
+    task = "Dummy Example Task Dataset"

+    dataset.add_frame(frame, task)
+    i += 1
+
+print("Disconnecting Teleop Devices and LeKiwi Client")
 robot.disconnect()
 leader_arm.disconnect()
 keyboard.disconnect()
-listener.stop()
+
+print("Uploading dataset to the hub")
+dataset.save_episode()
+dataset.push_to_hub()
--- a/examples/lekiwi/replay.py
+++ b/examples/lekiwi/replay.py
@@ -1,33 +1,25 @@
 import time

-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
-from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
-from lerobot.utils.robot_utils import busy_wait
-from lerobot.utils.utils import log_say
-
-EPISODE_IDX = 0
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
+from lerobot.common.robots.lekiwi.lekiwi_client import LeKiwiClient
+from lerobot.common.utils.robot_utils import busy_wait

 robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
 robot = LeKiwiClient(robot_config)

-dataset = LeRobotDataset("<hf_username>/<dataset_repo_id>", episodes=[EPISODE_IDX])
-actions = dataset.hf_dataset.select_columns("action")
+dataset = LeRobotDataset("pepijn223/lekiwi1749025613", episodes=[0])

 robot.connect()

-if not robot.is_connected:
-    raise ValueError("Robot is not connected!")
-
-log_say(f"Replaying episode {EPISODE_IDX}")
-for idx in range(dataset.num_frames):
+print("Replaying episode…")
+for _, action_array in enumerate(dataset.hf_dataset["action"]):
    t0 = time.perf_counter()

-    action = {
-        name: float(actions[idx]["action"][i]) for i, name in enumerate(dataset.features["action"]["names"])
-    }
+    action = {name: float(action_array[i]) for i, name in enumerate(dataset.features["action"]["names"])}
    robot.send_action(action)

    busy_wait(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))

+print("Disconnecting LeKiwi Client")
 robot.disconnect()
--- a/examples/lekiwi/teleoperate.py
+++ b/examples/lekiwi/teleoperate.py
@@ -1,47 +1,32 @@
-import time
+from lerobot.common.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
+from lerobot.common.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop, KeyboardTeleopConfig
+from lerobot.common.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig

-from lerobot.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
-from lerobot.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop, KeyboardTeleopConfig
-from lerobot.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig
-from lerobot.utils.robot_utils import busy_wait
-from lerobot.utils.visualization_utils import _init_rerun, log_rerun_data
-
-FPS = 30
-
-# Create the robot and teleoperator configurations
 robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="my_lekiwi")
-teleop_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm")
-keyboard_config = KeyboardTeleopConfig(id="my_laptop_keyboard")
+
+teleop__arm_config = SO100LeaderConfig(
+    port="/dev/tty.usbmodem58760431551",
+    id="my_awesome_leader_arm",
+)
+
+teleop_keyboard_config = KeyboardTeleopConfig(
+    id="my_laptop_keyboard",
+)

 robot = LeKiwiClient(robot_config)
-leader_arm = SO100Leader(teleop_arm_config)
-keyboard = KeyboardTeleop(keyboard_config)
-
-# To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi`
+teleop_arm = SO100Leader(teleop__arm_config)
+telep_keyboard = KeyboardTeleop(teleop_keyboard_config)
 robot.connect()
-leader_arm.connect()
-keyboard.connect()
-
-_init_rerun(session_name="lekiwi_teleop")
-
-if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
-    raise ValueError("Robot, leader arm of keyboard is not connected!")
+teleop_arm.connect()
+telep_keyboard.connect()

 while True:
-    t0 = time.perf_counter()
-
    observation = robot.get_observation()

-    arm_action = leader_arm.get_action()
+    arm_action = teleop_arm.get_action()
    arm_action = {f"arm_{k}": v for k, v in arm_action.items()}

-    keyboard_keys = keyboard.get_action()
+    keyboard_keys = telep_keyboard.get_action()
    base_action = robot._from_keyboard_to_base_action(keyboard_keys)

-    log_rerun_data(observation, {**arm_action, **base_action})
-
-    action = {**arm_action, **base_action} if len(base_action) > 0 else arm_action
-
-    robot.send_action(action)
-
-    busy_wait(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
+    robot.send_action(arm_action | base_action)
--- a/src/lerobot/__init__.py
+++ b/src/lerobot/__init__.py
@@ -167,10 +167,10 @@ available_datasets = sorted(
    set(itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets))
 )

-# lists all available policies from `lerobot/policies`
+# lists all available policies from `lerobot/common/policies`
 available_policies = ["act", "diffusion", "tdmpc", "vqbet"]

-# lists all available robots from `lerobot/robot_devices/robots`
+# lists all available robots from `lerobot/common/robot_devices/robots`
 available_robots = [
    "koch",
    "koch_bimanual",
@@ -179,13 +179,13 @@ available_robots = [
    "so101",
 ]

-# lists all available cameras from `lerobot/robot_devices/cameras`
+# lists all available cameras from `lerobot/common/robot_devices/cameras`
 available_cameras = [
    "opencv",
    "intelrealsense",
 ]

-# lists all available motors from `lerobot/robot_devices/motors`
+# lists all available motors from `lerobot/common/robot_devices/motors`
 available_motors = [
    "dynamixel",
    "feetech",
--- a/src/lerobot/__version__.py
+++ b/src/lerobot/__version__.py
--- a/src/lerobot/calibrate.py
+++ b/src/lerobot/calibrate.py
@@ -31,9 +31,9 @@ from pprint import pformat

 import draccus

-from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig  # noqa: F401
-from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig  # noqa: F401
-from lerobot.robots import (  # noqa: F401
+from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig  # noqa: F401
+from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig  # noqa: F401
+from lerobot.common.robots import (  # noqa: F401
    Robot,
    RobotConfig,
    koch_follower,
@@ -42,7 +42,7 @@ from lerobot.robots import (  # noqa: F401
    so100_follower,
    so101_follower,
 )
-from lerobot.teleoperators import (  # noqa: F401
+from lerobot.common.teleoperators import (  # noqa: F401
    Teleoperator,
    TeleoperatorConfig,
    koch_leader,
@@ -50,7 +50,7 @@ from lerobot.teleoperators import (  # noqa: F401
    so100_leader,
    so101_leader,
 )
-from lerobot.utils.utils import init_logging
+from lerobot.common.utils.utils import init_logging


@dataclass
--- a/lerobot/common/cameras/__init__.py
+++ b/lerobot/common/cameras/__init__.py
--- a/lerobot/common/cameras/camera.py
+++ b/lerobot/common/cameras/camera.py
--- a/lerobot/common/cameras/configs.py
+++ b/lerobot/common/cameras/configs.py
--- a/lerobot/common/cameras/opencv/__init__.py
+++ b/lerobot/common/cameras/opencv/__init__.py
--- a/lerobot/common/cameras/opencv/camera_opencv.py
+++ b/lerobot/common/cameras/opencv/camera_opencv.py
@@ -27,7 +27,7 @@ from typing import Any, Dict, List
 import cv2
 import numpy as np

-from lerobot.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
+from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError

 from ..camera import Camera
 from ..utils import get_cv2_backend, get_cv2_rotation
@@ -64,8 +64,8 @@ class OpenCVCamera(Camera):

    Example:
        ```python
-        from lerobot.cameras.opencv import OpenCVCamera
-        from lerobot.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode, Cv2Rotation
+        from lerobot.common.cameras.opencv import OpenCVCamera
+        from lerobot.common.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode, Cv2Rotation

        # Basic usage with camera index 0
        config = OpenCVCameraConfig(index_or_path=0)
--- a/lerobot/common/cameras/opencv/configuration_opencv.py
+++ b/lerobot/common/cameras/opencv/configuration_opencv.py
--- a/lerobot/common/cameras/realsense/__init__.py
+++ b/lerobot/common/cameras/realsense/__init__.py
--- a/lerobot/common/cameras/realsense/camera_realsense.py
+++ b/lerobot/common/cameras/realsense/camera_realsense.py
@@ -29,7 +29,7 @@ try:
 except Exception as e:
    logging.info(f"Could not import realsense: {e}")

-from lerobot.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
+from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError

 from ..camera import Camera
 from ..configs import ColorMode
@@ -63,8 +63,8 @@ class RealSenseCamera(Camera):

    Example:
        ```python
-        from lerobot.cameras.realsense import RealSenseCamera, RealSenseCameraConfig
-        from lerobot.cameras import ColorMode, Cv2Rotation
+        from lerobot.common.cameras.realsense import RealSenseCamera, RealSenseCameraConfig
+        from lerobot.common.cameras import ColorMode, Cv2Rotation

        # Basic usage with serial number
        config = RealSenseCameraConfig(serial_number_or_name="0123456789") # Replace with actual SN
--- a/lerobot/common/cameras/realsense/configuration_realsense.py
+++ b/lerobot/common/cameras/realsense/configuration_realsense.py
--- a/lerobot/common/cameras/utils.py
+++ b/lerobot/common/cameras/utils.py
--- a/lerobot/common/constants.py
+++ b/lerobot/common/constants.py
@@ -22,14 +22,9 @@ OBS_STATE = "observation.state"
 OBS_IMAGE = "observation.image"
 OBS_IMAGES = "observation.images"
 ACTION = "action"
-OBS_IMAGE_2 = "observation.image2"
-OBS_IMAGE_3 = "observation.image3"
-OBS_IMAGE_4 = "observation.image4"
 REWARD = "next.reward"

 ROBOTS = "robots"
-TASK = "task"
-ROBOT_TYPE = "robot_type"
 TELEOPERATORS = "teleoperators"

 # files & directories
--- a/lerobot/common/datasets/backward_compatibility.py
+++ b/lerobot/common/datasets/backward_compatibility.py
@@ -20,7 +20,7 @@ The dataset you requested ({repo_id}) is in {version} format.
 We introduced a new format since v2.0 which is not backward compatible with v1.x.
 Please, use our conversion script. Modify the following command with your own task description:
 ```
-python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \\
+python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \\
    --repo-id {repo_id} \\
    --single-task "TASK DESCRIPTION."  # <---- /!\\ Replace TASK DESCRIPTION /!\\
 ```
@@ -40,7 +40,7 @@ The dataset you requested ({repo_id}) is in {version} format.
 While current version of LeRobot is backward-compatible with it, the version of your dataset still uses global
 stats instead of per-episode stats. Update your dataset stats to the new format using this command:
 ```
-python -m lerobot.datasets.v21.convert_dataset_v20_to_v21 --repo-id={repo_id}
+python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id={repo_id}
 ```

 If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
--- a/lerobot/common/datasets/card_template.md
+++ b/lerobot/common/datasets/card_template.md
--- a/lerobot/common/datasets/compute_stats.py
+++ b/lerobot/common/datasets/compute_stats.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 import numpy as np

-from lerobot.datasets.utils import load_image_as_numpy
+from lerobot.common.datasets.utils import load_image_as_numpy


 def estimate_num_samples(
@@ -125,30 +125,9 @@ def _assert_type_and_shape(stats_list: list[dict[str, dict]]):

 def aggregate_feature_stats(stats_ft_list: list[dict[str, dict]]) -> dict[str, dict[str, np.ndarray]]:
    """Aggregates stats for a single feature."""
-    # Filter out stats that don't have required keys
-    valid_stats = []
-    for s in stats_ft_list:
-        if all(key in s for key in ["mean", "std", "count", "min", "max"]):
-            valid_stats.append(s)
-        else:
-            # If count is missing, add it with a default value
-            if "count" not in s:
-                s["count"] = np.array([1])  # Default count
-            valid_stats.append(s)
-    
-    if not valid_stats:
-        # If no valid stats, return empty stats
-        return {
-            "min": np.array([0]),
-            "max": np.array([0]),
-            "mean": np.array([0]),
-            "std": np.array([0]),
-            "count": np.array([0]),
-        }
-    
-    means = np.stack([s["mean"] for s in valid_stats])
-    variances = np.stack([s["std"] ** 2 for s in valid_stats])
-    counts = np.stack([s["count"] for s in valid_stats])
+    means = np.stack([s["mean"] for s in stats_ft_list])
+    variances = np.stack([s["std"] ** 2 for s in stats_ft_list])
+    counts = np.stack([s["count"] for s in stats_ft_list])
    total_count = counts.sum(axis=0)

    # Prepare weighted mean by matching number of dimensions
@@ -163,13 +142,12 @@ def aggregate_feature_stats(stats_ft_list: list[dict[str, dict]]) -> dict[str, d
    delta_means = means - total_mean
    weighted_variances = (variances + delta_means**2) * counts
    total_variance = weighted_variances.sum(axis=0) / total_count
-    total_std = np.sqrt(total_variance)

    return {
-        "min": np.min(np.stack([s["min"] for s in valid_stats]), axis=0),
-        "max": np.max(np.stack([s["max"] for s in valid_stats]), axis=0),
+        "min": np.min(np.stack([s["min"] for s in stats_ft_list]), axis=0),
+        "max": np.max(np.stack([s["max"] for s in stats_ft_list]), axis=0),
        "mean": total_mean,
-        "std": total_std,
+        "std": np.sqrt(total_variance),
        "count": total_count,
    }

--- a/lerobot/common/datasets/factory.py
+++ b/lerobot/common/datasets/factory.py
@@ -18,22 +18,20 @@ from pprint import pformat

 import torch

-from lerobot.configs.policies import PreTrainedConfig
-from lerobot.configs.train import TrainPipelineConfig
-from lerobot.datasets.lerobot_dataset import (
+from lerobot.common.datasets.lerobot_dataset import (
    LeRobotDataset,
    LeRobotDatasetMetadata,
    MultiLeRobotDataset,
 )
-from lerobot.datasets.transforms import ImageTransforms
+from lerobot.common.datasets.transforms import ImageTransforms
+from lerobot.configs.policies import PreTrainedConfig
+from lerobot.configs.train import TrainPipelineConfig

 IMAGENET_STATS = {
    "mean": [[[0.485]], [[0.456]], [[0.406]]],  # (c,1,1)
    "std": [[[0.229]], [[0.224]], [[0.225]]],  # (c,1,1)
 }

-from lerobot.datasets.utils_must import EPISODES_DATASET_MAPPING, FEATURE_KEYS_MAPPING
-

 def resolve_delta_timestamps(
    cfg: PreTrainedConfig, ds_meta: LeRobotDatasetMetadata
@@ -83,87 +81,37 @@ def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDatas
    image_transforms = (
        ImageTransforms(cfg.dataset.image_transforms) if cfg.dataset.image_transforms.enable else None
    )
-    if "," in cfg.dataset.repo_id:
-        repo_id = cfg.dataset.repo_id.split(",")
-        repo_id = [r for r in repo_id if r]
-    else:
-        repo_id = cfg.dataset.repo_id
-    sampling_weights = cfg.dataset.sampling_weights.split(",") if cfg.dataset.sampling_weights else None
-    feature_keys_mapping = FEATURE_KEYS_MAPPING
-    if isinstance(repo_id, str):
-        revision = getattr(cfg.dataset, "revision", None)
+
+    if isinstance(cfg.dataset.repo_id, str):
        ds_meta = LeRobotDatasetMetadata(
-            cfg.dataset.repo_id,
-            feature_keys_mapping=feature_keys_mapping,
-            revision=revision,
+            cfg.dataset.repo_id, root=cfg.dataset.root, revision=cfg.dataset.revision
        )
        delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta)
        dataset = LeRobotDataset(
            cfg.dataset.repo_id,
-            root=getattr(cfg.dataset, "root", None),
+            root=cfg.dataset.root,
            episodes=cfg.dataset.episodes,
            delta_timestamps=delta_timestamps,
            image_transforms=image_transforms,
-            revision=revision,
+            revision=cfg.dataset.revision,
            video_backend=cfg.dataset.video_backend,
-            download_videos=True,
-            feature_keys_mapping=feature_keys_mapping,
-            max_action_dim=cfg.dataset.max_action_dim,
-            max_state_dim=cfg.dataset.max_state_dim,
-            max_num_images=cfg.dataset.max_num_images,
-            max_image_dim=cfg.dataset.max_image_dim,
        )
    else:
-        delta_timestamps = {}
-        episodes = {}
-        for i in range(len(repo_id)):
-            ds_meta = LeRobotDatasetMetadata(
-                repo_id[i],
-                feature_keys_mapping=feature_keys_mapping,
-            )  # FIXME(mshukor): ?
-            delta_timestamps[repo_id[i]] = resolve_delta_timestamps(cfg.policy, ds_meta)
-            episodes[repo_id[i]] = EPISODES_DATASET_MAPPING.get(repo_id[i], cfg.dataset.episodes)
-        # training_features = TRAINING_FEATURES.get(cfg.dataset.features_version, None)
-        # FIXME: (jadechoghari): check support for training features
-        training_features = None
+        raise NotImplementedError("The MultiLeRobotDataset isn't supported for now.")
        dataset = MultiLeRobotDataset(
-            repo_id,
+            cfg.dataset.repo_id,
            # TODO(aliberts): add proper support for multi dataset
-            episodes=episodes,
-            delta_timestamps=delta_timestamps,
+            # delta_timestamps=delta_timestamps,
            image_transforms=image_transforms,
            video_backend=cfg.dataset.video_backend,
-            download_videos=True,
-            sampling_weights=sampling_weights,
-            feature_keys_mapping=feature_keys_mapping,
-            max_action_dim=cfg.policy.max_action_dim,
-            max_state_dim=cfg.policy.max_state_dim,
-            max_num_images=cfg.dataset.max_num_images,
-            max_image_dim=cfg.dataset.max_image_dim,
-            train_on_all_features=cfg.dataset.train_on_all_features,
-            training_features=training_features,
-            discard_first_n_frames=cfg.dataset.discard_first_n_frames,
-            min_fps=cfg.dataset.min_fps,
-            max_fps=cfg.dataset.max_fps,
-            discard_first_idle_frames=cfg.dataset.discard_first_idle_frames,
-            motion_threshold=cfg.dataset.motion_threshold,
-            motion_window_size=cfg.dataset.motion_window_size,
-            motion_buffer=cfg.dataset.motion_buffer,
        )
        logging.info(
            "Multiple datasets were provided. Applied the following index mapping to the provided datasets: "
            f"{pformat(dataset.repo_id_to_index, indent=2)}"
        )
+
    if cfg.dataset.use_imagenet_stats:
-        # Initialize stats structure if it doesn't exist
-        if dataset.meta.stats is None:
-            dataset.meta.stats = {}
-        
        for key in dataset.meta.camera_keys:
-            # Initialize stats for this camera key if it doesn't exist
-            if key not in dataset.meta.stats or dataset.meta.stats[key] is None:
-                dataset.meta.stats[key] = {}
-                
            for stats_type, stats in IMAGENET_STATS.items():
                dataset.meta.stats[key][stats_type] = torch.tensor(stats, dtype=torch.float32)

--- a/lerobot/common/datasets/image_writer.py
+++ b/lerobot/common/datasets/image_writer.py
--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 import contextlib
 import logging
-import os
 import shutil
 from pathlib import Path
 from typing import Callable
@@ -31,18 +30,10 @@ from huggingface_hub import HfApi, snapshot_download
 from huggingface_hub.constants import REPOCARD_NAME
 from huggingface_hub.errors import RevisionNotFoundError

-from lerobot.constants import (
-    ACTION,
-    HF_LEROBOT_HOME,
-    OBS_ENV_STATE,
-    OBS_STATE,
-)
-from lerobot.datasets.compute_stats import (  # aggregate_stats_per_robot_type,
-    aggregate_stats,
-    compute_episode_stats,
-)
-from lerobot.datasets.image_writer import AsyncImageWriter, write_image
-from lerobot.datasets.utils import (
+from lerobot.common.constants import HF_LEROBOT_HOME
+from lerobot.common.datasets.compute_stats import aggregate_stats, compute_episode_stats
+from lerobot.common.datasets.image_writer import AsyncImageWriter, write_image
+from lerobot.common.datasets.utils import (
    DEFAULT_FEATURES,
    DEFAULT_IMAGE_PATH,
    INFO_PATH,
@@ -50,6 +41,7 @@ from lerobot.datasets.utils import (
    _validate_feature_names,
    append_jsonlines,
    backward_compatible_episodes_stats,
+    check_delta_timestamps,
    check_timestamps_sync,
    check_version_compatibility,
    create_empty_dataset_info,
@@ -66,36 +58,14 @@ from lerobot.datasets.utils import (
    load_info,
    load_stats,
    load_tasks,
-    map_dict_keys,
    validate_episode_buffer,
    validate_frame,
    write_episode,
    write_episode_stats,
    write_info,
    write_json,
-    # keep_datasets_with_the_same_features_per_robot_type,
-    # map_dict_pad_keys,
-    # keep_datasets_with_valid_fps,
-    # find_start_of_motion,
 )
-
-# mustafa stuff here
-from lerobot.datasets.utils_must import (
-    OBS_IMAGE,
-    OBS_IMAGE_2,
-    OBS_IMAGE_3,
-    ROBOT_TYPE_KEYS_MAPPING,
-    TASKS_KEYS_MAPPING,
-    aggregate_stats_per_robot_type,
-    create_padded_features,
-    find_start_of_motion,
-    keep_datasets_with_the_same_features_per_robot_type,
-    keep_datasets_with_valid_fps,
-    map_dict_keys,
-    pad_tensor,
-    reshape_features_to_max_dim,
-)
-from lerobot.datasets.video_utils import (
+from lerobot.common.datasets.video_utils import (
    VideoFrame,
    decode_video_frames,
    encode_video_frames,
@@ -104,15 +74,6 @@ from lerobot.datasets.video_utils import (
 )

 CODEBASE_VERSION = "v2.1"
-LEROBOT_HOME = Path(os.getenv("LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser()
-
-
-def find_start_of_motion(velocities, window_size, threshold, motion_buffer):
-    for t in range(len(velocities) - window_size):
-        window_mean = velocities[t : t + window_size].mean()
-        if window_mean > threshold:
-            return max(0, t - motion_buffer)  # include slight context before motion
-    return 0


 class LeRobotDatasetMetadata:
@@ -120,13 +81,10 @@ class LeRobotDatasetMetadata:
        self,
        repo_id: str,
        root: str | Path | None = None,
-        local_files_only: bool = False,
-        feature_keys_mapping: dict[str, str] | None = None,
        revision: str | None = None,
        force_cache_sync: bool = False,
    ):
        self.repo_id = repo_id
-        self.local_files_only = local_files_only
        self.revision = revision if revision else CODEBASE_VERSION
        self.root = Path(root) if root is not None else HF_LEROBOT_HOME / repo_id

@@ -141,27 +99,18 @@ class LeRobotDatasetMetadata:
            (self.root / "meta").mkdir(exist_ok=True, parents=True)
            self.pull_from_repo(allow_patterns="meta/")
            self.load_metadata()
-        # added by mshukor
-        self.feature_keys_mapping = feature_keys_mapping.get(repo_id, None) if feature_keys_mapping else None
-        self.inverse_feature_keys_mapping = (
-            {v: k for k, v in self.feature_keys_mapping.items() if v} if self.feature_keys_mapping else {}
-        )
-        self.info["features"] = map_dict_keys(
-            self.info["features"], feature_keys_mapping=self.feature_keys_mapping
-        )

    def load_metadata(self):
        self.info = load_info(self.root)
        check_version_compatibility(self.repo_id, self._version, CODEBASE_VERSION)
        self.tasks, self.task_to_task_index = load_tasks(self.root)
        self.episodes = load_episodes(self.root)
-        # Force all datasets to use v2.1 format (episodes_stats.jsonl) to avoid missing stats.json issues, because I converted all the datasets to v2.1 format. 
-        # if self._version < packaging.version.parse("v2.1"):
-        #     self.stats = load_stats(self.root)
-        #     self.episodes_stats = backward_compatible_episodes_stats(self.stats, self.episodes)
-        # else:
-        self.episodes_stats = load_episodes_stats(self.root)
-        self.stats = aggregate_stats(list(self.episodes_stats.values()))
+        if self._version < packaging.version.parse("v2.1"):
+            self.stats = load_stats(self.root)
+            self.episodes_stats = backward_compatible_episodes_stats(self.stats, self.episodes)
+        else:
+            self.episodes_stats = load_episodes_stats(self.root)
+            self.stats = aggregate_stats(list(self.episodes_stats.values()))

    def pull_from_repo(
        self,
@@ -228,15 +177,7 @@ class LeRobotDatasetMetadata:
    @property
    def video_keys(self) -> list[str]:
        """Keys to access visual modalities stored as videos."""
-        # changed
-        keys = []
-        for key, ft in self.features.items():
-            key_ = (
-                self.inverse_feature_keys_mapping.get(key, key) if self.inverse_feature_keys_mapping else key
-            )
-            if ft["dtype"] == "video":
-                keys.append(key_)
-        return keys
+        return [key for key, ft in self.features.items() if ft["dtype"] == "video"]

    @property
    def camera_keys(self) -> list[str]:
@@ -401,19 +342,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
        force_cache_sync: bool = False,
        download_videos: bool = True,
        video_backend: str | None = None,
-        local_files_only: bool = False,
-        # new thing by M
-        feature_keys_mapping: dict[str, str] | None = None,
-        max_action_dim: int = None,
-        max_state_dim: int = None,
-        max_num_images: int = None,
-        max_image_dim: int = None,
-        training_features: list | None = None,
-        discard_first_n_frames: int = 0,
-        discard_first_idle_frames: bool = False,
-        motion_threshold: float = 5e-2,
-        motion_window_size: int = 10,
-        motion_buffer: int = 3,
    ):
        """
        2 modes are available for instantiating this class, depending on 2 different use cases:
@@ -429,7 +357,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
              the dataset from that address and load it, pending your dataset is compliant with
              codebase_version v2.0. If your dataset has been created before this new format, you will be
              prompted to convert it using our conversion script from v1.6 to v2.0, which you can find at
-              lerobot/datasets/v2/convert_dataset_v1_to_v2.py.
+              lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py.


        2. Your dataset doesn't already exists (either on local disk or on the Hub): you can create an empty
@@ -527,35 +455,15 @@ class LeRobotDataset(torch.utils.data.Dataset):
        self.video_backend = video_backend if video_backend else get_safe_default_codec()
        self.delta_indices = None

-        # by mshukor
-        self.training_features = training_features
-        self.discard_first_n_frames = discard_first_n_frames
-        self.discard_first_idle_frames = discard_first_idle_frames
-        self.motion_threshold = motion_threshold
-        self.motion_window_size = motion_window_size
-        self.motion_buffer = motion_buffer
-
        # Unused attributes
        self.image_writer = None
        self.episode_buffer = None

        self.root.mkdir(exist_ok=True, parents=True)

-        # more mshukor
-        self.feature_keys_mapping = feature_keys_mapping.get(repo_id, None) if feature_keys_mapping else None
-        self.inverse_feature_keys_mapping = (
-            {v: k for k, v in self.feature_keys_mapping.items() if v} if self.feature_keys_mapping else {}
-        )
-
        # Load metadata
-        # TODO: change
        self.meta = LeRobotDatasetMetadata(
-            self.repo_id,
-            self.root,
-            local_files_only=local_files_only,
-            revision=self.revision,
-            force_cache_sync=force_cache_sync,
-            feature_keys_mapping=feature_keys_mapping,
+            self.repo_id, self.root, self.revision, force_cache_sync=force_cache_sync
        )
        if self.episodes is not None and self.meta._version >= packaging.version.parse("v2.1"):
            episodes_stats = [self.meta.episodes_stats[ep_idx] for ep_idx in self.episodes]
@@ -574,74 +482,17 @@ class LeRobotDataset(torch.utils.data.Dataset):

        self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes)

-        # mustafa code
-        if self.discard_first_n_frames > 0:
-            print("Discarding first n frames:", self.discard_first_n_frames)
-            self.subset_frame_ids = []
-            for ep_idx in range(self.num_episodes):
-                from_ = self.episode_data_index["from"][ep_idx]
-                to_ = self.episode_data_index["to"][ep_idx]
-                # TODO implement advanced strategy
-                self.subset_frame_ids += [
-                    frame_idx for frame_idx in range(from_ + int(self.fps * self.discard_first_n_frames), to_)
-                ]
-        elif self.discard_first_idle_frames:
-            print(
-                f"Discarding first idle frames: motion_threshold={self.motion_threshold}, motion_window_size={self.motion_window_size}, motion_buffer={self.motion_buffer}"
-            )
-            self.robot_states = torch.stack(self.hf_dataset[OBS_STATE]).numpy()  # shape: [T, D]
-            self.subset_frame_ids = []
-            for ep_idx in range(self.num_episodes):
-                from_ = self.episode_data_index["from"][ep_idx]
-                to_ = self.episode_data_index["to"][ep_idx]
-                ep_states = self.robot_states[from_:to_]
-                velocities = np.linalg.norm(np.diff(ep_states, axis=0), axis=1)
-                velocities = np.concatenate([[0.0], velocities])
-                start_idx = find_start_of_motion(
-                    velocities, self.motion_window_size, self.motion_threshold, self.motion_buffer
-                )
-                self.subset_frame_ids += list(range(from_ + start_idx, to_))
-
        # Check timestamps
-        # commented TODO: check why
-        # timestamps = torch.stack(self.hf_dataset["timestamp"]).numpy()
-        # episode_indices = torch.stack(self.hf_dataset["episode_index"]).numpy()
-        # ep_data_index_np = {k: t.numpy() for k, t in self.episode_data_index.items()}
-        # check_timestamps_sync(timestamps, episode_indices, ep_data_index_np, self.fps, self.tolerance_s)
+        timestamps = torch.stack(self.hf_dataset["timestamp"]).numpy()
+        episode_indices = torch.stack(self.hf_dataset["episode_index"]).numpy()
+        ep_data_index_np = {k: t.numpy() for k, t in self.episode_data_index.items()}
+        check_timestamps_sync(timestamps, episode_indices, ep_data_index_np, self.fps, self.tolerance_s)

        # Setup delta_indices
        if self.delta_timestamps is not None:
-            # TODO: check why commented
-            # check_delta_timestamps(self.delta_timestamps, self.fps, self.tolerance_s)
+            check_delta_timestamps(self.delta_timestamps, self.fps, self.tolerance_s)
            self.delta_indices = get_delta_indices(self.delta_timestamps, self.fps)

-        # Mustafa
-        self.meta.info["features"] = map_dict_keys(
-            self.meta.info["features"],
-            feature_keys_mapping=self.feature_keys_mapping,
-            training_features=self.training_features,
-        )
-        self.keys_to_max_dim = {
-            ACTION: max_action_dim,
-            OBS_ENV_STATE: max_state_dim,
-            OBS_STATE: max_state_dim,
-            OBS_IMAGE: max_image_dim,
-            OBS_IMAGE_2: max_image_dim,
-            OBS_IMAGE_3: max_image_dim,
-        }
-        self.meta.info["features"] = reshape_features_to_max_dim(
-            self.meta.info["features"], reshape_dim=-1, keys_to_max_dim=self.keys_to_max_dim
-        )
-        self.meta.stats = map_dict_keys(
-            self.meta.stats,
-            feature_keys_mapping=self.feature_keys_mapping,
-            training_features=self.training_features,
-        )
-        self.robot_type = self.meta.info.get("robot_type", "")
-        # Override tasks
-        print(TASKS_KEYS_MAPPING.get(self.repo_id, self.meta.tasks), "previous", self.meta.tasks)
-        self.meta.tasks = TASKS_KEYS_MAPPING.get(self.repo_id, self.meta.tasks)
-
    def push_to_hub(
        self,
        branch: str | None = None,
@@ -790,18 +641,12 @@ class LeRobotDataset(torch.utils.data.Dataset):
            return get_hf_features_from_features(self.features)

    def _get_query_indices(self, idx: int, ep_idx: int) -> tuple[dict[str, list[int | bool]]]:
-        # Bounds check to prevent IndexError when episode_index is out of range
-        if ep_idx >= len(self.episode_data_index["from"]):
-            # Fall back to the last valid episode
-            ep_idx = len(self.episode_data_index["from"]) - 1
-            
        ep_start = self.episode_data_index["from"][ep_idx]
        ep_end = self.episode_data_index["to"][ep_idx]
        query_indices = {
            key: [max(ep_start.item(), min(ep_end.item() - 1, idx + delta)) for delta in delta_idx]
            for key, delta_idx in self.delta_indices.items()
        }
-        # FIXME(mshukor): what if we train on multiple datasets with different features
        padding = {  # Pad values outside of current episode range
            f"{key}_is_pad": torch.BoolTensor(
                [(idx + delta < ep_start.item()) | (idx + delta >= ep_end.item()) for delta in delta_idx]
@@ -825,21 +670,12 @@ class LeRobotDataset(torch.utils.data.Dataset):

        return query_timestamps

-    # TODO: changed by mustafa
    def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict:
-        queries = {}
-        for key, q_idx in query_indices.items():
-            if (
-                key not in self.meta.video_keys
-                and self.inverse_feature_keys_mapping.get(key, key) not in self.meta.video_keys
-            ):
-                key_ = (
-                    self.inverse_feature_keys_mapping.get(key, key)
-                    if self.inverse_feature_keys_mapping
-                    else key
-                )
-                queries[key] = torch.stack(self.hf_dataset.select(q_idx)[key_])
-        return queries
+        return {
+            key: torch.stack(self.hf_dataset.select(q_idx)[key])
+            for key, q_idx in query_indices.items()
+            if key not in self.meta.video_keys
+        }

    def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
        """Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function
@@ -863,12 +699,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
    def __len__(self):
        return self.num_frames

-    # changed by mshukor
    def __getitem__(self, idx) -> dict:
-        if self.discard_first_n_frames > 0 or self.discard_first_idle_frames:
-            idx = self.subset_frame_ids[idx]
        item = self.hf_dataset[idx]
-        item = map_dict_keys(item, feature_keys_mapping=self.feature_keys_mapping)
        ep_idx = item["episode_index"].item()

        query_indices = None
@@ -885,27 +717,15 @@ class LeRobotDataset(torch.utils.data.Dataset):
            video_frames = self._query_videos(query_timestamps, ep_idx)
            item = {**video_frames, **item}

+        if self.image_transforms is not None:
+            image_keys = self.meta.camera_keys
+            for cam in image_keys:
+                item[cam] = self.image_transforms(item[cam])
+
        # Add task as a string
        task_idx = item["task_index"].item()
-        try:
-            item["task"] = self.meta.tasks[task_idx]
-        except:
-            print(self.meta.tasks, task_idx, self.repo_id)
-        if "robot_type" not in item:
-            item["robot_type"] = self.robot_type
-        item = map_dict_keys(
-            item, feature_keys_mapping=self.feature_keys_mapping, training_features=self.training_features
-        )
-        # Add padded features
-        # item = self._add_padded_features(item, self.training_features)
-        if self.image_transforms is not None:
-            for cam in item:
-                if cam in self.meta.camera_keys or ("image" in cam and "is_pad" not in cam):
-                    item[cam] = self.image_transforms(item[cam])
-        # Map pad keys
-        # print(item.keys(), "before")
-        # item = map_dict_pad_keys(item, feature_keys_mapping=self.feature_keys_mapping, training_features=self.training_features)
-        # print(item.keys())
+        item["task"] = self.meta.tasks[task_idx]
+
        return item

    def __repr__(self):
@@ -1165,7 +985,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
        )
        obj.repo_id = obj.meta.repo_id
        obj.root = obj.meta.root
-        obj.local_files_only = obj.meta.local_files_only
        obj.revision = None
        obj.tolerance_s = tolerance_s
        obj.image_writer = None
@@ -1186,106 +1005,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
        return obj


-class MultiLeRobotDatasetMeta:
-    def __init__(
-        self,
-        datasets: list[LeRobotDataset],
-        repo_ids: list[str],
-        keys_to_max_dim: dict[str, int],
-        train_on_all_features: bool = False,
-    ):
-        self.repo_ids = repo_ids
-        self.keys_to_max_dim = keys_to_max_dim
-        self.train_on_all_features = train_on_all_features
-        self.robot_types = [ds.meta.info["robot_type"] for ds in datasets]
-
-        # assign robot_type if missing
-        for ds in datasets:
-            ds.meta.info["robot_type"] = ROBOT_TYPE_KEYS_MAPPING.get(ds.repo_id, ds.meta.info["robot_type"])
-            ds.robot_type = ds.meta.info["robot_type"]
-
-        # step 1: compute disabled features
-        self.disabled_features = set()
-        if not self.train_on_all_features:
-            intersection = set(datasets[0].features)
-            for ds in datasets:
-                intersection.intersection_update(ds.features)
-            if not intersection:
-                raise RuntimeError("No common features across datasets.")
-            for repo_id, ds in zip(repo_ids, datasets, strict=False):
-                extra = set(ds.features) - intersection
-                logging.warning(f"Disabling {extra} for repo {repo_id}")
-                self.disabled_features.update(extra)
-
-        # step 2: build union_features excluding disabled
-        self.union_features = {}
-        for ds in datasets:
-            for k, v in ds.features.items():
-                if k not in self.disabled_features:
-                    self.union_features[k] = v
-
-        # step 3: reshape feature schema
-        self.features = reshape_features_to_max_dim(
-            self.union_features, reshape_dim=-1, keys_to_max_dim=self.keys_to_max_dim
-        )
-
-        # step 4: aggregate stats
-        self.stats = aggregate_stats_per_robot_type(datasets)
-        for robot_type_, stats_ in self.stats.items():
-            for feat_key, feat_stats in stats_.items():
-                if feat_key in [ACTION, OBS_ENV_STATE, OBS_STATE]:
-                    for k, v in feat_stats.items():
-                        pad_value = 0 if k in ["min", "mean"] else 1
-                        self.stats[robot_type_][feat_key][k] = pad_tensor(
-                            v,
-                            max_size=self.keys_to_max_dim.get(feat_key, -1),
-                            pad_dim=-1,
-                            pad_value=pad_value,
-                        )
-
-        # step 5: episodes & tasks
-        self.episodes = {repo_id: ds.meta.episodes for repo_id, ds in zip(repo_ids, datasets, strict=False)}
-        self.tasks = {repo_id: ds.meta.tasks for repo_id, ds in zip(repo_ids, datasets, strict=False)}
-        self.info = {repo_id: ds.meta.info for repo_id, ds in zip(repo_ids, datasets, strict=False)}
-
-
-class MultiLeRobotDatasetCleaner:
-    def __init__(
-        self,
-        datasets: list[LeRobotDataset],
-        repo_ids: list[str],
-        sampling_weights: list[float],
-        datasets_repo_ids: list[str],
-        min_fps: int = 1,
-        max_fps: int = 100,
-    ):
-        self.original_datasets = datasets
-        self.original_repo_ids = repo_ids
-        self.original_weights = sampling_weights
-        self.original_datasets_repo_ids = datasets_repo_ids
-
-        # step 1: remove datasets with invalid fps
-        valid_fps_datasets = keep_datasets_with_valid_fps(datasets, min_fps=min_fps, max_fps=max_fps)
-
-        # step 2: keep datasets with same features per robot type
-        consistent_datasets, keep_mask = keep_datasets_with_the_same_features_per_robot_type(
-            valid_fps_datasets
-        )
-
-        self.cleaned_datasets = consistent_datasets
-        self.keep_mask = keep_mask
-        self.cleaned_weights = [sampling_weights[i] for i in range(len(valid_fps_datasets)) if keep_mask[i]]
-        self.cleaned_repo_ids = [repo_ids[i] for i in range(len(valid_fps_datasets)) if keep_mask[i]]
-        self.cleaned_datasets_repo_ids = [
-            datasets_repo_ids[i] for i in range(len(valid_fps_datasets)) if keep_mask[i]
-        ]
-
-        self.cumulative_sizes = np.array(
-            [0] + list(torch.cumsum(torch.tensor([len(d) for d in consistent_datasets]), dim=0))
-        )
-        self.cleaned_weights = np.array(self.cleaned_weights, dtype=np.float32)
-
-
 class MultiLeRobotDataset(torch.utils.data.Dataset):
    """A dataset consisting of multiple underlying `LeRobotDataset`s.

@@ -1302,24 +1021,7 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):
        delta_timestamps: dict[list[float]] | None = None,
        tolerances_s: dict | None = None,
        download_videos: bool = True,
-        local_files_only: bool = False,
        video_backend: str | None = None,
-        # add
-        sampling_weights: list[float] | None = None,
-        feature_keys_mapping: dict[str, dict[str, str]] | None = None,
-        max_action_dim: int = None,
-        max_state_dim: int = None,
-        max_num_images: int = None,
-        max_image_dim: int = None,
-        train_on_all_features: bool = False,
-        training_features: list | None = None,
-        discard_first_n_frames: int = 0,
-        min_fps: int = 1,
-        max_fps: int = 100,
-        discard_first_idle_frames: bool = False,
-        motion_threshold: float = 0.05,
-        motion_window_size: int = 10,
-        motion_buffer: int = 3,
    ):
        super().__init__()
        self.repo_ids = repo_ids
@@ -1327,89 +1029,46 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):
        self.tolerances_s = tolerances_s if tolerances_s else dict.fromkeys(repo_ids, 0.0001)
        # Construct the underlying datasets passing everything but `transform` and `delta_timestamps` which
        # are handled by this class.
-        _datasets = []
-        datasets_repo_ids = []
-        self.sampling_weights = []
-        self.training_features = training_features
-
-        sampling_weights = sampling_weights if sampling_weights is not None else [1] * len(repo_ids)
-        assert len(sampling_weights) == len(repo_ids), (
-            "The number of sampling weights must match the number of datasets. "
-            f"Got {len(sampling_weights)} weights for {len(repo_ids)} datasets."
-        )
-        for i, repo_id in enumerate(repo_ids):
-            try:
-                # delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta)
-                _datasets.append(
-                    LeRobotDataset(
-                        repo_id,
-                        root=self.root / repo_id,
-                        episodes=episodes.get(repo_id, None) if episodes else None,
-                        image_transforms=image_transforms,
-                        delta_timestamps=delta_timestamps.get(repo_id, None) if delta_timestamps else None,
-                        tolerance_s=self.tolerances_s[repo_id],
-                        download_videos=download_videos,
-                        video_backend=video_backend,
-                        feature_keys_mapping=feature_keys_mapping,
-                        training_features=training_features,
-                        discard_first_n_frames=discard_first_n_frames,
-                        discard_first_idle_frames=discard_first_idle_frames,
-                        motion_threshold=motion_threshold,
-                        motion_window_size=motion_window_size,
-                        motion_buffer=motion_buffer,
-                    )
-                )
-                datasets_repo_ids.append(repo_id)
-                self.sampling_weights.append(float(sampling_weights[i]))
-            except Exception as e:
-                print(f"Failed to load dataset: {repo_id} due to Exception: {e}")
-        print(
-            f"Finish loading {len(_datasets)} datasets, with sampling weights: {self.sampling_weights} corresponding to: {datasets_repo_ids}"
-        )
+        self._datasets = [
+            LeRobotDataset(
+                repo_id,
+                root=self.root / repo_id,
+                episodes=episodes[repo_id] if episodes else None,
+                image_transforms=image_transforms,
+                delta_timestamps=delta_timestamps,
+                tolerance_s=self.tolerances_s[repo_id],
+                download_videos=download_videos,
+                video_backend=video_backend,
+            )
+            for repo_id in repo_ids
+        ]

        # Disable any data keys that are not common across all of the datasets. Note: we may relax this
        # restriction in future iterations of this class. For now, this is necessary at least for being able
        # to use PyTorch's default DataLoader collate function.
-        # FIXME(mshukor): apply mapping to unify used keys
-        # FIXME(mshukor): pad based on types in case we have more than one state?
+        self.disabled_features = set()
+        intersection_features = set(self._datasets[0].features)
+        for ds in self._datasets:
+            intersection_features.intersection_update(ds.features)
+        if len(intersection_features) == 0:
+            raise RuntimeError(
+                "Multiple datasets were provided but they had no keys common to all of them. "
+                "The multi-dataset functionality currently only keeps common keys."
+            )
+        for repo_id, ds in zip(self.repo_ids, self._datasets, strict=True):
+            extra_keys = set(ds.features).difference(intersection_features)
+            logging.warning(
+                f"keys {extra_keys} of {repo_id} were disabled as they are not contained in all the "
+                "other datasets."
+            )
+            self.disabled_features.update(extra_keys)
+
        self.image_transforms = image_transforms
-        self.delta_timestamps = (
-            delta_timestamps.get(repo_id, None) if delta_timestamps else None
-        )  # delta_timestamps # FIXME(mshukor): last repo?
-        # In case datasets with the same robot_type have different features
-        cleaner = MultiLeRobotDatasetCleaner(
-            datasets=_datasets,
-            repo_ids=repo_ids,
-            sampling_weights=self.sampling_weights,
-            datasets_repo_ids=datasets_repo_ids,
-            min_fps=min_fps,
-            max_fps=max_fps,
-        )
-        self._datasets = cleaner.cleaned_datasets
-        self.sampling_weights = cleaner.cleaned_weights
-        self.repo_ids = cleaner.cleaned_repo_ids
-        self.datasets_repo_ids = cleaner.cleaned_datasets_repo_ids
-        self.cumulative_sizes = cleaner.cumulative_sizes
-        # self.meta = copy.deepcopy(self._datasets[0].meta)  # FIXME(mshukor): aggregate meta from all datasets
-        # self.meta.info = {
-        #     repo_id: ds.meta.info for repo_id, ds in zip(self.repo_ids, self._datasets, strict=False)
-        # }
-        # self.meta.info["features"] = self._datasets[0].meta.info["features"] # Assume all datasets have the same features
-        self.meta = MultiLeRobotDatasetMeta(
-            datasets=self._datasets,
-            repo_ids=self.repo_ids,
-            keys_to_max_dim={
-                ACTION: max_action_dim,
-                OBS_ENV_STATE: max_state_dim,
-                OBS_STATE: max_state_dim,
-                OBS_IMAGE: max_image_dim,
-                OBS_IMAGE_2: max_image_dim,
-                OBS_IMAGE_3: max_image_dim,
-            },
-            train_on_all_features=train_on_all_features,
-        )
-        self.disabled_features = self.meta.disabled_features
-        self.stats = self.meta.stats
+        self.delta_timestamps = delta_timestamps
+        # TODO(rcadene, aliberts): We should not perform this aggregation for datasets
+        # with multiple robots of different ranges. Instead we should have one normalization
+        # per robot.
+        self.stats = aggregate_stats([dataset.meta.stats for dataset in self._datasets])

    @property
    def repo_id_to_index(self):
@@ -1497,14 +1156,23 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):
    def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
        if idx >= len(self):
            raise IndexError(f"Index {idx} out of bounds.")
-        dataset_idx = np.searchsorted(self.cumulative_sizes, idx, side="right").item() - 1
-        local_idx = (idx - self.cumulative_sizes[dataset_idx]).item()
-        item = self._datasets[dataset_idx][local_idx]
+        # Determine which dataset to get an item from based on the index.
+        start_idx = 0
+        dataset_idx = 0
+        for dataset in self._datasets:
+            if idx >= start_idx + dataset.num_frames:
+                start_idx += dataset.num_frames
+                dataset_idx += 1
+                continue
+            break
+        else:
+            raise AssertionError("We expect the loop to break out as long as the index is within bounds.")
+        item = self._datasets[dataset_idx][idx - start_idx]
        item["dataset_index"] = torch.tensor(dataset_idx)
-        item = create_padded_features(item, self.meta.features)
-        for data_key in self.disabled_features:  # FIXME(mshukor): not in getitem?
+        for data_key in self.disabled_features:
            if data_key in item:
                del item[data_key]
+
        return item

    def __repr__(self):
--- a/lerobot/common/datasets/online_buffer.py
+++ b/lerobot/common/datasets/online_buffer.py
@@ -28,7 +28,7 @@ from typing import Any
 import numpy as np
 import torch

-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset


 def _make_memmap_safe(**kwargs) -> np.memmap:
--- a/lerobot/common/datasets/push_dataset_to_hub/utils.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/utils.py
@@ -23,7 +23,7 @@ import numpy
 import PIL
 import torch

-from lerobot.datasets.video_utils import encode_video_frames
+from lerobot.common.datasets.video_utils import encode_video_frames


 def concatenate_episodes(ep_dicts):
--- a/lerobot/common/datasets/sampler.py
+++ b/lerobot/common/datasets/sampler.py
--- a/lerobot/common/datasets/transforms.py
+++ b/lerobot/common/datasets/transforms.py
--- a/lerobot/common/datasets/utils.py
+++ b/lerobot/common/datasets/utils.py
@@ -35,14 +35,14 @@ from huggingface_hub.errors import RevisionNotFoundError
 from PIL import Image as PILImage
 from torchvision import transforms

-from lerobot.configs.types import DictLike, FeatureType, PolicyFeature
-from lerobot.datasets.backward_compatibility import (
+from lerobot.common.datasets.backward_compatibility import (
    V21_MESSAGE,
    BackwardCompatibilityError,
    ForwardCompatibilityError,
 )
-from lerobot.robots import Robot
-from lerobot.utils.utils import is_valid_numpy_dtype_string
+from lerobot.common.robots import Robot
+from lerobot.common.utils.utils import is_valid_numpy_dtype_string
+from lerobot.configs.types import DictLike, FeatureType, PolicyFeature

 DEFAULT_CHUNK_SIZE = 1000  # Max number of episodes per chunk

@@ -664,7 +664,7 @@ def create_lerobot_dataset_card(
    **kwargs,
 ) -> DatasetCard:
    """
-    Keyword arguments will be used to replace values in src/lerobot/datasets/card_template.md.
+    Keyword arguments will be used to replace values in ./lerobot/common/datasets/card_template.md.
    Note: If specified, license must be one of https://huggingface.co/docs/hub/repositories-licenses.
    """
    card_tags = ["LeRobot"]
@@ -687,7 +687,7 @@ def create_lerobot_dataset_card(
        ],
    )

-    card_template = (importlib.resources.files("lerobot.datasets") / "card_template.md").read_text()
+    card_template = (importlib.resources.files("lerobot.common.datasets") / "card_template.md").read_text()

    return DatasetCard.from_template(
        card_data=card_data,
@@ -858,21 +858,3 @@ def validate_episode_buffer(episode_buffer: dict, total_episodes: int, features:
            f"In episode_buffer not in features: {buffer_keys - set(features)}"
            f"In features not in episode_buffer: {set(features) - buffer_keys}"
        )
-
-
-def map_dict_keys(
-    item: dict, feature_keys_mapping: dict, training_features: list = None, pad_key: str = "is_pad"
-) -> dict:
-    """Maps feature keys from the dataset to the keys used in the model."""
-    if feature_keys_mapping is None:
-        return item
-    features = {}
-    for key in item:
-        if key in feature_keys_mapping:
-            if feature_keys_mapping[key] is not None:
-                if training_features is None or feature_keys_mapping[key] in training_features:
-                    features[feature_keys_mapping[key]] = item[key]
-        else:
-            if training_features is None or key in training_features or pad_key in key:
-                features[key] = item[key]
-    return features
--- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py
+++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py
@@ -26,8 +26,8 @@ from pathlib import Path
 from textwrap import dedent

 from lerobot import available_datasets
-from lerobot.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset
-from lerobot.robots.aloha.configuration_aloha import AlohaRobotConfig
+from lerobot.common.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset
+from lerobot.common.robots.aloha.configuration_aloha import AlohaRobotConfig

 LOCAL_DIR = Path("data/")

--- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py
+++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py
@@ -38,7 +38,7 @@ If your dataset contains a single task, you can simply provide it directly via t
 Examples:

 ```bash
-python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \
+python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \
    --repo-id lerobot/aloha_sim_insertion_human_image \
    --single-task "Insert the peg into the socket." \
    --robot-config lerobot/configs/robot/aloha.yaml \
@@ -46,7 +46,7 @@ python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \
 ```

 ```bash
-python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \
+python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \
    --repo-id aliberts/koch_tutorial \
    --single-task "Pick the Lego block and drop it in the box on the right." \
    --robot-config lerobot/configs/robot/koch.yaml \
@@ -63,7 +63,7 @@ If your dataset is a multi-task dataset, you have two options to provide the tas
    Example:

    ```bash
-    python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \
+    python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \
        --repo-id lerobot/stanford_kuka_multimodal_dataset \
        --tasks-col "language_instruction" \
        --local-dir data
@@ -92,7 +92,7 @@ parquet file, and you must provide this column's name with the '--tasks-col' arg
 Example:

 ```bash
-python -m lerobot.datasets.v2.convert_dataset_v1_to_v2 \
+python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \
    --repo-id lerobot/stanford_kuka_multimodal_dataset \
    --tasks-col "language_instruction" \
    --local-dir data
@@ -119,7 +119,7 @@ from huggingface_hub import HfApi
 from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
 from safetensors.torch import load_file

-from lerobot.datasets.utils import (
+from lerobot.common.datasets.utils import (
    DEFAULT_CHUNK_SIZE,
    DEFAULT_PARQUET_PATH,
    DEFAULT_VIDEO_PATH,
@@ -136,12 +136,12 @@ from lerobot.datasets.utils import (
    write_json,
    write_jsonlines,
 )
-from lerobot.datasets.video_utils import (
+from lerobot.common.datasets.video_utils import (
    VideoFrame,  # noqa: F401
    get_image_pixel_channels,
    get_video_info,
 )
-from lerobot.robots import RobotConfig
+from lerobot.common.robots import RobotConfig

 V16 = "v1.6"
 V20 = "v2.0"
@@ -602,19 +602,19 @@ def make_robot_config(robot_type: str, **kwargs) -> RobotConfig:
        raise NotImplementedError  # TODO

    elif robot_type == "koch_follower":
-        from lerobot.robots.koch_follower import KochFollowerConfig
+        from lerobot.common.robots.koch_follower import KochFollowerConfig

        return KochFollowerConfig(**kwargs)
    elif robot_type == "so100_follower":
-        from lerobot.robots.so100_follower import SO100FollowerConfig
+        from lerobot.common.robots.so100_follower import SO100FollowerConfig

        return SO100FollowerConfig(**kwargs)
    elif robot_type == "stretch":
-        from lerobot.robots.stretch3 import Stretch3RobotConfig
+        from lerobot.common.robots.stretch3 import Stretch3RobotConfig

        return Stretch3RobotConfig(**kwargs)
    elif robot_type == "lekiwi":
-        from lerobot.robots.lekiwi import LeKiwiConfig
+        from lerobot.common.robots.lekiwi import LeKiwiConfig

        return LeKiwiConfig(**kwargs)
    else:
--- a/lerobot/common/datasets/v21/_remove_language_instruction.py
+++ b/lerobot/common/datasets/v21/_remove_language_instruction.py
@@ -20,9 +20,9 @@ from datasets import get_dataset_config_info
 from huggingface_hub import HfApi

 from lerobot import available_datasets
-from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
-from lerobot.datasets.utils import INFO_PATH, write_info
-from lerobot.datasets.v21.convert_dataset_v20_to_v21 import V20, SuppressWarnings
+from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata
+from lerobot.common.datasets.utils import INFO_PATH, write_info
+from lerobot.common.datasets.v21.convert_dataset_v20_to_v21 import V20, SuppressWarnings

 LOCAL_DIR = Path("data/")

--- a/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py
+++ b/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py
@@ -24,7 +24,7 @@ from pathlib import Path
 from huggingface_hub import HfApi

 from lerobot import available_datasets
-from lerobot.datasets.v21.convert_dataset_v20_to_v21 import V21, convert_dataset
+from lerobot.common.datasets.v21.convert_dataset_v20_to_v21 import V21, convert_dataset

 LOCAL_DIR = Path("data/")

--- a/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py
+++ b/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py
@@ -25,7 +25,7 @@ This script will help you convert any LeRobot dataset already pushed to the hub
 Usage:

 ```bash
-python -m lerobot.datasets.v21.convert_dataset_v20_to_v21 \
+python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py \
    --repo-id=aliberts/koch_tutorial
 ```

@@ -36,9 +36,9 @@ import logging

 from huggingface_hub import HfApi

-from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
-from lerobot.datasets.utils import EPISODES_STATS_PATH, STATS_PATH, load_stats, write_info
-from lerobot.datasets.v21.convert_stats import check_aggregate_stats, convert_stats
+from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
+from lerobot.common.datasets.utils import EPISODES_STATS_PATH, STATS_PATH, load_stats, write_info
+from lerobot.common.datasets.v21.convert_stats import check_aggregate_stats, convert_stats

 V20 = "v2.0"
 V21 = "v2.1"
--- a/lerobot/common/datasets/v21/convert_stats.py
+++ b/lerobot/common/datasets/v21/convert_stats.py
@@ -17,9 +17,9 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 import numpy as np
 from tqdm import tqdm

-from lerobot.datasets.compute_stats import aggregate_stats, get_feature_stats, sample_indices
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.datasets.utils import write_episode_stats
+from lerobot.common.datasets.compute_stats import aggregate_stats, get_feature_stats, sample_indices
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.datasets.utils import write_episode_stats


 def sample_episode_video_frames(dataset: LeRobotDataset, episode_index: int, ft_key: str) -> np.ndarray:
@@ -43,32 +43,14 @@ def convert_episode_stats(dataset: LeRobotDataset, ep_idx: int):
        else:
            ep_ft_data = np.array(ep_data[key])

-        if ft["dtype"] in ["image", "video"]:
-            # Handle variable dimensions for image/video data
-            # Expected formats: (frames, channels, height, width) or (channels, height, width)
-            if ep_ft_data.ndim == 4:
-                # Standard case: (frames, channels, height, width)
-                axes_to_reduce = (0, 2, 3)  # reduce over frames, height, width
-            elif ep_ft_data.ndim == 3:
-                # Squeezed case: (channels, height, width) - single frame
-                axes_to_reduce = (1, 2)  # reduce over height, width
-            else:
-                raise ValueError(f"Unexpected dimensions for {ft['dtype']} data: {ep_ft_data.shape}")
-            keepdims = True
-        else:
-            axes_to_reduce = 0
-            keepdims = ep_ft_data.ndim == 1
+        axes_to_reduce = (0, 2, 3) if ft["dtype"] in ["image", "video"] else 0
+        keepdims = True if ft["dtype"] in ["image", "video"] else ep_ft_data.ndim == 1
        ep_stats[key] = get_feature_stats(ep_ft_data, axis=axes_to_reduce, keepdims=keepdims)

        if ft["dtype"] in ["image", "video"]:  # remove batch dim
-            if ep_ft_data.ndim == 4:
-                # For 4D data, squeeze the first axis (batch/frames)
-                ep_stats[key] = {
-                    k: v if k == "count" else np.squeeze(v, axis=0) for k, v in ep_stats[key].items()
-                }
-            elif ep_ft_data.ndim == 3:
-                # For 3D data, the stats already have correct shape (channels,)
-                pass
+            ep_stats[key] = {
+                k: v if k == "count" else np.squeeze(v, axis=0) for k, v in ep_stats[key].items()
+            }

    dataset.meta.episodes_stats[ep_idx] = ep_stats

--- a/lerobot/common/datasets/video_utils.py
+++ b/lerobot/common/datasets/video_utils.py
--- a/lerobot/common/envs/__init__.py
+++ b/lerobot/common/envs/__init__.py
--- a/lerobot/common/envs/configs.py
+++ b/lerobot/common/envs/configs.py
@@ -18,10 +18,10 @@ from typing import Any, Optional

 import draccus

+from lerobot.common.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
+from lerobot.common.robots import RobotConfig
+from lerobot.common.teleoperators.config import TeleoperatorConfig
 from lerobot.configs.types import FeatureType, PolicyFeature
-from lerobot.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
-from lerobot.robots import RobotConfig
-from lerobot.teleoperators.config import TeleoperatorConfig


@dataclass
--- a/lerobot/common/envs/factory.py
+++ b/lerobot/common/envs/factory.py
@@ -17,7 +17,7 @@ import importlib

 import gymnasium as gym

-from lerobot.envs.configs import AlohaEnv, EnvConfig, HILEnvConfig, PushtEnv, XarmEnv
+from lerobot.common.envs.configs import AlohaEnv, EnvConfig, HILEnvConfig, PushtEnv, XarmEnv


 def make_env_config(env_type: str, **kwargs) -> EnvConfig:
--- a/lerobot/common/envs/utils.py
+++ b/lerobot/common/envs/utils.py
@@ -22,9 +22,9 @@ import numpy as np
 import torch
 from torch import Tensor

+from lerobot.common.envs.configs import EnvConfig
+from lerobot.common.utils.utils import get_channel_first_image_shape
 from lerobot.configs.types import FeatureType, PolicyFeature
-from lerobot.envs.configs import EnvConfig
-from lerobot.utils.utils import get_channel_first_image_shape


 def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]:
--- a/lerobot/common/errors.py
+++ b/lerobot/common/errors.py
--- a/lerobot/common/model/kinematics.py
+++ b/lerobot/common/model/kinematics.py
@@ -0,0 +1,483 @@
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+from numpy.typing import NDArray
+from scipy.spatial.transform import Rotation
+
+
+def skew_symmetric(w: NDArray[np.float32]) -> NDArray[np.float32]:
+    """Return the 3x3 skew-symmetric matrix [w]x of a 3-vector ``w``, i.e. ``[w]x @ v == np.cross(w, v)``."""
+    return np.array([[0, -w[2], w[1]], [w[2], 0, -w[0]], [-w[1], w[0], 0]])
+
+
+def rodrigues_rotation(w: NDArray[np.float32], theta: float) -> NDArray[np.float32]:
+    """Rodrigues' formula: 3x3 rotation by ``theta`` radians about axis ``w`` (presumably unit-norm; not checked here)."""
+    w_hat = skew_symmetric(w)
+    return np.eye(3) + np.sin(theta) * w_hat + (1 - np.cos(theta)) * w_hat @ w_hat
+
+
+def screw_axis_to_transform(s: NDArray[np.float32], theta: float) -> NDArray[np.float32]:
+    """Return the 4x4 transform for moving ``theta`` along screw axis ``s = (w, v)``; raises ValueError if ``s`` is not normalized."""
+    screw_axis_rot = s[:3]
+    screw_axis_trans = s[3:]
+
+    # Pure translation: no angular part and a unit linear part (tolerance-based, so float round-off is accepted)
+    if np.allclose(screw_axis_rot, 0) and np.isclose(np.linalg.norm(screw_axis_trans), 1.0):
+        transform = np.eye(4)
+        transform[:3, 3] = screw_axis_trans * theta
+
+    # Rotation (and potentially translation): unit angular part, compared with a tolerance instead of `== 1`
+    elif np.isclose(np.linalg.norm(screw_axis_rot), 1.0):
+        w_hat = skew_symmetric(screw_axis_rot)
+        rot_mat = np.eye(3) + np.sin(theta) * w_hat + (1 - np.cos(theta)) * w_hat @ w_hat
+        t = (
+            np.eye(3) * theta + (1 - np.cos(theta)) * w_hat + (theta - np.sin(theta)) * w_hat @ w_hat
+        ) @ screw_axis_trans
+        transform = np.eye(4)
+        transform[:3, :3] = rot_mat
+        transform[:3, 3] = t
+    else:
+        raise ValueError("Invalid screw axis parameters")
+    return transform
+
+
+def pose_difference_se3(pose1: NDArray[np.float32], pose2: NDArray[np.float32]) -> NDArray[np.float32]:
+    """
+    Calculates the difference of pose1 relative to pose2 for two 4x4 homogeneous transformation matrices.
+    SE(3) (Special Euclidean Group) represents rigid body transformations in 3D space,
+    combining rotation (SO(3)) and translation.
+
+    Each 4x4 matrix has the following structure:
+    [R11 R12 R13 tx]
+    [R21 R22 R23 ty]
+    [R31 R32 R33 tz]
+    [ 0   0   0   1]
+
+    where R is the 3x3 rotation matrix and [tx,ty,tz] is the translation vector.
+
+    Args:
+        pose1: A 4x4 numpy array representing the first pose.
+        pose2: A 4x4 numpy array representing the second pose.
+
+    Returns:
+        A 6D numpy array concatenating translation and rotation differences.
+        First 3 elements: translation of pose1 minus translation of pose2.
+        Last 3 elements: rotation vector (axis-angle) of ``rot1 @ rot2.T``.
+    """
+    rot1 = pose1[:3, :3]
+    rot2 = pose2[:3, :3]
+
+    translation_diff = pose1[:3, 3] - pose2[:3, 3]
+
+    # Relative rotation rot1 @ rot2.T, converted to a rotation vector with scipy's Rotation
+    rot_diff = Rotation.from_matrix(rot1 @ rot2.T)
+    rotation_diff = rot_diff.as_rotvec()  # Axis-angle representation
+
+    return np.concatenate([translation_diff, rotation_diff])
+
+
+def se3_error(target_pose: NDArray[np.float32], current_pose: NDArray[np.float32]) -> NDArray[np.float32]:
+    pos_error = target_pose[:3, 3] - current_pose[:3, 3]
+    # Orientation error: rotation vector (axis-angle) of rot_target @ rot_current.T
+    rot_target = target_pose[:3, :3]
+    rot_current = current_pose[:3, :3]
+    rot_error_mat = rot_target @ rot_current.T
+    rot_error = Rotation.from_matrix(rot_error_mat).as_rotvec()
+    # 6-vector [position error (3), rotation error (3)]; same computation as pose_difference_se3(target_pose, current_pose)
+    return np.concatenate([pos_error, rot_error])
+
+
+class RobotKinematics:
+    """Robot kinematics class supporting multiple robot models."""
+
+    # Robot measurements dictionary
+    ROBOT_MEASUREMENTS = {
+        "koch": {
+            "gripper": [0.239, -0.001, 0.024],
+            "wrist": [0.209, 0, 0.024],
+            "forearm": [0.108, 0, 0.02],
+            "humerus": [0, 0, 0.036],
+            "shoulder": [0, 0, 0],
+            "base": [0, 0, 0.02],
+        },
+        "moss": {
+            "gripper": [0.246, 0.013, 0.111],
+            "wrist": [0.245, 0.002, 0.064],
+            "forearm": [0.122, 0, 0.064],
+            "humerus": [0.001, 0.001, 0.063],
+            "shoulder": [0, 0, 0],
+            "base": [0, 0, 0.02],
+        },
+        "so_old_calibration": {
+            "gripper": [0.320, 0, 0.050],
+            "wrist": [0.278, 0, 0.050],
+            "forearm": [0.143, 0, 0.044],
+            "humerus": [0.031, 0, 0.072],
+            "shoulder": [0, 0, 0],
+            "base": [0, 0, 0.02],
+        },
+        "so_new_calibration": {
+            "gripper": [0.33, 0.0, 0.285],
+            "wrist": [0.30, 0.0, 0.267],
+            "forearm": [0.25, 0.0, 0.266],
+            "humerus": [0.06, 0.0, 0.264],
+            "shoulder": [0.0, 0.0, 0.238],
+            "base": [0.0, 0.0, 0.12],
+        },
+    }
+
+    def __init__(self, robot_type: str = "so100"):
+        """Select the link measurements for ``robot_type`` and build the kinematic constants.
+
+        Args:
+            robot_type: Key of ``ROBOT_MEASUREMENTS`` ("koch", "moss", "so_old_calibration", "so_new_calibration").
+        """
+        if robot_type not in self.ROBOT_MEASUREMENTS:
+            raise ValueError(
+                f"Unknown robot type: {robot_type}. Available types: {list(self.ROBOT_MEASUREMENTS.keys())}"
+            )
+        # NOTE(review): the default "so100" is not a ROBOT_MEASUREMENTS key, so the check above rejects it — confirm the intended default.
+        self.robot_type = robot_type
+        self.measurements = self.ROBOT_MEASUREMENTS[robot_type]
+
+        # Initialize all transformation matrices and screw axes
+        self._setup_transforms()
+
+    def _create_translation_matrix(
+        self, x: float = 0.0, y: float = 0.0, z: float = 0.0
+    ) -> NDArray[np.float32]:
+        """Return a 4x4 homogeneous transform with identity rotation and translation (x, y, z); no dtype is forced."""
+        return np.array([[1, 0, 0, x], [0, 1, 0, y], [0, 0, 1, z], [0, 0, 0, 1]])
+
+    def _setup_transforms(self):
+        """Build the fixed frame orientations, screw axes and zero-position poses from ``self.measurements``."""
+        # Fixed frame orientation matrices (hard-coded; they use no entry of self.measurements)
+
+        # Gripper orientation
+        self.gripper_X0 = np.array(
+            [
+                [1, 0, 0, 0],
+                [0, 0, 1, 0],
+                [0, -1, 0, 0],
+                [0, 0, 0, 1],
+            ],
+            dtype=np.float32,
+        )
+
+        # Wrist orientation
+        self.wrist_X0 = np.array(
+            [
+                [0, -1, 0, 0],
+                [1, 0, 0, 0],
+                [0, 0, 1, 0],
+                [0, 0, 0, 1],
+            ],
+            dtype=np.float32,
+        )
+
+        # Base orientation
+        self.base_X0 = np.array(
+            [
+                [0, 0, 1, 0],
+                [1, 0, 0, 0],
+                [0, 1, 0, 0],
+                [0, 0, 0, 1],
+            ],
+            dtype=np.float32,
+        )
+
+        # Gripper
+        # Screw axis of gripper frame wrt base frame (6-vector: 3 rotation-axis entries, then 3 linear entries)
+        self.S_BG = np.array(
+            [
+                1,
+                0,
+                0,
+                0,
+                self.measurements["gripper"][2],
+                -self.measurements["gripper"][1],
+            ],
+            dtype=np.float32,
+        )
+
+        # Gripper origin to centroid transform
+        self.X_GoGc = self._create_translation_matrix(x=0.07)
+
+        # Gripper origin to tip transform
+        self.X_GoGt = self._create_translation_matrix(x=0.12)
+
+        # 0-position gripper frame pose wrt base
+        self.X_BoGo = self._create_translation_matrix(
+            x=self.measurements["gripper"][0],
+            y=self.measurements["gripper"][1],
+            z=self.measurements["gripper"][2],
+        )
+
+        # Wrist
+        # Screw axis of wrist frame wrt base frame
+        self.S_BR = np.array(
+            [0, 1, 0, -self.measurements["wrist"][2], 0, self.measurements["wrist"][0]], dtype=np.float32
+        )
+
+        # 0-position origin to centroid transform
+        self.X_RoRc = self._create_translation_matrix(x=0.0035, y=-0.002)
+
+        # 0-position wrist frame pose wrt base
+        self.X_BR = self._create_translation_matrix(
+            x=self.measurements["wrist"][0],
+            y=self.measurements["wrist"][1],
+            z=self.measurements["wrist"][2],
+        )
+
+        # Forearm
+        # Screw axis of forearm frame wrt base frame
+        self.S_BF = np.array(
+            [
+                0,
+                1,
+                0,
+                -self.measurements["forearm"][2],
+                0,
+                self.measurements["forearm"][0],
+            ],
+            dtype=np.float32,
+        )
+
+        # Forearm origin to centroid transform
+        self.X_ForearmFc = self._create_translation_matrix(x=0.036)
+
+        # 0-position forearm frame pose wrt base
+        self.X_BF = self._create_translation_matrix(
+            x=self.measurements["forearm"][0],
+            y=self.measurements["forearm"][1],
+            z=self.measurements["forearm"][2],
+        )
+
+        # Humerus
+        # Screw axis of humerus frame wrt base frame
+        self.S_BH = np.array(
+            [
+                0,
+                -1,
+                0,
+                self.measurements["humerus"][2],
+                0,
+                -self.measurements["humerus"][0],
+            ],
+            dtype=np.float32,
+        )
+
+        # Humerus origin to centroid transform
+        self.X_HoHc = self._create_translation_matrix(x=0.0475)
+
+        # 0-position humerus frame pose wrt base
+        self.X_BH = self._create_translation_matrix(
+            x=self.measurements["humerus"][0],
+            y=self.measurements["humerus"][1],
+            z=self.measurements["humerus"][2],
+        )
+
+        # Shoulder
+        # Screw axis of shoulder frame wrt Base frame
+        self.S_BS = np.array([0, 0, -1, 0, 0, 0], dtype=np.float32)
+
+        # Shoulder origin to centroid transform
+        self.X_SoSc = self._create_translation_matrix(x=-0.017, z=0.0235)
+
+        # 0-position shoulder frame pose wrt base
+        self.X_BS = self._create_translation_matrix(
+            x=self.measurements["shoulder"][0],
+            y=self.measurements["shoulder"][1],
+            z=self.measurements["shoulder"][2],
+        )
+
+        # Base
+        # Base origin to centroid transform
+        self.X_BoBc = self._create_translation_matrix(y=0.015)
+
+        # World to base transform
+        self.X_WoBo = self._create_translation_matrix(
+            x=self.measurements["base"][0],
+            y=self.measurements["base"][1],
+            z=self.measurements["base"][2],
+        )
+
+        # Pre-compute the constant gripper product once (presumably right-multiplied in forward kinematics — TODO confirm)
+        self._fk_gripper_post = self.X_GoGc @ self.X_BoGo @ self.gripper_X0
+
+    def forward_kinematics(
+        self,
+        robot_pos_deg: NDArray[np.float32],
+        frame: str = "gripper_tip",
+    ) -> NDArray[np.float32]:
+        """Generic forward kinematics.
+
+        The pose is built by chaining one screw-axis transform per joint, starting
+        from the world-to-base transform, and stopping at the requested frame.
+
+        Args:
+            robot_pos_deg: Joint positions in degrees. Elements 0-4 are read as
+                shoulder pan, shoulder lift, elbow flex, wrist flex and wrist roll;
+                any further elements are ignored. Can be ``None`` only when
+                computing the *base* frame, as it does not depend on joint angles.
+            frame: Target frame (case-insensitive). One of
+                ``{"base", "shoulder", "humerus", "forearm", "wrist", "gripper", "gripper_tip"}``.
+
+        Returns:
+            4×4 homogeneous transformation matrix of the requested frame
+            expressed in the world coordinate system.
+
+        Raises:
+            ValueError: If ``frame`` is not one of the supported frame names.
+            TypeError: If ``robot_pos_deg`` is ``None`` for a frame other than ``"base"``.
+        """
+        frame = frame.lower()
+        valid_frames = ("base", "shoulder", "humerus", "forearm", "wrist", "gripper", "gripper_tip")
+        if frame not in valid_frames:
+            raise ValueError(
+                f"Unknown frame '{frame}'. Valid options are base, shoulder, humerus, forearm, wrist, gripper, gripper_tip."
+            )
+
+        # Base frame does not rely on joint angles.
+        if frame == "base":
+            return self.X_WoBo @ self.X_BoBc @ self.base_X0
+
+        # Every other frame needs joint angles; fail with a clear message instead of
+        # the opaque "unsupported operand" error that `None / 180` would produce.
+        if robot_pos_deg is None:
+            raise TypeError(
+                f"robot_pos_deg must be provided to compute the '{frame}' frame; "
+                "None is only accepted for the 'base' frame."
+            )
+
+        # np.asarray is a no-op for ndarrays and lets plain sequences be used as well.
+        robot_pos_rad = np.asarray(robot_pos_deg) / 180 * np.pi
+
+        # Extract joint angles (note the sign convention for shoulder lift).
+        theta_shoulder_pan = robot_pos_rad[0]
+        theta_shoulder_lift = -robot_pos_rad[1]
+        theta_elbow_flex = robot_pos_rad[2]
+        theta_wrist_flex = robot_pos_rad[3]
+        theta_wrist_roll = robot_pos_rad[4]
+
+        # Start with the world-to-base transform; incrementally add successive links.
+        transformation_matrix = self.X_WoBo @ screw_axis_to_transform(self.S_BS, theta_shoulder_pan)
+        if frame == "shoulder":
+            return transformation_matrix @ self.X_SoSc @ self.X_BS
+
+        transformation_matrix = transformation_matrix @ screw_axis_to_transform(
+            self.S_BH, theta_shoulder_lift
+        )
+        if frame == "humerus":
+            return transformation_matrix @ self.X_HoHc @ self.X_BH
+
+        transformation_matrix = transformation_matrix @ screw_axis_to_transform(self.S_BF, theta_elbow_flex)
+        if frame == "forearm":
+            return transformation_matrix @ self.X_ForearmFc @ self.X_BF
+
+        transformation_matrix = transformation_matrix @ screw_axis_to_transform(self.S_BR, theta_wrist_flex)
+        if frame == "wrist":
+            return transformation_matrix @ self.X_RoRc @ self.X_BR @ self.wrist_X0
+
+        transformation_matrix = transformation_matrix @ screw_axis_to_transform(self.S_BG, theta_wrist_roll)
+        if frame == "gripper":
+            # Uses the post-multiplication matrix pre-computed in __init__.
+            return transformation_matrix @ self._fk_gripper_post
+        # Only "gripper_tip" can reach this point (frame was validated above).
+        return transformation_matrix @ self.X_GoGt @ self.X_BoGo @ self.gripper_X0
+
+    def compute_jacobian(
+        self, robot_pos_deg: NDArray[np.float32], frame: str = "gripper_tip"
+    ) -> NDArray[np.float32]:
+        """Central finite differences to compute the 6-row Jacobian of ``frame``.
+
+        Column ``j`` is ``pose_difference_se3`` between the forward kinematics evaluated
+        with joint ``j`` perturbed by ``+eps / 2`` and by ``-eps / 2``, divided by ``eps``.
+        The perturbation is applied to the joint values in degrees, so the derivatives
+        are per degree.
+
+        Args:
+            robot_pos_deg: Current joint positions in degrees. The last element is
+                dropped before differentiation (presumably the gripper joint, which
+                ``forward_kinematics`` does not read — confirm against callers).
+            frame: Target frame, forwarded unchanged to ``forward_kinematics``.
+
+        Returns:
+            Array of shape ``(6, 5)`` with one column per perturbed joint.
+        """
+        eps = 1e-8
+        # Loop-invariant: the joints that are actually differentiated.
+        joints_deg = robot_pos_deg[:-1]
+        jac = np.zeros(shape=(6, 5))
+        delta = np.zeros(len(joints_deg), dtype=np.float64)
+        for el_ix in range(len(joints_deg)):
+            # Perturb exactly one joint per iteration.
+            delta[:] = 0
+            delta[el_ix] = eps / 2
+            jac[:, el_ix] = (
+                pose_difference_se3(
+                    self.forward_kinematics(joints_deg + delta, frame),
+                    self.forward_kinematics(joints_deg - delta, frame),
+                )
+                / eps
+            )
+        return jac
+
+    def compute_positional_jacobian(
+        self, robot_pos_deg: NDArray[np.float32], frame: str = "gripper_tip"
+    ) -> NDArray[np.float32]:
+        """Central finite differences to compute the positional Jacobian of ``frame``.
+
+        Column ``j`` is the difference between the translation part (``[:3, 3]``) of the
+        forward kinematics evaluated with joint ``j`` perturbed by ``+eps / 2`` and by
+        ``-eps / 2``, divided by ``eps``. The perturbation is applied to the joint values
+        in degrees, so the derivatives are per degree.
+
+        Args:
+            robot_pos_deg: Current joint positions in degrees. The last element is
+                dropped before differentiation (presumably the gripper joint, which
+                ``forward_kinematics`` does not read — confirm against callers).
+            frame: Target frame, forwarded unchanged to ``forward_kinematics``.
+
+        Returns:
+            Array of shape ``(3, 5)`` with one column per perturbed joint.
+        """
+        eps = 1e-8
+        # Loop-invariant: the joints that are actually differentiated.
+        joints_deg = robot_pos_deg[:-1]
+        jac = np.zeros(shape=(3, 5))
+        delta = np.zeros(len(joints_deg), dtype=np.float64)
+        for el_ix in range(len(joints_deg)):
+            # Perturb exactly one joint per iteration.
+            delta[:] = 0
+            delta[el_ix] = eps / 2
+            pos_plus = self.forward_kinematics(joints_deg + delta, frame)[:3, 3]
+            pos_minus = self.forward_kinematics(joints_deg - delta, frame)[:3, 3]
+            jac[:, el_ix] = (pos_plus - pos_minus) / eps
+        return jac
+
+    def ik(
+        self,
+        current_joint_pos: NDArray[np.float32],
+        desired_ee_pose: NDArray[np.float32],
+        position_only: bool = True,
+        frame: str = "gripper_tip",
+        max_iterations: int = 5,
+        learning_rate: float = 1,
+    ) -> NDArray[np.float32]:
+        """Iterative inverse kinematics using Jacobian pseudo-inverse updates.
+
+        Each iteration evaluates the pose error of ``frame``, maps it to a joint update
+        through the pseudo-inverse of a finite-difference Jacobian, and applies that
+        update scaled by ``learning_rate``.
+
+        Args:
+            current_joint_pos: Initial joint positions in degrees. The array is copied,
+                not modified. Its last element is never updated by the solver.
+            desired_ee_pose: Target end-effector pose as a 4x4 transformation matrix
+            position_only: If True, only match end-effector position, not orientation
+            frame: Target frame. One of
+                ``{"base", "shoulder", "humerus", "forearm", "wrist", "gripper", "gripper_tip"}``.
+            max_iterations: Maximum number of iterations to run
+            learning_rate: Scale factor applied to each joint update
+
+        Returns:
+            Joint positions in degrees after the last update. Iteration stops early once
+            the error norm falls below ``5e-3``; reaching ``max_iterations`` does not
+            guarantee that the target was attained.
+        """
+        current_joint_state = current_joint_pos.copy()
+        for _ in range(max_iterations):
+            current_ee_pose = self.forward_kinematics(current_joint_state, frame)
+            if position_only:
+                error = desired_ee_pose[:3, 3] - current_ee_pose[:3, 3]
+                jac = self.compute_positional_jacobian(current_joint_state, frame)
+            else:
+                error = se3_error(desired_ee_pose, current_ee_pose)
+                jac = self.compute_jacobian(current_joint_state, frame)
+            delta_angles = np.linalg.pinv(jac) @ error
+            current_joint_state[:-1] += learning_rate * delta_angles
+
+            # The stop test uses the error measured before this iteration's update,
+            # so the update computed from that error is still applied.
+            if np.linalg.norm(error) < 5e-3:
+                break
+        return current_joint_state
--- a/lerobot/common/motors/init.py
+++ b/lerobot/common/motors/init.py
--- a/lerobot/common/motors/dynamixel/init.py
+++ b/lerobot/common/motors/dynamixel/init.py
--- a/lerobot/common/motors/dynamixel/dynamixel.py
+++ b/lerobot/common/motors/dynamixel/dynamixel.py
@@ -22,7 +22,7 @@ import logging
 from copy import deepcopy
 from enum import Enum

-from lerobot.utils.encoding_utils import decode_twos_complement, encode_twos_complement
+from lerobot.common.utils.encoding_utils import decode_twos_complement, encode_twos_complement

 from ..motors_bus import Motor, MotorCalibration, MotorsBus, NameOrID, Value, get_address
 from .tables import (
--- a/lerobot/common/motors/dynamixel/tables.py
+++ b/lerobot/common/motors/dynamixel/tables.py
--- a/lerobot/common/motors/feetech/init.py
+++ b/lerobot/common/motors/feetech/init.py
--- a/lerobot/common/motors/feetech/feetech.py
+++ b/lerobot/common/motors/feetech/feetech.py
@@ -17,7 +17,7 @@ from copy import deepcopy
 from enum import Enum
 from pprint import pformat

-from lerobot.utils.encoding_utils import decode_sign_magnitude, encode_sign_magnitude
+from lerobot.common.utils.encoding_utils import decode_sign_magnitude, encode_sign_magnitude

 from ..motors_bus import Motor, MotorCalibration, MotorsBus, NameOrID, Value, get_address
 from .tables import (
--- a/lerobot/common/motors/feetech/tables.py
+++ b/lerobot/common/motors/feetech/tables.py
--- a/lerobot/common/motors/motors_bus.py
+++ b/lerobot/common/motors/motors_bus.py
@@ -32,8 +32,8 @@ import serial
 from deepdiff import DeepDiff
 from tqdm import tqdm

-from lerobot.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
-from lerobot.utils.utils import enter_pressed, move_cursor_up
+from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
+from lerobot.common.utils.utils import enter_pressed, move_cursor_up

 NameOrID: TypeAlias = str | int
 Value: TypeAlias = int | float
@@ -446,7 +446,7 @@ class MotorsBus(abc.ABC):
        except (FileNotFoundError, OSError, serial.SerialException) as e:
            raise ConnectionError(
                f"\nCould not connect on port '{self.port}'. Make sure you are using the correct port."
-                "\nTry running `python -m lerobot.find_port`\n"
+                "\nTry running `python lerobot/find_port.py`\n"
            ) from e

    @abc.abstractmethod
--- a/lerobot/common/optim/init.py
+++ b/lerobot/common/optim/init.py
--- a/lerobot/common/optim/factory.py
+++ b/lerobot/common/optim/factory.py
@@ -18,8 +18,8 @@
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import LRScheduler

+from lerobot.common.policies.pretrained import PreTrainedPolicy
 from lerobot.configs.train import TrainPipelineConfig
-from lerobot.policies.pretrained import PreTrainedPolicy


 def make_optimizer_and_scheduler(
--- a/lerobot/common/optim/optimizers.py
+++ b/lerobot/common/optim/optimizers.py
@@ -22,12 +22,12 @@ import draccus
 import torch
 from safetensors.torch import load_file, save_file

-from lerobot.constants import (
+from lerobot.common.constants import (
    OPTIMIZER_PARAM_GROUPS,
    OPTIMIZER_STATE,
 )
-from lerobot.datasets.utils import flatten_dict, unflatten_dict, write_json
-from lerobot.utils.io_utils import deserialize_json_into_object
+from lerobot.common.datasets.utils import flatten_dict, unflatten_dict, write_json
+from lerobot.common.utils.io_utils import deserialize_json_into_object


@dataclass
--- a/lerobot/common/optim/schedulers.py
+++ b/lerobot/common/optim/schedulers.py
@@ -22,9 +22,9 @@ import draccus
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import LambdaLR, LRScheduler

-from lerobot.constants import SCHEDULER_STATE
-from lerobot.datasets.utils import write_json
-from lerobot.utils.io_utils import deserialize_json_into_object
+from lerobot.common.constants import SCHEDULER_STATE
+from lerobot.common.datasets.utils import write_json
+from lerobot.common.utils.io_utils import deserialize_json_into_object


@dataclass
--- a/lerobot/common/policies/init.py
+++ b/lerobot/common/policies/init.py
@@ -16,6 +16,5 @@ from .act.configuration_act import ACTConfig as ACTConfig
 from .diffusion.configuration_diffusion import DiffusionConfig as DiffusionConfig
 from .pi0.configuration_pi0 import PI0Config as PI0Config
 from .smolvla.configuration_smolvla import SmolVLAConfig as SmolVLAConfig
-from .smolvla2.configuration_smolvla2 import SmolVLA2Config as SmolVLA2Config
 from .tdmpc.configuration_tdmpc import TDMPCConfig as TDMPCConfig
 from .vqbet.configuration_vqbet import VQBeTConfig as VQBeTConfig
--- a/lerobot/common/policies/act/configuration_act.py
+++ b/lerobot/common/policies/act/configuration_act.py
@@ -15,9 +15,9 @@
 # limitations under the License.
 from dataclasses import dataclass, field

+from lerobot.common.optim.optimizers import AdamWConfig
 from lerobot.configs.policies import PreTrainedConfig
 from lerobot.configs.types import NormalizationMode
-from lerobot.optim.optimizers import AdamWConfig


@PreTrainedConfig.register_subclass("act")
--- a/lerobot/common/policies/act/modeling_act.py
+++ b/lerobot/common/policies/act/modeling_act.py
@@ -33,10 +33,9 @@ from torch import Tensor, nn
 from torchvision.models._utils import IntermediateLayerGetter
 from torchvision.ops.misc import FrozenBatchNorm2d

-from lerobot.constants import ACTION, OBS_IMAGES
-from lerobot.policies.act.configuration_act import ACTConfig
-from lerobot.policies.normalize import Normalize, Unnormalize
-from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.common.policies.act.configuration_act import ACTConfig
+from lerobot.common.policies.normalize import Normalize, Unnormalize
+from lerobot.common.policies.pretrained import PreTrainedPolicy


 class ACTPolicy(PreTrainedPolicy):
@@ -115,49 +114,46 @@ class ACTPolicy(PreTrainedPolicy):
        environment. It works by managing the actions in a queue and only calling `select_actions` when the
        queue is empty.
        """
-        self.eval()  # keeping the policy in eval mode as it could be set to train mode while queue is consumed
+        self.eval()

+        batch = self.normalize_inputs(batch)
+        if self.config.image_features:
+            batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
+            batch["observation.images"] = [batch[key] for key in self.config.image_features]
+
+        # If we are doing temporal ensembling, do online updates where we keep track of the number of actions
+        # we are ensembling over.
        if self.config.temporal_ensemble_coeff is not None:
-            actions = self.predict_action_chunk(batch)
+            actions = self.model(batch)[0]  # (batch_size, chunk_size, action_dim)
+            actions = self.unnormalize_outputs({"action": actions})["action"]
            action = self.temporal_ensembler.update(actions)
            return action

        # Action queue logic for n_action_steps > 1. When the action_queue is depleted, populate it by
        # querying the policy.
        if len(self._action_queue) == 0:
-            actions = self.predict_action_chunk(batch)[:, : self.config.n_action_steps]
+            actions = self.model(batch)[0][:, : self.config.n_action_steps]
+
+            # TODO(rcadene): make _forward return output dictionary?
+            actions = self.unnormalize_outputs({"action": actions})["action"]

            # `self.model.forward` returns a (batch_size, n_action_steps, action_dim) tensor, but the queue
            # effectively has shape (n_action_steps, batch_size, *), hence the transpose.
            self._action_queue.extend(actions.transpose(0, 1))
        return self._action_queue.popleft()

-    @torch.no_grad
-    def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
-        """Predict a chunk of actions given environment observations."""
-        self.eval()
-
-        batch = self.normalize_inputs(batch)
-        if self.config.image_features:
-            batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
-            batch[OBS_IMAGES] = [batch[key] for key in self.config.image_features]
-
-        actions = self.model(batch)[0]
-        actions = self.unnormalize_outputs({ACTION: actions})[ACTION]
-        return actions
-
    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]:
        """Run the batch through the model and compute the loss for training or validation."""
        batch = self.normalize_inputs(batch)
        if self.config.image_features:
            batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
-            batch[OBS_IMAGES] = [batch[key] for key in self.config.image_features]
+            batch["observation.images"] = [batch[key] for key in self.config.image_features]

        batch = self.normalize_targets(batch)
        actions_hat, (mu_hat, log_sigma_x2_hat) = self.model(batch)

        l1_loss = (
-            F.l1_loss(batch[ACTION], actions_hat, reduction="none") * ~batch["action_is_pad"].unsqueeze(-1)
+            F.l1_loss(batch["action"], actions_hat, reduction="none") * ~batch["action_is_pad"].unsqueeze(-1)
        ).mean()

        loss_dict = {"l1_loss": l1_loss.item()}
--- a/lerobot/common/policies/diffusion/configuration_diffusion.py
+++ b/lerobot/common/policies/diffusion/configuration_diffusion.py
@@ -16,10 +16,10 @@
 # limitations under the License.
 from dataclasses import dataclass, field

+from lerobot.common.optim.optimizers import AdamConfig
+from lerobot.common.optim.schedulers import DiffuserSchedulerConfig
 from lerobot.configs.policies import PreTrainedConfig
 from lerobot.configs.types import NormalizationMode
-from lerobot.optim.optimizers import AdamConfig
-from lerobot.optim.schedulers import DiffuserSchedulerConfig


@PreTrainedConfig.register_subclass("diffusion")
--- a/lerobot/common/policies/diffusion/modeling_diffusion.py
+++ b/lerobot/common/policies/diffusion/modeling_diffusion.py
@@ -33,11 +33,11 @@ from diffusers.schedulers.scheduling_ddim import DDIMScheduler
 from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
 from torch import Tensor, nn

-from lerobot.constants import ACTION, OBS_ENV_STATE, OBS_IMAGES, OBS_STATE
-from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
-from lerobot.policies.normalize import Normalize, Unnormalize
-from lerobot.policies.pretrained import PreTrainedPolicy
-from lerobot.policies.utils import (
+from lerobot.common.constants import OBS_ENV_STATE, OBS_STATE
+from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
+from lerobot.common.policies.normalize import Normalize, Unnormalize
+from lerobot.common.policies.pretrained import PreTrainedPolicy
+from lerobot.common.policies.utils import (
    get_device_from_parameters,
    get_dtype_from_parameters,
    get_output_shape,
@@ -99,18 +99,6 @@ class DiffusionPolicy(PreTrainedPolicy):
        if self.config.env_state_feature:
            self._queues["observation.environment_state"] = deque(maxlen=self.config.n_obs_steps)

-    @torch.no_grad
-    def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
-        """Predict a chunk of actions given environment observations."""
-        # stack n latest observations from the queue
-        batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues}
-        actions = self.diffusion.generate_actions(batch)
-
-        # TODO(rcadene): make above methods return output dictionary?
-        actions = self.unnormalize_outputs({ACTION: actions})[ACTION]
-
-        return actions
-
    @torch.no_grad
    def select_action(self, batch: dict[str, Tensor]) -> Tensor:
        """Select a single action given environment observations.
@@ -136,15 +124,23 @@ class DiffusionPolicy(PreTrainedPolicy):
        batch = self.normalize_inputs(batch)
        if self.config.image_features:
            batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
-            batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4)
+            batch["observation.images"] = torch.stack(
+                [batch[key] for key in self.config.image_features], dim=-4
+            )
        # Note: It's important that this happens after stacking the images into a single key.
        self._queues = populate_queues(self._queues, batch)

-        if len(self._queues[ACTION]) == 0:
-            actions = self.predict_action_chunk(batch)
-            self._queues[ACTION].extend(actions.transpose(0, 1))
+        if len(self._queues["action"]) == 0:
+            # stack n latest observations from the queue
+            batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues}
+            actions = self.diffusion.generate_actions(batch)

-        action = self._queues[ACTION].popleft()
+            # TODO(rcadene): make above methods return output dictionary?
+            actions = self.unnormalize_outputs({"action": actions})["action"]
+
+            self._queues["action"].extend(actions.transpose(0, 1))
+
+        action = self._queues["action"].popleft()
        return action

    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, None]:
@@ -152,7 +148,9 @@ class DiffusionPolicy(PreTrainedPolicy):
        batch = self.normalize_inputs(batch)
        if self.config.image_features:
            batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
-            batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4)
+            batch["observation.images"] = torch.stack(
+                [batch[key] for key in self.config.image_features], dim=-4
+            )
        batch = self.normalize_targets(batch)
        loss = self.diffusion.compute_loss(batch)
        # no output_dict so returning None
--- a/lerobot/common/policies/factory.py
+++ b/lerobot/common/policies/factory.py
@@ -18,67 +18,62 @@ import logging

 from torch import nn

+from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata
+from lerobot.common.datasets.utils import dataset_to_policy_features
+from lerobot.common.envs.configs import EnvConfig
+from lerobot.common.envs.utils import env_to_policy_features
+from lerobot.common.policies.act.configuration_act import ACTConfig
+from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
+from lerobot.common.policies.pi0.configuration_pi0 import PI0Config
+from lerobot.common.policies.pi0fast.configuration_pi0fast import PI0FASTConfig
+from lerobot.common.policies.pretrained import PreTrainedPolicy
+from lerobot.common.policies.sac.configuration_sac import SACConfig
+from lerobot.common.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig
+from lerobot.common.policies.smolvla.configuration_smolvla import SmolVLAConfig
+from lerobot.common.policies.tdmpc.configuration_tdmpc import TDMPCConfig
+from lerobot.common.policies.vqbet.configuration_vqbet import VQBeTConfig
 from lerobot.configs.policies import PreTrainedConfig
 from lerobot.configs.types import FeatureType
-from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
-from lerobot.datasets.utils import dataset_to_policy_features
-from lerobot.envs.configs import EnvConfig
-from lerobot.envs.utils import env_to_policy_features
-from lerobot.policies.act.configuration_act import ACTConfig
-from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
-from lerobot.policies.pi0.configuration_pi0 import PI0Config
-from lerobot.policies.pi0fast.configuration_pi0fast import PI0FASTConfig
-from lerobot.policies.pretrained import PreTrainedPolicy
-from lerobot.policies.sac.configuration_sac import SACConfig
-from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig
-from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig
-from lerobot.policies.smolvla2.configuration_smolvla2 import SmolVLA2Config
-from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig
-from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig


 def get_policy_class(name: str) -> PreTrainedPolicy:
    """Get the policy's class and config class given a name (matching the policy class' `name` attribute)."""
    if name == "tdmpc":
-        from lerobot.policies.tdmpc.modeling_tdmpc import TDMPCPolicy
+        from lerobot.common.policies.tdmpc.modeling_tdmpc import TDMPCPolicy

        return TDMPCPolicy
    elif name == "diffusion":
-        from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
+        from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy

        return DiffusionPolicy
    elif name == "act":
-        from lerobot.policies.act.modeling_act import ACTPolicy
+        from lerobot.common.policies.act.modeling_act import ACTPolicy

        return ACTPolicy
    elif name == "vqbet":
-        from lerobot.policies.vqbet.modeling_vqbet import VQBeTPolicy
+        from lerobot.common.policies.vqbet.modeling_vqbet import VQBeTPolicy

        return VQBeTPolicy
    elif name == "pi0":
-        from lerobot.policies.pi0.modeling_pi0 import PI0Policy
+        from lerobot.common.policies.pi0.modeling_pi0 import PI0Policy

        return PI0Policy
    elif name == "pi0fast":
-        from lerobot.policies.pi0fast.modeling_pi0fast import PI0FASTPolicy
+        from lerobot.common.policies.pi0fast.modeling_pi0fast import PI0FASTPolicy

        return PI0FASTPolicy
    elif name == "sac":
-        from lerobot.policies.sac.modeling_sac import SACPolicy
+        from lerobot.common.policies.sac.modeling_sac import SACPolicy

        return SACPolicy
    elif name == "reward_classifier":
-        from lerobot.policies.sac.reward_model.modeling_classifier import Classifier
+        from lerobot.common.policies.sac.reward_model.modeling_classifier import Classifier

        return Classifier
    elif name == "smolvla":
-        from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy
+        from lerobot.common.policies.smolvla.modeling_smolvla import SmolVLAPolicy

        return SmolVLAPolicy
-    elif name == "smolvla2":
-        from lerobot.policies.smolvla2.modeling_smolvla2 import SmolVLA2Policy
-
-        return SmolVLA2Policy
    else:
        raise NotImplementedError(f"Policy with name {name} is not implemented.")

@@ -100,8 +95,6 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
        return SACConfig(**kwargs)
    elif policy_type == "smolvla":
        return SmolVLAConfig(**kwargs)
-    elif policy_type == "smolvla2":
-        return SmolVLA2Config(**kwargs)
    elif policy_type == "reward_classifier":
        return RewardClassifierConfig(**kwargs)
    else:
@@ -154,18 +147,7 @@ def make_policy(
    kwargs = {}
    if ds_meta is not None:
        features = dataset_to_policy_features(ds_meta.features)
-        # Handle robot-type grouped stats - flatten to feature-level stats  
-        if ds_meta.stats and len(ds_meta.stats) == 1:
-            # Single robot type - use its stats directly
-            robot_type = list(ds_meta.stats.keys())[0]
-            kwargs["dataset_stats"] = ds_meta.stats[robot_type]
-        elif ds_meta.stats and len(ds_meta.stats) > 1:
-            # Multiple robot types - need to aggregate across all robot types
-            # For now, use the first robot type (TODO: proper multi-robot handling)
-            robot_type = list(ds_meta.stats.keys())[0]
-            kwargs["dataset_stats"] = ds_meta.stats[robot_type]
-        else:
-            kwargs["dataset_stats"] = ds_meta.stats
+        kwargs["dataset_stats"] = ds_meta.stats
    else:
        if not cfg.pretrained_path:
            logging.warning(
--- a/lerobot/common/policies/normalize.py
+++ b/lerobot/common/policies/normalize.py
@@ -79,7 +79,7 @@ def create_stats_buffers(
            )

        # TODO(aliberts, rcadene): harmonize this to only use one framework (np or torch)
-        if stats and key in stats:
+        if stats:
            if isinstance(stats[key]["mean"], np.ndarray):
                if norm_mode is NormalizationMode.MEAN_STD:
                    buffer["mean"].data = torch.from_numpy(stats[key]["mean"]).to(dtype=torch.float32)
--- a/lerobot/common/policies/pi0/configuration_pi0.py
+++ b/lerobot/common/policies/pi0/configuration_pi0.py
@@ -14,12 +14,12 @@

 from dataclasses import dataclass, field

-from lerobot.configs.policies import PreTrainedConfig
-from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature
-from lerobot.optim.optimizers import AdamWConfig
-from lerobot.optim.schedulers import (
+from lerobot.common.optim.optimizers import AdamWConfig
+from lerobot.common.optim.schedulers import (
    CosineDecayWithWarmupSchedulerConfig,
 )
+from lerobot.configs.policies import PreTrainedConfig
+from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature


@PreTrainedConfig.register_subclass("pi0")
--- a/lerobot/common/policies/pi0/conversion_scripts/benchmark.py
+++ b/lerobot/common/policies/pi0/conversion_scripts/benchmark.py
@@ -14,9 +14,9 @@

 import torch

+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.policies.factory import make_policy
 from lerobot.configs.policies import PreTrainedConfig
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.policies.factory import make_policy

 torch.backends.cudnn.benchmark = True

--- a/lerobot/common/policies/pi0/conversion_scripts/compare_with_jax.py
+++ b/lerobot/common/policies/pi0/conversion_scripts/compare_with_jax.py
@@ -18,9 +18,9 @@ from pathlib import Path

 import torch

+from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata
+from lerobot.common.policies.factory import make_policy
 from lerobot.configs.policies import PreTrainedConfig
-from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
-from lerobot.policies.factory import make_policy


 def display(tensor: torch.Tensor):
@@ -97,7 +97,7 @@ def main():

    noise = torch.from_numpy(noise).to(device=device, dtype=torch.float32)

-    from lerobot import policies  # noqa
+    from lerobot.common import policies  # noqa

    cfg = PreTrainedConfig.from_pretrained(ckpt_torch_dir)
    cfg.pretrained_path = ckpt_torch_dir
--- a/lerobot/common/policies/pi0/conversion_scripts/conversion_utils.py
+++ b/lerobot/common/policies/pi0/conversion_scripts/conversion_utils.py
--- a/lerobot/common/policies/pi0/conversion_scripts/convert_pi0_to_hf_lerobot.py
+++ b/lerobot/common/policies/pi0/conversion_scripts/convert_pi0_to_hf_lerobot.py
@@ -33,13 +33,13 @@ python

 Converting pi0_base:
 ```python
-python -m lerobot.policies.pi0.conversion_scripts.convert_pi0_to_hf_lerobot \
+python lerobot/common/policies/pi0/conversion_scripts/convert_pi0_to_hf_lerobot.py \
    --checkpoint_dir /home/remi_cadene/.cache/openpi/openpi-assets/checkpoints/pi0_base/params \
    --output_path /home/remi_cadene/.cache/openpi/openpi-assets/checkpoints/pi0_base_pytorch
 ```

 ```python
-python -m lerobot.policies.pi0.conversion_scripts.convert_pi0_to_hf_lerobot \
+python lerobot/common/policies/pi0/conversion_scripts/convert_pi0_to_hf_lerobot.py \
    --checkpoint_dir /home/remi_cadene/.cache/openpi/openpi-assets/checkpoints/pi0_aloha_sim/params \
    --output_path /home/remi_cadene/.cache/openpi/openpi-assets/checkpoints/pi0_aloha_sim_pytorch
 ```
@@ -54,12 +54,12 @@ import orbax.checkpoint as ocp
 import torch
 from jax.sharding import SingleDeviceSharding

-from lerobot.policies.pi0.configuration_pi0 import PI0Config
-from lerobot.policies.pi0.conversion_scripts.conversion_utils import (
+from lerobot.common.policies.pi0.configuration_pi0 import PI0Config
+from lerobot.common.policies.pi0.conversion_scripts.conversion_utils import (
    get_gemma_config,
    get_paligemma_config,
 )
-from lerobot.policies.pi0.modeling_pi0 import PI0Policy
+from lerobot.common.policies.pi0.modeling_pi0 import PI0Policy

 PRECISIONS = {"bfloat16": torch.bfloat16, "float32": torch.float32, "float16": torch.float16}

--- a/lerobot/common/policies/pi0/flex_attention.py
+++ b/lerobot/common/policies/pi0/flex_attention.py
--- a/lerobot/common/policies/pi0/modeling_pi0.py
+++ b/lerobot/common/policies/pi0/modeling_pi0.py
@@ -29,7 +29,7 @@ pip install -e ".[pi0]"

 Example of finetuning the pi0 pretrained model (`pi0_base` in `openpi`):
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
 --policy.path=lerobot/pi0 \
 --dataset.repo_id=danaaubakirova/koch_test
 ```
@@ -37,7 +37,7 @@ python -m lerobot.scripts.train \
 Example of finetuning the pi0 neural network with PaliGemma and expert Gemma
 pretrained with VLM default parameters before pi0 finetuning:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
 --policy.type=pi0 \
 --dataset.repo_id=danaaubakirova/koch_test
 ```
@@ -57,15 +57,15 @@ import torch.nn.functional as F  # noqa: N812
 from torch import Tensor, nn
 from transformers import AutoTokenizer

-from lerobot.constants import ACTION, OBS_STATE
-from lerobot.policies.normalize import Normalize, Unnormalize
-from lerobot.policies.pi0.configuration_pi0 import PI0Config
-from lerobot.policies.pi0.paligemma_with_expert import (
+from lerobot.common.constants import ACTION, OBS_STATE
+from lerobot.common.policies.normalize import Normalize, Unnormalize
+from lerobot.common.policies.pi0.configuration_pi0 import PI0Config
+from lerobot.common.policies.pi0.paligemma_with_expert import (
    PaliGemmaWithExpertConfig,
    PaliGemmaWithExpertModel,
 )
-from lerobot.policies.pretrained import PreTrainedPolicy
-from lerobot.utils.utils import get_safe_dtype
+from lerobot.common.policies.pretrained import PreTrainedPolicy
+from lerobot.common.utils.utils import get_safe_dtype


 def create_sinusoidal_pos_embedding(
@@ -260,11 +260,6 @@ class PI0Policy(PreTrainedPolicy):
    def get_optim_params(self) -> dict:
        return self.parameters()

-    @torch.no_grad
-    def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
-        """Predict a chunk of actions given environment observations."""
-        raise NotImplementedError("Currently not implemented for PI0")
-
    @torch.no_grad
    def select_action(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor:
        """Select a single action given environment observations.
--- a/lerobot/common/policies/pi0/paligemma_with_expert.py
+++ b/lerobot/common/policies/pi0/paligemma_with_expert.py
@@ -27,7 +27,7 @@ from transformers import (
 )
 from transformers.models.auto import CONFIG_MAPPING

-from lerobot.policies.pi0.flex_attention import flex_attention_forward
+from lerobot.common.policies.pi0.flex_attention import flex_attention_forward


 def apply_rope(x, positions, max_wavelength=10_000):
--- a/lerobot/common/policies/pi0fast/configuration_pi0fast.py
+++ b/lerobot/common/policies/pi0fast/configuration_pi0fast.py
@@ -1,11 +1,11 @@
 from dataclasses import dataclass, field

-from lerobot.configs.policies import PreTrainedConfig
-from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature
-from lerobot.optim.optimizers import AdamWConfig
-from lerobot.optim.schedulers import (
+from lerobot.common.optim.optimizers import AdamWConfig
+from lerobot.common.optim.schedulers import (
    CosineDecayWithWarmupSchedulerConfig,
 )
+from lerobot.configs.policies import PreTrainedConfig
+from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature


@PreTrainedConfig.register_subclass("pi0fast")
--- a/lerobot/common/policies/pi0fast/modeling_pi0fast.py
+++ b/lerobot/common/policies/pi0fast/modeling_pi0fast.py
@@ -24,14 +24,14 @@ Designed by Physical Intelligence. Ported from Jax by Hugging Face.

 Example of finetuning the pi0+FAST pretrained model (`pi0_fast_base` in `openpi`):
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
 --policy.path=lerobot/pi0fast_base \
 --dataset.repo_id=danaaubakirova/koch_test
 ```

 Example of training the pi0+FAST neural network with from scratch:
 ```bash
-python -m lerobot.scripts.train \
+python lerobot/scripts/train.py \
 --policy.type=pi0fast \
 --dataset.repo_id=danaaubakirova/koch_test
 ```
@@ -56,10 +56,10 @@ from transformers import AutoProcessor, AutoTokenizer, PaliGemmaForConditionalGe
 from transformers.cache_utils import HybridCache, StaticCache
 from transformers.models.auto import CONFIG_MAPPING

-from lerobot.constants import ACTION, OBS_STATE
-from lerobot.policies.normalize import Normalize, Unnormalize
-from lerobot.policies.pi0fast.configuration_pi0fast import PI0FASTConfig
-from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.common.constants import ACTION, OBS_STATE
+from lerobot.common.policies.normalize import Normalize, Unnormalize
+from lerobot.common.policies.pi0fast.configuration_pi0fast import PI0FASTConfig
+from lerobot.common.policies.pretrained import PreTrainedPolicy

 PRECISION = {
    "float16": torch.float16,
@@ -192,11 +192,6 @@ class PI0FASTPolicy(PreTrainedPolicy):
            actions[:, :, motor_idx] = aloha_gripper_from_angular_inv(actions[:, :, motor_idx])
        return actions

-    @torch.no_grad
-    def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
-        """Predict a chunk of actions given environment observations."""
-        raise NotImplementedError("Currently not implemented for PI0FAST")
-
    @torch.no_grad
    def select_action(self, batch: dict[str, Tensor]) -> Tensor:
        """Select a single action given environment observations.
--- a/Show More
+++ b/Show More