mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-31 19:01:28 +00:00
* feat(rewards): add RewardModelConfig and PreTrainedRewardModel base classes * refactor(rewards): migrate Classifier from policies/sac/reward_model/ to rewards/classifier/ * refactor(rewards): migrate SARM from policies/sarm/ to rewards/sarm/ * refactor(rewards): add rewards/factory.py and remove reward model code from policies/factory.py * refactor(rewards): update imports and delete old reward model locations * test(rewards): add reward model tests and update existing test imports * fix(rewards): restore full Classifier and SARM implementations * test(rewards): restore missing CUDA and mixed precision classifier processor tests * refactor(lerobot_train.py): remove rabc specific configuration and replace it with a generic samplerweight class in lerobot_train * refactor(lerobot_train.py): add missing sampling weight script * linter + missing files * add testing for sampl weighter * revert some useless changes, improve typing * update docs * add automatic detection of the progress path * remove type exp * improve comment * fix: move rabc.py to rewards/sarm/ and update import paths * refactor(imports): update reward model imports to new module structure * refactor(imports): update reward model imports to reflect new module structure * refactor(imports): conditionally import pandas based on availability * feat(configs): add reward_model field to TrainPipelineConfig and Hub fields to RewardModelConfig * refactor(policies): remove reward model branches from policy factory and __init__ * refactor(rewards): expand __init__ facade and fix SARMConfig __post_init__ crash * feat(train): route reward model training through rewards/factory instead of policies/factory * refactor(train): streamline reward model training logic * fix(rewards): ensure FileNotFoundError is raised for missing config_file * refactor(train): update __get_path_fields__ to include reward_model for config loading * refactor(classifier): remove redundant input normalization in predict_reward method * 
fix(train): raise ValueError for non-trainable reward models in train function * refactor(pretrained_rm): add model card template * refactor(tests): reward models * refactor(sarm): update reset method and remove unused action prediction methods * refactor(wandb): differentiate tags for reward model and policy training in cfg_to_group function * fix(train): raise ValueError for PEFT usage in reward model training * refactor(rewards): enhance RewardModelConfig with device handling and delta indices properties --------- Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co>
75 lines
2.8 KiB
Python
75 lines
2.8 KiB
Python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
from dataclasses import dataclass, field
|
|
|
|
from lerobot.configs import NormalizationMode
|
|
from lerobot.configs.rewards import RewardModelConfig
|
|
from lerobot.optim import AdamWConfig, LRSchedulerConfig, OptimizerConfig
|
|
from lerobot.utils.constants import OBS_IMAGE
|
|
|
|
|
|
@RewardModelConfig.register_subclass(name="reward_classifier")
@dataclass
class RewardClassifierConfig(RewardModelConfig):
    """Configuration for the Reward Classifier model.

    The class is registered on ``RewardModelConfig`` under the name
    ``"reward_classifier"``. Besides the plain hyperparameter fields it
    provides the optimizer preset (AdamW, no LR scheduler), reports that no
    delta indices are used, and checks that an image observation is present
    among the input features.
    """

    # Same string as the one used to register this subclass.
    name: str = "reward_classifier"
    num_classes: int = 2
    hidden_dim: int = 256
    latent_dim: int = 256
    image_embedding_pooling_dim: int = 8
    dropout_rate: float = 0.1
    # TODO: This needs to be updated. The model on the Hub doesn't call
    # self.post_init() in its __init__, which is required by transformers v5
    # to set all_tied_weights_keys. The from_pretrained call fails when it
    # tries to access this attribute during _finalize_model_loading.
    model_name: str = "helper2424/resnet10"
    device: str = "cpu"
    model_type: str = "cnn"  # "transformer" or "cnn"
    num_cameras: int = 2
    # The three values below are forwarded to AdamWConfig by
    # get_optimizer_preset().
    learning_rate: float = 1e-4
    weight_decay: float = 0.01
    grad_clip_norm: float = 1.0
    # A factory is used so each instance gets its own mapping dict.
    normalization_mapping: dict[str, NormalizationMode] = field(
        default_factory=lambda: {"VISUAL": NormalizationMode.MEAN_STD}
    )

    @property
    def observation_delta_indices(self) -> list | None:
        """Observation delta indices; always ``None`` for this config."""
        return None

    @property
    def action_delta_indices(self) -> list | None:
        """Action delta indices; always ``None`` for this config."""
        return None

    @property
    def reward_delta_indices(self) -> list | None:
        """Reward delta indices; always ``None`` for this config."""
        return None

    def get_optimizer_preset(self) -> OptimizerConfig:
        """Build the default optimizer configuration.

        Returns:
            An ``AdamWConfig`` populated from ``learning_rate``,
            ``weight_decay`` and ``grad_clip_norm``.
        """
        preset = AdamWConfig(
            lr=self.learning_rate,
            weight_decay=self.weight_decay,
            grad_clip_norm=self.grad_clip_norm,
        )
        return preset

    def get_scheduler_preset(self) -> LRSchedulerConfig | None:
        """Return the default LR scheduler configuration (none is defined)."""
        return None

    def validate_features(self) -> None:
        """Check that at least one input feature is an image observation.

        Raises:
            ValueError: If no key of ``input_features`` starts with
                ``OBS_IMAGE``.
        """
        # Stop at the first matching key; one image feature is enough.
        for feature_key in self.input_features:
            if feature_key.startswith(OBS_IMAGE):
                return

        raise ValueError(
            "You must provide an image observation (key starting with 'observation.image') in the input features"
        )
|