Better documentation for CLI arguments

This commit is contained in:
nemo
2025-10-16 14:39:43 +02:00
parent d39e7f0bc4
commit e263a1de13
2 changed files with 22 additions and 11 deletions

View File

@@ -71,23 +71,27 @@ class EvalConfig:
@dataclass
class PeftConfig:
# PEFT offers many methods, layer adapters are the most common and currently also the most effective methods so
# we'll focus on those in this high-level config interface.
# PEFT offers many fine-tuning methods. Layer adapters are the most common and currently also the most
# effective, so we'll focus on those in this high-level config interface.
# `target_modules` can be set by the user but defaults to specific values depending on the policy in use. See
# `get_default_peft_configuration` in `scripts/train.py`.
#
# Either a string (module name suffix or 'all-linear'), a list of module name suffixes or a regular expression
# describing module names to target with the configured PEFT method. Some policies have a default value for this
# so that you don't *have* to choose which layers to adapt but it might still be worthwhile depending on your case.
target_modules: list[str] | str | None = None
# Similarly to `target_modules` this will have policy-dependent defaults which the user can override.
modules_to_save: list[str] | None = None
# Names/suffixes of modules to fully fine-tune and store alongside adapter weights. Useful for layers that are
# not part of a pre-trained model (e.g., action state projections). Depending on the policy this defaults to layers
# that are newly created in pre-trained policies. If you're fine-tuning an already trained policy you might want
# to set this to `[]`. Corresponds to PEFT's `modules_to_save`.
full_training_modules: list[str] | None = None
# The PEFT (adapter) method to apply to the policy.
# The PEFT (adapter) method to apply to the policy. Needs to be a valid PEFT type.
method_type: str = "LORA"
# Adapter initialization method. Look at the specific adapter method documentation for defaults.
# Adapter initialization method. Look at the specific PEFT adapter documentation for defaults.
init_type: str | None = None
# We expect that all adapters are in some way doing rank-decomposition. This is not strictly true: several
# methods don't, but we're focusing on rank-based methods for now.
# We expect that all PEFT adapters are in some way doing rank-decomposition; this parameter therefore specifies
# the rank used for the adapter. In general, a higher rank means more trainable parameters and behavior closer
# to full fine-tuning.
r: int = 16

View File

@@ -130,6 +130,12 @@ def update_policy(
def get_default_peft_configuration(policy_type):
"""Build a PEFT configuration for the given policy type assuming that we train a policy from scratch
(i.e. only parts of it are pre-trained) and not from a checkpoint. This means that some layers are targeted for
full fine-tuning via `modules_to_save`, e.g. `state_proj` for SmolVLA which would otherwise be randomly initialized.
Users can still override the full fine-tuning of these layers by passing `--peft.full_training_modules=[]`.
"""
if policy_type == "smolvla":
return {
"target_modules": r"(model\.vlm_with_expert\.lm_expert\..*\.(q_proj|v_proj)|model\.action_.*|model\.state_proj.*)",
@@ -164,6 +170,7 @@ def wrap_policy_in_peft_model(cfg, policy):
peft_config_policy = get_default_peft_configuration(cfg.policy.type)
peft_config_cli = dataclasses.asdict(cfg.peft) if cfg.peft else {}
peft_config_cli['modules_to_save'] = peft_config_cli['full_training_modules'] # compatibility with PEFT
peft_method_type = PeftType[peft_config_cli["method_type"].upper()]
peft_config_cls = PEFT_TYPE_TO_CONFIG_MAPPING[peft_method_type]