diff --git a/src/lerobot/policies/rlearn/configuration_rlearn.py b/src/lerobot/policies/rlearn/configuration_rlearn.py index e02b5084a..31dca0b67 100644 --- a/src/lerobot/policies/rlearn/configuration_rlearn.py +++ b/src/lerobot/policies/rlearn/configuration_rlearn.py @@ -57,7 +57,7 @@ class RLearNConfig(PreTrainedConfig): num_register_tokens: int = 4 # --- reward head options --- - use_categorical_rewards: bool = True # classification over bins + use_categorical_rewards: bool = False # classification over bins num_reward_bins: int = 25 reward_min_value: float = 0.0 # for HL-Gauss range reward_max_value: float = 1.0 @@ -65,7 +65,7 @@ class RLearNConfig(PreTrainedConfig): hl_gauss_num_bins: int = 25 # histogram resolution # Inference-time subsampling and regularization - inference_stride: int = 1 # in forward + inference_stride: int = 1 # an extra, second downsampling applied in forward after window sampling/rewind; keep it at 1 to disable the extra skipping frame_dropout_p: float = 0.10 # Training