From cff4bcf4a002871c1ad972bfa2e7a8fdeead5b54 Mon Sep 17 00:00:00 2001
From: Ethan Pronovost <EPronovost@users.noreply.github.com>
Date: Tue, 14 Apr 2026 02:28:49 -0700
Subject: [PATCH] Update reward classifier training config (#3147)

Co-authored-by: Khalil Meftah <khalil.meftah@huggingface.co>
---
 docs/source/hilserl.mdx | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/docs/source/hilserl.mdx b/docs/source/hilserl.mdx
index ad1c74f9a..5b9439d51 100644
--- a/docs/source/hilserl.mdx
+++ b/docs/source/hilserl.mdx
@@ -685,6 +685,10 @@ Example configuration for training the [reward classifier](https://huggingface.c
 
 ```json
 {
+  "dataset": {
+    "repo_id": "hf_username/dataset_name",
+    "root": null
+  },
   "policy": {
     "type": "reward_classifier",
     "model_name": "helper2424/resnet10",
@@ -705,8 +709,28 @@ Example configuration for training the [reward classifier](https://huggingface.c
         "type": "VISUAL",
         "shape": [3, 128, 128]
       }
-    }
-  }
+    },
+    "push_to_hub": true,
+    "repo_id": "hf_username/model_repo"
+  },
+  "batch_size": 16,
+  "num_workers": 4,
+  "steps": 5000,
+  "log_freq": 10,
+  "eval_freq": 1000,
+  "save_freq": 1000,
+  "save_checkpoint": true,
+  "seed": 2,
+  "resume": false,
+  "optimizer": {
+    "grad_clip_norm": 10.0
+  },
+  "wandb": {
+    "enable": true,
+    "project": "reward-classifier",
+    "disable_artifact": false
+  },
+  "job_name": "reward-classifier"
 }
 ```