diff --git a/src/lerobot/policies/rlearn/eval_script.py b/src/lerobot/policies/rlearn/eval_script.py
index 82c568f56..69cfc1b1f 100644
--- a/src/lerobot/policies/rlearn/eval_script.py
+++ b/src/lerobot/policies/rlearn/eval_script.py
@@ -165,10 +165,10 @@ def predict_rewards_sliding(model, frames, language, max_seq_len=16, batch_size=
             pad = window[:1].expand(pad_needed, -1, -1, -1)  # repeat first frame
             window = torch.cat([pad, window], dim=0)
         
-        # CRITICAL FIX: Use the MLP corresponding to the frame's temporal position
-        # Frame 0 -> MLP[0], Frame 1 -> MLP[1], ..., Frame 15+ -> MLP[15]
-        # This matches how the model was trained with different MLPs for different temporal positions
-        frame_pos = min(i, L - 1)  # Clamp to available MLP range [0, 15]
+        # IMPROVED FIX: Cycle through MLPs to get varied predictions throughout the episode
+        # This ensures we use all 16 frame-specific MLPs and get varied outputs
+        # Frames 0-15 use MLPs 0-15, frames 16-31 use MLPs 0-15 again, etc.
+        frame_pos = i % L  # Cycle through [0, 1, 2, ..., 15, 0, 1, 2, ..., 15, ...]
             
         windows.append(window)
         frame_positions.append(frame_pos)