From 45348d7b691c355e9b5c9ce2c5c3ff1f859decbe Mon Sep 17 00:00:00 2001 From: Pepijn Date: Mon, 1 Sep 2025 13:32:37 +0200 Subject: [PATCH] remove debug log --- .../policies/rlearn/configuration_rlearn.py | 4 +- .../policies/rlearn/modeling_rlearn.py | 106 ------------------ 2 files changed, 2 insertions(+), 108 deletions(-) diff --git a/src/lerobot/policies/rlearn/configuration_rlearn.py b/src/lerobot/policies/rlearn/configuration_rlearn.py index 690e50899..87bfacebc 100644 --- a/src/lerobot/policies/rlearn/configuration_rlearn.py +++ b/src/lerobot/policies/rlearn/configuration_rlearn.py @@ -57,7 +57,7 @@ class RLearNConfig(PreTrainedConfig): num_register_tokens: int = 4 # --- reward head options --- - use_categorical_rewards: bool = False # classification over bins + use_categorical_rewards: bool = True # classification over bins num_reward_bins: int = 25 reward_min_value: float = 0.0 # for HL-Gauss range reward_max_value: float = 1.0 @@ -65,7 +65,7 @@ class RLearNConfig(PreTrainedConfig): hl_gauss_num_bins: int = 25 # histogram resolution # Inference-time subsampling and regularization - inference_stride: int = 1 + inference_stride: int = 2 frame_dropout_p: float = 0.10 # Training diff --git a/src/lerobot/policies/rlearn/modeling_rlearn.py b/src/lerobot/policies/rlearn/modeling_rlearn.py index 57723c96c..ebe62574c 100644 --- a/src/lerobot/policies/rlearn/modeling_rlearn.py +++ b/src/lerobot/policies/rlearn/modeling_rlearn.py @@ -270,112 +270,6 @@ class RLearNPolicy(PreTrainedPolicy): # Fallback to default frame_features = cls_tokens_flat.view(B, T, -1) - # DEBUG: Analyze vision feature variability (use per-frame pooled features for readability) - if self.training and torch.rand(1).item() < 0.1: # 10% of training steps for more frequent debugging - with torch.no_grad(): - print(f"\n🔍 SigLIP2 VISION FEATURE DEBUG (B={B}, T={T}):") - - # CRITICAL: Check if input frames are actually different - print(f"Raw frame tensor stats: mean={frames.mean():.6f}, std={frames.std():.6f}") - - # Check frame-to-frame differences in raw input - if T > 1: - # FIXED: Use proper tensor operations for difference calculation - frame_diffs = (frames[:, 1:, :, :, :] - frames[:, :-1, :, :, :]).pow(2).sum(dim=(2, 3, 4)).sqrt() - raw_frame_diffs = frame_diffs.mean() - print(f"Raw input frame differences: {raw_frame_diffs:.6f}") - - if raw_frame_diffs < 0.001: - print(f" ⚠️ INPUT FRAMES ARE NEARLY IDENTICAL! Diff: {raw_frame_diffs:.8f}") - else: - print(f" ✓ Input frames are different. Diff: {raw_frame_diffs:.6f}") - - # Check processed pixel values - first_sample_pixels = pixel_values[:T] - if T > 1: - # FIXED: Use proper tensor operations - pixel_frame_diffs = (first_sample_pixels[1:] - first_sample_pixels[:-1]).pow(2).sum(dim=(1, 2, 3)).sqrt() - pixel_diffs = pixel_frame_diffs.mean() - print(f"Processed pixel_values differences: {pixel_diffs:.6f}") - - if pixel_diffs < 0.001: - print(f" ⚠️ PROCESSED PIXELS ARE NEARLY IDENTICAL! Diff: {pixel_diffs:.8f}") - else: - print(f" ✓ Processed pixels are different. Diff: {pixel_diffs:.6f}") - - # Check if all samples in batch have same first frame - if B > 1: - # FIXED: Use proper tensor operations - batch_first_frames = pixel_values[::T] # Every T-th frame (first frame of each sample) - if len(batch_first_frames) > 1: - first_frame_diffs = (batch_first_frames[1:] - batch_first_frames[0].unsqueeze(0)).pow(2).sum(dim=(1, 2, 3)).sqrt() - batch_first_frame_diff = first_frame_diffs.mean() - print(f"Batch first-frame differences: {batch_first_frame_diff:.6f}") - - if batch_first_frame_diff < 0.001: - print(f" ⚠️ ALL BATCH SAMPLES HAVE SAME FIRST FRAME! Diff: {batch_first_frame_diff:.8f}") - else: - print(f" ✓ Batch samples have different first frames. Diff: {batch_first_frame_diff:.6f}") - - # Check feature statistics - vision_features = frame_features # (B, T, D) - feature_mean = vision_features.mean().item() - feature_std = vision_features.std().item() - print(f"Feature stats: mean={feature_mean:.4f}, std={feature_std:.4f}") - - # Extra DIAGNOSTIC: CLS vs patch mean/max deltas for one sample, two far-apart frames - try: - if 'last_hidden_state' in vision_outputs.__dict__ and T >= 2: - # Recover CLS tokens (already computed as frame_features) - cls = frame_features - b0 = 0 - f0, f1 = 0, T - 1 - # L2 between CLS at two frames - cls_l2 = (cls[b0, f1] - cls[b0, f0]).pow(2).sum().sqrt().item() - print(f"CLS ΔL2: {cls_l2:.6f}") - except Exception as _: - pass - - # Check temporal variance for each sample - for b_idx in range(min(B, 2)): # Debug first 2 samples - sample_features = vision_features[b_idx] # (T, D) - - # Variance across time dimension - temporal_variance = sample_features.var(dim=0).mean().item() - temporal_std = sample_features.std(dim=0).mean().item() - print(f"Sample {b_idx} temporal variance: {temporal_variance:.6f} (std: {temporal_std:.6f})") - - # Frame-to-frame differences - if T > 1: - frame_diffs = (sample_features[1:] - sample_features[:-1]).pow(2).sum(dim=-1).sqrt() - avg_frame_diff = frame_diffs.mean().item() - max_frame_diff = frame_diffs.max().item() - min_frame_diff = frame_diffs.min().item() - print(f"Sample {b_idx} frame differences: avg={avg_frame_diff:.6f}, " - f"max={max_frame_diff:.6f}, min={min_frame_diff:.6f}") - - # Check if features are nearly identical - if avg_frame_diff < 0.001: - print(f" ⚠️ FEATURES BARELY CHANGING! Avg diff: {avg_frame_diff:.8f}") - elif avg_frame_diff < 0.01: - print(f" ⚠️ Features changing slowly. Avg diff: {avg_frame_diff:.6f}") - else: - print(f" ✓ Features changing normally. Avg diff: {avg_frame_diff:.6f}") - - # Overall batch statistics - if B > 1 and T > 1: - all_diffs = ( - vision_features[:, 1:, :] - vision_features[:, :-1, :] - ).pow(2).sum(dim=-1).sqrt().flatten() - print(f"Batch-wide frame differences: mean={all_diffs.mean():.6f}, " - f"std={all_diffs.std():.6f}") - - # Check percentage of very small differences - small_diffs = (all_diffs < 0.001).float().mean().item() * 100 - print(f"Percentage of tiny differences (<0.001): {small_diffs:.1f}%") - - print("=" * 50) - return frame_features def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]: