mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-30 10:21:24 +00:00
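remove debug log (this diff also changes the defaults of `use_categorical_rewards` and `inference_stride` in RLearNConfig)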
This commit is contained in:
@@ -57,7 +57,7 @@ class RLearNConfig(PreTrainedConfig):
|
||||
num_register_tokens: int = 4
|
||||
|
||||
# --- reward head options ---
|
||||
use_categorical_rewards: bool = False # classification over bins
|
||||
use_categorical_rewards: bool = True # classification over bins
|
||||
num_reward_bins: int = 25
|
||||
reward_min_value: float = 0.0 # for HL-Gauss range
|
||||
reward_max_value: float = 1.0
|
||||
@@ -65,7 +65,7 @@ class RLearNConfig(PreTrainedConfig):
|
||||
hl_gauss_num_bins: int = 25 # histogram resolution
|
||||
|
||||
# Inference-time subsampling and regularization
|
||||
inference_stride: int = 1
|
||||
inference_stride: int = 2
|
||||
frame_dropout_p: float = 0.10
|
||||
|
||||
# Training
|
||||
|
||||
@@ -270,112 +270,6 @@ class RLearNPolicy(PreTrainedPolicy):
|
||||
# Fallback to default
|
||||
frame_features = cls_tokens_flat.view(B, T, -1)
|
||||
|
||||
# DEBUG: Analyze vision feature variability (use per-frame pooled features for readability)
|
||||
if self.training and torch.rand(1).item() < 0.1: # 10% of training steps for more frequent debugging
|
||||
with torch.no_grad():
|
||||
print(f"\n🔍 SigLIP2 VISION FEATURE DEBUG (B={B}, T={T}):")
|
||||
|
||||
# CRITICAL: Check if input frames are actually different
|
||||
print(f"Raw frame tensor stats: mean={frames.mean():.6f}, std={frames.std():.6f}")
|
||||
|
||||
# Check frame-to-frame differences in raw input
|
||||
if T > 1:
|
||||
# FIXED: Use proper tensor operations for difference calculation
|
||||
frame_diffs = (frames[:, 1:, :, :, :] - frames[:, :-1, :, :, :]).pow(2).sum(dim=(2, 3, 4)).sqrt()
|
||||
raw_frame_diffs = frame_diffs.mean()
|
||||
print(f"Raw input frame differences: {raw_frame_diffs:.6f}")
|
||||
|
||||
if raw_frame_diffs < 0.001:
|
||||
print(f" ⚠️ INPUT FRAMES ARE NEARLY IDENTICAL! Diff: {raw_frame_diffs:.8f}")
|
||||
else:
|
||||
print(f" ✓ Input frames are different. Diff: {raw_frame_diffs:.6f}")
|
||||
|
||||
# Check processed pixel values
|
||||
first_sample_pixels = pixel_values[:T]
|
||||
if T > 1:
|
||||
# FIXED: Use proper tensor operations
|
||||
pixel_frame_diffs = (first_sample_pixels[1:] - first_sample_pixels[:-1]).pow(2).sum(dim=(1, 2, 3)).sqrt()
|
||||
pixel_diffs = pixel_frame_diffs.mean()
|
||||
print(f"Processed pixel_values differences: {pixel_diffs:.6f}")
|
||||
|
||||
if pixel_diffs < 0.001:
|
||||
print(f" ⚠️ PROCESSED PIXELS ARE NEARLY IDENTICAL! Diff: {pixel_diffs:.8f}")
|
||||
else:
|
||||
print(f" ✓ Processed pixels are different. Diff: {pixel_diffs:.6f}")
|
||||
|
||||
# Check if all samples in batch have same first frame
|
||||
if B > 1:
|
||||
# FIXED: Use proper tensor operations
|
||||
batch_first_frames = pixel_values[::T] # Every T-th frame (first frame of each sample)
|
||||
if len(batch_first_frames) > 1:
|
||||
first_frame_diffs = (batch_first_frames[1:] - batch_first_frames[0].unsqueeze(0)).pow(2).sum(dim=(1, 2, 3)).sqrt()
|
||||
batch_first_frame_diff = first_frame_diffs.mean()
|
||||
print(f"Batch first-frame differences: {batch_first_frame_diff:.6f}")
|
||||
|
||||
if batch_first_frame_diff < 0.001:
|
||||
print(f" ⚠️ ALL BATCH SAMPLES HAVE SAME FIRST FRAME! Diff: {batch_first_frame_diff:.8f}")
|
||||
else:
|
||||
print(f" ✓ Batch samples have different first frames. Diff: {batch_first_frame_diff:.6f}")
|
||||
|
||||
# Check feature statistics
|
||||
vision_features = frame_features # (B, T, D)
|
||||
feature_mean = vision_features.mean().item()
|
||||
feature_std = vision_features.std().item()
|
||||
print(f"Feature stats: mean={feature_mean:.4f}, std={feature_std:.4f}")
|
||||
|
||||
# Extra DIAGNOSTIC: CLS vs patch mean/max deltas for one sample, two far-apart frames
|
||||
try:
|
||||
if 'last_hidden_state' in vision_outputs.__dict__ and T >= 2:
|
||||
# Recover CLS tokens (already computed as frame_features)
|
||||
cls = frame_features
|
||||
b0 = 0
|
||||
f0, f1 = 0, T - 1
|
||||
# L2 between CLS at two frames
|
||||
cls_l2 = (cls[b0, f1] - cls[b0, f0]).pow(2).sum().sqrt().item()
|
||||
print(f"CLS ΔL2: {cls_l2:.6f}")
|
||||
except Exception as _:
|
||||
pass
|
||||
|
||||
# Check temporal variance for each sample
|
||||
for b_idx in range(min(B, 2)): # Debug first 2 samples
|
||||
sample_features = vision_features[b_idx] # (T, D)
|
||||
|
||||
# Variance across time dimension
|
||||
temporal_variance = sample_features.var(dim=0).mean().item()
|
||||
temporal_std = sample_features.std(dim=0).mean().item()
|
||||
print(f"Sample {b_idx} temporal variance: {temporal_variance:.6f} (std: {temporal_std:.6f})")
|
||||
|
||||
# Frame-to-frame differences
|
||||
if T > 1:
|
||||
frame_diffs = (sample_features[1:] - sample_features[:-1]).pow(2).sum(dim=-1).sqrt()
|
||||
avg_frame_diff = frame_diffs.mean().item()
|
||||
max_frame_diff = frame_diffs.max().item()
|
||||
min_frame_diff = frame_diffs.min().item()
|
||||
print(f"Sample {b_idx} frame differences: avg={avg_frame_diff:.6f}, "
|
||||
f"max={max_frame_diff:.6f}, min={min_frame_diff:.6f}")
|
||||
|
||||
# Check if features are nearly identical
|
||||
if avg_frame_diff < 0.001:
|
||||
print(f" ⚠️ FEATURES BARELY CHANGING! Avg diff: {avg_frame_diff:.8f}")
|
||||
elif avg_frame_diff < 0.01:
|
||||
print(f" ⚠️ Features changing slowly. Avg diff: {avg_frame_diff:.6f}")
|
||||
else:
|
||||
print(f" ✓ Features changing normally. Avg diff: {avg_frame_diff:.6f}")
|
||||
|
||||
# Overall batch statistics
|
||||
if B > 1 and T > 1:
|
||||
all_diffs = (
|
||||
vision_features[:, 1:, :] - vision_features[:, :-1, :]
|
||||
).pow(2).sum(dim=-1).sqrt().flatten()
|
||||
print(f"Batch-wide frame differences: mean={all_diffs.mean():.6f}, "
|
||||
f"std={all_diffs.std():.6f}")
|
||||
|
||||
# Check percentage of very small differences
|
||||
small_diffs = (all_diffs < 0.001).float().mean().item() * 100
|
||||
print(f"Percentage of tiny differences (<0.001): {small_diffs:.1f}%")
|
||||
|
||||
print("=" * 50)
|
||||
|
||||
return frame_features
|
||||
|
||||
def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]:
|
||||
|
||||
Reference in New Issue
Block a user