From f52cf79d8e52e1d00df4c5a151f3aba016bd3eed Mon Sep 17 00:00:00 2001
From: Jade Choghari <chogharijade@gmail.com>
Date: Sat, 15 Nov 2025 19:23:27 +0100
Subject: [PATCH] logits matching

---
 test_2.py | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 xvla      |  1 +
 2 files changed, 73 insertions(+)
 create mode 100644 test_2.py
 create mode 160000 xvla

diff --git a/test_2.py b/test_2.py
new file mode 100644
index 000000000..f00995c13
--- /dev/null
+++ b/test_2.py
@@ -0,0 +1,72 @@
+from xvla.models.processing_xvla import XVLAProcessor
+from xvla.models.modeling_xvla import XVLA
+from xvla.models.configuration_xvla import XVLAConfig
+import torch
+import random
+import numpy as np
+from PIL import Image
+from lerobot.policies.factory import make_policy
+from lerobot.configs.policies import PreTrainedConfig
+from lerobot.envs.factory import make_env_config
+cfg = XVLAConfig.from_pretrained("/raid/jade/models/xvla-libero")  # NOTE(review): not read before being rebound below
+model = XVLA.from_pretrained("/raid/jade/models/xvla-libero")
+model.eval()
+model.to("cuda")
+processor = XVLAProcessor.from_pretrained("/raid/jade/models/xvla-libero")
+# NOTE: config, model and processor above are all loaded from the same local checkpoint path.
+# Seed the torch, Python and NumPy RNGs so the random test inputs below are reproducible.
+torch.manual_seed(42)
+random.seed(42)
+np.random.seed(42)
+
+def make_random_pil_images(num_images=3, H=480, W=640):
+    """Return a list of `num_images` PIL images built from random uint8 noise.
+
+    Each image comes from an (H, W, 3) array drawn with np.random.randint(0, 256).
+    """
+    shape = (H, W, 3)
+    noise = (np.random.randint(0, 256, shape, dtype=np.uint8) for _ in range(num_images))
+    return [Image.fromarray(arr) for arr in noise]
+
+# Example inputs: random images (function defaults) and a placeholder instruction.
+images = make_random_pil_images()
+language_instruction = "This is a random image"
+# Multimodal preprocessing by the processor; fail early if an expected key is missing.
+inputs = processor(images, language_instruction)
+if not {"input_ids", "image_input", "image_mask"}.issubset(inputs):
+    raise ValueError("Processor did not return the expected keys.")
+# Extra inputs that are added to `inputs` further down, next to the processor outputs.
+proprio = torch.randn(1, 20)
+domain_id = torch.tensor([int(0)], dtype=torch.long)
+
+# Align to the model's device/dtype (dtype is read from the model's first parameter).
+device = model.device
+dtype = next(model.parameters()).dtype
+
+def to_model(t: torch.Tensor) -> torch.Tensor:
+    """Return `t` as a tensor on `device`; floating-point values are also cast to `dtype`."""
+    tensor = t if isinstance(t, torch.Tensor) else torch.as_tensor(t)
+    cast = {"dtype": dtype} if tensor.is_floating_point() else {}
+    return tensor.to(device=device, **cast)
+
+# Move every processor output onto the model's device/dtype, then add the two
+# extra inputs (`proprio`, `domain_id`) that generate_actions receives via **inputs.
+inputs = {name: to_model(value) for name, value in inputs.items()}
+inputs["proprio"] = to_model(proprio)
+inputs["domain_id"] = domain_id.to(device)
+
+# Inference. Re-seed immediately before each generate_actions call so any noise it
+# samples from torch's global RNG is the same for both models being compared.
+torch.manual_seed(42)
+action = model.generate_actions(**inputs, steps=10).squeeze(0).float().cpu().numpy()
+
+#### now for lerobot model #####################################################
+cfg = PreTrainedConfig.from_pretrained("/raid/jade/models/xvla-libero-og_migrated")
+env_cfg = make_env_config("libero", task="libero_spatial")
+cfg.pretrained_path = "/raid/jade/models/xvla-libero-og_migrated"
+policy = make_policy(cfg=cfg, env_cfg=env_cfg)
+policy.eval()
+policy.to("cuda")
+torch.manual_seed(42)  # restore the RNG state used for the reference call above
+action_1 = policy.model.generate_actions(**inputs, steps=10).squeeze(0).float().cpu().numpy()
+breakpoint()  # stop here to compare `action` and `action_1` interactively
\ No newline at end of file
diff --git a/xvla b/xvla
new file mode 160000
index 000000000..e2f0554f8
--- /dev/null
+++ b/xvla
@@ -0,0 +1 @@
+Subproject commit e2f0554f8ce8ab19c678652f9c30c431a37b7bbd