From f52cf79d8e52e1d00df4c5a151f3aba016bd3eed Mon Sep 17 00:00:00 2001
From: Jade Choghari
Date: Sat, 15 Nov 2025 19:23:27 +0100
Subject: [PATCH] logits matching

---
 test_2.py | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 xvla      |   1 +
 2 files changed, 114 insertions(+)
 create mode 100644 test_2.py
 create mode 160000 xvla

diff --git a/test_2.py b/test_2.py
new file mode 100644
index 000000000..f00995c13
--- /dev/null
+++ b/test_2.py
@@ -0,0 +1,113 @@
+"""Compare actions from the reference X-VLA model and its LeRobot port.
+
+Both models are fed the same randomly generated images, instruction and
+proprioceptive state; the script fails if the generated actions differ by
+more than ``ATOL``.
+"""
+import random
+
+import numpy as np
+import torch
+from PIL import Image
+
+from lerobot.configs.policies import PreTrainedConfig
+from lerobot.envs.factory import make_env_config
+from lerobot.policies.factory import make_policy
+from xvla.models.configuration_xvla import XVLAConfig  # noqa: F401 (currently unused)
+from xvla.models.modeling_xvla import XVLA
+from xvla.models.processing_xvla import XVLAProcessor
+
+REFERENCE_PATH = "/raid/jade/models/xvla-libero"
+MIGRATED_PATH = "/raid/jade/models/xvla-libero-og_migrated"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+SEED = 42
+NUM_STEPS = 10
+# Absolute tolerance for the final comparison; chosen for this check, tune as needed.
+ATOL = 1e-5
+
+
+def seed_everything(seed=SEED):
+    """Seed the torch, Python and NumPy random number generators."""
+    torch.manual_seed(seed)
+    random.seed(seed)
+    np.random.seed(seed)
+
+
+def make_random_pil_images(num_images=3, H=480, W=640):
+    """Return ``num_images`` random-noise RGB PIL images of size ``W`` x ``H``."""
+    return [
+        Image.fromarray(np.random.randint(0, 256, (H, W, 3), dtype=np.uint8))
+        for _ in range(num_images)
+    ]
+
+
+def to_model(t: torch.Tensor) -> torch.Tensor:
+    """Move ``t`` to the module-level ``device``, casting floats to ``dtype``.
+
+    Non-tensor inputs are converted with ``torch.as_tensor`` first. Integer and
+    boolean tensors keep their dtype.
+    """
+    if not isinstance(t, torch.Tensor):
+        t = torch.as_tensor(t)
+    if t.is_floating_point():
+        return t.to(device=device, dtype=dtype)
+    return t.to(device=device)
+
+
+model = XVLA.from_pretrained(REFERENCE_PATH)
+model.eval()
+model.to(DEVICE)
+processor = XVLAProcessor.from_pretrained(REFERENCE_PATH)
+
+seed_everything()
+
+images = make_random_pil_images()
+language_instruction = "This is a random image"
+# Multimodal preprocessing by processor
+inputs = processor(images, language_instruction)
+if not {"input_ids", "image_input", "image_mask"}.issubset(inputs):
+    raise ValueError("Processor did not return the expected keys.")
+
+proprio = torch.randn(1, 20)
+domain_id = torch.tensor([0], dtype=torch.long)
+
+# Align to the reference model's device/dtype
+device = model.device
+dtype = next(model.parameters()).dtype
+
+inputs = {k: to_model(v) for k, v in inputs.items()}
+inputs.update({
+    "proprio": to_model(proprio),
+    "domain_id": domain_id.to(device),
+})
+
+# Reference inference. Reseed right before each call so that any noise drawn
+# inside generate_actions (assumed; not visible here) is the same for both models.
+seed_everything()
+with torch.no_grad():
+    action = model.generate_actions(**inputs, steps=NUM_STEPS)
+action = action.squeeze(0).float().cpu().numpy()
+
+#### now for lerobot model #####################################################
+
+cfg = PreTrainedConfig.from_pretrained(MIGRATED_PATH)
+env_cfg = make_env_config("libero", task="libero_spatial")
+cfg.pretrained_path = MIGRATED_PATH
+policy = make_policy(cfg=cfg, env_cfg=env_cfg)
+policy.eval()
+policy.to(DEVICE)
+
+seed_everything()
+with torch.no_grad():
+    action_1 = policy.model.generate_actions(**inputs, steps=NUM_STEPS)
+action_1 = action_1.squeeze(0).float().cpu().numpy()
+
+# Fail loudly if the two implementations disagree.
+np.testing.assert_allclose(
+    action_1,
+    action,
+    rtol=0,
+    atol=ATOL,
+    err_msg="LeRobot X-VLA actions differ from the reference X-VLA actions.",
+)
+print(f"Actions match: max abs diff {np.abs(action_1 - action).max():.3e} (atol={ATOL:g})")
diff --git a/xvla b/xvla
new file mode 160000
index 000000000..e2f0554f8
--- /dev/null
+++ b/xvla
@@ -0,0 +1 @@
+Subproject commit e2f0554f8ce8ab19c678652f9c30c431a37b7bbd