From 0bda18726800eb9b5d05860b3106a59b7ecd71ce Mon Sep 17 00:00:00 2001 From: Jade Choghari Date: Thu, 26 Feb 2026 09:55:25 +0000 Subject: [PATCH] xvla log fix --- .../policies/xvla/configuration_florence2.py | 3 --- .../policies/xvla/modeling_florence2.py | 19 ------------------- 2 files changed, 22 deletions(-) diff --git a/src/lerobot/policies/xvla/configuration_florence2.py b/src/lerobot/policies/xvla/configuration_florence2.py index 77f1b3a1d..4e3240487 100644 --- a/src/lerobot/policies/xvla/configuration_florence2.py +++ b/src/lerobot/policies/xvla/configuration_florence2.py @@ -13,12 +13,9 @@ import warnings from transformers.configuration_utils import PretrainedConfig -from transformers.utils import logging """ Florence-2 configuration""" -logger = logging.get_logger(__name__) - class Florence2VisionConfig(PretrainedConfig): r""" diff --git a/src/lerobot/policies/xvla/modeling_florence2.py b/src/lerobot/policies/xvla/modeling_florence2.py index e33efe5c3..df8e45a14 100644 --- a/src/lerobot/policies/xvla/modeling_florence2.py +++ b/src/lerobot/policies/xvla/modeling_florence2.py @@ -46,7 +46,6 @@ from transformers.utils import ( add_start_docstrings_to_model_forward, is_flash_attn_2_available, is_flash_attn_greater_or_equal_2_10, - logging, replace_return_docstrings, ) @@ -57,8 +56,6 @@ if is_flash_attn_2_available(): from flash_attn import flash_attn_func, flash_attn_varlen_func from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa -logger = logging.get_logger(__name__) - _CONFIG_FOR_DOC = "Florence2Config" @@ -992,12 +989,6 @@ class Florence2FlashAttention2(Florence2Attention): else: target_dtype = self.q_proj.weight.dtype - logger.warning_once( - f"The input hidden states seems to be silently casted in float32, this might be related to" - f" the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in" - f" {target_dtype}." - ) - query_states = query_states.to(target_dtype) key_states = key_states.to(target_dtype) value_states = value_states.to(target_dtype) @@ -1135,11 +1126,6 @@ class Florence2SdpaAttention(Florence2Attention): ) -> tuple[torch.Tensor, torch.Tensor | None, tuple[torch.Tensor] | None]: """Input shape: Batch x Time x Channel""" if output_attentions or layer_head_mask is not None: - # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once this is implemented. - logger.warning_once( - "Florence2Model is using Florence2SdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True` or `layer_head_mask` not None. Falling back to the manual attention" - ' implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.' - ) return super().forward( hidden_states, key_value_states=key_value_states, @@ -1860,9 +1846,6 @@ class Florence2Decoder(Florence2LanguagePreTrainedModel): hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) if self.gradient_checkpointing and self.training and use_cache: - logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." - ) use_cache = False # decoder layers @@ -2160,8 +2143,6 @@ class Florence2LanguageForConditionalGeneration(Florence2LanguagePreTrainedModel return_dict = return_dict if return_dict is not None else self.config.use_return_dict if labels is not None: - if use_cache: - logger.warning("The `use_cache` argument is changed to `False` since `labels` is provided.") use_cache = False if decoder_input_ids is None and decoder_inputs_embeds is None: decoder_input_ids = shift_tokens_right(