GGUF Qwen2VL #40058

Open · wants to merge 3 commits into main
13 changes: 13 additions & 0 deletions src/transformers/integrations/ggml.py
@@ -116,6 +116,18 @@
        "expert_count": "num_experts",
        "expert_used_count": "num_experts_per_tok",
    },
    "qwen2vl": {
        "context_length": "max_position_embeddings",
        "block_count": "num_hidden_layers",
        "feed_forward_length": "intermediate_size",
        "embedding_length": "hidden_size",
        "rope.dimension_count": None,
        "rope.freq_base": "rope_theta",
        "attention.head_count": "num_attention_heads",
        "attention.head_count_kv": "num_key_value_heads",
        "attention.layer_norm_rms_epsilon": "rms_norm_eps",
        "vocab_size": "vocab_size",
    },
    "falcon": {
        "context_length": "max_position_embeddings",
        "block_count": "num_hidden_layers",
@@ -704,6 +716,7 @@ def converted(self) -> Tokenizer:
    "qwen2_moe": GGUFQwen2Converter,
    "qwen3": GGUFQwen2Converter,
    "qwen3_moe": GGUFQwen2Converter,
    "qwen2_vl": GGUFQwen2Converter,
    "phi3": GGUFPhi3Converter,
    "bloom": GGUFGPTConverter,
    "falcon": GGUFGPTConverter,
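For context, each entry in the map added above translates a GGUF metadata key into its transformers config counterpart when a GGUF checkpoint is loaded; a key mapped to None (here rope.dimension_count) is recognized in the file but not mirrored into the config. Below is a minimal sketch of how such a map can be applied; gguf_to_config_kwargs and the gguf_metadata dict are hypothetical stand-ins for the actual loader, not transformers code.

# Sketch only: QWEN2VL_CONFIG_MAP copies the mapping added above;
# gguf_to_config_kwargs is a hypothetical helper, not the real implementation.
QWEN2VL_CONFIG_MAP = {
    "context_length": "max_position_embeddings",
    "block_count": "num_hidden_layers",
    "feed_forward_length": "intermediate_size",
    "embedding_length": "hidden_size",
    "rope.dimension_count": None,  # read from the file, not copied to the config
    "rope.freq_base": "rope_theta",
    "attention.head_count": "num_attention_heads",
    "attention.head_count_kv": "num_key_value_heads",
    "attention.layer_norm_rms_epsilon": "rms_norm_eps",
    "vocab_size": "vocab_size",
}

def gguf_to_config_kwargs(gguf_metadata: dict) -> dict:
    """Translate parsed GGUF metadata values into transformers config kwargs."""
    config_kwargs = {}
    for gguf_key, hf_key in QWEN2VL_CONFIG_MAP.items():
        # Keys mapped to None are parsed but deliberately not propagated.
        if hf_key is not None and gguf_key in gguf_metadata:
            config_kwargs[hf_key] = gguf_metadata[gguf_key]
    return config_kwargs

# Example: {"block_count": 36, "embedding_length": 2048}
#       -> {"num_hidden_layers": 36, "hidden_size": 2048}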
2 changes: 2 additions & 0 deletions src/transformers/modeling_gguf_pytorch_utils.py
@@ -297,6 +297,8 @@ def get_gguf_hf_weights_map(
        model_type = "qwen2moe"
    elif model_type == "gemma3_text":
        model_type = "gemma3"
    elif model_type == "qwen2_vl":
        model_type = "qwen2vl"
    arch = None
    for key, value in MODEL_ARCH_NAMES.items():
        if value == model_type:
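This rename follows the normalization pattern already used for qwen2_moe and gemma3_text: transformers names the model type qwen2_vl, while the gguf package registers the architecture as qwen2vl, so without the alias the lookup against MODEL_ARCH_NAMES would find no match. A minimal sketch of the idea, assuming model_arch_names maps architecture keys to their string names as gguf.MODEL_ARCH_NAMES does; GGUF_ARCH_ALIASES and resolve_gguf_arch are illustrative names, not the actual implementation.

# Sketch only: alias-based normalization of HF model types to GGUF arch names.
GGUF_ARCH_ALIASES = {
    "qwen2_moe": "qwen2moe",
    "gemma3_text": "gemma3",
    "qwen2_vl": "qwen2vl",  # the alias this PR adds
}

def resolve_gguf_arch(model_type: str, model_arch_names: dict):
    """Return the architecture key whose registered name matches model_type."""
    model_type = GGUF_ARCH_ALIASES.get(model_type, model_type)
    for arch, name in model_arch_names.items():
        if name == model_type:
            return arch
    return None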
26 changes: 26 additions & 0 deletions tests/quantization/ggml/test_ggml.py
@@ -303,6 +303,8 @@ class GgufModelTests(unittest.TestCase):
    gemma3_vision_model_id = "unsloth/gemma-3-4b-it-GGUF"
    qwen3_model_id = "Qwen/Qwen3-0.6B-GGUF"
    qwen3moe_model_id = "Qwen/Qwen3-30B-A3B-GGUF"
    qwen2vl_model_id = "unsloth/Qwen2.5-VL-3B-Instruct-GGUF"
    original_qwen2vl_model_id = "Qwen/Qwen2.5-VL-3B-Instruct"

    q4_0_phi3_model_id = "Phi-3-mini-4k-instruct-q4.gguf"
    q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf"
@@ -337,6 +339,7 @@ class GgufModelTests(unittest.TestCase):
    bf16_gemma3_vision_model_id = "gemma-3-4b-it-BF16.gguf"
    q8_0_qwen3_model_id = "Qwen3-0.6B-Q8_0.gguf"
    q4_k_m_qwen3moe_model_id = "Qwen3-30B-A3B-Q4_K_M.gguf"
    q8_0_qwen2vl_model_id = "Qwen2.5-VL-3B-Instruct-Q8_0.gguf"

    example_text = "Hello"

@@ -989,3 +992,26 @@ def test_qwen3moe_q4_k_m(self):

        EXPECTED_TEXT = "Hello, I am a 20 year old male"
        self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)

    @unittest.skipUnless(is_gguf_available("0.16.0"), "test requires gguf version >= 0.16.0")
    def test_qwen2vl(self):
        original_model = AutoModelForCausalLM.from_pretrained(
            self.original_qwen2vl_model_id,
            torch_dtype=torch.float16,
        ).language_model

        converted_model = AutoModelForCausalLM.from_pretrained(
            self.qwen2vl_model_id,
            gguf_file=self.q8_0_qwen2vl_model_id,
            torch_dtype=torch.float16,
        )

        converted_state_dict = converted_model.state_dict()
        original_state_dict = original_model.state_dict()

        for layer_name, original_params in original_state_dict.items():
            if layer_name in converted_state_dict:
                self.assertEqual(original_params.shape, converted_state_dict[layer_name].shape)
                torch.testing.assert_close(original_params, converted_state_dict[layer_name])
            else:
                raise ValueError(f"Layer {layer_name} is not present in the GGUF model")
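As a usage note, the test implies a Qwen2.5-VL GGUF checkpoint can now be loaded through the standard gguf_file path. A hedged end-to-end sketch mirroring the test (the repo and file names are taken from the test above; that loading via AutoModelForCausalLM yields only the text model for this architecture is an assumption based on the test's comparison against .language_model):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "unsloth/Qwen2.5-VL-3B-Instruct-GGUF"  # repo from the test above
gguf_file = "Qwen2.5-VL-3B-Instruct-Q8_0.gguf"  # filename from the test above

tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    gguf_file=gguf_file,
    torch_dtype=torch.float16,
)

inputs = tokenizer("Hello", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=10)
print(tokenizer.decode(out[0], skip_special_tokens=True))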