Merge branch 'main' into main

johnnynunez · web-flow · commit bc0be005e38e · 2025-07-18T18:59:16.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.14]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@79e0b68c178656bb0632cb8602d2940b755077f8
+
+## [0.3.13]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@bdca38376f7e8dd928defe01ce6a16218a64b040
+- fix: Better chat format for Qwen2.5-VL by @alcoftTAO in #2040
+
 ## [0.3.12]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@a0374a67e2924f2e845cdc59dd67d9a44065a89c
diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.3.12"
+__version__ = "0.3.14"
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
@@ -3459,26 +3459,29 @@ class Qwen25VLChatHandler(Llava15ChatHandler):
     DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."
 
     CHAT_FORMAT = (
-        "<|im_start|>system\n"
-        "You are a helpful assistant.<|im_end|>\n"
+        #"{% set image_count = namespace(value=0) %}"
+        #"{% set video_count = namespace(value=0) %}"
         "{% for message in messages %}"
-        "{% if message['role'] == 'user' %}"
-        "<|im_start|>user\n"
+        "{% if loop.first and message['role'] != 'system' %}"
+        "<|im_start|>system\n"
+        "{{ self.DEFAULT_SYSTEM_MESSAGE }}<|im_end|>\n"
+        "{% endif %}"
+        "<|im_start|>{{ message['role'] }}\n"
         "{% if message['content'] is string %}"
-        "{{ message['content'] }}"
+        "{{ message['content'] }}<|im_end|>\n"
         "{% else %}"
         "{% for content in message['content'] %}"
-        "{% if content['type'] == 'text' %}"
-        "{{ content['text'] }}"
-        "{% elif content['type'] == 'image_url' %}"
+        "{% if content['type'] == 'image_url' %}"
         "{% if content.image_url is string %}"
         "{{ content.image_url }}"
         "{% else %}"
         "{{ content.image_url.url }}"
         "{% endif %}"
+        #"{% set image_count.value = image_count.value + 1 %}"
+        "{% elif content['type'] == 'text' %}"
+        "{{ content['text'] }}"
         "{% endif %}"
         "{% endfor %}"
-        "{% endif %}"
         "<|im_end|>\n"
         "{% endif %}"
         "{% endfor %}"
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
@@ -179,12 +179,13 @@
 
 
 # enum llama_vocab_type {
-#     LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
-#     LLAMA_VOCAB_TYPE_SPM  = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
-#     LLAMA_VOCAB_TYPE_BPE  = 2, // GPT-2 tokenizer based on byte-level BPE
-#     LLAMA_VOCAB_TYPE_WPM  = 3, // BERT tokenizer based on WordPiece
-#     LLAMA_VOCAB_TYPE_UGM  = 4, // T5 tokenizer based on Unigram
-#     LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
+#     LLAMA_VOCAB_TYPE_NONE   = 0, // For models without vocab
+#     LLAMA_VOCAB_TYPE_SPM    = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
+#     LLAMA_VOCAB_TYPE_BPE    = 2, // GPT-2 tokenizer based on byte-level BPE
+#     LLAMA_VOCAB_TYPE_WPM    = 3, // BERT tokenizer based on WordPiece
+#     LLAMA_VOCAB_TYPE_UGM    = 4, // T5 tokenizer based on Unigram
+#     LLAMA_VOCAB_TYPE_RWKV   = 5, // RWKV tokenizer based on greedy tokenization
+#     LLAMA_VOCAB_TYPE_PLAMO2 = 6, // PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming
 # };
 LLAMA_VOCAB_TYPE_NONE = 0
 """For models without vocab"""
@@ -198,8 +199,11 @@
 """T5 tokenizer based on Unigram"""
 LLAMA_VOCAB_TYPE_RWKV = 5
 """RWKV tokenizer based on greedy tokenization"""
+LLAMA_VOCAB_TYPE_PLAMO2 = 6
+"""PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming"""
 
 
+# NOTE: Deprecated and will be removed in a future release (already removed upstream in llama.cpp).
 # // pre-tokenization types
 # enum llama_vocab_pre_type {
 #     LLAMA_VOCAB_PRE_TYPE_DEFAULT        = 0,
@@ -2170,7 +2174,7 @@ def llama_kv_self_seq_add(
 # //   - lazily on next llama_decode()
 # // p0 < 0 : [0,  p1]
 # // p1 < 0 : [p0, inf)
-# DEPRECATED(void llama_kv_self_seq_div(
+# DEPRECATED(LLAMA_API void llama_kv_self_seq_div(
 #         struct llama_context * ctx,
 #                 llama_seq_id   seq_id,
 #                    llama_pos   p0,
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit a0374a67e2924f2e845cdc59dd67d9a44065a89c
+Subproject commit 79e0b68c178656bb0632cb8602d2940b755077f8