From 95292e36c6cef7430cff0bda16578fe9f7303e01 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Wed, 16 Jul 2025 02:47:55 -0400
Subject: [PATCH 1/2] feat: Update llama.cpp

---
 llama_cpp/llama_cpp.py | 17 ++++++++++-------
 vendor/llama.cpp       |  2 +-
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index bd8e36b79..b9e245e2f 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -179,12 +179,13 @@
 
 
 # enum llama_vocab_type {
-#     LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
-#     LLAMA_VOCAB_TYPE_SPM  = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
-#     LLAMA_VOCAB_TYPE_BPE  = 2, // GPT-2 tokenizer based on byte-level BPE
-#     LLAMA_VOCAB_TYPE_WPM  = 3, // BERT tokenizer based on WordPiece
-#     LLAMA_VOCAB_TYPE_UGM  = 4, // T5 tokenizer based on Unigram
-#     LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
+#     LLAMA_VOCAB_TYPE_NONE   = 0, // For models without vocab
+#     LLAMA_VOCAB_TYPE_SPM    = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
+#     LLAMA_VOCAB_TYPE_BPE    = 2, // GPT-2 tokenizer based on byte-level BPE
+#     LLAMA_VOCAB_TYPE_WPM    = 3, // BERT tokenizer based on WordPiece
+#     LLAMA_VOCAB_TYPE_UGM    = 4, // T5 tokenizer based on Unigram
+#     LLAMA_VOCAB_TYPE_RWKV   = 5, // RWKV tokenizer based on greedy tokenization
+#     LLAMA_VOCAB_TYPE_PLAMO2 = 6, // PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming
 # };
 LLAMA_VOCAB_TYPE_NONE = 0
 """For models without vocab"""
@@ -198,6 +199,8 @@
 """T5 tokenizer based on Unigram"""
 LLAMA_VOCAB_TYPE_RWKV = 5
 """RWKV tokenizer based on greedy tokenization"""
+LLAMA_VOCAB_TYPE_PLAMO2 = 6
+"""PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming"""
 
 
 # NOTE: Deprecated and will be removed in the future. (already gone in llama.cpp)
@@ -2171,7 +2174,7 @@ def llama_kv_self_seq_add(
 # //   - lazily on next llama_decode()
 # // p0 < 0 : [0, p1]
 # // p1 < 0 : [p0, inf)
-# DEPRECATED(void llama_kv_self_seq_div(
+# DEPRECATED(LLAMA_API void llama_kv_self_seq_div(
 #         struct llama_context * ctx,
 #                 llama_seq_id   seq_id,
 #                    llama_pos   p0,
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index bdca38376..79e0b68c1 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit bdca38376f7e8dd928defe01ce6a16218a64b040
+Subproject commit 79e0b68c178656bb0632cb8602d2940b755077f8

From e1af05f43f57d2b660edfb77935dd2d2641ec602 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 18 Jul 2025 12:45:45 -0400
Subject: [PATCH 2/2] chore: Bump version

---
 CHANGELOG.md          | 6 +++++-
 llama_cpp/__init__.py | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 88b9a1b45..e743c4584 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,9 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.14]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@79e0b68c178656bb0632cb8602d2940b755077f8
+
 ## [0.3.13]
 
-- feat: Update llama.cpp to ggerganov/llama.cpp@
+- feat: Update llama.cpp to ggerganov/llama.cpp@bdca38376f7e8dd928defe01ce6a16218a64b040
 - fix: Better chat format for Qwen2.5-VL by @alcoftTAO in #2040
 
 ## [0.3.12]
diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py
index 0c869dcae..409c59514 100644
--- a/llama_cpp/__init__.py
+++ b/llama_cpp/__init__.py
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.3.13"
+__version__ = "0.3.14"