diff options
author | Aleksey Nikiforov <lexn82@gmail.com> | 2025-07-14 12:43:52 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-14 18:43:52 +0200 |
commit | f5353047ef461e6fc9d527e09a06c9802c699929 (patch) | |
tree | 206c8c56efd3dcac1e39655e73788affe6c02832 /src/llama-vocab.cpp | |
parent | 255c22046bcaef41850125be924f3e42e2a65571 (diff) |
Ported kimi-k2 support from llama.cpp (#609)
Original patch by @gabriellarson:
https://github.com/ggml-org/llama.cpp/pull/14654
Co-authored-by: anikifoss <anikifoss>
Diffstat (limited to 'src/llama-vocab.cpp')
-rw-r--r-- | src/llama-vocab.cpp | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 7bae4fec..109a6659 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -464,6 +464,13 @@ struct llm_tokenizer_bpe { "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", }; break; + case LLAMA_VOCAB_PRE_TYPE_KIMI_K2: + regex_exprs = { + // K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp + // The custom handler implements all K2 patterns with proper Han character exclusion + "\\p{Han}+", + }; + break; case LLAMA_VOCAB_PRE_TYPE_SUPERBPE: regex_exprs = { "\\p{N}+", |