diff options
Diffstat (limited to 'src/llama-vocab.cpp')
-rw-r--r-- | src/llama-vocab.cpp | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 749f8571..4bd5aa81 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -367,6 +367,13 @@ struct llm_tokenizer_bpe { "\\p{N}+", }; break; + case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM: + regex_exprs = { + "\\p{N}{1,3}", + "[一-龥぀-ゟ゠-ヿ]+", + "[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+", + }; + break; case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER: regex_exprs = { "[\r\n]", |