summary | refs | log | tree | commit | diff
path: root/src/llama-vocab.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/llama-vocab.cpp')
-rw-r--r-- src/llama-vocab.cpp 7
1 files changed, 7 insertions, 0 deletions
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 749f8571..4bd5aa81 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -367,6 +367,13 @@ struct llm_tokenizer_bpe {
"\\p{N}+",
};
break;
+ case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM:
+ regex_exprs = {
+ "\\p{N}{1,3}",
+ "[一-龥぀-ゟ゠-ヿ]+",
+ "[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
+ };
+ break;
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
regex_exprs = {
"[\r\n]",