summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorubergarm <leimgrub@gmail.com>2025-07-15 13:54:04 -0400
committerGitHub <noreply@github.com>2025-07-15 19:54:04 +0200
commit13b2f193723486f46efe34297cf797186ab14bc2 (patch)
treebda8a4b50adb20a564302e16dc42bed45ea798d4 /src
parent2081b3fccb9923699bf4d5e926d8719fc1d12c39 (diff)
kimi-k2 convert script and chat template (#612)
* convert_hf_to_gguf for Kimi-K2-Instruct: adapt mainline `PR14653` for the tokenizer while maintaining proper MLA tensors. Tested with this workflow: use deepseek fp8_cast_bf16.py and triton-cpu to upcast the fp8 safetensors to bf16 safetensors, then run this convert_hf_to_gguf.
* Add Kimi-K2 chat template (moonshotai/Kimi-K2-Instruct): https://github.com/ikawrakow/ik_llama.cpp/pull/609#issuecomment-3071259454
* kimi-k2: add `add_ass` handling to the template so that a response is generated
Diffstat (limited to 'src')
-rw-r--r--src/llama.cpp19
1 files changed, 19 insertions, 0 deletions
diff --git a/src/llama.cpp b/src/llama.cpp
index 0a81f2b9..58812fc8 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1695,6 +1695,7 @@ enum llm_chat_template {
LLM_CHAT_TEMPLATE_BITNET,
LLM_CHAT_TEMPLATE_DOTS1,
LLM_CHAT_TEMPLATE_HUNYUAN_MOE,
+ LLM_CHAT_TEMPLATE_KIMI_K2,
LLM_CHAT_TEMPLATE_UNKNOWN,
};
@@ -1733,6 +1734,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
{ "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
{ "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
{ "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
+ { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
{ "bitnet", LLM_CHAT_TEMPLATE_BITNET },
};
@@ -23270,6 +23272,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
return LLM_CHAT_TEMPLATE_DOTS1;
} else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
+ } else if (tmpl_contains("<|im_middle|>") && tmpl_contains("<|im_end|>")) {
+ return LLM_CHAT_TEMPLATE_KIMI_K2;
}
return LLM_CHAT_TEMPLATE_UNKNOWN;
}
@@ -23715,6 +23719,21 @@ static int32_t llama_chat_apply_template_internal(
ss << "<|startoftext|>" << message->content << "<|extra_0|>";
}
}
+ } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
+ // moonshotai/Kimi-K2-Instruct
+ for (auto message : chat) { // each turn is rendered as <|im_ROLE|>ROLE<|im_middle|>CONTENT<|im_end|>
+ std::string role(message->role);
+ if (role == "system") {
+ ss << "<|im_system|>system<|im_middle|>" << message->content << "<|im_end|>";
+ } else if (role == "assistant") {
+ ss << "<|im_assistant|>assistant<|im_middle|>" << message->content << "<|im_end|>"; // assistant turns carry the assistant tag (was swapped with user)
+ } else {
+ ss << "<|im_user|>user<|im_middle|>" << message->content << "<|im_end|>"; // "user" and any other role fall back to a user turn
+ }
+ }
+ if (add_ass) {
+ ss << "<|im_assistant|>assistant<|im_middle|>";
+ }
} else {
// template not supported
return -1;