author    cebtenzzre <cebtenzzre@gmail.com>    2023-10-15 02:32:06 -0400
committer GitHub <noreply@github.com>          2023-10-15 09:32:06 +0300
commit    11bff290458f12f020b588792707f76ec658a27a (patch)
tree      1f37e1f536551b0d6d760530ea0896e67d78cc6e /llama.cpp
parent    11dc1091f64b24ca6d643acc6d0051117ba60161 (diff)
MPT : support GQA for replit-code-v1.5 (#3627)
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp  6
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 2cd2dad7..5329bd82 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2839,8 +2839,8 @@ static void llm_load_tensors(
auto & layer = model.layers[i];
layer.attn_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, backend);
- layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, 3*n_embd}, backend_split);
- layer.wo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);
+ layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, backend_split);
+ layer.wo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);
layer.ffn_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, backend);
@@ -5368,7 +5368,7 @@ static struct ggml_cgraph * llm_build_mpt(
const int64_t n_layer = hparams.n_layer;
const int64_t n_ctx = cparams.n_ctx;
const int64_t n_head = hparams.n_head;
- const int64_t n_head_kv = hparams.n_head_kv; // == n_head for MPT, as there's no MQA/GQA
+ const int64_t n_head_kv = hparams.n_head_kv;
const int64_t n_embd_head = hparams.n_embd_head();
const int64_t n_embd_gqa = hparams.n_embd_gqa();
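The shape change in the first hunk is the core of the fix: with grouped-query attention, K and V each project to n_embd_gqa columns instead of n_embd, so the fused QKV weight becomes {n_embd, n_embd + 2*n_embd_gqa}. A minimal sketch of that arithmetic, not part of the commit, is below; the hyperparameter values are hypothetical and only illustrate how n_head_kv determines the tensor width (llama.cpp derives n_embd_head and n_embd_gqa from hparams as in the second hunk).

// Minimal sketch (not part of the commit): why the fused QKV projection is
// {n_embd, n_embd + 2*n_embd_gqa} under GQA. Hyperparameter values below are
// hypothetical, chosen only to illustrate the arithmetic.
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t n_embd    = 4096;  // hypothetical model width
    const int64_t n_head    = 32;    // hypothetical number of query heads
    const int64_t n_head_kv = 8;     // fewer KV heads than query heads => GQA

    const int64_t n_embd_head = n_embd / n_head;         // per-head dimension
    const int64_t n_embd_gqa  = n_embd_head * n_head_kv; // width of K (and of V)

    // Q keeps the full n_embd columns; K and V each shrink to n_embd_gqa.
    const int64_t qkv_cols = n_embd + 2*n_embd_gqa;

    // When n_head_kv == n_head (no GQA) this reduces to the old 3*n_embd.
    printf("wqkv shape: {%lld, %lld}\n", (long long) n_embd, (long long) qkv_cols);
    return 0;
}

With n_head_kv equal to n_head the new expression collapses to the previous 3*n_embd, which is why the old shape worked for plain MPT but not for replit-code-v1.5's GQA layout.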