summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: slaren <slarengh@gmail.com> 2024-03-15 22:14:16 +0100
committer: GitHub <noreply@github.com> 2024-03-15 23:14:16 +0200
commit: d84c48505f60bcd358b82a751d40418c4d235643 (patch)
tree: 21d0c1d29d79f7563607935a7d8059ab4dc85ba1
parent: 877b4d0c628cc70dddb5df72ed8fc14d126ca7e8 (diff)
llama : fix Baichuan2 13B (#6092)
-rw-r--r-- llama.cpp 3
1 files changed, 1 insertions, 2 deletions
diff --git a/llama.cpp b/llama.cpp
index 52bd718b..e4db288d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6000,7 +6000,7 @@ struct llm_build_context {
inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
// inp_pos - contains the positions
- struct ggml_tensor * inp_pos = build_inp_pos();
+ struct ggml_tensor * inp_pos = model.type == MODEL_7B ? build_inp_pos() : nullptr;
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
@@ -6050,7 +6050,6 @@ struct llm_build_context {
cb(Qcur, "Qcur", il);
cb(Kcur, "Kcur", il);
-
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
model.layers[il].wo, NULL,
Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);