author     Georgi Gerganov <ggerganov@gmail.com>    2024-02-22 23:22:48 +0200
committer  GitHub <noreply@github.com>              2024-02-22 23:22:48 +0200
commit     847eedbdb2d1ebf14ef56eb507d4b4b975510908 (patch)
tree       87897844deb476482461dc431ae2b4d19e9454db /llama.cpp
parent     7e4f339c404dbe029d4a117c03b37a9bf646cf0e (diff)
py : add Gemma conversion from HF models (#5647)
* py : add gemma conversion from HF models
* Update convert-hf-to-gguf.py
  Co-authored-by: Aarni Koskela <akx@iki.fi>
* Update convert-hf-to-gguf.py
  Co-authored-by: Aarni Koskela <akx@iki.fi>
* Update convert-hf-to-gguf.py
  Co-authored-by: Jared Van Bortel <jared@nomic.ai>
---------
Co-authored-by: Aarni Koskela <akx@iki.fi>
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
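For reference, a typical invocation of the updated conversion script on a locally downloaded Gemma HF checkpoint might look roughly like the line below; the input path, output file name and output type are placeholders chosen for illustration, not taken from this commit:

    python3 convert-hf-to-gguf.py /path/to/gemma-hf-checkpoint --outfile gemma.gguf --outtype f16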
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp | 3 +++
1 file changed, 3 insertions(+), 0 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 40dda265..7770fa0e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7450,6 +7450,7 @@ struct llm_build_context {
         inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb);
         cb(inpL, "inp_embd", -1);
+
         inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd));
         cb(inpL, "inp_scaled", -1);
@@ -7491,6 +7492,7 @@ struct llm_build_context {
                         n_embd_head_k, 2, 0, n_orig_ctx, freq_base, freq_scale,
                         ext_factor, attn_factor, beta_fast, beta_slow);
                 cb(Qcur, "Qcur", il);
+
                 Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k)));
                 cb(Qcur, "Qcur_scaled", il);
@@ -7505,6 +7507,7 @@ struct llm_build_context {
                         Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f, cb, il);
                 cb(cur, "kqv_out", il);
             }
+
             struct ggml_tensor * sa_out = ggml_add(ctx0, cur, inpL);
             cb(sa_out, "sa_out", il);
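For context (not part of the diff itself), the hunks above sit in the Gemma graph builder: the token embeddings are pre-scaled by sqrt(n_embd) and the query tensor by 1/sqrt(n_embd_head_k), which is why the llm_build_kv call above passes a plain 1.0f attention scale. A minimal standalone C++ sketch of those two factors, using hypothetical Gemma-2B-like sizes (2048 hidden size, 256 per-head key dimension) purely for illustration:

    // Sketch only: reproduces the two scale factors applied via ggml_scale in the
    // hunks above. The sizes are illustrative assumptions, not read from a model.
    #include <cmath>
    #include <cstdio>

    int main() {
        const float n_embd        = 2048.0f; // hypothetical hidden size
        const float n_embd_head_k = 256.0f;  // hypothetical per-head key dimension

        const float inp_scale = sqrtf(n_embd);               // factor applied to inpL ("inp_scaled")
        const float q_scale   = 1.0f / sqrtf(n_embd_head_k); // factor applied to Qcur ("Qcur_scaled")

        printf("inp_scaled factor : %.4f\n", inp_scale); // ~45.2548
        printf("Qcur_scaled factor: %.6f\n", q_scale);   // 0.062500
        return 0;
    }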