diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-02-22 23:22:48 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-22 23:22:48 +0200 |
commit | 847eedbdb2d1ebf14ef56eb507d4b4b975510908 (patch) | |
tree | 87897844deb476482461dc431ae2b4d19e9454db /llama.cpp | |
parent | 7e4f339c404dbe029d4a117c03b37a9bf646cf0e (diff) |
py : add Gemma conversion from HF models (#5647)
* py : add gemma conversion from HF models
* Update convert-hf-to-gguf.py
Co-authored-by: Aarni Koskela <akx@iki.fi>
* Update convert-hf-to-gguf.py
Co-authored-by: Aarni Koskela <akx@iki.fi>
* Update convert-hf-to-gguf.py
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
---------
Co-authored-by: Aarni Koskela <akx@iki.fi>
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 3 |
1 file changed, 3 insertions, 0 deletions
@@ -7450,6 +7450,7 @@ struct llm_build_context {
         inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb);
         cb(inpL, "inp_embd", -1);

+        inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd));
         cb(inpL, "inp_scaled", -1);

@@ -7491,6 +7492,7 @@ struct llm_build_context {
                     n_embd_head_k, 2, 0, n_orig_ctx, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow);
                 cb(Qcur, "Qcur", il);

+                Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k)));
                 cb(Qcur, "Qcur_scaled", il);

@@ -7505,6 +7507,7 @@ struct llm_build_context {
                         Kcur, Vcur, Qcur, KQ_mask, nullptr,
                         n_ctx, n_tokens, kv_head, n_kv, 1.0f, cb, il);
                 cb(cur, "kqv_out", il);
             }
+
            struct ggml_tensor * sa_out = ggml_add(ctx0, cur, inpL);
            cb(sa_out, "sa_out", il);