ggml-cuda : support stablelm rope (#4156)

* ggml-cuda : support stablelm rope
* remove unused freq_base kernel parameter
* add n_dims parameter to llm_build_k_shift, default to n_rot via overload
* llama : fix llm_build_k_shift args

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
author: slaren <slarengh@gmail.com> 2023-11-24 18:04:31 +0100
committer: GitHub <noreply@github.com> 2023-11-24 18:04:31 +0100
commit: 8a052c131ed3525313cdb84e5ae4e2b6cf8d2e24 (patch)
tree: 4c7f2e9a5929ac9db83c36a2cab588f3112798df /llama.cpp
parent: 189d68446e7ef21e8f3af3c0a3d91c35a39aec89 (diff)
1 files changed, 2 insertions, 2 deletions
diff --git a/llama.cpp b/llama.cpp
index 9fb7244b..5b31f201 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3469,7 +3469,7 @@ static void llm_build_k_shift(
        struct ggml_cgraph * graph,
             llm_rope_type   type,
                   int64_t   n_ctx,
-                  int64_t   n_rot,
+                  int       n_rot,
                   float     freq_base,
                   float     freq_scale,
        const llm_build_cb & cb) {
@@ -3501,7 +3501,7 @@ static void llm_build_k_shift(
             // we rotate only the first n_rot dimensions
             ggml_rope_custom_inplace(ctx,
                     ggml_view_3d(ctx, kv.k,
-                        n_rot, n_head_kv, n_ctx,
+                        n_embd_head, n_head_kv, n_ctx,
                         ggml_element_size(kv.k)*n_embd_head,
                         ggml_element_size(kv.k)*n_embd_gqa,
                         ggml_element_size(kv.k)*n_embd_gqa*n_ctx*il),
author	slaren <slarengh@gmail.com>	2023-11-24 18:04:31 +0100
committer	GitHub <noreply@github.com>	2023-11-24 18:04:31 +0100
commit	8a052c131ed3525313cdb84e5ae4e2b6cf8d2e24 (patch)
tree	4c7f2e9a5929ac9db83c36a2cab588f3112798df /llama.cpp
parent	189d68446e7ef21e8f3af3c0a3d91c35a39aec89 (diff)