From 8a052c131ed3525313cdb84e5ae4e2b6cf8d2e24 Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Fri, 24 Nov 2023 18:04:31 +0100
Subject: ggml-cuda : support stablelm rope (#4156)

* ggml-cuda : support stablelm rope

* remove unused freq_base kernel parameter

* add n_dims parameter to llm_build_k_shift, default to n_rot via overload

* llama : fix llm_build_k_shift args

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index 9fb7244b..5b31f201 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3469,7 +3469,7 @@ static void llm_build_k_shift(
        struct ggml_cgraph * graph,
             llm_rope_type   type,
                   int64_t   n_ctx,
-                  int64_t   n_rot,
+                  int       n_rot,
                   float     freq_base,
                   float     freq_scale,
        const llm_build_cb & cb) {
@@ -3501,7 +3501,7 @@ static void llm_build_k_shift(
             // we rotate only the first n_rot dimensions
             ggml_rope_custom_inplace(ctx,
                     ggml_view_3d(ctx, kv.k,
-                        n_rot, n_head_kv, n_ctx,
+                        n_embd_head, n_head_kv, n_ctx,
                         ggml_element_size(kv.k)*n_embd_head,
                         ggml_element_size(kv.k)*n_embd_gqa,
                         ggml_element_size(kv.k)*n_embd_gqa*n_ctx*il),
-- 
cgit v1.2.3