From 8a052c131ed3525313cdb84e5ae4e2b6cf8d2e24 Mon Sep 17 00:00:00 2001 From: slaren Date: Fri, 24 Nov 2023 18:04:31 +0100 Subject: ggml-cuda : support stablelm rope (#4156) * ggml-cuda : support stablelm rope * remove unused freq_base kernel parameter * add n_dims parameter to llm_build_k_shift, default to n_rot via overload * llama : fix llm_build_k_shift args --------- Co-authored-by: Georgi Gerganov --- llama.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'llama.cpp') diff --git a/llama.cpp b/llama.cpp index 9fb7244b..5b31f201 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3469,7 +3469,7 @@ static void llm_build_k_shift( struct ggml_cgraph * graph, llm_rope_type type, int64_t n_ctx, - int64_t n_rot, + int n_rot, float freq_base, float freq_scale, const llm_build_cb & cb) { @@ -3501,7 +3501,7 @@ static void llm_build_k_shift( // we rotate only the first n_rot dimensions ggml_rope_custom_inplace(ctx, ggml_view_3d(ctx, kv.k, - n_rot, n_head_kv, n_ctx, + n_embd_head, n_head_kv, n_ctx, ggml_element_size(kv.k)*n_embd_head, ggml_element_size(kv.k)*n_embd_gqa, ggml_element_size(kv.k)*n_embd_gqa*n_ctx*il), -- cgit v1.2.3