summaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
author: slaren <slarengh@gmail.com> 2023-11-24 18:04:31 +0100
committer: GitHub <noreply@github.com> 2023-11-24 18:04:31 +0100
commit: 8a052c131ed3525313cdb84e5ae4e2b6cf8d2e24 (patch)
tree: 4c7f2e9a5929ac9db83c36a2cab588f3112798df /llama.cpp
parent: 189d68446e7ef21e8f3af3c0a3d91c35a39aec89 (diff)
ggml-cuda : support stablelm rope (#4156)
* ggml-cuda : support stablelm rope
* remove unused freq_base kernel parameter
* add n_dims parameter to llm_build_k_shift, default to n_rot via overload
* llama : fix llm_build_k_shift args

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.cpp')
-rw-r--r-- llama.cpp 4
1 files changed, 2 insertions, 2 deletions
diff --git a/llama.cpp b/llama.cpp
index 9fb7244b..5b31f201 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3469,7 +3469,7 @@ static void llm_build_k_shift(
struct ggml_cgraph * graph,
llm_rope_type type,
int64_t n_ctx,
- int64_t n_rot,
+ int n_rot,
float freq_base,
float freq_scale,
const llm_build_cb & cb) {
@@ -3501,7 +3501,7 @@ static void llm_build_k_shift(
// we rotate only the first n_rot dimensions
ggml_rope_custom_inplace(ctx,
ggml_view_3d(ctx, kv.k,
- n_rot, n_head_kv, n_ctx,
+ n_embd_head, n_head_kv, n_ctx,
ggml_element_size(kv.k)*n_embd_head,
ggml_element_size(kv.k)*n_embd_gqa,
ggml_element_size(kv.k)*n_embd_gqa*n_ctx*il),