diff options
author | slaren <slarengh@gmail.com> | 2023-11-24 18:04:31 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-24 18:04:31 +0100 |
commit | 8a052c131ed3525313cdb84e5ae4e2b6cf8d2e24 (patch) | |
tree | 4c7f2e9a5929ac9db83c36a2cab588f3112798df /llama.cpp | |
parent | 189d68446e7ef21e8f3af3c0a3d91c35a39aec89 (diff) |
ggml-cuda : support stablelm rope (#4156)
* ggml-cuda : support stablelm rope
* remove unused freq_base kernel parameter
* add n_dims parameter to llm_build_k_shift, default to n_rot via overload
* llama : fix llm_build_k_shift args
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
@@ -3469,7 +3469,7 @@ static void llm_build_k_shift(
         struct ggml_cgraph * graph,
         llm_rope_type type,
         int64_t n_ctx,
-        int64_t n_rot,
+        int n_rot,
         float freq_base,
         float freq_scale,
         const llm_build_cb & cb) {
@@ -3501,7 +3501,7 @@ static void llm_build_k_shift(
                 // we rotate only the first n_rot dimensions
                 ggml_rope_custom_inplace(ctx,
                         ggml_view_3d(ctx, kv.k,
-                            n_rot, n_head_kv, n_ctx,
+                            n_embd_head, n_head_kv, n_ctx,
                             ggml_element_size(kv.k)*n_embd_head,
                             ggml_element_size(kv.k)*n_embd_gqa,
                             ggml_element_size(kv.k)*n_embd_gqa*n_ctx*il),
```