summaryrefslogtreecommitdiff
path: root/src/llama.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/llama.cpp')
-rw-r--r--src/llama.cpp6
1 files changed, 3 insertions, 3 deletions
diff --git a/src/llama.cpp b/src/llama.cpp
index 00a3c9b1..29926a94 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -13390,7 +13390,7 @@ struct llm_build_context {
ggml_row_size(kv_pe_compresseed->type, kv_lora_rank));
cb(k_pe, "k_pe", il);
- kv_compressed = ggml_cont(ctx0, kv_compressed); // TODO: the CUDA backend does not support non-contiguous norm
+ //kv_compressed = ggml_cont(ctx0, kv_compressed); // TODO: the CUDA backend does not support non-contiguous norm
kv_compressed = llm_build_norm(ctx0, kv_compressed, hparams,
model.layers[il].attn_kv_a_norm, NULL,
LLM_NORM_RMS, cb, il);
@@ -13422,7 +13422,7 @@ struct llm_build_context {
0);
cb(v_states, "v_states", il);
- q_pe = ggml_cont(ctx0, q_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
+ //q_pe = ggml_cont(ctx0, q_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
q_pe = ggml_rope_ext(
ctx0, q_pe, inp_pos, nullptr,
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
@@ -13431,7 +13431,7 @@ struct llm_build_context {
cb(q_pe, "q_pe", il);
// shared RoPE key
- k_pe = ggml_cont(ctx0, k_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
+ //k_pe = ggml_cont(ctx0, k_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
k_pe = ggml_rope_ext(
ctx0, k_pe, inp_pos, nullptr,
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,