summary refs log tree commit diff
diff options
context:
space:
mode:
author slaren <slarengh@gmail.com> 2024-01-22 23:42:41 +0100
committer GitHub <noreply@github.com> 2024-01-22 23:42:41 +0100
commit 011e8ec577fd135cbc02993d3ea9840c516d6a1c (patch)
tree 2960730d772fa3d28960f2db14df2e96c1726044
parent 6f9939d119b2d004c264952eb510bd106455531e (diff)
llama : fix not enough space in buffer with Qwen (#5086)
-rw-r--r-- llama.cpp 2
1 files changed, 1 insertions, 1 deletions
diff --git a/llama.cpp b/llama.cpp
index 8c906a22..f6f1ec0f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4440,9 +4440,9 @@ static struct ggml_tensor * llm_build_kv(
// these nodes are added to the graph together so that they are not reordered
// by doing so, the number of splits in the graph is reduced
+ ggml_build_forward_expand(graph, q_cur);
ggml_build_forward_expand(graph, k_cur);
ggml_build_forward_expand(graph, v_cur);
- ggml_build_forward_expand(graph, q_cur);
llm_build_kv_store(ctx, hparams, kv, graph, k_cur, v_cur, n_ctx, n_tokens, kv_head, cb, il);