summary | refs | log | tree | commit | diff
path: root/llama.cpp
diff options
context:
space:
mode:
author: agray3 <agray3@users.noreply.github.com> 2024-04-26 19:08:30 +0100
committer: GitHub <noreply@github.com> 2024-04-26 20:08:30 +0200
commit: 928e0b7013c862cf10701957b3d654aa70f11bd8 (patch)
tree: 9e80a33cf6f8c245849c7273ec6bb1d3ad43836f /llama.cpp
parent: 0c4d489e29e53589bf13a801fe7c94b7b546d8f6 (diff)
Reset schedule earlier to allow overlap with ggml graph computation on device (#6933)
* Reset schedule earlier to allow overlap with graph computation on device
Diffstat (limited to 'llama.cpp')
-rw-r--r-- llama.cpp 4
1 files changed, 4 insertions, 0 deletions
diff --git a/llama.cpp b/llama.cpp
index dd8b1f26..49f2b559 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11473,6 +11473,10 @@ static int llama_decode_internal(
}
}
+ // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
+ // overlap with device computation.
+ ggml_backend_sched_reset(lctx.sched);
+
return 0;
}