summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ggml-backend.c | 12
-rw-r--r-- llama.cpp | 4
2 files changed, 11 insertions, 5 deletions
diff --git a/ggml-backend.c b/ggml-backend.c
index e91d97cd..f5bdcf07 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1784,12 +1784,14 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
// reset state for the next run
- size_t hash_size = sched->hash_set.size;
- memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
- memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
- memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
+ if (!sched->is_reset) {
+ size_t hash_size = sched->hash_set.size;
+ memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
+ memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
+ memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
- sched->is_reset = true;
+ sched->is_reset = true;
+ }
sched->is_alloc = false;
}
diff --git a/llama.cpp b/llama.cpp
index dd8b1f26..49f2b559 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11473,6 +11473,10 @@ static int llama_decode_internal(
}
}
+ // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
+ // overlap with device computation.
+ ggml_backend_sched_reset(lctx.sched);
+
return 0;
}