diff options
author | agray3 <agray3@users.noreply.github.com> | 2024-04-26 19:08:30 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-26 20:08:30 +0200 |
commit | 928e0b7013c862cf10701957b3d654aa70f11bd8 (patch) | |
tree | 9e80a33cf6f8c245849c7273ec6bb1d3ad43836f | |
parent | 0c4d489e29e53589bf13a801fe7c94b7b546d8f6 (diff) |
Reset schedule earlier to allow overlap with ggml graph computation on device (#6933)
* Reset schedule earlier to allow overlap with graph computation on device
-rw-r--r-- | ggml-backend.c | 12 | ||||
-rw-r--r-- | llama.cpp | 4 |
2 files changed, 11 insertions, 5 deletions
```diff
diff --git a/ggml-backend.c b/ggml-backend.c
index e91d97cd..f5bdcf07 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1784,12 +1784,14 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
 
 void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
     // reset state for the next run
-    size_t hash_size = sched->hash_set.size;
-    memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
-    memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
-    memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
+    if (!sched->is_reset) {
+        size_t hash_size = sched->hash_set.size;
+        memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
+        memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
+        memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
 
-    sched->is_reset = true;
+        sched->is_reset = true;
+    }
     sched->is_alloc = false;
 }
 
diff --git a/llama.cpp b/llama.cpp
--- a/llama.cpp
+++ b/llama.cpp
@@ -11473,6 +11473,10 @@ static int llama_decode_internal(
         }
     }
 
+    // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
+    // overlap with device computation.
+    ggml_backend_sched_reset(lctx.sched);
+
     return 0;
 }
 
```