From 928e0b7013c862cf10701957b3d654aa70f11bd8 Mon Sep 17 00:00:00 2001
From: agray3
Date: Fri, 26 Apr 2024 19:08:30 +0100
Subject: Reset schedule earlier to allow overlap with ggml graph computation on device (#6933)

* Reset schedule earlier to allow overlap with graph computation on device
---
 llama.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index dd8b1f26..49f2b559 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11473,6 +11473,10 @@ static int llama_decode_internal(
         }
     }
 
+    // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
+    // overlap with device computation.
+    ggml_backend_sched_reset(lctx.sched);
+
     return 0;
 }
 
-- 
cgit v1.2.3