From 1aa18ef994a6a2b531434eb13251ef48e56d345b Mon Sep 17 00:00:00 2001
From: Shouzheng Liu
Date: Tue, 25 Jul 2023 08:00:19 -0400
Subject: metal : concurrently dispatch commands (#2358)

* metal : concurrently dispatch commands

  When `ggml_metal_graph_compute` is called for the first time,
  `ggml_metal_graph_find_concurrency` runs and writes the commands that can
  be issued concurrently into the Metal context's `concur_list` array.

* metal : don't call find_concurrency automatically

* metal : code style changes

---------

Co-authored-by: Georgi Gerganov
---
 llama.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index b42b4100..2d737bbc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1720,6 +1720,9 @@ static bool llama_eval_internal(
 
 #ifdef GGML_USE_METAL
     if (lctx.ctx_metal && N == 1) {
+        if (!ggml_metal_if_optimized(lctx.ctx_metal)) {
+            ggml_metal_graph_find_concurrency(lctx.ctx_metal,&gf);
+        }
         ggml_metal_set_n_cb     (lctx.ctx_metal, n_threads);
         ggml_metal_graph_compute(lctx.ctx_metal, &gf);
         ggml_metal_get_tensor   (lctx.ctx_metal, cur);
-- 
cgit v1.2.3
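
For context, the guarded call pattern the hunk adds looks roughly like the sketch
below when pulled out of `llama_eval_internal`. The standalone helper
`metal_dispatch_sketch` is illustrative only; the role of `concur_list` and the
"run once" behavior are taken from the commit message, and the Metal context and
graph are assumed to have been created elsewhere (e.g. via `ggml_metal_init` and
the usual graph-building path).

// Minimal sketch of the dispatch pattern introduced by this patch; not a
// drop-in replacement for llama_eval_internal.
#include "ggml.h"
#include "ggml-metal.h"

static void metal_dispatch_sketch(struct ggml_metal_context * ctx_metal,
                                  struct ggml_cgraph        * gf,
                                  int                         n_threads) {
    // Run the concurrency analysis only once per context:
    // ggml_metal_if_optimized reports whether a concur_list has already been
    // computed by a previous call.
    if (!ggml_metal_if_optimized(ctx_metal)) {
        ggml_metal_graph_find_concurrency(ctx_metal, gf);
    }

    // Dispatch the graph; nodes recorded in concur_list can be encoded so
    // that Metal executes them concurrently.
    ggml_metal_set_n_cb     (ctx_metal, n_threads);
    ggml_metal_graph_compute(ctx_metal, gf);
}

The point of the explicit guard (rather than calling find_concurrency inside
ggml_metal_graph_compute) is that the analysis is paid for once, on the first
evaluation, and the caller stays in control of when it happens.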