From 38b16dfca6e5032e6cfb90c1653bf1ba4cf647b4 Mon Sep 17 00:00:00 2001 From: Shouzheng Liu Date: Thu, 24 Aug 2023 12:27:25 -0400 Subject: metal : bug-fix when enable ggml-alloc (#2757) * metal: better memory alloc w/ concurrency dispatch ggml-alloc should only free tensors at memory barriers. * ggml-alloc: avoid returning silently In certain cases, the allocate_node() function may silently return without performing any memory allocation. --- llama.cpp | 5 ----- 1 file changed, 5 deletions(-) (limited to 'llama.cpp') diff --git a/llama.cpp b/llama.cpp index 7ee6bcda..b5266c1e 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2707,11 +2707,6 @@ static struct ggml_cgraph * llm_build_falcon( struct ggml_tensor * inpFF = attn_norm; cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF); - - // TODO: this is temporary needed to introduce artificial dependency between FF and ATTN - // adding this, because there seems to be a bug in the Metal concurrency optimization - // without this line, the results are non-deterministic and wrong - cur->src[2] = attn_out; offload_func(cur); cur = ggml_gelu(ctx0, cur); -- cgit v1.2.3