From 214b6a35702a489e3738acd81fad6d46182d3036 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 29 Apr 2023 18:43:28 +0300 Subject: ggml : adjust mul_mat_f16 work memory (#1226) * llama : minor - remove explicity int64_t cast * ggml : reduce memory buffer for F16 mul_mat when not using cuBLAS * ggml : add asserts to guard for incorrect wsize --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'llama.cpp') diff --git a/llama.cpp b/llama.cpp index dc4bdc53..f8b4c8e4 100644 --- a/llama.cpp +++ b/llama.cpp @@ -780,7 +780,7 @@ static bool kv_cache_init( const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; - const int64_t n_mem = (int64_t)n_layer*n_ctx; + const int64_t n_mem = n_layer*n_ctx; const int64_t n_elements = n_embd*n_mem; cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB); -- cgit v1.2.3