author     Georgi Gerganov <ggerganov@gmail.com>    2023-10-09 14:32:17 +0300
committer  GitHub <noreply@github.com>              2023-10-09 14:32:17 +0300
commit     fcca0a700487999d52a525c96d6661e9f6a8703a (patch)
tree       edf07ca2f40aa95e40b5f6863322ea0293467592 /llama.cpp
parent     dcc09d25961c5d0626bc148e558ee841141748f7 (diff)
refact : fix convert script + zero out KV cache to avoid nans (#3523)
* refact : fix convert script + zero out KV cache to avoid nans
* ggml : silu(-inf) should never happen
* metal : assert various kernel requirements
Diffstat (limited to 'llama.cpp')
-rw-r--r--   llama.cpp   4
1 file changed, 4 insertions(+), 0 deletions(-)
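The second bullet of the commit message refers to SiLU: silu(x) = x * sigmoid(x) = x / (1 + exp(-x)). The mathematical limit at x = -inf is 0, but a straightforward floating-point evaluation yields -inf / inf = NaN, which is one way -inf garbage values can turn into NaNs downstream. A minimal standalone C++ illustration of that failure mode (not part of this patch):

    #include <cmath>
    #include <cstdio>

    // SiLU(x) = x * sigmoid(x) = x / (1 + exp(-x)).
    // The limit as x -> -inf is 0, but in IEEE-754 arithmetic
    // exp(+inf) = inf, so the expression evaluates to -inf / inf = NaN.
    static float silu(float x) {
        return x / (1.0f + std::exp(-x));
    }

    int main() {
        std::printf("silu(-1.0) = %f\n", silu(-1.0f));     // ~ -0.268941
        std::printf("silu(-inf) = %f\n", silu(-INFINITY)); // nan
        return 0;
    }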
diff --git a/llama.cpp b/llama.cpp
index 77f7fa7c..24f07dac 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1325,7 +1325,11 @@ static bool llama_kv_cache_init(
     cache.cells.clear();
     cache.cells.resize(n_ctx);
 
+    // TODO: this should be:
+    //   cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*ggml_tensor_overhead());
+    //   change it and test that it works
     cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
+    memset(cache.buf.data, 0, cache.buf.size);
 
     struct ggml_init_params params;
     params.mem_size = cache.buf.size;
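The added memset follows a common defensive pattern: zero the KV cache backing buffer right after allocation so that any cell read before it is written behaves as a zero vector rather than stale memory. A rough standalone sketch of that pattern, using hypothetical names rather than the llama.cpp buffer API:

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <cstring>

    // Hypothetical buffer type, not the llama.cpp API: allocate the
    // backing memory for the KV cache, then zero it before handing it
    // to the tensor library, so unused cells hold 0 instead of whatever
    // malloc happened to return.
    struct kv_buffer {
        uint8_t * data = nullptr;
        size_t    size = 0;

        bool init(size_t n_bytes) {
            data = (uint8_t *) std::malloc(n_bytes);
            if (data == nullptr) {
                return false;
            }
            size = n_bytes;
            std::memset(data, 0, size); // same idea as the memset added in the hunk above
            return true;
        }

        ~kv_buffer() {
            std::free(data);
        }
    };

Zeroing the buffer once at initialization is cheap relative to its size and removes any dependence on uninitialized memory contents, which is why the patch pairs the memset with the existing resize call.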