author     Georgi Gerganov <ggerganov@gmail.com>    2023-10-09 14:32:17 +0300
committer  GitHub <noreply@github.com>              2023-10-09 14:32:17 +0300
commit     fcca0a700487999d52a525c96d6661e9f6a8703a (patch)
tree       edf07ca2f40aa95e40b5f6863322ea0293467592 /llama.cpp
parent     dcc09d25961c5d0626bc148e558ee841141748f7 (diff)
refact : fix convert script + zero out KV cache to avoid nans (#3523)
* refact : fix convert script + zero out KV cache to avoid nans
* ggml : silu(-inf) should never happen
* metal : assert various kernel requirements
Diffstat (limited to 'llama.cpp')
-rw-r--r--   llama.cpp   4
1 file changed, 4 insertions(+), 0 deletions(-)
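The second bullet of the commit message refers to SiLU: silu(x) = x * sigmoid(x) = x / (1 + exp(-x)). The mathematical limit at x = -inf is 0, but a straightforward floating-point evaluation yields -inf / inf = NaN, which is one way -inf garbage values can turn into NaNs downstream. A minimal standalone C++ illustration of that failure mode (not part of this patch):

    #include <cmath>
    #include <cstdio>

    // SiLU(x) = x * sigmoid(x) = x / (1 + exp(-x)).
    // The limit as x -> -inf is 0, but in IEEE-754 arithmetic
    // exp(+inf) = inf, so the expression evaluates to -inf / inf = NaN.
    static float silu(float x) {
        return x / (1.0f + std::exp(-x));
    }

    int main() {
        std::printf("silu(-1.0) = %f\n", silu(-1.0f));     // ~ -0.268941
        std::printf("silu(-inf) = %f\n", silu(-INFINITY)); // nan
        return 0;
    }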
diff --git a/llama.cpp b/llama.cpp
index 77f7fa7c..24f07dac 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1325,7 +1325,11 @@ static bool llama_kv_cache_init(
     cache.cells.clear();
     cache.cells.resize(n_ctx);
 
+    // TODO: this should be:
+    //   cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*ggml_tensor_overhead());
+    //   change it and test that it works
     cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
+    memset(cache.buf.data, 0, cache.buf.size);
 
     struct ggml_init_params params;
     params.mem_size = cache.buf.size;
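The added memset follows a common defensive pattern: zero the KV cache backing buffer right after allocation so that any cell read before it is written behaves as a zero vector rather than stale memory. A rough standalone sketch of that pattern, using hypothetical names rather than the llama.cpp buffer API:

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <cstring>

    // Hypothetical buffer type, not the llama.cpp API: allocate the
    // backing memory for the KV cache, then zero it before handing it
    // to the tensor library, so unused cells hold 0 instead of whatever
    // malloc happened to return.
    struct kv_buffer {
        uint8_t * data = nullptr;
        size_t    size = 0;

        bool init(size_t n_bytes) {
            data = (uint8_t *) std::malloc(n_bytes);
            if (data == nullptr) {
                return false;
            }
            size = n_bytes;
            std::memset(data, 0, size); // same idea as the memset added in the hunk above
            return true;
        }

        ~kv_buffer() {
            std::free(data);
        }
    };

Zeroing the buffer once at initialization is cheap relative to its size and removes any dependence on uninitialized memory contents, which is why the patch pairs the memset with the existing resize call.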