diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-06-17 19:30:22 +0300 |
---|---|---|
committer | Georgi Gerganov <ggerganov@gmail.com> | 2023-06-17 19:31:20 +0300 |
commit | 051e1b0e6a6e3aee7d989b47760980e6fda5861c (patch) | |
tree | b2b2cdb4d59003606c4dce3751273a3911d3b7e0 /llama.cpp | |
parent | 86c7571864ff331f8cdb9e092f3abeb123729a56 (diff) |
llama : fix kv_cache `n` init (close #1903)
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 2 |
1 file changed, 2 insertions(+), 0 deletions(-)
@@ -886,6 +886,7 @@ static bool kv_cache_init(
     const int64_t n_elements = n_embd*n_mem;
 
     cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
+    cache.n = 0;
 
     struct ggml_init_params params;
     params.mem_size   = cache.buf.size;
@@ -904,6 +905,7 @@ static bool kv_cache_init(
         ggml_set_name(cache.k, "cache_k");
         ggml_set_name(cache.v, "cache_v");
 
+        (void) n_gpu_layers;
 #ifdef GGML_USE_CUBLAS
         if (n_gpu_layers > n_layer + 1) {
             ggml_cuda_assign_buffers_no_scratch(cache.v);