diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-10-29 18:32:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-29 18:32:51 +0200 |
commit | 71a09da301705b9c5ad4ca3cf3fbd966dd3f1ec5 (patch) | |
tree | a07e37ca6a6596aa543e06b70dbba628b9493fe0 | |
parent | d69d777c02b9ac405a95f3cbfba219a990caefff (diff) |
llama : fix kv shift bug (#3835)
ggml-ci
-rw-r--r-- | llama.cpp | 27 |
1 file changed, 18 insertions, 9 deletions
```diff
@@ -1552,14 +1552,14 @@ static void llama_kv_cache_seq_shift(
 
     for (uint32_t i = 0; i < cache.size; ++i) {
         if (cache.cells[i].has_seq_id(seq_id) && cache.cells[i].pos >= p0 && cache.cells[i].pos < p1) {
-            cache.cells[i].pos += delta;
+            cache.has_shift = true;
+            cache.cells[i].pos += delta;
+            cache.cells[i].delta += delta;
+
             if (cache.cells[i].pos < 0) {
                 cache.cells[i].pos = -1;
                 cache.cells[i].seq_id.clear();
                 if (new_head == cache.size) new_head = i;
-            } else {
-                cache.has_shift = true;
-                cache.cells[i].delta = delta;
             }
         }
     }
@@ -6073,11 +6073,20 @@ static int llama_decode_internal(
 #endif
 
     // update the kv ring buffer
-    lctx.kv_self.has_shift = false;
-    lctx.kv_self.head += n_tokens;
-    // Ensure kv cache head points to a valid index.
-    if (lctx.kv_self.head >= lctx.kv_self.size) {
-        lctx.kv_self.head = 0;
+    {
+        if (kv_self.has_shift) {
+            kv_self.has_shift = false;
+            for (uint32_t i = 0; i < kv_self.size; ++i) {
+                kv_self.cells[i].delta = 0;
+            }
+        }
+
+        kv_self.head += n_tokens;
+
+        // Ensure kv cache head points to a valid index.
+        if (kv_self.head >= kv_self.size) {
+            kv_self.head = 0;
+        }
     }
 
 #ifdef GGML_PERF
```