diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-01-16 19:34:54 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-16 19:34:54 +0200 |
commit | 959ef0c0df725c013c7f712eaa7790b8e38a8e20 (patch) | |
tree | b7a0abd13dbd276bbf4737bb4a8967cf52265026 /examples/perplexity | |
parent | c37b3474e61d609d43cccc3bde5d559e80e4f5d1 (diff) |
perplexity : fix kv cache handling for hellaswag (#4981)
ggml-ci
Diffstat (limited to 'examples/perplexity')
-rw-r--r-- | examples/perplexity/perplexity.cpp | 1 |
1 files changed, 1 insertions, 0 deletions
```diff
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 9a77beca..b4fedf80 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -428,6 +428,7 @@ static std::vector<float> hellaswag_evaluate_tokens(
     for (size_t i_chunk = 0; i_chunk < n_chunk; ++i_chunk) {
         size_t n_tokens = tokens.size() - i_chunk * n_batch;
         n_tokens = std::min(n_tokens, size_t(n_batch));
+        llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
         if (llama_decode(ctx, llama_batch_get_one(tokens.data() + i_chunk * n_batch, n_tokens, n_past, 0))) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
             return {};
```