Extend llama_kv_cache_seq_rm to allow matching any sequence (#3843)

* Extend llama_kv_cache_seq_rm to allow matching any sequence
* Replace llama_kv_cache_tokens_rm with llama_kv_cache_clear

Use llama_kv_cache_clear for cache clearing.

Change calls to llama_kv_cache_tokens_rm that want to delete by position to use llama_kv_cache_seq_rm functionality.
author: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> 2023-10-29 11:31:40 -0600
committer: GitHub <noreply@github.com> 2023-10-29 11:31:40 -0600
commit: 6e08281e588bbba1a5d180290a94a43f167f3a1a (patch)
tree: 46add394417eb2b5929793ca879c793a478fd3f8 /examples/perplexity/perplexity.cpp
parent: 2046eb4345e62c4575b3cdc0115a51db89f3fb70 (diff)
1 files changed, 3 insertions, 3 deletions
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 3c2542e8..bd2c73d8 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -210,7 +210,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
         const auto t_start = std::chrono::high_resolution_clock::now();
 
         // clear the KV cache
-        llama_kv_cache_tokens_rm(ctx, -1, -1);
+        llama_kv_cache_clear(ctx);
 
         for (int j = 0; j < num_batches; ++j) {
             const int batch_start = start + j * n_batch;
@@ -339,7 +339,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
         const auto t_start = std::chrono::high_resolution_clock::now();
 
         // clear the KV cache
-        llama_kv_cache_tokens_rm(ctx, -1, -1);
+        llama_kv_cache_clear(ctx);
 
         for (int j = 0; j < num_batches; ++j) {
             const int batch_start = start + j * n_batch;
@@ -573,7 +573,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
         }
 
         // clear the KV cache
-        llama_kv_cache_tokens_rm(ctx, -1, -1);
+        llama_kv_cache_clear(ctx);
 
         auto logits = hellaswag_evaluate_tokens(ctx, query_embd, 0, params.n_batch, n_vocab);
         if (logits.empty()) {
author	Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>	2023-10-29 11:31:40 -0600
committer	GitHub <noreply@github.com>	2023-10-29 11:31:40 -0600
commit	6e08281e588bbba1a5d180290a94a43f167f3a1a (patch)
tree	46add394417eb2b5929793ca879c793a478fd3f8 /examples/perplexity/perplexity.cpp
parent	2046eb4345e62c4575b3cdc0115a51db89f3fb70 (diff)