diff options
Diffstat (limited to 'examples')
-rw-r--r-- | examples/batched-bench/batched-bench.cpp | 2 | ||||
-rw-r--r-- | examples/llama-bench/llama-bench.cpp | 4 | ||||
-rw-r--r-- | examples/main/main.cpp | 2 | ||||
-rw-r--r-- | examples/perplexity/perplexity.cpp | 6 | ||||
-rw-r--r-- | examples/server/server.cpp | 2 |
5 files changed, 8 insertions, 8 deletions
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index 43f9c971..533c55c1 100644 --- a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -185,7 +185,7 @@ int main(int argc, char ** argv) { const auto t_pp_start = ggml_time_us(); - llama_kv_cache_tokens_rm(ctx, -1, -1); + llama_kv_cache_clear(ctx); if (!decode_helper(ctx, batch, ctx_params.n_batch)) { LOG_TEE("%s: llama_decode() failed\n", __func__); diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 20767d55..78039818 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -1037,7 +1037,7 @@ int main(int argc, char ** argv) { test t(inst, lmodel, ctx); - llama_kv_cache_tokens_rm(ctx, -1, -1); + llama_kv_cache_clear(ctx); // warmup run if (t.n_prompt > 0) { @@ -1048,7 +1048,7 @@ int main(int argc, char ** argv) { } for (int i = 0; i < params.reps; i++) { - llama_kv_cache_tokens_rm(ctx, -1, -1); + llama_kv_cache_clear(ctx); uint64_t t_start = get_time_ns(); if (t.n_prompt > 0) { diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 3d9f670b..8a43b6ab 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -298,7 +298,7 @@ int main(int argc, char ** argv) { } // remove any "future" tokens that we might have inherited from the previous session - llama_kv_cache_tokens_rm(ctx, n_matching_session_tokens, -1); + llama_kv_cache_seq_rm(ctx, -1, n_matching_session_tokens, -1); } LOGLN( diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 3c2542e8..bd2c73d8 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -210,7 +210,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params & const auto t_start = std::chrono::high_resolution_clock::now(); // clear the KV cache - llama_kv_cache_tokens_rm(ctx, -1, -1); + llama_kv_cache_clear(ctx); for (int j = 0; j < num_batches; ++j) { const int batch_start = start + j * n_batch; @@ -339,7 +339,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par const auto t_start = std::chrono::high_resolution_clock::now(); // clear the KV cache - llama_kv_cache_tokens_rm(ctx, -1, -1); + llama_kv_cache_clear(ctx); for (int j = 0; j < num_batches; ++j) { const int batch_start = start + j * n_batch; @@ -573,7 +573,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) { } // clear the KV cache - llama_kv_cache_tokens_rm(ctx, -1, -1); + llama_kv_cache_clear(ctx); auto logits = hellaswag_evaluate_tokens(ctx, query_embd, 0, params.n_batch, n_vocab); if (logits.empty()) { diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 5b7e4139..c163c7f8 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -857,7 +857,7 @@ struct llama_server_context void kv_cache_clear() { // clear the entire KV cache - llama_kv_cache_tokens_rm(ctx, -1, -1); + llama_kv_cache_clear(ctx); clean_kv_cache = false; } |