summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rw-r--r--examples/batched-bench/batched-bench.cpp2
-rw-r--r--examples/llama-bench/llama-bench.cpp4
-rw-r--r--examples/main/main.cpp2
-rw-r--r--examples/perplexity/perplexity.cpp6
-rw-r--r--examples/server/server.cpp2
5 files changed, 8 insertions, 8 deletions
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index 43f9c971..533c55c1 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -185,7 +185,7 @@ int main(int argc, char ** argv) {
const auto t_pp_start = ggml_time_us();
- llama_kv_cache_tokens_rm(ctx, -1, -1);
+ llama_kv_cache_clear(ctx);
if (!decode_helper(ctx, batch, ctx_params.n_batch)) {
LOG_TEE("%s: llama_decode() failed\n", __func__);
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 20767d55..78039818 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -1037,7 +1037,7 @@ int main(int argc, char ** argv) {
test t(inst, lmodel, ctx);
- llama_kv_cache_tokens_rm(ctx, -1, -1);
+ llama_kv_cache_clear(ctx);
// warmup run
if (t.n_prompt > 0) {
@@ -1048,7 +1048,7 @@ int main(int argc, char ** argv) {
}
for (int i = 0; i < params.reps; i++) {
- llama_kv_cache_tokens_rm(ctx, -1, -1);
+ llama_kv_cache_clear(ctx);
uint64_t t_start = get_time_ns();
if (t.n_prompt > 0) {
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 3d9f670b..8a43b6ab 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -298,7 +298,7 @@ int main(int argc, char ** argv) {
}
// remove any "future" tokens that we might have inherited from the previous session
- llama_kv_cache_tokens_rm(ctx, n_matching_session_tokens, -1);
+ llama_kv_cache_seq_rm(ctx, -1, n_matching_session_tokens, -1);
}
LOGLN(
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 3c2542e8..bd2c73d8 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -210,7 +210,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
const auto t_start = std::chrono::high_resolution_clock::now();
// clear the KV cache
- llama_kv_cache_tokens_rm(ctx, -1, -1);
+ llama_kv_cache_clear(ctx);
for (int j = 0; j < num_batches; ++j) {
const int batch_start = start + j * n_batch;
@@ -339,7 +339,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
const auto t_start = std::chrono::high_resolution_clock::now();
// clear the KV cache
- llama_kv_cache_tokens_rm(ctx, -1, -1);
+ llama_kv_cache_clear(ctx);
for (int j = 0; j < num_batches; ++j) {
const int batch_start = start + j * n_batch;
@@ -573,7 +573,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
}
// clear the KV cache
- llama_kv_cache_tokens_rm(ctx, -1, -1);
+ llama_kv_cache_clear(ctx);
auto logits = hellaswag_evaluate_tokens(ctx, query_embd, 0, params.n_batch, n_vocab);
if (logits.empty()) {
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 5b7e4139..c163c7f8 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -857,7 +857,7 @@ struct llama_server_context
void kv_cache_clear() {
// clear the entire KV cache
- llama_kv_cache_tokens_rm(ctx, -1, -1);
+ llama_kv_cache_clear(ctx);
clean_kv_cache = false;
}