author | Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> | 2023-10-29 11:31:40 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-29 11:31:40 -0600 |
commit | 6e08281e588bbba1a5d180290a94a43f167f3a1a (patch) | |
tree | 46add394417eb2b5929793ca879c793a478fd3f8 /llama.h | |
parent | 2046eb4345e62c4575b3cdc0115a51db89f3fb70 (diff) |
Extend llama_kv_cache_seq_rm to allow matching any sequence (#3843)
* Extend llama_kv_cache_seq_rm to allow matching any sequence
* Replace llama_kv_cache_tokens_rm with llama_kv_cache_clear
Use llama_kv_cache_clear for cache clearing
Change calls to llama_kv_cache_tokens_rm that delete by position to use llama_kv_cache_seq_rm instead
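
As a rough sketch of the migration described above (not part of the commit), the snippet below shows how callers of the removed llama_kv_cache_tokens_rm might switch to the new calls, based only on the header comments in the diff further down. The helper names reset_cache and drop_from and the n_keep value are made up for illustration, and it is assumed that llama_kv_cache_seq_rm takes the sequence id followed by the [p0, p1) position range after the context pointer.

    #include "llama.h"

    // Clearing the whole cache: the old "remove everything" call is replaced
    // by the dedicated clear function.
    static void reset_cache(struct llama_context * ctx) {
        // was: llama_kv_cache_tokens_rm(ctx, -1, -1);
        llama_kv_cache_clear(ctx);
    }

    // Deleting by position: seq_id < 0 now matches any sequence, and p1 < 0
    // means "to the end", so this removes positions [n_keep, inf) from every
    // sequence in the cache.
    static void drop_from(struct llama_context * ctx, int n_keep) {
        // was: llama_kv_cache_tokens_rm(ctx, n_keep, -1);
        llama_kv_cache_seq_rm(ctx, /*seq_id=*/ -1, /*p0=*/ n_keep, /*p1=*/ -1);
    }
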
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 15 |
1 file changed, 6 insertions, 9 deletions
@@ -334,17 +334,14 @@ extern "C" {
     LLAMA_API DEPRECATED(int llama_get_kv_cache_token_count(const struct llama_context * ctx),
             "avoid using this, it will be removed in the future, instead - count the tokens in user code");
 
-    // Remove all tokens data of cells in [c0, c1)
-    // c0 < 0 : [0, c1]
-    // c1 < 0 : [c0, inf)
-    LLAMA_API void llama_kv_cache_tokens_rm(
-            struct llama_context * ctx,
-            int32_t c0,
-            int32_t c1);
+    // Clear the KV cache
+    LLAMA_API void llama_kv_cache_clear(
+            struct llama_context * ctx);
 
     // Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
-    // p0 < 0 : [0, p1]
-    // p1 < 0 : [p0, inf)
+    // seq_id < 0 : match any sequence
+    // p0 < 0 : [0, p1]
+    // p1 < 0 : [p0, inf)
     LLAMA_API void llama_kv_cache_seq_rm(
             struct llama_context * ctx,
             llama_seq_id seq_id,