llama : save and restore kv cache for single seq id (#6341)

* llama : save and restore kv cache for single seq id
* remove trailing whitespace
* respond error in case there's no space in the kv cache
* add kv seq save restore to test case
* add --slot-save-path arg to enable save restore and restrict save location
* Returning 0 for some cases, instead of asserting.
* cleanup error cases
* rename sequence state functions
* rename state get set functions
* add previous function names back in with DEPRECATED notice
* update doc
* adjust endpoints to preferred style
* fix restoring zero cell count
* handle seq rm return value
* unused param
* keep in the size check
* fix return types
* add server test case for slot save restore
* cleanup
* add cake
* cleanup style
* add special
* removing a whole sequence never fails
* move sequence state file functionality from server to llama to match session api and add version tags
* catch exceptions on save as well
* error log messages
* check types for stricter restore
* update server doc
* readme : update API changes date
* strict filename validation
* move include, reject bom as well
* also reject empty filename
* reject whitespace and trailing dot

---------

Co-authored-by: Martin Evans <martindevans@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
author: Jan Boon <jan.boon@kaetemi.be> 2024-04-08 20:43:30 +0800
committer: GitHub <noreply@github.com> 2024-04-08 15:43:30 +0300
commit: beea6e1b16e783a0886e78dec01002a8c00db24d (patch)
tree: a7365b1e93145b78a8b4be72df959239aa8c0f0d /examples/main/main.cpp
parent: 87fb5b4234d4b9c56ac94cf7aa229c8fd7defdb0 (diff)
1 files changed, 3 insertions, 3 deletions
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index e2d07a63..711f162d 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -235,7 +235,7 @@ int main(int argc, char ** argv) {
             // The file exists and is not empty
             session_tokens.resize(n_ctx);
             size_t n_token_count_out = 0;
-            if (!llama_load_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) {
+            if (!llama_state_load_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) {
                 LOG_TEE("%s: error: failed to load session file '%s'\n", __func__, path_session.c_str());
                 return 1;
             }
@@ -693,7 +693,7 @@ int main(int argc, char ** argv) {
             // optionally save the session on first sample (for faster prompt loading next time)
             if (!path_session.empty() && need_to_save_session && !params.prompt_cache_ro) {
                 need_to_save_session = false;
-                llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
+                llama_state_save_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
 
                 LOG("saved session to %s\n", path_session.c_str());
             }
@@ -935,7 +935,7 @@ int main(int argc, char ** argv) {
 
     if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) {
         LOG_TEE("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
-        llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
+        llama_state_save_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
     }
 
     llama_print_timings(ctx);
author	Jan Boon <jan.boon@kaetemi.be>	2024-04-08 20:43:30 +0800
committer	GitHub <noreply@github.com>	2024-04-08 15:43:30 +0300
commit	beea6e1b16e783a0886e78dec01002a8c00db24d (patch)
tree	a7365b1e93145b78a8b4be72df959239aa8c0f0d /examples/main/main.cpp
parent	87fb5b4234d4b9c56ac94cf7aa229c8fd7defdb0 (diff)