From beea6e1b16e783a0886e78dec01002a8c00db24d Mon Sep 17 00:00:00 2001 From: Jan Boon Date: Mon, 8 Apr 2024 20:43:30 +0800 Subject: llama : save and restore kv cache for single seq id (#6341) * llama : save and restore kv cache for single seq id * remove trailing whitespace * respond error in case there's no space in the kv cache * add kv seq save restore to test case * add --slot-save-path arg to enable save restore and restrict save location * Returning 0 for some cases, instead of asserting. * cleanup error cases * rename sequence state functions * rename state get set functions * add previous function names back in with DEPRECATED notice * update doc * adjust endpoints to preferred style * fix restoring zero cell count * handle seq rm return value * unused param * keep in the size check * fix return types * add server test case for slot save restore * cleanup * add cake * cleanup style * add special * removing a whole sequence never fails * move sequence state file functionality from server to llama to match session api and add version tags * catch exceptions on save as well * error log messages * check types for stricter restore * update server doc * readme : update API changes date * strict filename validation * move include, reject bom as well * also reject empty filename * reject whitespace and trailing dot --------- Co-authored-by: Martin Evans Co-authored-by: Georgi Gerganov --- examples/main/main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'examples/main/main.cpp') diff --git a/examples/main/main.cpp b/examples/main/main.cpp index e2d07a63..711f162d 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -235,7 +235,7 @@ int main(int argc, char ** argv) { // The file exists and is not empty session_tokens.resize(n_ctx); size_t n_token_count_out = 0; - if (!llama_load_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) { + if (!llama_state_load_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) { LOG_TEE("%s: error: failed to load session file '%s'\n", __func__, path_session.c_str()); return 1; } @@ -693,7 +693,7 @@ int main(int argc, char ** argv) { // optionally save the session on first sample (for faster prompt loading next time) if (!path_session.empty() && need_to_save_session && !params.prompt_cache_ro) { need_to_save_session = false; - llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); + llama_state_save_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); LOG("saved session to %s\n", path_session.c_str()); } @@ -935,7 +935,7 @@ int main(int argc, char ** argv) { if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) { LOG_TEE("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str()); - llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); + llama_state_save_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); } llama_print_timings(ctx); -- cgit v1.2.3