From cf348a60e0af3905acd1d297cb064b918265b7ac Mon Sep 17 00:00:00 2001 From: Evan Jones Date: Wed, 10 May 2023 11:37:14 -0400 Subject: main : add option to save full output to session (#1338) * main : add option to save full output to session * split behavior into --session and --prompt-cache * restore original implementation with new names * PR comments * move the check for incompatible parameters to gpt_params_parse * Fix whitespace Co-authored-by: DannyDaemonic --------- Co-authored-by: DannyDaemonic --- examples/main/main.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'examples/main/main.cpp') diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 6e1172a4..bd1c4ab5 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -139,7 +139,7 @@ int main(int argc, char ** argv) { // Add a space in front of the first character to match OG llama tokenizer behavior params.prompt.insert(0, 1, ' '); - std::string path_session = params.path_session; + std::string path_session = params.path_prompt_cache; std::vector session_tokens; if (!path_session.empty()) { @@ -292,14 +292,9 @@ int main(int argc, char ** argv) { is_interacting = params.interactive_first; } - bool is_antiprompt = false; - bool input_echo = true; - - // HACK - because session saving incurs a non-negligible delay, for now skip re-saving session - // if we loaded a session with at least 75% similarity. It's currently just used to speed up the - // initial prompt so it doesn't need to be an exact match. - bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < (embd_inp.size() * 3 / 4); - + bool is_antiprompt = false; + bool input_echo = true; + bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < embd_inp.size(); int n_past = 0; int n_remain = params.n_predict; @@ -328,7 +323,7 @@ int main(int argc, char ** argv) { embd.insert(embd.begin(), last_n_tokens.begin() + n_ctx - n_left/2 - embd.size(), last_n_tokens.end() - embd.size()); // stop saving session if we run out of context - path_session = ""; + path_session.clear(); //printf("\n---\n"); //printf("resetting: '"); @@ -603,6 +598,11 @@ int main(int argc, char ** argv) { } } + if (!path_session.empty() && params.prompt_cache_all) { + fprintf(stderr, "\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str()); + llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); + } + llama_print_timings(ctx); llama_free(ctx); -- cgit v1.2.3