From 527b6fba1d237befb324fd846bda7418c0fa394d Mon Sep 17 00:00:00 2001
From: Didzis Gosko <didzis@users.noreply.github.com>
Date: Sat, 24 Jun 2023 11:47:58 +0300
Subject: llama : make model stateless and context stateful (llama_state)
 (#1797)

* llama : make model stateless and context stateful

* llama : minor cleanup

* llama : update internal API declaration

* Apply suggestions from code review

fix style

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Add missing model memory release

* Fix style

* Add deprecation warning for public API function llama_init_from_file

* Update public API use cases: move away from deprecated llama_init_from_file

* Deprecate public API function llama_apply_lora_from_file

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 examples/server/server.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'examples/server/server.cpp')

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index c0984aad..de22d301 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -115,6 +115,7 @@ struct llama_server_context {
     std::vector<llama_token> embd;
     std::vector<llama_token> last_n_tokens;
 
+    llama_model * model = nullptr;
     llama_context * ctx = nullptr;
     gpt_params params;
 
@@ -130,6 +131,10 @@ struct llama_server_context {
             llama_free(ctx);
             ctx = nullptr;
         }
+        if (model) {
+            llama_free_model(model);
+            model = nullptr;
+        }
     }
 
     void rewind() {
@@ -150,8 +155,8 @@ struct llama_server_context {
 
     bool loadModel(const gpt_params & params_) {
         params = params_;
-        ctx = llama_init_from_gpt_params(params);
-        if (ctx == nullptr) {
+        std::tie(model, ctx) = llama_init_from_gpt_params(params);
+        if (model == nullptr) {
             LOG_ERROR("unable to load model", { { "model", params_.model } });
             return false;
         }
-- 
cgit v1.2.3