From 16bc66d9479edd5ee12ec734973554d4493c5dfa Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Thu, 28 Sep 2023 21:42:38 +0200
Subject: llama.cpp : split llama_context_params into model and context params
 (#3301)

* llama.cpp : split llama_context_params into model and context params

ggml-ci

* fix metal build

* fix freq_base/scale default to model value

* llama-bench : keep the same model between tests when possible

* move n_threads to llama_context_params, add n_threads_batch

* fix mpi build

* remove kv_size(), cuda scratch fixes

* remove low-vram option

* add n_threads_batch to system info, refactor to get_system_info()

* add documentation about --threads-batch to the READMEs

* llama-bench fix

* main : fix rope freq/scale warning

* llama.cpp : add llama_get_model
common : add llama_tokenize from model

* remove duplicated ctx/model functions

ggml-ci

* cuda : print total VRAM used
---
 examples/embd-input/embd-input-test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'examples/embd-input/embd-input-test.cpp')

diff --git a/examples/embd-input/embd-input-test.cpp b/examples/embd-input/embd-input-test.cpp
index e5e040f6..dc4a0e48 100644
--- a/examples/embd-input/embd-input-test.cpp
+++ b/examples/embd-input/embd-input-test.cpp
@@ -8,7 +8,7 @@ int main(int argc, char** argv) {
     auto mymodel = create_mymodel(argc, argv);
     int N = 10;
     int max_tgt_len = 500;
-    int n_embd = llama_n_embd(mymodel->ctx);
+    int n_embd = llama_n_embd(llama_get_model(mymodel->ctx));
 
     // add random float embd to test evaluation
     float * data = new float[N*n_embd];
-- 
cgit v1.2.3