author     slaren <slarengh@gmail.com>      2023-09-28 21:42:38 +0200
committer  GitHub <noreply@github.com>      2023-09-28 22:42:38 +0300
commit     16bc66d9479edd5ee12ec734973554d4493c5dfa
tree       4cca787ebd86dd55fd176d27112117c74e9b34c6 /examples/embd-input
parent     0512d66670de3f650c579519833c085014b0f200
llama.cpp : split llama_context_params into model and context params (#3301)
* llama.cpp : split llama_context_params into model and context params
ggml-ci
* fix metal build
* fix freq_base/scale default to model value
* llama-bench : keep the same model between tests when possible
* move n_threads to llama_context_params, add n_threads_batch
* fix mpi build
* remove kv_size(), cuda scratch fixes
* remove low-vram option
* add n_threads_batch to system info, refactor to get_system_info()
* add documentation about --threads-batch to the READMEs
* llama-bench fix
* main : fix rope freq/scale warning
* llama.cpp : add llama_get_model
common : add llama_tokenize from model
* remove duplicated ctx/model functions
ggml-ci
* cuda : print total VRAM used
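
The gist of the split, as a minimal sketch against the llama.h of this commit; the model path and the parameter values below are placeholders, not taken from the diff:

// model-wide settings now live in llama_model_params; per-context settings
// (n_ctx, n_batch, n_threads, n_threads_batch, ...) in llama_context_params
#include "llama.h"

int main(void) {
    llama_backend_init(false);

    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 0;                      // model-level: layer offloading

    llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx           = 2048;                // context-level: KV cache size
    cparams.n_threads       = 4;                   // threads for single-token generation
    cparams.n_threads_batch = 8;                   // threads for batch/prompt processing

    llama_model   * model = llama_load_model_from_file("model.gguf", mparams);
    llama_context * ctx   = llama_new_context_with_model(model, cparams);

    // the context no longer answers model-level questions directly;
    // query the model via llama_get_model (see the hunks below)
    const int n_embd  = llama_n_embd (llama_get_model(ctx));
    const int n_vocab = llama_n_vocab(llama_get_model(ctx));
    (void) n_embd; (void) n_vocab;

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}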
Diffstat (limited to 'examples/embd-input')
 examples/embd-input/embd-input-lib.cpp  | 13 ++++++-------
 examples/embd-input/embd-input-test.cpp |  2 +-
 2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp
index 9bd4d347..99e6bdad 100644
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -48,8 +48,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
     // print system information
     {
         fprintf(stderr, "\n");
-        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
+        fprintf(stderr, "%s\n", get_system_info(params).c_str());
     }
     struct MyModel * ret = new MyModel();
     ret->ctx = ctx;
@@ -71,7 +70,7 @@ bool eval_float(void * model, float * input, int N){
     MyModel * mymodel = (MyModel*)model;
     llama_context * ctx = mymodel->ctx;
     gpt_params params = mymodel->params;
-    int n_emb = llama_n_embd(ctx);
+    int n_emb = llama_n_embd(llama_get_model(ctx));
     int n_past = mymodel->n_past;
     int n_batch = N; // params.n_batch;
 
@@ -81,7 +80,7 @@ bool eval_float(void * model, float * input, int N){
             n_eval = n_batch;
         }
         llama_batch batch = { int32_t(n_eval), nullptr, (input+i*n_emb), nullptr, nullptr, nullptr, n_past, 1, 0, };
-        if (llama_decode(ctx, batch, params.n_threads)) {
+        if (llama_decode(ctx, batch)) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
             return false;
         }
@@ -102,7 +101,7 @@ bool eval_tokens(void * model, std::vector<llama_token> tokens) {
         if (n_eval > params.n_batch) {
             n_eval = params.n_batch;
         }
-        if (llama_decode(ctx, llama_batch_get_one(&tokens[i], n_eval, n_past, 0), params.n_threads)) {
+        if (llama_decode(ctx, llama_batch_get_one(&tokens[i], n_eval, n_past, 0))) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
             return false;
         }
@@ -133,7 +132,7 @@ llama_token sampling_id(struct MyModel* mymodel) {
 
     // out of user input, sample next token
     const float   temp      = params.temp;
-    const int32_t top_k     = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k;
+    const int32_t top_k     = params.top_k <= 0 ? llama_n_vocab(llama_get_model(ctx)) : params.top_k;
     const float   top_p     = params.top_p;
     const float   tfs_z     = params.tfs_z;
     const float   typical_p = params.typical_p;
@@ -149,7 +148,7 @@ llama_token sampling_id(struct MyModel* mymodel) {
     llama_token id = 0;
     {
         auto logits  = llama_get_logits(ctx);
-        auto n_vocab = llama_n_vocab(ctx);
+        auto n_vocab = llama_n_vocab(llama_get_model(ctx));
 
         // Apply params.logit_bias map
         for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) {
diff --git a/examples/embd-input/embd-input-test.cpp b/examples/embd-input/embd-input-test.cpp
index e5e040f6..dc4a0e48 100644
--- a/examples/embd-input/embd-input-test.cpp
+++ b/examples/embd-input/embd-input-test.cpp
@@ -8,7 +8,7 @@ int main(int argc, char** argv) {
     auto mymodel = create_mymodel(argc, argv);
     int N = 10;
     int max_tgt_len = 500;
-    int n_embd = llama_n_embd(mymodel->ctx);
+    int n_embd = llama_n_embd(llama_get_model(mymodel->ctx));
 
     // add random float embd to test evaluation
     float * data = new float[N*n_embd];
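
A side note on eval_float() above: because embd-input feeds raw embeddings instead of token ids, it builds the llama_batch by aggregate initialization, leaving token null and pointing embd at the float buffer. The same initializer spelled out with the field names from this commit's llama.h (a sketch; n_eval, n_emb, input, i and n_past are assumed to be set up as in the hunk above):

    llama_batch batch = {
        /* .n_tokens   = */ int32_t(n_eval),
        /* .token      = */ nullptr,          // no token ids: embedding input
        /* .embd       = */ input + i*n_emb,  // n_eval rows of n_emb floats
        /* .pos        = */ nullptr,          // derived from all_pos_0/all_pos_1
        /* .seq_id     = */ nullptr,          // derived from all_seq_id
        /* .logits     = */ nullptr,          // default: logits for the last token only
        /* .all_pos_0  = */ n_past,           // position of the first token
        /* .all_pos_1  = */ 1,                // position stride
        /* .all_seq_id = */ 0,                // single sequence
    };
    if (llama_decode(ctx, batch)) {           // thread counts now come from the context
        fprintf(stderr, "%s : failed to eval\n", __func__);
        return false;
    }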