From 5be6c803fa5378f62a1590f3ad8c6b64c7c0c2ce Mon Sep 17 00:00:00 2001 From: Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com> Date: Mon, 23 Oct 2023 12:40:03 -0700 Subject: llama : remove token functions with `context` args in favor of `model` (#3720) * added `llama_model_token_*` variants to all the `llama_token_*` functions. * added `LLAMA_API` * formatting Co-authored-by: Georgi Gerganov * removed old `llama_token` functions * changed 3 more functions to take in model - `llama_token_get_text` - `llama_token_get_score` - `llama_token_get_type` * added back docs * fixed main.cpp * changed token functions to use new model variants * changed token functions to use new model variants --------- Co-authored-by: Georgi Gerganov --- examples/llama-bench/llama-bench.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'examples/llama-bench') diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index a04115c9..20767d55 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -933,7 +933,7 @@ struct sql_printer : public printer { }; static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) { - std::vector tokens(n_batch, llama_token_bos(ctx)); + std::vector tokens(n_batch, llama_token_bos(llama_get_model(ctx))); int n_processed = 0; llama_set_n_threads(ctx, n_threads, n_threads); @@ -946,7 +946,7 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_bat } static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) { - llama_token token = llama_token_bos(ctx); + llama_token token = llama_token_bos(llama_get_model(ctx)); llama_set_n_threads(ctx, n_threads, n_threads); -- cgit v1.2.3