From 5be6c803fa5378f62a1590f3ad8c6b64c7c0c2ce Mon Sep 17 00:00:00 2001
From: Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>
Date: Mon, 23 Oct 2023 12:40:03 -0700
Subject: llama : remove token functions with `context` args in favor of
 `model` (#3720)

* added `llama_model_token_*` variants to all the `llama_token_*` functions.

* added `LLAMA_API`

* formatting

Co-authored-by: Georgi Gerganov

* removed old `llama_token` functions

* changed 3 more functions to take in model

- `llama_token_get_text`
- `llama_token_get_score`
- `llama_token_get_type`

* added back docs

* fixed main.cpp

* changed token functions to use new model variants

* changed token functions to use new model variants

---------

Co-authored-by: Georgi Gerganov
---
 examples/server/server.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'examples/server')

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index c3279dbc..693f9b77 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -726,7 +726,7 @@ struct llama_server_context
 
         if (json_value(data, "ignore_eos", false))
         {
-            slot->sparams.logit_bias[llama_token_eos(ctx)] = -INFINITY;
+            slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
         }
 
         const auto &logit_bias = data.find("logit_bias");
@@ -1056,7 +1056,7 @@ struct llama_server_context
             slot.has_next_token = false;
         }
 
-        if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(ctx))
+        if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
         {
             slot.stopped_eos    = true;
             slot.has_next_token = false;
@@ -1130,7 +1130,7 @@ struct llama_server_context
 
     json get_formated_generation(llama_client_slot &slot)
     {
-        const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(ctx));
+        const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
         const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
                                 eos_bias->second < 0.0f && std::isinf(eos_bias->second);
         return json {
@@ -1555,11 +1555,11 @@ struct llama_server_context
                     suffix_tokens.erase(suffix_tokens.begin());
                 }
 
-                prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(ctx));
-                prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(ctx)); // always add BOS
-                prefix_tokens.insert(prefix_tokens.end(),   llama_token_suffix(ctx));
+                prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
+                prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
+                prefix_tokens.insert(prefix_tokens.end(),   llama_token_suffix(model));
                 prefix_tokens.insert(prefix_tokens.end(),   suffix_tokens.begin(), suffix_tokens.end());
-                prefix_tokens.push_back(llama_token_middle(ctx));
+                prefix_tokens.push_back(llama_token_middle(model));
                 prompt_tokens = prefix_tokens;
             }
             else
-- 
cgit v1.2.3
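
For downstream callers of the llama.cpp C API, the change this patch applies to server.cpp amounts to passing the llama_model pointer where a llama_context pointer used to be accepted: the special tokens are a property of the model's vocabulary, so no context is needed to look them up. The sketch below is illustrative rather than part of the patch; the helper name print_special_tokens is hypothetical, and it assumes a model loaded through the llama.h entry points of this era (llama_load_model_from_file, llama_model_default_params).

    #include <cstdio>
    #include "llama.h"

    // Hypothetical helper: queries special tokens via the model-based
    // getters this patch standardizes on. Before the patch, these
    // getters took a llama_context * instead.
    static void print_special_tokens(const struct llama_model * model) {
        std::printf("BOS: %d\n", llama_token_bos(model));
        std::printf("EOS: %d\n", llama_token_eos(model));
        // The fill-in-the-middle sentinel tokens used by the infill
        // path in server.cpp follow the same pattern:
        std::printf("FIM prefix: %d\n", llama_token_prefix(model));
        std::printf("FIM middle: %d\n", llama_token_middle(model));
        std::printf("FIM suffix: %d\n", llama_token_suffix(model));
    }

    int main(int argc, char ** argv) {
        if (argc < 2) {
            std::fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
            return 1;
        }
        llama_backend_init(false); // false: no NUMA optimizations
        llama_model_params mparams = llama_model_default_params();
        struct llama_model * model = llama_load_model_from_file(argv[1], mparams);
        if (model != nullptr) {
            // No llama_context is ever created: token lookup needs
            // only the model.
            print_special_tokens(model);
            llama_free_model(model);
        }
        llama_backend_free();
        return 0;
    }

Note that the example never creates a llama_context at all, which is the point of the API change: code that only needs vocabulary metadata (logit-bias setup, EOS checks, infill sentinel insertion, as in the hunks above) no longer has to thread a context through.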