diff options
author | Alexey Parfenov <zxed@alkatrazstudio.net> | 2024-02-11 13:38:14 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-11 15:38:14 +0200 |
commit | 684780141a08200ec98eba3e982dbafd1d0b5000 (patch) | |
tree | 1acab1855330f647cae61c6040b7ac5ae3cf8ee1 /examples/server/server.cpp | |
parent | 85910c5b30f6e268321be8df044f5528a6efac52 (diff) |
server : allow specifying tokens as strings in logit_bias (#5003)
* server: allow specifying tokens as strings in logit_bias
* Apply suggestions from code review
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r-- | examples/server/server.cpp | 32 |
1 file changed, 25 insertions, 7 deletions
```diff
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 4d212f1f..1699eb76 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -626,18 +626,36 @@ struct llama_server_context
 const int n_vocab = llama_n_vocab(model);
 for (const auto &el : *logit_bias)
 {
-    if (el.is_array() && el.size() == 2 && el[0].is_number_integer())
+    if (el.is_array() && el.size() == 2)
     {
-        llama_token tok = el[0].get<llama_token>();
-        if (tok >= 0 && tok < n_vocab)
+        float bias;
+        if (el[1].is_number())
         {
-            if (el[1].is_number())
+            bias = el[1].get<float>();
+        }
+        else if (el[1].is_boolean() && !el[1].get<bool>())
+        {
+            bias = -INFINITY;
+        }
+        else
+        {
+            continue;
+        }
+
+        if (el[0].is_number_integer())
+        {
+            llama_token tok = el[0].get<llama_token>();
+            if (tok >= 0 && tok < n_vocab)
             {
-                slot->sparams.logit_bias[tok] = el[1].get<float>();
+                slot->sparams.logit_bias[tok] = bias;
             }
-            else if (el[1].is_boolean() && !el[1].get<bool>())
+        }
+        else if (el[0].is_string())
+        {
+            auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
+            for (auto tok : toks)
             {
-                slot->sparams.logit_bias[tok] = -INFINITY;
+                slot->sparams.logit_bias[tok] = bias;
             }
         }
     }
```