author    Georgi Gerganov <ggerganov@gmail.com>  2023-08-27 14:19:19 +0300
committer GitHub <noreply@github.com>            2023-08-27 14:19:19 +0300
commit    edd4c1481708fcd788b0e423268304fd26e2b125 (patch)
tree      2e7db62ea4816dc18f2518a08c36b6ea480eff05 /examples/server/server.cpp
parent    1591e2e590762011b43b10a9b6e04f13f98f2aa5 (diff)
llama : more tokenizer fixes (#2810)
* tests : write a Python tokenizer test (wip)
* llama : prefix input text for tokenization with whitespace
* llama : distinguish pieces from decoded text + fix detokenization
* common : add comments
* examples : no longer manually add leading space when tokenizing
* tests : use Python to generate tokenizer tests for C++
* tests : add option to tokenize text files

ggml-ci

* tests : add test-tokenizer-1.py
* llama.cpp : fix LF token
* hellaswag : move the concat space for clarity
* tests : add falcon tests (py + cpp, currently do not pass Unicode)

ggml-ci

* common : temporary separate llama_detokenize calls for SPM and BPE

---------

Co-authored-by: klosax <131523366+klosax@users.noreply.github.com>
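The rename from llama_token_to_str to llama_token_to_piece reflects the distinction the commit message draws between a token's raw piece and decoded text: converting one token yields its piece verbatim, and concatenating pieces is how server.cpp rebuilds output text, while full detokenization (with its SPM/BPE-specific handling of the leading space) now lives in separate common helpers. A minimal sketch of the piece-concatenation pattern, assuming the std::string-returning llama_token_to_piece helper from common.h that server.cpp calls below:

// Sketch only; requires llama.cpp's common.h and an initialized llama_context.
#include <string>
#include <vector>
#include "common.h"

static std::string concat_pieces(llama_context * ctx, const std::vector<llama_token> & tokens) {
    std::string text;
    for (const llama_token tok : tokens) {
        text += llama_token_to_piece(ctx, tok); // raw piece for a single token
    }
    return text;
}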
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r--  examples/server/server.cpp  16
1 file changed, 7 insertions, 9 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index a4b4d641..89a3311f 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -94,7 +94,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
std::string ret;
for (; begin != end; ++begin)
{
- ret += llama_token_to_str(ctx, *begin);
+ ret += llama_token_to_piece(ctx, *begin);
}
return ret;
}
@@ -123,7 +123,7 @@ static void server_log(const char *level, const char *function, int line,
// format incomplete utf-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
{
- std::string out = token == -1 ? "" : llama_token_to_str(ctx, token);
+ std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
// if the size is 1 and first bit is 1, meaning it's a partial character
// (size > 1 meaning it's already a known token)
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
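The check above works because of how UTF-8 is laid out: every ASCII byte has its most significant bit clear, so a one-byte piece with that bit set can only be a fragment of a multibyte character, which the server then formats as a byte escape instead of emitting raw. A self-contained sketch of the same test (illustrative helper name, not part of server.cpp):

// Hedged sketch of the partial-UTF-8 test used above.
#include <cstdio>
#include <string>

static bool is_partial_utf8_piece(const std::string & piece) {
    // a single byte with the high bit set is never a complete UTF-8 character
    return piece.size() == 1 && (piece[0] & 0x80) == 0x80;
}

int main() {
    std::printf("%d\n", is_partial_utf8_piece("a"));    // 0: complete ASCII character
    std::printf("%d\n", is_partial_utf8_piece("\xC3")); // 1: first byte of a two-byte sequence
    return 0;
}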
@@ -286,7 +286,6 @@ struct llama_server_context
std::vector<llama_token> p;
if (first)
{
- s.insert(0, 1, ' '); // add a space if it's the first
p = ::llama_tokenize(ctx, s, add_bos);
first = false;
}
@@ -309,7 +308,6 @@ struct llama_server_context
else
{
auto s = json_prompt.template get<std::string>();
- s.insert(0, 1, ' '); // always add a first space
prompt_tokens = ::llama_tokenize(ctx, s, add_bos);
}
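Both deleted s.insert(0, 1, ' ') calls compensated for SPM tokenization on the caller side; per the commit message, the leading whitespace is now prefixed inside llama.cpp's tokenizer, so adding it here as well would double it. A hedged caller-side sketch, assuming the common.h llama_tokenize(ctx, text, add_bos) helper used throughout this file:

// Sketch only; the leading-space handling now happens inside tokenization.
#include <string>
#include <vector>
#include "common.h"

static std::vector<llama_token> tokenize_prompt(llama_context * ctx, const std::string & s, bool add_bos) {
    // callers previously did s.insert(0, 1, ' ') before tokenizing; no longer needed
    return ::llama_tokenize(ctx, s, add_bos);
}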
@@ -566,7 +564,7 @@ struct llama_server_context
if (!embd.empty() && embd.back() == llama_token_eos(ctx))
{
- // stopping_word = llama_token_to_str(ctx, embd.back());
+ // stopping_word = llama_token_to_piece(ctx, embd.back());
has_next_token = false;
stopped_eos = true;
LOG_VERBOSE("eos token found", {});
@@ -613,7 +611,7 @@ struct llama_server_context
{
const completion_token_output token_with_probs = nextToken();
- const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(ctx, token_with_probs.tok);
+ const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(ctx, token_with_probs.tok);
generated_text += token_text;
if (params.n_probs > 0)
@@ -1254,7 +1252,7 @@ void beam_search_callback(void * callback_data, llama_beams_state beams_state) {
struct token_translator {
llama_context * ctx;
- std::string operator()(llama_token tok) const { return llama_token_to_str(ctx, tok); }
+ std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
std::string operator()(completion_token_output cto) const { return (*this)(cto.tok); }
};
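token_translator is a small functor so the same conversion can be applied to either plain tokens or completion_token_output entries; a hedged usage sketch (illustrative driver, not the actual beam-search callback):

// Sketch assuming token_translator as defined above and an initialized context.
#include <string>
#include <vector>

static std::string join_tokens(llama_context * ctx, const std::vector<llama_token> & tokens) {
    const token_translator tt{ctx};
    std::string text;
    for (const llama_token tok : tokens) {
        text += tt(tok); // the second overload handles completion_token_output the same way
    }
    return text;
}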
@@ -1364,7 +1362,7 @@ int main(int argc, char **argv)
while (llama.has_next_token) {
const completion_token_output token_with_probs = llama.doCompletion();
- const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(llama.ctx, token_with_probs.tok);
+ const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(llama.ctx, token_with_probs.tok);
stop_pos = llama.findStoppingStrings(llama.generated_text,
token_text.size(), STOP_FULL);
@@ -1395,7 +1393,7 @@ int main(int argc, char **argv)
if (token_with_probs.tok == -1 || llama.multibyte_pending > 0) {
continue;
}
- const std::string token_text = llama_token_to_str(llama.ctx, token_with_probs.tok);
+ const std::string token_text = llama_token_to_piece(llama.ctx, token_with_probs.tok);
size_t pos = std::min(sent_count, llama.generated_text.size());