diff options
author | goerch <jhr.walter@t-online.de> | 2023-10-22 21:21:42 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-22 21:21:42 +0200 |
commit | 9e70cc03229df19ca2d28ce23cc817198f897278 (patch) | |
tree | 0c027b73d2efc94260b41e2227a1318e2c9ba23d /llama.cpp | |
parent | 5a42a5f8e8a86da9ac88008d748cf232a83aa0e1 (diff) |
Add test for MPT tokenization (#3728)
* Add test for MPT tokenization
* Revert code motion
* Remove unnecessary restriction in test case
* Clarify logic in conversion
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 17 |
1 file changed, 9 insertions, 8 deletions
@@ -975,14 +975,15 @@ static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }

-static std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
     std::vector<char> result(8, 0);
     const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
     if (n_tokens < 0) {
         result.resize(-n_tokens);
         int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
         GGML_ASSERT(check == -n_tokens);
-    } else {
+    }
+    else {
         result.resize(n_tokens);
     }

@@ -1202,10 +1203,10 @@ struct llama_vocab {
     id special_eot_id = 32010;

     int find_bpe_rank(std::string token_left, std::string token_right) const {
-        replace_all(token_left,  " ",  "\u0120");
-        replace_all(token_left,  "\n", "\u010A");
-        replace_all(token_right, " ",  "\u0120");
-        replace_all(token_right, "\n", "\u010A");
+        GGML_ASSERT(token_left.find(" ") == std::string::npos);
+        GGML_ASSERT(token_left.find("\n") == std::string::npos);
+        GGML_ASSERT(token_right.find(" ") == std::string::npos);
+        GGML_ASSERT(token_right.find("\n") == std::string::npos);

         auto it = bpe_ranks.find(std::make_pair(token_left, token_right));
         if (it == bpe_ranks.end()) {

@@ -7499,7 +7500,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id    = candidates->data[i].id;
-        const std::string piece = llama_token_to_str(ctx, id);
+        const std::string piece = llama_token_to_piece(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;

@@ -7711,7 +7712,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         GGML_ASSERT(false);
     }

-    const std::string piece = llama_token_to_str(ctx, token);
+    const std::string piece = llama_token_to_piece(ctx, token);

     // Note terminating 0 in decoded string
     const auto decoded = decode_utf8(piece.c_str(), grammar->partial_utf8);