summary | refs | log | tree | commit | diff
path: root/examples
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com>, 2023-08-27 14:19:19 +0300
committer: GitHub <noreply@github.com>, 2023-08-27 14:19:19 +0300
commit: edd4c1481708fcd788b0e423268304fd26e2b125 (patch)
tree: 2e7db62ea4816dc18f2518a08c36b6ea480eff05 /examples
parent: 1591e2e590762011b43b10a9b6e04f13f98f2aa5 (diff)
llama : more tokenizer fixes (#2810)
* tests : write a Python tokenizer test (wip)
* llama : prefix input text for tokenization with whitespace
* llama : distinguish pieces from decoded text + fix detokenization
* common : add comments
* examples : no longer manually add leading space when tokenizing
* tests : use Python to generate tokenizer tests for C++
* tests : add option to tokenize text files
  ggml-ci
* tests : add test-tokenizer-1.py
* llama.cpp : fix LF token
* hellaswag : move the concat space for clarity
* tests : add falcon tests (py + cpp, currently do not pass Unicode)
  ggml-ci
* common : temporary separate llama_detokenize calls for SPM and BPE
---------
Co-authored-by: klosax <131523366+klosax@users.noreply.github.com>
Diffstat (limited to 'examples')
-rw-r--r-- | examples/beam_search/beam_search.cpp | 6
-rw-r--r-- | examples/embd-input/embd-input-lib.cpp | 2
-rw-r--r-- | examples/embedding/embedding.cpp | 5
-rw-r--r-- | examples/main/main.cpp | 20
-rw-r--r-- | examples/perplexity/perplexity.cpp | 4
-rw-r--r-- | examples/save-load-state/save-load-state.cpp | 4
-rw-r--r-- | examples/server/server.cpp | 16
-rw-r--r-- | examples/simple/simple.cpp | 4
-rw-r--r-- | examples/train-text-from-scratch/train-text-from-scratch.cpp | 4
9 files changed, 27 insertions, 38 deletions
diff --git a/examples/beam_search/beam_search.cpp b/examples/beam_search/beam_search.cpp
index 1c04fabc..42c7c725 100644
--- a/examples/beam_search/beam_search.cpp
+++ b/examples/beam_search/beam_search.cpp
@@ -35,7 +35,7 @@ struct ostream_beam_view {
std::ostream& operator<<(std::ostream& os, const ostream_beam_view & obv) {
os << "p(" << obv.beam_view.p << ") eob(" << std::boolalpha << obv.beam_view.eob << ") tokens(";
for (size_t i = 0 ; i < obv.beam_view.n_tokens ; ++i) {
- os << llama_token_to_str(obv.ctx, obv.beam_view.tokens[i]);
+ os << llama_token_to_piece(obv.ctx, obv.beam_view.tokens[i]);
}
return os << ')';
}
@@ -156,7 +156,7 @@ int main(int argc, char ** argv)
for( auto id : tokens_list )
{
- std::cout << llama_token_to_str(ctx, id);
+ std::cout << llama_token_to_piece(ctx, id);
}
std::cout << std::flush;
@@ -175,7 +175,7 @@ int main(int argc, char ** argv)
std::cout << "\n\n";
for (llama_token const token_id : callback_data.response) {
- std::cout << llama_token_to_str(ctx,token_id);
+ std::cout << llama_token_to_piece(ctx,token_id);
}
std::cout << std::endl;
diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp
index 8a6ad882..036bdb39 100644
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -214,7 +214,7 @@ const char * sampling(struct MyModel * mymodel) {
if (id == llama_token_eos(ctx)) {
ret = "</s>";
} else {
- ret = llama_token_to_str(ctx, id);
+ ret = llama_token_to_piece(ctx, id);
}
eval_id(mymodel, id);
return ret.c_str();
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 38395c75..93d583b5 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -56,9 +56,6 @@ int main(int argc, char ** argv) {
int n_past = 0;
- // Add a space in front of the first character to match OG llama tokenizer behavior
- params.prompt.insert(0, 1, ' ');
-
// tokenize the prompt
auto embd_inp = ::llama_tokenize(ctx, params.prompt, true);
@@ -67,7 +64,7 @@ int main(int argc, char ** argv) {
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
for (int i = 0; i < (int) embd_inp.size(); i++) {
- fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]).c_str());
+ fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str());
}
fprintf(stderr, "\n");
}
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 11d7a7e4..3ce57f43 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -195,11 +195,6 @@ int main(int argc, char ** argv) {
// tokenize the prompt
std::vector<llama_token> embd_inp;
- if (llama_vocab_type(ctx) == LLAMA_VOCAB_TYPE_SPM) {
- // Add a space in front of the first character to match OG llama tokenizer behavior
- params.prompt.insert(0, 1, ' ');
- }
-
if (params.interactive_first || params.instruct || !params.prompt.empty() || session_tokens.empty()) {
embd_inp = ::llama_tokenize(ctx, params.prompt, add_bos);
} else {
@@ -216,7 +211,6 @@ int main(int argc, char ** argv) {
int guidance_offset = 0;
int original_prompt_len = 0;
if (ctx_guidance) {
- params.cfg_negative_prompt.insert(0, 1, ' ');
guidance_inp = ::llama_tokenize(ctx_guidance, params.cfg_negative_prompt, add_bos);
std::vector<llama_token> original_inp = ::llama_tokenize(ctx, params.prompt, add_bos);
@@ -285,7 +279,7 @@ int main(int argc, char ** argv) {
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
for (int i = 0; i < (int) embd_inp.size(); i++) {
- fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]).c_str());
+ fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str());
}
if (ctx_guidance) {
@@ -293,14 +287,14 @@ int main(int argc, char ** argv) {
fprintf(stderr, "%s: negative prompt: '%s'\n", __func__, params.cfg_negative_prompt.c_str());
fprintf(stderr, "%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size());
for (int i = 0; i < (int) guidance_inp.size(); i++) {
- fprintf(stderr, "%6d -> '%s'\n", guidance_inp[i], llama_token_to_str(ctx, guidance_inp[i]).c_str());
+ fprintf(stderr, "%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str());
}
}
if (params.n_keep > 0) {
fprintf(stderr, "%s: static prompt based on n_keep: '", __func__);
for (int i = 0; i < params.n_keep; i++) {
- fprintf(stderr, "%s", llama_token_to_str(ctx, embd_inp[i]).c_str());
+ fprintf(stderr, "%s", llama_token_to_piece(ctx, embd_inp[i]).c_str());
}
fprintf(stderr, "'\n");
}
@@ -456,7 +450,7 @@ int main(int argc, char ** argv) {
//printf("\n---\n");
//printf("resetting: '");
//for (int i = 0; i < (int) embd.size(); i++) {
- // printf("%s", llama_token_to_str(ctx, embd[i]));
+ // printf("%s", llama_token_to_piece(ctx, embd[i]));
//}
//printf("'\n");
//printf("\n---\n");
@@ -509,7 +503,7 @@ int main(int argc, char ** argv) {
input_size = embd_guidance.size();
//fprintf(stderr, "\n---------------------\n");
//for (int i = 0; i < (int) embd_guidance.size(); i++) {
- //fprintf(stderr, "%s", llama_token_to_str(ctx, embd_guidance[i]));
+ //fprintf(stderr, "%s", llama_token_to_piece(ctx, embd_guidance[i]));
//}
//fprintf(stderr, "\n---------------------\n");
} else {
@@ -673,7 +667,7 @@ int main(int argc, char ** argv) {
// display text
if (input_echo) {
for (auto id : embd) {
- printf("%s", llama_token_to_str(ctx, id).c_str());
+ printf("%s", llama_token_to_piece(ctx, id).c_str());
}
fflush(stdout);
}
@@ -689,7 +683,7 @@ int main(int argc, char ** argv) {
if (params.antiprompt.size()) {
std::string last_output;
for (auto id : last_n_tokens) {
- last_output += llama_token_to_str(ctx, id);
+ last_output += llama_token_to_piece(ctx, id);
}
is_antiprompt = false;
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index fd89852d..b596d062 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -392,7 +392,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
hs_data[i].context = prompt_lines[idx*6];
hs_data[i].gold_ending_idx = std::stoi( prompt_lines[idx*6+1] );
for (size_t j=0; j < 4; j++) {
- hs_data[i].ending[j] = " " + prompt_lines[idx*6+2+j];
+ hs_data[i].ending[j] = prompt_lines[idx*6+2+j];
}
// Delete the selected random example from the prompt
@@ -417,7 +417,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
size_t context_size = context_embd.size();
for (int i = 0; i < 4; ++i) {
- ending_tokens[i] = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[i], add_bos);
+ ending_tokens[i] = ::llama_tokenize(ctx, hs_data[task_idx].context + " " + hs_data[task_idx].ending[i], add_bos);
for (int k = 0; k < int(context_size); ++k) {
if (ending_tokens[i][k] != context_embd[k]) {
fprintf(stderr, "Oops: ending %d of task %d differs from context at position %d\n",i,int(task_idx),k);
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 3db61b75..573bc4ef 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -87,7 +87,7 @@ int main(int argc, char ** argv) {
}
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
auto next_token = llama_sample_token(ctx, &candidates_p);
- auto next_token_str = llama_token_to_str(ctx, next_token);
+ auto next_token_str = llama_token_to_piece(ctx, next_token);
last_n_tokens_data.push_back(next_token);
printf("%s", next_token_str.c_str());
@@ -147,7 +147,7 @@ int main(int argc, char ** argv) {
}
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
auto next_token = llama_sample_token(ctx2, &candidates_p);
- auto next_token_str = llama_token_to_str(ctx2, next_token);
+ auto next_token_str = llama_token_to_piece(ctx2, next_token);
last_n_tokens_data.push_back(next_token);
printf("%s", next_token_str.c_str());
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index a4b4d641..89a3311f 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -94,7 +94,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
std::string ret;
for (; begin != end; ++begin)
{
- ret += llama_token_to_str(ctx, *begin);
+ ret += llama_token_to_piece(ctx, *begin);
}
return ret;
}
@@ -123,7 +123,7 @@ static void server_log(const char *level, const char *function, int line,
// format incomplete utf-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
{
- std::string out = token == -1 ? "" : llama_token_to_str(ctx, token);
+ std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
// if the size is 1 and first bit is 1, meaning it's a partial character
// (size > 1 meaning it's already a known token)
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
@@ -286,7 +286,6 @@ struct llama_server_context
std::vector<llama_token> p;
if (first)
{
- s.insert(0, 1, ' '); // add a space if it's the first
p = ::llama_tokenize(ctx, s, add_bos);
first = false;
}
@@ -309,7 +308,6 @@ struct llama_server_context
else
{
auto s = json_prompt.template get<std::string>();
- s.insert(0, 1, ' '); // always add a first space
prompt_tokens = ::llama_tokenize(ctx, s, add_bos);
}
@@ -566,7 +564,7 @@ struct llama_server_context
if (!embd.empty() && embd.back() == llama_token_eos(ctx))
{
- // stopping_word = llama_token_to_str(ctx, embd.back());
+ // stopping_word = llama_token_to_piece(ctx, embd.back());
has_next_token = false;
stopped_eos = true;
LOG_VERBOSE("eos token found", {});
@@ -613,7 +611,7 @@ struct llama_server_context
{
const completion_token_output token_with_probs = nextToken();
- const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(ctx, token_with_probs.tok);
+ const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(ctx, token_with_probs.tok);
generated_text += token_text;
if (params.n_probs > 0)
@@ -1254,7 +1252,7 @@ void beam_search_callback(void * callback_data, llama_beams_state beams_state) {
struct token_translator {
llama_context * ctx;
- std::string operator()(llama_token tok) const { return llama_token_to_str(ctx, tok); }
+ std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
std::string operator()(completion_token_output cto) const { return (*this)(cto.tok); }
};
@@ -1364,7 +1362,7 @@ int main(int argc, char **argv)
while (llama.has_next_token) {
const completion_token_output token_with_probs = llama.doCompletion();
- const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(llama.ctx, token_with_probs.tok);
+ const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(llama.ctx, token_with_probs.tok);
stop_pos = llama.findStoppingStrings(llama.generated_text,
token_text.size(), STOP_FULL);
@@ -1395,7 +1393,7 @@ int main(int argc, char **argv)
if (token_with_probs.tok == -1 || llama.multibyte_pending > 0) {
continue;
}
- const std::string token_text = llama_token_to_str(llama.ctx, token_with_probs.tok);
+ const std::string token_text = llama_token_to_piece(llama.ctx, token_with_probs.tok);
size_t pos = std::min(sent_count, llama.generated_text.size());
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 132f7fbf..4ee85fac 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -63,7 +63,7 @@ int main(int argc, char ** argv) {
fprintf(stderr, "\n\n");
for (auto id : tokens_list) {
- fprintf(stderr, "%s", llama_token_to_str(ctx, id).c_str());
+ fprintf(stderr, "%s", llama_token_to_piece(ctx, id).c_str());
}
fflush(stderr);
@@ -112,7 +112,7 @@ int main(int argc, char ** argv) {
}
// print the new token :
- printf("%s", llama_token_to_str(ctx, new_token_id).c_str());
+ printf("%s", llama_token_to_piece(ctx, new_token_id).c_str());
fflush(stdout);
// push this new token for next evaluation
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 79b117df..12d15341 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1964,7 +1964,7 @@ void print_matrix(struct ggml_tensor * probs) {
void print_token(struct llama_context * ctx, llama_token token) {
- printf("%s", llama_token_to_str(ctx, token).c_str());
+ printf("%s", llama_token_to_piece(ctx, token).c_str());
}
void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {
@@ -2202,7 +2202,7 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
const char * in = buf.data();
const char * end = buf.data() + buf.size();
for (int i = 0; i < (int) out.size(); ++i) {
- std::string s = llama_token_to_str(lctx, out[i]);
+ std::string s = llama_token_to_piece(lctx, out[i]);
int len = s.length();
if (in >= end) {
printf("%s: unexpected end of original text.\n", __func__);