diff options
Diffstat (limited to 'examples/infill')
-rw-r--r-- | examples/infill/README.md | 1 | ||||
-rw-r--r-- | examples/infill/infill.cpp | 46 |
2 files changed, 20 insertions, 27 deletions
diff --git a/examples/infill/README.md b/examples/infill/README.md index 74f42d2f..810a0c5e 100644 --- a/examples/infill/README.md +++ b/examples/infill/README.md @@ -15,6 +15,7 @@ In this section, we cover the most commonly used options for running the `infill - `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses. - `-n N, --n-predict N`: Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text. - `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. +- `--spm-infill`: Use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. ## Input Prompts diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 3e82e4a8..dc93d230 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -204,25 +204,23 @@ int main(int argc, char ** argv) { GGML_ASSERT(llama_add_eos_token(model) != 1); LOG("add_bos: %d\n", add_bos); - bool suff_rm_leading_spc = params.escape; - if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) { - params.input_suffix.erase(0, 1); - suff_rm_leading_spc = false; - } std::vector<llama_token> embd_inp; + std::vector<llama_token> embd_end; std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); - const int space_token = 29871; - if (suff_rm_leading_spc && inp_sfx[0] == space_token) { - inp_sfx.erase(inp_sfx.begin()); - } + + GGML_ASSERT(llama_token_prefix(model) >= 0); + GGML_ASSERT(llama_token_suffix(model) >= 0); + inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model)); + inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model)); + + embd_inp = params.spm_infill ? inp_sfx : inp_pfx; + embd_end = params.spm_infill ? inp_pfx : inp_sfx; if (add_bos) { - inp_pfx.insert(inp_pfx.begin(), llama_token_bos(model)); + embd_inp.insert(embd_inp.begin(), llama_token_bos(model)); } - inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model)); - embd_inp = inp_pfx; - embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); + embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end()); const llama_token middle_token = llama_token_middle(model); if (middle_token >= 0) { @@ -514,26 +512,21 @@ int main(int argc, char ** argv) { string_process_escapes(params.input_prefix); string_process_escapes(params.input_suffix); } - suff_rm_leading_spc = params.escape; - if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) { - params.input_suffix.erase(0, 1); - suff_rm_leading_spc = false; - } + // tokenize new prefix and suffix std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); - if (suff_rm_leading_spc && inp_sfx[0] == space_token) { - inp_sfx.erase(inp_sfx.begin()); - } + inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model)); + inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model)); + + embd_inp = params.spm_infill ? inp_sfx : inp_pfx; + embd_end = params.spm_infill ? inp_pfx : inp_sfx; if (add_bos) { - inp_pfx.insert(inp_pfx.begin(), llama_token_bos(model)); + embd_inp.insert(embd_inp.begin(), llama_token_bos(model)); } - inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model)); - embd_inp = inp_pfx; - embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); + embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end()); - const llama_token middle_token = llama_token_middle(model); if (middle_token >= 0) { embd_inp.push_back(middle_token); } @@ -657,4 +650,3 @@ int main(int argc, char ** argv) { return 0; } - |