diff options
author | Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com> | 2023-10-23 12:40:03 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-23 22:40:03 +0300 |
commit | 5be6c803fa5378f62a1590f3ad8c6b64c7c0c2ce (patch) | |
tree | 190868e0431070686d797c3c2d86da857b8ba55f /examples/infill/infill.cpp | |
parent | 6336701c9378c23c85d1c0e464b663ca2bbb8e60 (diff) |
llama : remove token functions with `context` args in favor of `model` (#3720)
* added `llama_model_token_*` variants to all the `llama_token_*` functions.
* added `LLAMA_API`
* formatting
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* removed old `llama_token` functions
* changed 3 more functions to take in model
- `llama_token_get_text`
- `llama_token_get_score`
- `llama_token_get_type`
* added back docs
* fixed main.cpp
* changed token functions to use new model variants
* changed token functions to use new model variants
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/infill/infill.cpp')
-rw-r--r-- | examples/infill/infill.cpp | 30 |
1 file changed, 15 insertions, 15 deletions
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 6331335e..9c52b7bb 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -246,14 +246,14 @@ int main(int argc, char ** argv) { if (suff_rm_leading_spc && inp_sfx[0] == space_token) { inp_sfx.erase(inp_sfx.begin()); } - inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(ctx)); + inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model)); if (add_bos) { - inp_pfx.insert(inp_pfx.begin(), llama_token_bos(ctx)); + inp_pfx.insert(inp_pfx.begin(), llama_token_bos(model)); } - inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(ctx)); + inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model)); embd_inp = inp_pfx; embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); - embd_inp.push_back(llama_token_middle(ctx)); + embd_inp.push_back(llama_token_middle(model)); LOG("prefix: \"%s\"\n", log_tostr(params.input_prefix)); LOG("suffix: \"%s\"\n", log_tostr(params.input_suffix)); @@ -261,7 +261,7 @@ int main(int argc, char ** argv) { // Should not run without any tokens if (embd_inp.empty()) { - embd_inp.push_back(llama_token_bos(ctx)); + embd_inp.push_back(llama_token_bos(model)); LOG("embd_inp was considered empty and bos was added: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str()); } @@ -577,10 +577,10 @@ int main(int argc, char ** argv) { if ((int) embd_inp.size() <= n_consumed) { // deal with eot token in infill mode - if ((llama_sampling_last(ctx_sampling) == llama_token_eot(ctx) || is_interacting) && params.interactive){ + if ((llama_sampling_last(ctx_sampling) == llama_token_eot(model) || is_interacting) && params.interactive){ if(is_interacting && !params.interactive_first) { // print an eot token - printf("%s", llama_token_to_piece(ctx, llama_token_eot(ctx)).c_str()); + printf("%s", llama_token_to_piece(ctx, llama_token_eot(model)).c_str()); } fflush(stdout); printf("\n"); @@ -627,14 +627,14 @@ int main(int argc, char ** argv) { if (suff_rm_leading_spc && 
inp_sfx[0] == space_token) { inp_sfx.erase(inp_sfx.begin()); } - inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(ctx)); + inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model)); if (add_bos) { - inp_pfx.insert(inp_pfx.begin(), llama_token_bos(ctx)); + inp_pfx.insert(inp_pfx.begin(), llama_token_bos(model)); } - inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(ctx)); + inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model)); embd_inp = inp_pfx; embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); - embd_inp.push_back(llama_token_middle(ctx)); + embd_inp.push_back(llama_token_middle(model)); embd.clear(); embd_guidance.clear(); n_remain = params.n_predict; @@ -644,7 +644,7 @@ int main(int argc, char ** argv) { is_interacting = false; } // deal with end of text token in interactive mode - else if (llama_sampling_last(ctx_sampling) == llama_token_eos(ctx)) { + else if (llama_sampling_last(ctx_sampling) == llama_token_eos(model)) { LOG("found EOS token\n"); if (params.interactive) { @@ -661,7 +661,7 @@ int main(int argc, char ** argv) { if (params.input_prefix_bos) { LOG("adding input prefix BOS token\n"); - embd_inp.push_back(llama_token_bos(ctx)); + embd_inp.push_back(llama_token_bos(model)); } std::string buffer; @@ -724,7 +724,7 @@ int main(int argc, char ** argv) { } // end of text token - if (!embd.empty() && embd.back() == llama_token_eos(ctx) && !params.interactive) { + if (!embd.empty() && embd.back() == llama_token_eos(model) && !params.interactive) { break; } @@ -736,7 +736,7 @@ int main(int argc, char ** argv) { } } if (!params.interactive && n_remain <= 0) { - printf("%s", llama_token_to_piece(ctx, llama_token_eot(ctx)).c_str()); + printf("%s", llama_token_to_piece(ctx, llama_token_eot(model)).c_str()); fflush(stdout); } |