llama : remove token functions with `context` args in favor of `model` (#3720)

* added `llama_model_token_*` variants to all the `llama_token_*` functions. * added `LLAMA_API` * formatting Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * removed old `llama_token` functions * changed 3 more functions to take in model - `llama_token_get_text` - `llama_token_get_score` - `llama_token_get_type` * added back docs * fixed main.cpp * changed token functions to use new model variants * changed token functions to use new model variants --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
author: Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com> 2023-10-23 12:40:03 -0700
committer: GitHub <noreply@github.com> 2023-10-23 22:40:03 +0300
commit: 5be6c803fa5378f62a1590f3ad8c6b64c7c0c2ce (patch)
tree: 190868e0431070686d797c3c2d86da857b8ba55f /examples/infill/infill.cpp
parent: 6336701c9378c23c85d1c0e464b663ca2bbb8e60 (diff)
1 files changed, 15 insertions, 15 deletions
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index 6331335e..9c52b7bb 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -246,14 +246,14 @@ int main(int argc, char ** argv) {
     if (suff_rm_leading_spc && inp_sfx[0] == space_token) {
         inp_sfx.erase(inp_sfx.begin());
     }
-    inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(ctx));
+    inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
     if (add_bos) {
-        inp_pfx.insert(inp_pfx.begin(), llama_token_bos(ctx));
+        inp_pfx.insert(inp_pfx.begin(), llama_token_bos(model));
     }
-    inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(ctx));
+    inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
     embd_inp = inp_pfx;
     embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
-    embd_inp.push_back(llama_token_middle(ctx));
+    embd_inp.push_back(llama_token_middle(model));
 
     LOG("prefix: \"%s\"\n", log_tostr(params.input_prefix));
     LOG("suffix: \"%s\"\n", log_tostr(params.input_suffix));
@@ -261,7 +261,7 @@ int main(int argc, char ** argv) {
 
     // Should not run without any tokens
     if (embd_inp.empty()) {
-        embd_inp.push_back(llama_token_bos(ctx));
+        embd_inp.push_back(llama_token_bos(model));
         LOG("embd_inp was considered empty and bos was added: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str());
     }
 
@@ -577,10 +577,10 @@ int main(int argc, char ** argv) {
         if ((int) embd_inp.size() <= n_consumed) {
 
             // deal with eot token in infill mode
-            if ((llama_sampling_last(ctx_sampling) == llama_token_eot(ctx) || is_interacting) && params.interactive){
+            if ((llama_sampling_last(ctx_sampling) == llama_token_eot(model) || is_interacting) && params.interactive){
                 if(is_interacting && !params.interactive_first) {
                     // print an eot token
-                    printf("%s", llama_token_to_piece(ctx, llama_token_eot(ctx)).c_str());
+                    printf("%s", llama_token_to_piece(ctx, llama_token_eot(model)).c_str());
                 }
                 fflush(stdout);
                 printf("\n");
@@ -627,14 +627,14 @@ int main(int argc, char ** argv) {
                 if (suff_rm_leading_spc && inp_sfx[0] == space_token) {
                     inp_sfx.erase(inp_sfx.begin());
                 }
-                inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(ctx));
+                inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
                 if (add_bos) {
-                    inp_pfx.insert(inp_pfx.begin(), llama_token_bos(ctx));
+                    inp_pfx.insert(inp_pfx.begin(), llama_token_bos(model));
                 }
-                inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(ctx));
+                inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
                 embd_inp = inp_pfx;
                 embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
-                embd_inp.push_back(llama_token_middle(ctx));
+                embd_inp.push_back(llama_token_middle(model));
                 embd.clear();
                 embd_guidance.clear();
                 n_remain = params.n_predict;
@@ -644,7 +644,7 @@ int main(int argc, char ** argv) {
                 is_interacting = false;
             }
             // deal with end of text token in interactive mode
-            else if (llama_sampling_last(ctx_sampling) == llama_token_eos(ctx)) {
+            else if (llama_sampling_last(ctx_sampling) == llama_token_eos(model)) {
                 LOG("found EOS token\n");
 
                 if (params.interactive) {
@@ -661,7 +661,7 @@ int main(int argc, char ** argv) {
 
                 if (params.input_prefix_bos) {
                     LOG("adding input prefix BOS token\n");
-                    embd_inp.push_back(llama_token_bos(ctx));
+                    embd_inp.push_back(llama_token_bos(model));
                 }
 
                 std::string buffer;
@@ -724,7 +724,7 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
-        if (!embd.empty() && embd.back() == llama_token_eos(ctx) && !params.interactive) {
+        if (!embd.empty() && embd.back() == llama_token_eos(model) && !params.interactive) {
             break;
         }
 
@@ -736,7 +736,7 @@ int main(int argc, char ** argv) {
         }
     }
     if (!params.interactive && n_remain <= 0) {
-        printf("%s", llama_token_to_piece(ctx, llama_token_eot(ctx)).c_str());
+        printf("%s", llama_token_to_piece(ctx, llama_token_eot(model)).c_str());
         fflush(stdout);
     }
author	Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>	2023-10-23 12:40:03 -0700
committer	GitHub <noreply@github.com>	2023-10-23 22:40:03 +0300
commit	5be6c803fa5378f62a1590f3ad8c6b64c7c0c2ce (patch)
tree	190868e0431070686d797c3c2d86da857b8ba55f /examples/infill/infill.cpp
parent	6336701c9378c23c85d1c0e464b663ca2bbb8e60 (diff)