diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-05-08 17:41:54 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-08 17:41:54 +0300 |
commit | f9a6364912fd0463fddfdbc9ef9f79fdc281570d (patch) | |
tree | dde30f98675c55b43ba0f14ad118c2f363616617 /examples/common.cpp | |
parent | 95078cc554fe03d4512363c7e4dec963f0047c72 (diff) |
llama : require first token to be BOS (#1303)
* llama : require first token to be BOS
* scripts : add ppl-run-all.sh
* perplexity : add BOS for each chunk
* readme : update perplexity values after BOS fix
* perplexity : add clarifying comments
Diffstat (limited to 'examples/common.cpp')
-rw-r--r-- | examples/common.cpp | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/examples/common.cpp b/examples/common.cpp index f1c3bae1..6af44027 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -438,8 +438,8 @@ std::string gpt_random_prompt(std::mt19937 & rng) { // TODO: not great allocating this every time std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) { // initialize to prompt number of chars, since n_tokens <= n_prompt_chars - std::vector<llama_token> res(text.size() + (int)add_bos); - int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); + std::vector<llama_token> res(text.size() + (int) add_bos); + const int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); assert(n >= 0); res.resize(n); |