diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-05-08 17:41:54 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-08 17:41:54 +0300 |
commit | f9a6364912fd0463fddfdbc9ef9f79fdc281570d (patch) | |
tree | dde30f98675c55b43ba0f14ad118c2f363616617 /examples/common.cpp | |
parent | 95078cc554fe03d4512363c7e4dec963f0047c72 (diff) |
llama : require first token to be BOS (#1303)
* llama : require first token to be BOS
* scripts : add ppl-run-all.sh
* perplexity : add BOS for each chunk
* readme : update perplexity values after BOS fix
* perplexity : add clarifying comments
Diffstat (limited to 'examples/common.cpp')
-rw-r--r-- | examples/common.cpp | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/examples/common.cpp b/examples/common.cpp index f1c3bae1..6af44027 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -438,8 +438,8 @@ std::string gpt_random_prompt(std::mt19937 & rng) { // TODO: not great allocating this every time std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) { // initialize to prompt number of chars, since n_tokens <= n_prompt_chars - std::vector<llama_token> res(text.size() + (int)add_bos); - int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); + std::vector<llama_token> res(text.size() + (int) add_bos); + const int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); assert(n >= 0); res.resize(n); |