author    | Georgi Gerganov <ggerganov@gmail.com> | 2023-05-08 17:41:54 +0300
committer | GitHub <noreply@github.com> | 2023-05-08 17:41:54 +0300
commit    | f9a6364912fd0463fddfdbc9ef9f79fdc281570d (patch)
tree      | dde30f98675c55b43ba0f14ad118c2f363616617 /llama.cpp
parent    | 95078cc554fe03d4512363c7e4dec963f0047c72 (diff)
llama : require first token to be BOS (#1303)
* llama : require first token to be BOS
* scripts : add ppl-run-all.sh
* perplexity : add BOS for each chunk
* readme : update perplexity values after BOS fix
* perplexity : add clarifying comments
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 12
1 file changed, 11 insertions(+), 1 deletion(-)
```diff
@@ -1052,6 +1052,13 @@ static bool llama_eval_internal(
             const int   n_tokens,
             const int   n_past,
             const int   n_threads) {
+
+    // enforce that the first token is BOS
+    if (n_past == 0 && tokens[0] != llama_token_bos()) {
+        fprintf(stderr, "%s: first token must be BOS\n", __func__);
+        return false;
+    }
+
     const int64_t t_start_us = ggml_time_us();
 
     const int N = n_tokens;
@@ -1482,7 +1489,7 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
     }
 
     if (bos) {
-        output.push_back(1);
+        output.push_back(llama_token_bos());
     }
 
     tokenizer.tokenize(text, output);
@@ -2727,11 +2734,14 @@ int llama_eval(
         fprintf(stderr, "%s: failed to eval\n", __func__);
         return 1;
     }
+
     // get a more accurate load time, upon first eval
+    // TODO: fix this
     if (!ctx->has_evaluated_once) {
         ctx->t_load_us = ggml_time_us() - ctx->t_start_us;
         ctx->has_evaluated_once = true;
     }
+
     return 0;
 }
```
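For context, here is a minimal caller-side sketch (not part of this commit) of what the new check implies: once `n_past == 0` evaluations require a leading BOS, callers such as the perplexity tool must make sure each chunk starts with `llama_token_bos()`. The helper name `eval_chunk` is hypothetical; the sketch assumes the `llama.h` API of this period (`llama_token`, `llama_token_bos()`, `llama_eval()`).

```cpp
// Hedged sketch, not from this commit: ensure a fresh sequence starts with BOS.
// Assumes the May-2023 llama.h API: llama_token, llama_token_bos(), llama_eval().
#include <vector>
#include "llama.h"

// eval_chunk is a hypothetical helper used only for illustration.
static bool eval_chunk(llama_context * ctx, std::vector<llama_token> chunk, int n_threads) {
    // llama_eval_internal() now rejects n_past == 0 calls whose first token is not BOS,
    // so prepend it if the chunk does not already begin with BOS.
    if (chunk.empty() || chunk[0] != llama_token_bos()) {
        chunk.insert(chunk.begin(), llama_token_bos());
    }
    // n_past = 0: start of a new sequence (e.g. one perplexity chunk)
    return llama_eval(ctx, chunk.data(), (int) chunk.size(), 0, n_threads) == 0;
}
```

Alternatively, the BOS can come from tokenization itself: after this commit, `llama_tokenize()` with `bos` enabled pushes `llama_token_bos()` rather than the hard-coded id `1`.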