From f9a6364912fd0463fddfdbc9ef9f79fdc281570d Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 8 May 2023 17:41:54 +0300
Subject: llama : require first token to be BOS (#1303)

* llama : require first token to be BOS

* scripts : add ppl-run-all.sh

* perplexity : add BOS for each chunk

* readme : update perplexity values after BOS fix

* perplexity : add clarifying comments
---
 llama.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index c36c6ced..d54fa502 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1052,6 +1052,13 @@ static bool llama_eval_internal(
             const int   n_tokens,
             const int   n_past,
             const int   n_threads) {
+
+    // enforce that the first token is BOS
+    if (n_past == 0 && tokens[0] != llama_token_bos()) {
+        fprintf(stderr, "%s: first token must be BOS\n", __func__);
+        return false;
+    }
+
     const int64_t t_start_us = ggml_time_us();
 
     const int N = n_tokens;
@@ -1482,7 +1489,7 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
     }
 
     if (bos) {
-        output.push_back(1);
+        output.push_back(llama_token_bos());
     }
 
     tokenizer.tokenize(text, output);
@@ -2727,11 +2734,14 @@ int llama_eval(
         fprintf(stderr, "%s: failed to eval\n", __func__);
         return 1;
     }
+
     // get a more accurate load time, upon first eval
+    // TODO: fix this
     if (!ctx->has_evaluated_once) {
         ctx->t_load_us = ggml_time_us() - ctx->t_start_us;
         ctx->has_evaluated_once = true;
     }
+
     return 0;
 }
 
-- 
cgit v1.2.3