diff options
author | Branden Butler <bwtbutler@hotmail.com> | 2023-11-20 03:50:04 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-20 11:50:04 +0200 |
commit | 40a34fe8d034bd484efd79ccbb95059ca6308dcb (patch) | |
tree | d764cf818b68c98228a2db8bde539241dce78f6c | |
parent | dae06c06e5c6232ae2be4d567dd5101e1e96c814 (diff) |
speculative : fix prompt tokenization in speculative example (#4025)
* Support special tokens, and add BOS to the prompt only when the model expects it, in the speculative example
* Adapt to new should_add_bos function
* Ensure the target (tgt) and draft (dft) models have the same add_bos setting
-rw-r--r-- | examples/speculative/speculative.cpp | 17 |
1 file changed, 15 insertions, 2 deletions
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp index 3a8e2781..ace755c5 100644 --- a/examples/speculative/speculative.cpp +++ b/examples/speculative/speculative.cpp @@ -94,9 +94,22 @@ int main(int argc, char ** argv) { } } - // tokenize the prompt + + // Tokenize the prompt + const bool add_bos_tgt = llama_should_add_bos_token(model_tgt); + LOG("add_bos tgt: %d\n", add_bos_tgt); + + const bool add_bos_dft = llama_should_add_bos_token(model_dft); + LOG("add_bos dft: %d\n", add_bos_dft); + + if (add_bos_tgt != add_bos_dft) { + fprintf(stderr, "%s: error: draft model add_bos must match target model to use speculation but ", __func__); + fprintf(stderr, "add_bos_dft = %d while add_bos_tgt = %d\n", add_bos_dft, add_bos_tgt); + return 1; + } + std::vector<llama_token> inp; - inp = ::llama_tokenize(ctx_tgt, params.prompt, true); + inp = ::llama_tokenize(ctx_tgt, params.prompt, add_bos_tgt, true); const int max_context_size = llama_n_ctx(ctx_tgt); const int max_tokens_list_size = max_context_size - 4; |