| author | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2023-08-27 16:50:33 +0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-08-27 16:50:33 +0300 |
| commit | 463173a6c0ff353055eb90665794884c888c790f (patch) | |
| tree | 4868e5ed0a6924410c91b149a6a630ea75ea06de /examples/perplexity/perplexity.cpp | |
| parent | eaa13a48ff4136f01c1cdb79cacd61b67ec53095 (diff) | |
llama : speedup tokenization (#2831)
* Speedup tokenization
On current master it takes ~3.2 seconds to tokenize
Wikitext. With this change it becomes ~525 ms.
* Fixit: it was missing the piece after the last found occurrence (see the sketch after this message)
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
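The "Fixit" bullet hints at the shape of the speedup: splitting or replacing text with a single `find`-based pass instead of scanning character by character, where the initial version forgot to copy the tail of the string after the last match. A minimal sketch of that pattern, assuming a `replace_all`-style helper (the name, signature, and surrounding context are hypothetical, not the actual patch):

```cpp
// Hypothetical sketch of a single-pass find-and-replace; assumes `search` is non-empty.
#include <cstdio>
#include <string>

static std::string replace_all(const std::string & text,
                               const std::string & search,
                               const std::string & replace) {
    std::string result;
    result.reserve(text.size());
    size_t pos = 0;
    for (size_t found; (found = text.find(search, pos)) != std::string::npos;
         pos = found + search.size()) {
        result.append(text, pos, found - pos); // copy the piece before this occurrence
        result.append(replace);
    }
    // The "Fixit" from the commit message: without this line, the piece after
    // the last found occurrence would be silently dropped.
    result.append(text, pos, std::string::npos);
    return result;
}

int main() {
    // e.g. SPM-style whitespace escaping: ' ' -> U+2581 (LOWER ONE EIGHTH BLOCK)
    std::printf("%s\n", replace_all("a b c", " ", "\xe2\x96\x81").c_str());
}
```

Building the result in one forward pass keeps the work linear in the input size, which is consistent with the reported drop from ~3.2 s to ~525 ms on Wikitext.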
Diffstat (limited to 'examples/perplexity/perplexity.cpp')
-rw-r--r-- | examples/perplexity/perplexity.cpp | 4 |
1 file changed, 4 insertions(+), 0 deletions(-)
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index b596d062..ebafa0c2 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -190,10 +190,14 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
     const bool is_spm = llama_vocab_type(ctx) == LLAMA_VOCAB_TYPE_SPM;
     const bool add_bos = is_spm;
 
+    auto tim1 = std::chrono::high_resolution_clock::now();
     fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
 
     auto tokens = ::llama_tokenize(ctx, params.prompt, add_bos);
 
+    auto tim2 = std::chrono::high_resolution_clock::now();
+    fprintf(stderr, "%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
+
     const int n_chunk_max = tokens.size() / params.n_ctx;
     const int n_chunk = params.n_chunks < 0 ? n_chunk_max : std::min(params.n_chunks, n_chunk_max);
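The diff itself only adds timing instrumentation around the tokenization call, which is how the before/after numbers in the commit message were measured. The pattern is reproducible standalone; a minimal sketch of the same `std::chrono` idiom, where the `1e-3` factor converts the microsecond count to milliseconds (the measured work here is a stand-in, not the tokenizer):

```cpp
// Standalone sketch of the timing pattern the patch adds: capture two
// time points and report the elapsed time in milliseconds.
#include <chrono>
#include <cstdio>
#include <string>

int main() {
    auto tim1 = std::chrono::high_resolution_clock::now();

    // Stand-in for the tokenization work being measured.
    std::string s;
    for (int i = 0; i < 1000000; ++i) s += 'x';

    auto tim2 = std::chrono::high_resolution_clock::now();
    // duration_cast to microseconds, then scale by 1e-3 to get milliseconds,
    // matching the fprintf added in the patch.
    std::printf("took %g ms\n",
                1e-3 * std::chrono::duration_cast<std::chrono::microseconds>(tim2 - tim1).count());
}
```

Casting to microseconds first and scaling to milliseconds keeps sub-millisecond resolution in the printed value, which matters once tokenization drops into the hundreds-of-milliseconds range.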