author | Kawrakow <iwankawrakow@gmail.com> | 2025-05-12 07:50:26 +0300
---|---|---
committer | GitHub <noreply@github.com> | 2025-05-12 07:50:26 +0300
commit | 1d2da7feaee3e4dd1b78fb4108988c977b47e266 |
tree | 4449c9c892dde93d5b8cae1389454a0d099c581e |
parent | f27cd405422307e02dffa8949ac30bc56b4d2900 |
Add batch warmup to sweep-bench (#375)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
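Judging from the diff below, the new `--warmup-batch` (`-wb`) flag adds a second warmup stage to sweep-bench: in addition to the existing single-token (BOS) warmup, it decodes one full ubatch of random tokens before measurement starts. A minimal sketch of the relevant `gpt_params` fields follows; the `n_ubatch` value shown is an illustrative assumption, not part of this commit.

```cpp
#include "common.h" // gpt_params, declared in common/common.h (see diff below)

// Programmatic equivalent of passing `-wb` on the sweep-bench command line.
static gpt_params make_sweep_bench_warmup_params() {
    gpt_params params;
    params.warmup       = true;  // existing warmup: decode a single BOS token
    params.batch_warmup = true;  // new: also decode one full ubatch of random tokens
    params.n_ubatch     = 512;   // assumed value; sets the size of the batch warmup
    return params;
}
```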
-rw-r--r-- | common/common.cpp | 4
-rw-r--r-- | common/common.h | 1
-rw-r--r-- | examples/sweep-bench/sweep-bench.cpp | 18
3 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/common/common.cpp b/common/common.cpp
index 0dbde58f..2df8d4d4 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1468,6 +1468,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.warmup = false;
         return true;
     }
+    if (arg == "--warmup-batch" || arg == "-wb") {
+        params.batch_warmup = true;
+        return true;
+    }
     if (arg == "--output-format") {
         CHECK_ARG
         std::string value(argv[i]);
diff --git a/common/common.h b/common/common.h
index fd83c9d3..1b4835bd 100644
--- a/common/common.h
+++ b/common/common.h
@@ -200,6 +200,7 @@ struct gpt_params {
     bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
+    bool batch_warmup = false; // batch warmup run
     bool check_tensors = false; // validate tensor data
     bool repack_tensors = false; // repack tensors if interleaved variant is available
     bool use_thp = false; // use transparent huge pages (linux only)
diff --git a/examples/sweep-bench/sweep-bench.cpp b/examples/sweep-bench/sweep-bench.cpp
index 27510687..31dd3ce0 100644
--- a/examples/sweep-bench/sweep-bench.cpp
+++ b/examples/sweep-bench/sweep-bench.cpp
@@ -107,7 +107,7 @@ int main(int argc, char ** argv) {
     llama_batch batch = llama_batch_init(n_kv_max, 0, 1);
 
     // warm up
-    {
+    if (params.warmup) {
         llama_batch_add(batch, bos, 0, { 0 }, false);
 
         if (!decode_helper(ctx, batch, ctx_params.n_batch)) {
@@ -115,6 +115,22 @@ int main(int argc, char ** argv) {
             return 1;
         }
     }
+    if (params.batch_warmup) {
+        // clean up KV cache after generation
+        llama_kv_cache_seq_rm(ctx, 0, params.n_ubatch, -1);
+
+        // prepare batch of pp size for prompt processing performance measurement
+        llama_batch_clear(batch);
+
+        for (unsigned int i = 0; i < params.n_ubatch; ++i) {
+            llama_batch_add(batch, std::rand() % n_vocab, i, { 0 }, false);
+        }
+
+        if (!decode_helper(ctx, batch, ctx_params.n_ubatch)) {
+            LOG_TEE("%s: llama_decode() failed\n", __func__);
+            return 1;
+        }
+    }
 
     llama_batch_clear(batch);
     llama_kv_cache_clear(ctx);
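`decode_helper()` is used by both warmup paths but is not part of this diff. In the llama.cpp bench examples such a helper typically submits the prepared `llama_batch` to `llama_decode()` in `n_batch`-sized windows and waits for the backend to finish. The following is only a sketch of that pattern, assuming the public `llama_batch` fields from `llama.h`; the actual helper in `sweep-bench.cpp` may differ in details.

```cpp
#include <algorithm>
#include <cstdio>

#include "llama.h"

// Sketch of a chunked decode helper (not taken from this commit): decode the
// batch in windows of at most n_batch tokens, synchronizing after each call.
static bool decode_helper_sketch(llama_context * ctx, llama_batch & batch, int32_t n_batch) {
    for (int32_t i = 0; i < batch.n_tokens; i += n_batch) {
        const int32_t n_tokens = std::min(n_batch, batch.n_tokens - i);

        llama_batch view = batch;   // shallow copy, then window the per-token arrays
        view.n_tokens  = n_tokens;
        view.token    += i;
        view.pos      += i;
        view.n_seq_id += i;
        view.seq_id   += i;
        view.logits   += i;

        if (llama_decode(ctx, view) != 0) {
            fprintf(stderr, "%s: llama_decode() failed at token %d\n", __func__, i);
            return false;
        }
        llama_synchronize(ctx); // make sure the backend finished before the next window
    }
    return true;
}
```

In the batch-warmup branch above, the batch is filled with `params.n_ubatch` random tokens (`std::rand() % n_vocab`) at positions `0..n_ubatch-1` and decoded once with `ctx_params.n_ubatch` as the window size, so one-time costs such as graph building and backend buffer allocation are presumably paid before the first timed prompt-processing pass; the KV cache is then cleared before the sweep begins.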