diff options
Diffstat (limited to 'examples/batched-bench')
-rw-r--r-- | examples/batched-bench/CMakeLists.txt | 2 | ||||
-rw-r--r-- | examples/batched-bench/README.md | 8 |
2 files changed, 5 insertions, 5 deletions
diff --git a/examples/batched-bench/CMakeLists.txt b/examples/batched-bench/CMakeLists.txt index 40a032c5..959acaee 100644 --- a/examples/batched-bench/CMakeLists.txt +++ b/examples/batched-bench/CMakeLists.txt @@ -1,4 +1,4 @@ -set(TARGET batched-bench) +set(TARGET llama-batched-bench) add_executable(${TARGET} batched-bench.cpp) install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) diff --git a/examples/batched-bench/README.md b/examples/batched-bench/README.md index fa4baf64..4a07fe6b 100644 --- a/examples/batched-bench/README.md +++ b/examples/batched-bench/README.md @@ -10,16 +10,16 @@ There are 2 modes of operation: - `prompt is shared` - there is a common prompt of size `PP` used by all batches (i.e. `N_KV = PP + B*TG`) ```bash -./batched-bench -m model.gguf -c 2048 -b 2048 -ub 512 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32 [-pps] +./llama-batched-bench -m model.gguf -c 2048 -b 2048 -ub 512 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32 [-pps] # LLaMA 7B, F16, N_KV_MAX = 16384 (8GB), prompt not shared -./batched-bench -m ./models/llama-7b/ggml-model-f16.gguf -c 16384 -b 2048 -ub 512 -ngl 99 +./llama-batched-bench -m ./models/llama-7b/ggml-model-f16.gguf -c 16384 -b 2048 -ub 512 -ngl 99 # LLaMA 7B, Q8_0, N_KV_MAX = 16384 (8GB), prompt is shared -./batched-bench -m ./models/llama-7b/ggml-model-q8_0.gguf -c 16384 -b 2048 -ub 512 -ngl 99 -pps +./llama-batched-bench -m ./models/llama-7b/ggml-model-q8_0.gguf -c 16384 -b 2048 -ub 512 -ngl 99 -pps # custom set of batches -./batched-bench -m ./models/llama-7b/ggml-model-q8_0.gguf -c 2048 -b 512 -ub 512 -ngl 999 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32 +./llama-batched-bench -m ./models/llama-7b/ggml-model-q8_0.gguf -c 2048 -b 512 -ub 512 -ngl 999 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32 ``` ## Sample results |