scripts: update compare-llama-bench.py [no ci] (#7673)

author: Johannes Gäßler <johannesg@5d6.de> 2024-05-31 16:26:21 +0200
committer: GitHub <noreply@github.com> 2024-05-31 16:26:21 +0200
commit: c8047d538f3addab40e3112be60bb92e70ce1a50 (patch)
tree: 2ebf0b78225a9cb9b08f346e8355f868acf86967
parent: 30e238b246f8002cc6eb7cb79afe242243f1f66d (diff)
1 files changed, 10 insertions, 10 deletions
diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py
index 0ede9e67..6016eb2c 100755
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -19,22 +19,22 @@ logger = logging.getLogger("compare-llama-bench")
 
 # Properties by which to differentiate results per commit:
 KEY_PROPERTIES = [
-    "cpu_info", "gpu_info", "n_gpu_layers", "main_gpu", "cuda", "opencl", "metal", "gpu_blas",
-    "blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_threads",
-    "type_k", "type_v", "no_kv_offload", "tensor_split", "n_prompt", "n_gen"
+    "cpu_info", "gpu_info", "n_gpu_layers", "cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
+    "blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "embeddings", "n_threads",
+    "type_k", "type_v", "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen"
 ]
 
 # Properties that are boolean and are converted to Yes/No for the table:
-BOOL_PROPERTIES = ["cuda", "opencl", "metal", "gpu_blas", "blas"]
+BOOL_PROPERTIES = ["cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
 
 # Header names for the table:
 PRETTY_NAMES = {
-    "cuda": "CUDA", "opencl": "OpenCL", "metal": "Metal", "gpu_blas": "GPU BLAS", "blas": "BLAS",
-    "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
-    "model_size": "Model Size [GiB]", "model_n_params": "Num. of Parameters",
-    "n_batch": "Batch size", "n_threads": "Threads", "type_k": "K type", "type_v": "V type",
-    "n_gpu_layers": "GPU layers", "main_gpu": "Main GPU", "no_kv_offload": "NKVO",
-    "tensor_split": "Tensor split"
+    "cuda": "CUDA", "opencl": "OpenCL", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
+    "gpu_blas": "GPU BLAS", "blas": "BLAS", "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
+    "model_size": "Model Size [GiB]", "model_n_params": "Num. of Par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
+    "n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "split_mode": "Split mode",
+    "main_gpu": "Main GPU", "no_kv_offload": "NKVO", "flash_attn": "FlashAttention", "tensor_split": "Tensor split",
+    "use_mmap": "Use mmap", "embeddings": "Embeddings",
 }
 
 DEFAULT_SHOW = ["model_type"]  # Always show these properties by default.
author	Johannes Gäßler <johannesg@5d6.de>	2024-05-31 16:26:21 +0200
committer	GitHub <noreply@github.com>	2024-05-31 16:26:21 +0200
commit	c8047d538f3addab40e3112be60bb92e70ce1a50 (patch)
tree	2ebf0b78225a9cb9b08f346e8355f868acf86967
parent	30e238b246f8002cc6eb7cb79afe242243f1f66d (diff)