llama : cleanup unused mmq flags (#5772)

* cleanup unused --no-mul-mat-q,-nommq, -mmq, --mul-mat-q, mul_mat_q
* remove: mul_mat_q in compare llama bench and usage
* update llama-bench

---------

Co-authored-by: slaren <slarengh@gmail.com>
author: Pierrick Hymbert <pierrick.hymbert@gmail.com> 2024-03-01 12:39:06 +0100
committer: GitHub <noreply@github.com> 2024-03-01 13:39:06 +0200
commit: 3ab8b3a92ede46df88bc5a2dfca3777de4a2b2b6 (patch)
tree: b0232b770527ead35c5a59971a0802ded16e8c40 /scripts/compare-llama-bench.py
parent: 9600d59e010c18f5872580a21734ea1bf1968d04 (diff)
1 file changed, 1 insertion, 1 deletion
diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py
index 70737f97..39c3e52e 100755
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -31,7 +31,7 @@ PRETTY_NAMES = {
     "model_size": "Model Size [GiB]", "model_n_params": "Num. of Parameters",
     "n_batch": "Batch size", "n_threads": "Threads", "type_k": "K type", "type_v": "V type",
     "n_gpu_layers": "GPU layers", "main_gpu": "Main GPU", "no_kv_offload": "NKVO",
-    "mul_mat_q": "MMQ", "tensor_split": "Tensor split"
+    "tensor_split": "Tensor split"
 }
 
 DEFAULT_SHOW = ["model_type"]  # Always show these properties by default.
author	Pierrick Hymbert <pierrick.hymbert@gmail.com>	2024-03-01 12:39:06 +0100
committer	GitHub <noreply@github.com>	2024-03-01 13:39:06 +0200
commit	3ab8b3a92ede46df88bc5a2dfca3777de4a2b2b6 (patch)
tree	b0232b770527ead35c5a59971a0802ded16e8c40 /scripts/compare-llama-bench.py
parent	9600d59e010c18f5872580a21734ea1bf1968d04 (diff)