diff options
author | Pierrick Hymbert <pierrick.hymbert@gmail.com> | 2024-03-01 12:39:06 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-01 13:39:06 +0200 |
commit | 3ab8b3a92ede46df88bc5a2dfca3777de4a2b2b6 (patch) | |
tree | b0232b770527ead35c5a59971a0802ded16e8c40 /examples/server/server.cpp | |
parent | 9600d59e010c18f5872580a21734ea1bf1968d04 (diff) |
llama : cleanup unused mmq flags (#5772)
* cleanup unused --no-mul-mat-q,-nommq, -mmq, --mul-mat-q, mul_mat_q
* remove: mul_mat_q in compare llama bench and usage
* update llama-bench
---------
Co-authored-by: slaren <slarengh@gmail.com>
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r-- | examples/server/server.cpp | 8 |
1 file changed, 0 insertions, 8 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index eea98796..2b2f4a0f 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2392,14 +2392,6 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
             LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n", {});
 #endif // GGML_USE_CUBLAS
         }
-        else if (arg == "--no-mul-mat-q" || arg == "-nommq")
-        {
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
-            params.mul_mat_q = false;
-#else
-            LOG_WARNING("warning: llama.cpp was compiled without cuBLAS. Disabling mul_mat_q kernels has no effect.\n", {});
-#endif // GGML_USE_CUBLAS
-        }
         else if (arg == "--main-gpu" || arg == "-mg")
         {
             if (++i >= argc)