ggml : add ggml_soft_max_ext (#4256)

* metal : implement soft_max_ext
* cuda : implement soft_max_ext
* ggml : implement soft_max_ext (CPU)
* batched-bench : print threads

  ggml-ci

* metal : simplify soft_max encoding

  ggml-ci

* cuda : use 512 threads for soft_max instead of 32
* ggml : update soft max cpu
* cuda : do warp-based block reduce
* cuda : increase max block size to 1024
* cuda : fix warp reduction initialization of shared mem
* metal : warp-based reduction for soft max kernel
* metal : warp-based reduce for rms_norm
* metal : simplify soft max kernel

  ggml-ci

* alloc : fix build with debug
author: Georgi Gerganov <ggerganov@gmail.com> 2023-12-01 10:51:24 +0200
committer: GitHub <noreply@github.com> 2023-12-01 10:51:24 +0200
commit: ef47ec18da469423c276b683dd9b5741cee7023e (patch)
tree: ec3b4780dbe8f629425de499b298e8eadfd1aa4d /examples
parent: 1d144112c0fbbb4ecc07dbcf4f05a380148bd6de (diff)
1 files changed, 1 insertions, 1 deletions
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index 533c55c1..57596ed9 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -155,7 +155,7 @@ int main(int argc, char ** argv) {
     }
 
     LOG_TEE("\n");
-    LOG_TEE("%s: n_kv_max = %d, is_pp_shared = %d, n_gpu_layers = %d, mmq = %d\n", __func__, n_kv_max, is_pp_shared, n_gpu_layers, mmq);
+    LOG_TEE("%s: n_kv_max = %d, is_pp_shared = %d, n_gpu_layers = %d, mmq = %d, n_threads = %d, n_threads_batch = %d\n", __func__, n_kv_max, is_pp_shared, n_gpu_layers, mmq, ctx_params.n_threads, ctx_params.n_threads_batch);
     LOG_TEE("\n");
 
     LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP",     "TG",     "B",    "N_KV",     "T_PP s",   "S_PP t/s", "T_TG s",   "S_TG t/s", "T s",      "S t/s");
author	Georgi Gerganov <ggerganov@gmail.com>	2023-12-01 10:51:24 +0200
committer	GitHub <noreply@github.com>	2023-12-01 10:51:24 +0200
commit	ef47ec18da469423c276b683dd9b5741cee7023e (patch)
tree	ec3b4780dbe8f629425de499b298e8eadfd1aa4d /examples
parent	1d144112c0fbbb4ecc07dbcf4f05a380148bd6de (diff)