author     Henri Vasserman <henv@hot.ee>            2023-08-25 12:09:42 +0300
committer  GitHub <noreply@github.com>              2023-08-25 12:09:42 +0300
commit     6bbc598a632560cb45dd2c51ad403bda8723b629 (patch)
tree       53be13238531021865642158403fbf92c5a9ff58 /common/common.cpp
parent     3f460a2b723c8b936ac29ecfd02f244b3adeba55 (diff)
ROCm Port (#1087)

* use hipblas based on cublas
* Update Makefile for the Cuda kernels
* Expand arch list and make it overrideable
* Fix multi GPU on multiple amd architectures with rocblas_initialize() (#5)
* add hipBLAS to README
* new build arg LLAMA_CUDA_MMQ_Y
* fix half2 decomposition
* Add intrinsics polyfills for AMD
* AMD assembly optimized __dp4a
* Allow overriding CC_TURING
* use "ROCm" instead of "CUDA"
* ignore all build dirs
* Add Dockerfiles
* fix llama-bench
* fix -nommq help for non CUDA/HIP

---------

Co-authored-by: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com>
Co-authored-by: ardfork <134447697+ardfork@users.noreply.github.com>
Co-authored-by: funnbot <22226942+funnbot@users.noreply.github.com>
Co-authored-by: Engininja2 <139037756+Engininja2@users.noreply.github.com>
Co-authored-by: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
Co-authored-by: jammm <2500920+jammm@users.noreply.github.com>
Co-authored-by: jdecourval <7315817+jdecourval@users.noreply.github.com>
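For context on the help-text change below: GGML_CUBLAS_NAME and GGML_CUDA_NAME are preprocessor string macros, so the same fprintf line can read "hipBLAS"/"ROCm" on AMD builds and "cuBLAS"/"CUDA" on NVIDIA builds. A minimal sketch of how such macros can be defined, assuming the HIP build is selected via a GGML_USE_HIPBLAS define (the authoritative definitions live in the CUDA/HIP backend header, not in this diff):

// Sketch only: backend-name macros assumed by the help text below.
// The real definitions belong in the CUDA/HIP backend header.
#ifdef GGML_USE_HIPBLAS
#define GGML_CUDA_NAME   "ROCm"    // HIP/ROCm build
#define GGML_CUBLAS_NAME "hipBLAS"
#else
#define GGML_CUDA_NAME   "CUDA"    // NVIDIA build
#define GGML_CUBLAS_NAME "cuBLAS"
#endif

Adjacent string literals concatenate at compile time in C/C++, so "use " GGML_CUBLAS_NAME " instead of ..." expands to a single help string with no runtime cost.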
Diffstat (limited to 'common/common.cpp')
-rw-r--r--  common/common.cpp  |  4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/common/common.cpp b/common/common.cpp
index 53002ba3..ff19ec4e 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -613,9 +613,11 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n");
fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n");
+#ifdef GGML_USE_CUBLAS
fprintf(stdout, " -nommq, --no-mul-mat-q\n");
- fprintf(stdout, " use cuBLAS instead of custom mul_mat_q CUDA kernels.\n");
+ fprintf(stdout, " use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.\n");
fprintf(stdout, " Not recommended since this is both slower and uses more VRAM.\n");
+#endif // GGML_USE_CUBLAS
#endif
fprintf(stdout, " --mtest compute maximum memory usage\n");
fprintf(stdout, " --export export the computation graph to 'llama.ggml'\n");
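With this change the -nommq/--no-mul-mat-q help text is only compiled in when GGML_USE_CUBLAS is defined, and the backend names come from the macros above. Assuming HIP builds also define GGML_USE_CUBLAS (consistent with the "use hipblas based on cublas" approach of this port), a ROCm build would print roughly:

  -nommq, --no-mul-mat-q
                        use hipBLAS instead of custom mul_mat_q ROCm kernels.
                        Not recommended since this is both slower and uses more VRAM.

Builds without CUDA/HIP omit the option entirely, which is the "fix -nommq help for non CUDA/HIP" item from the commit message.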