diff options
Diffstat (limited to 'common')
-rw-r--r-- | common/common.cpp | 26 |
1 files changed, 15 insertions, 11 deletions
diff --git a/common/common.cpp b/common/common.cpp index 6b07f119..28801367 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -42,6 +42,10 @@ #pragma warning(disable: 4244 4267) // possible loss of data #endif +#if (defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)) +#define GGML_USE_CUBLAS_SYCL +#endif + int32_t get_num_physical_cores() { #ifdef __linux__ // enumerate the set of thread siblings, num entries is num cores @@ -599,9 +603,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { break; } params.main_gpu = std::stoi(argv[i]); -#ifndef GGML_USE_CUBLAS - fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. Setting the main GPU has no effect.\n"); -#endif // GGML_USE_CUBLAS +#ifndef GGML_USE_CUBLAS_SYCL + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL. Setting the main GPU has no effect.\n"); +#endif // GGML_USE_CUBLAS_SYCL } else if (arg == "--split-mode" || arg == "-sm") { if (++i >= argc) { invalid_param = true; @@ -618,9 +622,10 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } -#ifndef GGML_USE_CUBLAS - fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. Setting the split mode has no effect.\n"); -#endif // GGML_USE_CUBLAS +#ifndef GGML_USE_CUBLAS_SYCL + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL. Setting the split mode has no effect.\n"); +#endif // GGML_USE_CUBLAS_SYCL + } else if (arg == "--tensor-split" || arg == "-ts") { if (++i >= argc) { invalid_param = true; @@ -643,9 +648,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { params.tensor_split[i] = 0.0f; } } -#ifndef GGML_USE_CUBLAS - fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. Setting a tensor split has no effect.\n"); -#endif // GGML_USE_CUBLAS +#ifndef GGML_USE_CUBLAS_SYCL + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL. Setting a tensor split has no effect.\n"); +#endif // GGML_USE_CUBLAS_SYCL } else if (arg == "--no-mmap") { params.use_mmap = false; } else if (arg == "--numa") { @@ -1007,7 +1012,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n"); printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n"); printf(" or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu); -#endif +#endif // LLAMA_SUPPORTS_GPU_OFFLOAD printf(" --verbose-prompt print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false"); printf(" --no-display-prompt don't print prompt at generation (default: %s)\n", !params.display_prompt ? "true" : "false"); printf(" -gan N, --grp-attn-n N\n"); @@ -1514,7 +1519,6 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false"); fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false"); fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false"); - fprintf(stream, "cpu_has_blas: %s\n", ggml_cpu_has_blas() ? "true" : "false"); fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false"); fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false"); fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false"); |