From 5cb04dbc16d1da38c8fdcc0111b40e67d00dd1c3 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 31 Jan 2024 17:30:17 +0200
Subject: llama : remove LLAMA_MAX_DEVICES and LLAMA_SUPPORTS_GPU_OFFLOAD (#5240)

* llama : remove LLAMA_MAX_DEVICES from llama.h

ggml-ci

* Update llama.cpp

Co-authored-by: slaren

* server : remove LLAMA_MAX_DEVICES

ggml-ci

* llama : remove LLAMA_SUPPORTS_GPU_OFFLOAD

ggml-ci

* train : remove LLAMA_SUPPORTS_GPU_OFFLOAD

* readme : add deprecation notice

* readme : change deprecation notice to "remove" and fix url

* llama : remove gpu includes from llama.h

ggml-ci

---------

Co-authored-by: slaren
---
 examples/llama-bench/llama-bench.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'examples/llama-bench')

diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 542cc7bb..c5a6f744 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -160,7 +160,7 @@ struct cmd_params {
     std::vector<int> main_gpu;
     std::vector<bool> no_kv_offload;
     std::vector<bool> mul_mat_q;
-    std::vector<std::array<float, LLAMA_MAX_DEVICES>> tensor_split;
+    std::vector<std::vector<float>> tensor_split;
     int reps;
     bool verbose;
     output_formats output_format;
@@ -179,7 +179,7 @@ static const cmd_params cmd_params_defaults = {
     /* main_gpu      */ {0},
     /* no_kv_offload */ {false},
     /* mul_mat_q     */ {true},
-    /* tensor_split  */ {{}},
+    /* tensor_split  */ {std::vector<float>(llama_max_devices(), 0.0f)},
     /* reps          */ 5,
     /* verbose       */ false,
     /* output_format */ MARKDOWN
@@ -380,10 +380,10 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
             const std::regex regex{R"([;/]+)"};
             std::sregex_token_iterator it{ts.begin(), ts.end(), regex, -1};
             std::vector<std::string> split_arg{it, {}};
-            GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES);
+            GGML_ASSERT(split_arg.size() <= llama_max_devices());
 
-            std::array<float, LLAMA_MAX_DEVICES> tensor_split;
-            for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) {
+            std::vector<float> tensor_split(llama_max_devices());
+            for (size_t i = 0; i < llama_max_devices(); ++i) {
                 if (i < split_arg.size()) {
                     tensor_split[i] = std::stof(split_arg[i]);
                 } else {
@@ -459,7 +459,7 @@ struct cmd_params_instance {
     int main_gpu;
     bool no_kv_offload;
     bool mul_mat_q;
-    std::array<float, LLAMA_MAX_DEVICES> tensor_split;
+    std::vector<float> tensor_split;
 
     llama_model_params to_llama_mparams() const {
         llama_model_params mparams = llama_model_default_params();
@@ -582,7 +582,7 @@ struct test {
     int main_gpu;
     bool no_kv_offload;
     bool mul_mat_q;
-    std::array<float, LLAMA_MAX_DEVICES> tensor_split;
+    std::vector<float> tensor_split;
     int n_prompt;
     int n_gen;
     std::string test_time;
@@ -704,7 +704,7 @@ struct test {
    std::vector<std::string> get_values() const {
         std::string tensor_split_str;
         int max_nonzero = 0;
-        for (int i = 0; i < LLAMA_MAX_DEVICES; i++) {
+        for (size_t i = 0; i < llama_max_devices(); i++) {
             if (tensor_split[i] > 0) {
                 max_nonzero = i;
             }
--
cgit v1.2.3
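Note: a minimal sketch of the caller-side migration this patch implies, not part of the patch itself. Per-device buffers that were sized with the compile-time LLAMA_MAX_DEVICES macro are instead sized at runtime via llama_max_devices(), and the LLAMA_SUPPORTS_GPU_OFFLOAD macro gives way to the llama_supports_gpu_offload() query; the surrounding program structure below is illustrative only.

    #include <cstdio>
    #include <vector>

    #include "llama.h"

    int main() {
        // Runtime query replaces the removed compile-time LLAMA_MAX_DEVICES macro.
        const size_t n_devices = llama_max_devices();

        // Fixed-size std::array<float, LLAMA_MAX_DEVICES> members become
        // std::vector<float> sized once from the runtime device count,
        // mirroring the tensor_split change in llama-bench above.
        std::vector<float> tensor_split(n_devices, 0.0f);

        // Runtime query replaces the removed LLAMA_SUPPORTS_GPU_OFFLOAD macro.
        if (llama_supports_gpu_offload()) {
            printf("GPU offload available across up to %zu devices\n", n_devices);
        }

        return 0;
    }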