author | Georgi Gerganov <ggerganov@gmail.com> | 2024-01-31 17:30:17 +0200
---|---|---
committer | GitHub <noreply@github.com> | 2024-01-31 17:30:17 +0200
commit | 5cb04dbc16d1da38c8fdcc0111b40e67d00dd1c3 |
tree | 3ef8dc640d5c08466309c09a8ac2963bb760af06 /examples/llama-bench |
parent | efb7bdbbd061d087c788598b97992c653f992ddd |
llama : remove LLAMA_MAX_DEVICES and LLAMA_SUPPORTS_GPU_OFFLOAD (#5240)
* llama : remove LLAMA_MAX_DEVICES from llama.h
ggml-ci
* Update llama.cpp
Co-authored-by: slaren <slarengh@gmail.com>
* server : remove LLAMA_MAX_DEVICES
ggml-ci
* llama : remove LLAMA_SUPPORTS_GPU_OFFLOAD
ggml-ci
* train : remove LLAMA_SUPPORTS_GPU_OFFLOAD
* readme : add deprecation notice
* readme : change deprecation notice to "remove" and fix url
* llama : remove gpu includes from llama.h
ggml-ci
---------
Co-authored-by: slaren <slarengh@gmail.com>
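
For downstream users, the migration this commit implies is mechanical: replace the compile-time `LLAMA_MAX_DEVICES` bound with the runtime `llama_max_devices()` query, and the `LLAMA_SUPPORTS_GPU_OFFLOAD` define with a call to `llama_supports_gpu_offload()`. A minimal caller-side sketch, assuming the declarations this PR adds to `llama.h` (`size_t llama_max_devices(void)` and `bool llama_supports_gpu_offload(void)`); it is not taken verbatim from the diff:

```cpp
#include <cstdio>
#include <vector>

#include "llama.h"

int main() {
    // Before: float tensor_split[LLAMA_MAX_DEVICES] = {0};
    // After:  size the buffer at runtime, since the device count is no
    //         longer a preprocessor constant.
    std::vector<float> tensor_split(llama_max_devices(), 0.0f);

    // Before: #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD ... #endif
    // After:  an ordinary runtime capability check.
    if (llama_supports_gpu_offload()) {
        printf("GPU offload supported, up to %zu device slot(s)\n", llama_max_devices());
    } else {
        printf("no GPU offload in this build\n");
    }

    (void)tensor_split;
    return 0;
}
```

Moving the check from the preprocessor to a function call means the same compiled client binary works against libraries built with or without GPU backends, which is what makes the old macros removable from the public header.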
Diffstat (limited to 'examples/llama-bench')
-rw-r--r-- | examples/llama-bench/llama-bench.cpp | 16 |
1 file changed, 8 insertions, 8 deletions
```diff
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 542cc7bb..c5a6f744 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -160,7 +160,7 @@ struct cmd_params {
     std::vector<int> main_gpu;
     std::vector<bool> no_kv_offload;
     std::vector<bool> mul_mat_q;
-    std::vector<std::array<float, LLAMA_MAX_DEVICES>> tensor_split;
+    std::vector<std::vector<float>> tensor_split;
     int reps;
     bool verbose;
     output_formats output_format;
@@ -179,7 +179,7 @@ static const cmd_params cmd_params_defaults = {
     /* main_gpu      */ {0},
     /* no_kv_offload */ {false},
     /* mul_mat_q     */ {true},
-    /* tensor_split  */ {{}},
+    /* tensor_split  */ {std::vector<float>(llama_max_devices(), 0.0f)},
     /* reps          */ 5,
     /* verbose       */ false,
     /* output_format */ MARKDOWN
@@ -380,10 +380,10 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
                 const std::regex regex{R"([;/]+)"};
                 std::sregex_token_iterator it{ts.begin(), ts.end(), regex, -1};
                 std::vector<std::string> split_arg{it, {}};
-                GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES);
+                GGML_ASSERT(split_arg.size() <= llama_max_devices());
 
-                std::array<float, LLAMA_MAX_DEVICES> tensor_split;
-                for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) {
+                std::vector<float> tensor_split(llama_max_devices());
+                for (size_t i = 0; i < llama_max_devices(); ++i) {
                     if (i < split_arg.size()) {
                         tensor_split[i] = std::stof(split_arg[i]);
                     } else {
@@ -459,7 +459,7 @@ struct cmd_params_instance {
     int main_gpu;
     bool no_kv_offload;
     bool mul_mat_q;
-    std::array<float, LLAMA_MAX_DEVICES> tensor_split;
+    std::vector<float> tensor_split;
 
     llama_model_params to_llama_mparams() const {
         llama_model_params mparams = llama_model_default_params();
@@ -582,7 +582,7 @@ struct test {
     int main_gpu;
     bool no_kv_offload;
     bool mul_mat_q;
-    std::array<float, LLAMA_MAX_DEVICES> tensor_split;
+    std::vector<float> tensor_split;
     int n_prompt;
     int n_gen;
     std::string test_time;
@@ -704,7 +704,7 @@ struct test {
     std::vector<std::string> get_values() const {
         std::string tensor_split_str;
         int max_nonzero = 0;
-        for (int i = 0; i < LLAMA_MAX_DEVICES; i++) {
+        for (size_t i = 0; i < llama_max_devices(); i++) {
             if (tensor_split[i] > 0) {
                 max_nonzero = i;
             }
```
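
The parsing hunk is the interesting one: the `-ts`/`--tensor-split` argument is split on `;` or `/`, and any devices not named in the argument default to a weight of `0`. Below is a self-contained sketch of that pattern, with a hypothetical `parse_tensor_split` helper and `llama_max_devices()` stubbed out so it compiles without `llama.h`; the real code in `parse_cmd_params` asserts on overlong input rather than clamping as this sketch does:

```cpp
#include <cstdio>
#include <regex>
#include <string>
#include <vector>

// Stand-in for the real llama.h API; the value 16 is purely illustrative.
static size_t llama_max_devices() { return 16; }

// Hypothetical helper isolating the pattern from parse_cmd_params().
static std::vector<float> parse_tensor_split(const std::string & ts) {
    // Split the argument on ';' or '/', mirroring the regex in llama-bench.
    const std::regex regex{R"([;/]+)"};
    std::sregex_token_iterator it{ts.begin(), ts.end(), regex, -1};
    std::vector<std::string> split_arg{it, {}};

    // Unspecified trailing devices keep the default weight of 0.
    std::vector<float> tensor_split(llama_max_devices(), 0.0f);
    for (size_t i = 0; i < split_arg.size() && i < tensor_split.size(); ++i) {
        tensor_split[i] = std::stof(split_arg[i]);
    }
    return tensor_split;
}

int main() {
    for (float f : parse_tensor_split("3/1")) {
        printf("%.1f ", f); // prints: 3.0 1.0 0.0 ... 0.0
    }
    printf("\n");
    return 0;
}
```

Switching from `std::array<float, LLAMA_MAX_DEVICES>` to a `std::vector<float>` sized by the runtime query is what lets `llama.h` stop exporting the backend-dependent constant; note that the loop index in the last hunk also changes from `int` to `size_t` to match the return type of `llama_max_devices()`.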