author    Georgi Gerganov <ggerganov@gmail.com>    2024-01-31 17:30:17 +0200
committer GitHub <noreply@github.com>              2024-01-31 17:30:17 +0200
commit 5cb04dbc16d1da38c8fdcc0111b40e67d00dd1c3 (patch)
tree   3ef8dc640d5c08466309c09a8ac2963bb760af06 /examples/llama-bench
parent efb7bdbbd061d087c788598b97992c653f992ddd (diff)
llama : remove LLAMA_MAX_DEVICES and LLAMA_SUPPORTS_GPU_OFFLOAD (#5240)
* llama : remove LLAMA_MAX_DEVICES from llama.h
  ggml-ci
* Update llama.cpp
  Co-authored-by: slaren <slarengh@gmail.com>
* server : remove LLAMA_MAX_DEVICES
  ggml-ci
* llama : remove LLAMA_SUPPORTS_GPU_OFFLOAD
  ggml-ci
* train : remove LLAMA_SUPPORTS_GPU_OFFLOAD
* readme : add deprecation notice
* readme : change deprecation notice to "remove" and fix url
* llama : remove gpu includes from llama.h
  ggml-ci

Co-authored-by: slaren <slarengh@gmail.com>
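Every hunk below applies the same migration: the compile-time LLAMA_MAX_DEVICES macro becomes a runtime llama_max_devices() call, and fixed-size std::array members become std::vector objects sized at startup. A minimal before/after sketch of the pattern (the variable name is illustrative; llama_max_devices() is the llama.h call this commit switches to):

    #include <vector>
    #include "llama.h"

    // before: size fixed at compile time by a macro exported from llama.h
    // std::array<float, LLAMA_MAX_DEVICES> tensor_split;

    // after: size obtained at runtime, so llama.h no longer has to expose
    // backend-specific constants (and can drop its GPU includes)
    std::vector<float> tensor_split(llama_max_devices(), 0.0f);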
Diffstat (limited to 'examples/llama-bench')
-rw-r--r--    examples/llama-bench/llama-bench.cpp    16
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 542cc7bb..c5a6f744 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -160,7 +160,7 @@ struct cmd_params {
     std::vector<int> main_gpu;
     std::vector<bool> no_kv_offload;
     std::vector<bool> mul_mat_q;
-    std::vector<std::array<float, LLAMA_MAX_DEVICES>> tensor_split;
+    std::vector<std::vector<float>> tensor_split;
     int reps;
     bool verbose;
     output_formats output_format;
@@ -179,7 +179,7 @@ static const cmd_params cmd_params_defaults = {
     /* main_gpu      */ {0},
     /* no_kv_offload */ {false},
     /* mul_mat_q     */ {true},
-    /* tensor_split  */ {{}},
+    /* tensor_split  */ {std::vector<float>(llama_max_devices(), 0.0f)},
     /* reps          */ 5,
     /* verbose       */ false,
     /* output_format */ MARKDOWN
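The new default above builds a single candidate split filled with zeros. A small sketch of what that initializer evaluates to, assuming all-zero entries mean "no explicit per-device proportions" (matching the old empty {} default):

    #include <vector>
    #include "llama.h"

    // one default entry: a split of llama_max_devices() zeros
    // (assumption: zeros leave the split to the backend, as the
    //  old empty-initialized std::array did)
    std::vector<std::vector<float>> tensor_split_defaults = {
        std::vector<float>(llama_max_devices(), 0.0f)
    };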
@@ -380,10 +380,10 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
                 const std::regex regex{R"([;/]+)"};
                 std::sregex_token_iterator it{ts.begin(), ts.end(), regex, -1};
                 std::vector<std::string> split_arg{it, {}};
-                GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES);
+                GGML_ASSERT(split_arg.size() <= llama_max_devices());
 
-                std::array<float, LLAMA_MAX_DEVICES> tensor_split;
-                for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) {
+                std::vector<float> tensor_split(llama_max_devices());
+                for (size_t i = 0; i < llama_max_devices(); ++i) {
                     if (i < split_arg.size()) {
                         tensor_split[i] = std::stof(split_arg[i]);
                     } else {
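A standalone sketch of the parsing this hunk performs: a -ts argument such as "3;1" or "3/1" is split on ';' or '/' and converted to per-device floats, with unspecified devices padded to zero. The helper name is hypothetical; the regex and conversion mirror the diff:

    #include <regex>
    #include <string>
    #include <vector>

    // hypothetical helper; the real code lives inline in parse_cmd_params()
    static std::vector<float> parse_tensor_split(const std::string & ts, size_t n_devices) {
        // split the argument on ';' or '/', exactly as in the hunk above
        const std::regex regex{R"([;/]+)"};
        std::sregex_token_iterator it{ts.begin(), ts.end(), regex, -1};
        std::vector<std::string> split_arg{it, {}};

        // devices not named on the command line default to 0.0f
        // (the real code asserts split_arg.size() <= llama_max_devices();
        //  this sketch simply ignores any excess entries)
        std::vector<float> tensor_split(n_devices, 0.0f);
        for (size_t i = 0; i < split_arg.size() && i < n_devices; ++i) {
            tensor_split[i] = std::stof(split_arg[i]);
        }
        return tensor_split;
    }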
@@ -459,7 +459,7 @@ struct cmd_params_instance {
     int main_gpu;
     bool no_kv_offload;
     bool mul_mat_q;
-    std::array<float, LLAMA_MAX_DEVICES> tensor_split;
+    std::vector<float> tensor_split;
 
     llama_model_params to_llama_mparams() const {
         llama_model_params mparams = llama_model_default_params();
@@ -582,7 +582,7 @@ struct test {
     int main_gpu;
     bool no_kv_offload;
     bool mul_mat_q;
-    std::array<float, LLAMA_MAX_DEVICES> tensor_split;
+    std::vector<float> tensor_split;
     int n_prompt;
     int n_gen;
     std::string test_time;
@@ -704,7 +704,7 @@ struct test {
     std::vector<std::string> get_values() const {
         std::string tensor_split_str;
         int max_nonzero = 0;
-        for (int i = 0; i < LLAMA_MAX_DEVICES; i++) {
+        for (size_t i = 0; i < llama_max_devices(); i++) {
             if (tensor_split[i] > 0) {
                 max_nonzero = i;
             }
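The loop in the last hunk finds the highest device index with a nonzero share so the split can be printed compactly. A self-contained sketch of that serialization, assuming the usual slash-delimited output format (the function name is illustrative, not the file's actual structure):

    #include <cstdio>
    #include <string>
    #include <vector>

    // illustrative helper: {0.6f, 0.4f, 0.0f, 0.0f} -> "0.60/0.40"
    static std::string tensor_split_to_str(const std::vector<float> & tensor_split) {
        std::string str;
        if (tensor_split.empty()) {
            return str;
        }
        size_t max_nonzero = 0;
        for (size_t i = 0; i < tensor_split.size(); i++) {
            if (tensor_split[i] > 0) {
                max_nonzero = i; // index of the last device with a nonzero share
            }
        }
        for (size_t i = 0; i <= max_nonzero; i++) {
            char buf[32];
            snprintf(buf, sizeof(buf), "%.2f", tensor_split[i]);
            str += buf;
            if (i < max_nonzero) {
                str += "/"; // one of the delimiters "-ts" accepts on input
            }
        }
        return str;
    }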