diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-06-04 21:23:39 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-04 21:23:39 +0300 |
commit | 1442677f92e45a475be7b4d056e3633d1d6f813b (patch) | |
tree | d9dbb111ccaedc44cba527dbddd90bedd1e04ea8 /examples/llama-bench/llama-bench.cpp | |
parent | 554c247caffed64465f372661f2826640cb10430 (diff) |
common : refactor cli arg parsing (#7675)
* common : gpt_params_parse do not print usage
* common : rework usage print (wip)
* common : valign
* common : rework print_usage
* infill : remove cfg support
* common : reorder args
* server : deduplicate parameters
ggml-ci
* common : add missing header
ggml-ci
* common : remove --random-prompt usages
ggml-ci
* examples : migrate to gpt_params
ggml-ci
* batched-bench : migrate to gpt_params
* retrieval : migrate to gpt_params
* common : change defaults for escape and n_ctx
* common : remove chatml and instruct params
ggml-ci
* common : passkey use gpt_params
Diffstat (limited to 'examples/llama-bench/llama-bench.cpp')
-rw-r--r-- | examples/llama-bench/llama-bench.cpp | 48 |
1 file changed, 17 insertions, 31 deletions
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index fa7ad1bd..5c31548a 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -41,20 +41,6 @@ static std::string join(const std::vector<T> & values, const std::string & delim return str.str(); } -template<class T> -static std::vector<T> split(const std::string & str, char delim) { - std::vector<T> values; - std::istringstream str_stream(str); - std::string token; - while (std::getline(str_stream, token, delim)) { - T value; - std::istringstream token_stream(token); - token_stream >> value; - values.push_back(value); - } - return values; -} - template<typename T, typename F> static std::vector<std::string> transform_to_str(const std::vector<T> & values, F f) { std::vector<std::string> str_values; @@ -322,28 +308,28 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { invalid_param = true; break; } - auto p = split<std::string>(argv[i], split_delim); + auto p = string_split<std::string>(argv[i], split_delim); params.model.insert(params.model.end(), p.begin(), p.end()); } else if (arg == "-p" || arg == "--n-prompt") { if (++i >= argc) { invalid_param = true; break; } - auto p = split<int>(argv[i], split_delim); + auto p = string_split<int>(argv[i], split_delim); params.n_prompt.insert(params.n_prompt.end(), p.begin(), p.end()); } else if (arg == "-n" || arg == "--n-gen") { if (++i >= argc) { invalid_param = true; break; } - auto p = split<int>(argv[i], split_delim); + auto p = string_split<int>(argv[i], split_delim); params.n_gen.insert(params.n_gen.end(), p.begin(), p.end()); } else if (arg == "-pg") { if (++i >= argc) { invalid_param = true; break; } - auto p = split<std::string>(argv[i], ','); + auto p = string_split<std::string>(argv[i], ','); if (p.size() != 2) { invalid_param = true; break; @@ -354,21 +340,21 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { invalid_param = true; break; } - auto p = 
split<int>(argv[i], split_delim); + auto p = string_split<int>(argv[i], split_delim); params.n_batch.insert(params.n_batch.end(), p.begin(), p.end()); } else if (arg == "-ub" || arg == "--ubatch-size") { if (++i >= argc) { invalid_param = true; break; } - auto p = split<int>(argv[i], split_delim); + auto p = string_split<int>(argv[i], split_delim); params.n_ubatch.insert(params.n_ubatch.end(), p.begin(), p.end()); } else if (arg == "-ctk" || arg == "--cache-type-k") { if (++i >= argc) { invalid_param = true; break; } - auto p = split<std::string>(argv[i], split_delim); + auto p = string_split<std::string>(argv[i], split_delim); std::vector<ggml_type> types; for (const auto & t : p) { ggml_type gt = ggml_type_from_name(t); @@ -384,7 +370,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { invalid_param = true; break; } - auto p = split<std::string>(argv[i], split_delim); + auto p = string_split<std::string>(argv[i], split_delim); std::vector<ggml_type> types; for (const auto & t : p) { ggml_type gt = ggml_type_from_name(t); @@ -400,14 +386,14 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { invalid_param = true; break; } - auto p = split<int>(argv[i], split_delim); + auto p = string_split<int>(argv[i], split_delim); params.n_threads.insert(params.n_threads.end(), p.begin(), p.end()); } else if (arg == "-ngl" || arg == "--n-gpu-layers") { if (++i >= argc) { invalid_param = true; break; } - auto p = split<int>(argv[i], split_delim); + auto p = string_split<int>(argv[i], split_delim); params.n_gpu_layers.insert(params.n_gpu_layers.end(), p.begin(), p.end()); } else if (arg == "-rpc" || arg == "--rpc") { if (++i >= argc) { @@ -420,7 +406,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { invalid_param = true; break; } - auto p = split<std::string>(argv[i], split_delim); + auto p = string_split<std::string>(argv[i], split_delim); std::vector<llama_split_mode> modes; for (const auto & m : p) { llama_split_mode mode; @@ -442,13 
+428,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { invalid_param = true; break; } - params.main_gpu = split<int>(argv[i], split_delim); + params.main_gpu = string_split<int>(argv[i], split_delim); } else if (arg == "-nkvo" || arg == "--no-kv-offload") { if (++i >= argc) { invalid_param = true; break; } - auto p = split<bool>(argv[i], split_delim); + auto p = string_split<bool>(argv[i], split_delim); params.no_kv_offload.insert(params.no_kv_offload.end(), p.begin(), p.end()); } else if (arg == "--numa") { if (++i >= argc) { @@ -466,28 +452,28 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { invalid_param = true; break; } - auto p = split<bool>(argv[i], split_delim); + auto p = string_split<bool>(argv[i], split_delim); params.flash_attn.insert(params.flash_attn.end(), p.begin(), p.end()); } else if (arg == "-mmp" || arg == "--mmap") { if (++i >= argc) { invalid_param = true; break; } - auto p = split<bool>(argv[i], split_delim); + auto p = string_split<bool>(argv[i], split_delim); params.use_mmap.insert(params.use_mmap.end(), p.begin(), p.end()); } else if (arg == "-embd" || arg == "--embeddings") { if (++i >= argc) { invalid_param = true; break; } - auto p = split<bool>(argv[i], split_delim); + auto p = string_split<bool>(argv[i], split_delim); params.embeddings.insert(params.embeddings.end(), p.begin(), p.end()); } else if (arg == "-ts" || arg == "--tensor-split") { if (++i >= argc) { invalid_param = true; break; } - for (auto ts : split<std::string>(argv[i], split_delim)) { + for (auto ts : string_split<std::string>(argv[i], split_delim)) { // split string by ; and / const std::regex regex{R"([;/]+)"}; std::sregex_token_iterator it{ts.begin(), ts.end(), regex, -1}; |