author    slaren <slarengh@gmail.com>   2024-06-04 14:32:42 +0200
committer GitHub <noreply@github.com>   2024-06-04 14:32:42 +0200
commit    adc9ff384121f4d550d28638a646b336d051bf42 (patch)
tree      51b6628ece58334ff6f604fea4604c91a60570d5
parent    987d743d6bc4cee4bde6820733ea33a2abc0afac (diff)
llama-bench : allow using a different printer for stderr with -oe (#7722)
compare-commits.sh : hide stdout, use -oe to print markdown
-rw-r--r--  examples/llama-bench/llama-bench.cpp | 145
-rwxr-xr-x  scripts/compare-commits.sh           |  16
2 files changed, 101 insertions, 60 deletions
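
In short, llama-bench can now drive two printers at once: the usual one on stdout (-o) and an optional second one on stderr (-oe). A hedged usage sketch, assuming the flags exactly as added below (the model path is a placeholder):

# stdout carries CSV for downstream tooling; stderr keeps a human-readable markdown table
./llama-bench -m models/7B/ggml-model-q4_0.gguf -o csv -oe md > results.csv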
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index c0089044..5d3cbd84 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -140,10 +140,11 @@ static std::string get_gpu_info() {
}
// command line params
-enum output_formats {CSV, JSON, MARKDOWN, SQL};
+enum output_formats {NONE, CSV, JSON, MARKDOWN, SQL};
static const char * output_format_str(output_formats format) {
switch (format) {
+ case NONE: return "none";
case CSV: return "csv";
case JSON: return "json";
case MARKDOWN: return "md";
@@ -152,6 +153,23 @@ static const char * output_format_str(output_formats format) {
}
}
+static bool output_format_from_str(const std::string & s, output_formats & format) {
+ if (s == "none") {
+ format = NONE;
+ } else if (s == "csv") {
+ format = CSV;
+ } else if (s == "json") {
+ format = JSON;
+ } else if (s == "md") {
+ format = MARKDOWN;
+ } else if (s == "sql") {
+ format = SQL;
+ } else {
+ return false;
+ }
+ return true;
+}
+
static const char * split_mode_str(llama_split_mode mode) {
switch (mode) {
case LLAMA_SPLIT_MODE_NONE: return "none";
@@ -190,31 +208,33 @@ struct cmd_params {
int reps;
bool verbose;
output_formats output_format;
+ output_formats output_format_stderr;
};
static const cmd_params cmd_params_defaults = {
- /* model */ {"models/7B/ggml-model-q4_0.gguf"},
- /* n_prompt */ {512},
- /* n_gen */ {128},
- /* n_pg */ {},
- /* n_batch */ {2048},
- /* n_ubatch */ {512},
- /* type_k */ {GGML_TYPE_F16},
- /* type_v */ {GGML_TYPE_F16},
- /* n_threads */ {cpu_get_num_math()},
- /* n_gpu_layers */ {99},
- /* rpc_servers */ {""},
- /* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
- /* main_gpu */ {0},
- /* no_kv_offload */ {false},
- /* flash_attn */ {false},
- /* tensor_split */ {std::vector<float>(llama_max_devices(), 0.0f)},
- /* use_mmap */ {true},
- /* embeddings */ {false},
- /* numa */ GGML_NUMA_STRATEGY_DISABLED,
- /* reps */ 5,
- /* verbose */ false,
- /* output_format */ MARKDOWN
+ /* model */ {"models/7B/ggml-model-q4_0.gguf"},
+ /* n_prompt */ {512},
+ /* n_gen */ {128},
+ /* n_pg */ {},
+ /* n_batch */ {2048},
+ /* n_ubatch */ {512},
+ /* type_k */ {GGML_TYPE_F16},
+ /* type_v */ {GGML_TYPE_F16},
+ /* n_threads */ {cpu_get_num_math()},
+ /* n_gpu_layers */ {99},
+ /* rpc_servers */ {""},
+ /* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
+ /* main_gpu */ {0},
+ /* no_kv_offload */ {false},
+ /* flash_attn */ {false},
+ /* tensor_split */ {std::vector<float>(llama_max_devices(), 0.0f)},
+ /* use_mmap */ {true},
+ /* embeddings */ {false},
+ /* numa */ GGML_NUMA_STRATEGY_DISABLED,
+ /* reps */ 5,
+ /* verbose */ false,
+ /* output_format */ MARKDOWN,
+ /* output_format_stderr */ NONE,
};
static void print_usage(int /* argc */, char ** argv) {
@@ -243,6 +263,7 @@ static void print_usage(int /* argc */, char ** argv) {
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
+ printf(" -oe, --output-err <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
printf("\n");
printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
@@ -284,6 +305,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
params.verbose = cmd_params_defaults.verbose;
params.output_format = cmd_params_defaults.output_format;
+ params.output_format_stderr = cmd_params_defaults.output_format_stderr;
params.reps = cmd_params_defaults.reps;
for (int i = 1; i < argc; i++) {
@@ -493,18 +515,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
invalid_param = true;
break;
}
- if (argv[i] == std::string("csv")) {
- params.output_format = CSV;
- } else if (argv[i] == std::string("json")) {
- params.output_format = JSON;
- } else if (argv[i] == std::string("md")) {
- params.output_format = MARKDOWN;
- } else if (argv[i] == std::string("sql")) {
- params.output_format = SQL;
- } else {
+ invalid_param = !output_format_from_str(argv[i], params.output_format);
+ } else if (arg == "-oe" || arg == "--output-err") {
+ if (++i >= argc) {
invalid_param = true;
break;
}
+ invalid_param = !output_format_from_str(argv[i], params.output_format_stderr);
} else if (arg == "-v" || arg == "--verbose") {
params.verbose = true;
} else {
@@ -1278,6 +1295,22 @@ static void llama_null_log_callback(enum ggml_log_level level, const char * text
(void) user_data;
}
+static std::unique_ptr<printer> create_printer(output_formats format) {
+ switch (format) {
+ case NONE:
+ return nullptr;
+ case CSV:
+ return std::unique_ptr<printer>(new csv_printer());
+ case JSON:
+ return std::unique_ptr<printer>(new json_printer());
+ case MARKDOWN:
+ return std::unique_ptr<printer>(new markdown_printer());
+ case SQL:
+ return std::unique_ptr<printer>(new sql_printer());
+ }
+ GGML_ASSERT(false);
+}
+
int main(int argc, char ** argv) {
// try to set locale for unicode characters in markdown
setlocale(LC_CTYPE, ".UTF-8");
@@ -1304,26 +1337,18 @@ int main(int argc, char ** argv) {
llama_numa_init(params.numa);
// initialize printer
- std::unique_ptr<printer> p;
- switch (params.output_format) {
- case CSV:
- p.reset(new csv_printer());
- break;
- case JSON:
- p.reset(new json_printer());
- break;
- case MARKDOWN:
- p.reset(new markdown_printer());
- break;
- case SQL:
- p.reset(new sql_printer());
- break;
- default:
- assert(false);
- exit(1);
+ std::unique_ptr<printer> p = create_printer(params.output_format);
+ std::unique_ptr<printer> p_err = create_printer(params.output_format_stderr);
+
+ if (p) {
+ p->fout = stdout;
+ p->print_header(params);
+ }
+
+ if (p_err) {
+ p_err->fout = stderr;
+ p_err->print_header(params);
}
- p->fout = stdout;
- p->print_header(params);
std::vector<cmd_params_instance> params_instances = get_cmd_params_instances(params);
@@ -1381,7 +1406,15 @@ int main(int argc, char ** argv) {
t.samples_ns.push_back(t_ns);
}
- p->print_test(t);
+ if (p) {
+ p->print_test(t);
+ fflush(p->fout);
+ }
+
+ if (p_err) {
+ p_err->print_test(t);
+ fflush(p_err->fout);
+ }
llama_print_timings(ctx);
@@ -1390,7 +1423,13 @@ int main(int argc, char ** argv) {
llama_free_model(lmodel);
- p->print_footer();
+ if (p) {
+ p->print_footer();
+ }
+
+ if (p_err) {
+ p_err->print_footer();
+ }
llama_backend_free();
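
The net effect of the dual-printer wiring above is that the two streams can be consumed independently. A minimal sketch of the pattern compare-commits.sh relies on (model path is a placeholder):

# SQL statements flow down the pipe into sqlite3; the markdown summary stays visible on stderr
./llama-bench -m models/7B/ggml-model-q4_0.gguf -o sql -oe md | sqlite3 llama-bench.sqlite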
diff --git a/scripts/compare-commits.sh b/scripts/compare-commits.sh
index fd0ee88b..a45cd396 100755
--- a/scripts/compare-commits.sh
+++ b/scripts/compare-commits.sh
@@ -10,16 +10,18 @@ set -x
bench_args="${@:3}"
-rm -f llama-bench.sqlite
+rm -f llama-bench.sqlite > /dev/null
# to test a backend, call the script with the corresponding environment variable (e.g. LLAMA_CUDA=1 ./scripts/compare-commits.sh ...)
-git checkout $1
-make clean && make -j32 $make_opts llama-bench
-./llama-bench -o sql $bench_args | tee /dev/tty | sqlite3 llama-bench.sqlite
+git checkout $1 > /dev/null
+make clean > /dev/null
+make -j$(nproc) $make_opts llama-bench > /dev/null
+./llama-bench -o sql -oe md $bench_args | sqlite3 llama-bench.sqlite
-git checkout $2
-make clean && make -j32 $make_opts llama-bench
-./llama-bench -o sql $bench_args | tee /dev/tty | sqlite3 llama-bench.sqlite
+git checkout $2 > /dev/null
+make clean > /dev/null
+make -j$(nproc) $make_opts llama-bench > /dev/null
+./llama-bench -o sql -oe md $bench_args | sqlite3 llama-bench.sqlite
./scripts/compare-llama-bench.py -b $1 -c $2
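
For reference, a hedged end-to-end invocation of the updated script, per the backend comment above (commit names and model path are placeholders; arguments from the third onward are forwarded to llama-bench via bench_args):

# compare two commits with the CUDA backend enabled
LLAMA_CUDA=1 ./scripts/compare-commits.sh master my-branch -m models/7B/ggml-model-q4_0.gguf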