author    pudepiedj <pudepiedj@gmail.com>    2023-10-06 14:16:38 +0100
committer GitHub <noreply@github.com>        2023-10-06 16:16:38 +0300
commit    a8777ad84e00cda0399e827cdf971e2c3fab1da2 (patch)
tree      03be5a38a4db7507579cb7d13ed9428834c12f87 /llama.cpp
parent    97af49fa395df77e4c18af0e1655b2fee67c9686 (diff)
parallel : add option to load external prompt file (#3416)
* Enable external file and add datestamp
* Add name of external file at end
* Upload ToK2024
* Delete ToK2024.txt
* Experiments with jeopardy
* Move ParallelQuestions to /prompts and rename
* Interim commit
* Interim commit
* Final revision
* Remove trailing whitespace
* remove cmake_all.sh
* Remove cmake_all.sh
* Changed .gitignore
* Improved reporting and new question files.
* Corrected typo
* More LLM questions
* Update LLM-questions.txt
* Yet more LLM-questions
* Remove jeopardy results file
* Reinstate original jeopardy.sh
* Update examples/parallel/parallel.cpp

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
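The headline change lets examples/parallel pull its questions from an external prompt file instead of a hard-coded list. A minimal sketch of that idea, assuming a newline-separated question file such as prompts/parallel-questions.txt; the helper name load_prompts is illustrative, not the PR's actual code:

    // Sketch only, not the code from this commit: read one prompt per
    // non-empty line from an external file.
    #include <fstream>
    #include <string>
    #include <vector>

    static std::vector<std::string> load_prompts(const std::string & path) {
        std::vector<std::string> prompts;
        std::ifstream file(path);
        for (std::string line; std::getline(file, line); ) {
            if (!line.empty()) {
                prompts.push_back(line); // one question per non-empty line
            }
        }
        return prompts;
    }

    // usage (file path as in this PR's prompts/ directory):
    // const auto questions = load_prompts("prompts/parallel-questions.txt");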
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp  10
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 56413f3a..1a7d37b8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8219,14 +8219,14 @@ void llama_print_timings(struct llama_context * ctx) {
     const llama_timings timings = llama_get_timings(ctx);

     LLAMA_LOG_INFO("\n");
-    LLAMA_LOG_INFO("%s:        load time = %8.2f ms\n", __func__, timings.t_load_ms);
-    LLAMA_LOG_INFO("%s:      sample time = %8.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s:        load time = %10.2f ms\n", __func__, timings.t_load_ms);
+    LLAMA_LOG_INFO("%s:      sample time = %10.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample);
-    LLAMA_LOG_INFO("%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
-    LLAMA_LOG_INFO("%s:        eval time = %8.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s:        eval time = %10.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
-    LLAMA_LOG_INFO("%s:       total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
+    LLAMA_LOG_INFO("%s:       total time = %10.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
 }

 void llama_reset_timings(struct llama_context * ctx) {
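The llama.cpp change itself is narrow: each %8.2f that prints a total in milliseconds becomes %10.2f, widening the printf field so that long runs keep the timing columns aligned; the per-token and tokens-per-second fields stay at %8.2f. A standalone illustration of the difference, with an invented value:

    // Why the field width grew from 8 to 10: a total above 99999.99 ms
    // needs 9+ characters and overflows an 8-wide field.
    #include <cstdio>

    int main() {
        const double ms = 123456.78;  // invented long-run total
        printf("[%8.2f]\n",  ms);     // prints [123456.78]  - overflows the field, alignment breaks
        printf("[%10.2f]\n", ms);     // prints [ 123456.78] - fits, columns stay aligned
        return 0;
    }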