diff options
author | pudepiedj <pudepiedj@gmail.com> | 2023-10-06 14:16:38 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-06 16:16:38 +0300 |
commit | a8777ad84e00cda0399e827cdf971e2c3fab1da2 (patch) | |
tree | 03be5a38a4db7507579cb7d13ed9428834c12f87 /examples/parallel/parallel.cpp | |
parent | 97af49fa395df77e4c18af0e1655b2fee67c9686 (diff) |
parallel : add option to load external prompt file (#3416)
* Enable external file and add datestamp
* Add name of external file at end
* Upload ToK2024
* Delete ToK2024.txt
* Experiments with jeopardy
* Move ParallelQuestions to /proimpts and rename
* Interim commit
* Interim commit
* Final revision
* Remove trailing whitespace
* remove cmake_all.sh
* Remove cmake_all.sh
* Changed .gitignore
* Improved reporting and new question files.
* Corrected typo
* More LLM questions
* Update LLM-questions.txt
* Yet more LLM-questions
* Remove jeopardy results file
* Reinstate original jeopardy.sh
* Update examples/parallel/parallel.cpp
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/parallel/parallel.cpp')
-rw-r--r-- | examples/parallel/parallel.cpp | 56 |
1 files changed, 51 insertions, 5 deletions
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index ffd7b1db..721888da 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -10,6 +10,7 @@ #include <cstdio> #include <string> #include <vector> +#include <ctime> // trim whitespace from the beginning and end of a string static std::string trim(const std::string & str) { @@ -70,6 +71,26 @@ struct client { std::vector<llama_token> tokens_prev; }; +static void print_date_time() { + std::time_t current_time = std::time(nullptr); + std::tm* local_time = std::localtime(¤t_time); + char buffer[80]; + strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", local_time); + + printf("\n\033[35mrun parameters as at %s\033[0m\n", buffer); +} + +// Define a split string function to ... +static std::vector<std::string> split_string(const std::string& input, char delimiter) { + std::vector<std::string> tokens; + std::istringstream stream(input); + std::string token; + while (std::getline(stream, token, delimiter)) { + tokens.push_back(token); + } + return tokens; +} + int main(int argc, char ** argv) { srand(1234); @@ -104,6 +125,23 @@ int main(int argc, char ** argv) { params.logits_all = true; std::tie(model, ctx) = llama_init_from_gpt_params(params); + // load the prompts from an external file if there are any + if (params.prompt.empty()) { + printf("\n\033[32mNo new questions so proceed with build-in defaults.\033[0m\n"); + } else { + // Output each line of the input params.prompts vector and copy to k_prompts + int index = 0; + printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file.c_str()); + + std::vector<std::string> prompts = split_string(params.prompt, '\n'); + for (const auto& prompt : prompts) { + k_prompts.resize(index + 1); + k_prompts[index] = prompt; + index++; + printf("%3d prompt: %s\n", index, prompt.c_str()); + } + } + fprintf(stderr, "\n\n"); fflush(stderr); @@ -233,7 +271,7 @@ int main(int argc, char ** argv) { client.n_decoded = 0; client.i_batch = batch.n_tokens - 1; - LOG_TEE("\033[1mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id); + LOG_TEE("\033[31mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id); g_seq_id += 1; @@ -336,8 +374,8 @@ int main(int argc, char ** argv) { const auto t_main_end = ggml_time_us(); - LOG_TEE("\033[1mClient %3d, seq %4d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \n\nInput: %s\nResponse: %s\n\n", - client.id, client.seq_id, client.n_prompt, client.n_decoded, + LOG_TEE("\033[31mClient %3d, seq %3d/%3d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \nInput: %s\n\033[35mResponse: %s\033[0m\n\n", + client.id, client.seq_id, n_seq, client.n_prompt, client.n_decoded, (t_main_end - client.t_start_prompt) / 1e6, (double) (client.n_prompt + client.n_decoded) / (t_main_end - client.t_start_prompt) * 1e6, n_cache_miss, @@ -357,13 +395,21 @@ int main(int argc, char ** argv) { const auto t_main_end = ggml_time_us(); - LOG_TEE("\n\n"); + print_date_time(); + + LOG_TEE("\n%s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n", __func__, n_clients, n_seq, cont_batching, n_tokens_system); + if (params.prompt_file.empty()) { + params.prompt_file = "used built-in defaults"; + } + LOG_TEE("External prompt file: \033[32m%s\033[0m\n", params.prompt_file.c_str()); + LOG_TEE("Model and path used: \033[32m%s\033[0m\n\n", params.model.c_str()); + LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6); LOG_TEE("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6); LOG_TEE("Total speed (AVG): %6s speed: %5.2f t/s\n", "", (double) (n_total_prompt + n_total_gen) / (t_main_end - t_main_start) * 1e6); LOG_TEE("Cache misses: %6d\n", n_cache_miss); - LOG_TEE("\n\n"); + LOG_TEE("\n"); llama_print_timings(ctx); |