Diffstat (limited to 'examples')
-rw-r--r-- | examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp |  8
-rw-r--r-- | examples/embd-input/embd-input-lib.cpp                       |  2
-rw-r--r-- | examples/embedding/embedding.cpp                             |  2
-rw-r--r-- | examples/gptneox-wip/falcon-main.cpp                         |  2
-rw-r--r-- | examples/gptneox-wip/gptneox-main.cpp                        |  2
-rw-r--r-- | examples/main/main.cpp                                       | 19
-rw-r--r-- | examples/perplexity/perplexity.cpp                           |  2
-rw-r--r-- | examples/quantize-stats/quantize-stats.cpp                   |  2
-rw-r--r-- | examples/quantize/quantize.cpp                               |  7
-rw-r--r-- | examples/save-load-state/save-load-state.cpp                 |  4
-rw-r--r-- | examples/server/server.cpp                                   |  8
-rw-r--r-- | examples/train-text-from-scratch/train-text-from-scratch.cpp | 46
12 files changed, 39 insertions, 65 deletions
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 9e856c21..293b455d 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -1,5 +1,6 @@
 #include "ggml.h"
 #include "llama.h"
+#include "common.h"

 #include <unordered_map>
 #include <vector>
@@ -499,10 +500,10 @@ struct llama_file {
         errno = 0;
         std::size_t ret = std::fread(ptr, size, 1, fp);
         if (ferror(fp)) {
-            throw std::runtime_error(format("read error: %s", strerror(errno)));
+            die_fmt("fread failed: %s", strerror(errno));
         }
         if (ret != 1) {
-            throw std::runtime_error(std::string("unexpectedly reached end of file"));
+            die("unexpectedly reached end of file");
         }
     }
@@ -597,8 +598,7 @@ void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab)
         printf("Assuming llama2.c vocabulary since %s is not a gguf file\n", filename);
         llama_file file(filename, "rb");
         if (!file.fp) {
-            fprintf(stderr, "error: %s: %s\n", strerror(errno), filename);
-            exit(1);
+            die_fmt("%s: %s", strerror(errno), filename);
         }
         const int n_vocab = config->vocab_size;
         /* uint32_t max_token_length = */ file.read_u32(); // unused
diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp
index 036bdb39..87aac347 100644
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -23,7 +23,7 @@ extern "C" {
 struct MyModel* create_mymodel(int argc, char ** argv) {
     gpt_params params;

-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return nullptr;
     }
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 93d583b5..49ab3e06 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -11,7 +11,7 @@ int main(int argc, char ** argv) {
     gpt_params params;

-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }
diff --git a/examples/gptneox-wip/falcon-main.cpp b/examples/gptneox-wip/falcon-main.cpp
index d4b130b2..7f9a1620 100644
--- a/examples/gptneox-wip/falcon-main.cpp
+++ b/examples/gptneox-wip/falcon-main.cpp
@@ -953,7 +953,7 @@ int main(int argc, char ** argv) {
     gpt_params params;

-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }
diff --git a/examples/gptneox-wip/gptneox-main.cpp b/examples/gptneox-wip/gptneox-main.cpp
index b6cc46c5..55eba0cd 100644
--- a/examples/gptneox-wip/gptneox-main.cpp
+++ b/examples/gptneox-wip/gptneox-main.cpp
@@ -925,7 +925,7 @@ int main(int argc, char ** argv) {
     gpt_params params;

-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 9201b53b..c9ca7719 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -48,8 +48,9 @@ static bool is_interacting = false;

 void write_logfile(
     const llama_context * ctx, const gpt_params & params, const llama_model * model,
-    const std::vector<llama_token> input_tokens, const std::string output, const std::vector<llama_token> output_tokens) {
-
+    const std::vector<llama_token> & input_tokens, const std::string & output,
+    const std::vector<llama_token> & output_tokens
+) {
     if (params.logdir.empty()) {
         return;
     }
@@ -109,7 +110,7 @@ int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;

-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }
@@ -303,7 +304,7 @@ int main(int argc, char ** argv) {

     // debug message about similarity of saved session, if applicable
     size_t n_matching_session_tokens = 0;
-    if (session_tokens.size() > 0) {
+    if (!session_tokens.empty()) {
         for (llama_token id : session_tokens) {
             if (n_matching_session_tokens >= embd_inp.size() || id != embd_inp[n_matching_session_tokens]) {
                 break;
@@ -401,7 +402,7 @@ int main(int argc, char ** argv) {
         LOG_TEE("%s: interactive mode on.\n", __func__);

-        if (params.antiprompt.size()) {
+        if (!params.antiprompt.empty()) {
             for (const auto & antiprompt : params.antiprompt) {
                 LOG_TEE("Reverse prompt: '%s'\n", antiprompt.c_str());
             }
@@ -499,7 +500,7 @@ int main(int argc, char ** argv) {
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
-        if (embd.size() > 0) {
+        if (!embd.empty()) {
             // Note: n_ctx - 4 here is to match the logic for commandline prompt handling via
             // --prompt or --file which uses the same value.
             int max_embd_size = n_ctx - 4;
@@ -624,7 +625,7 @@ int main(int argc, char ** argv) {
                 LOG("n_past = %d\n", n_past);
             }

-            if (embd.size() > 0 && !path_session.empty()) {
+            if (!embd.empty() && !path_session.empty()) {
                 session_tokens.insert(session_tokens.end(), embd.begin(), embd.end());
                 n_session_consumed = session_tokens.size();
             }
@@ -695,7 +696,7 @@ int main(int argc, char ** argv) {
         // if not currently processing queued inputs;
         if ((int) embd_inp.size() <= n_consumed) {
             // check for reverse prompt
-            if (params.antiprompt.size()) {
+            if (!params.antiprompt.empty()) {
                 std::string last_output;
                 for (auto id : last_tokens) {
                     last_output += llama_token_to_piece(ctx, id);
@@ -732,7 +733,7 @@ int main(int argc, char ** argv) {
             LOG("found EOS token\n");

             if (params.interactive) {
-                if (params.antiprompt.size() != 0) {
+                if (!params.antiprompt.empty()) {
                     // tokenize and inject first reverse prompt
                     const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
                     embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 843b2ae3..1b760683 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -655,7 +655,7 @@ int main(int argc, char ** argv) {
     gpt_params params;

     params.n_batch = 512;
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 06ce18f0..6ce03ba7 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -71,7 +71,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
 }

 // Check if a layer is included/excluded by command line
-bool layer_included(const quantize_stats_params params, const std::string & layer) {
+bool layer_included(const quantize_stats_params & params, const std::string & layer) {
     for (const auto& excluded : params.exclude_layers) {
         if (std::regex_search(layer, std::regex(excluded))) {
             return false;
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index c174be06..1bf18248 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -143,10 +143,9 @@ int main(int argc, char ** argv) {
         if (!try_parse_ftype(argv[arg_idx], params.ftype, ftype_str)) {
             fprintf(stderr, "%s: invalid ftype '%s'\n", __func__, argv[3]);
             return 1;
-        } else {
-            if (ftype_str == "COPY") {
-                params.only_copy = true;
-            }
+        }
+        if (ftype_str == "COPY") {
+            params.only_copy = true;
         }
         arg_idx++;
     }
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 573bc4ef..14e9501c 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -13,7 +13,7 @@ int main(int argc, char ** argv) {
     params.repeat_last_n = 64;
     params.prompt = "The quick brown fox";

-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }
@@ -44,7 +44,7 @@ int main(int argc, char ** argv) {
         llama_free_model(model);
         return 1;
     }
-    auto tokens = llama_tokenize(ctx, params.prompt.c_str(), true);
+    auto tokens = llama_tokenize(ctx, params.prompt, true);
     auto n_prompt_tokens = tokens.size();
     if (n_prompt_tokens < 1) {
         fprintf(stderr, "%s : failed to tokenize prompt\n", __func__);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 6b606447..3f3c6465 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -139,7 +139,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
 }

 // convert a vector of completion_token_output to json
-static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> probs)
+static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
 {
     json out = json::array();
     for (const auto &prob : probs)
@@ -271,7 +271,7 @@ struct llama_server_context
         return true;
     }

-    std::vector<llama_token> tokenize(json json_prompt, bool add_bos)
+    std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
     {
         // If `add_bos` is true, we only add BOS, when json_prompt is a string,
         // or the first element of the json_prompt array is a string.
@@ -611,7 +611,7 @@ struct llama_server_context

     completion_token_output doCompletion()
     {
-        const completion_token_output token_with_probs = nextToken();
+        auto token_with_probs = nextToken();

         const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(ctx, token_with_probs.tok);
         generated_text += token_text;
@@ -1255,7 +1255,7 @@ void beam_search_callback(void * callback_data, llama_beams_state beams_state) {

 struct token_translator {
     llama_context * ctx;
     std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
-    std::string operator()(completion_token_output cto) const { return (*this)(cto.tok); }
+    std::string operator()(const completion_token_output & cto) const { return (*this)(cto.tok); }
 };

 void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) {
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 6fe85d41..947aa7ed 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -169,10 +169,6 @@ struct my_llama_hparams {
     float rope_freq_base  = 10000.0f;
     float rope_freq_scale = 1.0f;
-
-    bool operator!=(const my_llama_hparams& other) const {
-        return memcmp(this, &other, sizeof(my_llama_hparams));
-    }
 };

 struct my_llama_layer {
@@ -929,28 +925,6 @@ void get_example_targets_batch(struct llama_context * lctx, const int * train_sa
     }
 }

-
-#ifdef __GNUC__
-#ifdef __MINGW32__
-__attribute__((format(gnu_printf, 1, 2)))
-#else
-__attribute__((format(printf, 1, 2)))
-#endif
-#endif
-static std::string format(const char * fmt, ...) {
-    va_list ap, ap2;
-    va_start(ap, fmt);
-    va_copy(ap2, ap);
-    int size = vsnprintf(NULL, 0, fmt, ap);
-    GGML_ASSERT(size >= 0 && size < INT_MAX);
-    std::vector<char> buf(size + 1);
-    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
-    GGML_ASSERT(size2 == size);
-    va_end(ap2);
-    va_end(ap);
-    return std::string(buf.data(), size);
-}
-
 int tokenize_file(struct llama_context * lctx, const char * filename, std::vector<llama_token>& out) {
     FILE * fp = std::fopen(filename, "rb");
     if (fp == NULL) {
@@ -983,10 +957,10 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
     out.resize(size+1);

     if (std::fread(buf.data(), size, 1, fp) != 1) {
-        throw std::runtime_error(std::string("unexpectedly reached end of file"));
+        die("unexpectedly reached end of file");
     }
     if (ferror(fp)) {
-        throw std::runtime_error(format("read error: %s", strerror(errno)));
+        die_fmt("fread failed: %s", strerror(errno));
     }

     buf[size] = '\0';
@@ -1047,11 +1021,11 @@ void shuffle_ints(int * begin, int * end) {
         if (kid >= 0) { \
             enum gguf_type ktype = gguf_get_kv_type(ctx, kid); \
             if (ktype != (type)) { \
-                throw std::runtime_error(format("key %s has wrong type: %s", skey.c_str(), gguf_type_name(ktype))); \
+                die_fmt("key %s has wrong type: %s", skey.c_str(), gguf_type_name(ktype)); \
             } \
             (dst) = func(ctx, kid); \
         } else if (req) { \
-            throw std::runtime_error(format("key not found in model: %s", skey.c_str())); \
+            die_fmt("key not found in model: %s", skey.c_str()); \
         } \
     }
@@ -1136,7 +1110,7 @@ void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_g
         read_tensor_by_name(opt->lbfgs.lms, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_S);
         read_tensor_by_name(opt->lbfgs.lmy, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_Y);
     } else {
-        throw std::runtime_error("unknown optimizer type\n");
+        die("unknown optimizer type");
     }
 }
@@ -1315,20 +1289,20 @@ void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_mod
         const int token_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_LIST));
         if (token_idx == -1) {
-            throw std::runtime_error("cannot find tokenizer vocab in model file\n");
+            die("cannot find tokenizer vocab in model file");
         }
         const uint32_t n_vocab = gguf_get_arr_n(vctx, token_idx);

         const int score_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_SCORES));
         if (score_idx == -1) {
-            throw std::runtime_error("cannot find tokenizer scores in model file\n");
+            die("cannot find tokenizer scores in model file");
         }
         const float * scores = (const float * ) gguf_get_arr_data(vctx, score_idx);

         const int toktype_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE));
         if (toktype_idx == -1) {
-            throw std::runtime_error("cannot find token type list in GGUF file\n");
+            die("cannot find token type list in GGUF file");
         }
         const int * toktypes = (const int * ) gguf_get_arr_data(vctx, toktype_idx);
@@ -1356,7 +1330,7 @@ void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_mod
         // read and copy bpe merges
         const int merges_keyidx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_MERGES));
         if (merges_keyidx == -1) {
-            throw std::runtime_error("cannot find tokenizer merges in model file\n");
+            die("cannot find tokenizer merges in model file");
         }

         const int n_merges = gguf_get_arr_n(vctx, merges_keyidx);
@@ -1988,7 +1962,7 @@ void opt_callback(void * vdata, float * sched) {
         float min_sched = params->adam_min_alpha / params->adam_alpha;
         *sched = min_sched + *sched * (1.0f - min_sched);

-        int impr_plot = std::isnan(opt->loss_after) ? 0 : -(int)(1 + (opt->loss_before - opt->loss_after) * 10.0f + 0.5f);
+        int impr_plot = std::isnan(opt->loss_after) ? 0 : -std::lround(1 + (opt->loss_before - opt->loss_after) * 10.0f);
         printf("%s: iter=%*d, sched=%f loss0=%f loss=%f | improvement: %*d>\n", __func__, 6, opt->iter, *sched, opt->loss_before, opt->loss_after, impr_plot, (int)0);

         if (data->shuffle_countdown < n_batch) {