diff options
Diffstat (limited to 'common/common.h')
-rw-r--r-- | common/common.h | 12 |
1 files changed, 10 insertions, 2 deletions
diff --git a/common/common.h b/common/common.h index 64601f99..0e2d3fa6 100644 --- a/common/common.h +++ b/common/common.h @@ -36,6 +36,7 @@ int32_t get_num_physical_cores(); struct gpt_params { uint32_t seed = -1; // RNG seed int32_t n_threads = get_num_physical_cores(); + int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads) int32_t n_predict = -1; // new tokens to predict int32_t n_ctx = 512; // context size int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) @@ -95,7 +96,6 @@ struct gpt_params { bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score - bool low_vram = false; // if true, reduce VRAM usage at the cost of performance bool mul_mat_q = true; // if true, use mul_mat_q kernels instead of cuBLAS bool memory_f16 = true; // use f16 instead of f32 for memory kv bool random_prompt = false; // do not randomize prompt if none provided @@ -126,6 +126,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params); void gpt_print_usage(int argc, char ** argv, const gpt_params & params); +std::string get_system_info(const gpt_params & params); + std::string gpt_random_prompt(std::mt19937 & rng); void process_escapes(std::string& input); @@ -135,6 +137,7 @@ void process_escapes(std::string& input); // std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params); +struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & params); struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params); // @@ -144,7 +147,12 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param // tokenizes a string into a vector of tokens // should work similar to Python's `tokenizer.encode` std::vector<llama_token> llama_tokenize( - struct llama_context * ctx, + const struct llama_context * ctx, + const std::string & text, + bool add_bos); + +std::vector<llama_token> llama_tokenize( + const struct llama_model * model, const std::string & text, bool add_bos); |