Diffstat (limited to 'examples/embd-input/embd-input-lib.cpp')
-rw-r--r--  examples/embd-input/embd-input-lib.cpp  13  ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp
index 9bd4d347..99e6bdad 100644
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -48,8 +48,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
// print system information
{
fprintf(stderr, "\n");
- fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
- params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
+ fprintf(stderr, "%s\n", get_system_info(params).c_str());
}
struct MyModel * ret = new MyModel();
ret->ctx = ctx;
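Note: this hunk swaps the hand-rolled system-info fprintf for the get_system_info() helper from llama.cpp's common library, which folds the thread count and llama_print_system_info() output into a single string. A minimal sketch of the new call site (the wrapper name print_sysinfo is mine):

    #include "common.h"   // gpt_params, get_system_info()
    #include <cstdio>

    static void print_sysinfo(const gpt_params & params) {
        fprintf(stderr, "\n");
        // prints one line: "system_info: n_threads = ... | <feature flags>"
        fprintf(stderr, "%s\n", get_system_info(params).c_str());
    }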
@@ -71,7 +70,7 @@ bool eval_float(void * model, float * input, int N){
MyModel * mymodel = (MyModel*)model;
llama_context * ctx = mymodel->ctx;
gpt_params params = mymodel->params;
- int n_emb = llama_n_embd(ctx);
+ int n_emb = llama_n_embd(llama_get_model(ctx));
int n_past = mymodel->n_past;
int n_batch = N; // params.n_batch;
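Note: this hunk tracks the upstream move of per-model getters onto llama_model; llama_get_model(ctx) recovers the model from a context, and llama_n_embd() (like llama_n_vocab() further down) now takes the model rather than the context. As a sketch, sizing an embedding input buffer for eval_float() under the new API (the helper is hypothetical):

    #include "llama.h"
    #include <vector>

    // N embedding positions, each one row of n_embd floats
    static std::vector<float> alloc_embd_input(llama_context * ctx, int N) {
        const int n_emb = llama_n_embd(llama_get_model(ctx));
        return std::vector<float>(size_t(N) * n_emb, 0.0f);
    }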
@@ -81,7 +80,7 @@ bool eval_float(void * model, float * input, int N){
n_eval = n_batch;
}
llama_batch batch = { int32_t(n_eval), nullptr, (input+i*n_emb), nullptr, nullptr, nullptr, n_past, 1, 0, };
- if (llama_decode(ctx, batch, params.n_threads)) {
+ if (llama_decode(ctx, batch)) {
fprintf(stderr, "%s : failed to eval\n", __func__);
return false;
}
@@ -102,7 +101,7 @@ bool eval_tokens(void * model, std::vector<llama_token> tokens) {
if (n_eval > params.n_batch) {
n_eval = params.n_batch;
}
- if (llama_decode(ctx, llama_batch_get_one(&tokens[i], n_eval, n_past, 0), params.n_threads)) {
+ if (llama_decode(ctx, llama_batch_get_one(&tokens[i], n_eval, n_past, 0))) {
fprintf(stderr, "%s : failed to eval\n", __func__);
return false;
}
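Note: both eval_float() and eval_tokens() pick up the same signature change: llama_decode() no longer takes a per-call thread count, threading instead being configured once at context-creation time (via llama_context_params in the llama.h of this era; treat the exact field names as an assumption). A sketch of the chunked eval loop in the new style:

    #include "llama.h"
    #include <cstdio>
    #include <vector>

    static bool eval_all(llama_context * ctx, std::vector<llama_token> & tokens,
                         int n_batch, int & n_past) {
        for (int i = 0; i < (int) tokens.size(); i += n_batch) {
            int n_eval = (int) tokens.size() - i;
            if (n_eval > n_batch) {
                n_eval = n_batch;
            }
            // llama_batch_get_one() wraps a raw token span as a one-sequence batch
            if (llama_decode(ctx, llama_batch_get_one(&tokens[i], n_eval, n_past, 0))) {
                fprintf(stderr, "eval_all: failed to eval\n");
                return false;
            }
            n_past += n_eval;
        }
        return true;
    }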
@@ -133,7 +132,7 @@ llama_token sampling_id(struct MyModel* mymodel) {
// out of user input, sample next token
const float temp = params.temp;
- const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k;
+ const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(llama_get_model(ctx)) : params.top_k;
const float top_p = params.top_p;
const float tfs_z = params.tfs_z;
const float typical_p = params.typical_p;
@@ -149,7 +148,7 @@ llama_token sampling_id(struct MyModel* mymodel) {
llama_token id = 0;
{
auto logits = llama_get_logits(ctx);
- auto n_vocab = llama_n_vocab(ctx);
+ auto n_vocab = llama_n_vocab(llama_get_model(ctx));
// Apply params.logit_bias map
for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) {
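Note: the sampling path follows the same pattern: the vocabulary size used for the top_k <= 0 fallback and for walking the logits now comes from the model. A hedged sketch of the logit-bias step this last hunk leads into (assuming gpt_params::logit_bias is a llama_token -> float map, as in the common library of this era; the bounds check is my addition):

    #include "llama.h"
    #include <unordered_map>

    // add a fixed bias to selected token logits before sampling
    static void apply_logit_bias(llama_context * ctx,
                                 const std::unordered_map<llama_token, float> & bias) {
        float * logits = llama_get_logits(ctx);
        const int n_vocab = llama_n_vocab(llama_get_model(ctx));
        for (const auto & it : bias) {
            if (it.first >= 0 && it.first < n_vocab) {
                logits[it.first] += it.second;
            }
        }
    }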