Diffstat (limited to 'llama.cpp')

 llama.cpp | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+), 0 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index c7a33364..d2a52bb0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1036,6 +1036,12 @@ static void llama_model_load_internal(
             case 40: model.type = e_model::MODEL_13B; break;
             case 60: model.type = e_model::MODEL_30B; break;
             case 80: model.type = e_model::MODEL_65B; break;
+            default:
+                {
+                    if (hparams.n_layer < 32) {
+                        model.type = e_model::MODEL_7B;
+                    }
+                } break;
         }
 
         hparams.n_ctx = n_ctx;
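
The added default case gives layer counts that match none of the known LLaMA sizes a fallback: anything under 32 layers is treated as a 7B-class model, so smaller custom models (for example, ones trained from scratch) can still load. A standalone sketch of the same idea, with hypothetical names that are not llama.cpp symbols:

    // Hypothetical sketch of the fallback heuristic; MODEL_UNKNOWN and
    // model_type_for_layers() are illustrative, not part of llama.cpp.
    enum e_model { MODEL_UNKNOWN, MODEL_7B, MODEL_13B, MODEL_30B, MODEL_65B };

    static e_model model_type_for_layers(int n_layer) {
        switch (n_layer) {
            case 32: return MODEL_7B;
            case 40: return MODEL_13B;
            case 60: return MODEL_30B;
            case 80: return MODEL_65B;
            default: // anything smaller than the 7B layout maps to 7B
                return n_layer < 32 ? MODEL_7B : MODEL_UNKNOWN;
        }
    }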
@@ -1200,6 +1206,7 @@ static void llama_model_load_internal(
             mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
 
     (void) vram_scratch;
+    (void) n_batch;
 #ifdef GGML_USE_CUBLAS
     vram_scratch = n_batch * MB;
     ggml_cuda_set_scratch_size(vram_scratch);
@@ -1227,6 +1234,7 @@ static void llama_model_load_internal(
         model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor);
     }
 
+    (void) tensor_split;
 #if defined(GGML_USE_CUBLAS)
     {
         ggml_cuda_set_tensor_split(tensor_split);
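
This hunk and the previous one apply the same idiom: in builds without GGML_USE_CUBLAS, n_batch and tensor_split are never read, and the cast-to-void marks each as deliberately unused so the compiler does not warn (e.g. -Wunused-parameter). A minimal sketch of the pattern, with an illustrative function:

    // configure_scratch() and set_scratch_size() are made-up stand-ins
    // for the CUBLAS-only code path; only the (void) idiom is the point.
    static void configure_scratch(int n_batch) {
        (void) n_batch; // no-op: silences unused-parameter warnings
    #ifdef GGML_USE_CUBLAS
        set_scratch_size((size_t) n_batch * 1024 * 1024); // only compiled with CUBLAS
    #endif
    }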
@@ -2161,6 +2169,10 @@ llama_token llama_sample_token_mirostat_v2(struct llama_context * ctx, llama_tok
         return -log2f(candidate.p) > *mu;
     }));
 
+    if (candidates->size == 0) {
+        candidates->size = 1;
+    }
+
     // Normalize the probabilities of the remaining words
     llama_sample_softmax(ctx, candidates);
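
The guard covers the edge case where every candidate's surprise -log2(p) already exceeds *mu: the truncation above (a std::find_if scan in the full source) then matches the first element, the computed size is 0, and the softmax below would run over an empty array. Clamping the size to 1 always keeps the most probable token. A self-contained check with illustrative numbers, not taken from the source:

    #include <cmath>
    #include <cstdio>

    int main() {
        const float mu  = 3.0f;                    // assumed target surprise
        const float p[] = { 0.10f, 0.05f, 0.02f }; // sorted, most probable first
        int size = 0;
        while (size < 3 && -log2f(p[size]) <= mu) {
            size++; // count the surviving prefix, mirroring the truncation
        }
        if (size == 0) {
            size = 1; // the new guard: never sample from an empty set
        }
        printf("candidates kept: %d\n", size); // prints 1: -log2(0.10) > 3
    }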
@@ -3287,6 +3299,19 @@ int llama_n_embd(const struct llama_context * ctx) {
     return ctx->model.hparams.n_embd;
 }
 
+int llama_get_vocab(
+        const struct llama_context * ctx,
+        const char * * strings,
+        float * scores,
+        int capacity) {
+    int n = std::min(capacity, (int) ctx->vocab.id_to_token.size());
+    for (int i = 0; i < n; ++i) {
+        strings[i] = ctx->vocab.id_to_token[i].tok.c_str();
+        scores[i]  = ctx->vocab.id_to_token[i].score;
+    }
+    return n;
+}
+
 float * llama_get_logits(struct llama_context * ctx) {
     return ctx->logits.data();
 }
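
The new llama_get_vocab fills caller-provided arrays with up to capacity token strings and scores and returns the number actually written. The strings point into the context's own vocabulary storage, so they remain valid only as long as the context does. A hypothetical caller sketch (dump_vocab is illustrative; llama_n_vocab is the existing vocab-size accessor):

    #include <cstdio>
    #include <vector>
    #include "llama.h"

    // Illustrative helper, not part of the patch: print every token
    // string and its score using the new accessor.
    static void dump_vocab(const struct llama_context * ctx) {
        const int n_vocab = llama_n_vocab(ctx);
        std::vector<const char *> strings(n_vocab);
        std::vector<float>        scores(n_vocab);

        const int n = llama_get_vocab(ctx, strings.data(), scores.data(), n_vocab);
        for (int i = 0; i < n; ++i) {
            printf("%6d  %8.3f  %s\n", i, scores[i], strings[i]);
        }
    }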