summaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp')
-rw-r--r--llama.cpp131
1 files changed, 124 insertions, 7 deletions
diff --git a/llama.cpp b/llama.cpp
index 8500b20c..3cc3fc9f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -604,6 +604,60 @@ static int8_t llama_rope_scaling_type_from_string(const std::string & name) {
return LLAMA_ROPE_SCALING_UNSPECIFIED;
}
+static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
+ switch (type) {
+ case GGUF_TYPE_UINT8: return std::to_string(((const uint8_t *)data)[i]);
+ case GGUF_TYPE_INT8: return std::to_string(((const int8_t *)data)[i]);
+ case GGUF_TYPE_UINT16: return std::to_string(((const uint16_t *)data)[i]);
+ case GGUF_TYPE_INT16: return std::to_string(((const int16_t *)data)[i]);
+ case GGUF_TYPE_UINT32: return std::to_string(((const uint32_t *)data)[i]);
+ case GGUF_TYPE_INT32: return std::to_string(((const int32_t *)data)[i]);
+ case GGUF_TYPE_UINT64: return std::to_string(((const uint64_t *)data)[i]);
+ case GGUF_TYPE_INT64: return std::to_string(((const int64_t *)data)[i]);
+ case GGUF_TYPE_FLOAT32: return std::to_string(((const float *)data)[i]);
+ case GGUF_TYPE_FLOAT64: return std::to_string(((const double *)data)[i]);
+ case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? "true" : "false";
+ default: return format("unknown type %d", type);
+ }
+}
+
+static std::string gguf_kv_to_str(struct gguf_context * ctx_gguf, int i) {
+ const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+
+ switch (type) {
+ case GGUF_TYPE_STRING:
+ return gguf_get_val_str(ctx_gguf, i);
+ case GGUF_TYPE_ARRAY:
+ {
+ const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
+ int arr_n = gguf_get_arr_n(ctx_gguf, i);
+ const void * data = gguf_get_arr_data(ctx_gguf, i);
+ std::stringstream ss;
+ ss << "[";
+ for (int j = 0; j < arr_n; j++) {
+ if (arr_type == GGUF_TYPE_STRING) {
+ std::string val = gguf_get_arr_str(ctx_gguf, i, j);
+ // escape quotes
+ replace_all(val, "\\", "\\\\");
+ replace_all(val, "\"", "\\\"");
+ ss << '"' << val << '"';
+ } else if (arr_type == GGUF_TYPE_ARRAY) {
+ ss << "???";
+ } else {
+ ss << gguf_data_to_str(arr_type, data, j);
+ }
+ if (j < arr_n - 1) {
+ ss << ", ";
+ }
+ }
+ ss << "]";
+ return ss.str();
+ }
+ default:
+ return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
+ }
+}
+
//
// ggml helpers
//
@@ -1327,6 +1381,9 @@ struct llama_model {
int n_gpu_layers;
+ // gguf metadata
+ std::unordered_map<std::string, std::string> gguf_kv;
+
// context
struct ggml_context * ctx = NULL;
@@ -1785,10 +1842,10 @@ struct llama_model_loader {
case GGML_TYPE_Q5_K: ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M; break;
case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K; break;
default:
- {
- LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
- ftype = LLAMA_FTYPE_ALL_F32;
- } break;
+ {
+ LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
+ ftype = LLAMA_FTYPE_ALL_F32;
+ } break;
}
// this is a way to mark that we have "guessed" the file type
@@ -1802,10 +1859,20 @@ struct llama_model_loader {
}
for (int i = 0; i < n_kv; i++) {
- const char * name = gguf_get_key(ctx_gguf, i);
- const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+ const char * name = gguf_get_key(ctx_gguf, i);
+ const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+ const std::string type_name =
+ type == GGUF_TYPE_ARRAY
+ ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx_gguf, i)), gguf_get_arr_n(ctx_gguf, i))
+ : gguf_type_name(type);
+
+ std::string value = gguf_kv_to_str(ctx_gguf, i);
+ const size_t MAX_VALUE_LEN = 40;
+ if (value.size() > MAX_VALUE_LEN) {
+ value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());
+ }
- LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type));
+ LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str());
}
// print type counts
@@ -2100,6 +2167,17 @@ static void llm_load_hparams(
auto & hparams = model.hparams;
+ // get metadata as string
+ for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
+ enum gguf_type type = gguf_get_kv_type(ctx, i);
+ if (type == GGUF_TYPE_ARRAY) {
+ continue;
+ }
+ const char * name = gguf_get_key(ctx, i);
+ const std::string value = gguf_kv_to_str(ctx, i);
+ model.gguf_kv.emplace(name, value);
+ }
+
// get general kv
GGUF_GET_KEY(ctx, model.name, gguf_get_val_str, GGUF_TYPE_STRING, false, kv(LLM_KV_GENERAL_NAME));
@@ -8671,6 +8749,45 @@ float llama_rope_freq_scale_train(const struct llama_model * model) {
return model->hparams.rope_freq_scale_train;
}
+int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+ const auto & it = model->gguf_kv.find(key);
+ if (it == model->gguf_kv.end()) {
+ if (buf_size > 0) {
+ buf[0] = '\0';
+ }
+ return -1;
+ }
+ return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
+int llama_model_meta_count(const struct llama_model * model) {
+ return (int)model->gguf_kv.size();
+}
+
+int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+ if (i < 0 || i >= (int)model->gguf_kv.size()) {
+ if (buf_size > 0) {
+ buf[0] = '\0';
+ }
+ return -1;
+ }
+ auto it = model->gguf_kv.begin();
+ std::advance(it, i);
+ return snprintf(buf, buf_size, "%s", it->first.c_str());
+}
+
+int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+ if (i < 0 || i >= (int)model->gguf_kv.size()) {
+ if (buf_size > 0) {
+ buf[0] = '\0';
+ }
+ return -1;
+ }
+ auto it = model->gguf_kv.begin();
+ std::advance(it, i);
+ return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
return snprintf(buf, buf_size, "%s %s %s",
llama_model_arch_name(model->arch).c_str(),