summaryrefslogtreecommitdiff
path: root/ggml.c
diff options
context:
space:
mode:
Diffstat (limited to 'ggml.c')
-rw-r--r--ggml.c137
1 files changed, 119 insertions, 18 deletions
diff --git a/ggml.c b/ggml.c
index 394fb459..855d519b 100644
--- a/ggml.c
+++ b/ggml.c
@@ -19394,7 +19394,7 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
////////////////////////////////////////////////////////////////////////////////
struct gguf_str {
- uint32_t n;
+ uint64_t n; // GGUFv2
char * data;
};
@@ -19408,9 +19408,12 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_FLOAT32] = sizeof(float),
[GGUF_TYPE_BOOL] = sizeof(bool),
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
+ [GGUF_TYPE_UINT64] = sizeof(uint64_t),
+ [GGUF_TYPE_INT64] = sizeof(int64_t),
+ [GGUF_TYPE_FLOAT64] = sizeof(double),
[GGUF_TYPE_ARRAY] = 0, // undefined
};
-static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = "u8",
@@ -19423,8 +19426,11 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_BOOL] = "bool",
[GGUF_TYPE_STRING] = "str",
[GGUF_TYPE_ARRAY] = "arr",
+ [GGUF_TYPE_UINT64] = "u64",
+ [GGUF_TYPE_INT64] = "i64",
+ [GGUF_TYPE_FLOAT64] = "f64",
};
-static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
union gguf_value {
uint8_t uint8;
@@ -19434,6 +19440,9 @@ union gguf_value {
uint32_t uint32;
int32_t int32;
float float32;
+ uint64_t uint64;
+ int64_t int64;
+ double float64;
bool bool_;
struct gguf_str str;
@@ -19441,7 +19450,7 @@ union gguf_value {
struct {
enum gguf_type type;
- uint32_t n;
+ uint64_t n; // GGUFv2
void * data;
} arr;
};
@@ -19449,8 +19458,6 @@ union gguf_value {
struct gguf_kv {
struct gguf_str key;
- uint32_t n_bytes; // TODO: is this actually needed?
-
enum gguf_type type;
union gguf_value value;
};
@@ -19458,15 +19465,15 @@ struct gguf_kv {
struct gguf_header {
uint32_t magic;
uint32_t version;
- uint32_t n_tensors;
- uint32_t n_kv;
+ uint64_t n_tensors; // GGUFv2
+ uint64_t n_kv; // GGUFv2
};
struct gguf_tensor_info {
struct gguf_str name;
uint32_t n_dims;
- uint32_t ne[GGML_MAX_DIMS];
+ uint64_t ne[GGML_MAX_DIMS];
enum ggml_type type;
@@ -19497,19 +19504,32 @@ static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset)
return n == size;
}
-static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
+// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
p->n = 0;
p->data = NULL;
bool ok = true;
- // TODO: how to avoid mallocs for strings?
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
return ok;
}
+static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
+ p->n = 0;
+ p->data = NULL;
+
+ bool ok = true;
+
+ uint32_t n = 0;
+ ok = ok && gguf_fread_el(file, &n, sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n;
+ ok = ok && gguf_fread_el(file, p->data, p->n, offset);
+
+ return ok;
+}
+
struct gguf_context * gguf_init_empty(void) {
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
@@ -19565,8 +19585,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ctx->data = NULL;
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
- ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
- ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
+
+ if (ctx->header.version == 1) {
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+ uint32_t n_tensors = 0;
+ uint32_t n_kv = 0;
+
+ ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
+ ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset);
+
+ ctx->header.n_tensors = n_tensors;
+ ctx->header.n_kv = n_kv;
+ } else {
+ ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
+ ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
+ }
if (!ok) {
fprintf(stderr, "%s: failed to read header\n", __func__);
@@ -19576,6 +19609,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
}
}
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+ bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur;
+ if (ctx->header.version == 1) {
+ gguf_fread_str = gguf_fread_str_v1;
+ }
+
// read the kv pairs
{
ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
@@ -19585,9 +19624,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
- ok = ok && gguf_fread_str(file, &kv->key, &offset);
- //ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
- ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
+ ok = ok && gguf_fread_str(file, &kv->key, &offset);
+ ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
@@ -19599,12 +19637,23 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
+ case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
+ case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
+ case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
case GGUF_TYPE_ARRAY:
{
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
- ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
+
+ if (ctx->header.version == 1) {
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+ uint32_t n = 0;
+ ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset);
+ kv->value.arr.n = n;
+ } else {
+ ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
+ }
switch (kv->value.arr.type) {
case GGUF_TYPE_UINT8:
@@ -19614,6 +19663,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
case GGUF_TYPE_UINT32:
case GGUF_TYPE_INT32:
case GGUF_TYPE_FLOAT32:
+ case GGUF_TYPE_UINT64:
+ case GGUF_TYPE_INT64:
+ case GGUF_TYPE_FLOAT64:
case GGUF_TYPE_BOOL:
{
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
@@ -19660,7 +19712,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ok = ok && gguf_fread_str(file, &info->name, &offset);
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
for (uint32_t j = 0; j < info->n_dims; ++j) {
- ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
+ if (ctx->header.version == 1) {
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+ uint32_t t = 0;
+ ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset);
+ info->ne[j] = t;
+ } else {
+ ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
+ }
}
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
@@ -19954,6 +20013,18 @@ float gguf_get_val_f32(struct gguf_context * ctx, int i) {
return ctx->kv[i].value.float32;
}
+uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
+ return ctx->kv[i].value.uint64;
+}
+
+int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
+ return ctx->kv[i].value.int64;
+}
+
+double gguf_get_val_f64(struct gguf_context * ctx, int i) {
+ return ctx->kv[i].value.float64;
+}
+
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
return ctx->kv[i].value.bool_;
}
@@ -20056,6 +20127,27 @@ void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
ctx->kv[idx].value.float32 = val;
}
+void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
+ const int idx = gguf_get_or_add_key(ctx, key);
+
+ ctx->kv[idx].type = GGUF_TYPE_UINT64;
+ ctx->kv[idx].value.uint64 = val;
+}
+
+void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
+ const int idx = gguf_get_or_add_key(ctx, key);
+
+ ctx->kv[idx].type = GGUF_TYPE_INT64;
+ ctx->kv[idx].value.int64 = val;
+}
+
+void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
+ const int idx = gguf_get_or_add_key(ctx, key);
+
+ ctx->kv[idx].type = GGUF_TYPE_FLOAT64;
+ ctx->kv[idx].value.float64 = val;
+}
+
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
const int idx = gguf_get_or_add_key(ctx, key);
@@ -20106,6 +20198,9 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
+ case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
+ case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
+ case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
case GGUF_TYPE_ARRAY:
@@ -20267,6 +20362,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
+ case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
+ case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
+ case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
case GGUF_TYPE_ARRAY:
@@ -20282,6 +20380,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
case GGUF_TYPE_UINT32:
case GGUF_TYPE_INT32:
case GGUF_TYPE_FLOAT32:
+ case GGUF_TYPE_UINT64:
+ case GGUF_TYPE_INT64:
+ case GGUF_TYPE_FLOAT64:
case GGUF_TYPE_BOOL:
{
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);