summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  ggml.c  17
-rw-r--r--  ggml.h  2
-rw-r--r--  gguf-py/gguf/constants.py  4
-rw-r--r--  gguf-py/gguf/gguf_reader.py  12
-rw-r--r--  gguf-py/gguf/gguf_writer.py  12
5 files changed, 40 insertions, 7 deletions
diff --git a/ggml.c b/ggml.c
index fbc66f65..c94006e5 100644
--- a/ggml.c
+++ b/ggml.c
@@ -470,6 +470,19 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.type_size = sizeof(int32_t),
.is_quantized = false,
},
+ [GGML_TYPE_I64] = {
+ .type_name = "i64",
+ .blck_size = 1,
+ .type_size = sizeof(int64_t),
+ .is_quantized = false,
+ },
+ [GGML_TYPE_F64] = {
+ .type_name = "f64",
+ .blck_size = 1,
+ .type_size = sizeof(double),
+ .is_quantized = false,
+ .nrows = 1,
+ },
[GGML_TYPE_F32] = {
.type_name = "f32",
.blck_size = 1,
@@ -12418,6 +12431,8 @@ static void ggml_compute_forward_alibi(
case GGML_TYPE_I8:
case GGML_TYPE_I16:
case GGML_TYPE_I32:
+ case GGML_TYPE_I64:
+ case GGML_TYPE_F64:
case GGML_TYPE_COUNT:
{
GGML_ASSERT(false);
@@ -12504,6 +12519,8 @@ static void ggml_compute_forward_clamp(
case GGML_TYPE_I8:
case GGML_TYPE_I16:
case GGML_TYPE_I32:
+ case GGML_TYPE_I64:
+ case GGML_TYPE_F64:
case GGML_TYPE_COUNT:
{
GGML_ASSERT(false);
diff --git a/ggml.h b/ggml.h
index ab26c8f5..c937d4a5 100644
--- a/ggml.h
+++ b/ggml.h
@@ -366,6 +366,8 @@ extern "C" {
GGML_TYPE_I8 = 24,
GGML_TYPE_I16 = 25,
GGML_TYPE_I32 = 26,
+ GGML_TYPE_I64 = 27,
+ GGML_TYPE_F64 = 28,
GGML_TYPE_COUNT,
};
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 2d7cf16c..458a641d 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -665,6 +665,8 @@ class GGMLQuantizationType(IntEnum):
I8 = 24
I16 = 25
I32 = 26
+ I64 = 27
+ F64 = 28
class GGUFEndian(IntEnum):
@@ -734,6 +736,8 @@ GGML_QUANT_SIZES = {
GGMLQuantizationType.I8: (1, 1),
GGMLQuantizationType.I16: (1, 2),
GGMLQuantizationType.I32: (1, 4),
+ GGMLQuantizationType.I64: (1, 8),
+ GGMLQuantizationType.F64: (1, 8),
}
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 1c10f575..33afac55 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -242,12 +242,15 @@ class GGUFReader:
n_bytes = n_elems * type_size // block_size
data_offs = int(start_offs + offset_tensor[0])
item_type: npt.DTypeLike
- if ggml_type == GGMLQuantizationType.F32:
+ if ggml_type == GGMLQuantizationType.F16:
+ item_count = n_elems
+ item_type = np.float16
+ elif ggml_type == GGMLQuantizationType.F32:
item_count = n_elems
item_type = np.float32
- elif ggml_type == GGMLQuantizationType.F16:
+ elif ggml_type == GGMLQuantizationType.F64:
item_count = n_elems
- item_type = np.float16
+ item_type = np.float64
elif ggml_type == GGMLQuantizationType.I8:
item_count = n_elems
item_type = np.int8
@@ -257,6 +260,9 @@ class GGUFReader:
elif ggml_type == GGMLQuantizationType.I32:
item_count = n_elems
item_type = np.int32
+ elif ggml_type == GGMLQuantizationType.I64:
+ item_count = n_elems
+ item_type = np.int64
else:
item_count = n_bytes
item_type = np.uint8
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 81b2eb88..1967b633 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -204,18 +204,22 @@ class GGUFWriter:
for i in range(n_dims):
self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
if raw_dtype is None:
- if tensor_dtype == np.float32:
- dtype = GGMLQuantizationType.F32
- elif tensor_dtype == np.float16:
+ if tensor_dtype == np.float16:
dtype = GGMLQuantizationType.F16
+ elif tensor_dtype == np.float32:
+ dtype = GGMLQuantizationType.F32
+ elif tensor_dtype == np.float64:
+ dtype = GGMLQuantizationType.F64
elif tensor_dtype == np.int8:
dtype = GGMLQuantizationType.I8
elif tensor_dtype == np.int16:
dtype = GGMLQuantizationType.I16
elif tensor_dtype == np.int32:
dtype = GGMLQuantizationType.I32
+ elif tensor_dtype == np.int64:
+ dtype = GGMLQuantizationType.I64
else:
- raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now")
+ raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
else:
dtype = raw_dtype
self.ti_data += self._pack("I", dtype)