diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-17 18:41:30 +0300 |
---|---|---|
committer | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-22 12:02:51 +0300 |
commit | f6863cfa1bbc5ac42b78837b355e45d82246a472 (patch) | |
tree | b4718a3e7f7796f2bd5f8f4e6f81a812eeadae32 /ggml-common.h | |
parent | 765622ff8f921319f6b30c556b378d8320500c95 (diff) |
bitnet: add 2 bpw quantization
The scalar dot product already achieves 37 t/s for TG!
Diffstat (limited to 'ggml-common.h')
-rw-r--r-- | ggml-common.h | 8 |
1 files changed, 8 insertions, 0 deletions
diff --git a/ggml-common.h b/ggml-common.h index 148b41d5..c7f865e8 100644 --- a/ggml-common.h +++ b/ggml-common.h @@ -380,6 +380,14 @@ typedef struct { uint8_t qh[QK_IQ1BN/16]; } block_iq1_bn; static_assert(sizeof(block_iq1_bn) == sizeof(uint16_t) + QK_IQ1BN/8 + QK_IQ1BN/16, "wrong iq1_bn block size/padding"); +// +// Bitnet - implemented as 2.0 bpw +// +#define QK_IQ2BN 64 +typedef struct { + uint8_t qs[QK_IQ2BN/4]; +} block_iq2_bn; +static_assert(sizeof(block_iq2_bn) == QK_IQ2BN/4, "wrong iq2_bn block size/padding"); // Used by IQ1_M quants typedef union { |