From f6863cfa1bbc5ac42b78837b355e45d82246a472 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Mon, 17 Jun 2024 18:41:30 +0300 Subject: bitnet: add 2 bpw quantization The scalar dot product already achieves 37 t/s for TG! --- ggml-quants.c | 1 + 1 file changed, 1 insertion(+) (limited to 'ggml-quants.c') diff --git a/ggml-quants.c b/ggml-quants.c index 31817b1c..f1ce1345 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -15056,6 +15056,7 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte case GGML_TYPE_I32: case GGML_TYPE_I64: case GGML_TYPE_IQ1_BN: + case GGML_TYPE_IQ2_BN: // nothing to validate break; default: -- cgit v1.2.3