From 707d08792701de5a517c45c6c16bf4d816c00d88 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Mon, 24 Jun 2024 16:42:30 +0200 Subject: Bitnet: tiny bit faster 1.625 bpw variant on Metal We get 70.7 t/s for TG-128 vs 69.5 t/s before. --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'llama.cpp') diff --git a/llama.cpp b/llama.cpp index 8d2be592..f8d6911b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4130,7 +4130,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) { case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw"; case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw"; case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.75 bpw"; - case LLAMA_FTYPE_MOSTLY_IQ1_BN :return "IQ1_BN - 1.75 bpw Bitnet"; + case LLAMA_FTYPE_MOSTLY_IQ1_BN :return "IQ1_BN - 1.625 bpw Bitnet"; case LLAMA_FTYPE_MOSTLY_IQ2_BN :return "IQ2_BN - 2.00 bpw Bitnet"; case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw"; case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw"; -- cgit v1.2.3