From 5dc9dd7152dedc6046b646855585bd070c91e8c8 Mon Sep 17 00:00:00 2001 From: Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> Date: Tue, 9 Apr 2024 09:16:13 +0100 Subject: llama : add Command R Plus support (#6491) * Add Command R Plus GGUF * Add Command R Plus GGUF * Loading works up to LayerNorm2D * Export new tensors in 1D so they are not quantized. * Fix embedding layer based on Noeda's example * Whitespace * Add line * Fix unexpected tokens on MPS. Re-add F16 fix. (Noeda) * dranger003: Fix block index overflow in CUDA dequantizing. * Reverted blocked multiplication code as it still has issues and could affect other Llama arches * export norms as f32 * fix overflow issues during quant and other cleanup * Type convention Co-authored-by: Georgi Gerganov * dranger003: Fix more int overflow during quant. --------- Co-authored-by: S Co-authored-by: S Co-authored-by: slaren Co-authored-by: Georgi Gerganov --- convert-hf-to-gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'convert-hf-to-gguf.py') diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 7e601170..37af6328 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -160,7 +160,7 @@ class Model(ABC): data = data.astype(np.float32) # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32 - if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1: + if self.ftype == 1 and data_dtype == np.float16 and (n_dims == 1 or new_name.endswith("_norm.weight")): data = data.astype(np.float32) # if f16 desired, convert any float32 2-dim weight tensors to float16 -- cgit v1.2.3