diff options
author | Romain D <90720+Artefact2@users.noreply.github.com> | 2024-03-18 09:04:41 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-18 10:04:41 +0200 |
commit | 3a6efdd03c46c5ba08e43880d34260c02dd9999b (patch) | |
tree | b7de4172662ad4b5fb78e2578230fbb40adefd3b | |
parent | d01b3c4c32357567f3531d4e6ceffc5d23e87583 (diff) |
convert : use f32 outtype for bf16 tensors (#6106)
Previously, bf16 tensors defaulted to the f16 outtype, but bf16 to f16 is not a lossless conversion.
Default to the f32 outtype for bf16 tensors instead, so that the default conversion is lossless.
-rwxr-xr-x | convert.py | 4 |
1 files changed, 2 insertions, 2 deletions
@@ -1167,9 +1167,9 @@ class OutputFile: def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType: wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0) + ".weight"].data_type - if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32): + if output_type_str == "f32" or (output_type_str is None and wq_type in (DT_F32, DT_BF16)): return GGMLFileType.AllF32 - if output_type_str == "f16" or (output_type_str is None and wq_type in (DT_F16, DT_BF16)): + if output_type_str == "f16" or (output_type_str is None and wq_type == DT_F16): return GGMLFileType.MostlyF16 if output_type_str == "q8_0": return GGMLFileType.MostlyQ8_0 |