Allow "quantizing" to f16 and f32 (#1787)

* Allow "quantizing" to f16 and f32. Fix an issue where quantizing didn't respect LLAMA_NO_K_QUANTS. Add brief help to the list of quantization types in the quantize tool. Ignore case for quantization type arguments in the quantize tool.
author: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> 2023-06-13 04:23:23 -0600
committer: GitHub <noreply@github.com> 2023-06-13 04:23:23 -0600
commit: 74d4cfa3438cb58bd177eed30014e6588694aaa8 (patch)
tree: 8252d459812376411bcf8508ad7d1f059670601a /ggml.c
parent: 74a6d922f12ccfe16b0c265f43be8978c6f25e98 (diff)
1 files changed, 12 insertions, 0 deletions
diff --git a/ggml.c b/ggml.c
index a13de511..252edd58 100644
--- a/ggml.c
+++ b/ggml.c
@@ -16301,6 +16301,18 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
                 result = ggml_quantize_q6_K(src + start, block, n, n, hist);
             } break;
 #endif
+        case GGML_TYPE_F16:
+            {
+                int elemsize = sizeof(ggml_fp16_t);
+                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
+                result = n * elemsize;
+            } break;
+        case GGML_TYPE_F32:
+            {
+                int elemsize = sizeof(float);
+                result = n * elemsize;
+                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
+            } break;
         default:
             assert(false);
     }
author	Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>	2023-06-13 04:23:23 -0600
committer	GitHub <noreply@github.com>	2023-06-13 04:23:23 -0600
commit	74d4cfa3438cb58bd177eed30014e6588694aaa8 (patch)
tree	8252d459812376411bcf8508ad7d1f059670601a /ggml.c
parent	74a6d922f12ccfe16b0c265f43be8978c6f25e98 (diff)