ggml : always define ggml_fp16_t as uint16_t (#5666)

* ggml : always define ggml_fp16_t as uint16_t ggml-ci
* ggml : cont ggml-ci
* ggml : cont
* ggml : cont ggml-ci
* ggml : cont ggml-ci
* cuda : no longer ggml headers last ggml-ci
* ggml : fix q6_K FP16 -> FP32 conversion ggml-ci
* ggml : more FP16 -> FP32 conversion fixes ggml-ci
author: Georgi Gerganov <ggerganov@gmail.com> 2024-02-22 23:21:39 +0200
committer: GitHub <noreply@github.com> 2024-02-22 23:21:39 +0200
commit: 7e4f339c404dbe029d4a117c03b37a9bf646cf0e (patch)
tree: ff0774611cd6f110fa83c2fa469920994db96ae6 /ggml-impl.h
parent: 334f76fa385ed81095165e5ae068756214893901 (diff)
1 files changed, 20 insertions, 7 deletions
diff --git a/ggml-impl.h b/ggml-impl.h
index 19df66bc..c5637e4d 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -53,11 +53,23 @@ extern "C" {
 //
 #include <arm_neon.h>
 
-#define GGML_COMPUTE_FP16_TO_FP32(x) ((float) (x))
-#define GGML_COMPUTE_FP32_TO_FP16(x) (x)
+#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
+#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
+
+#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
+
+static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { // converts the half-precision bit pattern held in h to a float
+    __fp16 tmp; // compiler half-precision type that receives the raw bits of h
+    memcpy(&tmp, &h, sizeof(ggml_fp16_t)); // bit-for-bit copy, no numeric conversion; assumes sizeof(__fp16) == sizeof(ggml_fp16_t) -- TODO confirm
+    return (float)tmp; // the cast performs the actual FP16 -> FP32 value conversion
+}
 
-#define GGML_FP16_TO_FP32(x) ((float) (x))
-#define GGML_FP32_TO_FP16(x) (x)
+static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { // converts f to half precision and returns its bit pattern as ggml_fp16_t
+    ggml_fp16_t res; // storage for the raw half-precision bits handed back to the caller
+    __fp16 tmp = f; // the initialization performs the FP32 -> FP16 value conversion
+    memcpy(&res, &tmp, sizeof(ggml_fp16_t)); // bit-for-bit copy, no numeric conversion; assumes sizeof(__fp16) == sizeof(ggml_fp16_t) -- TODO confirm
+    return res; // raw bits, not a numeric value
+}
 
 #else
 
@@ -214,8 +226,7 @@ extern float ggml_table_f32_f16[1 << 16];
 // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
 // so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
 // This is also true for POWER9.
-#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
-
+#if !defined(GGML_FP16_TO_FP32)
 inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
     uint16_t s;
     memcpy(&s, &f, sizeof(uint16_t));
@@ -223,8 +234,10 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
 }
 
 #define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
-#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
+#endif
 
+#if !defined(GGML_FP32_TO_FP16)
+#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
 #endif
 
 #define GGML_HASHTABLE_FULL ((size_t)-1)
author	Georgi Gerganov <ggerganov@gmail.com>	2024-02-22 23:21:39 +0200
committer	GitHub <noreply@github.com>	2024-02-22 23:21:39 +0200
commit	7e4f339c404dbe029d4a117c03b37a9bf646cf0e (patch)
tree	ff0774611cd6f110fa83c2fa469920994db96ae6 /ggml-impl.h
parent	334f76fa385ed81095165e5ae068756214893901 (diff)