diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-02-22 23:21:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-22 23:21:39 +0200 |
commit | 7e4f339c404dbe029d4a117c03b37a9bf646cf0e (patch) | |
tree | ff0774611cd6f110fa83c2fa469920994db96ae6 /ggml-impl.h | |
parent | 334f76fa385ed81095165e5ae068756214893901 (diff) |
ggml : always define ggml_fp16_t as uint16_t (#5666)
* ggml : always define ggml_fp16_t as uint16_t
ggml-ci
* ggml : cont
ggml-ci
* ggml : cont
* ggml : cont
ggml-ci
* ggml : cont
ggml-ci
* cuda : no longer ggml headers last
ggml-ci
* ggml : fix q6_K FP16 -> FP32 conversion
ggml-ci
* ggml : more FP16 -> FP32 conversion fixes
ggml-ci
Diffstat (limited to 'ggml-impl.h')
-rw-r--r-- | ggml-impl.h | 27 |
1 files changed, 20 insertions, 7 deletions
diff --git a/ggml-impl.h b/ggml-impl.h index 19df66bc..c5637e4d 100644 --- a/ggml-impl.h +++ b/ggml-impl.h @@ -53,11 +53,23 @@ extern "C" { // #include <arm_neon.h> -#define GGML_COMPUTE_FP16_TO_FP32(x) ((float) (x)) -#define GGML_COMPUTE_FP32_TO_FP16(x) (x) +#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) +#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) + +#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) + +static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { + __fp16 tmp; + memcpy(&tmp, &h, sizeof(ggml_fp16_t)); + return (float)tmp; +} -#define GGML_FP16_TO_FP32(x) ((float) (x)) -#define GGML_FP32_TO_FP16(x) (x) +static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { + ggml_fp16_t res; + __fp16 tmp = f; + memcpy(&res, &tmp, sizeof(ggml_fp16_t)); + return res; +} #else @@ -214,8 +226,7 @@ extern float ggml_table_f32_f16[1 << 16]; // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32, // so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON. // This is also true for POWER9. -#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16) - +#if !defined(GGML_FP16_TO_FP32) inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) { uint16_t s; memcpy(&s, &f, sizeof(uint16_t)); @@ -223,8 +234,10 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) { } #define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x) -#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) +#endif +#if !defined(GGML_FP32_TO_FP16) +#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) #endif #define GGML_HASHTABLE_FULL ((size_t)-1) |