diff options
author | Kawrakow <iwankawrakow@gmail.com> | 2024-09-17 14:31:29 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-17 14:31:29 +0300 |
commit | 12bbdb8ce7517455e03e6483828e90a475ff0d7e (patch) | |
tree | 245451bc81d50bac2c4aefcf2dfb6ea5ee96967b /ggml/src | |
parent | 4ee889f15891b6e8785c88c932ec27f129cae285 (diff) |
Fix compiler warnings (#58)
* Fix C++ compilation warnings caused by ggml-common.h
* Disable c99-extensions warning
  I get tons of those on macOS due to the arm_neon.h header.
* Disable c99-extensions warning only for APPLE
* Fix warnings in iqk_quantize.cpp
  Also add GGML_ABORT when implementation is missing.
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src')
-rw-r--r-- | ggml/src/CMakeLists.txt | 4 | ||||
-rw-r--r-- | ggml/src/ggml-common.h | 58 | ||||
-rw-r--r-- | ggml/src/iqk/iqk_quantize.cpp | 15 |
3 files changed, 23 insertions, 54 deletions
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index be6c2cc6..600acf91 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -935,6 +935,10 @@ endif() if (GGML_ALL_WARNINGS) if (NOT MSVC) list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) + if (APPLE) + # shut up c99 extensions warning I get on my system due to arm_neon.h + list(APPEND WARNING_FLAGS -Wno-c99-extensions) + endif() list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration) list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn) diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h index f1a34f7a..40a4b53c 100644 --- a/ggml/src/ggml-common.h +++ b/ggml/src/ggml-common.h @@ -13,7 +13,7 @@ typedef uint16_t ggml_half; typedef uint32_t ggml_half2; -#define GGML_COMMON_AGGR +#define GGML_SCALE_TYPE1(m, dm) ggml_half d; ggml_half m #define GGML_COMMON_DECL #elif defined(GGML_COMMON_DECL_METAL) @@ -22,7 +22,7 @@ typedef uint32_t ggml_half2; typedef half ggml_half; typedef half2 ggml_half2; -#define GGML_COMMON_AGGR +#define GGML_SCALE_TYPE1(m, dm) union { struct { ggml_half d; ggml_half m; }; ggml_half2 dm; } #define GGML_COMMON_DECL #elif defined(GGML_COMMON_DECL_CUDA) @@ -36,7 +36,7 @@ typedef half2 ggml_half2; typedef half ggml_half; typedef half2 ggml_half2; -#define GGML_COMMON_AGGR data +#define GGML_SCALE_TYPE1(m, dm) union { struct { ggml_half d; ggml_half m; } data; ggml_half2 dm; } #define GGML_COMMON_DECL #elif defined(GGML_COMMON_DECL_HIP) @@ -46,7 +46,7 @@ typedef half2 ggml_half2; typedef half ggml_half; typedef half2 ggml_half2; -#define GGML_COMMON_AGGR data +#define GGML_SCALE_TYPE1(m, dm) union { struct { ggml_half d; ggml_half m; } data; ggml_half2 dm; } #define GGML_COMMON_DECL #elif defined(GGML_COMMON_DECL_SYCL) @@ -56,7 +56,7 @@ typedef half2 ggml_half2; typedef sycl::half ggml_half; typedef sycl::half2 ggml_half2; 
-#define GGML_COMMON_AGGR data +#define GGML_SCALE_TYPE1(m, dm) union { struct { ggml_half d; ggml_half m; } data; ggml_half2 dm; } #define GGML_COMMON_DECL #endif @@ -166,13 +166,7 @@ static_assert(sizeof(block_q4_0) == sizeof(ggml_half) + QK4_0 / 2, "wrong q4_0 b #define QK4_1 32 typedef struct { - union { - struct { - ggml_half d; // delta - ggml_half m; // min - } GGML_COMMON_AGGR; - ggml_half2 dm; - }; + GGML_SCALE_TYPE1(m, dm); uint8_t qs[QK4_1 / 2]; // nibbles / quants } block_q4_1; static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding"); @@ -187,13 +181,7 @@ static_assert(sizeof(block_q5_0) == sizeof(ggml_half) + sizeof(uint32_t) + QK5_0 #define QK5_1 32 typedef struct { - union { - struct { - ggml_half d; // delta - ggml_half m; // min - } GGML_COMMON_AGGR; - ggml_half2 dm; - }; + GGML_SCALE_TYPE1(m, dm); uint8_t qh[4]; // 5-th bit of quants uint8_t qs[QK5_1 / 2]; // nibbles / quants } block_q5_1; @@ -208,13 +196,7 @@ static_assert(sizeof(block_q8_0) == sizeof(ggml_half) + QK8_0, "wrong q8_0 block #define QK8_1 32 typedef struct { - union { - struct { - ggml_half d; // delta - ggml_half s; // d * sum(qs[i]) - } GGML_COMMON_AGGR; - ggml_half2 ds; - }; + GGML_SCALE_TYPE1(s, ds); int8_t qs[QK8_1]; // quants } block_q8_1; static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding"); @@ -265,13 +247,7 @@ static_assert(sizeof(block_q8_0x8) == 8 * sizeof(ggml_half) + QK8_0 * 8, "wrong typedef struct { uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits uint8_t qs[QK_K/4]; // quants - union { - struct { - ggml_half d; // super-block scale for quantized scales - ggml_half dmin; // super-block scale for quantized mins - } GGML_COMMON_AGGR; - ggml_half2 dm; - }; + GGML_SCALE_TYPE1(dmin, dm); } block_q2_K; static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding"); @@ -292,13 +268,7 @@ static_assert(sizeof(block_q3_K) == 
sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 12 // weight is represented as x = a * q + b // Effectively 4.5 bits per weight typedef struct { - union { - struct { - ggml_half d; // super-block scale for quantized scales - ggml_half dmin; // super-block scale for quantized mins - } GGML_COMMON_AGGR; - ggml_half2 dm; - }; + GGML_SCALE_TYPE1(dmin, dm); uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits uint8_t qs[QK_K/2]; // 4--bit quants } block_q4_K; @@ -309,13 +279,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2, // weight is represented as x = a * q + b // Effectively 5.5 bits per weight typedef struct { - union { - struct { - ggml_half d; // super-block scale for quantized scales - ggml_half dmin; // super-block scale for quantized mins - } GGML_COMMON_AGGR; - ggml_half2 dm; - }; + GGML_SCALE_TYPE1(dmin, dm); uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits uint8_t qh[QK_K/8]; // quants, high bit uint8_t qs[QK_K/2]; // quants, low 4 bits diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp index 9b39a490..42441584 100644 --- a/ggml/src/iqk/iqk_quantize.cpp +++ b/ggml/src/iqk/iqk_quantize.cpp @@ -698,10 +698,8 @@ void vec_dot_iq2_k_q8_k(int n, float * GGML_RESTRICT s, size_t bs, const void * } #endif - const int nb = n / QK_K; + GGML_ABORT("not implemented"); - const block_iq2_k * x = (const block_iq2_k *)vx; - const block_q8_K * y = (const block_q8_K *)vy; } // @@ -971,10 +969,7 @@ void vec_dot_iq3_k_q8_k(int n, float * GGML_RESTRICT s, size_t bs, const void * } #endif - const int nb = n / QK_K; - - const block_iq2_k * x = (const block_iq2_k *)vx; - const block_q8_K * y = (const block_q8_K *)vy; + GGML_ABORT("not implemented"); } // @@ -1664,6 +1659,8 @@ void vec_dot_iq6_k_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, } #endif + GGML_ABORT("not implemented"); + // TODO //const int nb = n / QK_K; @@ -2030,6 +2027,10 @@ void 
dequantize_row_iq2_tn(const block_iq2_tn * x, float * y, int64_t k) { } void vec_dot_iq2_tn_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) { + GGML_UNUSED(bs); + GGML_UNUSED(bx); + GGML_UNUSED(by); + GGML_UNUSED(nrc); #if GGML_USE_IQK_MULMAT if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_TN, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) { return; |