diff options
author | Gaolingx <947770192@qq.com> | 2025-06-10 13:30:17 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-06-10 08:30:17 +0300 |
commit | 3c1f2c68fdbb9e5be26aad85902c3d86057d69b5 (patch) | |
tree | 560328e5888de190079bd4dcfd0ba5f7638d329a | |
parent | fa90a9864a95da230065fdb9b17d14a485ef6807 (diff) |
Fix Compile error (C2668) (#508)
* cmake: force MSVC compiler charset to utf-8
* build: apply MSVC /bigobj option to c/cpp files only
* Update CMakeLists.txt
* Fix Compile error (C2668)
* revert hsum_float_8x8
-rw-r--r-- | ggml/src/iqk/iqk_common.h | 21 | ||||
-rw-r--r-- | ggml/src/iqk/iqk_quantize.cpp | 45 |
2 files changed, 18 insertions, 48 deletions
diff --git a/ggml/src/iqk/iqk_common.h b/ggml/src/iqk/iqk_common.h index 6feeff1a..cce040dd 100644 --- a/ggml/src/iqk/iqk_common.h +++ b/ggml/src/iqk/iqk_common.h @@ -225,9 +225,15 @@ static inline int hsum_i32_8(const __m256i a) { const __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1)); const __m128i hi64 = _mm_unpackhi_epi64(sum128, sum128); const __m128i sum64 = _mm_add_epi32(hi64, sum128); - const __m128i hi32 = _mm_shuffle_epi32(sum64, _MM_SHUFFLE(2, 3, 0, 1)); + const __m128i hi32 = _mm_shuffle_epi32(sum64, _MM_SHUFFLE(2, 3, 0, 1)); return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32)); } +static inline float hmax_f32_8(__m256 x) { + __m128 max4 = _mm_max_ps(_mm256_extractf128_ps(x, 1), _mm256_castps256_ps128(x)); + max4 = _mm_max_ps(max4, _mm_movehl_ps(max4, max4)); + max4 = _mm_max_ss(max4, _mm_movehdup_ps(max4)); + return _mm_cvtss_f32(max4); +} static inline float hmax_float_8(__m256 x) { __m128 max4 = _mm_max_ps(_mm256_extractf128_ps(x, 1), _mm256_castps256_ps128(x)); max4 = _mm_max_ps( max4, _mm_movehl_ps(max4, max4)); @@ -235,13 +241,22 @@ static inline float hmax_float_8(__m256 x) { return _mm_cvtss_f32(max4); } +static inline __m128 hsum_float_4x4(__m128 * accm) { + accm[0] = _mm_add_ps(_mm_unpacklo_ps(accm[0], accm[2]), _mm_unpackhi_ps(accm[0], accm[2])); + accm[1] = _mm_add_ps(_mm_unpacklo_ps(accm[1], accm[3]), _mm_unpackhi_ps(accm[1], accm[3])); + return _mm_add_ps(_mm_unpacklo_ps(accm[0], accm[1]), _mm_unpackhi_ps(accm[0], accm[1])); +} static inline __m256 hsum_float_8x8(__m256 * accm) { for (int i = 0; i < 4; ++i) { - accm[i] = _mm256_add_ps(_mm256_permute2f128_ps(accm[i], accm[i+4], 0x20), _mm256_permute2f128_ps(accm[i], accm[i+4], 0x31)); + accm[i] = _mm256_add_ps(_mm256_permute2f128_ps(accm[i], accm[i + 4], 0x20), _mm256_permute2f128_ps(accm[i], accm[i + 4], 0x31)); //accm[i] = _mm256_set_m128(_mm_add_ps(_mm256_castps256_ps128(accm[i+4]), _mm256_extractf128_ps(accm[i+4], 1)), // _mm_add_ps(_mm256_castps256_ps128(accm[i+0]), _mm256_extractf128_ps(accm[i+0], 1))); } - for (int i = 0; i < 2; ++i) accm[i] = _mm256_add_ps(_mm256_unpacklo_ps(accm[i], accm[i+2]), _mm256_unpackhi_ps(accm[i], accm[i+2])); + for (int i = 0; i < 2; ++i) accm[i] = _mm256_add_ps(_mm256_unpacklo_ps(accm[i], accm[i + 2]), _mm256_unpackhi_ps(accm[i], accm[i + 2])); + return _mm256_add_ps(_mm256_unpacklo_ps(accm[0], accm[1]), _mm256_unpackhi_ps(accm[0], accm[1])); +} +static inline __m256 hsum_float_4x8(__m256 * accm) { + for (int i = 0; i < 2; ++i) accm[i] = _mm256_add_ps(_mm256_unpacklo_ps(accm[i], accm[i + 2]), _mm256_unpackhi_ps(accm[i], accm[i + 2])); return _mm256_add_ps(_mm256_unpacklo_ps(accm[0], accm[1]), _mm256_unpackhi_ps(accm[0], accm[1])); } diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp index ee85344d..2eb53d1c 100644 --- a/ggml/src/iqk/iqk_quantize.cpp +++ b/ggml/src/iqk/iqk_quantize.cpp @@ -557,32 +557,6 @@ void quantize_row_q8_K64(const float * x, void * y, int64_t k) { quantize_row_q8_K64_ref(x, (block_q8_K64 *)y, k); } -#ifdef __AVX2__ -namespace { -inline float hsum_float_4(__m128 x) { - x = _mm_add_ps(x, _mm_movehl_ps(x, x)); - x = _mm_add_ss(x, _mm_movehdup_ps(x)); - return _mm_cvtss_f32(x); -} -inline float hsum_float_8(__m256 x) { - return hsum_float_4(_mm_add_ps(_mm256_castps256_ps128(x), _mm256_extractf128_ps(x, 1))); -} -inline int hsum_i32_8(const __m256i a) { - const __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1)); - const __m128i hi64 = _mm_unpackhi_epi64(sum128, sum128); - const __m128i sum64 = _mm_add_epi32(hi64, sum128); - const __m128i hi32 = _mm_shuffle_epi32(sum64, _MM_SHUFFLE(2, 3, 0, 1)); - return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32)); -} -inline float hmax_f32_8(__m256 x) { - __m128 max4 = _mm_max_ps(_mm256_extractf128_ps(x, 1), _mm256_castps256_ps128(x)); - max4 = _mm_max_ps( max4, _mm_movehl_ps(max4, max4)); - max4 = _mm_max_ss( max4, _mm_movehdup_ps( max4)); - return _mm_cvtss_f32(max4); -} -} -#endif - void quantize_row_q8_K16(const float * x, void * vy, int64_t nk) { float * dptr = (float *)vy; int8_t * qy = (int8_t *)(dptr + 5); @@ -7413,25 +7387,6 @@ void dequantize_row_ms_i2s(const void * vx, float * y, int64_t k) { } namespace { -#ifdef __AVX2__ -__m128 hsum_float_4x4(__m128 * accm) { - accm[0] = _mm_add_ps(_mm_unpacklo_ps(accm[0], accm[2]), _mm_unpackhi_ps(accm[0], accm[2])); - accm[1] = _mm_add_ps(_mm_unpacklo_ps(accm[1], accm[3]), _mm_unpackhi_ps(accm[1], accm[3])); - return _mm_add_ps(_mm_unpacklo_ps(accm[0], accm[1]), _mm_unpackhi_ps(accm[0], accm[1])); -} -__m256 hsum_float_8x8(__m256 * accm) { - for (int i = 0; i < 4; ++i) { - accm[i] = _mm256_set_m128(_mm_add_ps(_mm256_castps256_ps128(accm[i+4]), _mm256_extractf128_ps(accm[i+4], 1)), - _mm_add_ps(_mm256_castps256_ps128(accm[i+0]), _mm256_extractf128_ps(accm[i+0], 1))); - } - for (int i = 0; i < 2; ++i) accm[i] = _mm256_add_ps(_mm256_unpacklo_ps(accm[i], accm[i+2]), _mm256_unpackhi_ps(accm[i], accm[i+2])); - return _mm256_add_ps(_mm256_unpacklo_ps(accm[0], accm[1]), _mm256_unpackhi_ps(accm[0], accm[1])); -} -__m256 hsum_float_4x8(__m256 * accm) { - for (int i = 0; i < 2; ++i) accm[i] = _mm256_add_ps(_mm256_unpacklo_ps(accm[i], accm[i+2]), _mm256_unpackhi_ps(accm[i], accm[i+2])); - return _mm256_add_ps(_mm256_unpacklo_ps(accm[0], accm[1]), _mm256_unpackhi_ps(accm[0], accm[1])); -} -#endif template <int block_size, int group_size, int num_bits, bool is_abs = false> class QuantizerIQKT { static_assert(group_size == 8 || group_size == 4); |