diff options
Diffstat (limited to 'ggml/src/ggml-cuda')
-rw-r--r-- | ggml/src/ggml-cuda/convert.cu | 22 |
1 file changed, 8 insertions, 14 deletions
diff --git a/ggml/src/ggml-cuda/convert.cu b/ggml/src/ggml-cuda/convert.cu index db5fd2dd..f76c80dc 100644 --- a/ggml/src/ggml-cuda/convert.cu +++ b/ggml/src/ggml-cuda/convert.cu @@ -591,12 +591,6 @@ static __global__ void dequantize_block_iq5_k(const void * __restrict__ vx, dst_ } } -#define A_IQ6K -127.f -#define B_IQ6K 6.2568f -#define C_IQ6K 0.11218f -#define D_IQ6K 0.0011972f -#define S_IQ6K 1 - template<typename dst_t> static __global__ void dequantize_block_iq6_k(const void * __restrict__ vx, dst_t * __restrict__ yy) { @@ -617,14 +611,14 @@ static __global__ void dequantize_block_iq6_k(const void * __restrict__ vx, dst_ const uint8_t extra = x[i].extra >> 4*(ib64%4); for (int j = 0; j < 2; ++j) { const uint8_t h1 = qh[j] >> 4*(ib64%2), h2 = qh[j+16] >> 4*(ib64%2); - float q1 = (qs[j+ 0] & 0xf) | ((h1 & 0x03) << 4); - float q2 = (qs[j+16] & 0xf) | ((h2 & 0x03) << 4); - float q3 = (qs[j+ 0] >> 4) | ((h1 & 0x0c) << 2); - float q4 = (qs[j+16] >> 4) | ((h2 & 0x0c) << 2); - y[j+ 0] = dl1 * (A_IQ6K + q1*(B_IQ6K + q1*(-C_IQ6K + q1*D_IQ6K)) + (extra & 1 ? S_IQ6K : 0)); - y[j+16] = dl2 * (A_IQ6K + q2*(B_IQ6K + q2*(-C_IQ6K + q2*D_IQ6K)) + (extra & 2 ? S_IQ6K : 0)); - y[j+32] = dl3 * (A_IQ6K + q3*(B_IQ6K + q3*(-C_IQ6K + q3*D_IQ6K)) + (extra & 4 ? S_IQ6K : 0)); - y[j+48] = dl4 * (A_IQ6K + q4*(B_IQ6K + q4*(-C_IQ6K + q4*D_IQ6K)) + (extra & 8 ? S_IQ6K : 0)); + uint8_t q1 = (qs[j+ 0] & 0xf) | ((h1 & 0x03) << 4); + uint8_t q2 = (qs[j+16] & 0xf) | ((h2 & 0x03) << 4); + uint8_t q3 = (qs[j+ 0] >> 4) | ((h1 & 0x0c) << 2); + uint8_t q4 = (qs[j+16] >> 4) | ((h2 & 0x0c) << 2); + y[j+ 0] = dl1 * (iq6nl_values[q1] + (extra & 1 ? 1 : 0)); + y[j+16] = dl2 * (iq6nl_values[q2] + (extra & 2 ? 1 : 0)); + y[j+32] = dl3 * (iq6nl_values[q3] + (extra & 4 ? 1 : 0)); + y[j+48] = dl4 * (iq6nl_values[q4] + (extra & 8 ? 1 : 0)); } } |