1 files changed, 8 insertions, 14 deletions
diff --git a/ggml/src/ggml-cuda/convert.cu b/ggml/src/ggml-cuda/convert.cu
index db5fd2dd..f76c80dc 100644
--- a/ggml/src/ggml-cuda/convert.cu
+++ b/ggml/src/ggml-cuda/convert.cu
@@ -591,12 +591,6 @@ static __global__ void dequantize_block_iq5_k(const void * __restrict__ vx, dst_
     }
 }
 
-#define A_IQ6K -127.f
-#define B_IQ6K 6.2568f
-#define C_IQ6K 0.11218f
-#define D_IQ6K 0.0011972f
-#define S_IQ6K 1
-
 template<typename dst_t>
 static __global__ void dequantize_block_iq6_k(const void * __restrict__ vx, dst_t * __restrict__ yy) {
 
@@ -617,14 +611,14 @@ static __global__ void dequantize_block_iq6_k(const void * __restrict__ vx, dst_
     const uint8_t extra = x[i].extra >> 4*(ib64%4);
     for (int j = 0; j < 2; ++j) {
         const uint8_t h1 = qh[j] >> 4*(ib64%2), h2 = qh[j+16] >> 4*(ib64%2);
-        float q1 = (qs[j+ 0] & 0xf) | ((h1 & 0x03) << 4);
-        float q2 = (qs[j+16] & 0xf) | ((h2 & 0x03) << 4);
-        float q3 = (qs[j+ 0] >>  4) | ((h1 & 0x0c) << 2);
-        float q4 = (qs[j+16] >>  4) | ((h2 & 0x0c) << 2);
-        y[j+ 0] = dl1 * (A_IQ6K + q1*(B_IQ6K + q1*(-C_IQ6K + q1*D_IQ6K)) + (extra & 1 ? S_IQ6K : 0));
-        y[j+16] = dl2 * (A_IQ6K + q2*(B_IQ6K + q2*(-C_IQ6K + q2*D_IQ6K)) + (extra & 2 ? S_IQ6K : 0));
-        y[j+32] = dl3 * (A_IQ6K + q3*(B_IQ6K + q3*(-C_IQ6K + q3*D_IQ6K)) + (extra & 4 ? S_IQ6K : 0));
-        y[j+48] = dl4 * (A_IQ6K + q4*(B_IQ6K + q4*(-C_IQ6K + q4*D_IQ6K)) + (extra & 8 ? S_IQ6K : 0));
+        uint8_t q1 = (qs[j+ 0] & 0xf) | ((h1 & 0x03) << 4);
+        uint8_t q2 = (qs[j+16] & 0xf) | ((h2 & 0x03) << 4);
+        uint8_t q3 = (qs[j+ 0] >>  4) | ((h1 & 0x0c) << 2);
+        uint8_t q4 = (qs[j+16] >>  4) | ((h2 & 0x0c) << 2);
+        y[j+ 0] = dl1 * (iq6nl_values[q1] + (extra & 1 ? 1 : 0));
+        y[j+16] = dl2 * (iq6nl_values[q2] + (extra & 2 ? 1 : 0));
+        y[j+32] = dl3 * (iq6nl_values[q3] + (extra & 4 ? 1 : 0));
+        y[j+48] = dl4 * (iq6nl_values[q4] + (extra & 8 ? 1 : 0));
     }
 }