diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-19 17:09:13 +0300 |
---|---|---|
committer | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-22 12:02:52 +0300 |
commit | a2e43b83c9344e7c1130e3e95917bdd61dfb6aab (patch) | |
tree | c0fad469e45f4b8813e405308126369785ab542d /ggml-cuda | |
parent | 58d9e8f1d2efba4b6717043f7a5167be670a6f2e (diff) |
bitnet(scale in a separate tensor): CUDA
Diffstat (limited to 'ggml-cuda')
-rw-r--r-- | ggml-cuda/convert.cu | 14 | ||||
-rw-r--r-- | ggml-cuda/vecdotq.cuh | 9 |
2 files changed, 10 insertions, 13 deletions
diff --git a/ggml-cuda/convert.cu b/ggml-cuda/convert.cu index 2a897738..2be03a3e 100644 --- a/ggml-cuda/convert.cu +++ b/ggml-cuda/convert.cu @@ -432,8 +432,7 @@ static __global__ void dequantize_block_iq1_bn(const void * __restrict__ vx, dst int64_t i = QK_K/QK_IQ1BN * ii + ib/(QK_IQ1BN/32); if (i >= nb64) return; ib = ib%(QK_IQ1BN/32); - float d = iq1bn_fp8_to_float(x[i].extra & 0xff); - const float dl = x[i].extra & (1 << (4*ib + il + 8)) ? -d : d; + const float dl = x[i].extra & (1 << (4*ib + il)) ? -1 : 1; const float ml = -dl; uint16_t idx = x[i].ql[4*ib + il] | ((x[i].qh[2*ib + il/2] << (8 - 4*(il%2))) & 0x0f00); const uint16_t gp = iq1bn_grid_u16[idx]; @@ -454,14 +453,13 @@ static __global__ void dequantize_block_iq2_bn(const void * __restrict__ vx, dst dst_t * y = yy + 256*ii + 64*ib64 + 2*il; int64_t i = 256/QK_IQ1BN * ii + ib64; if (i >= nb64) return; - const float d = x[i].d; - const float m = -d; + const float m = -1; auto qs = x[i].qs + 2*il; for (int j = 0; j < 2; ++j) { - y[j+ 0] = d * ((qs[j] >> 0) & 3) + m; - y[j+16] = d * ((qs[j] >> 2) & 3) + m; - y[j+32] = d * ((qs[j] >> 4) & 3) + m; - y[j+48] = d * ((qs[j] >> 6) & 3) + m; + y[j+ 0] = ((qs[j] >> 0) & 3) + m; + y[j+16] = ((qs[j] >> 2) & 3) + m; + y[j+32] = ((qs[j] >> 4) & 3) + m; + y[j+48] = ((qs[j] >> 6) & 3) + m; } } diff --git a/ggml-cuda/vecdotq.cuh b/ggml-cuda/vecdotq.cuh index 764a19d7..6b831cf6 100644 --- a/ggml-cuda/vecdotq.cuh +++ b/ggml-cuda/vecdotq.cuh @@ -1078,8 +1078,7 @@ static __device__ __forceinline__ float vec_dot_iq1_bn_q8_1( const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) { const block_iq1_bn * bq1 = (const block_iq1_bn *) vbq + kbx; - float d = iq1bn_fp8_to_float(bq1->extra & 0xff); - uint8_t extra = bq1->extra >> (8 + 4*iqs); + uint8_t extra = bq1->extra >> 4*iqs; int sumi = 0; #if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics const int * q8 = (const int *)bq8_1[iqs].qs; @@ -1107,7 +1106,7 @@ static __device__ __forceinline__ float vec_dot_iq1_bn_q8_1( q8 += 8; } #endif - return d * __low2float(bq8_1[iqs].ds) * sumi; + return __low2float(bq8_1[iqs].ds) * sumi; } // TODO @@ -1132,7 +1131,7 @@ static __device__ __forceinline__ float vec_dot_iq2_bn_q8_1( } auto d8l = __half22float2(bq8_1[0].ds); auto d8h = __half22float2(bq8_1[1].ds); - return (float)bq2->d * (d8l.x * (sumi1 + 0.25f*sumi2) + d8h.x * (sumi3 + 0.25f * sumi4) - 0.5f*d8l.y - 0.5f*d8h.y); + return d8l.x * (sumi1 + 0.25f*sumi2) + d8h.x * (sumi3 + 0.25f * sumi4) - 0.5f*d8l.y - 0.5f*d8h.y; #else int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0; auto q8l = bq8_1[0].qs + 8*iqs; @@ -1146,7 +1145,7 @@ static __device__ __forceinline__ float vec_dot_iq2_bn_q8_1( } auto d8l = __half22float2(bq8_1[0].ds); auto d8h = __half22float2(bq8_1[1].ds); - return (float)bq2->d * (d8l.x * (sumi1 + 0.25f*sumi2) + 0.0625f * d8h.x*(sumi3 + 0.25f*sumi4) - 0.5f*d8l.y - 0.5f*d8h.y); + return d8l.x * (sumi1 + 0.25f*sumi2) + 0.0625f * d8h.x*(sumi3 + 0.25f*sumi4) - 0.5f*d8l.y - 0.5f*d8h.y; #endif } |