summaryrefslogtreecommitdiff
path: root/ggml-cuda/vecdotq.cuh
diff options
context:
space:
mode:
authorIwan Kawrakow <iwan.kawrakow@gmail.com>2024-06-18 20:08:28 +0300
committerIwan Kawrakow <iwan.kawrakow@gmail.com>2024-06-22 12:02:52 +0300
commit927e251a12fa287e13c6bd9667ee97d783486c09 (patch)
tree90ed8827fc28630f52e92d8b8ea664198a6f5829 /ggml-cuda/vecdotq.cuh
parent181fd9c56eaa64d0a92f9e8be7387f409cfa8745 (diff)
Bitnet(1.75 bpw): higher precision fp8 scale
Use 3 bits for the exponent and 5 bits for the mantissa. This makes the PPL the same as with fp16 (although the previous version, with 4 bits each for the exponent and the mantissa, was good enough for all practical purposes).
Diffstat (limited to 'ggml-cuda/vecdotq.cuh')
-rw-r--r--ggml-cuda/vecdotq.cuh7
1 files changed, 2 insertions, 5 deletions
diff --git a/ggml-cuda/vecdotq.cuh b/ggml-cuda/vecdotq.cuh
index 6ec2035a..764a19d7 100644
--- a/ggml-cuda/vecdotq.cuh
+++ b/ggml-cuda/vecdotq.cuh
@@ -1078,10 +1078,7 @@ static __device__ __forceinline__ float vec_dot_iq1_bn_q8_1(
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
const block_iq1_bn * bq1 = (const block_iq1_bn *) vbq + kbx;
- typedef union { float f; uint32_t i; } scale_t;
- scale_t s;
- uint8_t u = bq1->extra & 0xff;
- s.i = ((((u >> 4) | 0xf0) - 132) << 23) | ((u & 0x0f) << 19);
+ float d = iq1bn_fp8_to_float(bq1->extra & 0xff);
uint8_t extra = bq1->extra >> (8 + 4*iqs);
int sumi = 0;
#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
@@ -1110,7 +1107,7 @@ static __device__ __forceinline__ float vec_dot_iq1_bn_q8_1(
q8 += 8;
}
#endif
- return s.f * __low2float(bq8_1[iqs].ds) * sumi;
+ return d * __low2float(bq8_1[iqs].ds) * sumi;
}
// TODO