cuda : fix 2-bit quants on amd hip (#5105)

* cuda : fix 2-bit quants on amd hip
* use __low2float intrinsic function for new quants
author: Engininja2 <139037756+Engininja2@users.noreply.github.com> 2024-01-24 16:18:15 -0600
committer: GitHub <noreply@github.com> 2024-01-24 23:18:15 +0100
commit: cd4fddb29f81d6a1f6d51a0c016bc6b486d68def (patch)
tree: 5a1882537c3b038749a3e3086a3ea742e65586f1
parent: c9b316c78fba31e65879a2ec91cbafd341b88cce (diff)
1 files changed, 3 insertions, 3 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 7f460449..05e5d18a 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -4283,7 +4283,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
         q8 += 8;
         aux32 >>= 7;
     }
-    const float d = (float)bq2->d * (0.5f + aux32) * (float)bq8_1[ib32].ds.x * 0.25f;
+    const float d = (float)bq2->d * (0.5f + aux32) * __low2float(bq8_1[ib32].ds) * 0.25f;
     return d * sumi;
 #else
     // iqs is 0...15
@@ -4294,7 +4294,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
     const uint8_t  * grid1 = (const uint8_t *)(iq2xxs_grid + aux8[2*il+0]);
     const uint8_t  * grid2 = (const uint8_t *)(iq2xxs_grid + aux8[2*il+1]);
     const uint32_t aux32 = q2[2] | (q2[3] << 16);
-    const float d = (float)bq2->d * (0.5f + (aux32 >> 28)) * (float)bq8_1[ib32].ds.x * 0.25f;
+    const float d = (float)bq2->d * (0.5f + (aux32 >> 28)) * __low2float(bq8_1[ib32].ds) * 0.25f;
     const uint8_t signs1 = ksigns_iq2xs[(aux32 >> 14*il) & 127];
     const uint8_t signs2 = ksigns_iq2xs[(aux32 >> (14*il + 7)) & 127];
     const int8_t * q8 = bq8_1[ib32].qs + 16*il;
@@ -4339,7 +4339,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xs_q8_1(
         }
         q8 += 8;
     }
-    const float d = (float)bq2->d * (float)bq8_1[ib32].ds.x * 0.25f;
+    const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f;
     return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2);
 #else
     assert(false);
author	Engininja2 <139037756+Engininja2@users.noreply.github.com>	2024-01-24 16:18:15 -0600
committer	GitHub <noreply@github.com>	2024-01-24 23:18:15 +0100
commit	cd4fddb29f81d6a1f6d51a0c016bc6b486d68def (patch)
tree	5a1882537c3b038749a3e3086a3ea742e65586f1
parent	c9b316c78fba31e65879a2ec91cbafd341b88cce (diff)