summary | refs | log | tree | commit | diff
path: root/ggml-opencl-dequant.cl
diff options
context:
space:
mode:
author: 0cc4m <picard12@live.de>, 2023-04-30 20:34:52 +0200
committer: GitHub <noreply@github.com>, 2023-04-30 21:34:52 +0300
commit: 76a884920aa1d2fc0dc7a7ac12dfc5ec5816377c (patch)
tree: 759c2a1d93d9f2c2c83dd898f3fe89c511147e43 /ggml-opencl-dequant.cl
parent: 6bc4400e67e6bc4faad3ad3d5e9d8a6576a9752d (diff)
ggml : add CLBlast q5_0, q5_1, q8_0 dequant kernels (#1225)
* Implement q5_0, q5_1 and q8_0
* Work around q5_0 OpenCL issue
* Fix q8_0 dequant kernel
* Move cl kernels into ggml-opencl.c
* Use two memcpy calls for q5_0 buffer transfer
Diffstat (limited to 'ggml-opencl-dequant.cl')
-rw-r--r-- ggml-opencl-dequant.cl 63
1 files changed, 0 insertions, 63 deletions
diff --git a/ggml-opencl-dequant.cl b/ggml-opencl-dequant.cl
deleted file mode 100644
index a65a79f4..00000000
--- a/ggml-opencl-dequant.cl
+++ /dev/null
@@ -1,63 +0,0 @@
-#define MULTILINE_QUOTE(...) #__VA_ARGS__ // stringizes all macro arguments into one string literal
-const char * clblast_dequant = MULTILINE_QUOTE(
-
-struct block_q4_0 // block layout read by dequantize_row_q4_0
-{
- float d; // per-block factor multiplied into every value decoded from qs
- uchar qs[16]; // 16 bytes; the kernel decodes two 4-bit values from each byte
-};
-
-__kernel void dequantize_row_q4_0(__global struct block_q4_0* blocks, __global float* result) { // each work-item decodes one byte of one block into two floats in result
- const uint i = get_global_id(0) / 32; // index into blocks: global id divided by 32
- const uint l = get_local_id(0); // index into qs; NOTE(review): must stay below 16, which depends on the work-group size set by the host - confirm
-
- const float d = blocks[i].d;
-
- const uchar vi = blocks[i].qs[l]; // one byte carrying two 4-bit values
-
- const uint index = i*32 + l*2; // 32 output slots per block, 2 per byte
- result[index + 0] = ((vi & 0xf) - 8)*d; // low nibble, offset by -8, then scaled by d
- result[index + 1] = ((vi >> 4) - 8)*d; // high nibble, offset by -8, then scaled by d
-}
-
-struct block_q4_1 // block layout read by dequantize_row_q4_1
-{
- float d; // per-block factor multiplied into every value decoded from qs
- float m; // per-block term added after the multiplication by d
- uchar qs[16]; // 16 bytes; the kernel decodes two 4-bit values from each byte
-};
-
-__kernel void dequantize_row_q4_1(__global struct block_q4_1* blocks, __global float* result) { // each work-item decodes one byte of one block into two floats in result
- const uint i = get_global_id(0) / 32; // index into blocks: global id divided by 32
- const uint l = get_local_id(0); // index into qs; NOTE(review): must stay below 16, which depends on the work-group size set by the host - confirm
-
- const float d = blocks[i].d;
- const float m = blocks[i].m;
-
- const uchar vi = blocks[i].qs[l]; // one byte carrying two 4-bit values
-
- const uint index = i*32 + l*2; // 32 output slots per block, 2 per byte
- result[index + 0] = (vi & 0xf) * d + m; // low nibble scaled by d, then m added; no -8 offset here
- result[index + 1] = (vi >> 4) * d + m; // high nibble scaled by d, then m added
-}
-
-struct block_q4_2 // block layout read by dequantize_row_q4_2
-{
- ushort d; // 16 bits that the kernel reinterprets as a half and loads through vload_half
- uchar qs[8]; // 8 bytes; the kernel decodes two 4-bit values from each byte
-};
-
-__kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global float* result) { // each work-item decodes one byte of one block into two floats in result
- const uint i = get_global_id(0) / 16; // index into blocks: global id divided by 16
- const uint l = get_local_id(0); // index into qs; NOTE(review): must stay below 8, which depends on the work-group size set by the host - confirm
-
- const float d = vload_half(0, (__global half*) &blocks[i].d);; // reads the ushort field as a half and widens it to float; NOTE(review): the second semicolon is a stray empty statement
-
- const uchar vi = blocks[i].qs[l]; // one byte carrying two 4-bit values
-
- const uint index = i*16 + l*2; // 16 output slots per block, 2 per byte
- result[index + 0] = ((vi & 0xf) - 8)*d; // low nibble, offset by -8, then scaled by d
- result[index + 1] = ((vi >> 4) - 8)*d; // high nibble, offset by -8, then scaled by d
-}
-
-);