From 7c7836d9d4062d6858e3fb337b135c417ccee6ce Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Sun, 16 Jun 2024 07:17:31 +0200
Subject: Vulkan Shader Refactor, Memory Debugging Option (#7947)

* Refactor shaders, extract GLSL code from ggml_vk_generate_shaders.py into vulkan-shaders directory
* Improve debug log code
* Add memory debug output option
* Fix flake8
* Fix unnecessary high llama-3 VRAM use
---
 vulkan-shaders/dequant_funcs.comp | 60 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 vulkan-shaders/dequant_funcs.comp

(limited to 'vulkan-shaders/dequant_funcs.comp')

diff --git a/vulkan-shaders/dequant_funcs.comp b/vulkan-shaders/dequant_funcs.comp
new file mode 100644
index 00000000..35d424d1
--- /dev/null
+++ b/vulkan-shaders/dequant_funcs.comp
@@ -0,0 +1,60 @@
+#if !defined(DATA_A_F32) && !defined(DATA_A_F16)
+#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require // required for every variant except F32 and F16
+#endif
+
+#if defined(DATA_A_F32)
+vec2 dequantize(uint ib, uint iqs, uint a_offset) { // F32 variant: iqs is not used
+    return vec2(data_a[a_offset + ib], data_a[a_offset + ib + 1]); // two consecutive data_a entries starting at a_offset + ib
+}
+#endif
+
+#if defined(DATA_A_F16)
+vec2 dequantize(uint ib, uint iqs, uint a_offset) { // F16 variant: iqs is not used
+    return vec2(data_a[a_offset + ib], data_a[a_offset + ib + 1]); // two consecutive data_a entries starting at a_offset + ib
+}
+#endif
+
+#if defined(DATA_A_Q4_0)
+vec2 dequantize(uint ib, uint iqs, uint a_offset) { // Q4_0 variant: unpacks two values from qs[iqs] of block a_offset + ib
+    const float d = float(data_a[a_offset + ib].d); // scale factor stored in the block
+    const uint vui = uint(data_a[a_offset + ib].qs[iqs]); // packed entry, widened to uint (element type of qs is declared elsewhere)
+    return (vec2(vui & 0xF, vui >> 4) - 8.0f) * d; // x = low 4 bits, y = vui >> 4; subtract 8 from both, then scale by d
+}
+#endif
+
+#if defined(DATA_A_Q4_1)
+vec2 dequantize(uint ib, uint iqs, uint a_offset) { // Q4_1 variant: unpacks two values from qs[iqs] of block a_offset + ib
+    const float d = float(data_a[a_offset + ib].d); // scale factor stored in the block
+    const float m = float(data_a[a_offset + ib].m); // offset stored in the block, added after scaling
+    const uint vui = uint(data_a[a_offset + ib].qs[iqs]); // packed entry, widened to uint (element type of qs is declared elsewhere)
+    return vec2(vui & 0xF, vui >> 4) * d + m; // x = low 4 bits, y = vui >> 4; scale by d, then add m
+}
+#endif
+
+#if defined(DATA_A_Q5_0)
+vec2 dequantize(uint ib, uint iqs, uint a_offset) { // Q5_0 variant: 4 bits from qs[iqs] plus one extra bit from qh per value
+    const float d = float(data_a[a_offset + ib].d); // scale factor stored in the block
+    const uint uint_qh = uint(data_a[a_offset + ib].qh[1]) << 16 | data_a[a_offset + ib].qh[0]; // (qh[1] << 16) | qh[0]: << binds tighter than |
+    const ivec2 qh = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10); // x: bit iqs moved to bit 4; y: bit (iqs + 16) moved to bit 4
+    const uint vui = uint(data_a[a_offset + ib].qs[iqs]); // packed entry, widened to uint (element type of qs is declared elsewhere)
+    return (vec2((vui & 0xF) | qh.x, (vui >> 4) | qh.y) - 16.0f) * d; // OR the extra bit onto each 4-bit part, subtract 16, then scale by d
+}
+#endif
+
+#if defined(DATA_A_Q5_1)
+vec2 dequantize(uint ib, uint iqs, uint a_offset) { // Q5_1 variant: 4 bits from qs[iqs] plus one extra bit from qh per value
+    const float d = float(data_a[a_offset + ib].d); // scale factor stored in the block
+    const float m = float(data_a[a_offset + ib].m); // offset stored in the block, added after scaling
+    const uint uint_qh = data_a[a_offset + ib].qh; // here qh is read as a single value (no [0]/[1] halves to combine)
+    const ivec2 qh = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10); // x: bit iqs moved to bit 4; y: bit (iqs + 16) moved to bit 4
+    const uint vui = uint(data_a[a_offset + ib].qs[iqs]); // packed entry, widened to uint (element type of qs is declared elsewhere)
+    return vec2((vui & 0xF) | qh.x, (vui >> 4) | qh.y) * d + m; // OR the extra bit onto each 4-bit part, scale by d, then add m
+}
+#endif
+
+#if defined(DATA_A_Q8_0)
+vec2 dequantize(uint ib, uint iqs, uint a_offset) { // Q8_0 variant: reads two neighbouring qs entries of block a_offset + ib
+    const float d = float(data_a[a_offset + ib].d); // scale factor stored in the block
+    return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1])) * d; // qs[iqs] and qs[iqs + 1], converted via int(), scaled by d
+}
+#endif
-- 
cgit v1.2.3