diff options
author | FantasyGmm <16450052+FantasyGmm@users.noreply.github.com> | 2023-12-22 23:11:12 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-22 17:11:12 +0200 |
commit | a55876955b1a83464171de8d578d3ab062a7b62d (patch) | |
tree | 38b9d438a1fc56f3d2f53966673f42a2c70ae2f6 /ggml-quants.c | |
parent | 6724ef16573ec7ecce620be56cbbff145856b2fb (diff) |
cuda : fix jetson compile error (#4560)
* fix old jetson compile error
* Update Makefile
* update jetson detect and cuda version detect
* update cuda marco define
* update makefile and cuda,fix some issue
* Update README.md
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update Makefile
* Update README.md
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml-quants.c')
-rw-r--r-- | ggml-quants.c | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/ggml-quants.c b/ggml-quants.c index 0e8163a1..a15a2404 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -3677,7 +3677,7 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri const uint8x16_t mins = vshrq_n_u8(mins_and_scales, 4); const ggml_int16x8x2_t q8sums = ggml_vld1q_s16_x2(y[i].bsums); - const ggml_int16x8x2_t mins16 = {vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mins))), vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mins)))}; + const ggml_int16x8x2_t mins16 = {{vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mins))), vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mins)))}}; const int32x4_t s0 = vaddq_s32(vmull_s16(vget_low_s16 (mins16.val[0]), vget_low_s16 (q8sums.val[0])), vmull_s16(vget_high_s16(mins16.val[0]), vget_high_s16(q8sums.val[0]))); const int32x4_t s1 = vaddq_s32(vmull_s16(vget_low_s16 (mins16.val[1]), vget_low_s16 (q8sums.val[1])), @@ -6626,7 +6626,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri const ggml_int16x8x2_t q8sums = ggml_vld1q_s16_x2(y[i].bsums); const int8x16_t scales = vld1q_s8(scale); - const ggml_int16x8x2_t q6scales = {vmovl_s8(vget_low_s8(scales)), vmovl_s8(vget_high_s8(scales))}; + const ggml_int16x8x2_t q6scales = {{vmovl_s8(vget_low_s8(scales)), vmovl_s8(vget_high_s8(scales))}}; const int32x4_t prod = vaddq_s32(vaddq_s32(vmull_s16(vget_low_s16 (q8sums.val[0]), vget_low_s16 (q6scales.val[0])), vmull_s16(vget_high_s16(q8sums.val[0]), vget_high_s16(q6scales.val[0]))), |