ggml : add loongarch lsx and lasx support (#6454)

* add loongarch lsx and lasx optimize code
* Add loongarch compilation support to makefile
* revert stb_image.h
* opt bytes_from_nibbles_32 and sum_i16_pairs_float
* fix undeclared
* format code
* update
* update 2

---------

Co-authored-by: Jinyang He <hejinyang@loongson.cn>
author: junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> 2024-05-20 15:19:21 +0800
committer: GitHub <noreply@github.com> 2024-05-20 10:19:21 +0300
commit: 65c58207ece92ad213f4bfd0f91dcb2dfb664f5b (patch)
tree: b6fcdb6e1ee912b34e0ce592edc60d71b6cbbc1d /ggml-impl.h
parent: 1cc0155d04918cb3017afa472acea51b77483c4a (diff)
1 files changed, 28 insertions, 0 deletions
diff --git a/ggml-impl.h b/ggml-impl.h
index 5ff014fe..362d40f4 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -455,6 +455,34 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
 #include <riscv_vector.h>
 #endif
 
+#if defined(__loongarch64)
+#if defined(__loongarch_asx)
+#include <lasxintrin.h>
+#endif
+#if defined(__loongarch_sx)
+#include <lsxintrin.h>
+#endif
+#endif
+
+#if defined(__loongarch_asx)
+
+typedef union { /* overlays an int32_t and a float in the same storage */
+    int32_t i; /* integer view; the helpers in this section read it after writing .f */
+    float f; /* float view; the helpers in this section initialize the union through it */
+} ft_union;
+
+/* float type data load instructions: pass a float's bit pattern (read back as int32_t through ft_union) to an integer-argument vector intrinsic */
+static __m128 __lsx_vreplfr2vr_s(float val) { /* NOTE(review): compiled only under __loongarch_asx although it uses only __lsx_* names -- confirm LSX-only builds do not need it */
+    ft_union fi_tmpval = {.f = val}; /* write val through the float member */
+    return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i); /* read the same storage as int32_t; presumably the intrinsic copies it into every 32-bit lane -- confirm in the LSX intrinsics reference; result is cast to __m128 */
+}
+
+static __m256 __lasx_xvreplfr2vr_s(float val) { /* takes a float, returns the integer intrinsic's result cast to __m256 */
+    ft_union fi_tmpval = {.f = val}; /* write val through the float member of the union */
+    return (__m256)__lasx_xvreplgr2vr_w(fi_tmpval.i); /* read the same storage as int32_t; presumably the intrinsic copies it into every 32-bit lane -- confirm in the LASX intrinsics reference */
+}
+#endif
+
 #ifdef __F16C__
 
 #ifdef _MSC_VER
author	junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>	2024-05-20 15:19:21 +0800
committer	GitHub <noreply@github.com>	2024-05-20 10:19:21 +0300
commit	65c58207ece92ad213f4bfd0f91dcb2dfb664f5b (patch)
tree	b6fcdb6e1ee912b34e0ce592edc60d71b6cbbc1d /ggml-impl.h
parent	1cc0155d04918cb3017afa472acea51b77483c4a (diff)