summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorautomaticcat <daogiatuank54@gmail.com>2023-12-30 15:07:48 +0700
committerGitHub <noreply@github.com>2023-12-30 10:07:48 +0200
commit24a447e20af425fa44cf10feaa632b6bb596c80f (patch)
tree66612870e12dd4be3b05f47371c3a0c5e8346da7
parenta20f3c7465d6d1b33767757c2760643b799a81bf (diff)
ggml : add ggml_cpu_has_avx_vnni() (#4589)
* feat: add avx_vnni based on intel documents * ggml: add avx vnni based on intel document * llama: add avx vnni information display * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * Update ggml.c Fix indentation upgate Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
-rw-r--r--README.md30
-rw-r--r--common/common.cpp1
-rw-r--r--ggml.c8
-rw-r--r--ggml.h1
-rw-r--r--llama.cpp1
5 files changed, 33 insertions, 8 deletions
diff --git a/README.md b/README.md
index 48dcd646..ca6d14e1 100644
--- a/README.md
+++ b/README.md
@@ -385,16 +385,30 @@ Building the program with BLAS support may lead to some performance improvements
Check [BLIS.md](docs/BLIS.md) for more information.
-- #### Intel MKL
+- #### Intel oneMKL
+ - Using manual oneAPI installation:
+ By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you have already sourced the Intel environment script and pass `-DLLAMA_BLAS=ON` to cmake, the MKL version of BLAS will automatically be selected. Otherwise, please install oneAPI and follow the steps below:
+ ```bash
+ mkdir build
+ cd build
+ source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-runtime docker image, only required for manual installation
+ cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
+ cmake --build . --config Release
+ ```
- By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. You may also specify it by:
+ - Using oneAPI docker image:
+ If you do not want to source the environment vars and install oneAPI manually, you can also build the code using intel docker container: [oneAPI-runtime](https://hub.docker.com/r/intel/oneapi-runtime)
- ```bash
- mkdir build
- cd build
- cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
- cmake --build . --config Release
- ```
+ ```bash
+ mkdir build
+ cd build
+ cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
+ cmake --build . --config Release
+ ```
+
+ Building with the oneAPI compilers makes the avx_vnni instruction set available on Intel processors that do not support avx512 and avx512_vnni.
+
+ Check [Optimizing and Running LLaMA2 on Intel® CPU](https://www.intel.com/content/www/us/en/content-details/791610/optimizing-and-running-llama2-on-intel-cpu.html) for more information.
- #### cuBLAS
diff --git a/common/common.cpp b/common/common.cpp
index b3425ab0..eacaee18 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1394,6 +1394,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
fprintf(stream, "build_number: %d\n", LLAMA_BUILD_NUMBER);
fprintf(stream, "cpu_has_arm_fma: %s\n", ggml_cpu_has_arm_fma() ? "true" : "false");
fprintf(stream, "cpu_has_avx: %s\n", ggml_cpu_has_avx() ? "true" : "false");
+ fprintf(stream, "cpu_has_avx_vnni: %s\n", ggml_cpu_has_avx_vnni() ? "true" : "false");
fprintf(stream, "cpu_has_avx2: %s\n", ggml_cpu_has_avx2() ? "true" : "false");
fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false");
fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false");
diff --git a/ggml.c b/ggml.c
index a9e1ea9b..bcec200f 100644
--- a/ggml.c
+++ b/ggml.c
@@ -19638,6 +19638,14 @@ int ggml_cpu_has_avx(void) {
#endif
}
+int ggml_cpu_has_avx_vnni(void) { // reports whether this build was compiled with __AVXVNNI__ defined; the answer is fixed at compile time, no run-time CPU probing happens here
+#if defined(__AVXVNNI__)
+ return 1; // __AVXVNNI__ was defined by the compiler for this translation unit
+#else
+ return 0; // __AVXVNNI__ was not defined when this file was compiled
+#endif
+}
+
int ggml_cpu_has_avx2(void) {
#if defined(__AVX2__)
return 1;
diff --git a/ggml.h b/ggml.h
index 67d6bc4f..64f4e45e 100644
--- a/ggml.h
+++ b/ggml.h
@@ -2198,6 +2198,7 @@ extern "C" {
//
GGML_API int ggml_cpu_has_avx (void);
+ GGML_API int ggml_cpu_has_avx_vnni (void);
GGML_API int ggml_cpu_has_avx2 (void);
GGML_API int ggml_cpu_has_avx512 (void);
GGML_API int ggml_cpu_has_avx512_vbmi(void);
diff --git a/llama.cpp b/llama.cpp
index 68c7cced..a833d4c1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -10780,6 +10780,7 @@ const char * llama_print_system_info(void) {
s = "";
s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | ";
+ s += "AVX_VNNI = " + std::to_string(ggml_cpu_has_avx_vnni()) + " | ";
s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | ";
s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | ";
s += "AVX512_VBMI = " + std::to_string(ggml_cpu_has_avx512_vbmi()) + " | ";