make : add LLAMA_HIP_UMA option (#4587)

NB: setting LLAMA_HIP_UMA=1 (or any non-empty value) adds -DGGML_HIP_UMA to MK_CPPFLAGS
author: Michael Kesper <mkesper@schokokeks.org> 2023-12-22 09:03:25 +0100
committer: GitHub <noreply@github.com> 2023-12-22 10:03:25 +0200
commit: 28cb35a0ecb9852adc3494aa51dde60141939d64 (patch)
tree: 724257ed94bacb18a7f68bfeaf81fe3f4e4c819c
parent: f31b98489824a86c937fa62ccf5dfd4bb0327b86 (diff)
2 files changed, 10 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 68df7702..42686ce7 100644
--- a/Makefile
+++ b/Makefile
@@ -452,6 +452,9 @@ ifdef LLAMA_HIPBLAS
 	LLAMA_CUDA_MMV_Y        ?= 1
 	LLAMA_CUDA_KQUANTS_ITER ?= 2
 	MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
+ifdef LLAMA_HIP_UMA
+	MK_CPPFLAGS += -DGGML_HIP_UMA
+endif # LLAMA_HIP_UMA
 	MK_LDFLAGS  += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
 	MK_LDFLAGS	+= -lhipblas -lamdhip64 -lrocblas
 	HIPFLAGS    += $(addprefix --offload-arch=,$(GPU_TARGETS))
diff --git a/README.md b/README.md
index 8e17d5ba..377d3928 100644
--- a/README.md
+++ b/README.md
@@ -440,7 +440,13 @@ Building the program with BLAS support may lead to some performance improvements
         && cmake --build build -- -j 16
     ```
     On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON`.
-    However, this hurts performance for non-integrated GPUs.
+    However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
+
+  - Using `make` (example for target gfx1030, build with 16 CPU threads):
+    ```bash
+    make -j16 LLAMA_HIPBLAS=1 LLAMA_HIP_UMA=1 AMDGPU_TARGETS=gfx1030
+    ```
+
   - Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU):
     ```bash
     set PATH=%HIP_PATH%\bin;%PATH%
author	Michael Kesper <mkesper@schokokeks.org>	2023-12-22 09:03:25 +0100
committer	GitHub <noreply@github.com>	2023-12-22 10:03:25 +0200
commit	28cb35a0ecb9852adc3494aa51dde60141939d64 (patch)
tree	724257ed94bacb18a7f68bfeaf81fe3f4e4c819c
parent	f31b98489824a86c937fa62ccf5dfd4bb0327b86 (diff)