summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.devops/nix/package.nix16
-rw-r--r--.github/workflows/build.yml58
-rw-r--r--CMakeLists.txt42
-rw-r--r--Makefile6
-rw-r--r--README.md25
5 files changed, 122 insertions, 25 deletions
diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index 2c0ae4e2..1c9633cd 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -227,20 +227,20 @@ effectiveStdenv.mkDerivation (
)
]
++ optionals useRocm [
- (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
- (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
-
- # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
- # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
- # and select the line that matches the current nixpkgs version of rocBLAS.
- # Should likely use `rocmPackages.clr.gpuTargets`.
- "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
+ (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+ (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
]
++ optionals useMetalKit [
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
(cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
];
+ # Environment variables needed for ROCm; optionalAttrs (not optionals) because env must be an attribute set
+ env = lib.optionalAttrs useRocm {
+ ROCM_PATH = "${rocmPackages.clr}";
+ HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+ };
+
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
# if they haven't been added yet.
postInstall = ''
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 0742443c..0109cc00 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -392,6 +392,33 @@ jobs:
cmake -DLLAMA_VULKAN=ON ..
cmake --build . --config Release -j $(nproc)
+ ubuntu-22-cmake-hip:
+ runs-on: ubuntu-22.04
+ container: rocm/dev-ubuntu-22.04:6.0.2
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v3
+
+ - name: Dependencies
+ id: depends
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
+
+ - name: Build with native CMake HIP support
+ id: cmake_build
+ run: |
+ cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DLLAMA_HIPBLAS=ON
+ cmake --build build --config Release -j $(nproc)
+
+ - name: Build with legacy HIP support
+ id: cmake_build_legacy_hip
+ run: |
+ cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DLLAMA_HIPBLAS=ON
+ cmake --build build2 --config Release -j $(nproc)
+
ubuntu-22-cmake-sycl:
runs-on: ubuntu-22.04
@@ -989,6 +1016,37 @@ jobs:
path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
name: llama-bin-win-sycl-x64.zip
+ windows-latest-cmake-hip:
+ runs-on: windows-latest
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v3
+
+ - name: Install
+ id: depends
+ run: |
+ $ErrorActionPreference = "Stop"
+ write-host "Downloading AMD HIP SDK Installer"
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+ write-host "Installing AMD HIP SDK"
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+ write-host "Completed AMD HIP SDK installation"
+
+ - name: Verify ROCm
+ id: verify
+ run: |
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+ - name: Build
+ id: cmake_build
+ run: |
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DLLAMA_HIPBLAS=ON
+ cmake --build build --config Release
+
ios-xcode-build:
runs-on: macos-latest
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8ab6a45a..990e34b8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -555,16 +555,37 @@ if (LLAMA_VULKAN)
endif()
if (LLAMA_HIPBLAS)
- list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
-
- if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
- message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
+ if (NOT "$ENV{ROCM_PATH}" STREQUAL "") # explicit non-empty test: a bare if(<path>) treats the path as a variable name and is false for any ordinary path
+ set(ROCM_PATH "$ENV{ROCM_PATH}")
+ else()
+ set(ROCM_PATH /opt/rocm) # fall back to the previously hard-coded prefix
 endif()
+ list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
- if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
- message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+ # CMake on Windows doesn't support the HIP language yet
+ if(WIN32)
+ set(CXX_IS_HIPCC TRUE)
+ else()
+ string(REGEX MATCH "hipcc(\\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}") # "\\." reaches the regex engine as a literal dot; "\." would collapse to "any character"
 endif()
+ if(CXX_IS_HIPCC)
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") # the LINUX variable is only defined by CMake >= 3.25
+ if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+ message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+ endif()
+
+ message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
+ " Prefer setting the HIP compiler directly. See README for details.")
+ endif()
+ else()
+ # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
+ if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
+ set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
+ endif()
+ cmake_minimum_required(VERSION 3.21) # first CMake release with HIP language support
+ enable_language(HIP)
+ endif()
find_package(hip REQUIRED)
find_package(hipblas REQUIRED)
find_package(rocblas REQUIRED)
@@ -598,13 +619,18 @@ if (LLAMA_HIPBLAS)
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
- set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+ if (CXX_IS_HIPCC)
+ set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device)
+ else()
+ set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
+ endif()
if (LLAMA_STATIC)
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
endif()
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
endif()
if (LLAMA_SYCL)
diff --git a/Makefile b/Makefile
index 3fa56d13..22d52185 100644
--- a/Makefile
+++ b/Makefile
@@ -560,10 +560,10 @@ endif # LLAMA_VULKAN
ifdef LLAMA_HIPBLAS
ifeq ($(wildcard /opt/rocm),)
ROCM_PATH ?= /usr
- GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
+ AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
else
ROCM_PATH ?= /opt/rocm
- GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
+ AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
endif
HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
LLAMA_CUDA_DMMV_X ?= 32
@@ -575,7 +575,7 @@ ifdef LLAMA_HIP_UMA
endif # LLAMA_HIP_UMA
MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
- HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
+ HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
diff --git a/README.md b/README.md
index 5d6217d1..7dd6fc0e 100644
--- a/README.md
+++ b/README.md
@@ -528,13 +528,28 @@ Building the program with BLAS support may lead to some performance improvements
```
- Using `CMake` for Linux (assuming a gfx1030-compatible AMD GPU):
```bash
- CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
- cmake -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+ HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
+ cmake -S . -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
&& cmake --build build --config Release -- -j 16
```
On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON`.
However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
+ Note that if you get the following error:
+ ```
+ clang: error: cannot find ROCm device library; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
+ ```
+ Try searching for a directory under `HIP_PATH` that contains the file
+ `oclc_abi_version_400.bc`. Then, add the following to the start of the
+ command: `HIP_DEVICE_LIB_PATH=<directory-you-just-found>`, so something
+ like:
+ ```bash
+ HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
+ HIP_DEVICE_LIB_PATH=<directory-you-just-found> \
+ cmake -S . -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+ && cmake --build build -- -j 16
+ ```
+
- Using `make` (example for target gfx1030, build with 16 CPU threads):
```bash
make -j16 LLAMA_HIPBLAS=1 LLAMA_HIP_UMA=1 AMDGPU_TARGETS=gfx1030
@@ -543,10 +558,8 @@ Building the program with BLAS support may lead to some performance improvements
- Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU):
```bash
set PATH=%HIP_PATH%\bin;%PATH%
- mkdir build
- cd build
- cmake -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release ..
- cmake --build .
+ cmake -S . -B build -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
+ cmake --build build
```
Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100` that corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors)
Find your gpu version string by matching the most significant version information from `rocminfo | grep gfx | head -1 | awk '{print $2}'` with the list of processors, e.g. `gfx1035` maps to `gfx1030`.