diff options
Diffstat (limited to 'Makefile')
| -rw-r--r-- | Makefile | 123 |
1 files changed, 83 insertions, 40 deletions
@@ -19,6 +19,7 @@ BUILD_TARGETS = \ llama-imatrix \ llama-infill \ llama-llava-cli \ + llama-minicpmv-cli\ llama-lookahead \ llama-lookup \ llama-lookup-create \ @@ -528,10 +529,21 @@ ifndef GGML_NO_ACCELERATE endif endif # GGML_NO_ACCELERATE +ifdef GGML_MUSA + CC := clang + CXX := clang++ + GGML_CUDA := 1 + MK_CPPFLAGS += -DGGML_USE_MUSA +endif + ifndef GGML_NO_OPENMP MK_CPPFLAGS += -DGGML_USE_OPENMP MK_CFLAGS += -fopenmp MK_CXXFLAGS += -fopenmp + ifdef GGML_MUSA + MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp + MK_LDFLAGS += -L/usr/lib/llvm-10/lib + endif # GGML_MUSA endif # GGML_NO_OPENMP ifdef GGML_OPENBLAS @@ -588,15 +600,27 @@ else endif # GGML_CUDA_FA_ALL_QUANTS ifdef GGML_CUDA - ifneq ('', '$(wildcard /opt/cuda)') - CUDA_PATH ?= /opt/cuda + ifdef GGML_MUSA + ifneq ('', '$(wildcard /opt/musa)') + CUDA_PATH ?= /opt/musa + else + CUDA_PATH ?= /usr/local/musa + endif + + MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include + MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64 + MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_22 else - CUDA_PATH ?= /usr/local/cuda - endif + ifneq ('', '$(wildcard /opt/cuda)') + CUDA_PATH ?= /opt/cuda + else + CUDA_PATH ?= /usr/local/cuda + endif - MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS - MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib - MK_NVCCFLAGS += -use_fast_math + MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS + MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib + MK_NVCCFLAGS += -use_fast_math + endif # GGML_MUSA OBJ_GGML += ggml/src/ggml-cuda.o OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) @@ -606,9 +630,11 @@ ifdef LLAMA_FATAL_WARNINGS MK_NVCCFLAGS += -Werror all-warnings endif # LLAMA_FATAL_WARNINGS +ifndef GGML_MUSA ifndef JETSON_EOL_MODULE_DETECT MK_NVCCFLAGS += --forward-unknown-to-host-compiler endif # JETSON_EOL_MODULE_DETECT +endif # GGML_MUSA ifdef LLAMA_DEBUG MK_NVCCFLAGS += -lineinfo @@ -621,8 +647,12 @@ endif # GGML_CUDA_DEBUG ifdef GGML_CUDA_NVCC NVCC = $(CCACHE) $(GGML_CUDA_NVCC) else - NVCC = $(CCACHE) nvcc -endif #GGML_CUDA_NVCC + ifdef GGML_MUSA + NVCC = $(CCACHE) mcc + else + NVCC = $(CCACHE) nvcc + endif # GGML_MUSA +endif # GGML_CUDA_NVCC ifdef CUDA_DOCKER_ARCH MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) @@ -693,9 +723,15 @@ define NVCC_COMPILE $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ endef # NVCC_COMPILE else + ifdef GGML_MUSA +define NVCC_COMPILE + $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -c $< -o $@ +endef # NVCC_COMPILE + else define NVCC_COMPILE $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ endef # NVCC_COMPILE + endif # GGML_MUSA endif # JETSON_EOL_MODULE_DETECT ggml/src/ggml-cuda/%.o: \ @@ -859,15 +895,16 @@ ggml/src/ggml-metal-embed.o: \ ggml/src/ggml-common.h @echo "Embedding Metal library" @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal - $(eval TEMP_ASSEMBLY=$(shell mktemp)) - @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY) - @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY) - @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY) - @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY) - @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY) - @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY) - @$(AS) $(TEMP_ASSEMBLY) -o $@ - @rm -f ${TEMP_ASSEMBLY} + $(eval TEMP_ASSEMBLY=$(shell mktemp -d)) + @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@ + @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s + @rmdir ${TEMP_ASSEMBLY} endif endif # GGML_METAL @@ -950,6 +987,7 @@ $(info I CXX: $(shell $(CXX) --version | head -n 1)) ifdef GGML_CUDA $(info I NVCC: $(shell $(NVCC) --version | tail -n 1)) CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])') +ifndef GGML_MUSA ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1) ifndef CUDA_DOCKER_ARCH @@ -959,6 +997,7 @@ endif # CUDA_POWER_ARCH endif # CUDA_DOCKER_ARCH endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1) +endif # GGML_MUSA endif # GGML_CUDA $(info ) @@ -1029,7 +1068,6 @@ ggml/src/iqk/iqk_quantize.o: \ ggml/src/iqk/iqk_quantize.cpp \ ggml/src/iqk/iqk_quantize.h \ ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml/include/ggml.h ggml/src/ggml-impl.h - $(CXX) $(CXXFLAGS) -c $< -o $@ ifndef GGML_NO_IQKMULMAT ggml/src/iqk/iqk_mul_mat.o: \ @@ -1189,6 +1227,7 @@ clean: rm -rvf ggml/*.dll rm -rvf ggml/*.so rm -vrf ggml/src/*.o + rm -rvf ggml/src/llamafile/*.o rm -rvf common/build-info.cpp rm -vrf ggml/src/ggml-metal-embed.metal rm -vrf ggml/src/ggml-cuda/*.o @@ -1435,15 +1474,20 @@ libllava.a: examples/llava/llava.cpp \ $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual llama-llava-cli: examples/llava/llava-cli.cpp \ - examples/llava/clip.h \ - examples/llava/clip.cpp \ + examples/llava/llava.cpp \ examples/llava/llava.h \ + examples/llava/clip.cpp \ + examples/llava/clip.h \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual + +llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \ examples/llava/llava.cpp \ + examples/llava/llava.h \ + examples/llava/clip.cpp \ + examples/llava/clip.h \ $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual - $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp) - $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual ifeq ($(UNAME_S),Darwin) swift: examples/batched.swift @@ -1589,42 +1633,41 @@ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \ # Mark legacy binary targets as .PHONY so that they are always checked. .PHONY: main quantize perplexity embedding server +# Define the object file target +examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp + $(CXX) $(CXXFLAGS) -c $< -o $@ + # NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate. # Eventually we will want to remove these target from building all the time. -main: examples/deprecation-warning/deprecation-warning.cpp - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) +main: examples/deprecation-warning/deprecation-warning.o + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) @echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead." -server: examples/deprecation-warning/deprecation-warning.cpp - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) +server: examples/deprecation-warning/deprecation-warning.o + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) @echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead." -quantize: examples/deprecation-warning/deprecation-warning.cpp +quantize: examples/deprecation-warning/deprecation-warning.o ifneq (,$(wildcard quantize)) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) @echo "#########" @echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead." @echo " Remove the 'quantize' binary to remove this warning." @echo "#########" endif -perplexity: examples/deprecation-warning/deprecation-warning.cpp +perplexity: examples/deprecation-warning/deprecation-warning.o ifneq (,$(wildcard perplexity)) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) @echo "#########" @echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead." @echo " Remove the 'perplexity' binary to remove this warning." @echo "#########" endif -embedding: examples/deprecation-warning/deprecation-warning.cpp +embedding: examples/deprecation-warning/deprecation-warning.o ifneq (,$(wildcard embedding)) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) @echo "#########" @echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead." @echo " Remove the 'embedding' binary to remove this warning." |
