summaryrefslogtreecommitdiff
path: root/Makefile
diff options
context:
space:
mode:
Diffstat (limited to 'Makefile')
-rw-r--r--Makefile123
1 files changed, 83 insertions, 40 deletions
diff --git a/Makefile b/Makefile
index f77da312..18860cad 100644
--- a/Makefile
+++ b/Makefile
@@ -19,6 +19,7 @@ BUILD_TARGETS = \
llama-imatrix \
llama-infill \
llama-llava-cli \
+ llama-minicpmv-cli\
llama-lookahead \
llama-lookup \
llama-lookup-create \
@@ -528,10 +529,21 @@ ifndef GGML_NO_ACCELERATE
endif
endif # GGML_NO_ACCELERATE
+ifdef GGML_MUSA
+ CC := clang
+ CXX := clang++
+ GGML_CUDA := 1
+ MK_CPPFLAGS += -DGGML_USE_MUSA
+endif
+
ifndef GGML_NO_OPENMP
MK_CPPFLAGS += -DGGML_USE_OPENMP
MK_CFLAGS += -fopenmp
MK_CXXFLAGS += -fopenmp
+ ifdef GGML_MUSA
+ MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
+ MK_LDFLAGS += -L/usr/lib/llvm-10/lib
+ endif # GGML_MUSA
endif # GGML_NO_OPENMP
ifdef GGML_OPENBLAS
@@ -588,15 +600,27 @@ else
endif # GGML_CUDA_FA_ALL_QUANTS
ifdef GGML_CUDA
- ifneq ('', '$(wildcard /opt/cuda)')
- CUDA_PATH ?= /opt/cuda
+ ifdef GGML_MUSA
+ ifneq ('', '$(wildcard /opt/musa)')
+ CUDA_PATH ?= /opt/musa
+ else
+ CUDA_PATH ?= /usr/local/musa
+ endif
+
+ MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include
+ MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64
+ MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_22
else
- CUDA_PATH ?= /usr/local/cuda
- endif
+ ifneq ('', '$(wildcard /opt/cuda)')
+ CUDA_PATH ?= /opt/cuda
+ else
+ CUDA_PATH ?= /usr/local/cuda
+ endif
- MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
- MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
- MK_NVCCFLAGS += -use_fast_math
+ MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
+ MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
+ MK_NVCCFLAGS += -use_fast_math
+ endif # GGML_MUSA
OBJ_GGML += ggml/src/ggml-cuda.o
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
@@ -606,9 +630,11 @@ ifdef LLAMA_FATAL_WARNINGS
MK_NVCCFLAGS += -Werror all-warnings
endif # LLAMA_FATAL_WARNINGS
+ifndef GGML_MUSA
ifndef JETSON_EOL_MODULE_DETECT
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
endif # JETSON_EOL_MODULE_DETECT
+endif # GGML_MUSA
ifdef LLAMA_DEBUG
MK_NVCCFLAGS += -lineinfo
@@ -621,8 +647,12 @@ endif # GGML_CUDA_DEBUG
ifdef GGML_CUDA_NVCC
NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
else
- NVCC = $(CCACHE) nvcc
-endif #GGML_CUDA_NVCC
+ ifdef GGML_MUSA
+ NVCC = $(CCACHE) mcc
+ else
+ NVCC = $(CCACHE) nvcc
+ endif # GGML_MUSA
+endif # GGML_CUDA_NVCC
ifdef CUDA_DOCKER_ARCH
MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
@@ -693,9 +723,15 @@ define NVCC_COMPILE
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
else
+ ifdef GGML_MUSA
+define NVCC_COMPILE
+ $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -c $< -o $@
+endef # NVCC_COMPILE
+ else
define NVCC_COMPILE
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
+ endif # GGML_MUSA
endif # JETSON_EOL_MODULE_DETECT
ggml/src/ggml-cuda/%.o: \
@@ -859,15 +895,16 @@ ggml/src/ggml-metal-embed.o: \
ggml/src/ggml-common.h
@echo "Embedding Metal library"
@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
- $(eval TEMP_ASSEMBLY=$(shell mktemp))
- @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
- @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
- @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
- @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
- @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
- @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
- @$(AS) $(TEMP_ASSEMBLY) -o $@
- @rm -f ${TEMP_ASSEMBLY}
+ $(eval TEMP_ASSEMBLY=$(shell mktemp -d))
+ @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
+ @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
+ @rmdir ${TEMP_ASSEMBLY}
endif
endif # GGML_METAL
@@ -950,6 +987,7 @@ $(info I CXX: $(shell $(CXX) --version | head -n 1))
ifdef GGML_CUDA
$(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
+ifndef GGML_MUSA
ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
ifndef CUDA_DOCKER_ARCH
@@ -959,6 +997,7 @@ endif # CUDA_POWER_ARCH
endif # CUDA_DOCKER_ARCH
endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
+endif # GGML_MUSA
endif # GGML_CUDA
$(info )
@@ -1029,7 +1068,6 @@ ggml/src/iqk/iqk_quantize.o: \
ggml/src/iqk/iqk_quantize.cpp \
ggml/src/iqk/iqk_quantize.h \
ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml/include/ggml.h ggml/src/ggml-impl.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
ifndef GGML_NO_IQKMULMAT
ggml/src/iqk/iqk_mul_mat.o: \
@@ -1189,6 +1227,7 @@ clean:
rm -rvf ggml/*.dll
rm -rvf ggml/*.so
rm -vrf ggml/src/*.o
+ rm -rvf ggml/src/llamafile/*.o
rm -rvf common/build-info.cpp
rm -vrf ggml/src/ggml-metal-embed.metal
rm -vrf ggml/src/ggml-cuda/*.o
@@ -1435,15 +1474,20 @@ libllava.a: examples/llava/llava.cpp \
$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
llama-llava-cli: examples/llava/llava-cli.cpp \
- examples/llava/clip.h \
- examples/llava/clip.cpp \
+ examples/llava/llava.cpp \
examples/llava/llava.h \
+ examples/llava/clip.cpp \
+ examples/llava/clip.h \
+ $(OBJ_ALL)
+ $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
+
+llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
examples/llava/llava.cpp \
+ examples/llava/llava.h \
+ examples/llava/clip.cpp \
+ examples/llava/clip.h \
$(OBJ_ALL)
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
- $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
- $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
- $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
ifeq ($(UNAME_S),Darwin)
swift: examples/batched.swift
@@ -1589,42 +1633,41 @@ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
# Mark legacy binary targets as .PHONY so that they are always checked.
.PHONY: main quantize perplexity embedding server
+# Define the object file target
+examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp
+ $(CXX) $(CXXFLAGS) -c $< -o $@
+
# NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate.
# Eventually we will want to remove these target from building all the time.
-main: examples/deprecation-warning/deprecation-warning.cpp
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
- $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+main: examples/deprecation-warning/deprecation-warning.o
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
@echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead."
-server: examples/deprecation-warning/deprecation-warning.cpp
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+server: examples/deprecation-warning/deprecation-warning.o
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
@echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead."
-quantize: examples/deprecation-warning/deprecation-warning.cpp
+quantize: examples/deprecation-warning/deprecation-warning.o
ifneq (,$(wildcard quantize))
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
@echo "#########"
@echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead."
@echo " Remove the 'quantize' binary to remove this warning."
@echo "#########"
endif
-perplexity: examples/deprecation-warning/deprecation-warning.cpp
+perplexity: examples/deprecation-warning/deprecation-warning.o
ifneq (,$(wildcard perplexity))
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
@echo "#########"
@echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead."
@echo " Remove the 'perplexity' binary to remove this warning."
@echo "#########"
endif
-embedding: examples/deprecation-warning/deprecation-warning.cpp
+embedding: examples/deprecation-warning/deprecation-warning.o
ifneq (,$(wildcard embedding))
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
@echo "#########"
@echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead."
@echo " Remove the 'embedding' binary to remove this warning."