Diffstat (limited to 'scripts')
-rw-r--r--  scripts/LlamaConfig.cmake.in      61
-rw-r--r--  scripts/build-info.cmake          58
-rwxr-xr-x  scripts/build-info.sh             10
-rwxr-xr-x  scripts/check-requirements.sh     26
-rwxr-xr-x  scripts/compare-commits.sh         2
-rwxr-xr-x  scripts/compare-llama-bench.py     8
-rwxr-xr-x  scripts/convert-gg.sh             26
-rwxr-xr-x  scripts/debug-test.sh              2
-rw-r--r--  scripts/gen-build-info-cpp.cmake  24
-rw-r--r--  scripts/gen-unicode-data.py       16
-rw-r--r--  scripts/pod-llama.sh              31
-rw-r--r--  scripts/server-llm.sh              2
-rwxr-xr-x  scripts/sync-ggml-am.sh          135
-rw-r--r--  scripts/sync-ggml.last             2
-rwxr-xr-x  scripts/sync-ggml.sh              69
15 files changed, 164 insertions, 308 deletions
diff --git a/scripts/LlamaConfig.cmake.in b/scripts/LlamaConfig.cmake.in
deleted file mode 100644
index 9311055d..00000000
--- a/scripts/LlamaConfig.cmake.in
+++ /dev/null
@@ -1,61 +0,0 @@
-set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@)
-set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
-set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
-set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
-set(LLAMA_BLAS @LLAMA_BLAS@)
-set(LLAMA_CUDA @LLAMA_CUDA@)
-set(LLAMA_METAL @LLAMA_METAL@)
-set(LLAMA_HIPBLAS @LLAMA_HIPBLAS@)
-set(LLAMA_ACCELERATE @LLAMA_ACCELERATE@)
-
-@PACKAGE_INIT@
-
-set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
-set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
-set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
-
-# Ensure transient dependencies satisfied
-
-find_package(Threads REQUIRED)
-if (APPLE AND LLAMA_ACCELERATE)
- find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
-endif()
-
-if (LLAMA_BLAS)
- find_package(BLAS REQUIRED)
-endif()
-
-if (LLAMA_CUDA)
- find_package(CUDAToolkit REQUIRED)
-endif()
-
-if (LLAMA_METAL)
- find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
- find_library(METAL_FRAMEWORK Metal REQUIRED)
- find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
-endif()
-
-if (LLAMA_HIPBLAS)
- find_package(hip REQUIRED)
- find_package(hipblas REQUIRED)
- find_package(rocblas REQUIRED)
-endif()
-
-find_library(llama_LIBRARY llama
- REQUIRED
- HINTS ${LLAMA_LIB_DIR})
-
-set(_llama_link_deps "Threads::Threads" "@LLAMA_EXTRA_LIBS@")
-set(_llama_transient_defines "@LLAMA_TRANSIENT_DEFINES@")
-add_library(llama UNKNOWN IMPORTED)
-set_target_properties(llama
- PROPERTIES
- INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
- INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
- INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
- IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
- IMPORTED_LOCATION "${llama_LIBRARY}"
- INTERFACE_COMPILE_FEATURES cxx_std_11
- POSITION_INDEPENDENT_CODE ON )
-
-check_required_components(Llama)
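
For context, the deleted template backed find_package(Llama) for downstream CMake projects. A minimal sketch of the consumer side it used to serve, assuming an illustrative install prefix of /opt/llama containing lib/cmake/Llama/LlamaConfig.cmake:

    # configure a hypothetical consumer project against the installed package;
    # its CMakeLists.txt would call find_package(Llama REQUIRED)
    cmake -S consumer -B build -DCMAKE_PREFIX_PATH=/opt/llama
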
diff --git a/scripts/build-info.cmake b/scripts/build-info.cmake
deleted file mode 100644
index ea3dc55c..00000000
--- a/scripts/build-info.cmake
+++ /dev/null
@@ -1,58 +0,0 @@
-set(BUILD_NUMBER 0)
-set(BUILD_COMMIT "unknown")
-set(BUILD_COMPILER "unknown")
-set(BUILD_TARGET "unknown")
-
-# Look for git
-find_package(Git)
-if(NOT Git_FOUND)
- find_program(GIT_EXECUTABLE NAMES git git.exe)
- if(GIT_EXECUTABLE)
- set(Git_FOUND TRUE)
- message(STATUS "Found Git: ${GIT_EXECUTABLE}")
- else()
- message(WARNING "Git not found. Build info will not be accurate.")
- endif()
-endif()
-
-# Get the commit count and hash
-if(Git_FOUND)
- execute_process(
- COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- OUTPUT_VARIABLE HEAD
- OUTPUT_STRIP_TRAILING_WHITESPACE
- RESULT_VARIABLE RES
- )
- if (RES EQUAL 0)
- set(BUILD_COMMIT ${HEAD})
- endif()
- execute_process(
- COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- OUTPUT_VARIABLE COUNT
- OUTPUT_STRIP_TRAILING_WHITESPACE
- RESULT_VARIABLE RES
- )
- if (RES EQUAL 0)
- set(BUILD_NUMBER ${COUNT})
- endif()
-endif()
-
-if(MSVC)
- set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
- set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
-else()
- execute_process(
- COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
- OUTPUT_VARIABLE OUT
- OUTPUT_STRIP_TRAILING_WHITESPACE
- )
- set(BUILD_COMPILER ${OUT})
- execute_process(
- COMMAND ${CMAKE_C_COMPILER} -dumpmachine
- OUTPUT_VARIABLE OUT
- OUTPUT_STRIP_TRAILING_WHITESPACE
- )
- set(BUILD_TARGET ${OUT})
-endif()
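
The non-MSVC branch above leans on a compact sh -c idiom: the literal _ fills $0 and the compiler path becomes $1, so the unquoted $@ inside the script expands to the compiler. A standalone reproduction (cc is an illustrative compiler name):

    # runs `cc --version | head -1`; sh assigns _ to $0 and cc to $1
    sh -c '$@ --version | head -1' _ cc
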
diff --git a/scripts/build-info.sh b/scripts/build-info.sh
index 32682afb..fa9e7bac 100755
--- a/scripts/build-info.sh
+++ b/scripts/build-info.sh
@@ -8,20 +8,20 @@ build_compiler="unknown"
build_target="unknown"
if out=$(git rev-list --count HEAD); then
- # git is broken on WSL so we need to strip extra newlines
- build_number=$(printf '%s' "$out" | tr -d '\n')
+ # git is broken on WSL so we need to strip extra newlines
+ build_number=$(printf '%s' "$out" | tr -d '\n')
fi
if out=$(git rev-parse --short HEAD); then
- build_commit=$(printf '%s' "$out" | tr -d '\n')
+ build_commit=$(printf '%s' "$out" | tr -d '\n')
fi
if out=$($CC --version | head -1); then
- build_compiler=$out
+ build_compiler=$out
fi
if out=$($CC -dumpmachine); then
- build_target=$out
+ build_target=$out
fi
echo "int LLAMA_BUILD_NUMBER = ${build_number};"
diff --git a/scripts/check-requirements.sh b/scripts/check-requirements.sh
index 0c6afdd5..d3bbded1 100755
--- a/scripts/check-requirements.sh
+++ b/scripts/check-requirements.sh
@@ -97,9 +97,9 @@ check_requirements() {
}
check_convert_script() {
- local py=$1 # e.g. ./convert-hf-to-gguf.py
- local pyname=${py##*/} # e.g. convert-hf-to-gguf.py
- pyname=${pyname%.py} # e.g. convert-hf-to-gguf
+ local py=$1 # e.g. ./convert_hf_to_gguf.py
+ local pyname=${py##*/} # e.g. convert_hf_to_gguf.py
+ pyname=${pyname%.py} # e.g. convert_hf_to_gguf
info "$py: beginning check"
@@ -108,6 +108,11 @@ check_convert_script() {
fatal "$py missing requirements. Expected: $reqs"
fi
+ # Check that all sub-requirements are added to top-level requirements.txt
+ if ! grep -qF "$reqs" requirements.txt; then
+ fatal "$reqs needs to be added to requirements.txt"
+ fi
+
local venv="$workdir/$pyname-venv"
python3 -m venv "$venv"
@@ -134,12 +139,7 @@ EOF
readonly ignore_eq_eq='check_requirements: ignore "=="'
-for req in "$reqs_dir"/*; do
- # Check that all sub-requirements are added to top-level requirements.txt
- if ! grep -qF "$req" requirements.txt; then
- fatal "$req needs to be added to requirements.txt"
- fi
-
+for req in */**/requirements*.txt; do
# Make sure exact release versions aren't being pinned in the requirements
# Filters out the ignore string
if grep -vF "$ignore_eq_eq" "$req" | grep -q '=='; then
@@ -166,12 +166,12 @@ if (( do_cleanup )); then
rm -rf -- "$all_venv"
fi
-check_convert_script examples/convert-legacy-llama.py
-for py in convert-*.py; do
- # skip convert-hf-to-gguf-update.py
+check_convert_script examples/convert_legacy_llama.py
+for py in convert_*.py; do
+ # skip convert_hf_to_gguf_update.py
# TODO: the check is failing for some reason:
# https://github.com/ggerganov/llama.cpp/actions/runs/8875330981/job/24364557177?pr=6920
- [[ $py == convert-hf-to-gguf-update.py ]] && continue
+ [[ $py == convert_hf_to_gguf_update.py ]] && continue
check_convert_script "$py"
done
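
Note that the new */**/requirements*.txt glob only recurses if bash's globstar option is on; the sketch below makes that assumption explicit (the shopt line is presumed to be set elsewhere in the script):

    shopt -s globstar                      # without this, '**' degrades to '*'
    for req in */**/requirements*.txt; do
        printf 'checking %s\n' "$req"
    done
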
diff --git a/scripts/compare-commits.sh b/scripts/compare-commits.sh
index a45cd396..70679f4e 100755
--- a/scripts/compare-commits.sh
+++ b/scripts/compare-commits.sh
@@ -12,7 +12,7 @@ bench_args="${@:3}"
rm -f llama-bench.sqlite > /dev/null
-# to test a backend, call the script with the corresponding environment variable (e.g. LLAMA_CUDA=1 ./scripts/compare-commits.sh ...)
+# to test a backend, call the script with the corresponding environment variable (e.g. GGML_CUDA=1 ./scripts/compare-commits.sh ...)
git checkout $1 > /dev/null
make clean > /dev/null
diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py
index 513dde5e..92b9e682 100755
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -123,13 +123,13 @@ builds = cursor.execute("SELECT DISTINCT build_commit FROM test;").fetchall()
try:
repo = git.Repo(".", search_parent_directories=True)
-except git.exc.InvalidGitRepositoryError:
+except git.InvalidGitRepositoryError:
repo = None
-def find_parent_in_data(commit):
+def find_parent_in_data(commit: git.Commit):
"""Helper function to find the most recent parent measured in number of commits for which there is data."""
- heap = [(0, commit)]
+ heap: list[tuple[int, git.Commit]] = [(0, commit)]
seen_hexsha8 = set()
while heap:
depth, current_commit = heapq.heappop(heap)
@@ -144,7 +144,7 @@ def find_parent_in_data(commit):
return None
-def get_all_parent_hexsha8s(commit):
+def get_all_parent_hexsha8s(commit: git.Commit):
"""Helper function to recursively get hexsha8 values for all parents of a commit."""
unvisited = [commit]
visited = []
diff --git a/scripts/convert-gg.sh b/scripts/convert-gg.sh
deleted file mode 100755
index 8a016843..00000000
--- a/scripts/convert-gg.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# LLaMA v1
-python3 examples/convert-legacy-llama.py ../llama1/7B --outfile models/llama-7b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama1/13B --outfile models/llama-13b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama1/30B --outfile models/llama-30b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama1/65B --outfile models/llama-65b/ggml-model-f16.gguf --outtype f16
-
-# LLaMA v2
-python3 examples/convert-legacy-llama.py ../llama2/llama-2-7b --outfile models/llama-7b-v2/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama2/llama-2-13b --outfile models/llama-13b-v2/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama2/llama-2-70b --outfile models/llama-70b-v2/ggml-model-f16.gguf --outtype f16
-
-# Code Llama
-python3 examples/convert-legacy-llama.py ../codellama/CodeLlama-7b/ --outfile models/codellama-7b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../codellama/CodeLlama-13b/ --outfile models/codellama-13b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../codellama/CodeLlama-34b/ --outfile models/codellama-34b/ggml-model-f16.gguf --outtype f16
-
-# Falcon
-python3 convert-falcon-hf-to-gguf.py ../falcon/falcon-7b 1
-mv -v ../falcon/falcon-7b/ggml-model-f16.gguf models/falcon-7b/ggml-model-f16.gguf
-
-python3 convert-falcon-hf-to-gguf.py ../falcon/falcon-40b 1
-mv -v ../falcon/falcon-40b/ggml-model-f16.gguf models/falcon-40b/ggml-model-f16.gguf
diff --git a/scripts/debug-test.sh b/scripts/debug-test.sh
index 7b2b601a..91946c51 100755
--- a/scripts/debug-test.sh
+++ b/scripts/debug-test.sh
@@ -110,7 +110,7 @@ rm -rf "$build_dir" && mkdir "$build_dir" || abort "Failed to make $build_dir"
###########################################################
# Note: test-eval-callback requires -DLLAMA_CURL
-cmake -B "./$build_dir" -DCMAKE_BUILD_TYPE=Debug -DLLAMA_CUDA=1 -DLLAMA_CURL=1 || abort "Failed to build environment"
+cmake -B "./$build_dir" -DCMAKE_BUILD_TYPE=Debug -DGGML_CUDA=1 -DLLAMA_CURL=1 || abort "Failed to build environment"
pushd "$build_dir"
make -j || abort "Failed to compile"
popd > /dev/null || exit 1
diff --git a/scripts/gen-build-info-cpp.cmake b/scripts/gen-build-info-cpp.cmake
deleted file mode 100644
index d8933892..00000000
--- a/scripts/gen-build-info-cpp.cmake
+++ /dev/null
@@ -1,24 +0,0 @@
-include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
-
-set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in")
-set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp")
-
-# Only write the build info if it changed
-if(EXISTS ${OUTPUT_FILE})
- file(READ ${OUTPUT_FILE} CONTENTS)
- string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS})
- set(OLD_COMMIT ${CMAKE_MATCH_1})
- string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS})
- set(OLD_COMPILER ${CMAKE_MATCH_1})
- string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS})
- set(OLD_TARGET ${CMAKE_MATCH_1})
- if (
- NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR
- NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
- NOT OLD_TARGET STREQUAL BUILD_TARGET
- )
- configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
- endif()
-else()
- configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
-endif()
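
The deleted helper implemented a write-if-changed guard: build-info.cpp is regenerated only when the embedded commit, compiler, or target actually differs, so incremental builds do not recompile it on every configure. The same idea in shell terms (generate_build_info is a hypothetical stand-in for the configure_file step):

    generate_build_info > build-info.cpp.new
    cmp -s build-info.cpp.new common/build-info.cpp \
        || mv build-info.cpp.new common/build-info.cpp   # replace only on change
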
diff --git a/scripts/gen-unicode-data.py b/scripts/gen-unicode-data.py
index 890e4d7c..2d9bde01 100644
--- a/scripts/gen-unicode-data.py
+++ b/scripts/gen-unicode-data.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
import array
import unicodedata
import requests
@@ -133,7 +135,7 @@ table_nfd.sort()
# group ranges with same flags
-ranges_flags = [(0, codepoint_flags[0])] # start, flags
+ranges_flags: list[tuple[int, int]] = [(0, codepoint_flags[0])] # start, flags
for codepoint, flags in enumerate(codepoint_flags):
if flags != ranges_flags[-1][1]:
ranges_flags.append((codepoint, flags))
@@ -141,11 +143,11 @@ ranges_flags.append((MAX_CODEPOINTS, 0x0000))
# group ranges with same nfd
-ranges_nfd = [(0, 0, 0)] # start, last, nfd
+ranges_nfd: list[tuple[int, int, int]] = [(0, 0, 0)] # start, last, nfd
for codepoint, norm in table_nfd:
start = ranges_nfd[-1][0]
if ranges_nfd[-1] != (start, codepoint - 1, norm):
- ranges_nfd.append(None)
+ ranges_nfd.append(None) # type: ignore[arg-type] # dummy, will be replaced below
start = codepoint
ranges_nfd[-1] = (start, codepoint, norm)
@@ -179,13 +181,13 @@ for codepoint in table_whitespace:
out("};\n")
out("const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {")
-for tuple in table_lowercase:
- out("{0x%06X, 0x%06X}," % tuple)
+for tuple_lw in table_lowercase:
+ out("{0x%06X, 0x%06X}," % tuple_lw)
out("};\n")
out("const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {")
-for tuple in table_uppercase:
- out("{0x%06X, 0x%06X}," % tuple)
+for tuple_up in table_uppercase:
+ out("{0x%06X, 0x%06X}," % tuple_up)
out("};\n")
out("const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd")
diff --git a/scripts/pod-llama.sh b/scripts/pod-llama.sh
index 6ba499a2..6e56e1ed 100644
--- a/scripts/pod-llama.sh
+++ b/scripts/pod-llama.sh
@@ -42,7 +42,7 @@ git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
-LLAMA_CUDA=1 make -j
+GGML_CUDA=1 make -j
ln -sfn /workspace/TinyLlama-1.1B-Chat-v0.3 ./models/tinyllama-1b
ln -sfn /workspace/CodeLlama-7b-hf ./models/codellama-7b
@@ -60,7 +60,7 @@ cd /workspace/llama.cpp
mkdir build-cublas
cd build-cublas
-cmake -DLLAMA_CUDA=1 ../
+cmake -DGGML_CUDA=1 ../
make -j
if [ "$1" -eq "0" ]; then
@@ -75,7 +75,7 @@ if [ "$1" -eq "1" ]; then
cd /workspace/llama.cpp
- python3 examples/convert-legacy-llama.py ./models/tinyllama-1b --outfile ./models/tinyllama-1b/ggml-model-f16.gguf --outtype f16
+ python3 examples/convert_legacy_llama.py ./models/tinyllama-1b --outfile ./models/tinyllama-1b/ggml-model-f16.gguf --outtype f16
./llama-quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_0.gguf q4_0
./llama-quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_k.gguf q4_k
@@ -90,7 +90,7 @@ if [ "$1" -eq "2" ]; then
cd /workspace/llama.cpp
- python3 examples/convert-legacy-llama.py ./models/codellama-7b --outfile ./models/codellama-7b/ggml-model-f16.gguf --outtype f16
+ python3 examples/convert_legacy_llama.py ./models/codellama-7b --outfile ./models/codellama-7b/ggml-model-f16.gguf --outtype f16
./llama-quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_0.gguf q4_0
./llama-quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_k.gguf q4_k
@@ -105,7 +105,7 @@ if [ "$1" -eq "3" ]; then
cd /workspace/llama.cpp
- python3 examples/convert-legacy-llama.py ./models/codellama-13b --outfile ./models/codellama-13b/ggml-model-f16.gguf --outtype f16
+ python3 examples/convert_legacy_llama.py ./models/codellama-13b --outfile ./models/codellama-13b/ggml-model-f16.gguf --outtype f16
./llama-quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_0.gguf q4_0
./llama-quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_k.gguf q4_k
@@ -120,7 +120,7 @@ if [ "$1" -eq "4" ]; then
cd /workspace/llama.cpp
- python3 examples/convert-legacy-llama.py ./models/codellama-34b --outfile ./models/codellama-34b/ggml-model-f16.gguf --outtype f16
+ python3 examples/convert_legacy_llama.py ./models/codellama-34b --outfile ./models/codellama-34b/ggml-model-f16.gguf --outtype f16
./llama-quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_0.gguf q4_0
./llama-quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_k.gguf q4_k
@@ -135,7 +135,7 @@ if [ "$1" -eq "5" ]; then
cd /workspace/llama.cpp
- python3 examples/convert-legacy-llama.py ./models/codellama-7b-instruct --outfile ./models/codellama-7b-instruct/ggml-model-f16.gguf --outtype f16
+ python3 examples/convert_legacy_llama.py ./models/codellama-7b-instruct --outfile ./models/codellama-7b-instruct/ggml-model-f16.gguf --outtype f16
./llama-quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_0.gguf q4_0
./llama-quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_k.gguf q4_k
@@ -150,7 +150,7 @@ if [ "$1" -eq "6" ]; then
cd /workspace/llama.cpp
- python3 examples/convert-legacy-llama.py ./models/codellama-13b-instruct --outfile ./models/codellama-13b-instruct/ggml-model-f16.gguf --outtype f16
+ python3 examples/convert_legacy_llama.py ./models/codellama-13b-instruct --outfile ./models/codellama-13b-instruct/ggml-model-f16.gguf --outtype f16
./llama-quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_0.gguf q4_0
./llama-quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_k.gguf q4_k
@@ -165,7 +165,7 @@ if [ "$1" -eq "7" ]; then
cd /workspace/llama.cpp
- python3 examples/convert-legacy-llama.py ./models/codellama-34b-instruct --outfile ./models/codellama-34b-instruct/ggml-model-f16.gguf --outtype f16
+ python3 examples/convert_legacy_llama.py ./models/codellama-34b-instruct --outfile ./models/codellama-34b-instruct/ggml-model-f16.gguf --outtype f16
./llama-quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_0.gguf q4_0
./llama-quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_k.gguf q4_k
@@ -186,17 +186,17 @@ if [ "$1" -eq "1" ]; then
# batched
cd /workspace/llama.cpp
- LLAMA_CUDA=1 make -j && ./llama-batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
+ GGML_CUDA=1 make -j && ./llama-batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
# batched-bench
cd /workspace/llama.cpp
- LLAMA_CUDA=1 make -j && ./llama-batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
+ GGML_CUDA=1 make -j && ./llama-batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
# parallel
cd /workspace/llama.cpp
- LLAMA_CUDA=1 make -j && ./llama-parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
+ GGML_CUDA=1 make -j && ./llama-parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
fi
@@ -204,10 +204,9 @@ fi
#if [ "$1" -eq "7" ]; then
# cd /workspace/llama.cpp
#
-# LLAMA_CUDA=1 make -j && ./llama-speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
+# GGML_CUDA=1 make -j && ./llama-speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
#fi
# more benches
-#LLAMA_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
-#LLAMA_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
-
+#GGML_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
+#GGML_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
diff --git a/scripts/server-llm.sh b/scripts/server-llm.sh
index 19923244..802592a3 100644
--- a/scripts/server-llm.sh
+++ b/scripts/server-llm.sh
@@ -380,7 +380,7 @@ fi
if [[ "$backend" == "cuda" ]]; then
printf "[+] Building with CUDA backend\n"
- LLAMA_CUDA=1 make -j llama-server $log
+ GGML_CUDA=1 make -j llama-server $log
elif [[ "$backend" == "cpu" ]]; then
printf "[+] Building with CPU backend\n"
make -j llama-server $log
diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh
index 9e34dc8b..ba3bedf2 100755
--- a/scripts/sync-ggml-am.sh
+++ b/scripts/sync-ggml-am.sh
@@ -53,7 +53,9 @@ while read c; do
fi
git format-patch -k $c~1..$c --stdout -- \
- include/ggml/ggml*.h \
+ CMakeLists.txt \
+ src/CMakeLists.txt \
+ cmake/FindSIMD.cmake \
src/ggml*.h \
src/ggml*.c \
src/ggml*.cpp \
@@ -61,6 +63,8 @@ while read c; do
src/ggml*.metal \
src/ggml*.cu \
src/ggml-cuda/* \
+ src/ggml-sycl/* \
+ include/ggml*.h \
tests/test-opt.cpp \
tests/test-grad0.cpp \
tests/test-quantize-fns.cpp \
@@ -93,30 +97,37 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
# replace filenames:
#
- # src/ggml.c -> ggml.c
- # src/ggml-alloc.c -> ggml-alloc.c
- # src/ggml-backend-impl.h -> ggml-backend-impl.h
- # src/ggml-backend.c -> ggml-backend.c
- # src/ggml-common.h -> ggml-common.h
- # src/ggml-cuda/* -> ggml-cuda/
- # src/ggml-cuda.cu -> ggml-cuda.cu
- # src/ggml-cuda.h -> ggml-cuda.h
- # src/ggml-impl.h -> ggml-impl.h
- # src/ggml-kompute.cpp -> ggml-kompute.cpp
- # src/ggml-kompute.h -> ggml-kompute.h
- # src/ggml-metal.h -> ggml-metal.h
- # src/ggml-metal.m -> ggml-metal.m
- # src/ggml-quants.c -> ggml-quants.c
- # src/ggml-quants.h -> ggml-quants.h
- # src/ggml-rpc.cpp -> ggml-rpc.cpp
- # src/ggml-rpc.h -> ggml-rpc.h
- # src/ggml-sycl.cpp -> ggml-sycl.cpp
- # src/ggml-sycl.h -> ggml-sycl.h
- # src/ggml-vulkan.cpp -> ggml-vulkan.cpp
- # src/ggml-vulkan.h -> ggml-vulkan.h
- # include/ggml/ggml.h -> ggml.h
- # include/ggml/ggml-alloc.h -> ggml-alloc.h
- # include/ggml/ggml-backend.h -> ggml-backend.h
+ # CMakeLists.txt -> ggml/CMakeLists.txt
+ # src/CMakeLists.txt -> ggml/src/CMakeLists.txt
+ # cmake/FindSIMD.cmake -> ggml/cmake/FindSIMD.cmake
+ #
+ # src/ggml.c -> ggml/src/ggml.c
+ # src/ggml-alloc.c -> ggml/src/ggml-alloc.c
+ # src/ggml-backend-impl.h -> ggml/src/ggml-backend-impl.h
+ # src/ggml-backend.c -> ggml/src/ggml-backend.c
+ # src/ggml-common.h -> ggml/src/ggml-common.h
+ # src/ggml-cuda/* -> ggml/src/ggml-cuda/
+ # src/ggml-cuda.cu -> ggml/src/ggml-cuda.cu
+ # src/ggml-impl.h -> ggml/src/ggml-impl.h
+ # src/ggml-kompute.cpp -> ggml/src/ggml-kompute.cpp
+ # src/ggml-metal.m -> ggml/src/ggml-metal.m
+ # src/ggml-quants.c -> ggml/src/ggml-quants.c
+ # src/ggml-quants.h -> ggml/src/ggml-quants.h
+ # src/ggml-rpc.cpp -> ggml/src/ggml-rpc.cpp
+ # src/ggml-sycl/* -> ggml/src/ggml-sycl/
+ # src/ggml-sycl.cpp -> ggml/src/ggml-sycl.cpp
+ # src/ggml-vulkan.cpp -> ggml/src/ggml-vulkan.cpp
+ #
+ # include/ggml.h -> ggml/include/ggml.h
+ # include/ggml-alloc.h -> ggml/include/ggml-alloc.h
+ # include/ggml-backend.h -> ggml/include/ggml-backend.h
+ # include/ggml-blas.h -> ggml/include/ggml-blas.h
+ # include/ggml-cuda.h -> ggml/include/ggml-cuda.h
+ # include/ggml-kompute.h -> ggml/include/ggml-kompute.h
+ # include/ggml-metal.h -> ggml/include/ggml-metal.h
+ # include/ggml-rpc.h -> ggml/include/ggml-rpc.h
+ # include/ggml-sycl.h -> ggml/include/ggml-sycl.h
+ # include/ggml-vulkan.h -> ggml/include/ggml-vulkan.h
#
# tests/test-opt.cpp -> tests/test-opt.cpp
# tests/test-grad0.cpp -> tests/test-grad0.cpp
@@ -124,41 +135,45 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
# tests/test-quantize-perf.cpp -> tests/test-quantize-perf.cpp
# tests/test-backend-ops.cpp -> tests/test-backend-ops.cpp
#
- # LICENSE -> LICENSE
- # scripts/gen-authors.sh -> scripts/gen-authors.sh
-
- cat ggml-src.patch | sed \
- -e 's/src\/ggml\.c/ggml.c/g' \
- -e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
- -e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
- -e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
- -e 's/src\/ggml-common\.h/ggml-common.h/g' \
- -e 's/src\/ggml-cuda\//ggml-cuda\//g' \
- -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
- -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
- -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
- -e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \
- -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
- -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
- -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
- -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
- -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
- -e 's/src\/ggml-rpc\.cpp/ggml-rpc.cpp/g' \
- -e 's/src\/ggml-rpc\.h/ggml-rpc.h/g' \
- -e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \
- -e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \
- -e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \
- -e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \
- -e 's/include\/ggml\/ggml\.h/ggml.h/g' \
- -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
- -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
- -e 's/tests\/test-opt\.cpp/tests\/test-opt.cpp/g' \
- -e 's/tests\/test-grad0\.cpp/tests\/test-grad0.cpp/g' \
- -e 's/tests\/test-quantize-fns\.cpp/tests\/test-quantize-fns.cpp/g' \
- -e 's/tests\/test-quantize-perf\.cpp/tests\/test-quantize-perf.cpp/g' \
- -e 's/tests\/test-backend-ops\.cpp/tests\/test-backend-ops.cpp/g' \
- -e 's/LICENSE/LICENSE/g' \
- -e 's/scripts\/gen-authors\.sh/scripts\/gen-authors.sh/g' \
+ # LICENSE -> LICENSE
+ # scripts/gen-authors.sh -> scripts/gen-authors.sh
+
+ cat ggml-src.patch | sed -E \
+ -e 's/([[:space:]]|[ab]\/)CMakeLists.txt/\1ggml\/CMakeLists.txt/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/CMakeLists.txt/\1ggml\/src\/CMakeLists.txt/g' \
+ -e 's/([[:space:]]|[ab]\/)cmake\/FindSIMD.cmake/\1ggml\/cmake\/FindSIMD.cmake/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml\.c/\1ggml\/src\/ggml.c/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-alloc\.c/\1ggml\/src\/ggml-alloc.c/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-backend-impl\.h/\1ggml\/src\/ggml-backend-impl.h/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-backend\.c/\1ggml\/src\/ggml-backend.c/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-common\.h/\1ggml\/src\/ggml-common.h/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-cuda\//\1ggml\/src\/ggml-cuda\//g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-cuda\.cu/\1ggml\/src\/ggml-cuda.cu/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-impl\.h/\1ggml\/src\/ggml-impl.h/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-kompute\.cpp/\1ggml\/src\/ggml-kompute.cpp/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-metal\.m/\1ggml\/src\/ggml-metal.m/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-quants\.c/\1ggml\/src\/ggml-quants.c/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-quants\.h/\1ggml\/src\/ggml-quants.h/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-rpc\.cpp/\1ggml\/src\/ggml-rpc.cpp/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\//\1ggml\/src\/ggml-sycl\//g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\.cpp/\1ggml\/src\/ggml-sycl.cpp/g' \
+ -e 's/([[:space:]]|[ab]\/)src\/ggml-vulkan\.cpp/\1ggml\/src\/ggml-vulkan.cpp/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml\.h/\1ggml\/include\/ggml.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml-alloc\.h/\1ggml\/include\/ggml-alloc.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml-backend\.h/\1ggml\/include\/ggml-backend.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml-blas\.h/\1ggml\/include\/ggml-blas.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml-cuda\.h/\1ggml\/include\/ggml-cuda.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml-kompute\.h/\1ggml\/include\/ggml-kompute.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml-metal\.h/\1ggml\/include\/ggml-metal.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml-rpc\.h/\1ggml\/include\/ggml-rpc.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml-sycl\.h/\1ggml\/include\/ggml-sycl.h/g' \
+ -e 's/([[:space:]]|[ab]\/)include\/ggml-vulkan\.h/\1ggml\/include\/ggml-vulkan.h/g' \
+ -e 's/([[:space:]]|[ab]\/)examples\/common\.h/examples\/common.h/g' \
+ -e 's/([[:space:]]|[ab]\/)examples\/common\.cpp/examples\/common.cpp/g' \
+ -e 's/([[:space:]]|[ab]\/)examples\/common-ggml\.h/examples\/common-ggml.h/g' \
+ -e 's/([[:space:]]|[ab]\/)examples\/common-ggml\.cpp/examples\/common-ggml.cpp/g' \
+ -e 's/([[:space:]]|[ab]\/)LICENSE/LICENSE/g' \
+ -e 's/([[:space:]]|[ab]\/)scripts\/gen-authors\.sh/scripts\/gen-authors.sh/g' \
> ggml-src.patch.tmp
mv ggml-src.patch.tmp ggml-src.patch
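
Each -e expression anchors on leading whitespace or a git a/ or b/ prefix and re-roots the path under ggml/, with \1 re-emitting whatever the prefix matched. A quick check of one rule against a hypothetical patch header:

    printf 'diff --git a/src/ggml.c b/src/ggml.c\n' |
        sed -E 's/([[:space:]]|[ab]\/)src\/ggml\.c/\1ggml\/src\/ggml.c/g'
    # -> diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
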
diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last
index b6c57ec5..80159b70 100644
--- a/scripts/sync-ggml.last
+++ b/scripts/sync-ggml.last
@@ -1 +1 @@
-5653a195935ea3ac54652644c9daf154dbc1571b
+e3b3846976c94163f2b3dd128cc959782653edbb
diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh
index 4843f8a4..402446ef 100755
--- a/scripts/sync-ggml.sh
+++ b/scripts/sync-ggml.sh
@@ -1,34 +1,43 @@
#!/bin/bash
-cp -rpv ../ggml/src/ggml.c ./ggml.c
-cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
-cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
-cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c
-cp -rpv ../ggml/src/ggml-common.h ./ggml-common.h
-cp -rpv ../ggml/src/ggml-cuda/* ./ggml-cuda/
-cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
-cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
-cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h
-cp -rpv ../ggml/src/ggml-kompute.cpp ./ggml-kompute.cpp
-cp -rpv ../ggml/src/ggml-kompute.h ./ggml-kompute.h
-cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
-cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
-cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
-cp -rpv ../ggml/src/ggml-quants.c ./ggml-quants.c
-cp -rpv ../ggml/src/ggml-quants.h ./ggml-quants.h
-cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml-rpc.cpp
-cp -rpv ../ggml/src/ggml-rpc.h ./ggml-rpc.h
-cp -rpv ../ggml/src/ggml-sycl.cpp ./ggml-sycl.cpp
-cp -rpv ../ggml/src/ggml-sycl.h ./ggml-sycl.h
-cp -rpv ../ggml/src/ggml-vulkan.cpp ./ggml-vulkan.cpp
-cp -rpv ../ggml/src/ggml-vulkan.h ./ggml-vulkan.h
-cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
-cp -rpv ../ggml/include/ggml/ggml-alloc.h ./ggml-alloc.h
-cp -rpv ../ggml/include/ggml/ggml-backend.h ./ggml-backend.h
+cp -rpv ../ggml/CMakeLists.txt ./ggml/CMakeLists.txt
+cp -rpv ../ggml/src/CMakeLists.txt ./ggml/src/CMakeLists.txt
+cp -rpv ../ggml/cmake/FindSIMD.cmake ./ggml/cmake/FindSIMD.cmake
-cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
-cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp
-cp -rpv ../ggml/tests/test-backend-ops.cpp ./tests/test-backend-ops.cpp
+cp -rpv ../ggml/src/ggml.c ./ggml/src/ggml.c
+cp -rpv ../ggml/src/ggml-alloc.c ./ggml/src/ggml-alloc.c
+cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml/src/ggml-backend-impl.h
+cp -rpv ../ggml/src/ggml-backend.c ./ggml/src/ggml-backend.c
+cp -rpv ../ggml/src/ggml-common.h ./ggml/src/ggml-common.h
+cp -rpv ../ggml/src/ggml-cuda/* ./ggml/src/ggml-cuda/
+cp -rpv ../ggml/src/ggml-cuda.cu ./ggml/src/ggml-cuda.cu
+cp -rpv ../ggml/src/ggml-impl.h ./ggml/src/ggml-impl.h
+cp -rpv ../ggml/src/ggml-kompute.cpp ./ggml/src/ggml-kompute.cpp
+cp -rpv ../ggml/src/ggml-metal.m ./ggml/src/ggml-metal.m
+cp -rpv ../ggml/src/ggml-metal.metal ./ggml/src/ggml-metal.metal
+cp -rpv ../ggml/src/ggml-quants.c ./ggml/src/ggml-quants.c
+cp -rpv ../ggml/src/ggml-quants.h ./ggml/src/ggml-quants.h
+cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml/src/ggml-rpc.cpp
+cp -rpv ../ggml/src/ggml-sycl/* ./ggml/src/ggml-sycl/
+cp -rpv ../ggml/src/ggml-sycl.cpp ./ggml/src/ggml-sycl.cpp
+cp -rpv ../ggml/src/ggml-vulkan.cpp ./ggml/src/ggml-vulkan.cpp
-cp -rpv ../LICENSE ./LICENSE
-cp -rpv ../ggml/scripts/gen-authors.sh ./scripts/gen-authors.sh
+cp -rpv ../ggml/include/ggml.h ./ggml/include/ggml.h
+cp -rpv ../ggml/include/ggml-alloc.h ./ggml/include/ggml-alloc.h
+cp -rpv ../ggml/include/ggml-backend.h ./ggml/include/ggml-backend.h
+cp -rpv ../ggml/include/ggml-blas.h ./ggml/include/ggml-blas.h
+cp -rpv ../ggml/include/ggml-cuda.h ./ggml/include/ggml-cuda.h
+cp -rpv ../ggml/include/ggml-kompute.h ./ggml/include/ggml-kompute.h
+cp -rpv ../ggml/include/ggml-metal.h ./ggml/include/ggml-metal.h
+cp -rpv ../ggml/include/ggml-rpc.h ./ggml/include/ggml-rpc.h
+cp -rpv ../ggml/include/ggml-sycl.h ./ggml/include/ggml-sycl.h
+cp -rpv ../ggml/include/ggml-vulkan.h ./ggml/include/ggml-vulkan.h
+
+cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
+cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp
+cp -rpv ../ggml/tests/test-quantize-fns.cpp ./tests/test-quantize-fns.cpp
+cp -rpv ../ggml/tests/test-quantize-perf.cpp ./tests/test-quantize-perf.cpp
+cp -rpv ../ggml/tests/test-backend-ops.cpp ./tests/test-backend-ops.cpp
+
+cp -rpv ../LICENSE ./LICENSE
+cp -rpv ../ggml/scripts/gen-authors.sh ./scripts/gen-authors.sh
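
The copies assume a ggml checkout sitting next to the llama.cpp tree. A hedged sketch of a sync run (paths are illustrative):

    cd ~/src/llama.cpp        # sibling layout: ../ggml next to this checkout
    git -C ../ggml pull       # bring the ggml sources up to date first
    ./scripts/sync-ggml.sh    # copy the files listed above into place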