| author | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-07-27 07:55:01 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-07-27 07:55:01 +0200 |
| commit | 154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch) | |
| tree | 81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /scripts | |
| parent | 0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff) | |
Merge mainline llama.cpp (#3)
* Merging mainline - WIP
* Merging mainline - WIP
AVX2 and CUDA appear to work.
CUDA performance seems slightly (~1-2%) lower, as is so often
the case with llama.cpp/ggml after some "improvements" have been made.
* Merging mainline - fix Metal
* Remove check
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'scripts')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | scripts/LlamaConfig.cmake.in | 61 |
| -rw-r--r-- | scripts/build-info.cmake | 58 |
| -rwxr-xr-x | scripts/build-info.sh | 10 |
| -rwxr-xr-x | scripts/check-requirements.sh | 26 |
| -rwxr-xr-x | scripts/compare-commits.sh | 2 |
| -rwxr-xr-x | scripts/compare-llama-bench.py | 8 |
| -rwxr-xr-x | scripts/convert-gg.sh | 26 |
| -rwxr-xr-x | scripts/debug-test.sh | 2 |
| -rw-r--r-- | scripts/gen-build-info-cpp.cmake | 24 |
| -rw-r--r-- | scripts/gen-unicode-data.py | 16 |
| -rw-r--r-- | scripts/pod-llama.sh | 31 |
| -rw-r--r-- | scripts/server-llm.sh | 2 |
| -rwxr-xr-x | scripts/sync-ggml-am.sh | 135 |
| -rw-r--r-- | scripts/sync-ggml.last | 2 |
| -rwxr-xr-x | scripts/sync-ggml.sh | 69 |
15 files changed, 164 insertions, 308 deletions
diff --git a/scripts/LlamaConfig.cmake.in b/scripts/LlamaConfig.cmake.in
deleted file mode 100644
index 9311055d..00000000
--- a/scripts/LlamaConfig.cmake.in
+++ /dev/null
@@ -1,61 +0,0 @@
-set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@)
-set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
-set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
-set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
-set(LLAMA_BLAS @LLAMA_BLAS@)
-set(LLAMA_CUDA @LLAMA_CUDA@)
-set(LLAMA_METAL @LLAMA_METAL@)
-set(LLAMA_HIPBLAS @LLAMA_HIPBLAS@)
-set(LLAMA_ACCELERATE @LLAMA_ACCELERATE@)
-
-@PACKAGE_INIT@
-
-set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
-set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
-set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
-
-# Ensure transient dependencies satisfied
-
-find_package(Threads REQUIRED)
-if (APPLE AND LLAMA_ACCELERATE)
-    find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
-endif()
-
-if (LLAMA_BLAS)
-    find_package(BLAS REQUIRED)
-endif()
-
-if (LLAMA_CUDA)
-    find_package(CUDAToolkit REQUIRED)
-endif()
-
-if (LLAMA_METAL)
-    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
-    find_library(METAL_FRAMEWORK Metal REQUIRED)
-    find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
-endif()
-
-if (LLAMA_HIPBLAS)
-    find_package(hip REQUIRED)
-    find_package(hipblas REQUIRED)
-    find_package(rocblas REQUIRED)
-endif()
-
-find_library(llama_LIBRARY llama
-    REQUIRED
-    HINTS ${LLAMA_LIB_DIR})
-
-set(_llama_link_deps "Threads::Threads" "@LLAMA_EXTRA_LIBS@")
-set(_llama_transient_defines "@LLAMA_TRANSIENT_DEFINES@")
-add_library(llama UNKNOWN IMPORTED)
-set_target_properties(llama
-    PROPERTIES
-        INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
-        INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
-        INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
-        IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
-        IMPORTED_LOCATION "${llama_LIBRARY}"
-        INTERFACE_COMPILE_FEATURES cxx_std_11
-        POSITION_INDEPENDENT_CODE ON )
-
-check_required_components(Llama)
diff --git a/scripts/build-info.cmake b/scripts/build-info.cmake
deleted file mode 100644
index ea3dc55c..00000000
--- a/scripts/build-info.cmake
+++ /dev/null
@@ -1,58 +0,0 @@
-set(BUILD_NUMBER 0)
-set(BUILD_COMMIT "unknown")
-set(BUILD_COMPILER "unknown")
-set(BUILD_TARGET "unknown")
-
-# Look for git
-find_package(Git)
-if(NOT Git_FOUND)
-    find_program(GIT_EXECUTABLE NAMES git git.exe)
-    if(GIT_EXECUTABLE)
-        set(Git_FOUND TRUE)
-        message(STATUS "Found Git: ${GIT_EXECUTABLE}")
-    else()
-        message(WARNING "Git not found. Build info will not be accurate.")
-    endif()
-endif()
-
-# Get the commit count and hash
-if(Git_FOUND)
-    execute_process(
-        COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
-        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-        OUTPUT_VARIABLE HEAD
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-        RESULT_VARIABLE RES
-    )
-    if (RES EQUAL 0)
-        set(BUILD_COMMIT ${HEAD})
-    endif()
-    execute_process(
-        COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
-        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-        OUTPUT_VARIABLE COUNT
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-        RESULT_VARIABLE RES
-    )
-    if (RES EQUAL 0)
-        set(BUILD_NUMBER ${COUNT})
-    endif()
-endif()
-
-if(MSVC)
-    set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
-    set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
-else()
-    execute_process(
-        COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
-        OUTPUT_VARIABLE OUT
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    set(BUILD_COMPILER ${OUT})
-    execute_process(
-        COMMAND ${CMAKE_C_COMPILER} -dumpmachine
-        OUTPUT_VARIABLE OUT
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    set(BUILD_TARGET ${OUT})
-endif()
diff --git a/scripts/build-info.sh b/scripts/build-info.sh
index 32682afb..fa9e7bac 100755
--- a/scripts/build-info.sh
+++ b/scripts/build-info.sh
@@ -8,20 +8,20 @@ build_compiler="unknown"
 build_target="unknown"
 
 if out=$(git rev-list --count HEAD); then
-  # git is broken on WSL so we need to strip extra newlines
-  build_number=$(printf '%s' "$out" | tr -d '\n')
+    # git is broken on WSL so we need to strip extra newlines
+    build_number=$(printf '%s' "$out" | tr -d '\n')
 fi
 
 if out=$(git rev-parse --short HEAD); then
-  build_commit=$(printf '%s' "$out" | tr -d '\n')
+    build_commit=$(printf '%s' "$out" | tr -d '\n')
 fi
 
 if out=$($CC --version | head -1); then
-  build_compiler=$out
+    build_compiler=$out
 fi
 
 if out=$($CC -dumpmachine); then
-  build_target=$out
+    build_target=$out
 fi
 
 echo "int LLAMA_BUILD_NUMBER = ${build_number};"
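With the CMake build-info generators deleted above, this shell script remains the reference for how the build metadata is derived. A minimal sketch of the same logic, assuming a git checkout (the printed value is illustrative; the `tr -d '\n'` guards against git emitting stray newlines under WSL, per the comment in the hunk):

```bash
# Derive build metadata the same way build-info.sh does.
build_number=$(git rev-list --count HEAD | tr -d '\n')   # commit count as build number
build_commit=$(git rev-parse --short HEAD | tr -d '\n')  # short hash of HEAD
# The script emits C definitions consumed by common/build-info.cpp, e.g.:
echo "int LLAMA_BUILD_NUMBER = ${build_number};"         # e.g. "int LLAMA_BUILD_NUMBER = 3482;"
```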
diff --git a/scripts/check-requirements.sh b/scripts/check-requirements.sh
index 0c6afdd5..d3bbded1 100755
--- a/scripts/check-requirements.sh
+++ b/scripts/check-requirements.sh
@@ -97,9 +97,9 @@ check_requirements() {
 }
 
 check_convert_script() {
-    local py=$1             # e.g. ./convert-hf-to-gguf.py
-    local pyname=${py##*/}  # e.g. convert-hf-to-gguf.py
-    pyname=${pyname%.py}    # e.g. convert-hf-to-gguf
+    local py=$1             # e.g. ./convert_hf_to_gguf.py
+    local pyname=${py##*/}  # e.g. convert_hf_to_gguf.py
+    pyname=${pyname%.py}    # e.g. convert_hf_to_gguf
 
     info "$py: beginning check"
 
@@ -108,6 +108,11 @@ check_convert_script() {
         fatal "$py missing requirements. Expected: $reqs"
     fi
 
+    # Check that all sub-requirements are added to top-level requirements.txt
+    if ! grep -qF "$reqs" requirements.txt; then
+        fatal "$reqs needs to be added to requirements.txt"
+    fi
+
     local venv="$workdir/$pyname-venv"
     python3 -m venv "$venv"
 
@@ -134,12 +139,7 @@ EOF
 
 readonly ignore_eq_eq='check_requirements: ignore "=="'
 
-for req in "$reqs_dir"/*; do
-    # Check that all sub-requirements are added to top-level requirements.txt
-    if ! grep -qF "$req" requirements.txt; then
-        fatal "$req needs to be added to requirements.txt"
-    fi
-
+for req in */**/requirements*.txt; do
     # Make sure exact release versions aren't being pinned in the requirements
     # Filters out the ignore string
     if grep -vF "$ignore_eq_eq" "$req" | grep -q '=='; then
@@ -166,12 +166,12 @@ if (( do_cleanup )); then
     rm -rf -- "$all_venv"
 fi
 
-check_convert_script examples/convert-legacy-llama.py
-for py in convert-*.py; do
-    # skip convert-hf-to-gguf-update.py
+check_convert_script examples/convert_legacy_llama.py
+for py in convert_*.py; do
+    # skip convert_hf_to_gguf_update.py
     # TODO: the check is failing for some reason:
     # https://github.com/ggerganov/llama.cpp/actions/runs/8875330981/job/24364557177?pr=6920
-    [[ $py == convert-hf-to-gguf-update.py ]] && continue
+    [[ $py == convert_hf_to_gguf_update.py ]] && continue
 
     check_convert_script "$py"
 done
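The sub-requirements check moves from the loop over `$reqs_dir` into `check_convert_script`, so it now runs once per converter against that converter's own requirements file. Roughly, the relocated logic is equivalent to this sketch (the script name and the `requirements/` path pattern are illustrative assumptions, not spelled out in the hunk):

```bash
py=./convert_hf_to_gguf.py                    # illustrative converter
pyname=${py##*/}                              # convert_hf_to_gguf.py
pyname=${pyname%.py}                          # convert_hf_to_gguf
reqs="requirements/requirements-$pyname.txt"  # assumed per-script requirements path
# Fail if the per-script requirements file is not referenced by the top-level one.
if ! grep -qF "$reqs" requirements.txt; then
    echo "$reqs needs to be added to requirements.txt" >&2
fi
```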
diff --git a/scripts/compare-commits.sh b/scripts/compare-commits.sh
index a45cd396..70679f4e 100755
--- a/scripts/compare-commits.sh
+++ b/scripts/compare-commits.sh
@@ -12,7 +12,7 @@ bench_args="${@:3}"
 
 rm -f llama-bench.sqlite > /dev/null
 
-# to test a backend, call the script with the corresponding environment variable (e.g. LLAMA_CUDA=1 ./scripts/compare-commits.sh ...)
+# to test a backend, call the script with the corresponding environment variable (e.g. GGML_CUDA=1 ./scripts/compare-commits.sh ...)
 
 git checkout $1 > /dev/null
 make clean > /dev/null
diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py
index 513dde5e..92b9e682 100755
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -123,13 +123,13 @@ builds = cursor.execute("SELECT DISTINCT build_commit FROM test;").fetchall()
 
 try:
     repo = git.Repo(".", search_parent_directories=True)
-except git.exc.InvalidGitRepositoryError:
+except git.InvalidGitRepositoryError:
     repo = None
 
 
-def find_parent_in_data(commit):
+def find_parent_in_data(commit: git.Commit):
     """Helper function to find the most recent parent measured in number of commits for which there is data."""
-    heap = [(0, commit)]
+    heap: list[tuple[int, git.Commit]] = [(0, commit)]
     seen_hexsha8 = set()
     while heap:
         depth, current_commit = heapq.heappop(heap)
@@ -144,7 +144,7 @@ def find_parent_in_data(commit):
     return None
 
 
-def get_all_parent_hexsha8s(commit):
+def get_all_parent_hexsha8s(commit: git.Commit):
     """Helper function to recursively get hexsha8 values for all parents of a commit."""
     unvisited = [commit]
     visited = []
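These two scripts form one benchmarking workflow: compare-commits.sh runs llama-bench on both commits into llama-bench.sqlite (note the `rm -f` above), and compare-llama-bench.py reads that database. With the renamed backend flag, a CUDA run might look like this sketch (branch names and model path are illustrative; arguments after the two commits are forwarded to llama-bench via `bench_args`):

```bash
# Benchmark two commits with the CUDA backend and collect results in llama-bench.sqlite.
GGML_CUDA=1 ./scripts/compare-commits.sh master my-branch -m models/7B/ggml-model-q4_0.gguf
# compare-llama-bench.py then summarizes the collected runs from llama-bench.sqlite
# (the exact invocation/flags are whatever the script defines; none are assumed here).
```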
diff --git a/scripts/convert-gg.sh b/scripts/convert-gg.sh
deleted file mode 100755
index 8a016843..00000000
--- a/scripts/convert-gg.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# LLaMA v1
-python3 examples/convert-legacy-llama.py ../llama1/7B --outfile models/llama-7b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama1/13B --outfile models/llama-13b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama1/30B --outfile models/llama-30b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama1/65B --outfile models/llama-65b/ggml-model-f16.gguf --outtype f16
-
-# LLaMA v2
-python3 examples/convert-legacy-llama.py ../llama2/llama-2-7b --outfile models/llama-7b-v2/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama2/llama-2-13b --outfile models/llama-13b-v2/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../llama2/llama-2-70b --outfile models/llama-70b-v2/ggml-model-f16.gguf --outtype f16
-
-# Code Llama
-python3 examples/convert-legacy-llama.py ../codellama/CodeLlama-7b/ --outfile models/codellama-7b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../codellama/CodeLlama-13b/ --outfile models/codellama-13b/ggml-model-f16.gguf --outtype f16
-python3 examples/convert-legacy-llama.py ../codellama/CodeLlama-34b/ --outfile models/codellama-34b/ggml-model-f16.gguf --outtype f16
-
-# Falcon
-python3 convert-falcon-hf-to-gguf.py ../falcon/falcon-7b 1
-mv -v ../falcon/falcon-7b/ggml-model-f16.gguf models/falcon-7b/ggml-model-f16.gguf
-
-python3 convert-falcon-hf-to-gguf.py ../falcon/falcon-40b 1
-mv -v ../falcon/falcon-40b/ggml-model-f16.gguf models/falcon-40b/ggml-model-f16.gguf
diff --git a/scripts/debug-test.sh b/scripts/debug-test.sh
index 7b2b601a..91946c51 100755
--- a/scripts/debug-test.sh
+++ b/scripts/debug-test.sh
@@ -110,7 +110,7 @@ rm -rf "$build_dir" && mkdir "$build_dir" || abort "Failed to make $build_dir"
 ###########################################################
 
 # Note: test-eval-callback requires -DLLAMA_CURL
-cmake -B "./$build_dir" -DCMAKE_BUILD_TYPE=Debug -DLLAMA_CUDA=1 -DLLAMA_CURL=1 || abort "Failed to build enviroment"
+cmake -B "./$build_dir" -DCMAKE_BUILD_TYPE=Debug -DGGML_CUDA=1 -DLLAMA_CURL=1 || abort "Failed to build enviroment"
 pushd "$build_dir"
 make -j || abort "Failed to compile"
 popd > /dev/null || exit 1
diff --git a/scripts/gen-build-info-cpp.cmake b/scripts/gen-build-info-cpp.cmake
deleted file mode 100644
index d8933892..00000000
--- a/scripts/gen-build-info-cpp.cmake
+++ /dev/null
@@ -1,24 +0,0 @@
-include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
-
-set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in")
-set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp")
-
-# Only write the build info if it changed
-if(EXISTS ${OUTPUT_FILE})
-    file(READ ${OUTPUT_FILE} CONTENTS)
-    string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_COMMIT ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_COMPILER ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_TARGET ${CMAKE_MATCH_1})
-    if (
-        NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR
-        NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
-        NOT OLD_TARGET STREQUAL BUILD_TARGET
-    )
-        configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
-    endif()
-else()
-    configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
-endif()
diff --git a/scripts/gen-unicode-data.py b/scripts/gen-unicode-data.py
index 890e4d7c..2d9bde01 100644
--- a/scripts/gen-unicode-data.py
+++ b/scripts/gen-unicode-data.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import array
 import unicodedata
 import requests
@@ -133,7 +135,7 @@ table_nfd.sort()
 
 
 # group ranges with same flags
-ranges_flags = [(0, codepoint_flags[0])]  # start, flags
+ranges_flags: list[tuple[int, int]] = [(0, codepoint_flags[0])]  # start, flags
 for codepoint, flags in enumerate(codepoint_flags):
     if flags != ranges_flags[-1][1]:
         ranges_flags.append((codepoint, flags))
@@ -141,11 +143,11 @@ ranges_flags.append((MAX_CODEPOINTS, 0x0000))
 
 
 # group ranges with same nfd
-ranges_nfd = [(0, 0, 0)]  # start, last, nfd
+ranges_nfd: list[tuple[int, int, int]] = [(0, 0, 0)]  # start, last, nfd
 for codepoint, norm in table_nfd:
     start = ranges_nfd[-1][0]
     if ranges_nfd[-1] != (start, codepoint - 1, norm):
-        ranges_nfd.append(None)
+        ranges_nfd.append(None)  # type: ignore[arg-type]  # dummy, will be replaced below
         start = codepoint
     ranges_nfd[-1] = (start, codepoint, norm)
@@ -179,13 +181,13 @@ for codepoint in table_whitespace:
 out("};\n")
 
 out("const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {")
-for tuple in table_lowercase:
-    out("{0x%06X, 0x%06X}," % tuple)
+for tuple_lw in table_lowercase:
+    out("{0x%06X, 0x%06X}," % tuple_lw)
 out("};\n")
 
 out("const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {")
-for tuple in table_uppercase:
-    out("{0x%06X, 0x%06X}," % tuple)
+for tuple_up in table_uppercase:
+    out("{0x%06X, 0x%06X}," % tuple_up)
 out("};\n")
 
 out("const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd")
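A rename that recurs throughout this merge: the Python converters switch from dashes to underscores in their filenames (dashes make a script impossible to import as a Python module, which is presumably the motivation upstream). Old versus new invocation, reusing a path from the deleted convert-gg.sh above:

```bash
# before: dashed filename
python3 examples/convert-legacy-llama.py ../llama1/7B --outfile models/llama-7b/ggml-model-f16.gguf --outtype f16
# after: underscored filename, same arguments
python3 examples/convert_legacy_llama.py ../llama1/7B --outfile models/llama-7b/ggml-model-f16.gguf --outtype f16
```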
diff --git a/scripts/pod-llama.sh b/scripts/pod-llama.sh
index 6ba499a2..6e56e1ed 100644
--- a/scripts/pod-llama.sh
+++ b/scripts/pod-llama.sh
@@ -42,7 +42,7 @@ git clone https://github.com/ggerganov/llama.cpp
 
 cd llama.cpp
 
-LLAMA_CUDA=1 make -j
+GGML_CUDA=1 make -j
 
 ln -sfn /workspace/TinyLlama-1.1B-Chat-v0.3 ./models/tinyllama-1b
 ln -sfn /workspace/CodeLlama-7b-hf ./models/codellama-7b
@@ -60,7 +60,7 @@ cd /workspace/llama.cpp
 mkdir build-cublas
 cd build-cublas
 
-cmake -DLLAMA_CUDA=1 ../
+cmake -DGGML_CUDA=1 ../
 make -j
 
 if [ "$1" -eq "0" ]; then
@@ -75,7 +75,7 @@ if [ "$1" -eq "1" ]; then
 
     cd /workspace/llama.cpp
 
-    python3 examples/convert-legacy-llama.py ./models/tinyllama-1b --outfile ./models/tinyllama-1b/ggml-model-f16.gguf --outtype f16
+    python3 examples/convert_legacy_llama.py ./models/tinyllama-1b --outfile ./models/tinyllama-1b/ggml-model-f16.gguf --outtype f16
 
     ./llama-quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_0.gguf q4_0
     ./llama-quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_k.gguf q4_k
@@ -90,7 +90,7 @@ if [ "$1" -eq "2" ]; then
 
     cd /workspace/llama.cpp
 
-    python3 examples/convert-legacy-llama.py ./models/codellama-7b --outfile ./models/codellama-7b/ggml-model-f16.gguf --outtype f16
+    python3 examples/convert_legacy_llama.py ./models/codellama-7b --outfile ./models/codellama-7b/ggml-model-f16.gguf --outtype f16
 
     ./llama-quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_0.gguf q4_0
     ./llama-quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_k.gguf q4_k
@@ -105,7 +105,7 @@ if [ "$1" -eq "3" ]; then
 
     cd /workspace/llama.cpp
 
-    python3 examples/convert-legacy-llama.py ./models/codellama-13b --outfile ./models/codellama-13b/ggml-model-f16.gguf --outtype f16
+    python3 examples/convert_legacy_llama.py ./models/codellama-13b --outfile ./models/codellama-13b/ggml-model-f16.gguf --outtype f16
 
     ./llama-quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_0.gguf q4_0
     ./llama-quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_k.gguf q4_k
@@ -120,7 +120,7 @@ if [ "$1" -eq "4" ]; then
 
     cd /workspace/llama.cpp
 
-    python3 examples/convert-legacy-llama.py ./models/codellama-34b --outfile ./models/codellama-34b/ggml-model-f16.gguf --outtype f16
+    python3 examples/convert_legacy_llama.py ./models/codellama-34b --outfile ./models/codellama-34b/ggml-model-f16.gguf --outtype f16
 
     ./llama-quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_0.gguf q4_0
     ./llama-quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_k.gguf q4_k
@@ -135,7 +135,7 @@ if [ "$1" -eq "5" ]; then
 
     cd /workspace/llama.cpp
 
-    python3 examples/convert-legacy-llama.py ./models/codellama-7b-instruct --outfile ./models/codellama-7b-instruct/ggml-model-f16.gguf --outtype f16
+    python3 examples/convert_legacy_llama.py ./models/codellama-7b-instruct --outfile ./models/codellama-7b-instruct/ggml-model-f16.gguf --outtype f16
 
     ./llama-quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_0.gguf q4_0
     ./llama-quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_k.gguf q4_k
@@ -150,7 +150,7 @@ if [ "$1" -eq "6" ]; then
 
     cd /workspace/llama.cpp
 
-    python3 examples/convert-legacy-llama.py ./models/codellama-13b-instruct --outfile ./models/codellama-13b-instruct/ggml-model-f16.gguf --outtype f16
+    python3 examples/convert_legacy_llama.py ./models/codellama-13b-instruct --outfile ./models/codellama-13b-instruct/ggml-model-f16.gguf --outtype f16
 
     ./llama-quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_0.gguf q4_0
     ./llama-quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_k.gguf q4_k
@@ -165,7 +165,7 @@ if [ "$1" -eq "7" ]; then
 
     cd /workspace/llama.cpp
 
-    python3 examples/convert-legacy-llama.py ./models/codellama-34b-instruct --outfile ./models/codellama-34b-instruct/ggml-model-f16.gguf --outtype f16
+    python3 examples/convert_legacy_llama.py ./models/codellama-34b-instruct --outfile ./models/codellama-34b-instruct/ggml-model-f16.gguf --outtype f16
 
     ./llama-quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_0.gguf q4_0
     ./llama-quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_k.gguf q4_k
@@ -186,17 +186,17 @@ if [ "$1" -eq "1" ]; then
     # batched
     cd /workspace/llama.cpp
 
-    LLAMA_CUDA=1 make -j && ./llama-batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
+    GGML_CUDA=1 make -j && ./llama-batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
 
     # batched-bench
     cd /workspace/llama.cpp
 
-    LLAMA_CUDA=1 make -j && ./llama-batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
+    GGML_CUDA=1 make -j && ./llama-batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
 
     # parallel
     cd /workspace/llama.cpp
 
-    LLAMA_CUDA=1 make -j && ./llama-parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
+    GGML_CUDA=1 make -j && ./llama-parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
 
 fi
 
@@ -204,10 +204,9 @@ fi
 #if [ "$1" -eq "7" ]; then
 #    cd /workspace/llama.cpp
 #
-#    LLAMA_CUDA=1 make -j && ./llama-speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
+#    GGML_CUDA=1 make -j && ./llama-speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
 #fi
 
 # more benches
-#LLAMA_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
-#LLAMA_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
-
+#GGML_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
+#GGML_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
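The other recurring rename, visible throughout the pod-llama.sh hunks above: backend build flags drop the LLAMA_ prefix in favor of GGML_, in both the Makefile and CMake build paths. Side by side, exactly as the diff shows:

```bash
# Makefile build
LLAMA_CUDA=1 make -j        # before
GGML_CUDA=1 make -j         # after

# CMake build
cmake -DLLAMA_CUDA=1 ../    # before
cmake -DGGML_CUDA=1 ../     # after
```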
diff --git a/scripts/server-llm.sh b/scripts/server-llm.sh
index 19923244..802592a3 100644
--- a/scripts/server-llm.sh
+++ b/scripts/server-llm.sh
@@ -380,7 +380,7 @@ fi
 
 if [[ "$backend" == "cuda" ]]; then
     printf "[+] Building with CUDA backend\n"
-    LLAMA_CUDA=1 make -j llama-server $log
+    GGML_CUDA=1 make -j llama-server $log
 elif [[ "$backend" == "cpu" ]]; then
     printf "[+] Building with CPU backend\n"
     make -j llama-server $log
diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh
index 9e34dc8b..ba3bedf2 100755
--- a/scripts/sync-ggml-am.sh
+++ b/scripts/sync-ggml-am.sh
@@ -53,7 +53,9 @@ while read c; do
     fi
 
     git format-patch -k $c~1..$c --stdout -- \
-        include/ggml/ggml*.h \
+        CMakeLists.txt \
+        src/CMakeLists.txt \
+        cmake/FindSIMD.cmake \
         src/ggml*.h \
         src/ggml*.c \
         src/ggml*.cpp \
@@ -61,6 +63,8 @@ while read c; do
         src/ggml*.metal \
         src/ggml*.cu \
         src/ggml-cuda/* \
+        src/ggml-sycl/* \
+        include/ggml*.h \
         tests/test-opt.cpp \
         tests/test-grad0.cpp \
         tests/test-quantize-fns.cpp \
@@ -93,30 +97,37 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
 
     # replace filenames:
     #
-    # src/ggml.c              -> ggml.c
-    # src/ggml-alloc.c        -> ggml-alloc.c
-    # src/ggml-backend-impl.h -> ggml-backend-impl.h
-    # src/ggml-backend.c      -> ggml-backend.c
-    # src/ggml-common.h       -> ggml-common.h
-    # src/ggml-cuda/*         -> ggml-cuda/
-    # src/ggml-cuda.cu        -> ggml-cuda.cu
-    # src/ggml-cuda.h         -> ggml-cuda.h
-    # src/ggml-impl.h         -> ggml-impl.h
-    # src/ggml-kompute.cpp    -> ggml-kompute.cpp
-    # src/ggml-kompute.h      -> ggml-kompute.h
-    # src/ggml-metal.h        -> ggml-metal.h
-    # src/ggml-metal.m        -> ggml-metal.m
-    # src/ggml-quants.c       -> ggml-quants.c
-    # src/ggml-quants.h       -> ggml-quants.h
-    # src/ggml-rpc.cpp        -> ggml-rpc.cpp
-    # src/ggml-rpc.h          -> ggml-rpc.h
-    # src/ggml-sycl.cpp       -> ggml-sycl.cpp
-    # src/ggml-sycl.h         -> ggml-sycl.h
-    # src/ggml-vulkan.cpp     -> ggml-vulkan.cpp
-    # src/ggml-vulkan.h       -> ggml-vulkan.h
-    # include/ggml/ggml.h         -> ggml.h
-    # include/ggml/ggml-alloc.h   -> ggml-alloc.h
-    # include/ggml/ggml-backend.h -> ggml-backend.h
+    # CMakelists.txt       -> ggml/CMakeLists.txt
+    # src/CMakeLists.txt   -> ggml/src/CMakeLists.txt
+    # cmake/FindSIMD.cmake -> ggml/cmake/FindSIMD.cmake
+    #
+    # src/ggml.c              -> ggml/src/ggml.c
+    # src/ggml-alloc.c        -> ggml/src/ggml-alloc.c
+    # src/ggml-backend-impl.h -> ggml/src/ggml-backend-impl.h
+    # src/ggml-backend.c      -> ggml/src/ggml-backend.c
+    # src/ggml-common.h       -> ggml/src/ggml-common.h
+    # src/ggml-cuda/*         -> ggml/src/ggml-cuda/
+    # src/ggml-cuda.cu        -> ggml/src/ggml-cuda.cu
+    # src/ggml-impl.h         -> ggml/src/ggml-impl.h
+    # src/ggml-kompute.cpp    -> ggml/src/ggml-kompute.cpp
+    # src/ggml-metal.m        -> ggml/src/ggml-metal.m
+    # src/ggml-quants.c       -> ggml/src/ggml-quants.c
+    # src/ggml-quants.h       -> ggml/src/ggml-quants.h
+    # src/ggml-rpc.cpp        -> ggml/src/ggml-rpc.cpp
+    # src/ggml-sycl/*         -> ggml/src/ggml-sycl/
+    # src/ggml-sycl.cpp       -> ggml/src/ggml-sycl.cpp
+    # src/ggml-vulkan.cpp     -> ggml/src/ggml-vulkan.cpp
+    #
+    # include/ggml.h         -> ggml/include/ggml.h
+    # include/ggml-alloc.h   -> ggml/include/ggml-alloc.h
+    # include/ggml-backend.h -> ggml/include/ggml-backend.h
+    # include/ggml-blas.h    -> ggml/include/ggml-blas.h
+    # include/ggml-cuda.h    -> ggml/include/ggml-cuda.h
+    # include/ggml-kompute.h -> ggml/include/ggml-kompute.h
+    # include/ggml-metal.h   -> ggml/include/ggml-metal.h
+    # include/ggml-rpc.h     -> ggml/include/ggml-rpc.h
+    # include/ggml-sycl.h    -> ggml/include/ggml-sycl.h
+    # include/ggml-vulkan.h  -> ggml/include/ggml-vulkan.h
     #
     # tests/test-opt.cpp           -> tests/test-opt.cpp
     # tests/test-grad0.cpp         -> tests/test-grad0.cpp
@@ -124,41 +135,45 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
     # tests/test-quantize-perf.cpp -> tests/test-quantize-perf.cpp
     # tests/test-backend-ops.cpp   -> tests/test-backend-ops.cpp
     #
-    # LICENSE                -> LICENSE
-    # scripts/gen-authors.sh -> scripts/gen-authors.sh
-
-    cat ggml-src.patch | sed \
-        -e 's/src\/ggml\.c/ggml.c/g' \
-        -e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
-        -e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
-        -e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
-        -e 's/src\/ggml-common\.h/ggml-common.h/g' \
-        -e 's/src\/ggml-cuda\//ggml-cuda\//g' \
-        -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
-        -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
-        -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
-        -e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \
-        -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
-        -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
-        -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
-        -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
-        -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
-        -e 's/src\/ggml-rpc\.cpp/ggml-rpc.cpp/g' \
-        -e 's/src\/ggml-rpc\.h/ggml-rpc.h/g' \
-        -e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \
-        -e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \
-        -e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \
-        -e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \
-        -e 's/include\/ggml\/ggml\.h/ggml.h/g' \
-        -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
-        -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
-        -e 's/tests\/test-opt\.cpp/tests\/test-opt.cpp/g' \
-        -e 's/tests\/test-grad0\.cpp/tests\/test-grad0.cpp/g' \
-        -e 's/tests\/test-quantize-fns\.cpp/tests\/test-quantize-fns.cpp/g' \
-        -e 's/tests\/test-quantize-perf\.cpp/tests\/test-quantize-perf.cpp/g' \
-        -e 's/tests\/test-backend-ops\.cpp/tests\/test-backend-ops.cpp/g' \
-        -e 's/LICENSE/LICENSE/g' \
-        -e 's/scripts\/gen-authors\.sh/scripts\/gen-authors.sh/g' \
+    # LICENSE                -> LICENSE
+    # scripts/gen-authors.sh -> scripts/gen-authors.sh
+
+    cat ggml-src.patch | sed -E \
+        -e 's/([[:space:]]|[ab]\/)CMakeLists.txt/\1ggml\/CMakeLists.txt/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/CMakeLists.txt/\1ggml\/src\/CMakeLists.txt/g' \
+        -e 's/([[:space:]]|[ab]\/)cmake\/FindSIMD.cmake/\1ggml\/cmake\/FindSIMD.cmake/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml\.c/\1ggml\/src\/ggml.c/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-alloc\.c/\1ggml\/src\/ggml-alloc.c/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-backend-impl\.h/\1ggml\/src\/ggml-backend-impl.h/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-backend\.c/\1ggml\/src\/ggml-backend.c/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-common\.h/\1ggml\/src\/ggml-common.h/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-cuda\//\1ggml\/src\/ggml-cuda\//g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-cuda\.cu/\1ggml\/src\/ggml-cuda.cu/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-impl\.h/\1ggml\/src\/ggml-impl.h/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-kompute\.cpp/\1ggml\/src\/ggml-kompute.cpp/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-metal\.m/\1ggml\/src\/ggml-metal.m/g' \
+        -e 's/([[:space:]]|[ab]\/)src\/ggml-quants\.c/\1ggml\/src\/ggml-quants.c/g' \
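The switch to `sed -E` is the interesting part of this rewrite: each rule now captures the character preceding the old path (whitespace or a git `a/`/`b/` prefix) and re-emits it via `\1`, so paths are rewritten only where they appear as patch file references rather than anywhere in the text. A quick sanity check of one rule from the hunk:

```bash
# Rewrite a patch header line the way the new sync script does.
echo '--- a/src/ggml.c' | \
    sed -E 's/([[:space:]]|[ab]\/)src\/ggml\.c/\1ggml\/src\/ggml.c/g'
# prints: --- a/ggml/src/ggml.c
```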
        -e 's/([[:space:]]|[ab]\/)src\/ggml-quants\.h/\1ggml\/src\/ggml-quants.h/g' \
        -e 's/([[:space:]]|[ab]\/)src\/ggml-rpc\.cpp/\1ggml\/src\/ggml-rpc.cpp/g' \
        -e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\//\1ggml\/src\/ggml-sycl\//g' \
        -e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\.cpp/\1ggml\/src\/ggml-sycl.cpp/g' \
        -e 's/([[:space:]]|[ab]\/)src\/ggml-vulkan\.cpp/\1ggml\/src\/ggml-vulkan.cpp/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml\.h/\1ggml\/include\/ggml.h/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml-alloc\.h/\1ggml\/include\/ggml-alloc.h/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml-backend\.h/\1ggml\/include\/ggml-backend.h/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml-blas\.h/\1ggml\/include\/ggml-blas.h/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml-cuda\.h/\1ggml\/include\/ggml-cuda.h/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml-kompute\.h/\1ggml\/include\/ggml-kompute.h/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml-metal\.h/\1ggml\/include\/ggml-metal.h/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml-rpc\.h/\1ggml\/include\/ggml-rpc.h/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml-sycl\.h/\1ggml\/include\/ggml-sycl.h/g' \
        -e 's/([[:space:]]|[ab]\/)include\/ggml-vulkan\.h/\1ggml\/include\/ggml-vulkan.h/g' \
        -e 's/([[:space:]]|[ab]\/)examples\/common\.h/examples\/common.h/g' \
        -e 's/([[:space:]]|[ab]\/)examples\/common\.cpp/examples\/common.cpp/g' \
        -e 's/([[:space:]]|[ab]\/)examples\/common-ggml\.h/examples\/common-ggml.h/g' \
        -e 's/([[:space:]]|[ab]\/)examples\/common-ggml\.cpp/examples\/common-ggml.cpp/g' \
        -e 's/([[:space:]]|[ab]\/)LICENSE/LICENSE/g' \
        -e 's/([[:space:]]|[ab]\/)scripts\/gen-authors\.sh/scripts\/gen-authors.sh/g' \
        > ggml-src.patch.tmp
     mv ggml-src.patch.tmp ggml-src.patch
diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last
index b6c57ec5..80159b70 100644
--- a/scripts/sync-ggml.last
+++ b/scripts/sync-ggml.last
@@ -1 +1 @@
-5653a195935ea3ac54652644c9daf154dbc1571b
+e3b3846976c94163f2b3dd128cc959782653edbb
diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh
index 4843f8a4..402446ef 100755
--- a/scripts/sync-ggml.sh
+++ b/scripts/sync-ggml.sh
@@ -1,34 +1,43 @@
 #!/bin/bash
 
-cp -rpv ../ggml/src/ggml.c              ./ggml.c
-cp -rpv ../ggml/src/ggml-alloc.c        ./ggml-alloc.c
-cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
-cp -rpv ../ggml/src/ggml-backend.c      ./ggml-backend.c
-cp -rpv ../ggml/src/ggml-common.h       ./ggml-common.h
-cp -rpv ../ggml/src/ggml-cuda/*         ./ggml-cuda/
-cp -rpv ../ggml/src/ggml-cuda.cu        ./ggml-cuda.cu
-cp -rpv ../ggml/src/ggml-cuda.h         ./ggml-cuda.h
-cp -rpv ../ggml/src/ggml-impl.h         ./ggml-impl.h
-cp -rpv ../ggml/src/ggml-kompute.cpp    ./ggml-kompute.cpp
-cp -rpv ../ggml/src/ggml-kompute.h      ./ggml-kompute.h
-cp -rpv ../ggml/src/ggml-metal.h        ./ggml-metal.h
-cp -rpv ../ggml/src/ggml-metal.m        ./ggml-metal.m
-cp -rpv ../ggml/src/ggml-metal.metal    ./ggml-metal.metal
-cp -rpv ../ggml/src/ggml-quants.c       ./ggml-quants.c
-cp -rpv ../ggml/src/ggml-quants.h       ./ggml-quants.h
-cp -rpv ../ggml/src/ggml-rpc.cpp        ./ggml-rpc.cpp
-cp -rpv ../ggml/src/ggml-rpc.h          ./ggml-rpc.h
-cp -rpv ../ggml/src/ggml-sycl.cpp       ./ggml-sycl.cpp
-cp -rpv ../ggml/src/ggml-sycl.h         ./ggml-sycl.h
-cp -rpv ../ggml/src/ggml-vulkan.cpp     ./ggml-vulkan.cpp
-cp -rpv ../ggml/src/ggml-vulkan.h       ./ggml-vulkan.h
-cp -rpv ../ggml/include/ggml/ggml.h         ./ggml.h
-cp -rpv ../ggml/include/ggml/ggml-alloc.h   ./ggml-alloc.h
-cp -rpv ../ggml/include/ggml/ggml-backend.h ./ggml-backend.h
+cp -rpv ../ggml/CMakeLists.txt       ./ggml/CMakeLists.txt
+cp -rpv ../ggml/src/CMakeLists.txt   ./ggml/src/CMakeLists.txt
+cp -rpv ../ggml/cmake/FindSIMD.cmake ./ggml/cmake/FindSIMD.cmake
 
-cp -rpv ../ggml/tests/test-opt.cpp         ./tests/test-opt.cpp
-cp -rpv ../ggml/tests/test-grad0.cpp       ./tests/test-grad0.cpp
-cp -rpv ../ggml/tests/test-backend-ops.cpp ./tests/test-backend-ops.cpp
+cp -rpv ../ggml/src/ggml.c              ./ggml/src/ggml.c
+cp -rpv ../ggml/src/ggml-alloc.c        ./ggml/src/ggml-alloc.c
+cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml/src/ggml-backend-impl.h
+cp -rpv ../ggml/src/ggml-backend.c      ./ggml/src/ggml-backend.c
+cp -rpv ../ggml/src/ggml-common.h       ./ggml/src/ggml-common.h
+cp -rpv ../ggml/src/ggml-cuda/*         ./ggml/src/ggml-cuda/
+cp -rpv ../ggml/src/ggml-cuda.cu        ./ggml/src/ggml-cuda.cu
+cp -rpv ../ggml/src/ggml-impl.h         ./ggml/src/ggml-impl.h
+cp -rpv ../ggml/src/ggml-kompute.cpp    ./ggml/src/ggml-kompute.cpp
+cp -rpv ../ggml/src/ggml-metal.m        ./ggml/src/ggml-metal.m
+cp -rpv ../ggml/src/ggml-metal.metal    ./ggml/src/ggml-metal.metal
+cp -rpv ../ggml/src/ggml-quants.c       ./ggml/src/ggml-quants.c
+cp -rpv ../ggml/src/ggml-quants.h       ./ggml/src/ggml-quants.h
+cp -rpv ../ggml/src/ggml-rpc.cpp        ./ggml/src/ggml-rpc.cpp
+cp -rpv ../ggml/src/ggml-sycl/*         ./ggml/src/ggml-sycl/
+cp -rpv ../ggml/src/ggml-sycl.cpp       ./ggml/src/ggml-sycl.cpp
+cp -rpv ../ggml/src/ggml-vulkan.cpp     ./ggml/src/ggml-vulkan.cpp
 
-cp -rpv ../LICENSE                     ./LICENSE
-cp -rpv ../ggml/scripts/gen-authors.sh ./scripts/gen-authors.sh
+cp -rpv ../ggml/include/ggml.h         ./ggml/include/ggml.h
+cp -rpv ../ggml/include/ggml-alloc.h   ./ggml/include/ggml-alloc.h
+cp -rpv ../ggml/include/ggml-backend.h ./ggml/include/ggml-backend.h
+cp -rpv ../ggml/include/ggml-blas.h    ./ggml/include/ggml-blas.h
+cp -rpv ../ggml/include/ggml-cuda.h    ./ggml/include/ggml-cuda.h
+cp -rpv ../ggml/include/ggml-kompute.h ./ggml/include/ggml-kompute.h
+cp -rpv ../ggml/include/ggml-metal.h   ./ggml/include/ggml-metal.h
+cp -rpv ../ggml/include/ggml-rpc.h     ./ggml/include/ggml-rpc.h
+cp -rpv ../ggml/include/ggml-sycl.h    ./ggml/include/ggml-sycl.h
+cp -rpv ../ggml/include/ggml-vulkan.h  ./ggml/include/ggml-vulkan.h
+
+cp -rpv ../ggml/tests/test-opt.cpp           ./tests/test-opt.cpp
+cp -rpv ../ggml/tests/test-grad0.cpp         ./tests/test-grad0.cpp
+cp -rpv ../ggml/tests/test-quantize-fns.cpp  ./tests/test-quantize-fns.cpp
+cp -rpv ../ggml/tests/test-quantize-perf.cpp ./tests/test-quantize-perf.cpp
+cp -rpv ../ggml/tests/test-backend-ops.cpp   ./tests/test-backend-ops.cpp
+
+cp -rpv ../LICENSE                     ./LICENSE
+cp -rpv ../ggml/scripts/gen-authors.sh ./scripts/gen-authors.sh
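For context, the two sync scripts and the .last file together make up the usual mainline sync workflow; this is a sketch under that assumption, with no flags invented beyond what the diffs show:

```bash
cd llama.cpp
cat scripts/sync-ggml.last   # last synced ggml commit; e3b3846976c94163f2b3dd128cc959782653edbb after this merge
./scripts/sync-ggml-am.sh    # patch-based sync: format-patch each newer commit in ../ggml,
                             # rewrite paths with the sed rules above, then apply
./scripts/sync-ggml.sh       # alternatively, bulk-copy the tracked file set from ../ggml
```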