-rw-r--r--   .github/labeler.yml                                 89
-rw-r--r--   .github/workflows/bench.yml                        310
-rw-r--r--   .github/workflows/build.yml                       1310
-rw-r--r--   .github/workflows/close-issue.yml                   23
-rw-r--r--   .github/workflows/docker.yml                       117
-rw-r--r--   .github/workflows/editorconfig.yml                  27
-rw-r--r--   .github/workflows/gguf-publish.yml                  44
-rw-r--r--   .github/workflows/labeler.yml                       17
-rw-r--r--   .github/workflows/nix-ci-aarch64.yml                65
-rw-r--r--   .github/workflows/nix-ci.yml                        72
-rw-r--r--   .github/workflows/nix-flake-update.yml              22
-rw-r--r--   .github/workflows/nix-publish-flake.yml             36
-rw-r--r--   .github/workflows/python-check-requirements.yml     35
-rw-r--r--   .github/workflows/python-lint.yml                   23
-rw-r--r--   .github/workflows/server.yml                       183
15 files changed, 0 insertions(+), 2373 deletions(-)
diff --git a/.github/labeler.yml b/.github/labeler.yml
deleted file mode 100644
index 5c12bab7..00000000
--- a/.github/labeler.yml
+++ /dev/null
@@ -1,89 +0,0 @@
-# https://github.com/actions/labeler
-Kompute:
- - changed-files:
- - any-glob-to-any-file:
- - ggml-kompute.h
- - ggml-kompute.cpp
- - README-kompute.md
-Apple Metal:
- - changed-files:
- - any-glob-to-any-file:
- - ggml-metal.h
- - ggml-metal.cpp
- - README-metal.md
-SYCL:
- - changed-files:
- - any-glob-to-any-file:
- - ggml-sycl.h
- - ggml-sycl.cpp
- - README-sycl.md
-Nvidia GPU:
- - changed-files:
- - any-glob-to-any-file:
- - ggml-cuda.h
- - ggml-cuda/**
-Vulkan:
- - changed-files:
- - any-glob-to-any-file:
- - ggml_vk_generate_shaders.py
- - ggml-vulkan*
-documentation:
- - changed-files:
- - any-glob-to-any-file:
- - docs/**
- - media/**
-testing:
- - changed-files:
- - any-glob-to-any-file:
- - tests/**
-build:
- - changed-files:
- - any-glob-to-any-file:
- - cmake/**
- - CMakeLists.txt
- - CMakePresets.json
-examples:
- - changed-files:
- - any-glob-to-any-file: examples/**
-devops:
- - changed-files:
- - any-glob-to-any-file:
- - .devops/**
- - .github/**
- - ci/**
-python:
- - changed-files:
- - any-glob-to-any-file:
- - "**/*.py"
- - requirements/**
- - gguf-py/**
- - .flake8
-script:
- - changed-files:
- - any-glob-to-any-file:
- - scripts/**
-android:
- - changed-files:
- - any-glob-to-any-file:
- - examples/llama.android/**
-server:
- - changed-files:
- - any-glob-to-any-file:
- - examples/server/**
-ggml:
- - changed-files:
- - any-glob-to-any-file:
- - ggml.c
- - ggml.h
- - ggml-*.c
- - ggml-*.h
- - ggml-cuda/**
-nix:
- - changed-files:
- - any-glob-to-any-file:
- - "**/*.nix"
- - .github/workflows/nix-*.yml
- - .devops/nix/nixpkgs-instances.nix
-embedding:
- - changed-files:
- - any-glob-to-any-file: examples/embedding/
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
deleted file mode 100644
index 88ab4844..00000000
--- a/.github/workflows/bench.yml
+++ /dev/null
@@ -1,310 +0,0 @@
-# Benchmark
-name: Benchmark
-
-on:
- workflow_dispatch:
- inputs:
- gpu-series:
- description: 'Azure GPU series to run with'
- required: true
- type: choice
- options:
- - Standard_NC4as_T4_v3
- - Standard_NC24ads_A100_v4
- - Standard_NC80adis_H100_v5
- sha:
- description: 'Commit SHA1 to build'
- required: false
- type: string
- duration:
- description: 'Duration of the bench'
- type: string
- default: 10m
-
- push:
- branches:
- - master
- paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
- pull_request_target:
- types: [opened, synchronize, reopened]
- paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
- schedule:
- - cron: '04 2 * * *'
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
- cancel-in-progress: true
-
-jobs:
- bench-server-baseline:
- runs-on: Standard_NC4as_T4_v3
- env:
- RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Could not find a way to avoid duplicating it
- N_USERS: 8
- DURATION: 10m
-
- strategy:
- matrix:
- model: [phi-2]
- ftype: [q4_0, q8_0, f16]
- include:
- - model: phi-2
- ftype: q4_0
- pr_comment_enabled: "true"
-
- if: |
- inputs.gpu-series == 'Standard_NC4as_T4_v3'
- || (
- github.event_name == 'schedule'
- && github.ref_name == 'master'
- && github.repository_owner == 'ggerganov'
- )
- || github.event_name == 'pull_request_target'
- || (
- github.event_name == 'push'
- && github.event.ref == 'refs/heads/master'
- && github.repository_owner == 'ggerganov'
- )
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
- ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
-
- - name: Install python env
- id: pipenv
- run: |
- cd examples/server/bench
- python3 -m venv venv
- source venv/bin/activate
- pip install -r requirements.txt
-
- - name: Prometheus
- id: install_prometheus
- run: |
- wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
- tar xzf prometheus*.tar.gz --strip-components=1
- ./prometheus --config.file=examples/server/bench/prometheus.yml &
- while ! nc -z localhost 9090; do
- sleep 0.1
- done
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: '1.21'
-
- - name: Install k6 and xk6-sse
- id: k6_installation
- run: |
- cd examples/server/bench
- go install go.k6.io/xk6/cmd/xk6@latest
- xk6 build master \
- --with github.com/phymbert/xk6-sse
-
- - name: Build
- id: cmake_build
- run: |
- set -eux
- cmake -B build \
- -DLLAMA_NATIVE=OFF \
- -DLLAMA_BUILD_SERVER=ON \
- -DLLAMA_CURL=ON \
- -DLLAMA_CUBLAS=ON \
- -DCUDAToolkit_ROOT=/usr/local/cuda \
- -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
- -DCMAKE_CUDA_ARCHITECTURES=75 \
- -DLLAMA_FATAL_WARNINGS=OFF \
- -DLLAMA_ALL_WARNINGS=OFF \
- -DCMAKE_BUILD_TYPE=Release;
- cmake --build build --config Release -j $(nproc) --target llama-server
-
- - name: Download the dataset
- id: download_dataset
- run: |
- cd examples/server/bench
- wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
-
- - name: Server bench
- id: server_bench
- run: |
- set -eux
-
- cd examples/server/bench
- source venv/bin/activate
- python bench.py \
- --runner-label ${{ env.RUNNER_LABEL }} \
- --name ${{ github.job }} \
- --branch ${{ github.head_ref || github.ref_name }} \
- --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
- --scenario script.js \
- --duration ${{ github.event.inputs.duration || env.DURATION }} \
- --hf-repo ggml-org/models \
- --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
- --model-path-prefix /models \
- --parallel ${{ env.N_USERS }} \
- -ngl 33 \
- --batch-size 2048 \
- --ubatch-size 256 \
- --ctx-size 16384 \
- --n-prompts 1000 \
- --max-prompt-tokens 1024 \
- --max-tokens 2048
-
- cat results.github.env >> $GITHUB_ENV
-
- # Remove dataset as we do not want it in the artefact
- rm ShareGPT_V3_unfiltered_cleaned_split.json
-
- - uses: actions/upload-artifact@v4
- with:
- name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
- compression-level: 9
- path: |
- examples/server/bench/*.jpg
- examples/server/bench/*.json
- examples/server/bench/*.log
-
- - name: Commit status
- uses: Sibz/github-status-action@v1
- with:
- authToken: ${{secrets.GITHUB_TOKEN}}
- sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
- context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
- description: |
- ${{ env.BENCH_RESULTS }}
- state: 'success'
-
- - name: Upload benchmark images
- uses: devicons/public-upload-to-imgur@v2.2.2
- continue-on-error: true # Important as it looks unstable: 503
- id: imgur_step
- with:
- client_id: ${{secrets.IMGUR_CLIENT_ID}}
- path: |
- examples/server/bench/prompt_tokens_seconds.jpg
- examples/server/bench/predicted_tokens_seconds.jpg
- examples/server/bench/kv_cache_usage_ratio.jpg
- examples/server/bench/requests_processing.jpg
-
- - name: Extract mermaid
- id: set_mermaid
- run: |
- set -eux
-
- cd examples/server/bench
- PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
- echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
- echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
- echo "EOF" >> $GITHUB_ENV
-
- PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
- echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
- echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
- echo "EOF" >> $GITHUB_ENV
-
- KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
- echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
- echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
- echo "EOF" >> $GITHUB_ENV
-
- REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
- echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
- echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
- echo "EOF" >> $GITHUB_ENV
-
- - name: Extract image url
- id: extract_image_url
- continue-on-error: true
- run: |
- set -eux
-
- echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
- echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
- echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
- echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
-
- - name: Comment PR
- uses: mshick/add-pr-comment@v2
- id: comment_pr
- if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
- with:
- message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
- message: |
- <p align="center">
-
- 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
-
- </p>
-
- <details>
-
- <summary>Expand details for performance related PR only</summary>
-
- - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
- - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
- - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
- - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
- - ${{ env.BENCH_GRAPH_XLABEL }}
-
-
- <p align="center">
-
- <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
-
- <details>
-
- <summary>More</summary>
-
- ```mermaid
- ${{ env.PROMPT_TOKENS_SECONDS }}
- ```
-
- </details>
-
- <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
-
- <details>
- <summary>More</summary>
-
- ```mermaid
- ${{ env.PREDICTED_TOKENS_SECONDS }}
- ```
-
- </details>
-
- </p>
-
- <details>
-
- <summary>Details</summary>
-
- <p align="center">
-
- <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
-
- <details>
- <summary>More</summary>
-
- ```mermaid
- ${{ env.KV_CACHE_USAGE_RATIO }}
- ```
-
- </details>
-
- <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
-
- <details>
- <summary>More</summary>
-
- ```mermaid
- ${{ env.REQUESTS_PROCESSING }}
- ```
-
- </details>
-
- </p>
- </details>
- </details>
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index a8fcae04..00000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,1310 +0,0 @@
-name: CI
-
-on:
- workflow_dispatch: # allows manual triggering
- inputs:
- create_release:
- description: 'Create new release'
- required: true
- type: boolean
- push:
- branches:
- - master
- paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']
- pull_request:
- types: [opened, synchronize, reopened]
- paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m']
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
- cancel-in-progress: true
-
-env:
- BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
- GGML_NLOOP: 3
- GGML_N_THREADS: 1
-
-jobs:
- macOS-latest-cmake-arm64:
- runs-on: macos-14
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Dependencies
- id: depends
- continue-on-error: true
- run: |
- brew update
-
- - name: Build
- id: cmake_build
- run: |
- sysctl -a
- mkdir build
- cd build
- cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
- cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
-
- - name: Test
- id: cmake_test
- run: |
- cd build
- ctest -L 'main|curl' --verbose --timeout 900
-
- - name: Determine tag name
- id: tag
- shell: bash
- run: |
- BUILD_NUMBER="$(git rev-list --count HEAD)"
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
- else
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
- fi
-
- - name: Pack artifacts
- id: pack_artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- run: |
- cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
-
- - name: Upload artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v4
- with:
- path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
- name: llama-bin-macos-arm64.zip
-
- macOS-latest-cmake-x64:
- runs-on: macos-12
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Dependencies
- id: depends
- continue-on-error: true
- run: |
- brew update
-
- - name: Build
- id: cmake_build
- run: |
- sysctl -a
- # Metal is disabled due to intermittent failures with Github runners not having a GPU:
- # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
- cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON
- cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
-
- - name: Test
- id: cmake_test
- run: |
- cd build
- ctest -L main --verbose --timeout 900
-
- - name: Determine tag name
- id: tag
- shell: bash
- run: |
- BUILD_NUMBER="$(git rev-list --count HEAD)"
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
- else
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
- fi
-
- - name: Pack artifacts
- id: pack_artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- run: |
- cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
-
- - name: Upload artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v4
- with:
- path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
- name: llama-bin-macos-x64.zip
-
- ubuntu-focal-make:
- runs-on: ubuntu-20.04
- env:
- LLAMA_NODE_AVAILABLE: true
- LLAMA_PYTHON_AVAILABLE: true
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
-
- - name: Dependencies
- id: depends
- run: |
- sudo apt-get update
- sudo apt-get install build-essential gcc-8
-
- - uses: actions/setup-node@v4
- with:
- node-version: "20"
-
- - uses: actions/setup-python@v5
- with:
- python-version: "3.11"
-
- - name: Build
- id: make_build
- env:
- LLAMA_FATAL_WARNINGS: 1
- run: |
- CC=gcc-8 make -j $(nproc)
-
- - name: Test
- id: make_test
- run: |
- CC=gcc-8 make tests -j $(nproc)
- make test -j $(nproc)
-
- ubuntu-focal-make-curl:
- runs-on: ubuntu-20.04
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
-
- - name: Dependencies
- id: depends
- run: |
- sudo apt-get update
- sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev
-
- - name: Build
- id: make_build
- env:
- LLAMA_FATAL_WARNINGS: 1
- LLAMA_CURL: 1
- run: |
- CC=gcc-8 make -j $(nproc)
-
- ubuntu-latest-cmake:
- runs-on: ubuntu-latest
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Dependencies
- id: depends
- run: |
- sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
-
- - name: Build
- id: cmake_build
- run: |
- mkdir build
- cd build
- cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
- cmake --build . --config Release -j $(nproc)
-
- - name: Test
- id: cmake_test
- run: |
- cd build
- ctest -L 'main|curl' --verbose --timeout 900
-
- - name: Test llama2c conversion
- id: llama2c_test
- run: |
- cd build
- echo "Fetch tokenizer"
- wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
- echo "Fetch llama2c model"
- wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
- ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
- ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
-
- - name: Determine tag name
- id: tag
- shell: bash
- run: |
- BUILD_NUMBER="$(git rev-list --count HEAD)"
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
- else
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
- fi
-
- - name: Pack artifacts
- id: pack_artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- run: |
- cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
-
- - name: Upload artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v4
- with:
- path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
- name: llama-bin-ubuntu-x64.zip
-
- ubuntu-latest-cmake-sanitizer:
- runs-on: ubuntu-latest
-
- continue-on-error: true
-
- strategy:
- matrix:
- sanitizer: [ADDRESS, THREAD, UNDEFINED]
- build_type: [Debug, Release]
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
-
- - name: Dependencies
- id: depends
- run: |
- sudo apt-get update
- sudo apt-get install build-essential
-
- - name: Build
- id: cmake_build
- if: ${{ matrix.sanitizer != 'THREAD' }}
- run: |
- mkdir build
- cd build
- cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
-
- - name: Build (no OpenMP)
- id: cmake_build_no_openmp
- if: ${{ matrix.sanitizer == 'THREAD' }}
- run: |
- mkdir build
- cd build
- cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DLLAMA_OPENMP=OFF
- cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
-
- - name: Test
- id: cmake_test
- run: |
- cd build
- ctest -L main --verbose --timeout 900
-
- ubuntu-latest-cmake-rpc:
- runs-on: ubuntu-latest
-
- continue-on-error: true
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
-
- - name: Dependencies
- id: depends
- run: |
- sudo apt-get update
- sudo apt-get install build-essential
-
- - name: Build
- id: cmake_build
- run: |
- mkdir build
- cd build
- cmake -DLLAMA_RPC=ON ..
- cmake --build . --config Release -j $(nproc)
-
- - name: Test
- id: cmake_test
- run: |
- cd build
- ctest -L main --verbose
-
- ubuntu-22-cmake-vulkan:
- runs-on: ubuntu-22.04
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
-
- - name: Dependencies
- id: depends
- run: |
- sudo apt-get update
- sudo apt-get install build-essential libvulkan-dev
-
- - name: Build
- id: cmake_build
- run: |
- mkdir build
- cd build
- cmake -DLLAMA_VULKAN=ON ..
- cmake --build . --config Release -j $(nproc)
-
- ubuntu-22-cmake-hip:
- runs-on: ubuntu-22.04
- container: rocm/dev-ubuntu-22.04:6.0.2
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v3
-
- - name: Dependencies
- id: depends
- run: |
- sudo apt-get update
- sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
-
- - name: Build with native CMake HIP support
- id: cmake_build
- run: |
- cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DLLAMA_HIPBLAS=ON
- cmake --build build --config Release -j $(nproc)
-
- - name: Build with legacy HIP support
- id: cmake_build_legacy_hip
- run: |
- cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DLLAMA_HIPBLAS=ON
- cmake --build build2 --config Release -j $(nproc)
-
- ubuntu-22-cmake-sycl:
- runs-on: ubuntu-22.04
-
- continue-on-error: true
-
- steps:
- - uses: actions/checkout@v2
-
- - name: add oneAPI to apt
- shell: bash
- run: |
- cd /tmp
- wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
-
- - name: install oneAPI dpcpp compiler
- shell: bash
- run: |
- sudo apt update
- sudo apt install intel-oneapi-compiler-dpcpp-cpp
-
- - name: install oneAPI MKL library
- shell: bash
- run: |
- sudo apt install intel-oneapi-mkl-devel
-
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
-
- - name: Build
- id: cmake_build
- run: |
- source /opt/intel/oneapi/setvars.sh
- mkdir build
- cd build
- cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
- cmake --build . --config Release -j $(nproc)
-
- ubuntu-22-cmake-sycl-fp16:
- runs-on: ubuntu-22.04
-
- continue-on-error: true
-
- steps:
- - uses: actions/checkout@v2
-
- - name: add oneAPI to apt
- shell: bash
- run: |
- cd /tmp
- wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
-
- - name: install oneAPI dpcpp compiler
- shell: bash
- run: |
- sudo apt update
- sudo apt install intel-oneapi-compiler-dpcpp-cpp
-
- - name: install oneAPI MKL library
- shell: bash
- run: |
- sudo apt install intel-oneapi-mkl-devel
-
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
-
- - name: Build
- id: cmake_build
- run: |
- source /opt/intel/oneapi/setvars.sh
- mkdir build
- cd build
- cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON ..
- cmake --build . --config Release -j $(nproc)
-
- # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
- # how to debug it.
- # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
- macOS-latest-make:
- runs-on: macos-latest
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
-
- - name: Dependencies
- id: depends
- continue-on-error: true
- run: |
- brew update
-
- - name: Build
- id: make_build
- env:
- LLAMA_FATAL_WARNINGS: 1
- run: |
- LLAMA_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)
-
- - name: Test
- id: make_test
- run: |
- LLAMA_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
- LLAMA_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)
-
- # TODO: build with LLAMA_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
- # how to debug it.
- # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
- # would be great if we fix these
- macOS-latest-cmake:
- runs-on: macos-latest
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
-
- - name: Dependencies
- id: depends
- continue-on-error: true
- run: |
- brew update
-
- - name: Build
- id: cmake_build
- run: |
- sysctl -a
- mkdir build
- cd build
- cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF ..
- cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
-
- - name: Test
- id: cmake_test
- run: |
- cd build
- ctest -L main --verbose --timeout 900
-
- macOS-latest-cmake-ios:
- runs-on: macos-latest
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v1
-
- - name: Dependencies
- id: depends
- continue-on-error: true
- run: |
- brew update
-
- - name: Build
- id: cmake_build
- run: |
- sysctl -a
- mkdir build
- cd build
- cmake -G Xcode .. \
- -DLLAMA_METAL_EMBED_LIBRARY=ON \
- -DLLAMA_BUILD_EXAMPLES=OFF \
- -DLLAMA_BUILD_TESTS=OFF \
- -DLLAMA_BUILD_SERVER=OFF \
- -DCMAKE_SYSTEM_NAME=iOS \
- -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
- cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
-
- macOS-latest-cmake-tvos:
- runs-on: macos-latest
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v1
-
- - name: Dependencies
- id: depends
- continue-on-error: true
- run: |
- brew update
-
- - name: Build
- id: cmake_build
- run: |
- sysctl -a
- mkdir build
- cd build
- cmake -G Xcode .. \
- -DLLAMA_METAL_EMBED_LIBRARY=ON \
- -DLLAMA_BUILD_EXAMPLES=OFF \
- -DLLAMA_BUILD_TESTS=OFF \
- -DLLAMA_BUILD_SERVER=OFF \
- -DCMAKE_SYSTEM_NAME=tvOS \
- -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
- cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
-
- macOS-latest-swift:
- runs-on: macos-latest
-
- strategy:
- matrix:
- destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v1
-
- - name: Dependencies
- id: depends
- continue-on-error: true
- run: |
- brew update
-
- - name: xcodebuild for swift package
- id: xcodebuild
- run: |
- xcodebuild -scheme llama -destination "${{ matrix.destination }}"
-
- - name: Build Swift Example
- id: make_build_swift_example
- run: |
- make swift
-
- windows-msys2:
- runs-on: windows-latest
-
- strategy:
- fail-fast: false
- matrix:
- include:
- - { sys: UCRT64, env: ucrt-x86_64, build: Release }
- - { sys: CLANG64, env: clang-x86_64, build: Release }
-
- steps:
- - name: Clone
- uses: actions/checkout@v4
-
- - name: Setup ${{ matrix.sys }}
- uses: msys2/setup-msys2@v2
- with:
- update: true
- msystem: ${{matrix.sys}}
- install: >-
- base-devel
- mingw-w64-${{matrix.env}}-toolchain
- mingw-w64-${{matrix.env}}-cmake
- mingw-w64-${{matrix.env}}-openblas
-
- - name: Build using make
- shell: msys2 {0}
- run: |
- make -j $(nproc)
-
- - name: Clean after building using make
- shell: msys2 {0}
- run: |
- make clean
-
- - name: Build using make w/ OpenBLAS
- shell: msys2 {0}
- run: |
- make LLAMA_OPENBLAS=1 -j $(nproc)
-
- - name: Build using CMake
- shell: msys2 {0}
- run: |
- cmake -B build
- cmake --build build --config ${{ matrix.build }} -j $(nproc)
-
- - name: Clean after building using CMake
- shell: msys2 {0}
- run: |
- rm -rf build
-
- - name: Build using CMake w/ OpenBLAS
- shell: msys2 {0}
- run: |
- cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
- cmake --build build --config ${{ matrix.build }} -j $(nproc)
-
- windows-latest-cmake:
- runs-on: windows-2019
-
- env:
- OPENBLAS_VERSION: 0.3.23
- SDE_VERSION: 9.33.0-2024-01-07
- VULKAN_VERSION: 1.3.261.1
-
- strategy:
- matrix:
- include:
- - build: 'rpc-x64'
- defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_RPC=ON -DBUILD_SHARED_LIBS=ON'
- - build: 'noavx-x64'
- defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
- - build: 'avx2-x64'
- defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
- - build: 'avx-x64'
- defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
- - build: 'avx512-x64'
- defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
- - build: 'openblas-x64'
- defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
- - build: 'kompute-x64'
- defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
- - build: 'vulkan-x64'
- defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
- - build: 'llvm-arm64'
- defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
- - build: 'msvc-arm64'
- defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Clone Kompute submodule
- id: clone_kompute
- if: ${{ matrix.build == 'kompute-x64' }}
- run: |
- git submodule update --init kompute
-
- - name: Download OpenBLAS
- id: get_openblas
- if: ${{ matrix.build == 'openblas-x64' }}
- run: |
- curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
- curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
- mkdir $env:RUNNER_TEMP/openblas
- tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
- $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
- $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
- $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
- & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
-
- - name: Install Vulkan SDK
- id: get_vulkan
- if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
- run: |
- curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
- & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
- Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
- Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
-
- - name: Install Ninja
- id: install_ninja
- run: |
- choco install ninja
-
- - name: Build
- id: cmake_build
- run: |
- cmake -S . -B build ${{ matrix.defines }}
- cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
-
- - name: Add libopenblas.dll
- id: add_libopenblas_dll
- if: ${{ matrix.build == 'openblas-x64' }}
- run: |
- cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
- cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
-
- - name: Check AVX512F support
- id: check_avx512f
- if: ${{ matrix.build == 'avx512-x64' }}
- continue-on-error: true
- run: |
- cd build
- $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
- $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
- $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
- echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
- & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
- .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
-
- - name: Test
- id: cmake_test
- # not all machines have native AVX-512
- if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
- run: |
- cd build
- ctest -L main -C Release --verbose --timeout 900
-
- - name: Test (Intel SDE)
- id: cmake_test_sde
- if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
- run: |
- curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
- # for some weird reason windows tar doesn't like sde tar.xz
- 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
- 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
- $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
- cd build
- & $sde -future -- ctest -L main -C Release --verbose --timeout 900
-
- - name: Determine tag name
- id: tag
- shell: bash
- run: |
- BUILD_NUMBER="$(git rev-list --count HEAD)"
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
- else
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
- fi
-
- - name: Pack artifacts
- id: pack_artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- run: |
- Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
- 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
-
- - name: Upload artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v4
- with:
- path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
- name: llama-bin-win-${{ matrix.build }}.zip
-
- windows-latest-cmake-cuda:
- runs-on: windows-2019
-
- strategy:
- matrix:
- cuda: ['12.2.0', '11.7.1']
- build: ['cuda']
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Install CUDA toolkit
- id: cuda-toolkit
- uses: Jimver/cuda-toolkit@v0.2.15
- with:
- cuda: ${{ matrix.cuda }}
- method: 'network'
- sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
-
- - name: Build
- id: cmake_build
- run: |
- mkdir build
- cd build
- cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=ON
- cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
-
- - name: Determine tag name
- id: tag
- shell: bash
- run: |
- BUILD_NUMBER="$(git rev-list --count HEAD)"
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
- else
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
- fi
-
- - name: Pack artifacts
- id: pack_artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- run: |
- 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
-
- - name: Upload artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v4
- with:
- path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
- name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
-
- - name: Copy and pack Cuda runtime
- run: |
- echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
- $dst='.\build\bin\cudart\'
- robocopy "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
- 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
-
- - name: Upload Cuda runtime
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v4
- with:
- path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
- name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
-
- windows-latest-cmake-sycl:
- runs-on: windows-latest
-
- defaults:
- run:
- shell: bash
-
- env:
- WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595_offline.exe
- WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel
- ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Install
- run: scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
-
- - name: Build
- id: cmake_build
- run: examples/sycl/win-build-sycl.bat
-
- - name: Determine tag name
- id: tag
- shell: bash
- run: |
- BUILD_NUMBER="$(git rev-list --count HEAD)"
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
- else
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
- fi
-
- - name: Pack artifacts
- id: pack_artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- run: |
- echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
- cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.4.dll" ./build/bin
- cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
- cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
-
- cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_win_proxy_loader.dll" ./build/bin
- cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_level_zero.dll" ./build/bin
- cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
- cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
- cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
- echo "cp oneAPI running time dll files to ./build/bin done"
- 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
-
- - name: Upload artifacts
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v4
- with:
- path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
- name: llama-bin-win-sycl-x64.zip
-
- windows-latest-cmake-hip:
- runs-on: windows-latest
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v3
-
- - name: Install
- id: depends
- run: |
- $ErrorActionPreference = "Stop"
- write-host "Downloading AMD HIP SDK Installer"
- Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
- write-host "Installing AMD HIP SDK"
- Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
- write-host "Completed AMD HIP SDK installation"
-
- - name: Verify ROCm
- id: verify
- run: |
- & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
-
- - name: Build
- id: cmake_build
- run: |
- $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
- $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
- cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DLLAMA_HIPBLAS=ON
- cmake --build build --config Release
-
- ios-xcode-build:
- runs-on: macos-latest
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Build Xcode project
- run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
-
- android-build:
- runs-on: ubuntu-latest
-
- steps:
- - name: Clone
- uses: actions/checkout@v4
-
- - name: Set up JDK
- uses: actions/setup-java@v3
- with:
- java-version: 17
- distribution: zulu
-
- - name: Setup Android SDK
- uses: android-actions/setup-android@v3
- with:
- log-accepted-android-sdk-licenses: false
-
- - name: Build
- run: |
- cd examples/llama.android
-
- ./gradlew build --no-daemon
-
-# freeBSD-latest:
-# runs-on: macos-12
-# steps:
-# - name: Clone
-# uses: actions/checkout@v4
-#
-# - name: Build
-# uses: cross-platform-actions/action@v0.19.0
-# with:
-# operating_system: freebsd
-# version: '13.2'
-# hypervisor: 'qemu'
-# run: |
-# sudo pkg update
-# sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
-# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
-
- release:
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
-
- runs-on: ubuntu-latest
-
- needs:
- - ubuntu-focal-make
- - ubuntu-latest-cmake
- - macOS-latest-make
- - macOS-latest-cmake
- - windows-latest-cmake
- - windows-latest-cmake-cuda
- - macOS-latest-cmake-arm64
- - macOS-latest-cmake-x64
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Determine tag name
- id: tag
- shell: bash
- run: |
- BUILD_NUMBER="$(git rev-list --count HEAD)"
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
- else
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
- fi
-
- - name: Download artifacts
- id: download-artifact
- uses: actions/download-artifact@v4
- with:
- path: ./artifact
-
- - name: Move artifacts
- id: move_artifacts
- run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
-
- - name: Create release
- id: create_release
- uses: anzz1/action-create-release@v1
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- with:
- tag_name: ${{ steps.tag.outputs.name }}
-
- - name: Upload release
- id: upload_release
- uses: actions/github-script@v3
- with:
- github-token: ${{secrets.GITHUB_TOKEN}}
- script: |
- const path = require('path');
- const fs = require('fs');
- const release_id = '${{ steps.create_release.outputs.id }}';
- for (let file of await fs.readdirSync('./artifact/release')) {
- if (path.extname(file) === '.zip') {
- console.log('uploadReleaseAsset', file);
- await github.repos.uploadReleaseAsset({
- owner: context.repo.owner,
- repo: context.repo.repo,
- release_id: release_id,
- name: file,
- data: await fs.readFileSync(`./artifact/release/${file}`)
- });
- }
- }
-
-# ubuntu-latest-gcc:
-# runs-on: ubuntu-latest
-#
-# strategy:
-# matrix:
-# build: [Debug, Release]
-#
-# steps:
-# - name: Clone
-# uses: actions/checkout@v4
-#
-# - name: Dependencies
-# run: |
-# sudo apt-get update
-# sudo apt-get install build-essential
-# sudo apt-get install cmake
-#
-# - name: Configure
-# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-#
-# - name: Build
-# run: |
-# make
-#
-# ubuntu-latest-clang:
-# runs-on: ubuntu-latest
-#
-# strategy:
-# matrix:
-# build: [Debug, Release]
-#
-# steps:
-# - name: Clone
-# uses: actions/checkout@v4
-#
-# - name: Dependencies
-# run: |
-# sudo apt-get update
-# sudo apt-get install build-essential
-# sudo apt-get install cmake
-#
-# - name: Configure
-# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
-#
-# - name: Build
-# run: |
-# make
-#
-# ubuntu-latest-gcc-sanitized:
-# runs-on: ubuntu-latest
-#
-# strategy:
-# matrix:
-# sanitizer: [ADDRESS, THREAD, UNDEFINED]
-#
-# steps:
-# - name: Clone
-# uses: actions/checkout@v4
-#
-# - name: Dependencies
-# run: |
-# sudo apt-get update
-# sudo apt-get install build-essential
-# sudo apt-get install cmake
-#
-# - name: Configure
-# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
-#
-# - name: Build
-# run: |
-# make
-#
-# windows:
-# runs-on: windows-latest
-#
-# strategy:
-# matrix:
-# build: [Release]
-# arch: [Win32, x64]
-# include:
-# - arch: Win32
-# s2arc: x86
-# - arch: x64
-# s2arc: x64
-#
-# steps:
-# - name: Clone
-# uses: actions/checkout@v4
-#
-# - name: Add msbuild to PATH
-# uses: microsoft/setup-msbuild@v1
-#
-# - name: Configure
-# run: >
-# cmake -S . -B ./build -A ${{ matrix.arch }}
-# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-#
-# - name: Build
-# run: |
-# cd ./build
-# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
-#
-# - name: Upload binaries
-# uses: actions/upload-artifact@v4
-# with:
-# name: llama-bin-${{ matrix.arch }}
-# path: build/bin/${{ matrix.build }}
-#
-# windows-blas:
-# runs-on: windows-latest
-#
-# strategy:
-# matrix:
-# build: [Release]
-# arch: [Win32, x64]
-# blas: [ON]
-# include:
-# - arch: Win32
-# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
-# s2arc: x86
-# - arch: x64
-# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
-# s2arc: x64
-#
-# steps:
-# - name: Clone
-# uses: actions/checkout@v4
-#
-# - name: Add msbuild to PATH
-# uses: microsoft/setup-msbuild@v1
-#
-# - name: Fetch OpenBLAS
-# if: matrix.blas == 'ON'
-# run: |
-# C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
-# 7z x blas.zip -oblas -y
-# copy blas/include/cblas.h .
-# copy blas/include/openblas_config.h .
-# echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
-#
-# - name: Configure
-# run: >
-# cmake -S . -B ./build -A ${{ matrix.arch }}
-# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-# -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
-# -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
-#
-# - name: Build
-# run: |
-# cd ./build
-# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
-#
-# - name: Copy libopenblas.dll
-# if: matrix.blas == 'ON'
-# run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
-#
-# - name: Upload binaries
-# if: matrix.blas == 'ON'
-# uses: actions/upload-artifact@v4
-# with:
-# name: llama-blas-bin-${{ matrix.arch }}
-# path: build/bin/${{ matrix.build }}
-#
-# emscripten:
-# runs-on: ubuntu-latest
-#
-# strategy:
-# matrix:
-# build: [Release]
-#
-# steps:
-# - name: Clone
-# uses: actions/checkout@v4
-#
-# - name: Dependencies
-# run: |
-# wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
-# tar -xvf master.tar.gz
-# emsdk-master/emsdk update
-# emsdk-master/emsdk install latest
-# emsdk-master/emsdk activate latest
-#
-# - name: Configure
-# run: echo "tmp"
-#
-# - name: Build
-# run: |
-# pushd emsdk-master
-# source ./emsdk_env.sh
-# popd
-# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-# make
diff --git a/.github/workflows/close-issue.yml b/.github/workflows/close-issue.yml
deleted file mode 100644
index 69c9f4f6..00000000
--- a/.github/workflows/close-issue.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Close inactive issues
-on:
- schedule:
- - cron: "42 0 * * *"
-
-jobs:
- close-issues:
- runs-on: ubuntu-latest
- permissions:
- issues: write
- pull-requests: write
- steps:
- - uses: actions/stale@v5
- with:
- exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
- days-before-issue-stale: 30
- days-before-issue-close: 14
- stale-issue-label: "stale"
- close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
- days-before-pr-stale: -1
- days-before-pr-close: -1
- operations-per-run: 10000
- repo-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
deleted file mode 100644
index 6244b481..00000000
--- a/.github/workflows/docker.yml
+++ /dev/null
@@ -1,117 +0,0 @@
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-# GitHub recommends pinning actions to a commit SHA.
-# To get a newer version, you will need to update the SHA.
-# You can also reference a tag or branch, but the action may change without warning.
-
-name: Publish Docker image
-
-on:
- pull_request:
- push:
- branches:
- - master
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
- cancel-in-progress: true
-
-jobs:
- push_to_registry:
- name: Push Docker image to Docker Hub
- if: github.event.pull_request.draft == false
-
- runs-on: ubuntu-latest
- env:
- COMMIT_SHA: ${{ github.sha }}
- strategy:
- matrix:
- config:
- - { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- - { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- # NOTE(canardletter): The CUDA builds on arm64 are very slow, so I
- # have disabled them for now until the reason why
- # is understood.
- - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
- - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
- - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
- - { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- - { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- - { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
- - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
- steps:
- - name: Check out the repo
- uses: actions/checkout@v4
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v2
-
- - name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v2
-
- - name: Log in to Docker Hub
- uses: docker/login-action@v2
- with:
- registry: ghcr.io
- username: ${{ github.repository_owner }}
- password: ${{ secrets.GITHUB_TOKEN }}
-
- # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
- - name: Free Disk Space (Ubuntu)
- uses: jlumbroso/free-disk-space@main
- with:
- # this might remove tools that are actually needed,
- # if set to "true" but frees about 6 GB
- tool-cache: false
-
- # all of these default to true, but feel free to set to
- # "false" if necessary for your workflow
- android: true
- dotnet: true
- haskell: true
- large-packages: true
- docker-images: true
- swap-storage: true
-
- - name: Determine tag name
- id: tag
- shell: bash
- run: |
- BUILD_NUMBER="$(git rev-list --count HEAD)"
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
- else
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
- fi
-
- - name: Downcase github.repository_owner
- run: |
- echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
- env:
- GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
-
- - name: Build and push Docker image (versioned)
- if: github.event_name == 'push'
- uses: docker/build-push-action@v4
- with:
- context: .
- push: true
- platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
- file: ${{ matrix.config.dockerfile }}
-
- - name: Build and push Docker image (tagged)
- uses: docker/build-push-action@v4
- with:
- context: .
- push: ${{ github.event_name == 'push' }}
- platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
- file: ${{ matrix.config.dockerfile }}
diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml
deleted file mode 100644
index ae86e992..00000000
--- a/.github/workflows/editorconfig.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: EditorConfig Checker
-
-on:
- workflow_dispatch: # allows manual triggering
- inputs:
- create_release:
- description: 'Create new release'
- required: true
- type: boolean
- push:
- branches:
- - master
- pull_request:
- branches:
- - master
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
- cancel-in-progress: true
-
-jobs:
- editorconfig:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- - uses: editorconfig-checker/action-editorconfig-checker@main
- - run: editorconfig-checker
diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml
deleted file mode 100644
index 3ca4d305..00000000
--- a/.github/workflows/gguf-publish.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-# This workflow will upload a Python Package using Twine when a GGUF release is created
-# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
-
-# See `gguf-py/README.md` for how to make a release.
-
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-name: Upload Python Package
-
-on:
- workflow_dispatch:
- push:
- # Pattern matched against refs/tags
- tags:
- - 'gguf-v*' # Push events to every version tag
-
-
-jobs:
- deploy:
-
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v4
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.9.x'
- - name: Install dependencies
- run: |
- cd gguf-py
- python -m pip install poetry
- poetry install
-
- - name: Build package
- run: cd gguf-py && poetry build
- - name: Publish package
- uses: pypa/gh-action-pypi-publish@release/v1
- with:
- password: ${{ secrets.PYPI_API_TOKEN }}
- packages-dir: gguf-py/dist
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
deleted file mode 100644
index 368dbdbe..00000000
--- a/.github/workflows/labeler.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-name: "Pull Request Labeler"
-on:
-- pull_request_target
-
-jobs:
- labeler:
- permissions:
- contents: read
- pull-requests: write
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- with:
- repository: "ggerganov/llama.cpp"
- - uses: actions/labeler@v5
- with:
- configuration-path: '.github/labeler.yml'
diff --git a/.github/workflows/nix-ci-aarch64.yml b/.github/workflows/nix-ci-aarch64.yml
deleted file mode 100644
index 4aa4b237..00000000
--- a/.github/workflows/nix-ci-aarch64.yml
+++ /dev/null
@@ -1,65 +0,0 @@
-name: Nix aarch64 builds
-
-on:
- workflow_dispatch: # allows manual triggering
- schedule:
- # Rebuild daily rather than on every push because QEMU is expensive (e.g.
- # 1.5h instead of minutes with the cold cache).
- #
- # randint(0, 59), randint(0, 23)
- - cron: '26 12 * * *'
- # But also rebuild if we touched any of the Nix expressions:
- push:
- branches:
- - master
- paths: ['**/*.nix', 'flake.lock']
- pull_request:
- types: [opened, synchronize, reopened]
- paths: ['**/*.nix', 'flake.lock']
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
- cancel-in-progress: true
-
-jobs:
- nix-build-aarch64:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - name: Install QEMU
- # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
- run: |
- sudo apt-get update
- sudo apt-get install -y qemu-user-static qemu-system-aarch64
- sudo usermod -a -G kvm $USER
- - name: Install Nix
- uses: DeterminateSystems/nix-installer-action@v9
- with:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- extra-conf: |
- extra-platforms = aarch64-linux
- extra-system-features = nixos-test kvm
- extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
- extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
- - uses: DeterminateSystems/magic-nix-cache-action@v2
- with:
- upstream-cache: https://${{ matrix.cachixName }}.cachix.org
- - name: Set-up cachix to push the results to
- uses: cachix/cachix-action@v13
- with:
- authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
- name: llama-cpp
- - name: Show all output paths
- run: >
- nix run github:nix-community/nix-eval-jobs
- -- --gc-roots-dir gcroot
- --flake
- ".#packages.aarch64-linux"
- - name: Build
- run: >
- nix run github:Mic92/nix-fast-build
- -- --skip-cached --no-nom
- --systems aarch64-linux
- --flake
- ".#checks.aarch64-linux"
diff --git a/.github/workflows/nix-ci.yml b/.github/workflows/nix-ci.yml
deleted file mode 100644
index 8955f38d..00000000
--- a/.github/workflows/nix-ci.yml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: Nix CI
-
-on:
- workflow_dispatch: # allows manual triggering
- push:
- branches:
- - master
- pull_request:
- types: [opened, synchronize, reopened]
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
- cancel-in-progress: true
-
-jobs:
- nix-eval:
- strategy:
- fail-fast: false
- matrix:
- os: [ ubuntu-latest, macos-latest ]
- runs-on: ${{ matrix.os }}
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - name: Install Nix
- uses: DeterminateSystems/nix-installer-action@v9
- with:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- extra-conf: |
- extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
- extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
- - uses: DeterminateSystems/magic-nix-cache-action@v2
- with:
- upstream-cache: https://${{ matrix.cachixName }}.cachix.org
- - name: List all flake outputs
- run: nix flake show --all-systems
- - name: Show all output paths
- run: >
- nix run github:nix-community/nix-eval-jobs
- -- --gc-roots-dir gcroot
- --flake
- ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
- nix-build:
- strategy:
- fail-fast: false
- matrix:
- os: [ ubuntu-latest, macos-latest ]
- runs-on: ${{ matrix.os }}
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - name: Install Nix
- uses: DeterminateSystems/nix-installer-action@v9
- with:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- extra-conf: |
- extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
- extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
- - uses: DeterminateSystems/magic-nix-cache-action@v2
- with:
- upstream-cache: https://${{ matrix.cachixName }}.cachix.org
- - name: Set-up cachix to push the results to
- uses: cachix/cachix-action@v13
- with:
- authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
- name: llama-cpp
- - name: Build
- run: >
- nix run github:Mic92/nix-fast-build
- -- --skip-cached --no-nom
- --flake
- ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
diff --git a/.github/workflows/nix-flake-update.yml b/.github/workflows/nix-flake-update.yml
deleted file mode 100644
index 3a6a96e2..00000000
--- a/.github/workflows/nix-flake-update.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: update-flake-lock
-on:
- workflow_dispatch:
- schedule:
- - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00
-
-jobs:
- lockfile:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - name: Install Nix
- uses: DeterminateSystems/nix-installer-action@main
- - name: Update flake.lock
- uses: DeterminateSystems/update-flake-lock@main
- with:
- pr-title: "nix: update flake.lock"
- pr-labels: |
- nix
- pr-reviewers: philiptaron,SomeoneSerge
- token: ${{ secrets.FLAKE_TOKEN }}
diff --git a/.github/workflows/nix-publish-flake.yml b/.github/workflows/nix-publish-flake.yml
deleted file mode 100644
index 2c3c1ebd..00000000
--- a/.github/workflows/nix-publish-flake.yml
+++ /dev/null
@@ -1,36 +0,0 @@
-# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
-name: "Publish a flake to flakestry & flakehub"
-on:
- push:
- tags:
- - "*"
- workflow_dispatch:
- inputs:
- tag:
- description: "The existing tag to publish"
- type: "string"
- required: true
-jobs:
- flakestry-publish:
- runs-on: ubuntu-latest
- permissions:
- id-token: "write"
- contents: "read"
- steps:
- - uses: flakestry/flakestry-publish@main
- with:
- version: "${{ inputs.tag || github.ref_name }}"
- flakehub-publish:
- runs-on: "ubuntu-latest"
- permissions:
- id-token: "write"
- contents: "read"
- steps:
- - uses: "actions/checkout@v4"
- with:
- ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
- - uses: "DeterminateSystems/nix-installer-action@main"
- - uses: "DeterminateSystems/flakehub-push@main"
- with:
- visibility: "public"
- tag: "${{ inputs.tag }}"
diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml
deleted file mode 100644
index 4e0374fc..00000000
--- a/.github/workflows/python-check-requirements.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: Python check requirements.txt
-
-on:
- push:
- paths:
- - '.github/workflows/python-check-requirements.yml'
- - 'scripts/check-requirements.sh'
- - 'convert*.py'
- - 'requirements.txt'
- - 'requirements/*.txt'
- pull_request:
- paths:
- - '.github/workflows/python-check-requirements.yml'
- - 'scripts/check-requirements.sh'
- - 'convert*.py'
- - 'requirements.txt'
- - 'requirements/*.txt'
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
- cancel-in-progress: true
-
-jobs:
- python-check-requirements:
- runs-on: ubuntu-latest
- name: check-requirements
- steps:
- - name: Check out source repository
- uses: actions/checkout@v4
- - name: Set up Python environment
- uses: actions/setup-python@v5
- with:
- python-version: "3.11"
- - name: Run check-requirements.sh script
- run: bash scripts/check-requirements.sh
diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
deleted file mode 100644
index a8d46f31..00000000
--- a/.github/workflows/python-lint.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: flake8 Lint
-
-on: [push, pull_request]
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
- cancel-in-progress: true
-
-jobs:
- flake8-lint:
- runs-on: ubuntu-latest
- name: Lint
- steps:
- - name: Check out source repository
- uses: actions/checkout@v4
- - name: Set up Python environment
- uses: actions/setup-python@v5
- with:
- python-version: "3.11"
- - name: flake8 Lint
- uses: py-actions/flake8@v2
- with:
- plugins: "flake8-no-print"
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
deleted file mode 100644
index 6155e941..00000000
--- a/.github/workflows/server.yml
+++ /dev/null
@@ -1,183 +0,0 @@
-# Server build and tests
-name: Server
-
-on:
- workflow_dispatch: # allows manual triggering
- inputs:
- sha:
- description: 'Commit SHA1 to build'
- required: false
- type: string
- slow_tests:
- description: 'Run slow tests'
- required: true
- type: boolean
- push:
- branches:
- - master
- paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
- pull_request:
- types: [opened, synchronize, reopened]
- paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
- cancel-in-progress: true
-
-jobs:
- server:
- runs-on: ubuntu-latest
-
- strategy:
- matrix:
- sanitizer: [ADDRESS, THREAD, UNDEFINED]
- build_type: [RelWithDebInfo]
- include:
- - build_type: Release
- sanitizer: ""
- fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
-
- steps:
- - name: Dependencies
- id: depends
- run: |
- sudo apt-get update
- sudo apt-get -y install \
- build-essential \
- xxd \
- git \
- cmake \
- curl \
- wget \
- language-pack-en \
- libcurl4-openssl-dev
-
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
- ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
-
- - name: Python setup
- id: setup_python
- uses: actions/setup-python@v5
- with:
- python-version: '3.11'
-
- - name: Tests dependencies
- id: test_dependencies
- run: |
- pip install -r examples/server/tests/requirements.txt
-
- - name: Verify server deps
- id: verify_server_deps
- run: |
- git config --global --add safe.directory $(realpath .)
- cd examples/server
- git ls-files --others --modified
- git status
- ./deps.sh
- git status
- not_ignored_files="$(git ls-files --others --modified)"
- echo "Modified files: ${not_ignored_files}"
- if [ -n "${not_ignored_files}" ]; then
- echo "Repository is dirty or server deps are not built as expected"
- echo "${not_ignored_files}"
- exit 1
- fi
-
- - name: Build (no OpenMP)
- id: cmake_build_no_openmp
- if: ${{ matrix.sanitizer == 'THREAD' }}
- run: |
- cmake -B build \
- -DLLAMA_NATIVE=OFF \
- -DLLAMA_BUILD_SERVER=ON \
- -DLLAMA_CURL=ON \
- -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
- -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
- -DLLAMA_OPENMP=OFF ;
- cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
-
- - name: Build
- id: cmake_build
- if: ${{ matrix.sanitizer != 'THREAD' }}
- run: |
- cmake -B build \
- -DLLAMA_NATIVE=OFF \
- -DLLAMA_BUILD_SERVER=ON \
- -DLLAMA_CURL=ON \
- -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
- -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
- cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
-
- - name: Tests
- id: server_integration_tests
- run: |
- cd examples/server/tests
- PORT=8888 ./tests.sh
-
- - name: Slow tests
- id: server_integration_tests_slow
- if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
- run: |
- cd examples/server/tests
- PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
-
-
- server-windows:
- runs-on: windows-2019
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
- ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
-
- - name: libCURL
- id: get_libcurl
- env:
- CURL_VERSION: 8.6.0_6
- run: |
- curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
- mkdir $env:RUNNER_TEMP/libcurl
- tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
-
- - name: Build
- id: cmake_build
- run: |
- cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
- cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
-
- - name: Python setup
- id: setup_python
- uses: actions/setup-python@v5
- with:
- python-version: '3.11'
-
- - name: Tests dependencies
- id: test_dependencies
- run: |
- pip install -r examples/server/tests/requirements.txt
-
- - name: Copy Libcurl
- id: prepare_libcurl
- run: |
- cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
-
- - name: Tests
- id: server_integration_tests
- if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
- run: |
- cd examples/server/tests
- behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
-
- - name: Slow tests
- id: server_integration_tests_slow
- if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
- run: |
- cd examples/server/tests
- behave.exe --stop --no-skipped --no-capture --tags slow