diff options
author | Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> | 2024-04-14 13:12:59 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-14 13:12:59 +0200 |
commit | 8800226d65d5c98cd34eede6a6c05c78405c52da (patch) | |
tree | b90c3b037ec31b8735ee5fcf36ec78c217606358 /examples | |
parent | e689fc4e912feb19085be6894f475a873759cbfe (diff) |
Fix --split-max-size (#6655)
* Fix --split-max-size
The byte-size calculation was done in `int` arithmetic and overflowed for sizes of 2G and above.
* add tests.sh
* add examples test scripts to ci run
The CI run will autodiscover examples/*/tests.sh scripts and run them.
* move WORK_PATH to a subdirectory
* clean up before and after test
* explicitly define which scripts to run
* add --split-max-size to readme
Diffstat (limited to 'examples')
-rw-r--r-- | examples/gguf-split/README.md | 1 | ||||
-rw-r--r-- | examples/gguf-split/gguf-split.cpp | 4 | ||||
-rw-r--r-- | examples/gguf-split/tests.sh | 89 |
3 files changed, 92 insertions, 2 deletions
diff --git a/examples/gguf-split/README.md b/examples/gguf-split/README.md index ddb1f764..ad1d8665 100644 --- a/examples/gguf-split/README.md +++ b/examples/gguf-split/README.md @@ -5,5 +5,6 @@ CLI to split / merge GGUF files. **Command line options:** - `--split`: split GGUF to multiple GGUF, default operation. +- `--split-max-size`: max size per split in `M` or `G`, f.ex. `500M` or `2G`. - `--split-max-tensors`: maximum tensors in each split: default(128) - `--merge`: merge multiple GGUF to a single GGUF. diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp index 24acbf02..39c75e0a 100644 --- a/examples/gguf-split/gguf-split.cpp +++ b/examples/gguf-split/gguf-split.cpp @@ -59,10 +59,10 @@ static size_t split_str_to_n_bytes(std::string str) { int n; if (str.back() == 'M') { sscanf(str.c_str(), "%d", &n); - n_bytes = n * 1024 * 1024; // megabytes + n_bytes = (size_t)n * 1024 * 1024; // megabytes } else if (str.back() == 'G') { sscanf(str.c_str(), "%d", &n); - n_bytes = n * 1024 * 1024 * 1024; // gigabytes + n_bytes = (size_t)n * 1024 * 1024 * 1024; // gigabytes } else { throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back())); } diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh new file mode 100644 index 00000000..879522f7 --- /dev/null +++ b/examples/gguf-split/tests.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +set -eu + +if [ $# -lt 1 ] +then + echo "usage: $0 path_to_build_binary [path_to_temp_folder]" + echo "example: $0 ../../build/bin ../../tmp" + exit 1 +fi + +if [ $# -gt 1 ] +then + TMP_DIR=$2 +else + TMP_DIR=/tmp +fi + +set -x + +SPLIT=$1/gguf-split +MAIN=$1/main +WORK_PATH=$TMP_DIR/gguf-split +CUR_DIR=$(pwd) + +mkdir -p "$WORK_PATH" + +# Clean up in case of previously failed test +rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf + +# 1. 
Get a model +( + cd $WORK_PATH + "$CUR_DIR"/../../scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf +) +echo PASS + +# 2. Split with max tensors strategy +$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split +echo PASS +echo + +# 2b. Test the sharded model is loading properly +$MAIN --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --random-prompt --n-predict 32 +echo PASS +echo + +# 3. Merge +$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-merge.gguf +echo PASS +echo + +# 3b. Test the merged model is loading properly +$MAIN --model $WORK_PATH/ggml-model-merge.gguf --random-prompt --n-predict 32 +echo PASS +echo + +# 4. Split with no tensor in metadata +#$SPLIT --split-max-tensors 32 --no-tensor-in-metadata $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors +#echo PASS +#echo + +# 4b. Test the sharded model is loading properly +#$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf --random-prompt --n-predict 32 +#echo PASS +#echo + +# 5. Merge +#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf +#echo PASS +#echo + +# 5b. Test the merged model is loading properly +#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --random-prompt --n-predict 32 +#echo PASS +#echo + +# 6. Split with size strategy +$SPLIT --split-max-size 2G $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-2G +echo PASS +echo + +# 6b. Test the sharded model is loading properly +$MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --random-prompt --n-predict 32 +echo PASS +echo + +# Clean up +rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf |