diff options
author | Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> | 2024-04-14 13:12:59 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-14 13:12:59 +0200 |
commit | 8800226d65d5c98cd34eede6a6c05c78405c52da (patch) | |
tree | b90c3b037ec31b8735ee5fcf36ec78c217606358 /examples/gguf-split/tests.sh | |
parent | e689fc4e912feb19085be6894f475a873759cbfe (diff) |
Fix --split-max-size (#6655)
* Fix --split-max-size
Byte size calculation was done on int and overflowed.
* add tests.sh
* add examples test scripts to ci run
Will autodiscover examples/*/tests.sh scripts and run them.
* move WORK_PATH to a subdirectory
* clean up before and after test
* explicitly define which scripts to run
* add --split-max-size to readme
Diffstat (limited to 'examples/gguf-split/tests.sh')
-rw-r--r-- | examples/gguf-split/tests.sh | 89 |
1 files changed, 89 insertions, 0 deletions
#!/bin/bash
#
# examples/gguf-split/tests.sh
#
# End-to-end smoke test for the gguf-split tool.
#
# Usage:   tests.sh path_to_build_binary [path_to_temp_folder]
# Example: tests.sh ../../build/bin ../../tmp
#
# Arguments:
#   $1  directory containing the `gguf-split` and `main` binaries
#   $2  (optional) base temp directory; defaults to /tmp. All artifacts are
#       written to "$2/gguf-split".
#
# Notes:
#   - scripts/hf.sh is resolved as ../../scripts/hf.sh relative to the current
#     working directory, so run this script from its own directory.
#   - The downloaded model is kept between runs; only the generated
#     split/merge files are removed.
#   - Shard file names below hard-code the expected shard counts
#     (6 for --split-max-tensors 28, 2 for --split-max-size 2G).

set -eu

if [ $# -lt 1 ]
then
    echo "usage:   $0 path_to_build_binary [path_to_temp_folder]"
    echo "example: $0 ../../build/bin ../../tmp"
    exit 1
fi

if [ $# -gt 1 ]
then
    TMP_DIR=$2
else
    TMP_DIR=/tmp
fi

# Trace every command from here on so CI logs show exactly what was run.
set -x

SPLIT=$1/gguf-split
MAIN=$1/main
WORK_PATH=$TMP_DIR/gguf-split
CUR_DIR=$(pwd)

mkdir -p "$WORK_PATH"

# Clean up in case of previously failed test.
# The variable is quoted; the glob suffix is intentionally left unquoted.
rm -f "$WORK_PATH"/ggml-model-split*.gguf "$WORK_PATH"/ggml-model-merge*.gguf

# 1. Get a model
# Runs in a subshell so the `cd` does not affect the rest of the script.
(
    cd "$WORK_PATH"
    "$CUR_DIR"/../../scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
)
echo PASS

# 2. Split with max tensors strategy
"$SPLIT" --split-max-tensors 28 "$WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf" "$WORK_PATH/ggml-model-split"
echo PASS
echo

# 2b. Test the sharded model is loading properly
"$MAIN" --model "$WORK_PATH/ggml-model-split-00001-of-00006.gguf" --random-prompt --n-predict 32
echo PASS
echo

# 3. Merge
"$SPLIT" --merge "$WORK_PATH/ggml-model-split-00001-of-00006.gguf" "$WORK_PATH/ggml-model-merge.gguf"
echo PASS
echo

# 3b. Test the merged model is loading properly
"$MAIN" --model "$WORK_PATH/ggml-model-merge.gguf" --random-prompt --n-predict 32
echo PASS
echo

# Steps 4 through 5b are currently disabled (left commented out).

# 4. Split with no tensor in metadata
#"$SPLIT" --split-max-tensors 32 --no-tensor-in-metadata "$WORK_PATH/ggml-model-merge.gguf" "$WORK_PATH/ggml-model-split-32-tensors"
#echo PASS
#echo

# 4b. Test the sharded model is loading properly
#"$MAIN" --model "$WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf" --random-prompt --n-predict 32
#echo PASS
#echo

# 5. Merge
#"$SPLIT" --merge "$WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf" "$WORK_PATH/ggml-model-merge-2.gguf"
#echo PASS
#echo

# 5b. Test the merged model is loading properly
#"$MAIN" --model "$WORK_PATH/ggml-model-merge-2.gguf" --random-prompt --n-predict 32
#echo PASS
#echo

# 6. Split with size strategy
"$SPLIT" --split-max-size 2G "$WORK_PATH/ggml-model-merge.gguf" "$WORK_PATH/ggml-model-split-2G"
echo PASS
echo

# 6b. Test the sharded model is loading properly
"$MAIN" --model "$WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf" --random-prompt --n-predict 32
echo PASS
echo

# Clean up
rm -f "$WORK_PATH"/ggml-model-split*.gguf "$WORK_PATH"/ggml-model-merge*.gguf