Fix --split-max-size (#6655)

* Fix --split-max-size
  Byte size calculation was done on int and overflowed.
* add tests.sh
* add examples test scripts to ci run
  Will autodiscover examples/*/tests.sh scripts and run them.
* move WORK_PATH to a subdirectory
* clean up before and after test
* explicitly define which scripts to run
* add --split-max-size to readme
author: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> 2024-04-14 13:12:59 +0200
committer: GitHub <noreply@github.com> 2024-04-14 13:12:59 +0200
commit: 8800226d65d5c98cd34eede6a6c05c78405c52da (patch)
tree: b90c3b037ec31b8735ee5fcf36ec78c217606358 /examples/gguf-split/tests.sh
parent: e689fc4e912feb19085be6894f475a873759cbfe (diff)
1 files changed, 89 insertions, 0 deletions
diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh
new file mode 100644
index 00000000..879522f7
--- /dev/null
+++ b/examples/gguf-split/tests.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+# Exercises gguf-split: split a model, merge it back, load each result with main.
+# $1 is the directory holding both binaries, $2 optionally overrides /tmp.
+
+set -eu
+
+# The binary directory is mandatory: print usage and fail without it.
+if [ $# -eq 0 ]
+then
+  echo "usage:   $0 path_to_build_binary [path_to_temp_folder]"
+  echo "example: $0 ../../build/bin ../../tmp"
+  exit 1
+fi
+
+# Scratch parent: the second argument when one was passed, /tmp otherwise.
+TMP_DIR=${2-/tmp}
+
+# Trace every command from here on.
+set -x
+
+SPLIT=$1/gguf-split
+MAIN=$1/main
+WORK_PATH=$TMP_DIR/gguf-split
+CUR_DIR=$(pwd)
+
+mkdir -p "$WORK_PATH"
+
+# Clean up shards and merged files left over from a previously failed test
+rm -f "$WORK_PATH"/ggml-model-split*.gguf "$WORK_PATH"/ggml-model-merge*.gguf
+
+# 1. Get a model - hf.sh runs inside the work dir, where later steps expect the file
+(
+  cd "$WORK_PATH"
+  "$CUR_DIR"/../../scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
+)
+echo PASS
+
+# 2. Split with max tensors strategy - later steps expect exactly 6 shards
+"$SPLIT" --split-max-tensors 28 "$WORK_PATH"/gemma-1.1-2b-it.Q8_0.gguf "$WORK_PATH"/ggml-model-split
+echo PASS
+echo
+
+# 2b. Test the sharded model is loading properly
+"$MAIN" --model "$WORK_PATH"/ggml-model-split-00001-of-00006.gguf --random-prompt --n-predict 32
+echo PASS
+echo
+
+# 3. Merge the shards back into a single file
+"$SPLIT" --merge "$WORK_PATH"/ggml-model-split-00001-of-00006.gguf "$WORK_PATH"/ggml-model-merge.gguf
+echo PASS
+echo
+
+# 3b. Test the merged model is loading properly
+"$MAIN" --model "$WORK_PATH"/ggml-model-merge.gguf --random-prompt --n-predict 32
+echo PASS
+echo
+
+# 4. Split with no tensor in metadata - steps 4 to 5b are currently disabled
+#$SPLIT --split-max-tensors 32 --no-tensor-in-metadata $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors
+#echo PASS
+#echo
+
+# 4b. Test the sharded model is loading properly
+#$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf --random-prompt --n-predict 32
+#echo PASS
+#echo
+
+# 5. Merge
+#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf
+#echo PASS
+#echo
+
+# 5b. Test the merged model is loading properly
+#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --random-prompt --n-predict 32
+#echo PASS
+#echo
+
+# 6. Split the merged model from step 3 with size strategy - 2 shards expected
+"$SPLIT" --split-max-size 2G "$WORK_PATH"/ggml-model-merge.gguf "$WORK_PATH"/ggml-model-split-2G
+echo PASS
+echo
+
+# 6b. Test the sharded model is loading properly
+"$MAIN" --model "$WORK_PATH"/ggml-model-split-2G-00001-of-00002.gguf --random-prompt --n-predict 32
+echo PASS
+echo
+
+# Clean up the split and merged files, the downloaded model is kept
+rm -f "$WORK_PATH"/ggml-model-split*.gguf "$WORK_PATH"/ggml-model-merge*.gguf
author	Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>	2024-04-14 13:12:59 +0200
committer	GitHub <noreply@github.com>	2024-04-14 13:12:59 +0200
commit	8800226d65d5c98cd34eede6a6c05c78405c52da (patch)
tree	b90c3b037ec31b8735ee5fcf36ec78c217606358 /examples/gguf-split/tests.sh
parent	e689fc4e912feb19085be6894f475a873759cbfe (diff)