author     Georgi Gerganov <ggerganov@gmail.com>        2024-01-17 18:38:39 +0200
committer  GitHub <noreply@github.com>                  2024-01-17 18:38:39 +0200
commit     c918fe8dca8fa1c4602427e0a4b88e20046f6c34 (patch)
tree       bb3189993063db1d269f647de25ec7988dd93f14
parent     0f83e727af0a7cadf90b7ecc1f8e35de1d0880bc (diff)
metal : create autorelease pool during library build (#4970)
* metal : create autorelease pool during library build

ggml-ci

* test : simplify

ggml-ci
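In short: the Metal shader library compilation in ggml_metal_init is now wrapped in an @autoreleasepool block and the explicit [options release] / [prep release] calls are gone, so temporaries created while building the library are cleaned up when the pool exits rather than by hand. A new tests/test-autorelease program loads a model and creates a context from a std::thread (the scenario from issue #4952 referenced in the test) to make sure this path exits cleanly, and ci/run.sh now runs it against the f16 model.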
-rw-r--r--   .gitignore                   1
-rw-r--r--   Makefile                     5
-rwxr-xr-x   ci/run.sh                    2
-rw-r--r--   ggml-metal.m                19
-rw-r--r--   tests/CMakeLists.txt         1
-rw-r--r--   tests/test-autorelease.cpp  28
6 files changed, 45 insertions, 11 deletions
diff --git a/.gitignore b/.gitignore
index fba20704..5ab81445 100644
--- a/.gitignore
+++ b/.gitignore
@@ -105,3 +105,4 @@ poetry.toml
/tests/test-tokenizer-1-bpe
/tests/test-rope
/tests/test-backend-ops
+/tests/test-autorelease
diff --git a/Makefile b/Makefile
index 995b89f7..a8658a59 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ TEST_TARGETS = \
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
- tests/test-backend-ops
+ tests/test-backend-ops tests/test-autorelease
# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -747,3 +747,6 @@ tests/test-c.o: tests/test-c.c llama.h
tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
diff --git a/ci/run.sh b/ci/run.sh
index 47a254f4..86293f0d 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -179,6 +179,8 @@ function gg_run_open_llama_3b_v2 {
wiki_test_60="${path_wiki}/wiki.test-60.raw"
+ ./bin/test-autorelease ${model_f16}
+
./bin/quantize ${model_f16} ${model_q8_0} q8_0
./bin/quantize ${model_f16} ${model_q4_0} q4_0
./bin/quantize ${model_f16} ${model_q4_1} q4_1
diff --git a/ggml-metal.m b/ggml-metal.m
index 8bb4edd6..66d4d675 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -303,22 +303,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
return NULL;
}
- // dictionary of preprocessor macros
- NSMutableDictionary * prep = [NSMutableDictionary dictionary];
+ @autoreleasepool {
+ // dictionary of preprocessor macros
+ NSMutableDictionary * prep = [NSMutableDictionary dictionary];
#ifdef GGML_QKK_64
- prep[@"QK_K"] = @(64);
+ prep[@"QK_K"] = @(64);
#endif
- MTLCompileOptions* options = [MTLCompileOptions new];
- options.preprocessorMacros = prep;
+ MTLCompileOptions* options = [MTLCompileOptions new];
+ options.preprocessorMacros = prep;
- //[options setFastMathEnabled:false];
+ //[options setFastMathEnabled:false];
- ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
-
- [options release];
- [prep release];
+ ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
+ }
}
if (error) {
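For context, here is a minimal standalone sketch (assuming ARC, and not taken from ggml-metal.m itself) of the pattern the hunk above introduces: the macro dictionary and any NSError handed back by the compiler are autoreleased, so scoping the compile in @autoreleasepool guarantees they are drained at the closing brace even if the calling thread never set up a pool of its own. The kernel source and the QK_K value below are illustrative only.

// build: clang -fobjc-arc -framework Foundation -framework Metal autorelease_sketch.m
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>

// compile a Metal library with all temporaries scoped to a local autorelease pool
static id<MTLLibrary> compile_library(id<MTLDevice> device, NSString * src) {
    id<MTLLibrary> library = nil;

    @autoreleasepool {
        // preprocessor macros for the compile (illustrative value)
        NSMutableDictionary * prep = [NSMutableDictionary dictionary];
        prep[@"QK_K"] = @(64);

        MTLCompileOptions * options = [MTLCompileOptions new];
        options.preprocessorMacros = prep;

        NSError * error = nil;
        library = [device newLibraryWithSource:src options:options error:&error];
        if (error) {
            NSLog(@"compile failed: %@", error);
        }
        // prep and error are autoreleased and drained here; under ARC, options is
        // released as it goes out of scope, so no manual release calls are needed
    }

    // newLibraryWithSource: returns an owned object, so the library outlives the pool
    return library;
}

int main(void) {
    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
    NSString * src = @"kernel void scale(device float * x [[buffer(0)]], "
                      "uint i [[thread_position_in_grid]]) { x[i] *= 2.0f; }";
    return compile_library(device, src) ? 0 : 1;
}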
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 7c932240..d7aaab84 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -49,6 +49,7 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
llama_build_and_test_executable(test-grad0.cpp)
# llama_build_and_test_executable(test-opt.cpp) # SLOW
llama_build_and_test_executable(test-backend-ops.cpp)
+llama_build_and_test_executable(test-autorelease.cpp)
llama_build_and_test_executable(test-rope.cpp)
diff --git a/tests/test-autorelease.cpp b/tests/test-autorelease.cpp
new file mode 100644
index 00000000..289c6ba6
--- /dev/null
+++ b/tests/test-autorelease.cpp
@@ -0,0 +1,28 @@
+// ref: https://github.com/ggerganov/llama.cpp/issues/4952#issuecomment-1892864763
+
+#include <cstdio>
+#include <string>
+#include <thread>
+
+#include "llama.h"
+
+// This creates a new context inside a pthread and then tries to exit cleanly.
+int main(int argc, char ** argv) {
+ if (argc < 2) {
+ printf("Usage: %s model.gguf\n", argv[0]);
+ return 0; // intentionally return success
+ }
+
+ const std::string fname = argv[1];
+
+ std::thread([&fname]() {
+ llama_backend_init(false);
+ auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
+ auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
+ llama_free(ctx);
+ llama_free_model(model);
+ llama_backend_free();
+ }).join();
+
+ return 0;
+}
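Built via the Makefile rule above (or the new CMake target), the test takes a GGUF model path on the command line; ci/run.sh invokes it as ./bin/test-autorelease ${model_f16} before the quantization steps. With no argument it prints usage and deliberately returns success, so runs without a model still pass.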