summaryrefslogtreecommitdiff
path: root/examples/quantize-stats/quantize-stats.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'examples/quantize-stats/quantize-stats.cpp')
-rw-r--r--  examples/quantize-stats/quantize-stats.cpp  |  15
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 6b8018ee..9cea472d 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -320,6 +320,7 @@ int main(int argc, char ** argv) {
fprintf(stderr, "Loading model\n");
const int64_t t_main_start_us = ggml_time_us();
+ llama_model * model;
llama_context * ctx;
{
@@ -330,12 +331,20 @@ int main(int argc, char ** argv) {
lparams.f16_kv = false;
lparams.use_mlock = false;
- ctx = llama_init_from_file(params.model.c_str(), lparams);
+ model = llama_load_model_from_file(params.model.c_str(), lparams);
- if (ctx == NULL) {
+ if (model == NULL) {
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
return 1;
}
+
+ ctx = llama_new_context_with_model(model, lparams);
+
+ if (ctx == NULL) {
+ fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str());
+ llama_free_model(model);
+ return 1;
+ }
}
const auto &tensors = llama_internal_get_tensor_map(ctx);
@@ -357,6 +366,7 @@ int main(int argc, char ** argv) {
fprintf(stderr, "%s: error: Quantization should be tested with a float model, "
"this model contains already quantized layers (%s is type %d)\n", __func__, kv_tensor.first.c_str(), kv_tensor.second->type);
llama_free(ctx);
+ llama_free_model(model);
return 1;
}
included_layers++;
@@ -415,6 +425,7 @@ int main(int argc, char ** argv) {
llama_free(ctx);
+ llama_free_model(model);
// report timing
{
const int64_t t_main_end_us = ggml_time_us();