From 8843a98c2ba97a25e93319a104f9ddfaf83ce4c4 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Tue, 30 Apr 2024 00:52:50 +0100 Subject: Improve usability of --model-url & related flags (#6930) * args: default --model to models/ + filename from --model-url or --hf-file (or else legacy models/7B/ggml-model-f16.gguf) * args: main & server now call gpt_params_handle_model_default * args: define DEFAULT_MODEL_PATH + update cli docs * curl: check url of previous download (.json metadata w/ url, etag & lastModified) * args: fix update to quantize-stats.cpp * curl: support legacy .etag / .lastModified companion files * curl: rm legacy .etag file support * curl: reuse regex across headers callback calls * curl: unique_ptr to manage lifecycle of curl & outfile * curl: nit: no need for multiline regex flag * curl: update failed test (model file collision) + gitignore *.gguf.json --- examples/main/README.md | 2 +- examples/quantize-stats/quantize-stats.cpp | 2 +- examples/server/server.cpp | 4 +++- examples/server/tests/features/embeddings.feature | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) (limited to 'examples') diff --git a/examples/main/README.md b/examples/main/README.md index 649f4e0f..e7a38743 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -66,7 +66,7 @@ main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt In this section, we cover the most commonly used options for running the `main` program with the LLaMA models: -- `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`). +- `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`; inferred from `--model-url` if set). - `-mu MODEL_URL --model-url MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf). - `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses. - `-ins, --instruct`: Run the program in instruction mode, which is particularly useful when working with Alpaca models. diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 1d05f139..746df844 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -23,7 +23,7 @@ #endif struct quantize_stats_params { - std::string model = "models/7B/ggml-model-f16.gguf"; + std::string model = DEFAULT_MODEL_PATH; bool verbose = false; bool per_layer_stats = false; bool print_histogram = false; diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 2760aea8..01453af2 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2353,7 +2353,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co printf(" disable KV offload\n"); } printf(" -m FNAME, --model FNAME\n"); - printf(" model path (default: %s)\n", params.model.c_str()); + printf(" model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise %s)\n", DEFAULT_MODEL_PATH); printf(" -mu MODEL_URL, --model-url MODEL_URL\n"); printf(" model download url (default: unused)\n"); printf(" -hfr REPO, --hf-repo REPO\n"); @@ -2835,6 +2835,8 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams, } } + gpt_params_handle_model_default(params); + if (!params.kv_overrides.empty()) { params.kv_overrides.emplace_back(); params.kv_overrides.back().key[0] = 0; diff --git a/examples/server/tests/features/embeddings.feature b/examples/server/tests/features/embeddings.feature index dcf1434f..6f163ce0 100644 --- a/examples/server/tests/features/embeddings.feature +++ b/examples/server/tests/features/embeddings.feature @@ -5,7 +5,7 @@ Feature: llama.cpp server Background: Server startup Given a server listening on localhost:8080 And a model url https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16.gguf - And a model file ggml-model-f16.gguf + And a model file bert-bge-small.gguf And a model alias bert-bge-small And 42 as server seed And 2 slots -- cgit v1.2.3