Diffstat (limited to 'examples/main')
-rw-r--r--  examples/main/README.md    5
-rw-r--r--  examples/main/main.cpp    69
2 files changed, 10 insertions, 64 deletions
diff --git a/examples/main/README.md b/examples/main/README.md
index ee930f4e..4eaa6847 100644
--- a/examples/main/README.md
+++ b/examples/main/README.md
@@ -53,13 +53,13 @@ The following command generates "infinite" text from a starting prompt (you can
#### Unix-based systems (Linux, macOS, etc.):
```bash
-./main -m models/7B/ggml-model.bin --ignore-eos -n -1 --random-prompt
+./main -m models/7B/ggml-model.bin --ignore-eos -n -1
```
#### Windows:
```powershell
-main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt
+main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1
```
## Common Options
@@ -80,7 +80,6 @@ The `main` program provides several ways to interact with the LLaMA models using
- `--prompt PROMPT`: Provide a prompt directly as a command-line option.
- `--file FNAME`: Provide a file containing a prompt or multiple prompts.
- `--interactive-first`: Run the program in interactive mode and wait for input right away. (More on this below.)
-- `--random-prompt`: Start with a randomized prompt.
## Interaction
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 44949ba8..b97b7b79 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -122,8 +122,10 @@ int main(int argc, char ** argv) {
g_params = &params;
if (!gpt_params_parse(argc, argv, params)) {
+ gpt_params_print_usage(argc, argv, params);
return 1;
}
+
llama_sampling_params & sparams = params.sparams;
#ifndef LOG_DISABLE_LOGS
@@ -180,9 +182,6 @@ int main(int argc, char ** argv) {
LOG_TEE("%s: seed = %u\n", __func__, params.seed);
std::mt19937 rng(params.seed);
- if (params.random_prompt) {
- params.prompt = string_random_prompt(rng);
- }
LOG("%s: llama backend init\n", __func__);
llama_backend_init();
@@ -250,11 +249,8 @@ int main(int argc, char ** argv) {
std::vector<llama_token> embd_inp;
- if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
+ if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
LOG("tokenize the prompt\n");
- if (params.chatml) {
- params.prompt = "<|im_start|>system\n" + params.prompt + "<|im_end|>";
- }
embd_inp = ::llama_tokenize(ctx, params.prompt, true, true);
} else {
LOG("use session tokens\n");
@@ -332,37 +328,13 @@ int main(int argc, char ** argv) {
}
// number of tokens to keep when resetting context
- if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct || params.chatml) {
+ if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size()) {
params.n_keep = (int)embd_inp.size();
} else {
params.n_keep += add_bos; // always keep the BOS token
}
- // prefix & suffix for instruct mode
- const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true, true);
- const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false, true);
-
- LOG("inp_pfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, inp_pfx).c_str());
- LOG("inp_sfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, inp_sfx).c_str());
-
- // chatml prefix & suffix
- const auto cml_pfx = ::llama_tokenize(ctx, "\n<|im_start|>user\n", true, true);
- const auto cml_sfx = ::llama_tokenize(ctx, "<|im_end|>\n<|im_start|>assistant\n", false, true);
-
- LOG("cml_pfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, cml_pfx).c_str());
- LOG("cml_sfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, cml_sfx).c_str());
-
- // in instruct mode, we inject a prefix and a suffix to each input by the user
- if (params.instruct) {
- params.interactive_first = true;
- params.antiprompt.emplace_back("### Instruction:\n\n");
- }
- // similar for chatml mode
- else if (params.chatml) {
- params.interactive_first = true;
- params.antiprompt.emplace_back("<|im_start|>user\n");
- }
- else if (params.conversation) {
+ if (params.conversation) {
params.interactive_first = true;
}
@@ -823,15 +795,13 @@ int main(int argc, char ** argv) {
is_interacting = true;
printf("\n");
- } else if (params.instruct || params.chatml) {
- is_interacting = true;
}
}
if (n_past > 0 && is_interacting) {
LOG("waiting for user input\n");
- if (params.conversation || params.instruct || params.chatml) {
+ if (params.conversation) {
printf("\n> ");
}
@@ -874,24 +844,12 @@ int main(int argc, char ** argv) {
const size_t original_size = embd_inp.size();
- // instruct mode: insert instruction prefix
- if (params.instruct && !is_antiprompt) {
- LOG("inserting instruction prefix\n");
- n_consumed = embd_inp.size();
- embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
- }
- // chatml mode: insert user chat prefix
- if (params.chatml && !is_antiprompt) {
- LOG("inserting chatml prefix\n");
- n_consumed = embd_inp.size();
- embd_inp.insert(embd_inp.end(), cml_pfx.begin(), cml_pfx.end());
- }
if (params.escape) {
string_process_escapes(buffer);
}
const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
- const auto line_inp = ::llama_tokenize(ctx, buffer, false, params.interactive_specials);
+ const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
@@ -900,17 +858,6 @@ int main(int argc, char ** argv) {
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());
- // instruct mode: insert response suffix
- if (params.instruct) {
- LOG("inserting instruction suffix\n");
- embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
- }
- // chatml mode: insert assistant chat suffix
- if (params.chatml) {
- LOG("inserting chatml suffix\n");
- embd_inp.insert(embd_inp.end(), cml_sfx.begin(), cml_sfx.end());
- }
-
for (size_t i = original_size; i < embd_inp.size(); ++i) {
const llama_token token = embd_inp[i];
output_tokens.push_back(token);
@@ -935,7 +882,7 @@ int main(int argc, char ** argv) {
}
// end of generation
- if (!embd.empty() && llama_token_is_eog(model, embd.back()) && !(params.instruct || params.interactive || params.chatml)) {
+ if (!embd.empty() && llama_token_is_eog(model, embd.back()) && !(params.interactive)) {
LOG_TEE(" [end of text]\n");
break;
}
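The dedicated instruct and ChatML modes removed above can still be approximated with the prefix/suffix and reverse-prompt flags that `main` keeps. A minimal sketch, reusing the Alpaca-style template strings deleted from main.cpp; the model path is a placeholder and the flags are assumed to behave as documented for this example:

```bash
# Approximate the removed instruct mode with the generic interaction flags:
#   --in-prefix  text injected before each user input
#   --in-suffix  text appended after each user input
#   -r           reverse prompt that hands control back to the user
#   -e           process the \n escapes in the strings below
./main -m models/7B/ggml-model.bin --interactive-first -e \
    --in-prefix "\n\n### Instruction:\n\n" \
    --in-suffix "\n\n### Response:\n\n" \
    -r "### Instruction:"
```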