| author | Justine Tunney <jtunney@mozilla.com> | 2024-05-25 05:04:03 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-25 19:04:03 +1000 |
| commit | 00c63907931bb08a0ed2b7e38cf44dd290143cb9 (patch) | |
| tree | c2248d26ae5d25160594523b562bb2481fb1c87a /examples/main/main.cpp | |
| parent | faa0e6979a11dcb731e9d778ad42ceaa0302015e (diff) | |
main : don't print special tokens with --grammar (#6923)
* main : don't print special tokens with --grammar
The CLI interface was recently changed to print special control tokens,
such as the </s> stop token. These tokens shouldn't be printed when the
grammar flag is passed, unless the grammar itself specifies them, because
stray control tokens break shell-scriptability.
* main: use separate stream for control characters
* main: use dprintf and add --ctrl-token-no-out and --ctrl-token-fd-out
* main: dprintf() was only standardized in POSIX.1-2008 and is not universally available. Just use write() (see the first sketch after this list).
* main: remove --ctrl-token-fd-out in favor of fcntl() based detection (see the second sketch after this list)
* common.cpp: restore --interactive-first, which was accidentally removed
* main: only merge stdout and the control token stream when not in conversation or grammar mode
* main: rejig control token descriptor handling
* main: must check pipe status at the very top of the program
* main: rename --ctrl-token-no-out to --no-special, plus other refactoring
* main: refactor ctrl_token_no_out --> no_special
* llama: rename llama_token_is_control_token() to llama_token_is_control()
* main: remove special token file descriptor feature (#5)
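One bullet above swaps dprintf() for write(2). Below is a minimal sketch, not taken from the patch, of emitting a token piece with plain write(), which predates dprintf() in POSIX and is available everywhere; the helper name emit_piece and the choice of descriptor are illustrative assumptions only.

```cpp
// Hypothetical helper, not from the patch: emit a token piece with write(2).
#include <string>
#include <unistd.h>

static void emit_piece(int fd, const std::string & piece) {
    // write() bypasses stdio buffering entirely, so any earlier
    // fprintf(stdout, ...) output must be flushed first if both
    // streams end up at the same destination.
    ssize_t n = write(fd, piece.data(), piece.size());
    (void) n; // a production version would handle short writes and errors
}
```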
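Two other bullets mention fcntl()-based detection and checking pipe status at the very top of the program. Since the separate descriptor feature was ultimately removed, the following is only a sketch of the underlying idea, under two stated assumptions: fcntl(fd, F_GETFL) fails with EBADF when a descriptor is not open, and isatty() distinguishes a terminal from a pipe.

```cpp
// Sketch only; the special token file descriptor feature was removed in (#5).
#include <cstdio>
#include <fcntl.h>
#include <unistd.h>

// A descriptor is "open" iff F_GETFL succeeds on it.
static bool fd_is_open(int fd) {
    return fcntl(fd, F_GETFL) != -1;
}

int main() {
    // Check stream status before any output is produced: when stdout is a
    // pipe, a shell script is probably capturing it, and control tokens
    // such as </s> would corrupt the captured text.
    const bool to_terminal = isatty(STDOUT_FILENO) == 1;
    std::fprintf(stderr, "stdout: %s\n", to_terminal ? "terminal" : "pipe/file");
    std::fprintf(stderr, "fd 3:   %s\n", fd_is_open(3) ? "open" : "closed");
    return 0;
}
```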
---------
Co-authored-by: Brian <mofosyne@gmail.com>
Diffstat (limited to 'examples/main/main.cpp')
-rw-r--r-- | examples/main/main.cpp | 20 |
1 file changed, 17 insertions(+), 3 deletions(-)
```diff
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 09fa85fc..ac35772f 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -740,18 +740,32 @@ int main(int argc, char ** argv) {
         // display text
         if (input_echo && display) {
             for (auto id : embd) {
-                const std::string token_str = llama_token_to_piece(ctx, id, !params.conversation);
-                printf("%s", token_str.c_str());
+                const std::string token_str = llama_token_to_piece(ctx, id);
+
+                // Console/Stream Output
+                if (!llama_token_is_control(llama_get_model(ctx), id)) {
+                    // Stream Output Token To Standard Output
+                    fprintf(stdout, "%s", token_str.c_str());
+                } else if (!params.no_special && !params.conversation) {
+                    // Stream Control Token To Standard Output Stream
+                    fprintf(stdout, "%s", token_str.c_str());
+                }
 
+                // Record Displayed Tokens To Log
+                // Note: Generated tokens are created one by one hence this check
                 if (embd.size() > 1) {
+                    // Incoming Requested Tokens
                     input_tokens.push_back(id);
                 } else {
+                    // Outgoing Generated Tokens
                     output_tokens.push_back(id);
                     output_ss << token_str;
                 }
+
+                fflush(stdout);
             }
-            fflush(stdout);
         }
+
         // reset color to default if there is no pending user input
         if (input_echo && (int) embd_inp.size() == n_consumed) {
             console::set_display(console::reset);
```
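Distilled from the hunk above, the print decision reads as a single predicate. This is a restatement for clarity, not code from the patch; it assumes the llama.h API at this revision (llama_token_is_control(), llama_get_model()), and the helper name should_emit is hypothetical.

```cpp
#include "llama.h"

// Restates the diff's branching: ordinary tokens always go to stdout;
// control tokens go to stdout only when neither --no-special nor
// conversation mode asks for them to be suppressed.
static bool should_emit(const llama_context * ctx, llama_token id,
                        bool no_special, bool conversation) {
    if (!llama_token_is_control(llama_get_model(ctx), id)) {
        return true; // plain text token
    }
    return !no_special && !conversation; // control token, e.g. </s>
}
```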