From 68e210b3543e0cc71268bee0920441747679ee13 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 22 Mar 2024 13:08:28 +0200
Subject: server : enable continuous batching by default (#6231)

---
 examples/server/server.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'examples/server')

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index cf075d6c..c918fabe 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1758,7 +1758,7 @@ struct server_context {
 }
 
 // process in chunks of params.n_batch
-int32_t n_batch = llama_n_batch(ctx);
+int32_t n_batch = llama_n_batch(ctx);
 int32_t n_ubatch = llama_n_ubatch(ctx);
 
 // next, batch any pending prompts without exceeding n_batch
@@ -2225,7 +2225,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
 printf(" -to N, --timeout N server read/write timeout in seconds (default: %d)\n", sparams.read_timeout);
 printf(" --embeddings enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled");
 printf(" -np N, --parallel N number of slots for process requests (default: %d)\n", params.n_parallel);
-printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
+printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: enabled)\n");
 printf(" -spf FNAME, --system-prompt-file FNAME\n");
 printf(" set a file to load a system prompt (initial prompt of all slots), this is useful for chat applications.\n");
 printf(" -ctk TYPE, --cache-type-k TYPE\n");
--
cgit v1.2.3