diff options
author | Xuan Son Nguyen <thichthat@gmail.com> | 2024-01-18 21:33:05 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-18 22:33:05 +0200 |
commit | 821f0a271e7c9ee737945245dd7abfa22cc9b5b0 (patch) | |
tree | 57736f3c7ebd7e24d4d9dcffdb6ee76b2c108efa /examples/server/server.cpp | |
parent | 96d7f56d2918ffde1995dbb32392571deb76d7fc (diff) |
server : defer tasks when "slot unavailable" (#5018)
* server: defer task when no slot is available
* remove unnecessary log
---------
Co-authored-by: Xuan Son Nguyen <xuanson.nguyen@snowpack.eu>
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r-- | examples/server/server.cpp | 12 |
1 file changed, 9 insertions, 3 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 93f99929..0462fbd2 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1558,6 +1558,7 @@ struct llama_server_context void process_tasks() { std::unique_lock<std::mutex> lock(mutex_tasks); + std::vector<task_server> deferred_tasks; while (!queue_tasks.empty()) { task_server task = queue_tasks.front(); @@ -1568,9 +1569,8 @@ struct llama_server_context llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); if (slot == nullptr) { - LOG_TEE("slot unavailable\n"); - // send error result - send_error(task, "slot unavailable"); + // if no slot is available, we defer this task for processing later + deferred_tasks.push_back(task); break; } @@ -1616,6 +1616,12 @@ struct llama_server_context } } + // add all the deferred tasks back to the queue + for (task_server &task : deferred_tasks) + { + queue_tasks.push_back(task); + } + // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue std::vector<task_result> agg_results; auto queue_iterator = queue_multitasks.begin(); |