summary | refs | log | tree | commit | diff
path: root/examples/server/server.cpp
diff options
context:
space:
mode:
author: Xuan Son Nguyen <thichthat@gmail.com> 2024-01-18 21:33:05 +0100
committer: GitHub <noreply@github.com> 2024-01-18 22:33:05 +0200
commit: 821f0a271e7c9ee737945245dd7abfa22cc9b5b0 (patch)
tree: 57736f3c7ebd7e24d4d9dcffdb6ee76b2c108efa /examples/server/server.cpp
parent: 96d7f56d2918ffde1995dbb32392571deb76d7fc (diff)
server : defer tasks when "slot unavailable" (#5018)
* server: defer task when no slot is available
* remove unnecessary log
---------
Co-authored-by: Xuan Son Nguyen <xuanson.nguyen@snowpack.eu>
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r-- examples/server/server.cpp 12
1 file changed, 9 insertions, 3 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 93f99929..0462fbd2 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1558,6 +1558,7 @@ struct llama_server_context
void process_tasks()
{
std::unique_lock<std::mutex> lock(mutex_tasks);
+ std::vector<task_server> deferred_tasks;
while (!queue_tasks.empty())
{
task_server task = queue_tasks.front();
@@ -1568,9 +1569,8 @@ struct llama_server_context
llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1));
if (slot == nullptr)
{
- LOG_TEE("slot unavailable\n");
- // send error result
- send_error(task, "slot unavailable");
+ // if no slot is available, we defer this task for processing later
+ deferred_tasks.push_back(task);
break;
}
@@ -1616,6 +1616,12 @@ struct llama_server_context
}
}
+ // add all the deferred tasks back to the queue
+ for (task_server &task : deferred_tasks)
+ {
+ queue_tasks.push_back(task);
+ }
+
// remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
std::vector<task_result> agg_results;
auto queue_iterator = queue_multitasks.begin();