diff options
author | Ziad Ben Hadj-Alouane <zied.benhadjalouane@gmail.com> | 2024-01-13 09:20:46 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-13 16:20:46 +0200 |
commit | 356327feb3f66980ab687040495d722696d98970 (patch) | |
tree | 80c0f9d3d3f60f6183fddba61b65128e835c5c34 /examples/server | |
parent | ee8243adaa9a9f51ff449213383874e49efe368f (diff) |
server : fix deadlock that occurs in multi-prompt scenarios (#4905)
* * fix deadlock
* * don't ruin all whitespace
Diffstat (limited to 'examples/server')
-rw-r--r-- | examples/server/server.cpp | 22 |
1 files changed, 17 insertions, 5 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 7b33aea1..79eacf82 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1350,14 +1350,17 @@ struct llama_server_context res.result_json["model"] = slot.oaicompat_model; } + queue_results.push_back(res); + condition_results.notify_all(); + + // done with results, unlock + lock.unlock(); + // parent multitask, if any, needs to be updated if (slot.multitask_id != -1) { update_multi_task(slot.multitask_id, slot.task_id, res); } - - queue_results.push_back(res); - condition_results.notify_all(); } void send_embedding(llama_client_slot &slot) @@ -1603,6 +1606,7 @@ struct llama_server_context } // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue + std::vector<task_result> agg_results; auto queue_iterator = queue_multitasks.begin(); while (queue_iterator != queue_multitasks.end()) { @@ -1623,8 +1627,9 @@ struct llama_server_context } aggregate_result.result_json = json{ "results", result_jsons }; - std::lock_guard<std::mutex> lock(mutex_results); - queue_results.push_back(aggregate_result); + + agg_results.push_back(aggregate_result); + condition_results.notify_all(); queue_iterator = queue_multitasks.erase(queue_iterator); @@ -1634,6 +1639,13 @@ struct llama_server_context ++queue_iterator; } } + + // done with tasks, unlock + lock.unlock(); + + // copy aggregate results of complete multi-tasks to the results queue + std::lock_guard<std::mutex> lock_results(mutex_results); + queue_results.insert(queue_results.end(), agg_results.begin(), agg_results.end()); } bool update_slots() { |