From c0a8c6db371cb3e4379900867b948879f5842201 Mon Sep 17 00:00:00 2001
From: Pierrick Hymbert
Date: Tue, 20 Feb 2024 08:48:19 +0100
Subject: server : health endpoint configurable failure on no slot (#5594)

---
 examples/server/server.cpp | 52 +++++++++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

(limited to 'examples/server/server.cpp')

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 22c344dd..23482ed9 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2582,40 +2582,40 @@ int main(int argc, char **argv)
         res.set_header("Access-Control-Allow-Headers", "*");
     });
 
-    svr.Get("/health", [&](const httplib::Request&, httplib::Response& res) {
+    svr.Get("/health", [&](const httplib::Request& req, httplib::Response& res) {
         server_state current_state = state.load();
         switch(current_state) {
-            case SERVER_STATE_READY:
-                if (llama.all_slots_are_idle) {
-                    res.set_content(R"({"status": "ok"})", "application/json");
+            case SERVER_STATE_READY: {
+                int available_slots = 0;
+                int processing_slots = 0;
+                for (llama_client_slot &slot: llama.slots) {
+                    if (slot.available()) {
+                        available_slots++;
+                    } else {
+                        processing_slots++;
+                    }
+                }
+                if (available_slots > 0) {
+                    json health = {
+                        {"status", "ok"},
+                        {"slots_idle", available_slots},
+                        {"slots_processing", processing_slots}};
+                    res.set_content(health.dump(), "application/json");
                     res.status = 200; // HTTP OK
                 } else {
-                    int available_slots = 0;
-                    int processing_slots = 0;
-                    for (llama_client_slot & slot : llama.slots) {
-                        if (slot.available()) {
-                            available_slots++;
-                        } else {
-                            processing_slots++;
-                        }
-                    }
-                    if (available_slots > 0) {
-                        json health = {
-                            {"status", "ok"},
-                            {"slots_idle", available_slots},
-                            {"slots_processing", processing_slots}};
-                        res.set_content(health.dump(), "application/json");
-                        res.status = 200; // HTTP OK
-                    } else {
-                        json health = {
-                            {"status", "no slot available"},
-                            {"slots_idle", available_slots},
-                            {"slots_processing", processing_slots}};
-                        res.set_content(health.dump(), "application/json");
+                    json health = {
+                        {"status", "no slot available"},
+                        {"slots_idle", available_slots},
+                        {"slots_processing", processing_slots}};
+                    res.set_content(health.dump(), "application/json");
+                    if (req.has_param("fail_on_no_slot")) {
                         res.status = 503; // HTTP Service Unavailable
+                    } else {
+                        res.status = 200; // HTTP OK
                     }
                 }
                 break;
+            }
             case SERVER_STATE_LOADING_MODEL:
                 res.set_content(R"({"status": "loading model"})", "application/json");
                 res.status = 503; // HTTP Service Unavailable
--
cgit v1.2.3