server : health endpoint configurable failure on no slot (#5594)

author: Pierrick Hymbert <pierrick.hymbert@gmail.com> 2024-02-20 08:48:19 +0100
committer: GitHub <noreply@github.com> 2024-02-20 09:48:19 +0200
commit: c0a8c6db371cb3e4379900867b948879f5842201 (patch)
tree: bf818b206dca759d3f77bcc2f178c551fb0544a6 /examples/server/server.cpp
parent: b9111bd209c7b11b0592450a6ed2e0ca545b2c84 (diff)
1 files changed, 26 insertions, 26 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 22c344dd..23482ed9 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2582,40 +2582,40 @@ int main(int argc, char **argv)
         res.set_header("Access-Control-Allow-Headers", "*");
     });
 
-    svr.Get("/health", [&](const httplib::Request&, httplib::Response& res) {
+    svr.Get("/health", [&](const httplib::Request& req, httplib::Response& res) {
         server_state current_state = state.load();
         switch(current_state) {
-            case SERVER_STATE_READY:
-                if (llama.all_slots_are_idle) {
-                    res.set_content(R"({"status": "ok"})", "application/json");
+            case SERVER_STATE_READY: {
+                int available_slots  = 0;
+                int processing_slots = 0;
+                for (llama_client_slot &slot: llama.slots) {
+                    if (slot.available()) {
+                        available_slots++;
+                    } else {
+                        processing_slots++;
+                    }
+                }
+                if (available_slots > 0) {
+                    json health = {
+                            {"status",           "ok"},
+                            {"slots_idle",       available_slots},
+                            {"slots_processing", processing_slots}};
+                    res.set_content(health.dump(), "application/json");
                     res.status = 200; // HTTP OK
                 } else {
-                    int available_slots = 0;
-                    int processing_slots = 0;
-                    for (llama_client_slot & slot : llama.slots) {
-                        if (slot.available()) {
-                            available_slots++;
-                        } else {
-                            processing_slots++;
-                        }
-                    }
-                    if (available_slots > 0) {
-                        json health = {
-                                {"status",           "ok"},
-                                {"slots_idle",       available_slots},
-                                {"slots_processing", processing_slots}};
-                        res.set_content(health.dump(), "application/json");
-                        res.status = 200; // HTTP OK
-                    } else {
-                        json health = {
-                                {"status",           "no slot available"},
-                                {"slots_idle",       available_slots},
-                                {"slots_processing", processing_slots}};
-                        res.set_content(health.dump(), "application/json");
+                    json health = {
+                            {"status",           "no slot available"},
+                            {"slots_idle",       available_slots},
+                            {"slots_processing", processing_slots}};
+                    res.set_content(health.dump(), "application/json");
+                    if (req.has_param("fail_on_no_slot")) {
                         res.status = 503; // HTTP Service Unavailable
+                    } else {
+                        res.status = 200; // HTTP OK
                     }
                 }
                 break;
+            }
             case SERVER_STATE_LOADING_MODEL:
                 res.set_content(R"({"status": "loading model"})", "application/json");
                 res.status = 503; // HTTP Service Unavailable
author	Pierrick Hymbert <pierrick.hymbert@gmail.com>	2024-02-20 08:48:19 +0100
committer	GitHub <noreply@github.com>	2024-02-20 09:48:19 +0200
commit	c0a8c6db371cb3e4379900867b948879f5842201 (patch)
tree	bf818b206dca759d3f77bcc2f178c551fb0544a6 /examples/server/server.cpp
parent	b9111bd209c7b11b0592450a6ed2e0ca545b2c84 (diff)