summary | refs | log | tree | commit | diff
path: root/examples/server/server.cpp
diff options
context:
space:
mode:
author: Pierrick Hymbert <pierrick.hymbert@gmail.com> 2024-02-20 08:48:19 +0100
committer: GitHub <noreply@github.com> 2024-02-20 09:48:19 +0200
commit: c0a8c6db371cb3e4379900867b948879f5842201 (patch)
tree: bf818b206dca759d3f77bcc2f178c551fb0544a6 /examples/server/server.cpp
parent: b9111bd209c7b11b0592450a6ed2e0ca545b2c84 (diff)
server : health endpoint configurable failure on no slot (#5594)
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r-- examples/server/server.cpp 52
1 file changed, 26 insertions, 26 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 22c344dd..23482ed9 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2582,40 +2582,40 @@ int main(int argc, char **argv)
res.set_header("Access-Control-Allow-Headers", "*");
});
- svr.Get("/health", [&](const httplib::Request&, httplib::Response& res) {
+ svr.Get("/health", [&](const httplib::Request& req, httplib::Response& res) {
server_state current_state = state.load();
switch(current_state) {
- case SERVER_STATE_READY:
- if (llama.all_slots_are_idle) {
- res.set_content(R"({"status": "ok"})", "application/json");
+ case SERVER_STATE_READY: {
+ int available_slots = 0;
+ int processing_slots = 0;
+ for (llama_client_slot &slot: llama.slots) {
+ if (slot.available()) {
+ available_slots++;
+ } else {
+ processing_slots++;
+ }
+ }
+ if (available_slots > 0) {
+ json health = {
+ {"status", "ok"},
+ {"slots_idle", available_slots},
+ {"slots_processing", processing_slots}};
+ res.set_content(health.dump(), "application/json");
res.status = 200; // HTTP OK
} else {
- int available_slots = 0;
- int processing_slots = 0;
- for (llama_client_slot & slot : llama.slots) {
- if (slot.available()) {
- available_slots++;
- } else {
- processing_slots++;
- }
- }
- if (available_slots > 0) {
- json health = {
- {"status", "ok"},
- {"slots_idle", available_slots},
- {"slots_processing", processing_slots}};
- res.set_content(health.dump(), "application/json");
- res.status = 200; // HTTP OK
- } else {
- json health = {
- {"status", "no slot available"},
- {"slots_idle", available_slots},
- {"slots_processing", processing_slots}};
- res.set_content(health.dump(), "application/json");
+ json health = {
+ {"status", "no slot available"},
+ {"slots_idle", available_slots},
+ {"slots_processing", processing_slots}};
+ res.set_content(health.dump(), "application/json");
+ if (req.has_param("fail_on_no_slot")) {
res.status = 503; // HTTP Service Unavailable
+ } else {
+ res.status = 200; // HTTP OK
}
}
break;
+ }
case SERVER_STATE_LOADING_MODEL:
res.set_content(R"({"status": "loading model"})", "application/json");
res.status = 503; // HTTP Service Unavailable