From 76e868821a94072fbc87cb1fcca291694319eae8 Mon Sep 17 00:00:00 2001
From: Pierrick Hymbert <pierrick.hymbert@gmail.com>
Date: Fri, 8 Mar 2024 12:25:04 +0100
Subject: server: metrics: add llamacpp:prompt_seconds_total and
 llamacpp:tokens_predicted_seconds_total, reset bucket only on /metrics. Fix
 values cast to int. Add Process-Start-Time-Unix header. (#5937)

Closes #5850
---
 examples/server/tests/features/server.feature |  1 +
 examples/server/tests/features/steps/steps.py | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'examples/server/tests')

diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index f3b758c7..878ac136 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -29,6 +29,7 @@ Feature: llama.cpp server
     And   a completion request with no api error
     Then  <n_predicted> tokens are predicted matching <re_content>
     And   prometheus metrics are exposed
+    And   metric llamacpp:tokens_predicted is <n_predicted>
 
     Examples: Prompts
       | prompt                           | n_predict | re_content                       | n_predicted |
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index a0b2ffdf..d7f00583 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -586,14 +586,24 @@ async def step_prometheus_metrics_exported(context):
             metric_exported = False
             if context.debug:
                 print(f"/metrics answer:\n{metrics_raw}\n")
+            context.metrics = {}
             for metric in parser.text_string_to_metric_families(metrics_raw):
                 match metric.name:
                     case "llamacpp:kv_cache_usage_ratio":
                         assert len(metric.samples) > 0
                         metric_exported = True
+                context.metrics[metric.name] = metric
+            assert int(metrics_response.headers["Process-Start-Time-Unix"]) > 0, "no header process start time"
             assert metric_exported, "No metrics exported"
 
 
+@step(u'metric {metric_name} is {metric_value:d}')
+def step_assert_metric_value(context, metric_name, metric_value):
+    """Assert that the first sample of `metric_name` stored in context.metrics equals `metric_value`."""
+    assert metric_name in context.metrics, f"no metric {metric_name} in {context.metrics.keys()}"
+    assert context.metrics[metric_name].samples[0].value == metric_value, f"metric: {context.metrics[metric_name]}"
+
+
 @step(u'available models')
 def step_available_models(context):
     # openai client always expects an api_key
@@ -879,7 +889,6 @@ def assert_n_tokens_predicted(completion_response, expected_predicted_n=None, re
                                                      f' {n_predicted} <> {expected_predicted_n}')
 
 
-
 async def gather_tasks_results(context):
     n_tasks = len(context.concurrent_tasks)
     if context.debug:
-- 
cgit v1.2.3