From 76e868821a94072fbc87cb1fcca291694319eae8 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Fri, 8 Mar 2024 12:25:04 +0100 Subject: server: metrics: add llamacpp:prompt_seconds_total and llamacpp:tokens_predicted_seconds_total, reset bucket only on /metrics. Fix values cast to int. Add Process-Start-Time-Unix header. (#5937) Closes #5850 --- examples/server/tests/features/server.feature | 1 + examples/server/tests/features/steps/steps.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'examples/server/tests') diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature index f3b758c7..878ac136 100644 --- a/examples/server/tests/features/server.feature +++ b/examples/server/tests/features/server.feature @@ -29,6 +29,7 @@ Feature: llama.cpp server And a completion request with no api error Then tokens are predicted matching And prometheus metrics are exposed + And metric llamacpp:tokens_predicted is Examples: Prompts | prompt | n_predict | re_content | n_predicted | diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index a0b2ffdf..d7f00583 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -586,14 +586,24 @@ async def step_prometheus_metrics_exported(context): metric_exported = False if context.debug: print(f"/metrics answer:\n{metrics_raw}\n") + context.metrics = {} for metric in parser.text_string_to_metric_families(metrics_raw): match metric.name: case "llamacpp:kv_cache_usage_ratio": assert len(metric.samples) > 0 metric_exported = True + context.metrics[metric.name] = metric + assert int(metrics_response.headers["Process-Start-Time-Unix"]) > 0, "no header process start time" assert metric_exported, "No metrics exported" +@step(u'metric {metric_name} is {metric_value:d}') +def step_assert_metric_value(context, metric_name, metric_value): + if metric_name not in context.metrics: + assert False, f"no metric {metric_name} in {context.metrics.keys()}" + assert context.metrics[metric_name].samples[0].value == metric_value, f"metric: {context.metrics[metric_name]}" + + @step(u'available models') def step_available_models(context): # openai client always expects an api_key @@ -879,7 +889,6 @@ def assert_n_tokens_predicted(completion_response, expected_predicted_n=None, re f' {n_predicted} <> {expected_predicted_n}') - async def gather_tasks_results(context): n_tasks = len(context.concurrent_tasks) if context.debug: -- cgit v1.2.3