Diffstat (limited to 'examples/server/tests/features')
-rw-r--r--   examples/server/tests/features/environment.py |  2 ++
-rw-r--r--   examples/server/tests/features/server.feature |  2 ++
-rw-r--r--   examples/server/tests/features/steps/steps.py | 27 +++++++++++++++++++++++++++
3 files changed, 31 insertions, 0 deletions
diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py
index 13cc8410..09e82674 100644
--- a/examples/server/tests/features/environment.py
+++ b/examples/server/tests/features/environment.py
@@ -16,6 +16,8 @@ def before_scenario(context, scenario):
 
 
 def after_scenario(context, scenario):
+    if context.server_process is None:
+        return
     if scenario.status == "failed":
         if 'GITHUB_ACTIONS' in os.environ:
             print(f"\x1b[33;101mSCENARIO FAILED: {scenario.name} server logs:\x1b[0m\n\n")
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index 5f81d256..0139f89d 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -13,6 +13,7 @@ Feature: llama.cpp server
     And   1 slots
     And   embeddings extraction
     And   32 server max tokens to predict
+    And   prometheus compatible metrics exposed
     Then  the server is starting
     Then  the server is healthy
 
@@ -25,6 +26,7 @@ Feature: llama.cpp server
     And   <n_predict> max tokens to predict
     And   a completion request with no api error
     Then  <n_predicted> tokens are predicted matching <re_content>
+    And   prometheus metrics are exposed
 
     Examples: Prompts
       | prompt | n_predict | re_content | n_predicted |
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 9c825fdb..051fd440 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -13,6 +13,7 @@ import aiohttp
 import openai
 from behave import step
 from behave.api.async_step import async_run_until_complete
+from prometheus_client import parser
 
 
 @step(u"a server listening on {server_fqdn}:{server_port}")
@@ -34,6 +35,8 @@ def step_server_config(context, server_fqdn, server_port):
     context.server_api_key = None
     context.server_continuous_batching = False
     context.server_embeddings = False
+    context.server_metrics = False
+    context.server_process = None
     context.server_seed = None
     context.user_api_key = None
 
@@ -82,6 +85,11 @@ def step_server_embeddings(context):
     context.server_embeddings = True
 
 
+@step(u'prometheus compatible metrics exposed')
+def step_server_metrics(context):
+    context.server_metrics = True
+
+
 @step(u"the server is starting")
 def step_start_server(context):
     start_server_background(context)
@@ -424,6 +432,23 @@ def step_check_options_header_value(context, cors_header, cors_header_value):
     assert context.options_response.headers[cors_header] == cors_header_value
 
 
+@step(u'prometheus metrics are exposed')
+@async_run_until_complete
+async def step_prometheus_metrics_exported(context):
+    async with aiohttp.ClientSession() as session:
+        async with await session.get(f'{context.base_url}/metrics') as metrics_response:
+            assert metrics_response.status == 200
+            assert metrics_response.headers['Content-Type'] == "text/plain; version=0.0.4"
+            metrics_raw = await metrics_response.text()
+            metric_exported = False
+            for metric in parser.text_string_to_metric_families(metrics_raw):
+                match metric.name:
+                    case "llamacpp:kv_cache_usage_ratio":
+                        assert len(metric.samples) > 0
+                        metric_exported = True
+            assert metric_exported, "No metrics exported"
+
+
 async def concurrent_requests(context, f_completion, *args, **kwargs):
     n_prompts = len(context.prompts)
     if context.debug:
@@ -753,6 +778,8 @@ def start_server_background(context):
         server_args.append('--cont-batching')
     if context.server_embeddings:
         server_args.append('--embedding')
+    if context.server_metrics:
+        server_args.append('--metrics')
     if context.model_alias is not None:
         server_args.extend(['--alias', context.model_alias])
     if context.n_ctx is not None:
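
For reference, the new behave step boils down to the pattern below, which can also be run on its own to spot-check a server started with `--metrics`. This is a minimal sketch, not part of the diff: the base URL default and the standalone entry point are assumptions, while the metric name and header check are taken from steps.py.

    # Standalone sketch: fetch /metrics from a llama.cpp server and verify that
    # the expected Prometheus metric family is exported. Assumes a server is
    # already running at base_url with --metrics enabled (base_url is an assumption).
    import asyncio

    import aiohttp
    from prometheus_client import parser


    async def check_metrics(base_url: str = "http://localhost:8080") -> None:
        async with aiohttp.ClientSession() as session:
            async with session.get(f"{base_url}/metrics") as response:
                assert response.status == 200
                # Prometheus text exposition format, same check as in steps.py
                assert response.headers["Content-Type"] == "text/plain; version=0.0.4"
                raw = await response.text()

        exported = False
        for family in parser.text_string_to_metric_families(raw):
            # steps.py looks for the llamacpp:kv_cache_usage_ratio family
            if family.name == "llamacpp:kv_cache_usage_ratio":
                assert len(family.samples) > 0
                exported = True
        assert exported, "No metrics exported"


    if __name__ == "__main__":
        asyncio.run(check_metrics())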