author    Pierrick Hymbert <pierrick.hymbert@gmail.com>    2024-04-06 05:40:47 +0200
committer GitHub <noreply@github.com>                      2024-04-06 05:40:47 +0200
commit    75cd4c77292034ecec587ecb401366f57338f7c0 (patch)
tree      de137718780505410bc75ce219f4bc164961c4fd /examples/server/bench/bench.py
parent    a8bd14d55717754a1f48313a846a2b16fa998ad2 (diff)
ci: bench: support sse and fix prompt processing time / server: add tokens usage in stream OAI response (#6495)
* ci: bench: support sse and fix prompt processing time
  server: add tokens usage in stream mode
* ci: bench: README.md EOL
* ci: bench: remove total pp and tg as it is not accurate
* ci: bench: fix case when there is no token generated
* ci: bench: change to the 95 percentile for pp and tg as it is closer to what the server exports in metrics
* ci: bench: fix finish reason rate
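For context on the streaming change: in SSE mode the OpenAI-compatible endpoint emits "data: {...}" chunks, and this commit adds token usage to that stream so the bench can count tokens without a second request. Below is a minimal consumer sketch in Python; the endpoint, port, and "usage" field name are assumptions based on the OpenAI-style API, not taken from this diff.

    import json
    import requests

    resp = requests.post(
        "http://localhost:8080/v1/chat/completions",  # assumed server address
        json={"messages": [{"role": "user", "content": "Hello"}], "stream": True},
        stream=True,
    )
    for raw in resp.iter_lines():
        if not raw or not raw.startswith(b"data: "):
            continue  # skip blank SSE separator lines
        payload = raw[len(b"data: "):]
        if payload == b"[DONE]":
            break  # end-of-stream sentinel
        chunk = json.loads(payload)
        # With this commit the stream may carry token usage totals.
        if chunk.get("usage"):
            print(chunk["usage"])  # e.g. prompt_tokens / completion_tokens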
Diffstat (limited to 'examples/server/bench/bench.py')
-rw-r--r--  examples/server/bench/bench.py  11
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
index 86eeeccf..6ca637bd 100644
--- a/examples/server/bench/bench.py
+++ b/examples/server/bench/bench.py
@@ -76,7 +76,6 @@ def main(args_in: list[str] | None = None) -> None:
                     data['metrics'][metric_name][metric_metric]=value
                     github_env.write(
                         f"{escape_metric_name(metric_name)}_{escape_metric_name(metric_metric)}={value}\n")
-            token_seconds = data['metrics']['llamacpp_tokens_second']['avg']
             iterations = data['root_group']['checks']['success completion']['passes']
     except Exception:
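The hunk above writes each k6 metric into GITHUB_ENV via escape_metric_name(), which lives outside this hunk; GITHUB_ENV keys cannot contain characters such as the parentheses in percentile names like "p(95)". A hypothetical stand-in consistent with that usage (the actual rules in bench.py may differ):

    import re

    def escape_metric_name(metric_name: str) -> str:
        # Replace anything outside [A-Za-z0-9_] (e.g. '(' and ')' in
        # "p(95)") with '_' so the key is a valid environment variable name.
        return re.sub(r'[^A-Za-z0-9_]', '_', metric_name)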
@@ -181,16 +180,16 @@ xychart-beta
     bench_results = {
         "i": iterations,
         "req": {
-            "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2),
+            "p95": round(data['metrics']["http_req_duration"]["p(95)"], 2),
             "avg": round(data['metrics']["http_req_duration"]["avg"], 2),
         },
         "pp": {
-            "p90": round(data['metrics']["llamacpp_prompt_tokens"]["p(90)"], 2),
-            "avg": round(data['metrics']["llamacpp_prompt_tokens"]["avg"], 2),
+            "p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2),
+            "avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2),
             "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
         },
         "tg": {
-            "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2),
+            "p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2),
             "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
             "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
         },
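The p(90) → p(95) switch is possible because k6 exports both percentiles for trend metrics in its end-of-test summary by default. A small sketch of reading the new values back from a summary JSON (the file name is an assumption):

    import json

    with open("k6-results.json") as f:  # assumed summary-export file name
        data = json.load(f)

    pp_p95 = round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2)
    tg_p95 = round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2)
    print(f"pp p95={pp_p95} tok/s  tg p95={tg_p95} tok/s")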
@@ -206,7 +205,7 @@ xychart-beta
 def start_benchmark(args):
-    k6_path = 'k6'
+    k6_path = './k6'
     if 'BENCH_K6_BIN_PATH' in os.environ:
         k6_path = os.environ['BENCH_K6_BIN_PATH']
     k6_args = [
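The path change pins the benchmark to a k6 binary in the working directory (e.g. one the CI job downloaded) instead of whatever happens to be on PATH, while BENCH_K6_BIN_PATH still overrides it. The same lookup written compactly, as an equivalent sketch rather than a suggested patch:

    import os

    # Default to the local ./k6 binary; BENCH_K6_BIN_PATH overrides it.
    k6_path = os.environ.get('BENCH_K6_BIN_PATH', './k6')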