ci: bench: add more ftype, fix triggers and bot comment (#6466)

* ci: bench: change trigger path to not spawn on each PR
* ci: bench: add more file types for phi-2: q8_0 and f16
  - do not show the comment by default
* ci: bench: add seed parameter in k6 script
* ci: bench: artefact name perf job
* Add iteration in the commit status, further reduce the auto-comment
* ci: bench: add per-slot metric in the commit status
* Fix trailing spaces
author: Pierrick Hymbert <pierrick.hymbert@gmail.com> 2024-04-04 11:57:58 +0200
committer: GitHub <noreply@github.com> 2024-04-04 12:57:58 +0300
commit: 7a2c92637ae265654a68f62e6a7610b358255d3f (patch)
tree: 583a27505f3c0146ccaa2684ae6cc824d9355dfb /examples/server
parent: 4bcd6b959ca3991084ad1d8464caf2a734e29b1d (diff)
2 files changed, 13 insertions, 6 deletions
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
index ea5d3854..86eeeccf 100644
--- a/examples/server/bench/bench.py
+++ b/examples/server/bench/bench.py
@@ -16,6 +16,7 @@ import matplotlib
 import matplotlib.dates
 import matplotlib.pyplot as plt
 import requests
+from statistics import mean
 
 
 def main(args_in: list[str] | None = None) -> None:
@@ -109,6 +110,7 @@ def main(args_in: list[str] | None = None) -> None:
 
     # Prometheus
     end_time = time.time()
+    prometheus_metrics = {}
     if is_server_listening("0.0.0.0", 9090):
         metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
                    'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']
@@ -127,6 +129,7 @@ def main(args_in: list[str] | None = None) -> None:
                 values = metric_data['data']['result'][0]['values']
                 timestamps, metric_values = zip(*values)
                 metric_values = [float(value) for value in metric_values]
+                prometheus_metrics[metric] = metric_values
                 timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
                 plt.figure(figsize=(16, 10), dpi=80)
                 plt.plot(timestamps_dt, metric_values, label=metric)
@@ -176,17 +179,20 @@ xychart-beta
 
     # 140 chars max for commit status description
     bench_results = {
+        "i": iterations,
         "req": {
-            "p90": data['metrics']["http_req_duration"]["p(90)"],
-            "avg": data['metrics']["http_req_duration"]["avg"],
+            "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2),
+            "avg": round(data['metrics']["http_req_duration"]["avg"], 2),
         },
         "pp": {
-            "p90": data['metrics']["llamacpp_prompt_tokens"]["p(90)"],
-            "avg": data['metrics']["llamacpp_prompt_tokens"]["avg"],
+            "p90": round(data['metrics']["llamacpp_prompt_tokens"]["p(90)"], 2),
+            "avg": round(data['metrics']["llamacpp_prompt_tokens"]["avg"], 2),
+            "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
         },
         "tg": {
-            "p90": data['metrics']["llamacpp_tokens_second"]["p(90)"],
-            "avg": data['metrics']["llamacpp_tokens_second"]["avg"],
+            "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2),
+            "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
+            "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
         },
     }
     with open("results.github.env", 'a') as github_env:
diff --git a/examples/server/bench/script.js b/examples/server/bench/script.js
index a4f5ac5a..dc41e8d9 100644
--- a/examples/server/bench/script.js
+++ b/examples/server/bench/script.js
@@ -87,6 +87,7 @@ export default function () {
         ],
         "model": model,
         "stream": false,
+        "seed": 42,
         "max_tokens": max_tokens
     }
author	Pierrick Hymbert <pierrick.hymbert@gmail.com>	2024-04-04 11:57:58 +0200
committer	GitHub <noreply@github.com>	2024-04-04 12:57:58 +0300
commit	7a2c92637ae265654a68f62e6a7610b358255d3f (patch)
tree	583a27505f3c0146ccaa2684ae6cc824d9355dfb /examples/server
parent	4bcd6b959ca3991084ad1d8464caf2a734e29b1d (diff)