diff options
author | Pierrick Hymbert <pierrick.hymbert@gmail.com> | 2024-04-04 11:57:58 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-04 12:57:58 +0300 |
commit | 7a2c92637ae265654a68f62e6a7610b358255d3f (patch) | |
tree | 583a27505f3c0146ccaa2684ae6cc824d9355dfb /examples/server | |
parent | 4bcd6b959ca3991084ad1d8464caf2a734e29b1d (diff) |
ci: bench: add more ftype, fix triggers and bot comment (#6466)
* ci: bench: change trigger path to not spawn on each PR
* ci: bench: add more file types for phi-2: q8_0 and f16.
- do not show the comment by default
* ci: bench: add seed parameter in k6 script
* ci: bench: artefact name perf job
* Add the iteration count to the commit status, and further reduce the auto-comment
* ci: bench: add per-slot metrics to the commit status
* Fix trailing spaces
Diffstat (limited to 'examples/server')
-rw-r--r-- | examples/server/bench/bench.py | 18 | ||||
-rw-r--r-- | examples/server/bench/script.js | 1 |
2 files changed, 13 insertions, 6 deletions
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
index ea5d3854..86eeeccf 100644
--- a/examples/server/bench/bench.py
+++ b/examples/server/bench/bench.py
@@ -16,6 +16,7 @@ import matplotlib
 import matplotlib.dates
 import matplotlib.pyplot as plt
 import requests
+from statistics import mean
 
 
 def main(args_in: list[str] | None = None) -> None:
@@ -109,6 +110,7 @@ def main(args_in: list[str] | None = None) -> None:
 
     # Prometheus
     end_time = time.time()
+    prometheus_metrics = {}
     if is_server_listening("0.0.0.0", 9090):
         metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
                    'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']
@@ -127,6 +129,7 @@ def main(args_in: list[str] | None = None) -> None:
                 values = metric_data['data']['result'][0]['values']
                 timestamps, metric_values = zip(*values)
                 metric_values = [float(value) for value in metric_values]
+                prometheus_metrics[metric] = metric_values
                 timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
                 plt.figure(figsize=(16, 10), dpi=80)
                 plt.plot(timestamps_dt, metric_values, label=metric)
@@ -176,17 +179,20 @@ xychart-beta
 
     # 140 chars max for commit status description
     bench_results = {
+        "i": iterations,
         "req": {
-            "p90": data['metrics']["http_req_duration"]["p(90)"],
-            "avg": data['metrics']["http_req_duration"]["avg"],
+            "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2),
+            "avg": round(data['metrics']["http_req_duration"]["avg"], 2),
         },
         "pp": {
-            "p90": data['metrics']["llamacpp_prompt_tokens"]["p(90)"],
-            "avg": data['metrics']["llamacpp_prompt_tokens"]["avg"],
+            "p90": round(data['metrics']["llamacpp_prompt_tokens"]["p(90)"], 2),
+            "avg": round(data['metrics']["llamacpp_prompt_tokens"]["avg"], 2),
+            "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
         },
         "tg": {
-            "p90": data['metrics']["llamacpp_tokens_second"]["p(90)"],
-            "avg": data['metrics']["llamacpp_tokens_second"]["avg"],
+            "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2),
+            "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
+            "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
         },
     }
     with open("results.github.env", 'a') as github_env:
diff --git a/examples/server/bench/script.js b/examples/server/bench/script.js
index a4f5ac5a..dc41e8d9 100644
--- a/examples/server/bench/script.js
+++ b/examples/server/bench/script.js
@@ -87,6 +87,7 @@ export default function () {
         ],
         "model": model,
         "stream": false,
+        "seed": 42,
         "max_tokens": max_tokens
     }
 