author    Pierrick Hymbert <pierrick.hymbert@gmail.com>    2024-04-06 05:40:47 +0200
committer GitHub <noreply@github.com>                      2024-04-06 05:40:47 +0200
commit    75cd4c77292034ecec587ecb401366f57338f7c0 (patch)
tree      de137718780505410bc75ce219f4bc164961c4fd /examples/server/bench/bench.py
parent    a8bd14d55717754a1f48313a846a2b16fa998ad2 (diff)
ci: bench: support sse and fix prompt processing time / server: add tokens usage in stream OAI response (#6495)
* ci: bench: support sse and fix prompt processing time
  server: add tokens usage in stream mode
* ci: bench: README.md EOL
* ci: bench: remove total pp and tg as it is not accurate
* ci: bench: fix case when there is no token generated
* ci: bench: change to the 95 percentile for pp and tg as it is closer to what the server exports in metrics
* ci: bench: fix finish reason rate
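For context on the streaming change: in SSE mode the OpenAI-compatible endpoint emits "data: {...}" chunks, and this commit adds token usage to that stream so the bench can count tokens without a second request. Below is a minimal consumer sketch in Python; the endpoint, port, and "usage" field name are assumptions based on the OpenAI-style API, not taken from this diff.

    import json
    import requests

    resp = requests.post(
        "http://localhost:8080/v1/chat/completions",  # assumed server address
        json={"messages": [{"role": "user", "content": "Hello"}], "stream": True},
        stream=True,
    )
    for raw in resp.iter_lines():
        if not raw or not raw.startswith(b"data: "):
            continue  # skip blank SSE separator lines
        payload = raw[len(b"data: "):]
        if payload == b"[DONE]":
            break  # end-of-stream sentinel
        chunk = json.loads(payload)
        # With this commit the stream may carry token usage totals.
        if chunk.get("usage"):
            print(chunk["usage"])  # e.g. prompt_tokens / completion_tokens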
Diffstat (limited to 'examples/server/bench/bench.py')
-rw-r--r--  examples/server/bench/bench.py  11
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
index 86eeeccf..6ca637bd 100644
--- a/examples/server/bench/bench.py
+++ b/examples/server/bench/bench.py
@@ -76,7 +76,6 @@ def main(args_in: list[str] | None = None) -> None:
                     data['metrics'][metric_name][metric_metric]=value
                     github_env.write(
                         f"{escape_metric_name(metric_name)}_{escape_metric_name(metric_metric)}={value}\n")
-            token_seconds = data['metrics']['llamacpp_tokens_second']['avg']
             iterations = data['root_group']['checks']['success completion']['passes']
     except Exception:
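The hunk above writes each k6 metric into GITHUB_ENV via escape_metric_name(), which lives outside this hunk; GITHUB_ENV keys cannot contain characters such as the parentheses in percentile names like "p(95)". A hypothetical stand-in consistent with that usage (the actual rules in bench.py may differ):

    import re

    def escape_metric_name(metric_name: str) -> str:
        # Replace anything outside [A-Za-z0-9_] (e.g. '(' and ')' in
        # "p(95)") with '_' so the key is a valid environment variable name.
        return re.sub(r'[^A-Za-z0-9_]', '_', metric_name)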
@@ -181,16 +180,16 @@ xychart-beta
     bench_results = {
         "i": iterations,
         "req": {
-            "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2),
+            "p95": round(data['metrics']["http_req_duration"]["p(95)"], 2),
             "avg": round(data['metrics']["http_req_duration"]["avg"], 2),
         },
         "pp": {
-            "p90": round(data['metrics']["llamacpp_prompt_tokens"]["p(90)"], 2),
-            "avg": round(data['metrics']["llamacpp_prompt_tokens"]["avg"], 2),
+            "p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2),
+            "avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2),
             "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
         },
         "tg": {
-            "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2),
+            "p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2),
             "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
             "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
         },
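The p(90) → p(95) switch is possible because k6 exports both percentiles for trend metrics in its end-of-test summary by default. A small sketch of reading the new values back from a summary JSON (the file name is an assumption):

    import json

    with open("k6-results.json") as f:  # assumed summary-export file name
        data = json.load(f)

    pp_p95 = round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2)
    tg_p95 = round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2)
    print(f"pp p95={pp_p95} tok/s  tg p95={tg_p95} tok/s")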
@@ -206,7 +205,7 @@ xychart-beta
 def start_benchmark(args):
-    k6_path = 'k6'
+    k6_path = './k6'
     if 'BENCH_K6_BIN_PATH' in os.environ:
         k6_path = os.environ['BENCH_K6_BIN_PATH']
     k6_args = [
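The path change pins the benchmark to a k6 binary in the working directory (e.g. one the CI job downloaded) instead of whatever happens to be on PATH, while BENCH_K6_BIN_PATH still overrides it. The same lookup written compactly, as an equivalent sketch rather than a suggested patch:

    import os

    # Default to the local ./k6 binary; BENCH_K6_BIN_PATH overrides it.
    k6_path = os.environ.get('BENCH_K6_BIN_PATH', './k6')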