From 154e0d75fccf1784fe9ff6fd76a630b66563da3d Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Sat, 27 Jul 2024 07:55:01 +0200 Subject: Merge mainline llama.cpp (#3) * Merging mainline - WIP * Merging mainline - WIP AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%) lower as it is so often the case with llama.cpp/ggml after some "improvements" have been made. * Merging mainline - fix Metal * Remove check --------- Co-authored-by: Iwan Kawrakow --- examples/batched.swift/Sources/main.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'examples/batched.swift/Sources/main.swift') diff --git a/examples/batched.swift/Sources/main.swift b/examples/batched.swift/Sources/main.swift index dbbd06da..616494d2 100644 --- a/examples/batched.swift/Sources/main.swift +++ b/examples/batched.swift/Sources/main.swift @@ -229,7 +229,7 @@ private func tokenize(text: String, add_bos: Bool) -> [llama_token] { private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String? { var result = [CChar](repeating: 0, count: 8) - let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count), false) + let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count), 0, false) if nTokens < 0 { let actualTokensCount = -Int(nTokens) result = .init(repeating: 0, count: actualTokensCount) @@ -238,6 +238,7 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String token, &result, Int32(result.count), + 0, false ) assert(check == actualTokensCount) -- cgit v1.2.3