From 1a159553f921a9209fed8c714494e57b3649f232 Mon Sep 17 00:00:00 2001 From: staviq Date: Tue, 17 Oct 2023 17:11:01 +0200 Subject: tokenizer : special token handling (#3538) * Rewrite special token handling from #1931 * shorten param name, add st verification by type * use offsets instead of copy by substr * formatting, remove copying iterator on delete * llama : normalize code-style * swift fix * print pfx/sfx if verb, main: split pfx input sfx * dont add space when using special tokens * minor : comment + spacing --------- Co-authored-by: Georgi Gerganov --- examples/batched.swift/Sources/main.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'examples/batched.swift/Sources/main.swift') diff --git a/examples/batched.swift/Sources/main.swift b/examples/batched.swift/Sources/main.swift index 938f3051..05d1bb9d 100644 --- a/examples/batched.swift/Sources/main.swift +++ b/examples/batched.swift/Sources/main.swift @@ -209,7 +209,7 @@ llama_print_timings(context) private func tokenize(text: String, add_bos: Bool) -> [llama_token] { let n_tokens = text.count + (add_bos ? 1 : 0) let tokens = UnsafeMutablePointer.allocate(capacity: n_tokens) - let tokenCount = llama_tokenize(model, text, Int32(text.count), tokens, Int32(n_tokens), add_bos) + let tokenCount = llama_tokenize(model, text, Int32(text.count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false) var swiftTokens: [llama_token] = [] for i in 0 ..< tokenCount { swiftTokens.append(tokens[Int(i)]) -- cgit v1.2.3