From 911b3900dded9a1cfe0f0e41b82c7a29baf3a217 Mon Sep 17 00:00:00 2001
From: Johan
Date: Wed, 8 May 2024 14:27:58 +0200
Subject: server : add_special option for tokenize endpoint (#7059)

---
 examples/server/tests/features/server.feature | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index 646a4e49..d21c0913 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -7,6 +7,7 @@ Feature: llama.cpp server
     And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
     And a model file test-model.gguf
     And a model alias tinyllama-2
+    And BOS token is 1
     And 42 as server seed
     # KV Cache corresponds to the total amount of tokens
     # that can be stored across all independent sequences: #4130
@@ -91,7 +92,18 @@ Feature: llama.cpp server
     """
     What is the capital of France ?
     """
-    Then tokens can be detokenize
+    Then tokens can be detokenized
+    And tokens do not begin with BOS
+
+  Scenario: Tokenize w/ BOS
+    Given adding special tokens
+    When tokenizing:
+    """
+    What is the capital of Germany?
+    """
+    Then tokens begin with BOS
+    Given first token is removed
+    Then tokens can be detokenized
 
   Scenario: Models available
     Given available models
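
A minimal sketch of what the two scenarios above exercise from a client's point of view. It assumes the test server listens on localhost:8080, that the tokenize endpoint accepts JSON of the form {"content": ..., "add_special": ...} and returns {"tokens": [...]}, and that detokenize returns {"content": ...}; only the add_special option itself is confirmed by the commit subject, so treat the field names, paths, and port as assumptions.

    # Sketch only: mirrors the "Tokenize / Detokenize" and "Tokenize w/ BOS"
    # scenarios from the patch. Endpoint paths, field names, and the port
    # are assumptions, not taken from this patch.
    import requests

    BASE_URL = "http://localhost:8080"  # assumed test-server address
    BOS_TOKEN = 1                       # BOS id declared in the feature background

    def tokenize(text: str, add_special: bool) -> list[int]:
        resp = requests.post(
            f"{BASE_URL}/tokenize",
            json={"content": text, "add_special": add_special},
        )
        resp.raise_for_status()
        return resp.json()["tokens"]

    def detokenize(tokens: list[int]) -> str:
        resp = requests.post(f"{BASE_URL}/detokenize", json={"tokens": tokens})
        resp.raise_for_status()
        return resp.json()["content"]

    # Scenario "Tokenize / Detokenize": no special tokens, no leading BOS.
    plain = tokenize("What is the capital of France ?", add_special=False)
    assert plain[0] != BOS_TOKEN
    assert "France" in detokenize(plain)

    # Scenario "Tokenize w/ BOS": BOS is prepended; drop it before detokenizing.
    with_bos = tokenize("What is the capital of Germany?", add_special=True)
    assert with_bos[0] == BOS_TOKEN
    print(detokenize(with_bos[1:]))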