From cfa0750bc9dbc2d957a91b8ed09ab0035d8f3d4e Mon Sep 17 00:00:00 2001
From: ningshanwutuobang
Date: Wed, 28 Jun 2023 23:53:37 +0800
Subject: llama : support input embeddings directly (#1910)

* add interface for float input
* fix inpL shape and type
* add examples of input floats
* add test example for embd input
* fix sampling
* add free for context
* fix end condition for generation
* add examples for llava.py
* add README for llava.py
* add example of PandaGPT
* refactor the interface and fix the style
* add cmake build for embd-input
* add MiniGPT-4 example
* change the order of the args of llama_eval_internal
* fix CI error
---
 llama.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'llama.h')

diff --git a/llama.h b/llama.h
index 76239be2..c2f2e533 100644
--- a/llama.h
+++ b/llama.h
@@ -226,6 +226,14 @@ extern "C" {
                              int   n_past,
                              int   n_threads);
 
+    // Same as llama_eval, but use float matrix input directly.
+    LLAMA_API int llama_eval_embd(
+            struct llama_context * ctx,
+                     const float * embd,
+                             int   n_tokens,
+                             int   n_past,
+                             int   n_threads);
+
     // Export a static computation graph for context of 511 and batch size of 1
     // NOTE: since this functionality is mostly for debugging and demonstration purposes, we hardcode these
     //       parameters here to keep things simple
-- 
cgit v1.2.3
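
Editor's note — usage sketch, not part of the patch: the snippet below illustrates how a caller might drive the new entry point. Everything outside the llama_eval_embd signature is an assumption: "model.bin" is a placeholder path, the embedding matrix is zero-filled where the PR's embd-input examples (llava.py, PandaGPT, MiniGPT-4) would write projected multimodal features, and the surrounding calls (llama_init_from_file, llama_n_embd, llama_get_logits, llama_free) are taken from this header revision's API.

/*
 * Hypothetical minimal sketch of calling llama_eval_embd.
 * Not from the patch; see the assumptions noted above.
 */
#include <stdio.h>
#include <stdlib.h>
#include "llama.h"

int main(void) {
    struct llama_context_params params = llama_context_default_params();

    /* "model.bin" is a placeholder path, not part of the patch. */
    struct llama_context * ctx = llama_init_from_file("model.bin", params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    const int n_embd   = llama_n_embd(ctx); /* embedding width of the loaded model */
    const int n_tokens = 4;                 /* number of embedded positions to feed */

    /* n_tokens x n_embd row-major float matrix, one row per position.
     * Zero-filled here; a real caller would write projected features into it. */
    float * embd = (float *) calloc((size_t) n_tokens * n_embd, sizeof(float));
    if (embd == NULL) {
        llama_free(ctx);
        return 1;
    }

    /* Same shape of call as llama_eval, but floats instead of token ids. */
    if (llama_eval_embd(ctx, embd, n_tokens, /*n_past=*/ 0, /*n_threads=*/ 4) != 0) {
        fprintf(stderr, "llama_eval_embd failed\n");
    } else {
        /* Logits are then retrieved exactly as after llama_eval. */
        const float * logits = llama_get_logits(ctx);
        (void) logits;
    }

    free(embd);
    llama_free(ctx);
    return 0;
}

Because the expected layout is simply n_tokens rows of n_embd floats, n_past and subsequent sampling behave the same as with token-id input; only the source of the per-position embeddings changes.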