diff options
Diffstat (limited to 'examples/llava')
-rw-r--r-- | examples/llava/CMakeLists.txt | 11 | ||||
-rw-r--r-- | examples/llava/MobileVLM-README.md | 18 | ||||
-rw-r--r-- | examples/llava/README.md | 10 | ||||
-rwxr-xr-x | examples/llava/android/adb_run.sh | 2 |
4 files changed, 21 insertions, 20 deletions
diff --git a/examples/llava/CMakeLists.txt b/examples/llava/CMakeLists.txt index 2985caff..e9fa73ac 100644 --- a/examples/llava/CMakeLists.txt +++ b/examples/llava/CMakeLists.txt @@ -30,8 +30,9 @@ if(TARGET BUILD_INFO) add_dependencies(llava BUILD_INFO) endif() -set(TARGET llava-cli) -add_executable(llava-cli llava-cli.cpp) -install(TARGETS llava-cli RUNTIME) -target_link_libraries(llava-cli PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT}) -target_compile_features(llava PRIVATE cxx_std_11) +set(TARGET llama-llava-cli) +add_executable(${TARGET} llava-cli.cpp) +set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-cli) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/llava/MobileVLM-README.md b/examples/llava/MobileVLM-README.md index 74f021de..05a8207e 100644 --- a/examples/llava/MobileVLM-README.md +++ b/examples/llava/MobileVLM-README.md @@ -9,12 +9,12 @@ The implementation is based on llava, and is compatible with llava and mobileVLM Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using **MobileVLM-1.7B** as an example, the different conversion step will be shown. ## Usage -Build with cmake or run `make llava-cli` to build it. +Build with cmake or run `make llama-llava-cli` to build it. -After building, run: `./llava-cli` to see the usage. For example: +After building, run: `./llama-llava-cli` to see the usage. For example: ```sh -./llava-cli -m MobileVLM-1.7B/ggml-model-q4_k.gguf \ +./llama-llava-cli -m MobileVLM-1.7B/ggml-model-q4_k.gguf \ --mmproj MobileVLM-1.7B/mmproj-model-f16.gguf \ --image path/to/an/image.jpg \ -p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWho is the author of this book? Answer the question using a single word or phrase. ASSISTANT:" @@ -62,7 +62,7 @@ python ./examples/convert-legacy-llama.py path/to/MobileVLM-1.7B 5. Use `quantize` to convert LLaMA part's DataType from `fp16` to `q4_k` ```sh -./quantize path/to/MobileVLM-1.7B/ggml-model-f16.gguf path/to/MobileVLM-1.7B/ggml-model-q4_k.gguf q4_k_s +./llama-quantize path/to/MobileVLM-1.7B/ggml-model-f16.gguf path/to/MobileVLM-1.7B/ggml-model-q4_k.gguf q4_k_s ``` Now both the LLaMA part and the image encoder is in the `MobileVLM-1.7B` directory. @@ -82,7 +82,7 @@ refer to `android/adb_run.sh`, modify resources' `name` and `path` ### case 1 **input** ```sh -/data/local/tmp/llava-cli \ +/data/local/tmp/llama-llava-cli \ -m /data/local/tmp/ggml-model-q4_k.gguf \ --mmproj /data/local/tmp/mmproj-model-f16.gguf \ -t 4 \ @@ -102,7 +102,7 @@ llama_print_timings: total time = 34731.93 ms ### case 2 **input** ```sh -/data/local/tmp/llava-cli \ +/data/local/tmp/llama-llava-cli \ -m /data/local/tmp/ggml-model-q4_k.gguf \ --mmproj /data/local/tmp/mmproj-model-f16.gguf \ -t 4 \ @@ -126,7 +126,7 @@ llama_print_timings: total time = 34570.79 ms #### llava-cli release-b2005 **input** ```sh -/data/local/tmp/llava-cli \ +/data/local/tmp/llama-llava-cli \ -m /data/local/tmp/ggml-model-q4_k.gguf \ --mmproj /data/local/tmp/mmproj-model-f16.gguf \ -t 4 \ @@ -200,7 +200,7 @@ make LLAMA_CUDA=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32 ### case 1 **input** ```sh -./llava-cli \ +./llama-llava-cli \ -m /data/local/tmp/ggml-model-q4_k.gguf \ --mmproj /data/local/tmp/mmproj-model-f16.gguf \ --image /data/local/tmp/demo.jpeg \ @@ -224,7 +224,7 @@ llama_print_timings: total time = 1352.63 ms / 252 tokens ### case 2 **input** ```sh -./llava-cli \ +./llama-llava-cli \ -m /data/local/tmp/ggml-model-q4_k.gguf \ --mmproj /data/local/tmp/mmproj-model-f16.gguf \ -p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat is in the image? ASSISTANT:" \ diff --git a/examples/llava/README.md b/examples/llava/README.md index 8d1ae527..f4554de6 100644 --- a/examples/llava/README.md +++ b/examples/llava/README.md @@ -11,12 +11,12 @@ For llava-1.6 a variety of prepared gguf models are available as well [7b-34b](h After API is confirmed, more models will be supported / uploaded. ## Usage -Build with cmake or run `make llava-cli` to build it. +Build with cmake or run `make llama-llava-cli` to build it. -After building, run: `./llava-cli` to see the usage. For example: +After building, run: `./llama-llava-cli` to see the usage. For example: ```sh -./llava-cli -m ../llava-v1.5-7b/ggml-model-f16.gguf --mmproj ../llava-v1.5-7b/mmproj-model-f16.gguf --image path/to/an/image.jpg +./llama-llava-cli -m ../llava-v1.5-7b/ggml-model-f16.gguf --mmproj ../llava-v1.5-7b/mmproj-model-f16.gguf --image path/to/an/image.jpg ``` **note**: A lower temperature like 0.1 is recommended for better quality. add `--temp 0.1` to the command to do so. @@ -95,9 +95,9 @@ python ./examples/llava/convert-image-encoder-to-gguf.py -m vit --llava-projecto python ./examples/convert-legacy-llama.py ../llava-v1.6-vicuna-7b/ --skip-unknown ``` -7) And finally we can run the llava-cli using the 1.6 model version: +7) And finally we can run the llava cli using the 1.6 model version: ```console -./llava-cli -m ../llava-v1.6-vicuna-7b/ggml-model-f16.gguf --mmproj vit/mmproj-model-f16.gguf --image some-image.jpg -c 4096 +./llama-llava-cli -m ../llava-v1.6-vicuna-7b/ggml-model-f16.gguf --mmproj vit/mmproj-model-f16.gguf --image some-image.jpg -c 4096 ``` **note** llava-1.6 needs more context than llava-1.5, at least 3000 is needed (just run it at -c 4096) diff --git a/examples/llava/android/adb_run.sh b/examples/llava/android/adb_run.sh index f73623ae..45ccf8d7 100755 --- a/examples/llava/android/adb_run.sh +++ b/examples/llava/android/adb_run.sh @@ -10,7 +10,7 @@ prompt="A chat between a curious user and an artificial intelligence assistant. # prompt="A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat is in the image? ASSISTANT:" program_dir="build_64/bin" -binName="llava-cli" +binName="llama-llava-cli" n_threads=4 |