author    XiaotaoChen <chenxiaotao1234@gmail.com>  2024-01-22 21:09:35 +0800
committer GitHub <noreply@github.com>              2024-01-22 15:09:35 +0200
commit    3ce7e8f8e7ccfce07e5947ac5f1f3f4628cf68ea (patch)
tree      75c5f7d2eb2e6df853fe1fa8cb7119f3178a59a3 /examples/llava/convert-image-encoder-to-gguf.py
parent    b2d80e105a59b54822edf7ce7f3ed5f317e96e21 (diff)
llava : MobileVLM support (#4954)
* MobileVLM native implementation
* remove the depthwise_conv_2d and permute_cpy related code, replacing both with existing functions; optimize the ldp definition; support the LLAMA_PERF option for CMake
* move the android script to the examples/llava directory
* fix the editor config checks

Co-authored-by: Chenxiaotao03 <chenxiaotao03@meituan.com>
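As a usage sketch (not part of this commit): converting a MobileVLM-style image encoder with the new projector type might look like the command below. --llava-projector, --projector-type and --output-dir come from the diff on this page; the -m model-directory argument and all paths are placeholders assumed from the unmodified parts of the script.

python ./examples/llava/convert-image-encoder-to-gguf.py \
    -m path/to/mobilevlm-clip-model \
    --llava-projector path/to/llava.projector \
    --projector-type ldp \
    --output-dir path/to/output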
Diffstat (limited to 'examples/llava/convert-image-encoder-to-gguf.py')
-rw-r--r--   examples/llava/convert-image-encoder-to-gguf.py   6
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/examples/llava/convert-image-encoder-to-gguf.py b/examples/llava/convert-image-encoder-to-gguf.py
index 03688e0e..f5a3c9b4 100644
--- a/examples/llava/convert-image-encoder-to-gguf.py
+++ b/examples/llava/convert-image-encoder-to-gguf.py
@@ -81,6 +81,7 @@ ap.add_argument("--vision-only", action="store_true", required=False,
ap.add_argument("--clip_model_is_vision", action="store_true", required=False,
help="The clip model is a pure vision model (ShareGPT4V vision extract for example)")
ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.")
+ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp", choices=["mlp", "ldp"], default="mlp")
ap.add_argument("--image-mean", nargs=3, type=float, required=False, help="Override image mean values")
ap.add_argument("--image-std", nargs=3, type=float, required=False, help="Override image std values")
ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. Default is the original model directory", default=None)
@@ -174,6 +175,8 @@ elif args.vision_only and not has_llava_projector:
fout.add_description("vision-only CLIP model")
elif has_llava_projector:
fout.add_description("image encoder for LLaVA")
+ # add projector type
+ fout.add_string("clip.projector_type", args.projector_type)
else:
fout.add_description("two-tower CLIP model")
@@ -218,7 +221,8 @@ if has_llava_projector:
     projector = torch.load(args.llava_projector)
     for name, data in projector.items():
         name = get_tensor_name(name)
-        if data.ndim == 2:
+        # pw and dw conv ndim==4
+        if data.ndim == 2 or data.ndim == 4:
             data = data.squeeze().numpy().astype(np.float16)
         else:
             data = data.squeeze().numpy().astype(np.float32)
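The new ndim == 4 branch exists because MobileVLM's LDP projector contains pointwise and depthwise convolutions ("pw and dw conv" in the comment above), whose weights are 4-D; squeeze() drops the singleton dimensions before the tensor is written as fp16. A small sketch of that shape handling, with shapes chosen purely for illustration rather than taken from a real MobileVLM checkpoint:

# Illustration of the squeeze behaviour for 4-D conv weights; the shapes
# below are assumed examples, not the actual LDP projector shapes.
import numpy as np
import torch

dw = torch.zeros(2048, 1, 3, 3)      # depthwise conv weight: (C, 1, kH, kW)
pw = torch.zeros(2048, 2048, 1, 1)   # pointwise conv weight: (C_out, C_in, 1, 1)

print(dw.squeeze().numpy().astype(np.float16).shape)  # (2048, 3, 3)
print(pw.squeeze().numpy().astype(np.float16).shape)  # (2048, 2048)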