| |
| |
|
|
| import sys |
| import torch |
| import struct |
| import numpy as np |
|
|
# Converts a PyTorch checkpoint (a name -> tensor mapping) into a single
# binary file: a fixed header of hyper-parameters followed by one record per
# tensor.
#
# Usage: convert-pth-to-ggml.py file-model dir-output [ftype]

# Suffixes used in the output file name, indexed by ftype
# (0 -> float32, 1 -> float16).
FTYPE_NAMES = ["f32", "f16"]

# Magic number written first in the output file (the bytes of "ggml" in hex).
GGML_MAGIC = 0x67676d6c

# Order in which the hyper-parameters are written to the file header.
HPARAM_KEYS = (
    "n_enc_state",
    "n_enc_layers",
    "n_enc_heads",
    "n_enc_out_chans",
    "n_pt_embd",
)

# Tensors whose name starts with one of these are always stored as float32,
# even when float16 output was requested.
F32_NAME_PREFIXES = (
    "prompt_encoder",
    "mask_decoder.iou_token",
    "mask_decoder.mask_tokens",
)


def _parse_args(argv):
    """Validate the command line and return (fname_model, dir_out, ftype).

    argv has the same layout as sys.argv: program name, checkpoint path,
    output directory and an optional ftype (0 or 1, default 1).

    Prints a message and exits with status 1 when arguments are missing or
    ftype is not a valid integer in range.
    """
    if len(argv) < 3:
        print("Usage: convert-pth-to-ggml.py file-model dir-output [ftype]\n")
        print(" ftype == 0 -> float32")
        print(" ftype == 1 -> float16")
        sys.exit(1)

    ftype = 1
    if len(argv) > 3:
        try:
            ftype = int(argv[3])
        except ValueError:
            # Report a non-numeric ftype the same way as an out-of-range one
            # instead of dying with a traceback.
            print("Invalid ftype: " + argv[3])
            sys.exit(1)

    if not 0 <= ftype < len(FTYPE_NAMES):
        print("Invalid ftype: " + str(ftype))
        sys.exit(1)

    return argv[1], argv[2], ftype


def _build_output_path(dir_out, ftype):
    """Return the output file path inside dir_out for the given ftype.

    The name is assembled directly rather than by substituting ".bin" in a
    finished path, so a ".bin" occurring in dir_out is left untouched.
    """
    return dir_out + "/ggml-model-" + FTYPE_NAMES[ftype] + ".bin"


def _infer_hparams(model):
    """Print every tensor name/shape and derive the header hyper-parameters.

    The encoder state size is read from the first dimension of
    "image_encoder.blocks.0.norm1.weight" (768 when that tensor is absent);
    the layer and head counts are then chosen from that size.
    """
    n_enc_state = 768
    for name, tensor in model.items():
        print(name, tensor.shape)
        if name == "image_encoder.blocks.0.norm1.weight":
            n_enc_state = tensor.shape[0]

    # (layers, heads) for the known state sizes; any other size keeps the
    # defaults that go with 768.
    # NOTE(review): presumably these are the larger encoder variants - confirm.
    n_enc_layers, n_enc_heads = {
        1024: (24, 16),
        1280: (32, 16),
    }.get(n_enc_state, (12, 12))

    return {
        "n_enc_state": n_enc_state,
        "n_enc_layers": n_enc_layers,
        "n_enc_heads": n_enc_heads,
        "n_enc_out_chans": 256,
        "n_pt_embd": 4,
    }


def _convert_tensor(name, data, ftype):
    """Cast one tensor to its storage type and return (data, ftype_cur).

    float32 (ftype_cur == 0) is used when float32 output was requested, for
    1-D tensors, for "image_encoder.pos_embed" and for names starting with
    one of F32_NAME_PREFIXES; everything else becomes float16
    (ftype_cur == 1).
    """
    keep_f32 = (
        ftype == 0
        or data.ndim == 1
        or name == "image_encoder.pos_embed"
        or name.startswith(F32_NAME_PREFIXES)
    )
    if keep_f32:
        print(" Converting to float32")
        data = data.astype(np.float32)
        ftype_cur = 0
    else:
        print(" Converting to float16")
        data = data.astype(np.float16)
        ftype_cur = 1

    # The patch-embedding bias is stored as a 4-D (1, n, 1, 1) tensor. The
    # reshape happens after the type decision, which still sees it as 1-D.
    if name == "image_encoder.patch_embed.proj.bias":
        data = data.reshape(1, data.shape[0], 1, 1)

    return data, ftype_cur


def _write_tensor(fout, name, data, ftype_cur):
    """Append one tensor record to fout.

    Record layout (native C ints): n_dims, byte length of the name,
    ftype_cur, then the dimensions in reverse order, the UTF-8 name, and
    finally the raw array data.
    """
    name_bytes = name.encode('utf-8')
    fout.write(struct.pack("iii", data.ndim, len(name_bytes), ftype_cur))
    for dim in reversed(data.shape):
        fout.write(struct.pack("i", dim))
    fout.write(name_bytes)
    data.tofile(fout)


def main(argv):
    """Convert the checkpoint named in argv and write the binary model file.

    argv follows the sys.argv layout described in _parse_args. Exits with
    status 1 on invalid arguments.
    """
    fname_model, dir_out, ftype = _parse_args(argv)
    fname_out = _build_output_path(dir_out, ftype)

    # NOTE(security): torch.load unpickles the checkpoint, which can execute
    # arbitrary code. Only convert files that come from a trusted source.
    model = torch.load(fname_model, map_location="cpu")

    hparams = _infer_hparams(model)
    print(hparams)

    # The context manager closes the file even if a tensor fails to convert.
    with open(fname_out, "wb") as fout:
        fout.write(struct.pack("i", GGML_MAGIC))
        for key in HPARAM_KEYS:
            fout.write(struct.pack("i", hparams[key]))
        fout.write(struct.pack("i", ftype))

        for name, tensor in model.items():
            # Tensors under "prompt_encoder.mask" are not exported.
            if name.startswith("prompt_encoder.mask"):
                continue

            print("Processing variable: " + name + " with shape: ", tensor.shape, " and type: ", tensor.dtype)

            data, ftype_cur = _convert_tensor(name, tensor.numpy(), ftype)
            print(" New shape: ", data.shape)

            _write_tensor(fout, name, data, ftype_cur)

    print("Done. Output file: " + fname_out)
    print("")


if __name__ == "__main__":
    main(sys.argv)
|
|