diff --git a/.gitignore b/.gitignore
index 34a9fc5..efd1764 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 build
 .vscode
 *.bin
-__pycache__
\ No newline at end of file
+__pycache__
+*.gguf
\ No newline at end of file
diff --git a/README.md b/README.md
index 32321e7..a730ed7 100644
--- a/README.md
+++ b/README.md
@@ -43,9 +43,9 @@ python -m pytest tests/test.py
 - [x] Basic FF example
 - [x] Python-CPP tests
 - [ ] Add GGUF
-- [ ] Trying on real model
+- [ ] Try on real model
 - [ ] Adapt template for real case usage
-- [ ] Writing comments
+- [ ] Write comments
 - [ ] Add argparse for `model.cpp`
 - [ ] Support FP16
 - [ ] Quantization (?)
diff --git a/example/main.cpp b/example/main.cpp
index 54227d2..ca1a513 100644
--- a/example/main.cpp
+++ b/example/main.cpp
@@ -10,19 +10,19 @@ int main(void)
 
     // Load model and run forward
     module model;
-    load_model("../ggml-model.bin", model);
-    struct ggml_tensor *result = compute(model, input);
+    load_model("../example.gguf", model);
+    // struct ggml_tensor *result = compute(model, input);
 
-    // Printing
-    std::vector<float> out_data(ggml_nelements(result));
-    memcpy(out_data.data(), result->data, ggml_nbytes(result));
+    // // Printing
+    // std::vector<float> out_data(ggml_nelements(result));
+    // memcpy(out_data.data(), result->data, ggml_nbytes(result));
 
-    printf("Result: [");
-    for (int i = 0; i < result->ne[0]; i++)
-    {
-        printf("%.2f, ", out_data[i]);
-    }
-    printf("]\n");
+    // printf("Result: [");
+    // for (int i = 0; i < result->ne[0]; i++)
+    // {
+    //     printf("%.2f, ", out_data[i]);
+    // }
+    // printf("]\n");
 
     ggml_free(model.ctx);
     return 0;
diff --git a/template.cpp b/template.cpp
index f320bc0..d04d54e 100644
--- a/template.cpp
+++ b/template.cpp
@@ -32,7 +32,7 @@ bool verify_magic(std::ifstream &infile)
 {
     uint32_t magic;
     read_safe(infile, magic);
-    if (magic != FILE_MAGIC)
+    if (magic != GGUF_FILE_MAGIC)
     {
         fprintf(stderr, "%s: invalid model file (bad magic)\n", __func__);
         return false;
@@ -40,10 +40,10 @@
     return true;
 }
 
-void load_hparams(std::ifstream &infile, module &model)
+void load_hparams(gguf_context *ctx, module &model)
 {
     auto &hparams = model.hparams;
 
-    read_safe(infile, hparams.in_channels);
+    // gguf_get_key(ctx, "in_channels", );
     printf("%s: in_channels = %d\n", __func__, hparams.in_channels);
 }
@@ -190,44 +190,52 @@ bool load_model(const std::string &fname, module &model)
 {
     fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str());
 
-    auto infile = std::ifstream(fname, std::ios::binary);
-    if (!infile)
+    struct ggml_context *meta = NULL;
+
+    struct gguf_init_params params = {
+        /*.no_alloc = */ true,
+        /*.ctx      = */ &meta,
+    };
+
+    struct gguf_context *ctx = gguf_init_from_file(fname.c_str(), params);
+
+    if (!ctx)
     {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
         return false;
     }
 
-    if (!verify_magic(infile))
-    {
-        return false;
-    }
+    const int n_tensors = gguf_get_n_tensors(ctx);
+    const int n_kv = gguf_get_n_kv(ctx);
+    printf("N_TENSORS: %i\n", n_tensors);
+    printf("N_KV: %i\n", n_kv);
 
-    load_hparams(infile, model);
-    size_t ctx_size = evaluate_context_size(model);
+    // load_hparams(ctx, model);
+    // size_t ctx_size = evaluate_context_size(model);
 
-    if (!init_model_context(model, ctx_size))
-    {
-        return false;
-    }
+    // if (!init_model_context(model, ctx_size))
+    // {
+    //     return false;
+    // }
 
-    create_model_weight_tensors(model);
+    // create_model_weight_tensors(model);
 
-    if (!init_model_backend(model))
-    {
-        return false;
-    }
+    // if (!init_model_backend(model))
+    // {
+    //     return false;
+    // }
 
-    if (!allocate_model_buffer(model))
-    {
-        return false;
-    }
+    // if (!allocate_model_buffer(model))
+    // {
+    //     return false;
+    // }
 
-    if (!load_weights(infile, model))
-    {
-        return false;
-    }
+    // if (!load_weights(infile, model))
+    // {
+    //     return false;
+    // }
 
-    infile.close();
+    // infile.close();
 
     return true;
 }
diff --git a/template.h b/template.h
index 3727cb3..040394a 100644
--- a/template.h
+++ b/template.h
@@ -5,7 +5,7 @@
 #include
 #include
 
-#define FILE_MAGIC 'ggml'
+#define GGUF_FILE_MAGIC 0x46554747 // "GGUF"
 
 static const size_t MB = 1024 * 1024;
 
diff --git a/weights_export/export_model_weights.py b/weights_export/export_model_weights.py
index 4e8c632..166baf6 100644
--- a/weights_export/export_model_weights.py
+++ b/weights_export/export_model_weights.py
@@ -1,6 +1,7 @@
 import torch
 import struct
 import numpy as np
+from gguf import GGUFWriter
 
 torch.manual_seed(52)
 
@@ -17,41 +18,20 @@ def forward(self, x):
         return x
 
 
-def parse_hparams(outfile):
+def parse_hparams(gguf_writer):
     in_channels = 5
-    outfile.write(struct.pack("i", in_channels))
+    gguf_writer.add_int32("in_channels", in_channels)
 
 
-def parse_model(checkpoint, outfile):
+def parse_model(checkpoint, gguf_writer):
     """Load encodec model checkpoint."""
-    n_f32 = 0
-
     for name in checkpoint.keys():
         var_data = checkpoint[name]
-        var_data = var_data.numpy().squeeze()
+        var_data = var_data.numpy().squeeze().astype(np.float32)
+        gguf_writer.add_tensor(name, var_data)
 
         print(f"Processing variable: {name} with shape: {var_data.shape}")
 
-        print(" Converting to float32")
-        var_data = var_data.astype(np.float32)
-        ftype_cur = 0
-        n_f32 += 1
-
-        n_dims = len(var_data.shape)
-        encoded_name = name.encode("utf-8")
-        outfile.write(struct.pack("iii", n_dims, len(encoded_name), ftype_cur))
-
-        for i in range(n_dims):
-            outfile.write(struct.pack("i", var_data.shape[n_dims - 1 - i]))
-
-        outfile.write(encoded_name)
-        var_data.tofile(outfile)
-
-    outfile.close()
-
-    print()
-    print(f"n_f32: {n_f32}")
-
 
 if __name__ == "__main__":
     model = Model().cpu()
@@ -63,12 +43,17 @@ def parse_model(checkpoint, outfile):
 
     checkpoint = model.state_dict()
 
-    # Step 1: insert ggml magic
-    outfile = open("ggml-model.bin", "wb")
-    outfile.write(struct.pack("i", 0x67676D6C))
+    gguf_writer = GGUFWriter("example.gguf", "linear")
 
     # Step 2: insert hyperparameters
-    parse_hparams(outfile)
+    parse_hparams(gguf_writer)
 
     # Step 3: insert weights
-    parse_model(checkpoint, outfile)
+    parse_model(checkpoint, gguf_writer)
+
+    # Step 4: saving model and hparams to file
+    gguf_writer.write_header_to_file()
+    gguf_writer.write_kv_data_to_file()
+    gguf_writer.write_tensors_to_file()
+
+    gguf_writer.close()
diff --git a/weights_export/requirements.txt b/weights_export/requirements.txt
index ec8e5d1..f72037e 100644
--- a/weights_export/requirements.txt
+++ b/weights_export/requirements.txt
@@ -1,3 +1,4 @@
 torch
 numpy
-pytest
\ No newline at end of file
+pytest
+gguf
\ No newline at end of file