gguf current state
grazder committed Apr 29, 2024
1 parent 9c519fc commit 0d1b685
Showing 7 changed files with 71 additions and 76 deletions.
.gitignore (2 additions & 1 deletion)
@@ -1,4 +1,5 @@
 build
 .vscode
 *.bin
-__pycache__
+__pycache__
+*.gguf
README.md (2 additions & 2 deletions)
@@ -43,9 +43,9 @@ python -m pytest tests/test.py
 - [x] Basic FF example
 - [x] Python-CPP tests
 - [ ] Add GGUF
-- [ ] Trying on real model
+- [ ] Try on real model
 - [ ] Adapt template for real case usage
-- [ ] Writing comments
+- [ ] Write comments
 - [ ] Add argparse for `model.cpp`
 - [ ] Support FP16
 - [ ] Quantization (?)
example/main.cpp (11 additions & 11 deletions)
@@ -10,19 +10,19 @@ int main(void)
 
     // Load model and run forward
     module model;
-    load_model("../ggml-model.bin", model);
-    struct ggml_tensor *result = compute(model, input);
+    load_model("../example.gguf", model);
+    // struct ggml_tensor *result = compute(model, input);
 
-    // Printing
-    std::vector<float> out_data(ggml_nelements(result));
-    memcpy(out_data.data(), result->data, ggml_nbytes(result));
+    // // Printing
+    // std::vector<float> out_data(ggml_nelements(result));
+    // memcpy(out_data.data(), result->data, ggml_nbytes(result));
 
-    printf("Result: [");
-    for (int i = 0; i < result->ne[0]; i++)
-    {
-        printf("%.2f, ", out_data[i]);
-    }
-    printf("]\n");
+    // printf("Result: [");
+    // for (int i = 0; i < result->ne[0]; i++)
+    // {
+    //     printf("%.2f, ", out_data[i]);
+    // }
+    // printf("]\n");
 
     ggml_free(model.ctx);
     return 0;
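With the forward pass disabled until GGUF loading lands, the printing block above is dead code. If it comes back, it could be factored into a small helper. The sketch below is not part of this commit: `print_tensor_1d` is a hypothetical name, and it assumes an F32 tensor resident in CPU memory.

```cpp
// Hypothetical helper (not in this commit): copy a 1-D F32 tensor out of
// ggml and print it, replacing the commented-out block in main().
#include <cstdio>
#include <cstring>
#include <vector>
#include "ggml.h"

static void print_tensor_1d(const struct ggml_tensor *t)
{
    std::vector<float> out_data(ggml_nelements(t));
    memcpy(out_data.data(), t->data, ggml_nbytes(t)); // assumes F32 data on CPU

    printf("Result: [");
    for (int64_t i = 0; i < t->ne[0]; i++)
    {
        printf("%.2f, ", out_data[i]);
    }
    printf("]\n");
}
```

Once `compute` works against GGUF-loaded weights, the call site shrinks to `print_tensor_1d(compute(model, input));`.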
template.cpp (37 additions & 29 deletions)
@@ -32,18 +32,18 @@ bool verify_magic(std::ifstream &infile)
 {
     uint32_t magic;
     read_safe(infile, magic);
-    if (magic != FILE_MAGIC)
+    if (magic != GGUF_FILE_MAGIC)
     {
         fprintf(stderr, "%s: invalid model file (bad magic)\n", __func__);
         return false;
     }
     return true;
 }
 
-void load_hparams(std::ifstream &infile, module &model)
+void load_hparams(gguf_context *ctx, module &model)
 {
     auto &hparams = model.hparams;
-    read_safe(infile, hparams.in_channels);
+    // gguf_get_key(ctx, "in_channels", );
     printf("%s: in_channels = %d\n", __func__, hparams.in_channels);
 }
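The `gguf_get_key` placeholder above would not compile as written: in ggml's gguf C API, `gguf_get_key` maps a key index back to its name. Reading a value by name goes through `gguf_find_key` plus a typed getter. A possible completion (a sketch, not part of the commit), assuming `in_channels` is written as an i32 by the export script below:

```cpp
// Sketch: read the "in_channels" KV pair written by GGUFWriter.add_int32
// in weights_export/export_model_weights.py.
void load_hparams(gguf_context *ctx, module &model)
{
    auto &hparams = model.hparams;

    const int key_id = gguf_find_key(ctx, "in_channels");
    if (key_id < 0)
    {
        fprintf(stderr, "%s: key 'in_channels' not found\n", __func__);
        return;
    }
    hparams.in_channels = gguf_get_val_i32(ctx, key_id);
    printf("%s: in_channels = %d\n", __func__, hparams.in_channels);
}
```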

@@ -190,44 +190,52 @@ bool load_model(const std::string &fname, module &model)
 {
     fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str());
 
-    auto infile = std::ifstream(fname, std::ios::binary);
-    if (!infile)
+    struct ggml_context *meta = NULL;
+
+    struct gguf_init_params params = {
+        /*.no_alloc = */ true,
+        /*.ctx = */ &meta,
+    };
+
+    struct gguf_context *ctx = gguf_init_from_file(fname.c_str(), params);
+
+    if (!ctx)
     {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
         return false;
     }
 
-    if (!verify_magic(infile))
-    {
-        return false;
-    }
+    const int n_tensors = gguf_get_n_tensors(ctx);
+    const int n_kv = gguf_get_n_kv(ctx);
+    printf("N_TENSORS: %i\n", n_tensors);
+    printf("N_KV: %i\n", n_kv);
 
-    load_hparams(infile, model);
-    size_t ctx_size = evaluate_context_size(model);
+    // load_hparams(ctx, model);
+    // size_t ctx_size = evaluate_context_size(model);
 
-    if (!init_model_context(model, ctx_size))
-    {
-        return false;
-    }
+    // if (!init_model_context(model, ctx_size))
+    // {
+    //     return false;
+    // }
 
-    create_model_weight_tensors(model);
+    // create_model_weight_tensors(model);
 
-    if (!init_model_backend(model))
-    {
-        return false;
-    }
+    // if (!init_model_backend(model))
+    // {
+    //     return false;
+    // }
 
-    if (!allocate_model_buffer(model))
-    {
-        return false;
-    }
+    // if (!allocate_model_buffer(model))
+    // {
+    //     return false;
+    // }
 
-    if (!load_weights(infile, model))
-    {
-        return false;
-    }
+    // if (!load_weights(infile, model))
+    // {
+    //     return false;
+    // }
 
-    infile.close();
+    // infile.close();
     return true;
 }

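The commented-out tail of `load_model` still assumes the old `std::ifstream` pipeline (`load_weights(infile, model)`, `infile.close()`). Under GGUF, each tensor's payload sits at `gguf_get_data_offset` plus that tensor's `gguf_get_tensor_offset`, so a replacement might look like the sketch below. It assumes `create_model_weight_tensors` has already created identically named tensors in `model.ctx`; `load_weights_gguf` is a hypothetical name, not part of the commit.

```cpp
// Sketch of a GGUF-based weight loader: for every tensor described in the
// gguf header, seek to its payload in the file and read it straight into
// the matching ggml tensor's buffer.
bool load_weights_gguf(const std::string &fname, gguf_context *ctx, module &model)
{
    std::ifstream infile(fname, std::ios::binary);
    if (!infile)
    {
        return false;
    }

    const size_t data_offset = gguf_get_data_offset(ctx);
    const int n_tensors = gguf_get_n_tensors(ctx);

    for (int i = 0; i < n_tensors; i++)
    {
        const char *name = gguf_get_tensor_name(ctx, i);
        struct ggml_tensor *dst = ggml_get_tensor(model.ctx, name);
        if (!dst)
        {
            fprintf(stderr, "%s: tensor '%s' not found in model\n", __func__, name);
            return false;
        }
        infile.seekg(data_offset + gguf_get_tensor_offset(ctx, i));
        infile.read(reinterpret_cast<char *>(dst->data), ggml_nbytes(dst));
    }
    return true;
}
```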
template.h (1 addition & 1 deletion)
@@ -5,7 +5,7 @@
 #include <string>
 #include <vector>
 
-#define FILE_MAGIC 'ggml'
+#define GGUF_FILE_MAGIC 0x46554747 // "GGUF"
 
 static const size_t MB = 1024 * 1024;
 
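The old `'ggml'` multi-character literal had an implementation-defined value; the new constant is the four file bytes `G G U F` read as a little-endian `uint32_t`, which is why the string appears reversed in hex. A compile-time check makes the byte order explicit:

```cpp
// 'G'=0x47, 'U'=0x55, 'F'=0x46; little-endian reading puts the last file
// byte ('F') in the most significant position, giving 0x46554747.
#include <cstdint>

static_assert((uint32_t('F') << 24 | uint32_t('U') << 16 |
               uint32_t('G') << 8 | uint32_t('G')) == 0x46554747,
              "GGUF magic is \"GGUF\" read as a little-endian u32");
```

Note that `gguf_init_from_file` already rejects files with a bad magic, so `verify_magic` may become redundant once the old loader path is removed.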
weights_export/export_model_weights.py (16 additions & 31 deletions)
@@ -1,6 +1,7 @@
 import torch
 import struct
 import numpy as np
+from gguf import GGUFWriter
 
 torch.manual_seed(52)
 
@@ -17,41 +18,20 @@ def forward(self, x):
         return x
 
 
-def parse_hparams(outfile):
+def parse_hparams(gguf_writer):
     in_channels = 5
-    outfile.write(struct.pack("i", in_channels))
+    gguf_writer.add_int32("in_channels", in_channels)
 
 
-def parse_model(checkpoint, outfile):
+def parse_model(checkpoint, gguf_writer):
     """Load encodec model checkpoint."""
-    n_f32 = 0
-
     for name in checkpoint.keys():
         var_data = checkpoint[name]
-        var_data = var_data.numpy().squeeze()
+        var_data = var_data.numpy().squeeze().astype(np.float32)
+        gguf_writer.add_tensor(name, var_data)
 
-        print(f"Processing variable: {name} with shape: {var_data.shape}")
-
-        print(" Converting to float32")
-        var_data = var_data.astype(np.float32)
-        ftype_cur = 0
-        n_f32 += 1
-
-        n_dims = len(var_data.shape)
-        encoded_name = name.encode("utf-8")
-        outfile.write(struct.pack("iii", n_dims, len(encoded_name), ftype_cur))
-
-        for i in range(n_dims):
-            outfile.write(struct.pack("i", var_data.shape[n_dims - 1 - i]))
-
-        outfile.write(encoded_name)
-        var_data.tofile(outfile)
-
-    outfile.close()
-
-    print()
-    print(f"n_f32: {n_f32}")
 
 
 if __name__ == "__main__":
     model = Model().cpu()
@@ -63,12 +43,17 @@ def parse_model(checkpoint, outfile):

     checkpoint = model.state_dict()
 
-    # Step 1: insert ggml magic
-    outfile = open("ggml-model.bin", "wb")
-    outfile.write(struct.pack("i", 0x67676D6C))
+    gguf_writer = GGUFWriter("example.gguf", "linear")
 
     # Step 2: insert hyperparameters
-    parse_hparams(outfile)
+    parse_hparams(gguf_writer)
 
     # Step 3: insert weights
-    parse_model(checkpoint, outfile)
+    parse_model(checkpoint, gguf_writer)
+
+    # Step 4: saving model and hparams to file
+    gguf_writer.write_header_to_file()
+    gguf_writer.write_kv_data_to_file()
+    gguf_writer.write_tensors_to_file()
+
+    gguf_writer.close()
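A quick way to sanity-check the exporter's output is to open `example.gguf` from C++ with the same gguf API that `load_model` now uses and dump every key and tensor name. A minimal sketch (in the ggml of this era the `gguf_*` declarations live in `ggml.h`):

```cpp
// Minimal round-trip check for example.gguf: list all KV pairs and tensors.
#include <cstdio>
#include "ggml.h" // gguf_* declarations

int main(void)
{
    struct ggml_context *meta = NULL;
    struct gguf_init_params params = {
        /*.no_alloc = */ true,
        /*.ctx = */ &meta,
    };

    struct gguf_context *ctx = gguf_init_from_file("example.gguf", params);
    if (!ctx)
    {
        fprintf(stderr, "failed to open example.gguf\n");
        return 1;
    }

    for (int i = 0; i < gguf_get_n_kv(ctx); i++)
    {
        printf("kv[%d]: %s\n", i, gguf_get_key(ctx, i));
    }
    for (int i = 0; i < gguf_get_n_tensors(ctx); i++)
    {
        printf("tensor[%d]: %s\n", i, gguf_get_tensor_name(ctx, i));
    }

    gguf_free(ctx);
    ggml_free(meta);
    return 0;
}
```

For this example it should list the `in_channels` key (plus the metadata the writer itself adds, such as the "linear" architecture string) and one tensor per entry in `model.state_dict()`.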
weights_export/requirements.txt (2 additions & 1 deletion)
@@ -1,3 +1,4 @@
 torch
 numpy
-pytest
+pytest
+gguf
