gguf current state
grazder committed Apr 29, 2024
1 parent 9c519fc commit 0d1b685
Showing 7 changed files with 71 additions and 76 deletions.
.gitignore (2 additions & 1 deletion)
@@ -1,4 +1,5 @@
 build
 .vscode
 *.bin
-__pycache__
+__pycache__
+*.gguf
README.md (2 additions & 2 deletions)
@@ -43,9 +43,9 @@ python -m pytest tests/test.py
 - [x] Basic FF example
 - [x] Python-CPP tests
 - [ ] Add GGUF
-- [ ] Trying on real model
+- [ ] Try on real model
 - [ ] Adapt template for real case usage
-- [ ] Writing comments
+- [ ] Write comments
 - [ ] Add argparse for `model.cpp`
 - [ ] Support FP16
 - [ ] Quantization (?)
example/main.cpp (11 additions & 11 deletions)
@@ -10,19 +10,19 @@ int main(void)
 
     // Load model and run forward
     module model;
-    load_model("../ggml-model.bin", model);
-    struct ggml_tensor *result = compute(model, input);
+    load_model("../example.gguf", model);
+    // struct ggml_tensor *result = compute(model, input);
 
-    // Printing
-    std::vector<float> out_data(ggml_nelements(result));
-    memcpy(out_data.data(), result->data, ggml_nbytes(result));
+    // // Printing
+    // std::vector<float> out_data(ggml_nelements(result));
+    // memcpy(out_data.data(), result->data, ggml_nbytes(result));
 
-    printf("Result: [");
-    for (int i = 0; i < result->ne[0]; i++)
-    {
-        printf("%.2f, ", out_data[i]);
-    }
-    printf("]\n");
+    // printf("Result: [");
+    // for (int i = 0; i < result->ne[0]; i++)
+    // {
+    //     printf("%.2f, ", out_data[i]);
+    // }
+    // printf("]\n");
 
     ggml_free(model.ctx);
     return 0;
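With the forward pass disabled until GGUF loading lands, the printing block above is dead code. If it comes back, it could be factored into a small helper. The sketch below is not part of this commit: `print_tensor_1d` is a hypothetical name, and it assumes an F32 tensor resident in CPU memory.

```cpp
// Hypothetical helper (not in this commit): copy a 1-D F32 tensor out of
// ggml and print it, replacing the commented-out block in main().
#include <cstdio>
#include <cstring>
#include <vector>
#include "ggml.h"

static void print_tensor_1d(const struct ggml_tensor *t)
{
    std::vector<float> out_data(ggml_nelements(t));
    memcpy(out_data.data(), t->data, ggml_nbytes(t)); // assumes F32 data on CPU

    printf("Result: [");
    for (int64_t i = 0; i < t->ne[0]; i++)
    {
        printf("%.2f, ", out_data[i]);
    }
    printf("]\n");
}
```

Once `compute` works against GGUF-loaded weights, the call site shrinks to `print_tensor_1d(compute(model, input));`.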
template.cpp (37 additions & 29 deletions)
@@ -32,18 +32,18 @@ bool verify_magic(std::ifstream &infile)
 {
     uint32_t magic;
     read_safe(infile, magic);
-    if (magic != FILE_MAGIC)
+    if (magic != GGUF_FILE_MAGIC)
     {
         fprintf(stderr, "%s: invalid model file (bad magic)\n", __func__);
         return false;
     }
     return true;
 }
 
-void load_hparams(std::ifstream &infile, module &model)
+void load_hparams(gguf_context *ctx, module &model)
 {
     auto &hparams = model.hparams;
-    read_safe(infile, hparams.in_channels);
+    // gguf_get_key(ctx, "in_channels", );
     printf("%s: in_channels = %d\n", __func__, hparams.in_channels);
 }
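The `gguf_get_key` placeholder above would not compile as written: in ggml's gguf C API, `gguf_get_key` maps a key index back to its name. Reading a value by name goes through `gguf_find_key` plus a typed getter. A possible completion (a sketch, not part of the commit), assuming `in_channels` is written as an i32 by the export script below:

```cpp
// Sketch: read the "in_channels" KV pair written by GGUFWriter.add_int32
// in weights_export/export_model_weights.py.
void load_hparams(gguf_context *ctx, module &model)
{
    auto &hparams = model.hparams;

    const int key_id = gguf_find_key(ctx, "in_channels");
    if (key_id < 0)
    {
        fprintf(stderr, "%s: key 'in_channels' not found\n", __func__);
        return;
    }
    hparams.in_channels = gguf_get_val_i32(ctx, key_id);
    printf("%s: in_channels = %d\n", __func__, hparams.in_channels);
}
```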

@@ -190,44 +190,52 @@ bool load_model(const std::string &fname, module &model)
 {
     fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str());
 
-    auto infile = std::ifstream(fname, std::ios::binary);
-    if (!infile)
+    struct ggml_context *meta = NULL;
+
+    struct gguf_init_params params = {
+        /*.no_alloc = */ true,
+        /*.ctx = */ &meta,
+    };
+
+    struct gguf_context *ctx = gguf_init_from_file(fname.c_str(), params);
+
+    if (!ctx)
     {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
         return false;
     }
 
-    if (!verify_magic(infile))
-    {
-        return false;
-    }
+    const int n_tensors = gguf_get_n_tensors(ctx);
+    const int n_kv = gguf_get_n_kv(ctx);
+    printf("N_TENSORS: %i\n", n_tensors);
+    printf("N_KV: %i\n", n_kv);
 
-    load_hparams(infile, model);
-    size_t ctx_size = evaluate_context_size(model);
+    // load_hparams(ctx, model);
+    // size_t ctx_size = evaluate_context_size(model);
 
-    if (!init_model_context(model, ctx_size))
-    {
-        return false;
-    }
+    // if (!init_model_context(model, ctx_size))
+    // {
+    //     return false;
+    // }
 
-    create_model_weight_tensors(model);
+    // create_model_weight_tensors(model);
 
-    if (!init_model_backend(model))
-    {
-        return false;
-    }
+    // if (!init_model_backend(model))
+    // {
+    //     return false;
+    // }
 
-    if (!allocate_model_buffer(model))
-    {
-        return false;
-    }
+    // if (!allocate_model_buffer(model))
+    // {
+    //     return false;
+    // }
 
-    if (!load_weights(infile, model))
-    {
-        return false;
-    }
+    // if (!load_weights(infile, model))
+    // {
+    //     return false;
+    // }
 
-    infile.close();
+    // infile.close();
     return true;
 }

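The commented-out tail of `load_model` still assumes the old `std::ifstream` pipeline (`load_weights(infile, model)`, `infile.close()`). Under GGUF, each tensor's payload sits at `gguf_get_data_offset` plus that tensor's `gguf_get_tensor_offset`, so a replacement might look like the sketch below. It assumes `create_model_weight_tensors` has already created identically named tensors in `model.ctx`; `load_weights_gguf` is a hypothetical name, not part of the commit.

```cpp
// Sketch of a GGUF-based weight loader: for every tensor described in the
// gguf header, seek to its payload in the file and read it straight into
// the matching ggml tensor's buffer.
bool load_weights_gguf(const std::string &fname, gguf_context *ctx, module &model)
{
    std::ifstream infile(fname, std::ios::binary);
    if (!infile)
    {
        return false;
    }

    const size_t data_offset = gguf_get_data_offset(ctx);
    const int n_tensors = gguf_get_n_tensors(ctx);

    for (int i = 0; i < n_tensors; i++)
    {
        const char *name = gguf_get_tensor_name(ctx, i);
        struct ggml_tensor *dst = ggml_get_tensor(model.ctx, name);
        if (!dst)
        {
            fprintf(stderr, "%s: tensor '%s' not found in model\n", __func__, name);
            return false;
        }
        infile.seekg(data_offset + gguf_get_tensor_offset(ctx, i));
        infile.read(reinterpret_cast<char *>(dst->data), ggml_nbytes(dst));
    }
    return true;
}
```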
template.h (1 addition & 1 deletion)
@@ -5,7 +5,7 @@
 #include <string>
 #include <vector>
 
-#define FILE_MAGIC 'ggml'
+#define GGUF_FILE_MAGIC 0x46554747 // "GGUF"
 
 static const size_t MB = 1024 * 1024;
 
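The old `'ggml'` multi-character literal had an implementation-defined value; the new constant is the four file bytes `G G U F` read as a little-endian `uint32_t`, which is why the string appears reversed in hex. A compile-time check makes the byte order explicit:

```cpp
// 'G'=0x47, 'U'=0x55, 'F'=0x46; little-endian reading puts the last file
// byte ('F') in the most significant position, giving 0x46554747.
#include <cstdint>

static_assert((uint32_t('F') << 24 | uint32_t('U') << 16 |
               uint32_t('G') << 8 | uint32_t('G')) == 0x46554747,
              "GGUF magic is \"GGUF\" read as a little-endian u32");
```

Note that `gguf_init_from_file` already rejects files with a bad magic, so `verify_magic` may become redundant once the old loader path is removed.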
weights_export/export_model_weights.py (16 additions & 31 deletions)
@@ -1,6 +1,7 @@
 import torch
 import struct
 import numpy as np
+from gguf import GGUFWriter
 
 torch.manual_seed(52)
 
@@ -17,41 +18,20 @@ def forward(self, x):
         return x
 
 
-def parse_hparams(outfile):
+def parse_hparams(gguf_writer):
     in_channels = 5
-    outfile.write(struct.pack("i", in_channels))
+    gguf_writer.add_int32("in_channels", in_channels)
 
 
-def parse_model(checkpoint, outfile):
+def parse_model(checkpoint, gguf_writer):
     """Load encodec model checkpoint."""
-    n_f32 = 0
-
     for name in checkpoint.keys():
         var_data = checkpoint[name]
-        var_data = var_data.numpy().squeeze()
+        var_data = var_data.numpy().squeeze().astype(np.float32)
+        gguf_writer.add_tensor(name, var_data)
 
-        print(f"Processing variable: {name} with shape: {var_data.shape}")
-
-        print(" Converting to float32")
-        var_data = var_data.astype(np.float32)
-        ftype_cur = 0
-        n_f32 += 1
-
-        n_dims = len(var_data.shape)
-        encoded_name = name.encode("utf-8")
-        outfile.write(struct.pack("iii", n_dims, len(encoded_name), ftype_cur))
-
-        for i in range(n_dims):
-            outfile.write(struct.pack("i", var_data.shape[n_dims - 1 - i]))
-
-        outfile.write(encoded_name)
-        var_data.tofile(outfile)
-
-    outfile.close()
-
-    print()
-    print(f"n_f32: {n_f32}")
 
 
 if __name__ == "__main__":
     model = Model().cpu()
@@ -63,12 +43,17 @@ def parse_model(checkpoint, outfile):

     checkpoint = model.state_dict()
 
-    # Step 1: insert ggml magic
-    outfile = open("ggml-model.bin", "wb")
-    outfile.write(struct.pack("i", 0x67676D6C))
+    gguf_writer = GGUFWriter("example.gguf", "linear")
 
     # Step 2: insert hyperparameters
-    parse_hparams(outfile)
+    parse_hparams(gguf_writer)
 
     # Step 3: insert weights
-    parse_model(checkpoint, outfile)
+    parse_model(checkpoint, gguf_writer)
+
+    # Step 4: saving model and hparams to file
+    gguf_writer.write_header_to_file()
+    gguf_writer.write_kv_data_to_file()
+    gguf_writer.write_tensors_to_file()
+
+    gguf_writer.close()
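A quick way to sanity-check the exporter's output is to open `example.gguf` from C++ with the same gguf API that `load_model` now uses and dump every key and tensor name. A minimal sketch (in the ggml of this era the `gguf_*` declarations live in `ggml.h`):

```cpp
// Minimal round-trip check for example.gguf: list all KV pairs and tensors.
#include <cstdio>
#include "ggml.h" // gguf_* declarations

int main(void)
{
    struct ggml_context *meta = NULL;
    struct gguf_init_params params = {
        /*.no_alloc = */ true,
        /*.ctx = */ &meta,
    };

    struct gguf_context *ctx = gguf_init_from_file("example.gguf", params);
    if (!ctx)
    {
        fprintf(stderr, "failed to open example.gguf\n");
        return 1;
    }

    for (int i = 0; i < gguf_get_n_kv(ctx); i++)
    {
        printf("kv[%d]: %s\n", i, gguf_get_key(ctx, i));
    }
    for (int i = 0; i < gguf_get_n_tensors(ctx); i++)
    {
        printf("tensor[%d]: %s\n", i, gguf_get_tensor_name(ctx, i));
    }

    gguf_free(ctx);
    ggml_free(meta);
    return 0;
}
```

For this example it should list the `in_channels` key (plus the metadata the writer itself adds, such as the "linear" architecture string) and one tensor per entry in `model.state_dict()`.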
weights_export/requirements.txt (2 additions & 1 deletion)
@@ -1,3 +1,4 @@
 torch
 numpy
-pytest
+pytest
+gguf
