
Commit

pivot to write data
mike dupont committed Dec 1, 2023
1 parent 6bd3472 commit 297c260
Showing 1 changed file with 281 additions and 41 deletions.
322 changes: 281 additions & 41 deletions libdynet.cpp
@@ -1,6 +1,12 @@
//#include <format>
#define FMT_HEADER_ONLY 1
#include <fmt/core.h>
#include <iostream>
#include <fstream>
#include <cstdint>    // fixed-width uint32_t for the binary batch records
#include <iterator>   // back_inserter
#include <type_traits>

using namespace std;
#include <refl-cpp/refl.hpp>

@@ -372,11 +378,11 @@ REFL_END

void trainmain();

size_t BATCH_SIZE = 1000;
static vector<vector<float>> batch(BATCH_SIZE);
static int next_id = 0;
static int batch_id = 0;

void ggml_tensor_add(const char * name,const struct ggml_tensor * tensor);

#include <eigen3/Eigen/Core>
@@ -395,12 +401,33 @@ void ggml_tensor_add(const char * name,const struct ggml_tensor * tensor){
    float* buffer = ggml_get_data_f32(tensor);
    //Expression x = input(cg, buffer);
    // runtime2::debug(std::cout,x);

    // Copy the tensor's elements into a plain float vector.
    std::vector<float> values;
    std::copy(buffer, buffer + num_elements, std::back_inserter(values));

    batch[(next_id++) % BATCH_SIZE] = values;

    if ((next_id % BATCH_SIZE) == 0) {
        batch_id++;

        // Dump the accumulated batch to a length-prefixed binary file.
        ofstream data_file(fmt::format("batch{}.bin", batch_id), ios::out | ios::binary);

        for (auto &row : batch) {
            // Write the row length as a fixed-width 32-bit value rather than
            // a size_t, so exactly 4 bytes are emitted on every platform.
            uint32_t bsize = static_cast<uint32_t>(row.size());
            data_file.write(reinterpret_cast<const char*>(&bsize), sizeof(bsize));

            for (float cell : row) {
                data_file.write(reinterpret_cast<const char*>(&cell), sizeof(cell));
            }
        }
        data_file.close();
    }

//trainmain();
//runtime2::debug(std::cout,batch);
}
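
// For reference, a minimal sketch of reading the batch{N}.bin files back.
// Illustrative only: read_batch_file is not part of the original source.
// Each record is a 4-byte row length followed by that many 32-bit floats.
std::vector<std::vector<float>> read_batch_file(const std::string & path) {
    std::ifstream in(path, std::ios::in | std::ios::binary);
    std::vector<std::vector<float>> rows;
    uint32_t bsize = 0;
    while (in.read(reinterpret_cast<char*>(&bsize), sizeof(bsize))) {
        std::vector<float> row(bsize);
        if (!in.read(reinterpret_cast<char*>(row.data()), bsize * sizeof(float)))
            break; // truncated record
        rows.push_back(std::move(row));
    }
    return rows;
}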

@@ -425,43 +452,256 @@ int ITERATIONS = 5;

void trainmain() {

  // Training is disabled for now: this build only accumulates batches and
  // writes them to disk in ggml_tensor_add above.

  // char** argv = 0;
  // int argc = 0;
  // dynet::initialize(argc, argv);
  // static SimpleSGDTrainer trainer(model);
  // Parameter p_W = model.add_parameters({HIDDEN_SIZE, 2});
  // Parameter p_b = model.add_parameters({HIDDEN_SIZE});
  // Parameter p_V = model.add_parameters({1, HIDDEN_SIZE});
  // Parameter p_a = model.add_parameters({1});

  // Expression W = parameter(cg, p_W);
  // Expression b = parameter(cg, p_b);
  // Expression V = parameter(cg, p_V);
  // Expression a = parameter(cg, p_a);

  // // Train the parameters.
  // for (unsigned iter = 0; iter < ITERATIONS; ++iter) {
  //   double loss = 0;
  //   for (unsigned mi = 0; mi < BATCH_SIZE; ++mi) {
  //     auto x_values = batch[mi];
  //     // auto y_value = x_values.batch_ptr(0);

  //     Expression y = input(cg, y_value);
  //     Expression x = input(cg, x_values.batch_ptr(0));
  //     Expression h = tanh(W*x + b);
  //     Expression y_pred = V*h + a;
  //     Expression loss_expr = squared_distance(y_pred, y);

  //     loss += as_scalar(cg.forward(loss_expr));
  //     cg.backward(loss_expr);
  //     trainer.update();
  //   }
  //   loss /= 4;
  //   cerr << "E = " << loss << endl;
  // }
}

#include <vector>
#include <stdexcept>
#include <fstream>
#include <chrono>
#include <cassert>        // assert() in read()
#include <algorithm>      // shuffle, max_element
#include <unordered_map>  // word counts
#ifdef BOOST_REGEX
#include <boost/regex.hpp>
using namespace boost;
#else
#include <regex>
#endif

#include <dynet/training.h>
#include <dynet/expr.h>
#include <dynet/dict.h>
#include <dynet/lstm.h>

using namespace std;
using namespace std::chrono;
using namespace dynet;

// Read a file where each line is of the form "word1|tag1 word2|tag2 ..."
// Yields pairs of lists of the form < [word1, word2, ...], [tag1, tag2, ...] >
vector<pair<vector<string>, vector<string> > > read(const string & fname) {
ifstream fh(fname);
if(!fh) throw std::runtime_error("Could not open file");
string str;
regex re("[ |]");
vector<pair<vector<string>, vector<string> > > sents;
while(getline(fh, str)) {
pair<vector<string>,vector<string> > word_tags;
sregex_token_iterator first{str.begin(), str.end(), re, -1}, last;
while(first != last) {
word_tags.first.push_back(*first++);
assert(first != last);
word_tags.second.push_back(*first++);
}
sents.push_back(word_tags);
}

return sents;
}
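
// Usage sketch (illustrative; demo_read is not part of the original file):
// a line such as "the|DT dog|NN barks|VBZ" parses into
// words {"the","dog","barks"} and tags {"DT","NN","VBZ"}.
static void demo_read(const string & fname) {
  for (auto & sent : read(fname))
    cout << sent.first.size() << " words / " << sent.second.size() << " tags" << endl;
}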

class BiLSTMTagger {
public:

BiLSTMTagger(unsigned layers, unsigned wembed_dim, unsigned hidden_dim, unsigned mlp_dim, ParameterCollection & model, Dict & wv, Dict & tv, unordered_map<string,int> & wc)
: wv(wv), tv(tv), wc(wc) {
unsigned nwords = wv.size();
unsigned ntags = tv.size();
word_lookup = model.add_lookup_parameters(nwords, {wembed_dim});

// MLP on top of biLSTM outputs: hidden_dim*2 -> mlp_dim -> ntags
pH = model.add_parameters({mlp_dim, hidden_dim*2});
pO = model.add_parameters({ntags, mlp_dim});

// word-level LSTMs
fwdRNN = VanillaLSTMBuilder(layers, wembed_dim, hidden_dim, model); // layers, in-dim, out-dim, model
bwdRNN = VanillaLSTMBuilder(layers, wembed_dim, hidden_dim, model);
}

Dict &wv, &tv;
unordered_map<string,int> & wc;
LookupParameter word_lookup;
Parameter pH, pO;
VanillaLSTMBuilder fwdRNN, bwdRNN;

// Look up the embedding for a word, backing off to <unk> for rare words (count <= 5)
Expression word_rep(ComputationGraph & cg, const string & w) {
return lookup(cg, word_lookup, wv.convert(wc[w] > 5 ? w : "<unk>"));
}

vector<Expression> build_tagging_graph(ComputationGraph & cg, const vector<string> & words) {
// parameters -> expressions
Expression H = parameter(cg, pH);
Expression O = parameter(cg, pO);

// initialize the RNNs
fwdRNN.new_graph(cg);
bwdRNN.new_graph(cg);

// get the word vectors: word_rep(...) returns a wembed_dim-dimensional vector expression per word.
vector<Expression> wembs(words.size()), fwds(words.size()), bwds(words.size()), fbwds(words.size());
for(size_t i = 0; i < words.size(); ++i)
wembs[i] = word_rep(cg, words[i]);

// feed word vectors into biLSTM
fwdRNN.start_new_sequence();
for(size_t i = 0; i < wembs.size(); ++i)
fwds[i] = fwdRNN.add_input(wembs[i]);
bwdRNN.start_new_sequence();
for(size_t i = wembs.size(); i > 0; --i)
bwds[i-1] = bwdRNN.add_input(wembs[i-1]);

// Concatenate and MLP
for(size_t i = 0; i < wembs.size(); ++i)
fbwds[i] = O * tanh( H * concatenate({fwds[i], bwds[i]}) );

return fbwds;
}

Expression sent_loss(ComputationGraph & cg, vector<string> & words, vector<string> & tags) {
vector<Expression> exprs = build_tagging_graph(cg, words), errs(words.size());
for(size_t i = 0; i < tags.size(); ++i)
errs[i] = pickneglogsoftmax(exprs[i], tv.convert(tags[i]));
return sum(errs);
}

vector<string> tag_sent(vector<string> & words) {
ComputationGraph cg;
vector<Expression> exprs = build_tagging_graph(cg, words), errs(words.size());
vector<string> tags(words.size());
for(size_t i = 0; i < words.size(); ++i) {
vector<float> scores = as_vector(exprs[i].value());
size_t max_id = distance(scores.begin(), max_element(scores.begin(), scores.end()));
tags[i] = tv.convert(max_id);
}
return tags;
}

};

int othermain() {
  // argc/argv are stubbed out here, so the argc != 6 check below fails and
  // othermain returns early unless real command-line arguments are wired in.
  int argc = 0;
  char** argv = nullptr;
  time_point<system_clock> start = system_clock::now();

vector<pair<vector<string>, vector<string> > > train = read("data/tags/train.txt");
vector<pair<vector<string>, vector<string> > > dev = read("data/tags/dev.txt");
Dict word_voc, tag_voc;
unordered_map<string, int> word_cnt;
for(auto & sent : train) {
for(auto & w : sent.first) {
word_voc.convert(w);
word_cnt[w]++;
}
for(auto & t : sent.second)
tag_voc.convert(t);
}
tag_voc.freeze();
word_voc.convert("<unk>"); word_voc.freeze(); word_voc.set_unk("<unk>");

// DyNet Starts
dynet::initialize(argc, argv);
ParameterCollection model;
AdamTrainer trainer(model);
trainer.clipping_enabled = false;

if(argc != 6) {
  // avoid dereferencing the stubbed-out argv in the usage message
  cerr << "Usage: othermain WEMBED_SIZE HIDDEN_SIZE MLP_SIZE SPARSE TIMEOUT" << endl;
  return 1;
}
int WEMBED_SIZE = atoi(argv[1]);
int HIDDEN_SIZE = atoi(argv[2]);
int MLP_SIZE = atoi(argv[3]);
trainer.sparse_updates_enabled = atoi(argv[4]);
int TIMEOUT = atoi(argv[5]);

// Initialize the tagger
BiLSTMTagger tagger(1, WEMBED_SIZE, HIDDEN_SIZE, MLP_SIZE, model, word_voc, tag_voc, word_cnt);

{
duration<float> fs = (system_clock::now() - start);
float startup_time = duration_cast<milliseconds>(fs).count() / float(1000);
cout << "startup time: " << startup_time << endl;
}

// Do training
start = system_clock::now();
int i = 0, all_tagged = 0, this_words = 0;
float this_loss = 0.f, all_time = 0.f;
for(int iter = 0; iter < 100; iter++) {
shuffle(train.begin(), train.end(), *dynet::rndeng);
for(auto & s : train) {
i++;
if(i % 500 == 0) {
trainer.status();
cout << this_loss/this_words << endl;
all_tagged += this_words;
this_loss = 0.f;
this_words = 0;
}
if(i % 10000 == 0) {
duration<float> fs = (system_clock::now() - start);
all_time += duration_cast<milliseconds>(fs).count() / float(1000);
int dev_words = 0, dev_good = 0;
float dev_loss = 0;
for(auto & sent : dev) {
vector<string> tags = tagger.tag_sent(sent.first);
for(size_t j = 0; j < tags.size(); ++j)
if(tags[j] == sent.second[j])
dev_good++;
dev_words += sent.second.size();
}
cout << "acc=" << dev_good/float(dev_words) << ", time=" << all_time << ", word_per_sec=" << all_tagged/all_time << endl;
if(all_time > TIMEOUT)
exit(0);
start = system_clock::now();
}

ComputationGraph cg;
Expression loss_exp = tagger.sent_loss(cg, s.first, s.second);
float my_loss = as_scalar(cg.forward(loss_exp));
this_loss += my_loss;
this_words += s.first.size();
cg.backward(loss_exp);
trainer.update();
}
}
return 0;
}
