Skip to content

Commit

Permalink
major refactor, break bad circular deps
Browse files Browse the repository at this point in the history
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@509 ec762483-ff6d-05da-a07a-a48fb63a330f
  • Loading branch information
redpony committed Aug 11, 2010
1 parent 19b5948 commit a534616
Show file tree
Hide file tree
Showing 89 changed files with 772 additions and 333 deletions.
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SUBDIRS = decoder training vest extools gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
SUBDIRS = utils mteval decoder training vest extools gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
AUTOMAKE_OPTIONS = foreign

ACLOCAL_AMFLAGS = -I m4
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ then
AM_CONDITIONAL([RAND_LM], true)
fi

AC_OUTPUT(Makefile extools/Makefile decoder/Makefile training/Makefile vest/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile)
AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile training/Makefile vest/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile)
37 changes: 6 additions & 31 deletions decoder/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,16 @@ bin_PROGRAMS = cdec

if HAVE_GTEST
noinst_PROGRAMS = \
dict_test \
weights_test \
trule_test \
hg_test \
ff_test \
logval_test \
parser_test \
grammar_test \
small_vector_test
grammar_test
endif

cdec_SOURCES = cdec.cc forest_writer.cc maxtrans_blunsom.cc cdec_ff.cc timing_stats.cc
small_vector_test_SOURCES = small_vector_test.cc
small_vector_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
cdec_SOURCES = cdec.cc forest_writer.cc maxtrans_blunsom.cc cdec_ff.cc
parser_test_SOURCES = parser_test.cc
parser_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
dict_test_SOURCES = dict_test.cc
dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
ff_test_SOURCES = ff_test.cc
ff_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
grammar_test_SOURCES = grammar_test.cc
Expand All @@ -28,15 +20,12 @@ hg_test_SOURCES = hg_test.cc
hg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
trule_test_SOURCES = trule_test.cc
trule_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
weights_test_SOURCES = weights_test.cc
weights_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a
logval_test_SOURCES = logval_test.cc
logval_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)

LDADD = libcdec.a
LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a

AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I..
AM_LDFLAGS = -lz
AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils

AM_LDFLAGS = ../utils/libutils.a -lz

rule_lexer.cc: rule_lexer.l
$(LEX) -s -CF -8 -o$@ $<
Expand All @@ -49,7 +38,6 @@ libcdec_a_SOURCES = \
rule_lexer.cc \
fst_translator.cc \
csplit.cc \
dict.cc \
translator.cc \
scfg_translator.cc \
hg.cc \
Expand All @@ -58,17 +46,10 @@ libcdec_a_SOURCES = \
viterbi.cc \
lattice.cc \
aligner.cc \
gzstream.cc \
apply_models.cc \
earley_composer.cc \
phrasetable_fst.cc \
sparse_vector.cc \
trule.cc \
filelib.cc \
stringlib.cc \
fdict.cc \
tdict.cc \
weights.cc \
ttables.cc \
ff.cc \
ff_lm.cc \
Expand All @@ -78,12 +59,6 @@ libcdec_a_SOURCES = \
ff_tagger.cc \
ff_bleu.cc \
ff_factory.cc \
../vest/scorer.cc \
../vest/ter.cc \
../vest/aer_scorer.cc \
../vest/comb_scorer.cc \
../vest/error_surface.cc \
../vest/viterbi_envelope.cc \
freqdict.cc \
lexalign.cc \
lextrans.cc \
Expand Down
74 changes: 2 additions & 72 deletions decoder/aligner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,81 +5,11 @@
#include "sentence_metadata.h"
#include "inside_outside.h"
#include "viterbi.h"
#include "alignment_pharaoh.h"
#include <set>

using namespace std;

// True iff x is one of the ASCII characters '0' through '9'.
// Plain range comparison, so the result does not depend on the locale.
static bool is_digit(char x) {
  if (x < '0') return false;
  return x <= '9';
}

boost::shared_ptr<Array2D<bool> > AlignerTools::ReadPharaohAlignmentGrid(const string& al) {
int max_x = 0;
int max_y = 0;
int i = 0;
size_t pos = al.rfind(" ||| ");
if (pos != string::npos) { i = pos + 5; }
while (i < al.size()) {
if (al[i] == '\n' || al[i] == '\r') break;
int x = 0;
while(i < al.size() && is_digit(al[i])) {
x *= 10;
x += al[i] - '0';
++i;
}
if (x > max_x) max_x = x;
assert(i < al.size());
if(al[i] != '-') {
cerr << "BAD ALIGNMENT: " << al << endl;
abort();
}
++i;
int y = 0;
while(i < al.size() && is_digit(al[i])) {
y *= 10;
y += al[i] - '0';
++i;
}
if (y > max_y) max_y = y;
while(i < al.size() && al[i] == ' ') { ++i; }
}

boost::shared_ptr<Array2D<bool> > grid(new Array2D<bool>(max_x + 1, max_y + 1));
i = 0;
if (pos != string::npos) { i = pos + 5; }
while (i < al.size()) {
if (al[i] == '\n' || al[i] == '\r') break;
int x = 0;
while(i < al.size() && is_digit(al[i])) {
x *= 10;
x += al[i] - '0';
++i;
}
assert(i < al.size());
assert(al[i] == '-');
++i;
int y = 0;
while(i < al.size() && is_digit(al[i])) {
y *= 10;
y += al[i] - '0';
++i;
}
(*grid)(x, y) = true;
while(i < al.size() && al[i] == ' ') { ++i; }
}
// cerr << *grid << endl;
return grid;
}

void AlignerTools::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) {
bool need_space = false;
for (int i = 0; i < alignment.width(); ++i)
for (int j = 0; j < alignment.height(); ++j)
if (alignment(i,j)) {
if (need_space) (*out) << ' '; else need_space = true;
(*out) << i << '-' << j;
}
(*out) << endl;
}

// used with lexical models since they may not fully generate the
// source string
void SourceEdgeCoveragesUsingParseIndices(const Hypergraph& g,
Expand Down Expand Up @@ -317,6 +247,6 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
cerr << grid << endl;
}
(*out) << TD::GetString(src_sent) << " ||| " << TD::GetString(trg_sent) << " ||| ";
SerializePharaohFormat(grid, out);
AlignmentPharaoh::SerializePharaohFormat(grid, out);
};

2 changes: 0 additions & 2 deletions decoder/aligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ class Hypergraph;
class SentenceMetadata;

struct AlignerTools {
static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);
static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);

// assumption: g contains derivations of input/ref and
// ONLY input/ref.
Expand Down
11 changes: 10 additions & 1 deletion decoder/cdec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include "inside_outside.h"
#include "exp_semiring.h"
#include "sentence_metadata.h"
#include "../vest/scorer.h"
#include "scorer.h"
#include "apply_fsa_models.h"
#include "program_options.h"
#include "cfg_options.h"
Expand All @@ -59,6 +59,15 @@ void ShowBanner() {
cerr << "cdec v1.0 (c) 2009-2010 by Chris Dyer\n";
}

void ParseTranslatorInputLattice(const string& line, string* input, Lattice* ref) {
string sref;
ParseTranslatorInput(line, input, &sref);
if (sref.size() > 0) {
assert(ref);
LatticeTools::ConvertTextOrPLF(sref, ref);
}
}

void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it)
trg->set_value(it->first, it->second);
Expand Down
2 changes: 1 addition & 1 deletion decoder/ff_bleu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ char const* bleu_usage_verbose="Uses feature id 0! Make sure there are no other
#include "hg.h"
#include "stringlib.h"
#include "sentence_metadata.h"
#include "../vest/scorer.h"
#include "scorer.h"

using namespace std;

Expand Down
2 changes: 1 addition & 1 deletion decoder/ff_lm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ LanguageModelRandLM::LanguageModelRandLM(const string& param) :
filename = argv[0];
}
}
set_order(order);
// set_order(order);
int cache_MB = 200; // increase cache size
randlm::RandLM* rlm = randlm::RandLM::initRandLM(filename, order, cache_MB);
assert(rlm != NULL);
Expand Down
3 changes: 2 additions & 1 deletion decoder/ff_wordalign.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <string>
#include <cmath>

#include "alignment_pharaoh.h"
#include "stringlib.h"
#include "sentence_metadata.h"
#include "hg.h"
Expand Down Expand Up @@ -354,7 +355,7 @@ AlignerResults::AlignerResults(const std::string& param) :
getline(in, line);
if (!in) break;
++lc;
is_aligned_.push_back(AlignerTools::ReadPharaohAlignmentGrid(line));
is_aligned_.push_back(AlignmentPharaoh::ReadPharaohAlignmentGrid(line));
}
cerr << " Loaded " << lc << " refs\n";
}
Expand Down
4 changes: 3 additions & 1 deletion decoder/hg.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ class Hypergraph {
// Copies the info string of edge `o` onto this edge via set_info().
// When USE_INFO_EDGE is not enabled this does nothing.
void copy_info(Edge const& o) {
#if USE_INFO_EDGE
set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space). it's empty if nobody put any info there.
#else
// Reference the parameter so builds without info edges do not warn about it being unused.
(void) o;
#endif
}
void copy_pod(Edge const& o) {
Expand Down Expand Up @@ -142,7 +144,7 @@ class Hypergraph {
#else
std::string info() const { return std::string(); }
void reset_info() { }
void set_info(std::string const& s) { }
void set_info(std::string const& ) { }
#endif
void show(std::ostream &o,unsigned mask=SPAN|RULE) const {
o<<'{';
Expand Down
53 changes: 0 additions & 53 deletions decoder/hg_io.cc
Original file line number Diff line number Diff line change
Expand Up @@ -622,56 +622,3 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) {
}
}

namespace B64 {

// Encoding alphabet: the character for each 6-bit value 0..63.
static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Decoding table for input bytes 43 ('+') through 122 ('z'), indexed by
// (byte - 43). '$' marks a byte with no base64 value (this includes '=');
// every other entry stores (6-bit value + 62).
static const char cd64[]="|$$$}rstuvwxyz{$$$$$$$>?@ABCDEFGHIJKLMNOPQRSTUVW$$$$$$XYZ[\\]^_`abcdefghijklmnopq";

// Encodes the first `len` (1..3) bytes at `in` as four characters written to
// *os, using '=' for the positions that have no input byte.
static void encodeblock(const unsigned char* in, std::ostream* os, int len) {
  // Work on a zero-padded copy: a short final block must neither read past
  // the caller's buffer nor let the bytes that follow it leak into the output.
  unsigned char b[3] = {0, 0, 0};
  for (int k = 0; k < len && k < 3; ++k) b[k] = in[k];
  char out[4];
  out[0] = cb64[ b[0] >> 2 ];
  out[1] = cb64[ ((b[0] & 0x03) << 4) | ((b[1] & 0xf0) >> 4) ];
  out[2] = (len > 1 ? cb64[ ((b[1] & 0x0f) << 2) | ((b[2] & 0xc0) >> 6) ] : '=');
  out[3] = (len > 2 ? cb64[ b[2] & 0x3f ] : '=');
  os->write(out, 4);
}

// Writes the base64 encoding of data[0, size) to *out. The output is always a
// multiple of four characters long ('=' padded); nothing is written when
// size is 0.
void b64encode(const char* data, const size_t size, std::ostream* out) {
  size_t cur = 0;
  while (cur < size) {
    const size_t len = std::min(static_cast<size_t>(3), size - cur);
    encodeblock(reinterpret_cast<const unsigned char*>(&data[cur]), out, static_cast<int>(len));
    cur += len;
  }
}

// Packs four 6-bit values from `in` into three bytes at `out`.
static void decodeblock(const unsigned char* in, unsigned char* out) {
  out[0] = (unsigned char ) (in[0] << 2 | in[1] >> 4);
  out[1] = (unsigned char ) (in[1] << 4 | in[2] >> 2);
  out[2] = (unsigned char ) (((in[2] << 6) & 0xc0) | in[3]);
}

// Decodes the base64 text data[0, insize) into out[0, outsize).
//
// Input is consumed four characters at a time; each group yields up to three
// bytes, of which only as many as still fit in `out` are stored (so a padded
// final group may be decoded into an exactly-sized buffer). Bytes in the
// range 43..122 that have no base64 value, including the '=' padding
// character, decode as zero bits.
//
// Returns true on success. Returns false, after printing a message to stderr,
// when a byte lies outside 43..122, when the input ends in the middle of a
// four-character group, or when input remains but `out` is already full.
// Bytes stored in `out` before the failure are left in place.
bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize) {
  size_t cur = 0;
  size_t ocur = 0;
  unsigned char in[4];
  while (cur < insize) {
    if (ocur >= outsize) {
      std::cerr << "B64 decode error: output buffer too small (" << outsize << " bytes)" << std::endl;
      return false;
    }
    for (int i = 0; i < 4; ++i) {
      if (cur >= insize) {
        std::cerr << "B64 decode error: input truncated at offset " << cur << std::endl;
        return false;
      }
      unsigned char v = data[cur];
      v = (unsigned char) ((v < 43 || v > 122) ? '\0' : cd64[ v - 43 ]);
      if (!v) {
        std::cerr << "B64 decode error at offset " << cur << " offending character: " << static_cast<int>(data[cur]) << std::endl;
        return false;
      }
      v = (unsigned char) ((v == '$') ? '\0' : v - 61);
      if (v) in[i] = v - 1; else in[i] = 0;
      ++cur;
    }
    // Decode into a scratch triple and copy only what fits, so the final
    // group can never write beyond out[outsize - 1].
    unsigned char block[3];
    decodeblock(in, block);
    const size_t n = std::min(static_cast<size_t>(3), outsize - ocur);
    for (size_t k = 0; k < n; ++k) out[ocur + k] = static_cast<char>(block[k]);
    ocur += n;
  }
  return true;
}
}

5 changes: 0 additions & 5 deletions decoder/hg_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,4 @@ struct HypergraphIO {
static std::string Escape(const std::string& s); // PLF helper
};

// Base64 helpers.
namespace B64 {
// Decodes the base64 text in data[0, insize) into `out`, whose capacity is
// `outsize` bytes. Returns false (after reporting on stderr) if a byte outside
// the base64 character range is encountered, true otherwise.
bool b64decode(const unsigned char* data, const size_t insize, char* out, const size_t outsize);
// Writes the base64 encoding of data[0, size) to *out, padding the final
// group with '='.
void b64encode(const char* data, const size_t size, std::ostream* out);
}

#endif
2 changes: 1 addition & 1 deletion decoder/oracle_bleu.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <vector>
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>
#include "../vest/scorer.h"
#include "scorer.h"
#include "hg.h"
#include "ff_factory.h"
#include "ff_bleu.h"
Expand Down
4 changes: 2 additions & 2 deletions decoder/phrasebased_translator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ struct PhraseBasedTranslatorImpl {
PhraseBasedTranslatorImpl(const boost::program_options::variables_map& conf) :
add_pass_through_rules(conf.count("add_pass_through_rules")),
max_distortion(conf["pb_max_distortion"].as<int>()),
kSOURCE_RULE(new TRule("[X] ||| [X,1] ||| [X,1]", true)),
kCONCAT_RULE(new TRule("[X] ||| [X,1] [X,2] ||| [X,1] [X,2]", true)),
kNT_TYPE(TD::Convert("X") * -1) {
assert(max_distortion >= 0);
Expand Down Expand Up @@ -141,6 +140,8 @@ struct PhraseBasedTranslatorImpl {
for (int i = 0; i < phrases.size(); ++i) {
Hypergraph::Edge* edge = minus_lm_forest->AddEdge(phrases[i], Hypergraph::TailNodeVector());
edge->feature_values_ = edge->rule_->scores_;
edge->i_ = s.i;
edge->j_ = s.j;
minus_lm_forest->ConnectEdgeToHeadNode(edge->id_, phrase_head_index);
}
CoverageNodeMap::iterator cit = c.find(s.coverage);
Expand Down Expand Up @@ -189,7 +190,6 @@ struct PhraseBasedTranslatorImpl {

const bool add_pass_through_rules;
const int max_distortion;
TRulePtr kSOURCE_RULE;
const TRulePtr kCONCAT_RULE;
const WordID kNT_TYPE;
boost::shared_ptr<FSTNode> fst;
Expand Down
2 changes: 1 addition & 1 deletion decoder/sentence_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include <cassert>
#include "lattice.h"
#include "../vest/scorer.h"
#include "scorer.h"

struct SentenceMetadata {
SentenceMetadata(int id, const Lattice& ref) :
Expand Down
Loading

0 comments on commit a534616

Please sign in to comment.