-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdependency.h
162 lines (124 loc) · 4.06 KB
/
dependency.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
//
// dependency.h
// Perceptron GLM NLP Tasks
//
// Created by husnu sensoy on 13/01/14.
// Copyright (c) 2014 husnu sensoy. All rights reserved.
//
#ifndef Perceptron_GLM_NLP_Tasks_dependency_h
#define Perceptron_GLM_NLP_Tasks_dependency_h
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include "darray.h"
#include "hashmap.h"
#include "corpus.h"
#include "datastructure.h"
/**
 * Identifier of the feature template (group) a feature belongs to.
 *
 * Legend for the enumerator names -- NOTE(review): inferred from the names
 * only, confirm against the feature extraction code:
 *   p / c      presumably the parent (head) and child (modifier) token
 *   word / pos presumably the word form / part-of-speech tag
 *   M1 / P1    presumably the token one position before / after
 *   bpos       presumably the POS tag of a token between parent and child
 *
 * The numeric values are written out explicitly; keep them unchanged in case
 * other code hashes or stores them.
 */
typedef enum FeatureGroup {
    /* templates that mention only "p" */
    pword_ppos              = 0,
    pword                   = 1,
    ppos                    = 2,

    /* templates that mention only "c" */
    cword_cpos              = 3,
    cword                   = 4,
    cpos                    = 5,

    /* templates that combine "p" and "c" */
    pword_ppos_cword_cpos   = 6,
    ppos_cword_cpos         = 7,
    pword_ppos_cpos         = 8,
    pword_ppos_cword        = 9,
    pword_cword             = 10,
    ppos_cpos               = 11,

    /* templates that additionally mention M1 / P1 neighbours */
    ppos_pposP1_cposM1_cpos = 12,
    pposM1_ppos_cposM1_cpos = 13,
    ppos_pposP1_cpos_cposP1 = 14,
    pposM1_ppos_cpos_cposP1 = 15,

    /* template that additionally mentions "bpos" */
    ppos_bpos_cpos          = 16
} FeatureGroup;
/**
 * Creates an IntegerIndexedFeatures object.
 *
 * The parameter list is spelled (void) so that this declaration is a true
 * prototype: before C23 an empty "()" leaves the parameters unspecified and
 * the compiler cannot diagnose calls that pass stray arguments.
 *
 * NOTE(review): how the returned object is initialized and who releases it
 * is not visible from this header -- confirm against the definition.
 *
 * @return the newly created IntegerIndexedFeatures
 */
IntegerIndexedFeatures IntegerIndexedFeatures_create(void);
/**
 * Key that identifies a feature: its template plus a string value.
 *
 * Note that the FeatureKey typedef is a POINTER to struct FeatureKey.
 *
 * NOTE(review): presumably used as a Hashmap key together with
 * feature_equal() / feature_hash(); ownership of `value` is not visible
 * from this header -- confirm.
 */
typedef struct FeatureKey {
    FeatureGroup grp;   /* feature template this key belongs to */
    char *value;        /* string part of the key */
} *FeatureKey;
/**
 * Per-feature record: an integer id plus two occurrence counters.
 *
 *  - n1 is incremented when the feature defines an arc.
 *  - n2 is incremented when the feature occurs for a potential arc.
 *
 * Note that the FeatureValue typedef is a POINTER to struct FeatureValue.
 */
typedef struct FeatureValue {
    uint32_t feature_id;    /* integer id of the feature */
    uint32_t n1;            /* times the feature defined an arc */
    uint32_t n2;            /* times the feature occurred for a potential arc */
} *FeatureValue;
/**
 * State of the perceptron parser model.
 *
 * Two parallel families of weight vectors are kept, one for discrete
 * features and one for embedding features.
 *
 * Note that the PerceptronModel typedef is a POINTER to
 * struct PerceptronModel.
 *
 * NOTE(review): the roles given below for the *_avg / *_temp / *_best
 * vectors and for `c` and `n` are inferred from the names only -- confirm
 * against the training code.
 */
typedef struct PerceptronModel {
    IntegerIndexedFeatures features;    /* discrete feature index */

    /* weights for discrete features */
    vector discrete_w;                  /* presumably the current weights */
    vector discrete_w_avg;              /* presumably the averaged weights */
    vector discrete_w_temp;             /* presumably scratch storage */

    /* weights for embedding features */
    vector embedding_w;                 /* presumably the current weights */
    vector embedding_w_avg;             /* presumably the averaged weights */
    vector embedding_w_temp;            /* presumably scratch storage */
    vector embedding_w_best;            /* presumably the best weights seen so far */

    int best_numit;                     /* iteration count of the best model */
    int c;                              /* TODO(review): meaning not visible here */
    size_t n;                           /* TODO(review): meaning not visible here */

    bool use_discrete_features;         /* whether discrete features are used */
} *PerceptronModel;
/**
 * Counter pair for head predictions: how many were correct out of how many
 * were made.
 *
 * Note that the HeadPredictionMetric typedef is a POINTER to
 * struct HeadPredictionMetric.
 */
typedef struct HeadPredictionMetric {
    int true_prediction;    /* number of correct head predictions */
    int total_prediction;   /* number of head predictions made */
} *HeadPredictionMetric;
/**
 * Evaluation counters collected while testing the parser.
 *
 * Note that the ParserTestMetric typedef is a POINTER to
 * struct ParserTestMetric.
 */
typedef struct ParserTestMetric {
    /* Head prediction counts over all words. */
    HeadPredictionMetric all;

    /* Head prediction counts with punctuation heads excluded. */
    HeadPredictionMetric without_punc;

    /* Number of roots that were predicted correctly. */
    int true_root_predicted;

    /* Total number of sentences. */
    int total_sentence;

    /* +1 for each sentence in which all head predictions are correct. */
    int complete_sentence;

    /*
     * +1 for each sentence in which all head predictions are correct for
     * the non-punctuation words.
     */
    int complete_sentence_without_punc;
} *ParserTestMetric;
/*
 * FeatureKey / FeatureValue construction and hashing helpers.
 */

/**
 * Creates a FeatureKey for the given feature group and string value.
 * NOTE(review): whether `value` is copied or only referenced is not visible
 * from this header -- confirm before freeing the argument.
 */
FeatureKey FeatureKey_create(FeatureGroup group, char *value);

/**
 * Creates a FeatureValue for feature id `fid`.
 * NOTE(review): presumably the n1/n2 counters start at zero -- confirm.
 */
FeatureValue FeatureValue_create(uint32_t fid);

/**
 * Comparison callback taking two untyped pointers.
 * NOTE(review): presumably both point to FeatureKey objects; the meaning of
 * the return value (0 == equal, or non-zero == equal) is not visible here --
 * confirm against the definition.
 */
int feature_equal(void *k1, void *k2);

/**
 * Hash callback taking an untyped pointer.
 * NOTE(review): presumably `f` points to a FeatureKey -- confirm.
 */
uint32_t feature_hash(void *f);
/**
 * Creates and initializes a perceptron model.
 *
 * @param transformed_embedding_length  Transformed embedding length.
 * @param iif                           For feature enhancements (current
 *                                      value is always NULL).
 * @return initialized Perceptron model
 */
PerceptronModel create_PerceptronModel(size_t transformed_embedding_length,
                                       IntegerIndexedFeatures iif);

/**
 * Releases a model.
 * NOTE(review): presumably the counterpart of create_PerceptronModel();
 * exactly which members are freed is not visible here.
 */
void PerceptronModel_free(PerceptronModel model);

/**
 * Trains the parser model `mdl` on `corpus`.
 * NOTE(review): presumably `numit` is the number of training iterations and
 * `max_rec` caps the number of records used -- confirm.
 */
void train_perceptron_parser(PerceptronModel mdl,
                             const CoNLLCorpus corpus,
                             int numit,
                             int max_rec);

/**
 * Training entry point without an iteration count.
 * NOTE(review): presumably performs a single pass over `corpus`, with
 * `max_rec` capping the number of records used -- confirm.
 */
void train_once_PerceptronModel(PerceptronModel mdl,
                                const CoNLLCorpus corpus,
                                int max_rec);
/**
 * Creates a ParserTestMetric.
 *
 * The parameter list is spelled (void) so that this declaration is a true
 * prototype: before C23 an empty "()" leaves the parameters unspecified and
 * the compiler cannot diagnose calls that pass stray arguments.
 *
 * NOTE(review): presumably all counters start at zero and the result is
 * released with freeParserTestMetric() -- confirm against the definition.
 */
ParserTestMetric create_ParserTestMetric(void);

/**
 * Prints the contents of `metric`.
 * NOTE(review): output destination and format are not visible here.
 */
void printParserTestMetric(ParserTestMetric metric);

/**
 * Releases `ptm`.
 * NOTE(review): presumably also releases the nested HeadPredictionMetric
 * members -- confirm.
 */
void freeParserTestMetric(ParserTestMetric ptm);
/**
 * Feature extraction entry point for the word pair (`from`, `to`) of
 * `sentence`.
 * NOTE(review): presumably looks features up in `featuremap` and appends the
 * results to `farr`, with `from`/`to` being the indices of a candidate arc's
 * endpoints -- confirm against the definition.
 */
void fill_features(Hashmap *featuremap,
                   DArray *farr,
                   int from,
                   int to,
                   FeaturedSentence sentence);

/**
 * Takes a model, an output stream and a corpus.
 * NOTE(review): presumably parses `corpus` with `mdl` and writes the result
 * to `fp` -- confirm; output format is not visible here.
 */
void parse_and_dump(PerceptronModel mdl, FILE *fp, CoNLLCorpus corpus);
/**
 * Stores a weight vector in a model file.
 *
 * @param fp          File pointer of the model file.
 * @param edimension  Word embedding dimensions.
 * @param w           Weight vector to be stored.
 * @param best_numit  Number of iterations for best performance.
 */
void dump_PerceptronModel(FILE *fp,
                          int edimension,
                          vector w,
                          int best_numit);
/**
 * Parses `sent` and returns an int array.
 * NOTE(review): presumably the predicted parent (head) index per word;
 * length and ownership of the returned array are not visible here.
 */
int *parse(FeaturedSentence sent);

/**
 * Returns an int array derived from `sent`.
 * NOTE(review): presumably the annotated parent (head) index per word;
 * length and ownership of the returned array are not visible here.
 */
int *get_parents(const FeaturedSentence sent);

/**
 * Compares two int arrays of `length` elements.
 * NOTE(review): presumably returns the number of positions at which `model`
 * and `empirical` agree -- confirm.
 */
int nmatch(const int *model, const int *empirical, int length);

/**
 * Prints the `len`-element array `parent`.
 * NOTE(review): presumably one arc per element; output format is not
 * visible here.
 */
void printfarch(int *parent, int len);

/**
 * Marks the current state of `model` as the best one.
 * NOTE(review): presumably stores `numit` in best_numit and snapshots the
 * weights into embedding_w_best -- confirm against the definition.
 */
void mark_best_PerceptronModel(PerceptronModel model, int numit);
#endif