23
// Timing fragments: two gettimeofday() samples are converted to milliseconds
// and the difference is printed in seconds. `start` and `end` are declared
// outside the lines visible in this excerpt.
struct timeval t_start,t_end;
29
// Take the starting timestamp.
gettimeofday(&t_start, NULL);
30
// Whole milliseconds: seconds * 1000 plus microseconds / 1000 (integer division).
start = ((long)t_start.tv_sec)*1000+(long)t_start.tv_usec/1000;
34
// Take the ending timestamp and convert it the same way.
gettimeofday(&t_end, NULL);
35
end = ((long)t_end.tv_sec)*1000+(long)t_end.tv_usec/1000;
36
// The millisecond difference divided by 1000 is the elapsed time in seconds.
cout<<"time(s):\t"<<(double(end)-double(start))/1000<<endl;
41
// Forward pass over one sentence.
//
// sentence         - word ids, used as row indices into wordVecDao.
// trainPositionE1  - per-token ids, used as row indices into positionVecDaoE1.
// trainPositionE2  - per-token ids, used as row indices into positionVecDaoE2.
// len              - number of tokens; window start positions run up to len-1.
// tip              - output: receives three ti[] values per filter.
//
// For each of the dimensionC filters, three values mx[0..2] plus a bias are
// appended to r, and CalcTanh is applied to all 3 * dimensionC entries.
// The declarations of res, tot, tot1, mx, ti and r, and the return statement,
// are not visible in this excerpt; presumably r is returned - confirm.
vector<float> train(int *sentence, int *trainPositionE1, int *trainPositionE2, int len, vector<int> &tip) {
44
for (int i = 0; i < dimensionC; i++) {
45
// Offset of filter i inside the word-weight matrix.
int last = i * dimension * window;
46
// Offset of filter i inside the two position-weight matrices.
int lastt = i * dimensionWPE * window;
49
// Three-slot loop; its body is not visible (presumably resets mx/ti - confirm).
for (int i1 = 0; i1<3; i1++)
52
// Window start positions begin before the sentence (at -window+1).
for (int i1 = -window+1; i1 < len; i1++)
57
for (int j = i1; j < i1 + window; j++)
60
// Row of the word embedding for token j.
int last1 = sentence[j] * dimension;
61
for (int k = 0; k < dimension; k++) {
62
// Accumulate filter weight times word-embedding component.
res += matrixW1Dao[last + tot] * wordVecDao[last1+k];
65
// Rows of the two position embeddings for token j.
int last2 = trainPositionE1[j] * dimensionWPE;
66
int last3 = trainPositionE2[j] * dimensionWPE;
67
for (int k = 0; k < dimensionWPE; k++) {
68
// Both position terms share the same weight index (lastt + tot1).
res += matrixW1PositionE1Dao[lastt + tot1] * positionVecDaoE1[last2+k];
69
res += matrixW1PositionE2Dao[lastt + tot1] * positionVecDaoE2[last3+k];
83
// Tests whether token i1 carries the position id -PositionMinE1; the action
// taken is not visible. NOTE(review): presumably this advances the pooling
// segment at the first entity - confirm.
if (i1>=0&&trainPositionE1[i1]==-PositionMinE1)
85
// Same test against the second position sequence.
if (i1>=0&&trainPositionE2[i1]==-PositionMinE2)
90
for (int i1 = 0; i1<3; i1++)
92
// Bias index is 3*i+i1: three consecutive bias slots per filter.
r.push_back(mx[i1]+matrixB1Dao[3*i+i1]);
93
// Record ti[i1] for this slot; train_gradient later reads it as tip[i].
tip.push_back(ti[i1]);
97
// Apply the activation to every pooled feature.
for (int i = 0; i < 3 * dimensionC; i++) {
98
r[i] = CalcTanh(r[i]);
103
// Applies the gradient of one sentence to the live parameter arrays.
//
// sentence, trainPositionE1, trainPositionE2, len - same sentence data that
//     is passed to train().
// r     - activated features; train_bags passes the vector returned by train().
// tip   - per-feature window start positions recorded by train().
// grad  - per-feature gradient supplied by the caller.
// e1, e2, r1 and alpha are not used in the lines visible in this excerpt.
//
// Updates are written to matrixW1, wordVec, matrixW1PositionE1/E2 and
// positionVecE1/E2, while the values multiplied in are read from the
// corresponding *Dao arrays. The declarations/updates of tot and tot1 are
// not visible here.
void train_gradient(int *sentence, int *trainPositionE1, int *trainPositionE2, int len, int e1, int e2, int r1, float alpha, vector<float> &r,vector<int> &tip, vector<float> &grad)
105
for (int i = 0; i < 3 * dimensionC; i++) {
106
// Near-zero gradient test; the action is not visible (presumably skips this feature - confirm).
if (fabs(grad[i])<1e-8)
108
// i/3 is the filter index: there are three features per filter.
int last = (i/3) * dimension * window;
110
int lastt = (i/3) * dimensionWPE * window;
112
// Scale by (1 - r^2); presumably the tanh derivative, since train() applies CalcTanh - confirm.
float g1 = grad[i] * (1 - r[i] * r[i]);
113
for (int j = 0; j < window; j++)
114
// Only tokens of the recorded window that lie inside the sentence are updated.
if (tip[i]+j>=0&&tip[i]+j<len)
116
int last1 = sentence[tip[i] + j] * dimension;
117
for (int k = 0; k < dimension; k++) {
118
// Filter weight moves against g1 times the word-embedding component.
matrixW1[last + tot] -= g1 * wordVecDao[last1+k];
119
// Word embedding moves against g1 times the filter weight.
wordVec[last1 + k] -= g1 * matrixW1Dao[last + tot];
122
int last2 = trainPositionE1[tip[i] + j] * dimensionWPE;
123
int last3 = trainPositionE2[tip[i] + j] * dimensionWPE;
124
for (int k = 0; k < dimensionWPE; k++) {
125
// Position filter weights.
matrixW1PositionE1[lastt + tot1] -= g1 * positionVecDaoE1[last2 + k];
126
matrixW1PositionE2[lastt + tot1] -= g1 * positionVecDaoE2[last3 + k];
127
// Position embeddings.
positionVecE1[last2 + k] -= g1 * matrixW1PositionE1Dao[lastt + tot1];
128
positionVecE2[last3 + k] -= g1 * matrixW1PositionE2Dao[lastt + tot1];
136
// Processes one bag (the sentence indices stored in bags_train[bags_name]).
//
// Visible steps: run train() on every sentence of the bag, weight the
// sentences, score every relation from the weighted feature vector, update
// matrixRelation / matrixRelationPr, build a per-sentence gradient and pass
// it to train_gradient().
// The declarations of r1, dropout, s, wt, r, ss, f_r, sum, g1 and tmp_sum and
// the return statement are not visible in this excerpt; presumably rt is the
// returned score - confirm.
float train_bags(string bags_name)
138
int bags_size = bags_train[bags_name].size();
139
// rList[k]: feature vector of sentence k; tipList[k]: its recorded window positions.
vector<vector<float> > rList;
140
vector<vector<int> > tipList;
141
// Pre-sized so that tipList[k] can be handed to train() by reference.
tipList.resize(bags_size);
143
for (int k=0; k<bags_size; k++)
146
// Global sentence index of the k-th member of the bag.
int i = bags_train[bags_name][k];
148
r1 = relationList[i];
150
// NOTE(review): presumably the assignment above and this assert sit on
// different branches, so the assert checks all sentences share a relation - confirm.
assert(r1==relationList[i]);
151
rList.push_back(train(trainLists[i], trainPositionE1[i], trainPositionE2[i], trainLength[i], tipList[k]));
157
// One 0/1 mask value per feature.
// NOTE(review): rand() is not required to be thread-safe and this function
// is called from trainMode worker threads - confirm this is acceptable.
for (int i = 0; i < 3 * dimensionC; i++)
158
dropout.push_back(rand()%2);
159
vector<float> weight;
160
float weight_sum = 0;
161
for (int k=0; k<bags_size; k++)
164
// Score of sentence k: features times the relation row for r1, each term scaled by wt.
for (int i = 0; i < 3 * dimensionC; i++)
165
s += rList[k][i] * matrixRelationDao[r1 * 3 * dimensionC + i] * wt;
170
// Normalize the weights by weight_sum; the lines that fill weight and
// weight_sum are not visible.
for (int k=0; k<bags_size; k++)
171
weight[k]/=weight_sum;
173
for (int j = 0; j < relationTotal; j++) {
175
// r: weighted sum of the sentence feature vectors.
// NOTE(review): the visible expression does not depend on j, so r looks
// loop-invariant - confirm before hoisting.
r.resize(3 * dimensionC);
176
for (int i = 0; i < 3 * dimensionC; i++)
177
for (int k=0; k<bags_size; k++)
178
r[i] += rList[k][i] * weight[k];
181
for (int i = 0; i < 3 * dimensionC; i++) {
182
// Masked dot product with the row of relation j.
ss += dropout[i] * r[i] * matrixRelationDao[j * 3 * dimensionC + i];
184
// Add the bias term of relation j.
ss += matrixRelationPrDao[j];
185
// Store the exponentiated score.
f_r.push_back(exp(ss));
188
// log(f_r[r1]) - log(sum); presumably sum is the total of f_r, making rt the
// log-probability of relation r1 - confirm.
double rt = (log(f_r[r1]) - log(sum));
190
// grad[k][i]: gradient for feature i of sentence k.
vector<vector<float> > grad;
191
grad.resize(bags_size);
192
for (int k=0; k<bags_size; k++)
193
grad[k].resize(3 * dimensionC);
194
// Per-feature gradient buffer; the lines that fill it are not visible.
vector<float> g1_tmp;
195
g1_tmp.resize(3 * dimensionC);
196
for (int r2 = 0; r2<relationTotal; r2++)
199
// Same weighted feature vector as in the scoring loop above.
r.resize(3 * dimensionC);
200
for (int i = 0; i < 3 * dimensionC; i++)
201
for (int k=0; k<bags_size; k++)
202
r[i] += rList[k][i] * weight[k];
204
// Normalized score of r2 scaled by the step size alpha1.
float g = f_r[r2]/sum*alpha1;
207
for (int i = 0; i < 3 * dimensionC; i++)
212
// g1 accumulates the gradient with respect to feature i.
g1 += g * matrixRelationDao[r2 * dimensionC * 3 + i];
213
// Update the relation row of r2.
matrixRelation[r2 * 3 * dimensionC + i] -= g * r[i];
217
// Update the bias of r2.
matrixRelationPr[r2] -= g;
219
for (int i = 0; i < 3 * dimensionC; i++)
221
float g1 = g1_tmp[i];
223
for (int k=0; k<bags_size; k++)
225
// Direct contribution through the weighted sum.
grad[k][i]+=g1*weight[k];
226
// Contribution through the weight of sentence k.
grad[k][i]+=g1*rList[k][i]*weight[k]*matrixRelationDao[r1 * 3 * dimensionC + i] * wt;
227
matrixRelation[r1 * 3 * dimensionC + i] += g1*rList[k][i]*weight[k]*rList[k][i] * wt;
228
// Weighted sum of feature i over the bag.
tmp_sum += rList[k][i]*weight[k];
230
for (int k1=0; k1<bags_size; k1++)
232
// Subtracted terms scaled by tmp_sum; presumably the weight-normalization part of the gradient - confirm.
grad[k1][i]-=g1*tmp_sum*weight[k1]*matrixRelationDao[r1 * 3 * dimensionC + i] * wt;
233
matrixRelation[r1 * 3 * dimensionC + i] -= g1*tmp_sum*weight[k1]*rList[k1][i] * wt;
238
// Push each sentence's gradient back through train_gradient().
for (int k=0; k<bags_size; k++)
240
int i = bags_train[bags_name][k];
241
train_gradient(trainLists[i], trainPositionE1[i], trainPositionE2[i], trainLength[i], headList[i], tailList[i], relationList[i], alpha1,rList[k], tipList[k], grad[k]);
253
// Bag names, in the order the bags_train map is iterated when the list is built.
vector<string> b_train;
255
// score_tmp is compared against score_max in trainMode; score_max grows by
// batch * num_threads before each batch of worker threads is started.
double score_tmp = 0, score_max = 0;
256
// Mutex held by trainMode around the score_tmp/score_max comparison.
// NOTE(review): no PTHREAD_MUTEX_INITIALIZER or pthread_mutex_init call is
// visible in this excerpt - confirm the mutex is initialized before first use.
pthread_mutex_t mutex1;
259
// Thread entry point handed to pthread_create; `id` carries the thread index
// cast to a pointer. The loop structure and the return statement are not
// visible in this excerpt.
void* trainMode(void *id ) {
260
// Recover the integer id from the pointer.
unsigned long long next_random = (long long)id;
266
// score_tmp and score_max are compared while holding mutex1.
pthread_mutex_lock (&mutex1);
267
if (score_tmp>=score_max)
269
// NOTE(review): two unlock calls are visible; presumably this one is on the
// branch taken when the condition holds and the next is on the normal path - confirm.
pthread_mutex_unlock (&mutex1);
274
pthread_mutex_unlock (&mutex1);
275
// Pick an index bounded by the size of c_train (exact range depends on getRand).
int j = getRand(0, c_train.size());
281
// NOTE(review): j is drawn against c_train.size() but indexes b_train here.
// c_train can be longer than b_train, so presumably a line not visible here
// maps j through c_train first - confirm.
score += train_bags(b_train[j]);
290
// Build the sampling tables from bags_train: b_train lists each bag name
// once, and c_train holds that bag's b_train index repeated max(1,max_size)
// times. max_size and tmp are defined outside the visible lines.
for (map<string,vector<int> >:: iterator it = bags_train.begin(); it!=bags_train.end(); it++)
293
for (int i=0; i<max(1,max_size); i++)
294
// b_train.size() is the index this bag receives when it is appended below.
c_train.push_back(b_train.size());
295
b_train.push_back(it->first);
296
// Running total of sentences across all bags.
tmp+=it->second.size();
298
// Print the number of sampling slots.
cout<<c_train.size()<<endl;
300
// Initialization bounds and zero-filled allocation of the parameter arrays.
// con is used for the relation parameters, con1 for the convolution and
// position parameters.
// NOTE(review): none of the calloc results is checked for NULL in the visible lines.
float con = sqrt(6.0/(dimensionC+relationTotal));
301
float con1 = sqrt(6.0/((dimensionWPE+dimension)*window));
302
// Relation matrix: relationTotal rows of 3 * dimensionC weights.
matrixRelation = (float *)calloc(3 * dimensionC * relationTotal, sizeof(float));
303
// Per-relation bias and its snapshot copy.
matrixRelationPr = (float *)calloc(relationTotal, sizeof(float));
304
matrixRelationPrDao = (float *)calloc(relationTotal, sizeof(float));
305
// Snapshot buffer for the word embeddings (wordVec itself is not allocated in the visible lines).
wordVecDao = (float *)calloc(dimension * wordTotal, sizeof(float));
306
// Position embedding tables, dimensionWPE floats per position id.
positionVecE1 = (float *)calloc(PositionTotalE1 * dimensionWPE, sizeof(float));
307
positionVecE2 = (float *)calloc(PositionTotalE2 * dimensionWPE, sizeof(float));
309
// Convolution weights over word embeddings: dimensionC filters of window * dimension.
matrixW1 = (float*)calloc(dimensionC * dimension * window, sizeof(float));
310
// Convolution weights over the two position embeddings.
matrixW1PositionE1 = (float *)calloc(dimensionC * dimensionWPE * window, sizeof(float));
311
matrixW1PositionE2 = (float *)calloc(dimensionC * dimensionWPE * window, sizeof(float));
312
// Biases: three per filter.
matrixB1 = (float*)calloc(3 * dimensionC, sizeof(float));
314
// Initialize the convolution parameters of each of the dimensionC filters
// with values from getRandU(-con1, con1): the word weights, the two
// position weights, and the three bias slots of the filter.
// tmp1 and tmp2 are declared outside the visible lines; their later use is
// not visible either.
for (int i = 0; i < dimensionC; i++) {
315
// Offset of filter i inside matrixW1.
int last = i * window * dimension;
316
for (int j = dimension * window - 1; j >=0; j--)
317
matrixW1[last + j] = getRandU(-con1, con1);
318
// Reuse `last` as the offset of filter i inside the position-weight matrices.
last = i * window * dimensionWPE;
321
for (int j = dimensionWPE * window - 1; j >=0; j--) {
322
matrixW1PositionE1[last + j] = getRandU(-con1, con1);
323
// Running sum of squared E1 position weights.
tmp1 += matrixW1PositionE1[last + j] * matrixW1PositionE1[last + j] ;
324
matrixW1PositionE2[last + j] = getRandU(-con1, con1);
325
// Running sum of squared E2 position weights.
tmp2 += matrixW1PositionE2[last + j] * matrixW1PositionE2[last + j] ;
327
// matrixB1 holds 3 * dimensionC floats and the forward pass reads the bias
// of filter i, slot j at index 3*i+j, so each slot must be written at that
// index. Writing matrixB1[i] three times left the slots from dimensionC
// upward at their calloc zero and put the rest at the wrong index.
for (int j=0; j<3; j++)
328
matrixB1[i * 3 + j] = getRandU(-con1, con1);
331
// Initialize the bias and the 3 * dimensionC weights of every relation with
// values from getRandU(-con, con).
for (int i = 0; i < relationTotal; i++)
333
matrixRelationPr[i] = getRandU(-con, con);
334
for (int j = 0; j < 3 * dimensionC; j++)
335
// Row i of the relation matrix, column j.
matrixRelation[i * 3 * dimensionC + j] = getRandU(-con, con);
338
// Initialize both position embedding tables with values from
// getRandU(-con1, con1). tmp accumulates the squared entries; its
// declaration and later use are not visible in this excerpt.
for (int i = 0; i < PositionTotalE1; i++) {
340
for (int j = 0; j < dimensionWPE; j++) {
341
positionVecE1[i * dimensionWPE + j] = getRandU(-con1, con1);
342
tmp += positionVecE1[i * dimensionWPE + j] * positionVecE1[i * dimensionWPE + j];
346
// Same procedure for the second position table.
for (int i = 0; i < PositionTotalE2; i++) {
348
for (int j = 0; j < dimensionWPE; j++) {
349
positionVecE2[i * dimensionWPE + j] = getRandU(-con1, con1);
350
tmp += positionVecE2[i * dimensionWPE + j] * positionVecE2[i * dimensionWPE + j];
354
// Allocate the "Dao" buffers. Each has the same element count as its live
// counterpart and is filled by memcpy from it in the training loop.
// NOTE(review): none of the calloc results is checked for NULL in the visible lines.
matrixRelationDao = (float *)calloc(3 * dimensionC*relationTotal, sizeof(float));
355
matrixW1Dao = (float*)calloc(dimensionC * dimension * window, sizeof(float));
356
matrixB1Dao = (float*)calloc(3 * dimensionC, sizeof(float));
358
positionVecDaoE1 = (float *)calloc(PositionTotalE1 * dimensionWPE, sizeof(float));
359
positionVecDaoE2 = (float *)calloc(PositionTotalE2 * dimensionWPE, sizeof(float));
360
matrixW1PositionE1Dao = (float *)calloc(dimensionC * dimensionWPE * window, sizeof(float));
361
matrixW1PositionE2Dao = (float *)calloc(dimensionC * dimensionWPE * window, sizeof(float));
366
// Main training loop: trainTimes passes, each split into npoch rounds. A
// round copies the live parameters into the Dao buffers, then starts
// num_threads trainMode workers and joins them.
for (turn = 0; turn < trainTimes; turn ++) {
369
len = c_train.size();
370
// Rounds per pass: sampling slots divided by the work of one round (integer division).
npoch = len / (batch * num_threads);
371
// Step size handed to train_bags/train_gradient through the global alpha1.
alpha1 = alpha*rate/batch;
376
// Remember the score at the start of the pass so the per-pass delta can be printed.
double score1 = score;
378
for (int k = 1; k <= npoch; k++) {
379
// Raise the limit that trainMode compares score_tmp against.
score_max += batch * num_threads;
381
// Copy every live parameter array into its Dao counterpart before the workers start.
memcpy(positionVecDaoE1, positionVecE1, PositionTotalE1 * dimensionWPE* sizeof(float));
382
memcpy(positionVecDaoE2, positionVecE2, PositionTotalE2 * dimensionWPE* sizeof(float));
383
memcpy(matrixW1PositionE1Dao, matrixW1PositionE1, dimensionC * dimensionWPE * window* sizeof(float));
384
memcpy(matrixW1PositionE2Dao, matrixW1PositionE2, dimensionC * dimensionWPE * window* sizeof(float));
385
memcpy(wordVecDao, wordVec, dimension * wordTotal * sizeof(float));
387
memcpy(matrixW1Dao, matrixW1, sizeof(float) * dimensionC * dimension * window);
388
memcpy(matrixB1Dao, matrixB1, sizeof(float) * 3 * dimensionC);
389
memcpy(matrixRelationPrDao, matrixRelationPr, relationTotal * sizeof(float));
390
memcpy(matrixRelationDao, matrixRelation, 3 * dimensionC*relationTotal * sizeof(float));
391
// One thread handle per worker, allocated every round.
// NOTE(review): the matching free(pt) is not visible in this excerpt - confirm it exists.
pthread_t *pt = (pthread_t *)malloc(num_threads * sizeof(pthread_t));
392
for (int a = 0; a < num_threads; a++)
393
// The loop index is passed as the thread argument by casting the int to a pointer.
// The return value of pthread_create is not checked.
pthread_create(&pt[a], NULL, trainMode, (void *)a);
394
// Wait for every worker before the next round.
for (int a = 0; a < num_threads; a++)
395
pthread_join(pt[a], NULL);
399
cout<<"npoch:\t"<<k<<'/'<<npoch<<endl;
402
// Score gained during this pass, followed by score_tmp.
cout<<"score:\t"<<score-score1<<' '<<score_tmp<<endl;
406
printf("Total Score:\t%f\n",score);
412
cout<<"Train End"<<endl;
415
// Program entry point. The calls made between the progress messages and the
// return statement are not visible in this excerpt.
int main(int argc, char ** argv) {
417
// Open log.txt for writing (truncates any existing file); the result is not checked in the visible lines.
logg = fopen("log.txt","w");
418
cout<<"Init Begin."<<endl;
420
cout<<"Init End."<<endl;