WantWords

import torch, gc, json, os, thulac, string, re, requests, hashlib, urllib.parse
import numpy as np
from django.shortcuts import render
from django.http import HttpResponse
from datetime import datetime
from pytorch_transformers import BertTokenizer

from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=6, n_jobs=1, random_state=0, init='k-means++', max_iter=10)

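# MD5 hex digest helper, used below to sign Baidu Translate API requests.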
def md5(s):
    m = hashlib.md5()
    m.update(s.encode("utf8"))
    return m.hexdigest()

appid = '20***************79'
secretKey = 'D2u0***********Yhz5'

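# Global configuration. Everything runs on CPU; GET_NUM bounds the clustered
# result lists and NUM_RESPONSE the plain ones. words_t is apparently a
# placeholder tensor for the models' `w` argument.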
BASE_DIR = './website_RD/'
device = torch.device('cpu')
torch.backends.cudnn.benchmark = True
words_t = torch.tensor(np.array([0]))
itemsPerCol = 20
GET_NUM = 100
NUM_RESPONSE = 500

tokenizer_class = BertTokenizer
tokenizer_Ch = tokenizer_class.from_pretrained('bert-base-chinese')
tokenizer_En = tokenizer_class.from_pretrained('bert-base-uncased')
#======================== ChineseRD
MODE = 'Psc'
lac = thulac.thulac()

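# Load the Chinese dictionary resources: word/index mappings plus per-word
# sememe, POS and character features, and the corresponding feature masks.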
def load_data():
    (word2index, index2word, _, _, _, _, _) = np.load(BASE_DIR + 'data_inUse1.npy', allow_pickle=True)
    wd_charas = np.load(BASE_DIR + 'data_inUse2.npy', allow_pickle=True)
    ((_, _, _, wd_sems, wd_POSs), (_, mask_s)) = np.load(BASE_DIR + 'data_inUse3.npy', allow_pickle=True)
    mask_s = torch.from_numpy(mask_s).to(device)
    wd_POSs = torch.from_numpy(wd_POSs).float().to(device)
    wd_charas = torch.from_numpy(wd_charas).float().to(device)
    wd_sems = torch.from_numpy(wd_sems).float().to(device)
    wd_C = []
    mask_c = []
    mask_s = mask_s.float()
    return word2index, index2word, (wd_C, wd_sems, wd_POSs, wd_charas), (mask_c, mask_s)

word2index, index2word, wd_features, mask_ = load_data()
(wd_C, wd_sems, wd_POSs, wd_charas) = wd_features
(mask_c, mask_s) = mask_
index2word = np.array(index2word)

# Load the Chinese synonym thesaurus (Cilin) for synonym recommendation
# when the description is a single word
index2synset = [[] for i in range(len(word2index))]
for line in open(BASE_DIR + 'word2synset_synset.txt').readlines():
    wd = line.split()[0]
    synset = line.split()[1:]
    for syn in synset:
        index2synset[word2index[wd]].append(word2index[syn])

MODEL_FILE = BASE_DIR + 'Zh.model'
model = torch.load(MODEL_FILE, map_location=lambda storage, loc: storage)
model.eval()
wd_data_ = json.load(open(BASE_DIR + 'wd_def_for_website_zh.json'))

#wd_data = dict()
wd_data = wd_data_.copy()
wd_defi = wd_data_.copy()
for wd in wd_data_:
    #wd_data[wd] = {'w': wd_data_[wd]['word'], 'd': wd_data_[wd]['definition'], 'P': wd_data_[wd]['POS'], 'l': wd_data_[wd]['length'], 'b': wd_data_[wd]['bihuashu'], 'B': wd_data_[wd]['bihuashu1st'], 'p': wd_data_[wd]['pinyin'], 's': wd_data_[wd]['pinyinshouzimu'], 'r': wd_data_[wd]['rhyme']}
    wd_data[wd] = {'w': wd_data_[wd]['word'], 'P': wd_data_[wd]['POS'], 'l': wd_data_[wd]['length'], 'b': wd_data_[wd]['bihuashu'], 'B': wd_data_[wd]['bihuashu1st'], 'p': wd_data_[wd]['pinyin'], 's': wd_data_[wd]['pinyinshouzimu'], 'r': wd_data_[wd]['rhyme']}
    wd_defi[wd] = wd_data_[wd]['definition']
del wd_data_

#======================== EnglishRD
MODE_en = 'rsl'
MODEL_FILE_en = BASE_DIR + 'En.model'
wd_data_en_ = json.load(open(BASE_DIR + 'wd_def_for_website_En.json'))

wd_data_en = wd_data_en_.copy()
wd_defi_en = wd_data_en_.copy()
for wd in wd_data_en_:
    #wd_data_en[wd] = {'w': wd_data_en_[wd]['word'], 'd': wd_data_en_[wd]['definition'], 'P': wd_data_en_[wd]['POS']}
    wd_data_en[wd] = {'w': wd_data_en_[wd]['word'], 'P': wd_data_en_[wd]['POS']}
    wd_defi_en[wd] = wd_data_en_[wd]['definition']
del wd_data_en_
gc.collect()

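# Convert each word's feature-id list into a multi-hot vector of size `num`;
# ids >= num act as padding and terminate the row.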
def label_multihot(labels, num):
    sm = np.zeros((len(labels), num), dtype=np.float32)
    for i in range(len(labels)):
        for s in labels[i]:
            if s >= num:
                break
            sm[i, s] = 1
    return sm

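# Build a (word_num x max_feature_num) matrix mapping each target word to its
# feature ids, padded with `feature_num`.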
def word2feature(dataset, word_num, feature_num, feature_name):
    max_feature_num = max([len(instance[feature_name]) for instance in dataset])
    ret = np.zeros((word_num, max_feature_num), dtype=np.int64)
    ret.fill(feature_num)
    for instance in dataset:
        if ret[instance['word'], 0] != feature_num:
            continue  # this target word already has a feature mapping (the same word can appear with different definitions in the dataset)
        feature = instance[feature_name]
        ret[instance['word'], :len(feature)] = np.array(feature)
    return torch.tensor(ret, dtype=torch.int64, device=device)

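# Return a 0/1 mask marking words that have no feature of this type at all,
# i.e. whose rows in wd2fea contain only the padding id `feature_num`.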
def mask_noFeature(label_size, wd2fea, feature_num):
    mask_nofea = torch.zeros(label_size, dtype=torch.float32, device=device)
    for i in range(label_size):
        feas = set(wd2fea[i].detach().cpu().numpy().tolist()) - set([feature_num])
        if len(feas) == 0:
            mask_nofea[i] = 1
    return mask_nofea

(_, (_, label_size, _, _), (word2index_en, index2word_en, index2sememe, index2lexname, index2rootaffix)) = np.load(BASE_DIR + 'data_inUse1_en.npy', allow_pickle=True)
(data_train_idx, data_dev_idx, data_test_500_seen_idx, data_test_500_unseen_idx, data_defi_c_idx, data_desc_c_idx) = np.load(BASE_DIR + 'data_inUse2_en.npy', allow_pickle=True)
data_all_idx = data_train_idx + data_dev_idx + data_test_500_seen_idx + data_test_500_unseen_idx + data_defi_c_idx
index2word_en = np.array(index2word_en)
sememe_num = len(index2sememe)
wd2sem = word2feature(data_all_idx, label_size, sememe_num, 'sememes')
wd_sems_ = label_multihot(wd2sem, sememe_num)
wd_sems_ = torch.from_numpy(np.array(wd_sems_)).to(device)
lexname_num = len(index2lexname)
wd2lex = word2feature(data_all_idx, label_size, lexname_num, 'lexnames')
wd_lex = label_multihot(wd2lex, lexname_num)
wd_lex = torch.from_numpy(np.array(wd_lex)).to(device)
rootaffix_num = len(index2rootaffix)
wd2ra = word2feature(data_all_idx, label_size, rootaffix_num, 'root_affix')
wd_ra = label_multihot(wd2ra, rootaffix_num)
wd_ra = torch.from_numpy(np.array(wd_ra)).to(device)
mask_s_ = mask_noFeature(label_size, wd2sem, sememe_num)
mask_l = mask_noFeature(label_size, wd2lex, lexname_num)
mask_r = mask_noFeature(label_size, wd2ra, rootaffix_num)
#del data_all_idx, data_train_idx, data_dev_idx, data_test_idx
del data_all_idx, data_train_idx, data_dev_idx, data_test_500_seen_idx, data_test_500_unseen_idx, data_defi_c_idx
gc.collect()
print('-------------------------3')
# Load WordNet synsets for synonym recommendation when the description is a single word
index2synset_en = [[] for i in range(len(word2index_en))]
for line in open(BASE_DIR + 'word_synsetWords.txt').readlines():
    wd = line.split()[0]
    synset = line.split()[1:]
    for syn in synset:
        index2synset_en[word2index_en[wd]].append(word2index_en[syn])
model_en = torch.load(MODEL_FILE_en, map_location=lambda storage, loc: storage)
model_en.eval()
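
#======================== Django views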
def home(request):
    return render(request, 'home.html')

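# Usage-statistics dashboard: loads precomputed counters from
# 'datastatistics.current' and formats them for the admin template.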
def admin(request):
    result = json.load(open('datastatistics.current', 'r'))
    [updatetime, pageview, totalqueries, uniquevisitor, effectiveflow, weeknum, weekvalue, month2019v, month2020v, visit2019v, visit2020v, feedbackinfo] = result

    pageview = format(pageview, ',')
    totalqueries = format(totalqueries, ',')
    uniquevisitor = format(uniquevisitor, ',')
    effectiveflow = format(effectiveflow, ',')
    def fixvalue2str(value):
        i = -1
        while True:  # mark trailing zeros so they render as gaps
            if value[i] == 0:
                value[i] = -1
                i -= 1
            else:
                break
        value = [v + 1 for v in value]
        value = str(value).replace(', 0', ', ')  # replace zeros with empty slots for the chart
        return value
    weekvalue = fixvalue2str(weekvalue)
    return render(request, 'admin.html', context=locals())

def about(request):
    return render(request, 'about.html')

def about_en(request):
    return render(request, 'about_en.html')

def papers(request):
    return render(request, 'papers.html')

def help(request):
    return render(request, 'help.html')

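# Map the top scores to two-character hex strings, presumably used by the
# front end as color intensities; entries below the threshold maxsc/1.5
# (and everything after the first such entry) stay '00'.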
def Score2Hexstr(score, maxsc):
    thr = maxsc / 1.5
    l = len(score)
    ret = ['00'] * l
    for i in range(l):
        res = int(200 * (score[i] - thr) / thr)
        if res > 15:
            ret[i] = hex(res)[2:]
        else:
            break
    return ret

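# Reverse-dictionary view for Chinese target words. Mode 'CC' is
# Chinese-to-Chinese; 'EC' first translates the English description into
# Chinese via Baidu Translate. A single-word description is answered by
# embedding similarity (with thesaurus synonyms boosted); longer descriptions
# go through the BERT-based model. A hypothetical request (the actual route
# lives in urls.py, not shown here) might look like:
#   GET /ChineseRD/?description=...&mode=CC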
def ChineseRD(request):
    description = request.GET['description']
    RD_mode = request.GET['mode']
    if RD_mode == 'EC':
        # Translate the English description into Chinese via the Baidu Translate API
        q = description
        fromLang = 'en'
        toLang = 'zh'
        salt = "35555"
        sign = appid + q + salt + secretKey
        sign = md5(sign)
        url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
        url = url + '?appid=' + appid + '&q=' + urllib.parse.quote(q) + '&from=' + fromLang + '&to=' + toLang + '&salt=' + str(salt) + '&sign=' + sign
        response = requests.request("GET", url)
        description = json.loads(response.text)['trans_result'][0]['dst']
    with torch.no_grad():
        def_words = [w for w, p in lac.cut(description)]
        def_word_idx = []
        if len(def_words) > 0:
            for def_word in def_words:
                if def_word in word2index:
                    def_word_idx.append(word2index[def_word])
                else:
                    for dw in def_word:
                        try:
                            def_word_idx.append(word2index[dw])
                        except:
                            def_word_idx.append(word2index['<OOV>'])
            x_len = len(def_word_idx)
            if set(def_word_idx) == {word2index['<OOV>']}:
                x_len = 1
            if x_len == 1:
                if def_word_idx[0] > 1:
                    score = ((model.embedding.weight.data).mm((model.embedding.weight.data[def_word_idx[0]]).unsqueeze(1))).squeeze(1)
                    if RD_mode == 'CC':
                        score[def_word_idx[0]] = -10.
                    score[np.array(index2synset[def_word_idx[0]])] *= 2
                    sc, indices = torch.sort(score, descending=True)
                    predicted = indices[:NUM_RESPONSE].detach().cpu().numpy()
                    score = sc[:NUM_RESPONSE].detach().numpy()
                    maxsc = sc[0].detach().item()
                    s2h = Score2Hexstr(score, maxsc)
                else:
                    predicted = []
                    ret = {'error': 1}  # unrecognizable character
            else:
                defi = '[CLS] ' + description
                def_word_idx = tokenizer_Ch.encode(defi)[:80]
                def_word_idx.extend(tokenizer_Ch.encode('[SEP]'))
                definition_words_t = torch.tensor(np.array(def_word_idx), dtype=torch.int64, device=device)
                definition_words_t = definition_words_t.unsqueeze(0)  # batch_size = 1
                score = model('test', x=definition_words_t, w=words_t, ws=wd_sems, wP=wd_POSs, wc=wd_charas, wC=wd_C, msk_s=mask_s, msk_c=mask_c, mode=MODE)
                sc, indices = torch.sort(score, descending=True)
                predicted = indices[0, :NUM_RESPONSE].detach().cpu().numpy()
                score = sc[0, :NUM_RESPONSE].detach().numpy()
                maxsc = sc[0, 0].detach().item()
                s2h = Score2Hexstr(score, maxsc)
        else:
            predicted = []
            ret = {'error': 0}  # empty input
    if len(predicted) > 0:
        res = index2word[predicted]
        ret = []
        cn = -1
        if RD_mode == 'CC':
            def_words = set(def_words)
            for wd in res:
                cn += 1
                if wd not in def_words:
                    try:
                        ret.append(wd_data[wd])
                        ret[len(ret) - 1]['c'] = s2h[cn]
                    except:
                        continue
        else:
            for wd in res:
                cn += 1
                try:
                    ret.append(wd_data[wd])
                    ret[len(ret) - 1]['c'] = s2h[cn]
                except:
                    continue
    return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json; charset=utf-8")

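# Rank the six k-means clusters by the mean of their top-5 member scores and
# return a mapping from original cluster id to rank (0 = highest-scoring).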
def getClass2Class(r, score):
    perCluster = [[], [], [], [], [], []]
    for i in range(GET_NUM):
        perCluster[r[i]].append(score[i])
    scorePC = []
    for i in range(6):
        l = len(perCluster[i]) if len(perCluster[i]) < 5 else 5
        scorePC.append(sum(perCluster[i][:l]) / l)
    ind = [indsc[0] for indsc in sorted(enumerate(scorePC), key=lambda x: x[1], reverse=True)]
    class2class = [0, 0, 0, 0, 0, 0]
    for i in range(6):
        class2class[ind[i]] = i
    return class2class

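# Same as ChineseRD, but also k-means-clusters the embeddings of the top
# GET_NUM candidates and attaches a cluster rank 'C' and a definition 'd'
# to each result.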
def ChineseRDCluster(request):
    description = request.GET['description']
    RD_mode = request.GET['mode']
    if RD_mode == 'EC':
        # Translate the English description into Chinese via the Baidu Translate API
        q = description
        fromLang = 'en'
        toLang = 'zh'
        salt = "35555"
        sign = appid + q + salt + secretKey
        sign = md5(sign)
        url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
        url = url + '?appid=' + appid + '&q=' + urllib.parse.quote(q) + '&from=' + fromLang + '&to=' + toLang + '&salt=' + str(salt) + '&sign=' + sign
        response = requests.request("GET", url)
        description = json.loads(response.text)['trans_result'][0]['dst']
    with torch.no_grad():
        def_words = [w for w, p in lac.cut(description)]
        def_word_idx = []
        if len(def_words) > 0:
            for def_word in def_words:
                if def_word in word2index:
                    def_word_idx.append(word2index[def_word])
                else:
                    for dw in def_word:
                        try:
                            def_word_idx.append(word2index[dw])
                        except:
                            def_word_idx.append(word2index['<OOV>'])
            x_len = len(def_word_idx)
            if set(def_word_idx) == {word2index['<OOV>']}:
                x_len = 1
            if x_len == 1:
                if def_word_idx[0] > 1:
                    score = ((model.embedding.weight.data).mm((model.embedding.weight.data[def_word_idx[0]]).unsqueeze(1))).squeeze(1)
                    if RD_mode == 'CC':
                        score[def_word_idx[0]] = -10.
                    score[np.array(index2synset[def_word_idx[0]])] *= 2
                    sc, indices = torch.sort(score, descending=True)
                    predicted = indices[:GET_NUM].detach().cpu().numpy()
                    score = sc[:GET_NUM].detach().numpy()
                    maxsc = sc[0].detach().item()
                    s2h = Score2Hexstr(score, maxsc)
                    r = kmeans.fit_predict(model.embedding.weight.data[predicted[:GET_NUM]].cpu().numpy())
                    class2class = getClass2Class(r, score[:GET_NUM])
                else:
                    predicted = []
                    ret = {'error': 1}  # unrecognizable character
            else:
                defi = '[CLS] ' + description
                def_word_idx = tokenizer_Ch.encode(defi)[:80]
                def_word_idx.extend(tokenizer_Ch.encode('[SEP]'))
                definition_words_t = torch.tensor(np.array(def_word_idx), dtype=torch.int64, device=device)
                definition_words_t = definition_words_t.unsqueeze(0)  # batch_size = 1
                score = model('test', x=definition_words_t, w=words_t, ws=wd_sems, wP=wd_POSs, wc=wd_charas, wC=wd_C, msk_s=mask_s, msk_c=mask_c, mode=MODE)
                sc, indices = torch.sort(score, descending=True)
                predicted = indices[0, :GET_NUM].detach().cpu().numpy()
                score = sc[0, :GET_NUM].detach().numpy()
                maxsc = sc[0, 0].detach().item()
                s2h = Score2Hexstr(score, maxsc)
                r = kmeans.fit_predict(model.embedding.weight.data[predicted[:GET_NUM]].cpu().numpy())
                class2class = getClass2Class(r, score[:GET_NUM])
        else:
            predicted = []
            ret = {'error': 0}  # empty input
    if len(predicted) > 0:
        res = index2word[predicted]
        ret = []
        cn = -1
        if RD_mode == 'CC':
            def_words = set(def_words)
            for wd in res:
                cn += 1
                if wd not in def_words:
                    try:
                        ret.append(wd_data[wd])
                        ret[len(ret) - 1]['c'] = s2h[cn]
                        ret[len(ret) - 1]['C'] = class2class[int(r[cn])]  # cast to int: numpy int64 is not JSON-serializable
                        ret[len(ret) - 1]['d'] = wd_defi[wd]
                        ret.sort(key=lambda x: x['C'])
                    except:
                        continue
        else:
            for wd in res:
                cn += 1
                try:
                    ret.append(wd_data[wd])
                    ret[len(ret) - 1]['c'] = s2h[cn]
                    ret[len(ret) - 1]['C'] = class2class[int(r[cn])]  # cast to int: numpy int64 is not JSON-serializable
                    ret[len(ret) - 1]['d'] = wd_defi[wd]
                    ret.sort(key=lambda x: x['C'])
                except:
                    continue
    return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json; charset=utf-8")

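# English counterpart of ChineseRDCluster. Mode 'EE' is English-to-English;
# 'CE' first translates the Chinese description into English via Baidu
# Translate.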
def EnglishRDCluster(request):
    description = request.GET['description']
    RD_mode = request.GET['mode']
    if RD_mode == 'CE':
        zh_pattern = re.compile(r"[\u4e00-\u9fa5]+")
        desc = ''.join(zh_pattern.findall(description))
        def_words = [w for w, p in lac.cut(desc)]
        # Translate the Chinese description into English via the Baidu Translate API
        q = description
        fromLang = 'zh'
        toLang = 'en'
        salt = "35555"
        sign = appid + q + salt + secretKey
        sign = md5(sign)
        url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
        url = url + '?appid=' + appid + '&q=' + urllib.parse.quote(q) + '&from=' + fromLang + '&to=' + toLang + '&salt=' + str(salt) + '&sign=' + sign
        response = requests.request("GET", url)
        description = json.loads(response.text)['trans_result'][0]['dst']
    with torch.no_grad():
        def_words = re.sub('[%s]' % re.escape(string.punctuation), ' ', description)
        def_words = def_words.lower()
        def_words = def_words.strip().split()
        def_word_idx = []
        if len(def_words) > 0:
            for def_word in def_words:
                if def_word in word2index_en:
                    def_word_idx.append(word2index_en[def_word])
                else:
                    def_word_idx.append(word2index_en['<OOV>'])
            x_len = len(def_word_idx)
            if set(def_word_idx) == {word2index_en['<OOV>']}:
                x_len = 1
            if x_len == 1:
                if def_word_idx[0] > 1:
                    score = ((model_en.embedding.weight.data).mm((model_en.embedding.weight.data[def_word_idx[0]]).unsqueeze(1))).squeeze(1)
                    if RD_mode == 'EE':
                        score[def_word_idx[0]] = -10.
                    score[np.array(index2synset_en[def_word_idx[0]])] *= 2
                    sc, indices = torch.sort(score, descending=True)
                    predicted = indices[:GET_NUM].detach().cpu().numpy()
                    score = sc[:GET_NUM].detach().numpy()
                    maxsc = sc[0].detach().item()
                    s2h = Score2Hexstr(score, maxsc)
                    r = kmeans.fit_predict(model_en.embedding.weight.data[predicted[:GET_NUM]].cpu().numpy())
                    class2class = getClass2Class(r, score[:GET_NUM])
                else:
                    predicted = []
                    ret = {'error': 1}  # unrecognizable character
            else:
                defi = '[CLS] ' + description
                def_word_idx = tokenizer_En.encode(defi)[:60]
                def_word_idx.extend(tokenizer_En.encode('[SEP]'))
                definition_words_t = torch.tensor(np.array(def_word_idx), dtype=torch.int64, device=device)
                definition_words_t = definition_words_t.unsqueeze(0)  # batch_size = 1
                score = model_en('test', x=definition_words_t, w=words_t, ws=wd_sems_, wl=wd_lex, wr=wd_ra, msk_s=mask_s_, msk_l=mask_l, msk_r=mask_r, mode=MODE_en)
                sc, indices = torch.sort(score, descending=True)
                predicted = indices[0, :GET_NUM].detach().cpu().numpy()
                score = sc[0, :GET_NUM].detach().numpy()
                maxsc = sc[0, 0].detach().item()
                s2h = Score2Hexstr(score, maxsc)
                r = kmeans.fit_predict(model_en.embedding.weight.data[predicted[:GET_NUM]].cpu().numpy())
                class2class = getClass2Class(r, score[:GET_NUM])
        else:
            predicted = []
            ret = {'error': 0}  # empty input
    if len(predicted) > 0:
        res = index2word_en[predicted]
        ret = []
        cn = -1
        if RD_mode == "EE":
            def_words = set(def_words)
            for wd in res:
                cn += 1
                if len(wd) > 1 and (wd not in def_words):
                    try:
                        ret.append(wd_data_en[wd])  # wd_data_en[wd] = {'w': word, 'P': POS list}
                        ret[len(ret) - 1]['c'] = s2h[cn]
                        ret[len(ret) - 1]['C'] = class2class[int(r[cn])]  # cast to int: numpy int64 is not JSON-serializable
                        ret[len(ret) - 1]['d'] = wd_defi_en[wd]
                        ret.sort(key=lambda x: x['C'])
                    except:
                        continue
        else:
            for wd in res:
                cn += 1
                if len(wd) > 1:
                    try:
                        ret.append(wd_data_en[wd])  # wd_data_en[wd] = {'w': word, 'P': POS list}
                        ret[len(ret) - 1]['c'] = s2h[cn]
                        ret[len(ret) - 1]['C'] = class2class[int(r[cn])]  # cast to int: numpy int64 is not JSON-serializable
                        ret[len(ret) - 1]['d'] = wd_defi_en[wd]
                        ret.sort(key=lambda x: x['C'])
                    except:
                        continue
    return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json; charset=utf-8")

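# Reverse-dictionary view for English target words, without clustering;
# returns up to NUM_RESPONSE candidates with confidence shading 'c'.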
def EnglishRD(request):
    description = request.GET['description']
    RD_mode = request.GET['mode']
    if RD_mode == 'CE':
        # Translate the Chinese description into English via the Baidu Translate API
        q = description
        fromLang = 'zh'
        toLang = 'en'
        salt = "35555"
        sign = appid + q + salt + secretKey
        sign = md5(sign)
        url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
        url = url + '?appid=' + appid + '&q=' + urllib.parse.quote(q) + '&from=' + fromLang + '&to=' + toLang + '&salt=' + str(salt) + '&sign=' + sign
        response = requests.request("GET", url)
        description = json.loads(response.text)['trans_result'][0]['dst']
        #print(description)
    with torch.no_grad():
        def_words = re.sub('[%s]' % re.escape(string.punctuation), ' ', description)
        def_words = def_words.lower()
        def_words = def_words.strip().split()
        def_word_idx = []
        if len(def_words) > 0:
            for def_word in def_words:
                if def_word in word2index_en:
                    def_word_idx.append(word2index_en[def_word])
                else:
                    def_word_idx.append(word2index_en['<OOV>'])
            x_len = len(def_word_idx)
            if set(def_word_idx) == {word2index_en['<OOV>']}:
                x_len = 1
            if x_len == 1:
                if def_word_idx[0] > 1:
                    score = ((model_en.embedding.weight.data).mm((model_en.embedding.weight.data[def_word_idx[0]]).unsqueeze(1))).squeeze(1)
                    if RD_mode == 'EE':
                        score[def_word_idx[0]] = -10.
                    score[np.array(index2synset_en[def_word_idx[0]])] *= 2
                    sc, indices = torch.sort(score, descending=True)
                    predicted = indices[:NUM_RESPONSE].detach().cpu().numpy()
                    score = sc[:NUM_RESPONSE].detach().numpy()
                    maxsc = sc[0].detach().item()
                    s2h = Score2Hexstr(score, maxsc)
                else:
                    predicted = []
                    ret = {'error': 1}  # unrecognizable character
            else:
                defi = '[CLS] ' + description
                def_word_idx = tokenizer_En.encode(defi)[:60]
                def_word_idx.extend(tokenizer_En.encode('[SEP]'))
                definition_words_t = torch.tensor(np.array(def_word_idx), dtype=torch.int64, device=device)
                definition_words_t = definition_words_t.unsqueeze(0)  # batch_size = 1
                score = model_en('test', x=definition_words_t, w=words_t, ws=wd_sems_, wl=wd_lex, wr=wd_ra, msk_s=mask_s_, msk_l=mask_l, msk_r=mask_r, mode=MODE_en)
                sc, indices = torch.sort(score, descending=True)
                predicted = indices[0, :NUM_RESPONSE].detach().cpu().numpy()
                score = sc[0, :NUM_RESPONSE].detach().numpy()
                maxsc = sc[0, 0].detach().item()
                s2h = Score2Hexstr(score, maxsc)
        else:
            predicted = []
            ret = {'error': 0}  # empty input
    if len(predicted) > 0:
        res = index2word_en[predicted]
        ret = []
        cn = -1
        if RD_mode == "EE":
            def_words = set(def_words)
            for wd in res:
                cn += 1
                if len(wd) > 1 and (wd not in def_words):
                    try:
                        ret.append(wd_data_en[wd])  # wd_data_en[wd] = {'w': word, 'P': POS list}
                        ret[len(ret) - 1]['c'] = s2h[cn]
                    except:
                        continue
        else:
            for wd in res:
                cn += 1
                if len(wd) > 1:
                    try:
                        ret.append(wd_data_en[wd])  # wd_data_en[wd] = {'w': word, 'P': POS list}
                        ret[len(ret) - 1]['c'] = s2h[cn]
                    except:
                        continue
    return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json; charset=utf-8")

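# Append user feedback to a monthly log file: mode 'FBS' for suggestions,
# 'FBW' for word-description pairs.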
def feedback(request):
    content = request.GET['content']
    FBmode = request.GET['mode']
    if FBmode == 'FBS':
        f = open('./feedBackLog/' + datetime.now().date().strftime('%Y%m') + 'suggestion.log', 'a')
        f.write(datetime.now().strftime('[%Y%m%d%H%M%S] ') + content + '\n')
        f.close()
    elif FBmode == 'FBW':
        f = open('./feedBackLog/' + datetime.now().date().strftime('%Y%m') + 'wordsDesc.log', 'a')
        f.write(datetime.now().strftime('[%Y%m%d%H%M%S] ') + content + '\n')
        f.close()
    return HttpResponse("")

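# Look up definitions for a whitespace-separated word list passed as
# parameter 'w' (POST or GET).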
def GetChDefis(request):
    if request.method == 'POST':
        words = request.POST['w'].split()
    else:  # GET method
        words = request.GET['w'].split()
    ret = []
    for w in words:
        ret.append(wd_defi[w])
    return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json; charset=utf-8")

def GetEnDefis(request):
    if request.method == 'POST':
        words = request.POST['w'].split()
    else:  # GET method
        words = request.GET['w'].split()
    ret = []
    for w in words:
        ret.append(wd_defi_en[w])
    return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json; charset=utf-8")