WantWords
600 lines · 26.7 KB
import torch, gc, json, os, thulac, string, re, requests, hashlib, urllib.parse
import numpy as np
from django.shortcuts import render, render_to_response  # NOTE(review): render_to_response is removed in modern Django; only render is used below
from django.http import HttpResponse
from datetime import datetime
from pytorch_transformers import *

from sklearn.cluster import KMeans
# Shared KMeans instance used to group the top-GET_NUM candidates into 6
# clusters for the "clustered" result views; max_iter is kept small because
# clustering runs once per request.
kmeans = KMeans(n_clusters=6, n_jobs=1, random_state=0, init='k-means++', max_iter=10)
10
def md5(text):
    """Return the hex MD5 digest of *text* (UTF-8 encoded).

    Used to sign Baidu translate API requests. The parameter was renamed
    from ``str``, which shadowed the builtin; every caller in this file
    passes it positionally, so the interface is unchanged.
    """
    m = hashlib.md5()
    m.update(text.encode("utf8"))
    return m.hexdigest()
# Baidu translate API credentials (redacted in the public release).
appid = '20***************79'
secretKey = 'D2u0***********Yhz5'

BASE_DIR = './website_RD/'    # directory holding models and data files
device = torch.device('cpu')  # inference runs on CPU in this deployment
torch.backends.cudnn.benchmark = True
# Placeholder word-index tensor passed as ``w=`` to the models' forward().
# FIX: the original defined words_t twice with identical values; the
# duplicate assignment was removed.
words_t = torch.tensor(np.array([0]))
itemsPerCol = 20    # words shown per column on the result page
GET_NUM = 100       # candidates fetched for the clustered views
NUM_RESPONSE = 500  # candidates returned by the plain RD views
26
# BERT tokenizers for encoding query descriptions in Chinese and English.
tokenizer_class = BertTokenizer
tokenizer_Ch = tokenizer_class.from_pretrained('bert-base-chinese')
tokenizer_En = tokenizer_class.from_pretrained('bert-base-uncased')
#========================ChineseRD
MODE = 'Psc'  # feature-mode string passed to the Chinese model's forward()
lac = thulac.thulac()  # THULAC Chinese word segmenter
33
def load_data():
    """Load the Chinese reverse-dictionary vocabulary and word features.

    Reads the three ``data_inUse*.npy`` bundles under ``BASE_DIR`` and moves
    the dense feature matrices onto the module-level ``device`` as floats.

    Returns:
        (word2index, index2word,
         (wd_C, wd_sems, wd_POSs, wd_charas),  # word features
         (mask_c, mask_s))                     # feature masks
        ``wd_C`` and ``mask_c`` are intentionally empty lists: the character
        features they would hold are unused in this deployment.
    """
    word2index, index2word, _, _, _, _, _ = np.load(
        BASE_DIR + 'data_inUse1.npy', allow_pickle=True)
    chara_feats = np.load(BASE_DIR + 'data_inUse2.npy', allow_pickle=True)
    (_, _, _, sem_feats, pos_feats), (_, sem_mask) = np.load(
        BASE_DIR + 'data_inUse3.npy', allow_pickle=True)
    # Convert to float tensors on the inference device.
    sem_mask = torch.from_numpy(sem_mask).to(device).float()
    pos_feats = torch.from_numpy(pos_feats).float().to(device)
    chara_feats = torch.from_numpy(chara_feats).float().to(device)
    sem_feats = torch.from_numpy(sem_feats).float().to(device)
    return word2index, index2word, ([], sem_feats, pos_feats, chara_feats), ([], sem_mask)
46
word2index, index2word, wd_features, mask_ = load_data()
(wd_C, wd_sems, wd_POSs, wd_charas) = wd_features
(mask_c, mask_s) = mask_
index2word = np.array(index2word)  # array form allows fancy indexing by prediction ids

# Load the synonym thesaurus (Cilin), used to boost synonyms when the query
# description is a single word.
index2synset = [[] for i in range(len(word2index))]
for line in open(BASE_DIR + 'word2synset_synset.txt').readlines():
    wd = line.split()[0]
    synset = line.split()[1:]
    for syn in synset:
        index2synset[word2index[wd]].append(word2index[syn])
59
MODEL_FILE = BASE_DIR + 'Zh.model'
# Load the Chinese reverse-dictionary model onto CPU, inference only.
model = torch.load(MODEL_FILE, map_location=lambda storage, loc: storage)
model.eval()
wd_data_ = json.load(open(BASE_DIR+'wd_def_for_website_zh.json'))

# Split the raw dictionary into display metadata (wd_data, with one-letter
# keys to keep JSON responses small: w=word, P=POS, l=length, b=stroke count,
# B=first-stroke count, p=pinyin, s=pinyin initials, r=rhyme) and the
# definitions themselves (wd_defi, fetched separately).
wd_data = wd_data_.copy()
wd_defi = wd_data_.copy()
for wd in wd_data_:
    wd_data[wd] = {'w': wd_data_[wd]['word'], 'P': wd_data_[wd]['POS'], 'l': wd_data_[wd]['length'], 'b': wd_data_[wd]['bihuashu'], 'B': wd_data_[wd]['bihuashu1st'], 'p': wd_data_[wd]['pinyin'], 's': wd_data_[wd]['pinyinshouzimu'], 'r': wd_data_[wd]['rhyme']}
    wd_defi[wd] = wd_data_[wd]['definition']
del wd_data_  # free the raw copy
73
#========================EnglishRD
MODE_en = 'rsl'  # feature-mode string passed to the English model's forward()
MODEL_FILE_en = BASE_DIR + 'En.model'
wd_data_en_ = json.load(open(BASE_DIR+'wd_def_for_website_En.json'))

# Split the raw English dictionary into display metadata (wd_data_en) and
# definitions (wd_defi_en), mirroring the Chinese structures above.
wd_data_en = wd_data_en_.copy()
wd_defi_en = wd_data_en_.copy()
for wd in wd_data_en_:
    wd_data_en[wd] = {'w': wd_data_en_[wd]['word'], 'P': wd_data_en_[wd]['POS']}
    wd_defi_en[wd] = wd_data_en_[wd]['definition']
del wd_data_en_  # free the raw copy
gc.collect()
87
def label_multihot(labels, num):
    """Convert per-word feature-index lists into a multi-hot float matrix.

    Args:
        labels: sequence of index sequences, one per word.
        num: number of feature columns.

    Returns:
        (len(labels), num) float32 ndarray with 1.0 at each listed index.
        The first index >= num stops processing of that row — this matches
        word2feature's padding scheme, where the pad value equals ``num``
        and fills the tail of each row.
    """
    out = np.zeros((len(labels), num), dtype=np.float32)
    for row, feats in enumerate(labels):
        for idx in feats:
            if idx >= num:
                break  # hit padding: rest of the row is padding too
            out[row, idx] = 1
    return out
96
def word2feature(dataset, word_num, feature_num, feature_name):
    """Build a (word_num, max_features) index matrix mapping words to features.

    Unfilled slots are padded with ``feature_num`` (one past the last valid
    feature id). Each word takes its feature list from the first dataset
    instance that mentions it; later instances of the same word (same word,
    different definition) are skipped.

    Returns an int64 tensor on the module-level ``device``.
    """
    widest = max(len(inst[feature_name]) for inst in dataset)
    table = np.full((word_num, widest), feature_num, dtype=np.int64)
    for inst in dataset:
        wid = inst['word']
        if table[wid, 0] != feature_num:
            # Word already mapped by an earlier instance.
            continue
        feats = inst[feature_name]
        table[wid, :len(feats)] = np.array(feats)
    return torch.tensor(table, dtype=torch.int64, device=device)
107
def mask_noFeature(label_size, wd2fea, feature_num):
    """Return a float mask with 1.0 for words that have NO features in wd2fea.

    A row consisting solely of the pad value ``feature_num`` means the word
    has no features of this kind; such words are masked so the model can
    ignore that feature channel for them.
    """
    no_feature = torch.zeros(label_size, dtype=torch.float32, device=device)
    for wid in range(label_size):
        row = wd2fea[wid].detach().cpu().numpy().tolist()
        if not set(row) - {feature_num}:
            no_feature[wid] = 1
    return no_feature
115
# English RD data: vocabulary, per-word features (sememes, WordNet lexnames,
# root affixes) and masks marking words without each feature kind.
(_, (_, label_size, _, _), (word2index_en, index2word_en, index2sememe, index2lexname, index2rootaffix)) = np.load(BASE_DIR + 'data_inUse1_en.npy', allow_pickle=True)
(data_train_idx, data_dev_idx, data_test_500_seen_idx, data_test_500_unseen_idx, data_defi_c_idx, data_desc_c_idx) = np.load(BASE_DIR + 'data_inUse2_en.npy', allow_pickle=True)
data_all_idx = data_train_idx + data_dev_idx + data_test_500_seen_idx + data_test_500_unseen_idx + data_defi_c_idx
index2word_en = np.array(index2word_en)  # array form allows fancy indexing by prediction ids
sememe_num = len(index2sememe)
# Multi-hot sememe features per word.
wd2sem = word2feature(data_all_idx, label_size, sememe_num, 'sememes')
wd_sems_ = label_multihot(wd2sem, sememe_num)
wd_sems_ = torch.from_numpy(np.array(wd_sems_)).to(device)
lexname_num = len(index2lexname)
# Multi-hot WordNet lexname features per word.
wd2lex = word2feature(data_all_idx, label_size, lexname_num, 'lexnames')
wd_lex = label_multihot(wd2lex, lexname_num)
wd_lex = torch.from_numpy(np.array(wd_lex)).to(device)
rootaffix_num = len(index2rootaffix)
# Multi-hot root/affix features per word.
wd2ra = word2feature(data_all_idx, label_size, rootaffix_num, 'root_affix')
wd_ra = label_multihot(wd2ra, rootaffix_num)
wd_ra = torch.from_numpy(np.array(wd_ra)).to(device)
# Masks for words lacking each feature kind.
mask_s_ = mask_noFeature(label_size, wd2sem, sememe_num)
mask_l = mask_noFeature(label_size, wd2lex, lexname_num)
mask_r = mask_noFeature(label_size, wd2ra, rootaffix_num)
del data_all_idx, data_train_idx, data_dev_idx, data_test_500_seen_idx, data_test_500_unseen_idx, data_defi_c_idx
gc.collect()
print('-------------------------3')
# Load WordNet synsets, used to boost synonyms when the English query
# description is a single word.
index2synset_en = [[] for i in range(len(word2index_en))]
for line in open(BASE_DIR + 'word_synsetWords.txt').readlines():
    wd = line.split()[0]
    synset = line.split()[1:]
    for syn in synset:
        index2synset_en[word2index_en[wd]].append(word2index_en[syn])
# English RD model, CPU inference only.
model_en = torch.load(MODEL_FILE_en, map_location=lambda storage, loc: storage)
model_en.eval()
def home(request):
    """Render the landing page."""
    return render(request, 'home.html')
150
def admin(request):
    """Render the usage-statistics dashboard.

    Reads the pre-aggregated snapshot 'datastatistics.current' (produced by
    an external job) and formats the counters for the admin template.
    """
    result = json.load(open('datastatistics.current', 'r'))
    [updatetime, pageview, totalqueries, uniquevisitor, effectiveflow, weeknum, weekvalue, month2019v, month2020v, visit2019v, visit2020v, feedbackinfo] = result

    # Thousands separators for the headline counters.
    pageview = format(pageview, ',')
    totalqueries = format(totalqueries, ',')
    uniquevisitor = format(uniquevisitor, ',')
    effectiveflow = format(effectiveflow, ',')
    def fixvalue2str(value):
        # Turn a list of daily counts into the string the chart expects:
        # trailing zeros (days with no data yet) are set to -1, every entry
        # is shifted by +1 so those slots become 0, then ', 0' is blanked so
        # the chart skips them.
        # NOTE(review): the +1 shift also increments real data points —
        # presumably compensated on the client side; confirm before reuse.
        i = -1
        while(True): # fix the value if 0
            if value[i] == 0:
                value[i] = -1
                i -= 1
            else:
                break
        value = [i+1 for i in value]
        value = str(value).replace(', 0', ', ') # replace 0 to null for painting
        return value
    weekvalue = fixvalue2str(weekvalue)
    # locals() exposes every variable above to the template context.
    return render(request, 'admin.html', context=locals())
172
def about(request):
    """Render the Chinese "about" page."""
    return render(request, 'about.html')
175
def about_en(request):
    """Render the English "about" page."""
    return render(request, 'about_en.html')
178
def papers(request):
    """Render the related-papers page."""
    return render(request, 'papers.html')
181
def help(request):
    """Render the help page. (Shadows the builtin ``help``; kept because the
    name is referenced by the URL configuration.)"""
    return render(request, 'help.html')
184
def Score2Hexstr(score, maxsc):
    """Map descending scores to two-character hex strings (colour strength).

    Scores above ``maxsc / 1.5`` are scaled to roughly 0x10-0x64; once the
    scaled value drops to 15 or below, the remaining entries keep the
    default '00'. ``score`` must be sorted descending — the callers
    guarantee this via torch.sort.
    """
    threshold = maxsc / 1.5
    hexes = ['00'] * len(score)
    for pos, sc in enumerate(score):
        scaled = int(200 * (sc - threshold) / threshold)
        if scaled <= 15:
            break  # descending input: everything after stays '00'
        hexes[pos] = hex(scaled)[2:]
    return hexes
196
def ChineseRD(request):
    """Chinese reverse dictionary: description -> ranked candidate words.

    GET params:
        description: the query text.
        mode: 'CC' (Chinese->Chinese) or 'EC' (English description, first
              machine-translated to Chinese via the Baidu API).

    Returns a JSON HttpResponse: a list of word-metadata dicts annotated
    with 'c' (hex colour strength), or {'error': 0|1} for empty input /
    unrecognizable characters.
    """
    description = request.GET['description']
    RD_mode = request.GET['mode']
    if RD_mode == 'EC':
        # Translate the English description to Chinese with the Baidu API.
        q = description
        fromLang = 'en'
        toLang = 'zh'
        salt = "35555"
        sign = appid+q+salt+secretKey
        sign = md5(sign)
        url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
        url = url + '?appid='+appid+'&q='+urllib.parse.quote(q)+'&from='+fromLang+'&to='+toLang+'&salt='+str(salt)+'&sign='+sign
        response = requests.request("GET", url)
        # FIX: parse the API payload with json.loads instead of eval() —
        # eval on a remote response was a code-execution risk.
        description = json.loads(response.text)['trans_result'][0]['dst']
    with torch.no_grad():
        def_words = [w for w, p in lac.cut(description)]  # THULAC segmentation
        def_word_idx = []
        if len(def_words) > 0:
            for def_word in def_words:
                if def_word in word2index:
                    def_word_idx.append(word2index[def_word])
                else:
                    # OOV word: fall back to its individual characters.
                    for dw in def_word:
                        try:
                            def_word_idx.append(word2index[dw])
                        except KeyError:  # FIX: was a bare except
                            def_word_idx.append(word2index['<OOV>'])
            x_len = len(def_word_idx)
            if set(def_word_idx) == {word2index['<OOV>']}:
                x_len = 1  # all-OOV input behaves like a single unknown token
            if x_len == 1:
                if def_word_idx[0] > 1:
                    # Single-word query: rank by embedding dot-product similarity.
                    score = ((model.embedding.weight.data).mm((model.embedding.weight.data[def_word_idx[0]]).unsqueeze(1))).squeeze(1)
                    if RD_mode == 'CC':
                        score[def_word_idx[0]] = -10.  # exclude the query word itself
                        score[np.array(index2synset[def_word_idx[0]])] *= 2  # boost thesaurus synonyms
                    sc, indices = torch.sort(score, descending=True)
                    predicted = indices[:NUM_RESPONSE].detach().cpu().numpy()
                    score = sc[:NUM_RESPONSE].detach().numpy()
                    maxsc = sc[0].detach().item()
                    s2h = Score2Hexstr(score, maxsc)
                else:
                    predicted = []
                    ret = {'error': 1}  # characters not recognized
            else:
                # Multi-word query: encode with BERT and run the RD model.
                defi = '[CLS] ' + description
                def_word_idx = tokenizer_Ch.encode(defi)[:80]  # truncate long queries
                def_word_idx.extend(tokenizer_Ch.encode('[SEP]'))
                definition_words_t = torch.tensor(np.array(def_word_idx), dtype=torch.int64, device=device)
                definition_words_t = definition_words_t.unsqueeze(0)  # batch_size = 1
                score = model('test', x=definition_words_t, w=words_t, ws=wd_sems, wP=wd_POSs, wc=wd_charas, wC=wd_C, msk_s=mask_s, msk_c=mask_c, mode=MODE)
                sc, indices = torch.sort(score, descending=True)
                predicted = indices[0, :NUM_RESPONSE].detach().cpu().numpy()
                score = sc[0, :NUM_RESPONSE].detach().numpy()
                maxsc = sc[0, 0].detach().item()
                s2h = Score2Hexstr(score, maxsc)
        else:
            predicted = []
            ret = {'error': 0}  # empty input
        if len(predicted) > 0:
            res = index2word[predicted]
            ret = []
            cn = -1
            # In CC mode don't suggest words the user already typed.
            exclude = set(def_words) if RD_mode == 'CC' else set()
            for wd in res:
                cn += 1
                if wd in exclude:
                    continue
                try:
                    # FIX: copy the cached metadata dict before annotating it;
                    # the original mutated the module-level wd_data entries,
                    # leaking per-request colour values across requests.
                    entry = dict(wd_data[wd])
                    entry['c'] = s2h[cn]
                    ret.append(entry)
                except (KeyError, IndexError):  # word without display metadata
                    continue
        return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json,charset=utf-8")
279
def getClass2Class(r, score):
    """Rank the 6 KMeans clusters by the mean of their best 5 member scores.

    Args:
        r: cluster label (0-5) for each candidate, aligned with ``score``.
        score: candidate scores sorted descending (callers pass the sorted
            top-GET_NUM scores), so each cluster's member list is also
            descending and its first 5 entries are its best.

    Returns:
        class2class: list where class2class[cluster_id] is that cluster's
        rank (0 = best), used to order/colour the groups in the UI.
    """
    per_cluster = [[] for _ in range(6)]
    # FIX: iterate len(r) rather than the module constant GET_NUM, so the
    # function works for any candidate count (behaviour is identical for
    # the existing callers, which always pass GET_NUM items).
    for i in range(len(r)):
        per_cluster[r[i]].append(score[i])
    cluster_score = []
    for members in per_cluster:
        top = members[:5]
        # FIX: an empty cluster used to raise ZeroDivisionError; rank it last.
        cluster_score.append(sum(top) / len(top) if top else float('-inf'))
    # sorted() is stable, matching the original enumerate+sort tie-breaking.
    order = sorted(range(6), key=lambda c: cluster_score[c], reverse=True)
    class2class = [0] * 6
    for rank, cluster_id in enumerate(order):
        class2class[cluster_id] = rank
    return class2class
293
def ChineseRDCluster(request):
    """Chinese reverse dictionary with clustered results.

    Same as ChineseRD, but fetches only the top GET_NUM candidates, groups
    them into 6 KMeans clusters of their embeddings, and annotates each
    result with 'C' (cluster rank) and 'd' (definitions) in addition to 'c'
    (colour strength). Results are sorted by cluster rank.
    """
    description = request.GET['description']
    RD_mode = request.GET['mode']
    if RD_mode == 'EC':
        # Translate the English description to Chinese with the Baidu API.
        q = description
        fromLang = 'en'
        toLang = 'zh'
        salt = "35555"
        sign = appid+q+salt+secretKey
        sign = md5(sign)
        url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
        url = url + '?appid='+appid+'&q='+urllib.parse.quote(q)+'&from='+fromLang+'&to='+toLang+'&salt='+str(salt)+'&sign='+sign
        response = requests.request("GET", url)
        # FIX: json.loads instead of eval() on a remote payload.
        description = json.loads(response.text)['trans_result'][0]['dst']
    with torch.no_grad():
        def_words = [w for w, p in lac.cut(description)]  # THULAC segmentation
        def_word_idx = []
        if len(def_words) > 0:
            for def_word in def_words:
                if def_word in word2index:
                    def_word_idx.append(word2index[def_word])
                else:
                    # OOV word: fall back to its individual characters.
                    for dw in def_word:
                        try:
                            def_word_idx.append(word2index[dw])
                        except KeyError:  # FIX: was a bare except
                            def_word_idx.append(word2index['<OOV>'])
            x_len = len(def_word_idx)
            if set(def_word_idx) == {word2index['<OOV>']}:
                x_len = 1  # all-OOV input behaves like a single unknown token
            if x_len == 1:
                if def_word_idx[0] > 1:
                    # Single-word query: rank by embedding dot-product similarity.
                    score = ((model.embedding.weight.data).mm((model.embedding.weight.data[def_word_idx[0]]).unsqueeze(1))).squeeze(1)
                    if RD_mode == 'CC':
                        score[def_word_idx[0]] = -10.  # exclude the query word itself
                        score[np.array(index2synset[def_word_idx[0]])] *= 2  # boost thesaurus synonyms
                    sc, indices = torch.sort(score, descending=True)
                    predicted = indices[:GET_NUM].detach().cpu().numpy()
                    score = sc[:GET_NUM].detach().numpy()
                    maxsc = sc[0].detach().item()
                    s2h = Score2Hexstr(score, maxsc)
                    # Cluster the top candidates in embedding space.
                    r = kmeans.fit_predict(model.embedding.weight.data[predicted[:GET_NUM]].cpu().numpy())
                    class2class = getClass2Class(r, score[:GET_NUM])
                else:
                    predicted = []
                    ret = {'error': 1}  # characters not recognized
            else:
                # Multi-word query: encode with BERT and run the RD model.
                defi = '[CLS] ' + description
                def_word_idx = tokenizer_Ch.encode(defi)[:80]  # truncate long queries
                def_word_idx.extend(tokenizer_Ch.encode('[SEP]'))
                definition_words_t = torch.tensor(np.array(def_word_idx), dtype=torch.int64, device=device)
                definition_words_t = definition_words_t.unsqueeze(0)  # batch_size = 1
                score = model('test', x=definition_words_t, w=words_t, ws=wd_sems, wP=wd_POSs, wc=wd_charas, wC=wd_C, msk_s=mask_s, msk_c=mask_c, mode=MODE)
                sc, indices = torch.sort(score, descending=True)
                predicted = indices[0, :GET_NUM].detach().cpu().numpy()
                score = sc[0, :GET_NUM].detach().numpy()
                maxsc = sc[0, 0].detach().item()
                s2h = Score2Hexstr(score, maxsc)
                # Cluster the top candidates in embedding space.
                r = kmeans.fit_predict(model.embedding.weight.data[predicted[:GET_NUM]].cpu().numpy())
                class2class = getClass2Class(r, score[:GET_NUM])
        else:
            predicted = []
            ret = {'error': 0}  # empty input
        if len(predicted) > 0:
            res = index2word[predicted]
            ret = []
            cn = -1
            # In CC mode don't suggest words the user already typed.
            exclude = set(def_words) if RD_mode == 'CC' else set()
            for wd in res:
                cn += 1
                if wd in exclude:
                    continue
                try:
                    # FIX: copy the cached metadata dict before annotating it;
                    # the original mutated the module-level wd_data entries,
                    # leaking per-request keys ('c'/'C'/'d') across requests.
                    entry = dict(wd_data[wd])
                    entry['c'] = s2h[cn]
                    entry['C'] = class2class[int(r[cn])]  # int(): numpy int64 is not JSON-serializable
                    entry['d'] = wd_defi[wd]
                    ret.append(entry)
                except (KeyError, IndexError):  # word without display metadata
                    continue
            # FIX: sort once after the loop (the original re-sorted on every
            # append, O(n^2 log n)); sorts are stable, so the final order is
            # identical.
            ret.sort(key=lambda x: x['C'])
        return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json,charset=utf-8")
386
def EnglishRDCluster(request):
    """English reverse dictionary with clustered results.

    GET params:
        description: the query text.
        mode: 'EE' (English->English) or 'CE' (Chinese description, first
              machine-translated to English via the Baidu API).

    Returns a JSON list of word dicts annotated with 'c' (colour strength),
    'C' (cluster rank) and 'd' (definitions), sorted by cluster rank, or
    {'error': 0|1} for empty/unusable input.
    """
    description = request.GET['description']
    RD_mode = request.GET['mode']
    if RD_mode == 'CE':
        # Translate the Chinese description to English with the Baidu API.
        # (The original also pre-segmented the Chinese text here; that
        # result was always overwritten below, so the dead code was removed.)
        q = description
        fromLang = 'zh'
        toLang = 'en'
        salt = "35555"
        sign = appid+q+salt+secretKey
        sign = md5(sign)
        url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
        url = url + '?appid='+appid+'&q='+urllib.parse.quote(q)+'&from='+fromLang+'&to='+toLang+'&salt='+str(salt)+'&sign='+sign
        response = requests.request("GET", url)
        # FIX: json.loads instead of eval() on a remote payload.
        description = json.loads(response.text)['trans_result'][0]['dst']
    with torch.no_grad():
        # Strip punctuation, lowercase, split on whitespace.
        def_words = re.sub('[%s]' % re.escape(string.punctuation), ' ', description)
        def_words = def_words.lower()
        def_words = def_words.strip().split()
        def_word_idx = []
        if len(def_words) > 0:
            for def_word in def_words:
                if def_word in word2index_en:
                    def_word_idx.append(word2index_en[def_word])
                else:
                    def_word_idx.append(word2index_en['<OOV>'])
            x_len = len(def_word_idx)
            if set(def_word_idx) == {word2index_en['<OOV>']}:
                x_len = 1  # all-OOV input behaves like a single unknown token
            if x_len == 1:
                if def_word_idx[0] > 1:
                    # Single-word query: rank by embedding dot-product similarity.
                    score = ((model_en.embedding.weight.data).mm((model_en.embedding.weight.data[def_word_idx[0]]).unsqueeze(1))).squeeze(1)
                    if RD_mode == 'EE':
                        score[def_word_idx[0]] = -10.  # exclude the query word itself
                        score[np.array(index2synset_en[def_word_idx[0]])] *= 2  # boost WordNet synonyms
                    sc, indices = torch.sort(score, descending=True)
                    predicted = indices[:GET_NUM].detach().cpu().numpy()
                    score = sc[:GET_NUM].detach().numpy()
                    maxsc = sc[0].detach().item()
                    s2h = Score2Hexstr(score, maxsc)
                    # FIX: cluster with the ENGLISH model's embeddings; the
                    # original indexed the Chinese model's embedding table
                    # with English word ids.
                    r = kmeans.fit_predict(model_en.embedding.weight.data[predicted[:GET_NUM]].cpu().numpy())
                    class2class = getClass2Class(r, score[:GET_NUM])
                else:
                    predicted = []
                    ret = {'error': 1}  # characters not recognized
            else:
                # Multi-word query: encode with BERT and run the RD model.
                defi = '[CLS] ' + description
                def_word_idx = tokenizer_En.encode(defi)[:60]  # truncate long queries
                def_word_idx.extend(tokenizer_En.encode('[SEP]'))
                definition_words_t = torch.tensor(np.array(def_word_idx), dtype=torch.int64, device=device)
                definition_words_t = definition_words_t.unsqueeze(0)  # batch_size = 1
                score = model_en('test', x=definition_words_t, w=words_t, ws=wd_sems_, wl=wd_lex, wr=wd_ra, msk_s=mask_s_, msk_l=mask_l, msk_r=mask_r, mode=MODE_en)
                sc, indices = torch.sort(score, descending=True)
                predicted = indices[0, :GET_NUM].detach().cpu().numpy()
                score = sc[0, :GET_NUM].detach().numpy()
                maxsc = sc[0, 0].detach().item()
                s2h = Score2Hexstr(score, maxsc)
                # FIX: model_en embeddings here too (was model.embedding).
                r = kmeans.fit_predict(model_en.embedding.weight.data[predicted[:GET_NUM]].cpu().numpy())
                class2class = getClass2Class(r, score[:GET_NUM])
        else:
            predicted = []
            ret = {'error': 0}  # empty input
        if len(predicted) > 0:
            res = index2word_en[predicted]
            ret = []
            cn = -1
            # In EE mode don't suggest words the user already typed.
            exclude = set(def_words) if RD_mode == "EE" else set()
            for wd in res:
                cn += 1
                if len(wd) <= 1 or wd in exclude:
                    continue  # skip single letters and query words
                try:
                    # FIX: copy the cached metadata dict before annotating it;
                    # the original mutated the module-level wd_data_en entries,
                    # leaking per-request keys across requests.
                    entry = dict(wd_data_en[wd])
                    entry['c'] = s2h[cn]
                    entry['C'] = class2class[int(r[cn])]  # int(): numpy int64 is not JSON-serializable
                    entry['d'] = wd_defi_en[wd]
                    ret.append(entry)
                except (KeyError, IndexError):  # word without display metadata
                    continue
            # FIX: sort once after the loop (the original re-sorted on every
            # append); sorts are stable, so the final order is identical.
            ret.sort(key=lambda x: x['C'])
        return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json,charset=utf-8")
482
483
def EnglishRD(request):
    """English reverse dictionary: description -> ranked candidate words.

    GET params:
        description: the query text.
        mode: 'EE' (English->English) or 'CE' (Chinese description, first
              machine-translated to English via the Baidu API).

    Returns a JSON list of word-metadata dicts annotated with 'c' (hex
    colour strength), or {'error': 0|1} for empty/unusable input.
    """
    description = request.GET['description']
    RD_mode = request.GET['mode']
    if RD_mode == 'CE':
        # Translate the Chinese description to English with the Baidu API.
        q = description
        fromLang = 'zh'
        toLang = 'en'
        salt = "35555"
        sign = appid+q+salt+secretKey
        sign = md5(sign)
        url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
        url = url + '?appid='+appid+'&q='+urllib.parse.quote(q)+'&from='+fromLang+'&to='+toLang+'&salt='+str(salt)+'&sign='+sign
        response = requests.request("GET", url)
        # FIX: json.loads instead of eval() on a remote payload.
        description = json.loads(response.text)['trans_result'][0]['dst']
    with torch.no_grad():
        # Strip punctuation, lowercase, split on whitespace.
        def_words = re.sub('[%s]' % re.escape(string.punctuation), ' ', description)
        def_words = def_words.lower()
        def_words = def_words.strip().split()
        def_word_idx = []
        if len(def_words) > 0:
            for def_word in def_words:
                if def_word in word2index_en:
                    def_word_idx.append(word2index_en[def_word])
                else:
                    def_word_idx.append(word2index_en['<OOV>'])
            x_len = len(def_word_idx)
            if set(def_word_idx) == {word2index_en['<OOV>']}:
                x_len = 1  # all-OOV input behaves like a single unknown token
            if x_len == 1:
                if def_word_idx[0] > 1:
                    # Single-word query: rank by embedding dot-product similarity.
                    score = ((model_en.embedding.weight.data).mm((model_en.embedding.weight.data[def_word_idx[0]]).unsqueeze(1))).squeeze(1)
                    if RD_mode == 'EE':
                        score[def_word_idx[0]] = -10.  # exclude the query word itself
                        score[np.array(index2synset_en[def_word_idx[0]])] *= 2  # boost WordNet synonyms
                    sc, indices = torch.sort(score, descending=True)
                    predicted = indices[:NUM_RESPONSE].detach().cpu().numpy()
                    score = sc[:NUM_RESPONSE].detach().numpy()
                    maxsc = sc[0].detach().item()
                    s2h = Score2Hexstr(score, maxsc)
                else:
                    predicted = []
                    ret = {'error': 1}  # characters not recognized
            else:
                # Multi-word query: encode with BERT and run the RD model.
                defi = '[CLS] ' + description
                def_word_idx = tokenizer_En.encode(defi)[:60]  # truncate long queries
                def_word_idx.extend(tokenizer_En.encode('[SEP]'))
                definition_words_t = torch.tensor(np.array(def_word_idx), dtype=torch.int64, device=device)
                definition_words_t = definition_words_t.unsqueeze(0)  # batch_size = 1
                score = model_en('test', x=definition_words_t, w=words_t, ws=wd_sems_, wl=wd_lex, wr=wd_ra, msk_s=mask_s_, msk_l=mask_l, msk_r=mask_r, mode=MODE_en)
                sc, indices = torch.sort(score, descending=True)
                predicted = indices[0, :NUM_RESPONSE].detach().cpu().numpy()
                score = sc[0, :NUM_RESPONSE].detach().numpy()
                maxsc = sc[0, 0].detach().item()
                s2h = Score2Hexstr(score, maxsc)
        else:
            predicted = []
            ret = {'error': 0}  # empty input
        if len(predicted) > 0:
            res = index2word_en[predicted]
            ret = []
            cn = -1
            # In EE mode don't suggest words the user already typed.
            exclude = set(def_words) if RD_mode == "EE" else set()
            for wd in res:
                cn += 1
                if len(wd) <= 1 or wd in exclude:
                    continue  # skip single letters and query words
                try:
                    # FIX: copy the cached metadata dict before annotating it;
                    # the original mutated the module-level wd_data_en entries,
                    # leaking per-request colour values across requests.
                    entry = dict(wd_data_en[wd])
                    entry['c'] = s2h[cn]
                    ret.append(entry)
                except (KeyError, IndexError):  # word without display metadata
                    continue
        return HttpResponse(json.dumps(ret, ensure_ascii=False), content_type="application/json,charset=utf-8")
568
569
def feedback(request):
    """Append a user feedback entry to the monthly log file.

    GET params:
        content: the feedback text.
        mode: 'FBS' (general suggestion) or 'FBW' (missing word/description
              report); any other mode is ignored.

    Always returns an empty HttpResponse.
    """
    content = request.GET['content']
    FBmode = request.GET['mode']
    # FIX: the original leaked the open file handle and raised NameError at
    # f.close() when the mode matched neither branch; use a mode->suffix map
    # and a with-block instead.
    suffix = {'FBS': 'suggestion.log', 'FBW': 'wordsDesc.log'}.get(FBmode)
    if suffix is not None:
        path = './feedBackLog/' + datetime.now().date().strftime('%Y%m') + suffix
        with open(path, 'a') as f:
            f.write(datetime.now().strftime('[%Y%m%d%H%M%S] ') + content + '\n')
    return HttpResponse("")
581
def GetChDefis(request):
    """Return Chinese definitions for a whitespace-separated word list.

    Reads the words from the 'w' parameter of either a POST or a GET
    request and responds with a JSON array of definition entries, in the
    same order as the input words.
    """
    source = request.POST if request.method == 'POST' else request.GET
    words = source['w'].split()
    definitions = [wd_defi[w] for w in words]
    return HttpResponse(json.dumps(definitions, ensure_ascii=False), content_type="application/json,charset=utf-8")
591
def GetEnDefis(request):
    """Return English definitions for a whitespace-separated word list.

    Reads the words from the 'w' parameter of either a POST or a GET
    request and responds with a JSON array of definition entries, in the
    same order as the input words.
    """
    source = request.POST if request.method == 'POST' else request.GET
    words = source['w'].split()
    definitions = [wd_defi_en[w] for w in words]
    return HttpResponse(json.dumps(definitions, ensure_ascii=False), content_type="application/json,charset=utf-8")
601