TheAlgorithms-Python
506 lines · 15.6 KB
1import matplotlib.gridspec as gridspec
2import matplotlib.pyplot as plt
3import numpy as np
4from sklearn.utils import shuffle
5import input_data
6
# Fixed RNG seed so weight initialisation and noise draws are reproducible
# across runs.  ("numer" is a typo for "number", kept for compatibility.)
random_numer = 42

np.random.seed(random_numer)
10
11
def ReLu(x):
    """Rectified linear unit: pass positive entries through, zero the rest."""
    return x * (x > 0)
15
16
def d_ReLu(x):
    """Derivative of ReLu: 1.0 where the input is positive, 0.0 elsewhere."""
    return np.where(x > 0, 1.0, 0.0)
20
21
def arctan(x):
    """Arc-tangent activation: squashes the reals into (-pi/2, pi/2)."""
    result = np.arctan(x)
    return result
24
25
def d_arctan(x):
    """Derivative of arctan: 1 / (1 + x^2)."""
    return 1.0 / (1.0 + np.square(x))
28
29
def log(x):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-x)).

    (The name "log" is the original author's; it is really the sigmoid.)
    Uses the standard numerically stable piecewise form so that large
    negative inputs do not overflow np.exp — the naive 1/(1+exp(-x))
    raises overflow warnings for x << 0 while this version does not.
    Returns a float64 ndarray of the same shape as the input.
    """
    x = np.asarray(x, dtype=np.float64)
    out = np.empty_like(x)
    positive = x >= 0
    # For x >= 0, exp(-x) <= 1 and cannot overflow.
    out[positive] = 1.0 / (1.0 + np.exp(-x[positive]))
    # For x < 0, rewrite as exp(x) / (1 + exp(x)); exp(x) < 1 here.
    exp_x = np.exp(x[~positive])
    out[~positive] = exp_x / (1.0 + exp_x)
    return out
32
33
def d_log(x):
    """Derivative of the logistic sigmoid: s(x) * (1 - s(x)).

    The original evaluated the sigmoid twice (log(x) * (1 - log(x)));
    this computes it once and reuses the value — same result, half the
    exp/divide work per call in the training loop.
    """
    s = 1.0 / (1.0 + np.exp(-x))
    return s * (1.0 - s)
36
37
def tanh(x):
    """Hyperbolic-tangent activation: squashes the reals into (-1, 1)."""
    result = np.tanh(x)
    return result
40
41
def d_tanh(x):
    """Derivative of tanh: 1 - tanh(x)^2."""
    t = np.tanh(x)
    return 1.0 - t * t
44
45
def plot(samples):
    """Render the given flattened 28x28 images on a tight 4x4 grid.

    Expects up to 16 samples (one per grid cell) and returns the
    matplotlib Figure so the caller can save it to disk.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for index, image in enumerate(samples):
        axis = plt.subplot(grid[index])
        plt.axis("off")
        axis.set_xticklabels([])
        axis.set_yticklabels([])
        axis.set_aspect("equal")
        plt.imshow(image.reshape(28, 28), cmap="Greys_r")

    return figure
60
61
# Train a fully-connected GAN on MNIST: a 2-layer discriminator D and a
# 7-layer generator G, optimised with a hand-rolled Adam update.
if __name__ == "__main__":
    # 1. Load Data and declare hyper
    print("--------- Load Data ----------")
    # NOTE(review): `input_data` is presumably the TensorFlow MNIST tutorial
    # helper (images flattened to 784, scaled to [0, 1]) — confirm.
    mnist = input_data.read_data_sets("MNIST_data", one_hot=False)
    temp = mnist.test
    images, labels = temp.images, temp.labels
    images, labels = shuffle(np.asarray(images), np.asarray(labels))
    num_epoch = 10
    learing_rate = 0.00009  # sic: "learning rate"
    G_input = 100  # dimensionality of the generator's noise vector z
    # Widths of the generator's six hidden layers (128 is also D's hidden width).
    hidden_input, hidden_input2, hidden_input3 = 128, 256, 346
    hidden_input4, hidden_input5, hidden_input6 = 480, 560, 686

    print("--------- Declare Hyper Parameters ----------")
    # 2. Declare Weights
    # Discriminator: 784 -> hidden_input -> 1 (real/fake probability).
    # All weights: scaled normal init (He-style scale) shrunk by 0.002.
    D_W1 = (
        np.random.normal(size=(784, hidden_input), scale=(1.0 / np.sqrt(784 / 2.0)))
        * 0.002
    )
    # D_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.))) *0.002
    D_b1 = np.zeros(hidden_input)

    D_W2 = (
        np.random.normal(
            size=(hidden_input, 1), scale=(1.0 / np.sqrt(hidden_input / 2.0))
        )
        * 0.002
    )
    # D_b2 = np.random.normal(size=(1),scale=(1. / np.sqrt(1 / 2.))) *0.002
    D_b2 = np.zeros(1)

    # Generator: G_input -> 128 -> 256 -> 346 -> 480 -> 560 -> 686 -> 784.
    G_W1 = (
        np.random.normal(
            size=(G_input, hidden_input), scale=(1.0 / np.sqrt(G_input / 2.0))
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.))) *0.002
    G_b1 = np.zeros(hidden_input)

    G_W2 = (
        np.random.normal(
            size=(hidden_input, hidden_input2),
            scale=(1.0 / np.sqrt(hidden_input / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.))) *0.002
    G_b2 = np.zeros(hidden_input2)

    G_W3 = (
        np.random.normal(
            size=(hidden_input2, hidden_input3),
            scale=(1.0 / np.sqrt(hidden_input2 / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.))) *0.002
    G_b3 = np.zeros(hidden_input3)

    G_W4 = (
        np.random.normal(
            size=(hidden_input3, hidden_input4),
            scale=(1.0 / np.sqrt(hidden_input3 / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.))) *0.002
    G_b4 = np.zeros(hidden_input4)

    G_W5 = (
        np.random.normal(
            size=(hidden_input4, hidden_input5),
            scale=(1.0 / np.sqrt(hidden_input4 / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.))) *0.002
    G_b5 = np.zeros(hidden_input5)

    G_W6 = (
        np.random.normal(
            size=(hidden_input5, hidden_input6),
            scale=(1.0 / np.sqrt(hidden_input5 / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.))) *0.002
    G_b6 = np.zeros(hidden_input6)

    G_W7 = (
        np.random.normal(
            size=(hidden_input6, 784), scale=(1.0 / np.sqrt(hidden_input6 / 2.0))
        )
        * 0.002
    )
    # G_b2 = np.random.normal(size=(784),scale=(1. / np.sqrt(784 / 2.))) *0.002
    G_b7 = np.zeros(784)

    # 3. For Adam Optimizer
    # (m, v) first/second-moment accumulators, one pair per parameter tensor:
    # v1..v4 for D_W1/D_b1/D_W2/D_b2, v5..v18 for G_W1..G_b7.
    v1, m1 = 0, 0
    v2, m2 = 0, 0
    v3, m3 = 0, 0
    v4, m4 = 0, 0

    v5, m5 = 0, 0
    v6, m6 = 0, 0
    v7, m7 = 0, 0
    v8, m8 = 0, 0
    v9, m9 = 0, 0
    v10, m10 = 0, 0
    v11, m11 = 0, 0
    v12, m12 = 0, 0

    v13, m13 = 0, 0
    v14, m14 = 0, 0

    v15, m15 = 0, 0
    v16, m16 = 0, 0

    v17, m17 = 0, 0
    v18, m18 = 0, 0

    beta_1, beta_2, eps = 0.9, 0.999, 0.00000001

    print("--------- Started Training ----------")
    # NOTE(review): `iter` shadows the builtin; each "epoch" trains on a
    # single randomly chosen real image (batch size 1).
    for iter in range(num_epoch):

        random_int = np.random.randint(len(images) - 5)
        current_image = np.expand_dims(images[random_int], axis=0)

        # Func: Generate The first Fake Data
        # Generator forward pass: z -> Gl7 -> sigmoid -> fake image in (0, 1).
        Z = np.random.uniform(-1.0, 1.0, size=[1, G_input])
        Gl1 = Z.dot(G_W1) + G_b1
        Gl1A = arctan(Gl1)
        Gl2 = Gl1A.dot(G_W2) + G_b2
        Gl2A = ReLu(Gl2)
        Gl3 = Gl2A.dot(G_W3) + G_b3
        Gl3A = arctan(Gl3)

        Gl4 = Gl3A.dot(G_W4) + G_b4
        Gl4A = ReLu(Gl4)
        Gl5 = Gl4A.dot(G_W5) + G_b5
        Gl5A = tanh(Gl5)
        Gl6 = Gl5A.dot(G_W6) + G_b6
        Gl6A = ReLu(Gl6)
        Gl7 = Gl6A.dot(G_W7) + G_b7

        current_fake_data = log(Gl7)  # `log` is the sigmoid, see above

        # Func: Forward Feed for Real data
        Dl1_r = current_image.dot(D_W1) + D_b1
        Dl1_rA = ReLu(Dl1_r)
        Dl2_r = Dl1_rA.dot(D_W2) + D_b2
        Dl2_rA = log(Dl2_r)  # D's probability that the real image is real

        # Func: Forward Feed for Fake Data
        Dl1_f = current_fake_data.dot(D_W1) + D_b1
        Dl1_fA = ReLu(Dl1_f)
        Dl2_f = Dl1_fA.dot(D_W2) + D_b2
        Dl2_fA = log(Dl2_f)  # D's probability that the fake image is real

        # Func: Cost D
        # NOTE(review): the standard GAN discriminator cost is
        # -log(D(real)) - log(1 - D(fake)); the "+" here flips the sign of
        # the second term.  The hand-derived gradients below match the "-"
        # form, so this printed cost looks like a sign typo — confirm.
        D_cost = -np.log(Dl2_rA) + np.log(1.0 - Dl2_fA)

        # Func: Gradient
        # Backprop of -log(1 - D(fake)) through D's two layers
        # (each grad is chain-rule parts: upstream * activation' * input).
        grad_f_w2_part_1 = 1 / (1.0 - Dl2_fA)
        grad_f_w2_part_2 = d_log(Dl2_f)
        grad_f_w2_part_3 = Dl1_fA
        grad_f_w2 = grad_f_w2_part_3.T.dot(grad_f_w2_part_1 * grad_f_w2_part_2)
        grad_f_b2 = grad_f_w2_part_1 * grad_f_w2_part_2

        grad_f_w1_part_1 = (grad_f_w2_part_1 * grad_f_w2_part_2).dot(D_W2.T)
        grad_f_w1_part_2 = d_ReLu(Dl1_f)
        grad_f_w1_part_3 = current_fake_data
        grad_f_w1 = grad_f_w1_part_3.T.dot(grad_f_w1_part_1 * grad_f_w1_part_2)
        grad_f_b1 = grad_f_w1_part_1 * grad_f_w1_part_2

        # Backprop of -log(D(real)) through D's two layers.
        grad_r_w2_part_1 = -1 / Dl2_rA
        grad_r_w2_part_2 = d_log(Dl2_r)
        grad_r_w2_part_3 = Dl1_rA
        grad_r_w2 = grad_r_w2_part_3.T.dot(grad_r_w2_part_1 * grad_r_w2_part_2)
        grad_r_b2 = grad_r_w2_part_1 * grad_r_w2_part_2

        grad_r_w1_part_1 = (grad_r_w2_part_1 * grad_r_w2_part_2).dot(D_W2.T)
        grad_r_w1_part_2 = d_ReLu(Dl1_r)
        grad_r_w1_part_3 = current_image
        grad_r_w1 = grad_r_w1_part_3.T.dot(grad_r_w1_part_1 * grad_r_w1_part_2)
        grad_r_b1 = grad_r_w1_part_1 * grad_r_w1_part_2

        # Total discriminator gradient = fake-branch + real-branch.
        grad_w1 = grad_f_w1 + grad_r_w1
        grad_b1 = grad_f_b1 + grad_r_b1

        grad_w2 = grad_f_w2 + grad_r_w2
        grad_b2 = grad_f_b2 + grad_r_b2

        # ---- Update Gradient ----
        # Adam for D.  NOTE(review): bias correction uses constant
        # (1 - beta) instead of the usual (1 - beta**t) — confirm intent.
        m1 = beta_1 * m1 + (1 - beta_1) * grad_w1
        v1 = beta_2 * v1 + (1 - beta_2) * grad_w1 ** 2

        m2 = beta_1 * m2 + (1 - beta_1) * grad_b1
        v2 = beta_2 * v2 + (1 - beta_2) * grad_b1 ** 2

        m3 = beta_1 * m3 + (1 - beta_1) * grad_w2
        v3 = beta_2 * v3 + (1 - beta_2) * grad_w2 ** 2

        m4 = beta_1 * m4 + (1 - beta_1) * grad_b2
        v4 = beta_2 * v4 + (1 - beta_2) * grad_b2 ** 2

        D_W1 = D_W1 - (learing_rate / (np.sqrt(v1 / (1 - beta_2)) + eps)) * (
            m1 / (1 - beta_1)
        )
        D_b1 = D_b1 - (learing_rate / (np.sqrt(v2 / (1 - beta_2)) + eps)) * (
            m2 / (1 - beta_1)
        )

        D_W2 = D_W2 - (learing_rate / (np.sqrt(v3 / (1 - beta_2)) + eps)) * (
            m3 / (1 - beta_1)
        )
        D_b2 = D_b2 - (learing_rate / (np.sqrt(v4 / (1 - beta_2)) + eps)) * (
            m4 / (1 - beta_1)
        )

        # Func: Forward Feed for G
        # Fresh noise, regenerate a fake, push it through the updated D.
        Z = np.random.uniform(-1.0, 1.0, size=[1, G_input])
        Gl1 = Z.dot(G_W1) + G_b1
        Gl1A = arctan(Gl1)
        Gl2 = Gl1A.dot(G_W2) + G_b2
        Gl2A = ReLu(Gl2)
        Gl3 = Gl2A.dot(G_W3) + G_b3
        Gl3A = arctan(Gl3)

        Gl4 = Gl3A.dot(G_W4) + G_b4
        Gl4A = ReLu(Gl4)
        Gl5 = Gl4A.dot(G_W5) + G_b5
        Gl5A = tanh(Gl5)
        Gl6 = Gl5A.dot(G_W6) + G_b6
        Gl6A = ReLu(Gl6)
        Gl7 = Gl6A.dot(G_W7) + G_b7

        current_fake_data = log(Gl7)

        Dl1 = current_fake_data.dot(D_W1) + D_b1
        Dl1_A = ReLu(Dl1)
        Dl2 = Dl1_A.dot(D_W2) + D_b2
        Dl2_A = log(Dl2)

        # Func: Cost G
        # Non-saturating generator loss: -log(D(G(z))).
        G_cost = -np.log(Dl2_A)

        # Func: Gradient
        # Backprop of G_cost through D (frozen) and then G's seven layers.
        grad_G_w7_part_1 = ((-1 / Dl2_A) * d_log(Dl2).dot(D_W2.T) * (d_ReLu(Dl1))).dot(
            D_W1.T
        )
        grad_G_w7_part_2 = d_log(Gl7)
        grad_G_w7_part_3 = Gl6A
        # NOTE(review): "part_1 * part_1" looks like a typo for
        # "part_1 * part_2" — compare grad_G_b7 on the next line and the
        # pattern of every other layer's weight gradient.
        grad_G_w7 = grad_G_w7_part_3.T.dot(grad_G_w7_part_1 * grad_G_w7_part_1)
        grad_G_b7 = grad_G_w7_part_1 * grad_G_w7_part_2

        grad_G_w6_part_1 = (grad_G_w7_part_1 * grad_G_w7_part_2).dot(G_W7.T)
        grad_G_w6_part_2 = d_ReLu(Gl6)
        grad_G_w6_part_3 = Gl5A
        grad_G_w6 = grad_G_w6_part_3.T.dot(grad_G_w6_part_1 * grad_G_w6_part_2)
        grad_G_b6 = grad_G_w6_part_1 * grad_G_w6_part_2

        grad_G_w5_part_1 = (grad_G_w6_part_1 * grad_G_w6_part_2).dot(G_W6.T)
        grad_G_w5_part_2 = d_tanh(Gl5)
        grad_G_w5_part_3 = Gl4A
        grad_G_w5 = grad_G_w5_part_3.T.dot(grad_G_w5_part_1 * grad_G_w5_part_2)
        grad_G_b5 = grad_G_w5_part_1 * grad_G_w5_part_2

        grad_G_w4_part_1 = (grad_G_w5_part_1 * grad_G_w5_part_2).dot(G_W5.T)
        grad_G_w4_part_2 = d_ReLu(Gl4)
        grad_G_w4_part_3 = Gl3A
        grad_G_w4 = grad_G_w4_part_3.T.dot(grad_G_w4_part_1 * grad_G_w4_part_2)
        grad_G_b4 = grad_G_w4_part_1 * grad_G_w4_part_2

        grad_G_w3_part_1 = (grad_G_w4_part_1 * grad_G_w4_part_2).dot(G_W4.T)
        grad_G_w3_part_2 = d_arctan(Gl3)
        grad_G_w3_part_3 = Gl2A
        grad_G_w3 = grad_G_w3_part_3.T.dot(grad_G_w3_part_1 * grad_G_w3_part_2)
        grad_G_b3 = grad_G_w3_part_1 * grad_G_w3_part_2

        grad_G_w2_part_1 = (grad_G_w3_part_1 * grad_G_w3_part_2).dot(G_W3.T)
        grad_G_w2_part_2 = d_ReLu(Gl2)
        grad_G_w2_part_3 = Gl1A
        grad_G_w2 = grad_G_w2_part_3.T.dot(grad_G_w2_part_1 * grad_G_w2_part_2)
        grad_G_b2 = grad_G_w2_part_1 * grad_G_w2_part_2

        grad_G_w1_part_1 = (grad_G_w2_part_1 * grad_G_w2_part_2).dot(G_W2.T)
        grad_G_w1_part_2 = d_arctan(Gl1)
        grad_G_w1_part_3 = Z
        grad_G_w1 = grad_G_w1_part_3.T.dot(grad_G_w1_part_1 * grad_G_w1_part_2)
        grad_G_b1 = grad_G_w1_part_1 * grad_G_w1_part_2

        # ---- Update Gradient ----
        # Adam for G (same constant bias correction as for D above).
        m5 = beta_1 * m5 + (1 - beta_1) * grad_G_w1
        v5 = beta_2 * v5 + (1 - beta_2) * grad_G_w1 ** 2

        m6 = beta_1 * m6 + (1 - beta_1) * grad_G_b1
        v6 = beta_2 * v6 + (1 - beta_2) * grad_G_b1 ** 2

        m7 = beta_1 * m7 + (1 - beta_1) * grad_G_w2
        v7 = beta_2 * v7 + (1 - beta_2) * grad_G_w2 ** 2

        m8 = beta_1 * m8 + (1 - beta_1) * grad_G_b2
        v8 = beta_2 * v8 + (1 - beta_2) * grad_G_b2 ** 2

        m9 = beta_1 * m9 + (1 - beta_1) * grad_G_w3
        v9 = beta_2 * v9 + (1 - beta_2) * grad_G_w3 ** 2

        m10 = beta_1 * m10 + (1 - beta_1) * grad_G_b3
        v10 = beta_2 * v10 + (1 - beta_2) * grad_G_b3 ** 2

        m11 = beta_1 * m11 + (1 - beta_1) * grad_G_w4
        v11 = beta_2 * v11 + (1 - beta_2) * grad_G_w4 ** 2

        m12 = beta_1 * m12 + (1 - beta_1) * grad_G_b4
        v12 = beta_2 * v12 + (1 - beta_2) * grad_G_b4 ** 2

        m13 = beta_1 * m13 + (1 - beta_1) * grad_G_w5
        v13 = beta_2 * v13 + (1 - beta_2) * grad_G_w5 ** 2

        m14 = beta_1 * m14 + (1 - beta_1) * grad_G_b5
        v14 = beta_2 * v14 + (1 - beta_2) * grad_G_b5 ** 2

        m15 = beta_1 * m15 + (1 - beta_1) * grad_G_w6
        v15 = beta_2 * v15 + (1 - beta_2) * grad_G_w6 ** 2

        m16 = beta_1 * m16 + (1 - beta_1) * grad_G_b6
        v16 = beta_2 * v16 + (1 - beta_2) * grad_G_b6 ** 2

        m17 = beta_1 * m17 + (1 - beta_1) * grad_G_w7
        v17 = beta_2 * v17 + (1 - beta_2) * grad_G_w7 ** 2

        m18 = beta_1 * m18 + (1 - beta_1) * grad_G_b7
        v18 = beta_2 * v18 + (1 - beta_2) * grad_G_b7 ** 2

        G_W1 = G_W1 - (learing_rate / (np.sqrt(v5 / (1 - beta_2)) + eps)) * (
            m5 / (1 - beta_1)
        )
        G_b1 = G_b1 - (learing_rate / (np.sqrt(v6 / (1 - beta_2)) + eps)) * (
            m6 / (1 - beta_1)
        )

        G_W2 = G_W2 - (learing_rate / (np.sqrt(v7 / (1 - beta_2)) + eps)) * (
            m7 / (1 - beta_1)
        )
        G_b2 = G_b2 - (learing_rate / (np.sqrt(v8 / (1 - beta_2)) + eps)) * (
            m8 / (1 - beta_1)
        )

        G_W3 = G_W3 - (learing_rate / (np.sqrt(v9 / (1 - beta_2)) + eps)) * (
            m9 / (1 - beta_1)
        )
        G_b3 = G_b3 - (learing_rate / (np.sqrt(v10 / (1 - beta_2)) + eps)) * (
            m10 / (1 - beta_1)
        )

        G_W4 = G_W4 - (learing_rate / (np.sqrt(v11 / (1 - beta_2)) + eps)) * (
            m11 / (1 - beta_1)
        )
        G_b4 = G_b4 - (learing_rate / (np.sqrt(v12 / (1 - beta_2)) + eps)) * (
            m12 / (1 - beta_1)
        )

        G_W5 = G_W5 - (learing_rate / (np.sqrt(v13 / (1 - beta_2)) + eps)) * (
            m13 / (1 - beta_1)
        )
        G_b5 = G_b5 - (learing_rate / (np.sqrt(v14 / (1 - beta_2)) + eps)) * (
            m14 / (1 - beta_1)
        )

        G_W6 = G_W6 - (learing_rate / (np.sqrt(v15 / (1 - beta_2)) + eps)) * (
            m15 / (1 - beta_1)
        )
        G_b6 = G_b6 - (learing_rate / (np.sqrt(v16 / (1 - beta_2)) + eps)) * (
            m16 / (1 - beta_1)
        )

        G_W7 = G_W7 - (learing_rate / (np.sqrt(v17 / (1 - beta_2)) + eps)) * (
            m17 / (1 - beta_1)
        )
        G_b7 = G_b7 - (learing_rate / (np.sqrt(v18 / (1 - beta_2)) + eps)) * (
            m18 / (1 - beta_1)
        )

        # --- Print Error ----
        # print("Current Iter: ",iter, " Current D cost:",D_cost, " Current G cost: ", G_cost,end='\r')

        # Learning-rate decay.  NOTE(review): with num_epoch = 10 the
        # iter == 40 branch is unreachable.
        if iter == 0:
            learing_rate = learing_rate * 0.01
        if iter == 40:
            learing_rate = learing_rate * 0.01

        # ---- Print to Out put ----
        # Every 10th iteration: log costs and save a 4x4 grid of samples.
        if iter % 10 == 0:

            print(
                "Current Iter: ",
                iter,
                " Current D cost:",
                D_cost,
                " Current G cost: ",
                G_cost,
                end="\r",
            )
            print("--------- Show Example Result See Tab Above ----------")
            print("--------- Wait for the image to load ---------")
            Z = np.random.uniform(-1.0, 1.0, size=[16, G_input])

            Gl1 = Z.dot(G_W1) + G_b1
            Gl1A = arctan(Gl1)
            Gl2 = Gl1A.dot(G_W2) + G_b2
            Gl2A = ReLu(Gl2)
            Gl3 = Gl2A.dot(G_W3) + G_b3
            Gl3A = arctan(Gl3)

            Gl4 = Gl3A.dot(G_W4) + G_b4
            Gl4A = ReLu(Gl4)
            Gl5 = Gl4A.dot(G_W5) + G_b5
            Gl5A = tanh(Gl5)
            Gl6 = Gl5A.dot(G_W6) + G_b6
            Gl6A = ReLu(Gl6)
            Gl7 = Gl6A.dot(G_W7) + G_b7

            current_fake_data = log(Gl7)

            fig = plot(current_fake_data)
            fig.savefig(
                "Click_Me_{}.png".format(
                    str(iter).zfill(3)
                    + "_Ginput_"
                    + str(G_input)
                    + "_hiddenone"
                    + str(hidden_input)
                    + "_hiddentwo"
                    + str(hidden_input2)
                    + "_LR_"
                    + str(learing_rate)
                ),
                bbox_inches="tight",
            )
# for complete explanation visit https://towardsdatascience.com/only-numpy-implementing-gan-general-adversarial-networks-and-adam-optimizer-using-numpy-with-2a7e4e032021
# -- end code --
507