TheAlgorithms-Python

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle
import input_data  # local helper: input_data.py from the TensorFlow MNIST tutorials

random_number = 42

np.random.seed(random_number)
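

# Activation functions and their analytic derivatives, used for the manual
# backpropagation below. Note that log() is the logistic sigmoid
# 1 / (1 + exp(-x)), not a logarithm.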
def ReLu(x):
    mask = (x > 0) * 1.0
    return mask * x


def d_ReLu(x):
    mask = (x > 0) * 1.0
    return mask


def arctan(x):
    return np.arctan(x)


def d_arctan(x):
    return 1 / (1 + x ** 2)


def log(x):
    return 1 / (1 + np.exp(-1 * x))


def d_log(x):
    return log(x) * (1 - log(x))


def tanh(x):
    return np.tanh(x)


def d_tanh(x):
    return 1 - np.tanh(x) ** 2


def plot(samples):
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)

    for i, sample in enumerate(samples):
        ax = plt.subplot(gs[i])
        plt.axis("off")
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect("equal")
        plt.imshow(sample.reshape(28, 28), cmap="Greys_r")

    return fig


if __name__ == "__main__":
    # 1. Load Data and declare hyperparameters
    print("--------- Load Data ----------")
    mnist = input_data.read_data_sets("MNIST_data", one_hot=False)
    temp = mnist.test
    images, labels = temp.images, temp.labels
    images, labels = shuffle(np.asarray(images), np.asarray(labels))
    num_epoch = 10
    learning_rate = 0.00009
    G_input = 100
    hidden_input, hidden_input2, hidden_input3 = 128, 256, 346
    hidden_input4, hidden_input5, hidden_input6 = 480, 560, 686

    print("--------- Declare Hyper Parameters ----------")
    # 2. Declare Weights
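    # Each weight matrix is drawn from N(0, 1 / sqrt(fan_in / 2)) (a He-style
    # scale) and then shrunk by a further factor of 0.002; all biases start at zero.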
    D_W1 = (
        np.random.normal(size=(784, hidden_input), scale=(1.0 / np.sqrt(784 / 2.0)))
        * 0.002
    )
    # D_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.)))       *0.002
    D_b1 = np.zeros(hidden_input)

    D_W2 = (
        np.random.normal(
            size=(hidden_input, 1), scale=(1.0 / np.sqrt(hidden_input / 2.0))
        )
        * 0.002
    )
    # D_b2 = np.random.normal(size=(1),scale=(1. / np.sqrt(1 / 2.)))           *0.002
    D_b2 = np.zeros(1)

    G_W1 = (
        np.random.normal(
            size=(G_input, hidden_input), scale=(1.0 / np.sqrt(G_input / 2.0))
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.)))      *0.002
    G_b1 = np.zeros(hidden_input)

    G_W2 = (
        np.random.normal(
            size=(hidden_input, hidden_input2),
            scale=(1.0 / np.sqrt(hidden_input / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.)))      *0.002
    G_b2 = np.zeros(hidden_input2)

    G_W3 = (
        np.random.normal(
            size=(hidden_input2, hidden_input3),
            scale=(1.0 / np.sqrt(hidden_input2 / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.)))      *0.002
    G_b3 = np.zeros(hidden_input3)

    G_W4 = (
        np.random.normal(
            size=(hidden_input3, hidden_input4),
            scale=(1.0 / np.sqrt(hidden_input3 / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.)))      *0.002
    G_b4 = np.zeros(hidden_input4)

    G_W5 = (
        np.random.normal(
            size=(hidden_input4, hidden_input5),
            scale=(1.0 / np.sqrt(hidden_input4 / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.)))      *0.002
    G_b5 = np.zeros(hidden_input5)

    G_W6 = (
        np.random.normal(
            size=(hidden_input5, hidden_input6),
            scale=(1.0 / np.sqrt(hidden_input5 / 2.0)),
        )
        * 0.002
    )
    # G_b1 = np.random.normal(size=(128),scale=(1. / np.sqrt(128 / 2.)))      *0.002
    G_b6 = np.zeros(hidden_input6)

    G_W7 = (
        np.random.normal(
            size=(hidden_input6, 784), scale=(1.0 / np.sqrt(hidden_input6 / 2.0))
        )
        * 0.002
    )
    # G_b2 = np.random.normal(size=(784),scale=(1. / np.sqrt(784 / 2.)))      *0.002
    G_b7 = np.zeros(784)
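
    # Adam optimizer state: m* hold first-moment (mean) estimates and v* hold
    # second-moment estimates, one pair per parameter tensor. The updates below
    # apply the bias-corrected Adam rule
    #   theta <- theta - lr * (m / (1 - beta_1)) / (sqrt(v / (1 - beta_2)) + eps)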
    # 3. For Adam Optimizer
    v1, m1 = 0, 0
    v2, m2 = 0, 0
    v3, m3 = 0, 0
    v4, m4 = 0, 0

    v5, m5 = 0, 0
    v6, m6 = 0, 0
    v7, m7 = 0, 0
    v8, m8 = 0, 0
    v9, m9 = 0, 0
    v10, m10 = 0, 0
    v11, m11 = 0, 0
    v12, m12 = 0, 0

    v13, m13 = 0, 0
    v14, m14 = 0, 0

    v15, m15 = 0, 0
    v16, m16 = 0, 0

    v17, m17 = 0, 0
    v18, m18 = 0, 0

    beta_1, beta_2, eps = 0.9, 0.999, 0.00000001

    print("--------- Started Training ----------")
    for epoch in range(num_epoch):
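        # Each "epoch" here is a single training step: the discriminator sees one
        # real image and one generated image, then the generator takes one step.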

        random_int = np.random.randint(len(images) - 5)
        current_image = np.expand_dims(images[random_int], axis=0)

        # Func: Generate The first Fake Data
        Z = np.random.uniform(-1.0, 1.0, size=[1, G_input])
        Gl1 = Z.dot(G_W1) + G_b1
        Gl1A = arctan(Gl1)
        Gl2 = Gl1A.dot(G_W2) + G_b2
        Gl2A = ReLu(Gl2)
        Gl3 = Gl2A.dot(G_W3) + G_b3
        Gl3A = arctan(Gl3)

        Gl4 = Gl3A.dot(G_W4) + G_b4
        Gl4A = ReLu(Gl4)
        Gl5 = Gl4A.dot(G_W5) + G_b5
        Gl5A = tanh(Gl5)
        Gl6 = Gl5A.dot(G_W6) + G_b6
        Gl6A = ReLu(Gl6)
        Gl7 = Gl6A.dot(G_W7) + G_b7

        current_fake_data = log(Gl7)
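
        # Run the discriminator on the real image and on the freshly generated
        # fake image; Dl2_rA and Dl2_fA approximate D(x) and D(G(z)).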
        # Func: Forward Feed for Real data
        Dl1_r = current_image.dot(D_W1) + D_b1
        Dl1_rA = ReLu(Dl1_r)
        Dl2_r = Dl1_rA.dot(D_W2) + D_b2
        Dl2_rA = log(Dl2_r)

        # Func: Forward Feed for Fake Data
        Dl1_f = current_fake_data.dot(D_W1) + D_b1
        Dl1_fA = ReLu(Dl1_f)
        Dl2_f = Dl1_fA.dot(D_W2) + D_b2
        Dl2_fA = log(Dl2_f)

        # Func: Cost D, i.e. -log(D(x)) - log(1 - D(G(z)))
        D_cost = -np.log(Dl2_rA) - np.log(1.0 - Dl2_fA)
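
        # Backpropagate through the two-layer discriminator, once along the fake
        # branch (grad_f_*) and once along the real branch (grad_r_*); the two
        # gradients are summed before the Adam step.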
        # Func: Gradient
        grad_f_w2_part_1 = 1 / (1.0 - Dl2_fA)
        grad_f_w2_part_2 = d_log(Dl2_f)
        grad_f_w2_part_3 = Dl1_fA
        grad_f_w2 = grad_f_w2_part_3.T.dot(grad_f_w2_part_1 * grad_f_w2_part_2)
        grad_f_b2 = grad_f_w2_part_1 * grad_f_w2_part_2

        grad_f_w1_part_1 = (grad_f_w2_part_1 * grad_f_w2_part_2).dot(D_W2.T)
        grad_f_w1_part_2 = d_ReLu(Dl1_f)
        grad_f_w1_part_3 = current_fake_data
        grad_f_w1 = grad_f_w1_part_3.T.dot(grad_f_w1_part_1 * grad_f_w1_part_2)
        grad_f_b1 = grad_f_w1_part_1 * grad_f_w1_part_2

        grad_r_w2_part_1 = -1 / Dl2_rA
        grad_r_w2_part_2 = d_log(Dl2_r)
        grad_r_w2_part_3 = Dl1_rA
        grad_r_w2 = grad_r_w2_part_3.T.dot(grad_r_w2_part_1 * grad_r_w2_part_2)
        grad_r_b2 = grad_r_w2_part_1 * grad_r_w2_part_2

        grad_r_w1_part_1 = (grad_r_w2_part_1 * grad_r_w2_part_2).dot(D_W2.T)
        grad_r_w1_part_2 = d_ReLu(Dl1_r)
        grad_r_w1_part_3 = current_image
        grad_r_w1 = grad_r_w1_part_3.T.dot(grad_r_w1_part_1 * grad_r_w1_part_2)
        grad_r_b1 = grad_r_w1_part_1 * grad_r_w1_part_2

        grad_w1 = grad_f_w1 + grad_r_w1
        grad_b1 = grad_f_b1 + grad_r_b1

        grad_w2 = grad_f_w2 + grad_r_w2
        grad_b2 = grad_f_b2 + grad_r_b2

        # ---- Update Gradient ----
        m1 = beta_1 * m1 + (1 - beta_1) * grad_w1
        v1 = beta_2 * v1 + (1 - beta_2) * grad_w1 ** 2

        m2 = beta_1 * m2 + (1 - beta_1) * grad_b1
        v2 = beta_2 * v2 + (1 - beta_2) * grad_b1 ** 2

        m3 = beta_1 * m3 + (1 - beta_1) * grad_w2
        v3 = beta_2 * v3 + (1 - beta_2) * grad_w2 ** 2

        m4 = beta_1 * m4 + (1 - beta_1) * grad_b2
        v4 = beta_2 * v4 + (1 - beta_2) * grad_b2 ** 2

        D_W1 = D_W1 - (learning_rate / (np.sqrt(v1 / (1 - beta_2)) + eps)) * (
            m1 / (1 - beta_1)
        )
        D_b1 = D_b1 - (learning_rate / (np.sqrt(v2 / (1 - beta_2)) + eps)) * (
            m2 / (1 - beta_1)
        )

        D_W2 = D_W2 - (learning_rate / (np.sqrt(v3 / (1 - beta_2)) + eps)) * (
            m3 / (1 - beta_1)
        )
        D_b2 = D_b2 - (learning_rate / (np.sqrt(v4 / (1 - beta_2)) + eps)) * (
            m4 / (1 - beta_1)
        )

        # Func: Forward Feed for G
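        # Generator update: draw a fresh latent vector z, run it through the
        # generator and the (now fixed) discriminator, then backpropagate the
        # non-saturating loss -log(D(G(z))) back to the generator weights.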
        Z = np.random.uniform(-1.0, 1.0, size=[1, G_input])
        Gl1 = Z.dot(G_W1) + G_b1
        Gl1A = arctan(Gl1)
        Gl2 = Gl1A.dot(G_W2) + G_b2
        Gl2A = ReLu(Gl2)
        Gl3 = Gl2A.dot(G_W3) + G_b3
        Gl3A = arctan(Gl3)

        Gl4 = Gl3A.dot(G_W4) + G_b4
        Gl4A = ReLu(Gl4)
        Gl5 = Gl4A.dot(G_W5) + G_b5
        Gl5A = tanh(Gl5)
        Gl6 = Gl5A.dot(G_W6) + G_b6
        Gl6A = ReLu(Gl6)
        Gl7 = Gl6A.dot(G_W7) + G_b7

        current_fake_data = log(Gl7)

        Dl1 = current_fake_data.dot(D_W1) + D_b1
        Dl1_A = ReLu(Dl1)
        Dl2 = Dl1_A.dot(D_W2) + D_b2
        Dl2_A = log(Dl2)

        # Func: Cost G
        G_cost = -np.log(Dl2_A)

        # Func: Gradient
        grad_G_w7_part_1 = ((-1 / Dl2_A) * d_log(Dl2).dot(D_W2.T) * (d_ReLu(Dl1))).dot(
            D_W1.T
        )
        grad_G_w7_part_2 = d_log(Gl7)
        grad_G_w7_part_3 = Gl6A
        grad_G_w7 = grad_G_w7_part_3.T.dot(grad_G_w7_part_1 * grad_G_w7_part_2)
        grad_G_b7 = grad_G_w7_part_1 * grad_G_w7_part_2

        grad_G_w6_part_1 = (grad_G_w7_part_1 * grad_G_w7_part_2).dot(G_W7.T)
        grad_G_w6_part_2 = d_ReLu(Gl6)
        grad_G_w6_part_3 = Gl5A
        grad_G_w6 = grad_G_w6_part_3.T.dot(grad_G_w6_part_1 * grad_G_w6_part_2)
        grad_G_b6 = grad_G_w6_part_1 * grad_G_w6_part_2

        grad_G_w5_part_1 = (grad_G_w6_part_1 * grad_G_w6_part_2).dot(G_W6.T)
        grad_G_w5_part_2 = d_tanh(Gl5)
        grad_G_w5_part_3 = Gl4A
        grad_G_w5 = grad_G_w5_part_3.T.dot(grad_G_w5_part_1 * grad_G_w5_part_2)
        grad_G_b5 = grad_G_w5_part_1 * grad_G_w5_part_2

        grad_G_w4_part_1 = (grad_G_w5_part_1 * grad_G_w5_part_2).dot(G_W5.T)
        grad_G_w4_part_2 = d_ReLu(Gl4)
        grad_G_w4_part_3 = Gl3A
        grad_G_w4 = grad_G_w4_part_3.T.dot(grad_G_w4_part_1 * grad_G_w4_part_2)
        grad_G_b4 = grad_G_w4_part_1 * grad_G_w4_part_2

        grad_G_w3_part_1 = (grad_G_w4_part_1 * grad_G_w4_part_2).dot(G_W4.T)
        grad_G_w3_part_2 = d_arctan(Gl3)
        grad_G_w3_part_3 = Gl2A
        grad_G_w3 = grad_G_w3_part_3.T.dot(grad_G_w3_part_1 * grad_G_w3_part_2)
        grad_G_b3 = grad_G_w3_part_1 * grad_G_w3_part_2

        grad_G_w2_part_1 = (grad_G_w3_part_1 * grad_G_w3_part_2).dot(G_W3.T)
        grad_G_w2_part_2 = d_ReLu(Gl2)
        grad_G_w2_part_3 = Gl1A
        grad_G_w2 = grad_G_w2_part_3.T.dot(grad_G_w2_part_1 * grad_G_w2_part_2)
        grad_G_b2 = grad_G_w2_part_1 * grad_G_w2_part_2

        grad_G_w1_part_1 = (grad_G_w2_part_1 * grad_G_w2_part_2).dot(G_W2.T)
        grad_G_w1_part_2 = d_arctan(Gl1)
        grad_G_w1_part_3 = Z
        grad_G_w1 = grad_G_w1_part_3.T.dot(grad_G_w1_part_1 * grad_G_w1_part_2)
        grad_G_b1 = grad_G_w1_part_1 * grad_G_w1_part_2

        # ---- Update Gradient ----
        m5 = beta_1 * m5 + (1 - beta_1) * grad_G_w1
        v5 = beta_2 * v5 + (1 - beta_2) * grad_G_w1 ** 2

        m6 = beta_1 * m6 + (1 - beta_1) * grad_G_b1
        v6 = beta_2 * v6 + (1 - beta_2) * grad_G_b1 ** 2

        m7 = beta_1 * m7 + (1 - beta_1) * grad_G_w2
        v7 = beta_2 * v7 + (1 - beta_2) * grad_G_w2 ** 2

        m8 = beta_1 * m8 + (1 - beta_1) * grad_G_b2
        v8 = beta_2 * v8 + (1 - beta_2) * grad_G_b2 ** 2

        m9 = beta_1 * m9 + (1 - beta_1) * grad_G_w3
        v9 = beta_2 * v9 + (1 - beta_2) * grad_G_w3 ** 2

        m10 = beta_1 * m10 + (1 - beta_1) * grad_G_b3
        v10 = beta_2 * v10 + (1 - beta_2) * grad_G_b3 ** 2

        m11 = beta_1 * m11 + (1 - beta_1) * grad_G_w4
        v11 = beta_2 * v11 + (1 - beta_2) * grad_G_w4 ** 2

        m12 = beta_1 * m12 + (1 - beta_1) * grad_G_b4
        v12 = beta_2 * v12 + (1 - beta_2) * grad_G_b4 ** 2

        m13 = beta_1 * m13 + (1 - beta_1) * grad_G_w5
        v13 = beta_2 * v13 + (1 - beta_2) * grad_G_w5 ** 2

        m14 = beta_1 * m14 + (1 - beta_1) * grad_G_b5
        v14 = beta_2 * v14 + (1 - beta_2) * grad_G_b5 ** 2

        m15 = beta_1 * m15 + (1 - beta_1) * grad_G_w6
        v15 = beta_2 * v15 + (1 - beta_2) * grad_G_w6 ** 2

        m16 = beta_1 * m16 + (1 - beta_1) * grad_G_b6
        v16 = beta_2 * v16 + (1 - beta_2) * grad_G_b6 ** 2

        m17 = beta_1 * m17 + (1 - beta_1) * grad_G_w7
        v17 = beta_2 * v17 + (1 - beta_2) * grad_G_w7 ** 2

        m18 = beta_1 * m18 + (1 - beta_1) * grad_G_b7
        v18 = beta_2 * v18 + (1 - beta_2) * grad_G_b7 ** 2

        G_W1 = G_W1 - (learning_rate / (np.sqrt(v5 / (1 - beta_2)) + eps)) * (
            m5 / (1 - beta_1)
        )
        G_b1 = G_b1 - (learning_rate / (np.sqrt(v6 / (1 - beta_2)) + eps)) * (
            m6 / (1 - beta_1)
        )

        G_W2 = G_W2 - (learning_rate / (np.sqrt(v7 / (1 - beta_2)) + eps)) * (
            m7 / (1 - beta_1)
        )
        G_b2 = G_b2 - (learning_rate / (np.sqrt(v8 / (1 - beta_2)) + eps)) * (
            m8 / (1 - beta_1)
        )

        G_W3 = G_W3 - (learning_rate / (np.sqrt(v9 / (1 - beta_2)) + eps)) * (
            m9 / (1 - beta_1)
        )
        G_b3 = G_b3 - (learning_rate / (np.sqrt(v10 / (1 - beta_2)) + eps)) * (
            m10 / (1 - beta_1)
        )

        G_W4 = G_W4 - (learning_rate / (np.sqrt(v11 / (1 - beta_2)) + eps)) * (
            m11 / (1 - beta_1)
        )
        G_b4 = G_b4 - (learning_rate / (np.sqrt(v12 / (1 - beta_2)) + eps)) * (
            m12 / (1 - beta_1)
        )

        G_W5 = G_W5 - (learning_rate / (np.sqrt(v13 / (1 - beta_2)) + eps)) * (
            m13 / (1 - beta_1)
        )
        G_b5 = G_b5 - (learning_rate / (np.sqrt(v14 / (1 - beta_2)) + eps)) * (
            m14 / (1 - beta_1)
        )

        G_W6 = G_W6 - (learning_rate / (np.sqrt(v15 / (1 - beta_2)) + eps)) * (
            m15 / (1 - beta_1)
        )
        G_b6 = G_b6 - (learning_rate / (np.sqrt(v16 / (1 - beta_2)) + eps)) * (
            m16 / (1 - beta_1)
        )

        G_W7 = G_W7 - (learning_rate / (np.sqrt(v17 / (1 - beta_2)) + eps)) * (
            m17 / (1 - beta_1)
        )
        G_b7 = G_b7 - (learning_rate / (np.sqrt(v18 / (1 - beta_2)) + eps)) * (
            m18 / (1 - beta_1)
        )

        # --- Print Error ----
        # print("Current Iter: ",epoch, " Current D cost:",D_cost, " Current G cost: ", G_cost,end='\r')

        if epoch == 0:
            learning_rate = learning_rate * 0.01
        if epoch == 40:  # note: not reached with num_epoch = 10
            learning_rate = learning_rate * 0.01

        # ---- Print to Output ----
        if epoch % 10 == 0:

            print(
                "Current Iter: ",
                epoch,
                " Current D cost:",
                D_cost,
                " Current G cost: ",
                G_cost,
                end="\r",
            )
            print("--------- Show Example Result See Tab Above ----------")
            print("--------- Wait for the image to load ---------")
            Z = np.random.uniform(-1.0, 1.0, size=[16, G_input])
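
            # Run the 16 sampled latent vectors through the generator and save
            # the resulting 4x4 grid of digits as a PNG.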
            Gl1 = Z.dot(G_W1) + G_b1
            Gl1A = arctan(Gl1)
            Gl2 = Gl1A.dot(G_W2) + G_b2
            Gl2A = ReLu(Gl2)
            Gl3 = Gl2A.dot(G_W3) + G_b3
            Gl3A = arctan(Gl3)

            Gl4 = Gl3A.dot(G_W4) + G_b4
            Gl4A = ReLu(Gl4)
            Gl5 = Gl4A.dot(G_W5) + G_b5
            Gl5A = tanh(Gl5)
            Gl6 = Gl5A.dot(G_W6) + G_b6
            Gl6A = ReLu(Gl6)
            Gl7 = Gl6A.dot(G_W7) + G_b7

            current_fake_data = log(Gl7)

            fig = plot(current_fake_data)
            fig.savefig(
                "Click_Me_{}.png".format(
                    str(epoch).zfill(3)
                    + "_Ginput_"
                    + str(G_input)
                    + "_hiddenone"
                    + str(hidden_input)
                    + "_hiddentwo"
                    + str(hidden_input2)
                    + "_LR_"
                    + str(learning_rate)
                ),
                bbox_inches="tight",
            )
    # for complete explanation visit https://towardsdatascience.com/only-numpy-implementing-gan-general-adversarial-networks-and-adam-optimizer-using-numpy-with-2a7e4e032021
    # -- end code --