dmgcpu

ALU.v
398 строк · 14.9 Кб
Перенос по словам
1
`timescale 1ns/1ns
2

3
// SM83 ALU top level.
// Wires together: the operand-2 shifter (Comb1/Comb2/Comb3), the per-bit G/P term cells (module2),
// two 4-bit carry generators (module5), the per-bit result cells (module6), the random logic
// block (LargeComb1) and the flag latches (bc, ALU_to_bot_latch).
// Decoder strobes are used through s2_*/s3_* text macros that are defined outside this section.
module ALU (
	input CLK2,
	input CLK4,					// Used as LoadEnable for the ALU_to_bot latch
	input CLK5,
	input CLK6,
	input CLK7,

	input [7:0] DV,				// ALU Operand2
	output [7:0] Res,			// ALU Result
	input AllZeros,				// Res == 0
	input d42,					// Gekkio: s1_cb_00_to_3f
	input d58,					// Gekkio: s1_op_pop_sx10
	input [40:0] w,				// Decoder2 outputs
	input [68:0] x,				// Decoder3 outputs
	output [5:0] bc,
	input [7:0] alu,			// ALU Operand1
	input bq4,
	input bq5,
	input bq7,
	output ALU_to_Thingy,		// ALU Carry Out
	input Temp_C,				// Flag C from temp Z register  (zbus[4])
	input Temp_H,				// Flag H from temp Z register  (zbus[5])
	input Temp_N,				// Flag N from temp Z register  (zbus[6])
	input Temp_Z,				// Flag Z from temp Z register / zbus msb  (zbus[7])
	output ALU_Out1,
	input [7:0] IR,
	input [5:0] nIR
);

	// Internal wires

	wire [7:0] e;				// Operand1 after SET/RES processing; feeds input e of module2
	wire [7:0] f;				// module2 f out; optionally complemented Operand2
	wire [7:0] ca;				// Shifter (Comb1-3) out  (active-low)
	wire [7:0] bx;				// module2 x out
	wire [7:0] bm;				// module2 m out (G-terms)
	wire [7:0] bh;				// module2 h out (P-terms)
	wire [7:0] logic_op;		// module2 w out; result of AND/OR/permutation of Operand2 bits
	wire [7:0] ao;				// G/P AND outputs to module6  (logic xor)
	wire [7:1] na;				// Carries (true polarity) into bits 1..7; inputs to module6
	wire [7:0] q;				// Inverted carries from the carry generators (bits 0-3: topologically left, bits 4-7: topologically right)
	wire [5:0] nbc;				// #bc
	wire [13:0] azo;			// Random logic results
	wire ALU_to_top;			// Carry In
	wire ALU_L0;				// ~carry out
	wire ALU_L3;				// ~Carry4
	wire ALU_L5;				// Carry4
	wire ALU_to_bot;			// Latched zbus[7]; internal because the bc[0]/bc[4] derivation lives inside this module

	// Top part (carry generators + sums)

	// q carries inverted polarity, so a single inversion yields the true carries:
	// q[7] gives the carry out, q[6:0] give the carries into bits 7..1.
	assign ALU_to_Thingy = ~q[7];
	assign na[7:1] = ~q[6:0];

	assign ALU_L5 = na[4];					// half carry out
	assign ALU_L3 = ~na[4];					// ~half carry out
	assign ALU_L0 = ~ALU_to_Thingy;			// ~carry out
	assign ao = bh & bx;					// ANDs next to the carry generators

	module5 cla_low (
		.m(bm[3:0]),
		.h(bh[3:0]),
		.c(ALU_to_top),
		.q(q[3:0]) );

	module5 cla_high (
		.m(bm[7:4]),
		.h(bh[7:4]),
		.c(na[4]),
		.q(q[7:4]) );

	// One result cell per bit; bit 0 takes the external carry in.
	module6 Sums [7:0] (
		.a({na[7:1],ALU_to_top}),
		.b(ao),
		.c({8{`s3_alu_xor}}),
		.d({8{`s3_alu_sum}}),
		.e(logic_op),
		.x(Res) );

	// Middle part

	module2 GP_Terms [7:0] (
		.a(ca),
		.b({8{`s3_alu_logic_and}}),
		.c({8{`s3_alu_logic_or}}),
		.e(e),
		.f(f),
		.g({8{`s3_alu_b_complement}}),
		.h(bh),
		.k(DV),
		.m(bm),
		.x(bx),
		.w(logic_op) );

	// Shifter. Every leg is a {select, data} pair; outputs are active-low.

	Comb3 bit_lsb (
		.clk(CLK2),
		.x(ca[0]),
		.a({`s3_alu_rlc,DV[7]}),
		.b({`s3_alu_rotate_shift_right,DV[1]}),
		.c({`s3_alu_swap,DV[4]}),
		.d({`s3_alu_rl,bc[1]}) );

	// Array of six cells; the leftmost pair of each concatenation belongs to instance [6].
	Comb2 bits_mid [6:1] (
		.clk({6{CLK2}}),
		.x(ca[6:1]),
		.a({
			{`s3_alu_rotate_shift_left,DV[5]},
			{`s3_alu_rotate_shift_left,DV[4]},
			{`s3_alu_rotate_shift_left,DV[3]},
			{`s3_alu_rotate_shift_left,DV[2]},
			{`s3_alu_rotate_shift_left,DV[1]},
			{`s3_alu_rotate_shift_left,DV[0]}}),
		.b({
			{`s3_alu_rotate_shift_right,DV[7]},
			{`s3_alu_rotate_shift_right,DV[6]},
			{`s3_alu_rotate_shift_right,DV[5]},
			{`s3_alu_rotate_shift_right,DV[4]},
			{`s3_alu_rotate_shift_right,DV[3]},
			{`s3_alu_rotate_shift_right,DV[2]}}),
		.c({
			{`s3_alu_swap,DV[2]},
			{`s3_alu_swap,DV[1]},
			{`s3_alu_swap,DV[0]},
			{`s3_alu_swap,DV[7]},
			{`s3_alu_swap,DV[6]},
			{`s3_alu_swap,DV[5]}}) );

	Comb1 bit_msb (
		.clk(CLK2),
		.x(ca[7]),
		.a({`s3_alu_rotate_shift_left,DV[6]}),
		.b({`s3_alu_rr,bc[1]}),
		.c({`s3_alu_sra,DV[7]}),
		.d({`s3_alu_rrc,DV[0]}),
		.e({`s3_alu_swap,DV[3]}) );

	// Random logic (large spaghetti at the bottom)

	LargeComb1 rand_logic (
		.CLK2(CLK2),
		.CLK6(CLK6),
		.CLK7(CLK7),
		.Temp_Z(Temp_Z),
		.AllZeros(AllZeros),
		.d42(d42),
		.d58(d58),
		.w(w),
		.x(x),
		.alu(alu),
		.IR(IR),
		.nIR(nIR),
		.f(f),
		.bc(bc),
		.nbc(nbc),
		.ALU_to_Thingy(ALU_to_Thingy),
		.ALU_L0(ALU_L0),
		.Temp_H(Temp_H),
		.Temp_C(Temp_C),
		.ALU_L3(ALU_L3),
		.Temp_N(Temp_N),
		.ALU_L5(ALU_L5),
		.bq4(bq4),
		.bq5(bq5),
		.bq7(bq7),
		.azo(azo) );

	// Flags (part of the circuit below the spaghetti, some FF and domino inverters)

	// azo[7] and azo[1],[2],[12] go to the flag latches; azo[11] and azo[13] are used below.
	assign e = ~{azo[10],azo[9],azo[8],azo[6],azo[5],azo[4],azo[3],azo[0]};
	assign ALU_to_top = ~azo[13];
	assign ALU_Out1 = ~azo[11];

	// Flag H
	bc bc5 (
		.nd(azo[1]),
		.CLK(CLK6),
		.CCLK(CLK5),
		.Load(`s3_wren_hf_nf_zf),
		.q(bc[5]),
		.nq(nbc[5]) );

	// Flag C
	bc bc1 (
		.nd(azo[2]),
		.CLK(CLK6),
		.CCLK(CLK5),
		.Load(`s3_wren_cf),
		.q(bc[1]),
		.nq(nbc[1]) );

	// Flag N
	bc bc2 (
		.nd(azo[7]),
		.CLK(CLK6),
		.CCLK(CLK5),
		.Load(`s3_wren_hf_nf_zf),
		.q(bc[2]),
		.nq(nbc[2]) );

	// Flag Z
	bc bc3 (
		.nd(azo[12]),
		.CLK(CLK6),
		.CCLK(CLK5),
		.Load(`s3_wren_hf_nf_zf),
		.q(bc[3]),
		.nq(nbc[3]) );

	// zbus msb latch
	ALU_to_bot_latch zbus_msb (
		.d( Temp_Z /* =zbus[7] */ ),
		.CLK(CLK6),
		.CCLK(CLK5),
		.Load(CLK4),
		.q(ALU_to_bot) );

	// About "bc": although bc0/bc4 sit at the bottom of the chip, they are treated here as part of the ALU.
	// Their derivation was therefore moved inside this module, which is why bc[5:0] is an output.

	assign bc[0] = (IR[4] & IR[5] & `s2_op_push_sx10);
	assign bc[4] = ALU_to_bot & `s2_op_sp_e_sx10;
	assign nbc[0] = ~bc[0];
	assign nbc[4] = ~bc[4];

endmodule // ALU
147

148
// Carry lookahead generator
// Produces the four carries of a 4-bit group from G/P terms and a carry in.
// The carries are computed in true polarity and inverted once on the way out.
module module5 ( m, h, c, q );

	input [3:0] m;		// G terms
	input [3:0] h;		// P terms
	input c;			// Carry in
	/* verilator lint_off UNOPTFLAT */
	output [3:0] q;		// ~C1 ... ~C4

	wire c1 = m[0] | (h[0] & c);		// Carry1
	wire c2 = m[1] | (h[1] & c1);		// Carry2
	wire c3 = m[2] | (h[2] & c2);		// Carry3
	wire c4 = m[3] | (h[3] & c3);		// Carry4

	assign q = ~{c4, c3, c2, c1};

endmodule // module5
163

164
// Sums block
// One result bit: OR of the gated b term, the gated a^b sum and the logic/permutation result.
module module6 (
	input a,
	input b,
	input c,			// x18 (s3_alu_xor)
	input d,			// x3 (s3_alu_sum)
	input e,			// Result of the logical operation AND/OR/permutation of Operand2 bits
	output x
);

	wire xor_term = b & c;				// b passed through when c is set
	wire sum_term = (a ^ b) & d;		// a ^ b passed through when d is set

	assign x = xor_term | sum_term | e;

endmodule // module6
177

178
// G/P Terms Product.
// One bit slice that produces the G/P terms and reuses them for the logical AND/OR operations.
// It also carries the shifter result through to the output w.
module module2 (
	input a,			// Result of permutation (shift/rotate/swap) of Operand2 bits; [!] active-low input
	input b,			// x19 (s3_alu_logic_and)
	input c,			// x4 (s3_alu_logic_or)
	input e,			// Large Comb results; result of executing SET/RES opcodes for operand1
	output f,			// To Large Comb NAND trees; Operand2 optionally complemented
	input g,			// x25 (s3_alu_b_complement)
	output h,			// To carry generator (P-terms)
	input k,			// Operand2: DV[n]
	output m,			// To carry generator (G-terms)
	output x,			// To ANDs near the carry generator
	output w			// To Sums; result of the logical operation AND/OR/permutation of Operand2 bits
);

	// The shifter result is kept on a storage element because the shifter is dynamic logic:
	// while its clock is low the Comb cells drive 1 (the complement of a result bit of 0).
	// For a left shift nothing is multiplexed into the lsb cell, so the lsb relies on that
	// stored precharge value and ends up as 0.
	wire shift_res_q;		// <-- active low
	BusKeeper perm_ff ( .d(a), .q(shift_res_q) );

	assign f = k ^ g;		// Operand2 bit, inverted when g is set
	assign m = e & f;		// G term
	assign x = ~m;
	assign h = e | f;		// P term

	// 3-input OR (De Morgan form of the NAND of the three active-low legs)
	assign w = ~shift_res_q | (b & m) | (c & h);

endmodule // module2
207

208
// AOI-22222 dynamic (5 ANDs to OR Inverted)
// While clk is low the output is forced to 1; while clk is high it is the inverted OR of the five pair-ANDs.
module Comb1 (
	input clk,
	output x,
	input [1:0] a,
	input [1:0] b,
	input [1:0] c,
	input [1:0] d,
	input [1:0] e
);

	// Each 2-bit leg conducts only when both of its bits are set.
	wire pull_down = (&a) | (&b) | (&c) | (&d) | (&e);

	assign x = clk ? ~pull_down : 1'b1;

endmodule // Comb1
222

223
// AOI-222 dynamic (3 ANDs to OR Inverted)
// While clk is low the output is forced to 1; while clk is high it is the inverted OR of the three pair-ANDs.
module Comb2 (
	input clk,
	output x,
	input [1:0] a,
	input [1:0] b,
	input [1:0] c
);

	// Each 2-bit leg conducts only when both of its bits are set.
	wire pull_down = (&a) | (&b) | (&c);

	assign x = clk ? ~pull_down : 1'b1;

endmodule // Comb2
235

236
// AOI-2222 dynamic (4 ANDs to OR Inverted)
// While clk is low the output is forced to 1; while clk is high it is the inverted OR of the four pair-ANDs.
module Comb3 (
	input clk,
	output x,
	input [1:0] a,
	input [1:0] b,
	input [1:0] c,
	input [1:0] d
);

	// Each 2-bit leg conducts only when both of its bits are set.
	wire pull_down = (&a) | (&b) | (&c) | (&d);

	assign x = clk ? ~pull_down : 1'b1;

endmodule // Comb3
249

250
// Random logic
// Fourteen static AND-OR-invert trees (az) whose outputs are driven onto azo_latched only during
// their clock phase (high-Z otherwise) and then passed through BusKeeper cells to azo.
// Decoder strobes are used through s2_*/s3_* text macros that are defined outside this section.
module LargeComb1 (
	input CLK2,
	input CLK6,
	input CLK7,
	input Temp_Z,
	input AllZeros,
	input d42,
	input d58,
	input [40:0] w,
	input [68:0] x,
	input [7:0] alu,
	input [7:0] IR,
	input [5:0] nIR,
	input [7:0] f,
	input [5:0] bc,
	input [5:0] nbc,
	input ALU_to_Thingy,
	input ALU_L0,
	input Temp_H,
	input Temp_C,
	input ALU_L3,
	input Temp_N,
	input ALU_L5,
	input bq4,
	input bq5,
	input bq7,
	output [13:0] azo			// "azo" means nothing in particular, it is just the name of the random logic results
);

	wire [13:0] azo_latched;	// Dynamic logic outputs, inputs of the BusKeeper cells
	wire [13:0] az;				// Random logic results (non-dynamic)

	// ALU trees (written by hand). Tree numbering is topological (how they are arranged on the chip),
	// so related trees are not numbered consecutively.
	// Random logic in SM83 keeps all related calculations in one place on the die: convenient because
	// the logic is isolated, confusing because the result is a plate of noodles.

	// Trees 0,3-6,8-10 preprocess operand 1 for SET/RES opcodes (CB table) and for DAA (decimal correction).
	// Tree 11 checks the condition code of conditional instructions (NZ/Z/NC/C).

	// Bit-number decode from IR[5:3], shared by SET, RES and BIT:
	// bit_hit[n]  - AND form, true when IR[5:3] selects bit n
	// bit_miss[n] - OR form built from the opposite-polarity lines
	wire [7:0] bit_hit;
	wire [7:0] bit_miss;

	assign bit_hit[0] = nIR[3] & nIR[4] & nIR[5];
	assign bit_hit[1] =  IR[3] & nIR[4] & nIR[5];
	assign bit_hit[2] = nIR[3] &  IR[4] & nIR[5];
	assign bit_hit[3] =  IR[3] &  IR[4] & nIR[5];
	assign bit_hit[4] = nIR[3] & nIR[4] &  IR[5];
	assign bit_hit[5] =  IR[3] & nIR[4] &  IR[5];
	assign bit_hit[6] = nIR[3] &  IR[4] &  IR[5];
	assign bit_hit[7] =  IR[3] &  IR[4] &  IR[5];

	assign bit_miss[0] =  IR[3] |  IR[4] |  IR[5];
	assign bit_miss[1] = nIR[3] |  IR[4] |  IR[5];
	assign bit_miss[2] =  IR[3] | nIR[4] |  IR[5];
	assign bit_miss[3] = nIR[3] | nIR[4] |  IR[5];
	assign bit_miss[4] =  IR[3] |  IR[4] | nIR[5];
	assign bit_miss[5] = nIR[3] |  IR[4] | nIR[5];
	assign bit_miss[6] =  IR[3] | nIR[4] | nIR[5];
	assign bit_miss[7] = nIR[3] | nIR[4] | nIR[5];

	// Operand 1 preprocessing, bit 0
	assign az[0] = ~(
		alu[0] |
		(`s2_alu_set&bit_hit[0]) |
		(`s2_alu_res&(bit_miss[0])) );

	// -> bc5 (Flag H)
	assign az[1] = ~(
		(ALU_L5&((nIR[0]&`s2_op_incdec8)|`s3_alu_sum_pos_hf_cf)) |
		(ALU_L3&`s3_alu_sum_neg_hf_nf) |
		`s3_alu_cpl |
		`s2_cb_bit |
		`s3_alu_logic_and |
		(Temp_H&d58) );

	// -> bc1 (Flag C)
	assign az[2] = ~(
		(f[0]&`s3_alu_rotate_shift_right) |
		(Temp_C&d58) |
		(nbc[1]&IR[3]&`s3_alu_ccf_scf) |
		(`s3_alu_ccf_scf&nIR[3]) |
		(`s3_alu_sum_pos_hf_cf&ALU_to_Thingy) |
		(`s3_alu_daa&(bc[1]|(nbc[2]&ALU_to_Thingy))) |
		(bc[1]&`s3_alu_cpl) |
		(f[7]&`s3_alu_rotate_shift_left) |
		(ALU_L0&`s3_alu_sum_neg_cf) );

	// Operand 1 preprocessing, bits 1..4
	assign az[3] = ~(
		alu[1] |
		(`s2_alu_set&bit_hit[1]) |
		(`s2_alu_res&(bit_miss[1])) |
		(`s3_alu_daa&(bc[5]|(nbc[2]&bq4))) );

	assign az[4] = ~(
		alu[2] |
		(`s2_alu_set&bit_hit[2]) |
		(`s2_alu_res&(bit_miss[2])) |
		(`s3_alu_daa&nbc[2]&(bq4|bc[5])) );

	assign az[5] = ~(
		alu[3] |
		(`s2_alu_set&bit_hit[3]) |
		(`s2_alu_res&(bit_miss[3])) |
		(`s3_alu_daa&bc[2]&bc[5]) );

	assign az[6] = ~(
		alu[4] |
		(`s2_alu_set&bit_hit[4]) |
		(`s2_alu_res&(bit_miss[4])) |
		(`s3_alu_daa&bc[2]&bc[5]) );

	// -> bc2 (Flag N)
	assign az[7] = ~(
		(bc[2]&`s3_alu_daa) |
		`s3_alu_sum_neg_hf_nf |
		`s3_alu_cpl |
		(Temp_N&d58) );

	// Operand 1 preprocessing, bits 5..7
	assign az[8] = ~(
		alu[5] |
		(`s2_alu_set&bit_hit[5]) |
		(`s2_alu_res&(bit_miss[5])) |
		(bc[2]&`s3_alu_daa&((bc[1]&nbc[5])|(nbc[1]&bc[5]))) |
		(nbc[2]&`s3_alu_daa&((bq5)|(bc[1])|(bq4&bq7))) );

	assign az[9] = ~(
		alu[6] |
		(`s2_alu_set&bit_hit[6]) |
		(`s2_alu_res&(bit_miss[6])) |
		(bc[2]&`s3_alu_daa&(nbc[1]&bc[5])) |
		(nbc[2]&`s3_alu_daa&((bq4&bq7)|(bc[1])|(bq5))) );

	assign az[10] = ~(
		alu[7] |
		(`s2_alu_set&bit_hit[7]) |
		(`s2_alu_res&(bit_miss[7])) |
		(bc[2]&`s3_alu_daa&(bc[1]|bc[5])) );

	// -> ALU_Out1 (skip branch)
	assign az[11] = ~(
		`s2_cc_check & (              // inverted condition check
			(nIR[3]&nIR[4] & bc[3]) | // 00 | Z
			( IR[3]&nIR[4] &nbc[3]) | // 10 | NZ
			(nIR[3]& IR[4] & bc[1]) | // 01 | C
			( IR[3]& IR[4] &nbc[1])   // 11 | NC
		));

	// -> bc3 (Flag Z)
	assign az[12] = ~(
		(f[0]&`s2_cb_bit&bit_hit[0]) |
		(f[1]&`s2_cb_bit&bit_hit[1]) |
		(f[2]&`s2_cb_bit&bit_hit[2]) |
		(f[3]&`s2_cb_bit&bit_hit[3]) |
		(f[4]&`s2_cb_bit&bit_hit[4]) |
		(f[5]&`s2_cb_bit&bit_hit[5]) |
		(f[6]&`s2_cb_bit&bit_hit[6]) |
		(f[7]&`s2_cb_bit&bit_hit[7]) |
		(AllZeros&(d42|`s2_op_alu8|`s2_op_incdec8|`s3_alu_daa)) |
		(d58&Temp_Z) |
		(bc[3]&(`s3_alu_cpl|`s2_op_add_hl_sxx0|`s3_alu_ccf_scf|`s2_op_add_hl_sx01)) );

	// -> ALU_to_top aka CarryIn
	assign az[13] = ~(
		`s3_alu_cp |
		(`s2_op_incdec8&nIR[0]) |
		(`s2_op_sp_e_sx10&bc[1]) |
		(`s3_alu_sub_sbc&(nIR[3]|nbc[1])) |
		(`s2_op_add_hl_sx01&bc[1]) |
		(`s3_alu_add_adc&IR[3]) );

	// Dynamic part

	// Trees evaluated during CLK2 (operand 1 bits and carry in)
	assign azo_latched[0]  = CLK2 ? az[0]  : 1'bz;
	assign azo_latched[3]  = CLK2 ? az[3]  : 1'bz;
	assign azo_latched[4]  = CLK2 ? az[4]  : 1'bz;
	assign azo_latched[5]  = CLK2 ? az[5]  : 1'bz;
	assign azo_latched[6]  = CLK2 ? az[6]  : 1'bz;
	assign azo_latched[8]  = CLK2 ? az[8]  : 1'bz;
	assign azo_latched[9]  = CLK2 ? az[9]  : 1'bz;
	assign azo_latched[10] = CLK2 ? az[10] : 1'bz;
	assign azo_latched[13] = CLK2 ? az[13] : 1'bz;		// -> ALU_to_top aka CarryIn

	// Trees evaluated only while both CLK7 and CLK6 are high (flags and condition check)
	assign azo_latched[1]  = CLK7 ? (CLK6 ? az[1]  : 1'bz) : 1'bz;		// -> bc5   -- Flag H
	assign azo_latched[2]  = CLK7 ? (CLK6 ? az[2]  : 1'bz) : 1'bz;		// -> bc1   -- Flag C
	assign azo_latched[7]  = CLK7 ? (CLK6 ? az[7]  : 1'bz) : 1'bz;		// -> bc2   -- Flag N
	assign azo_latched[11] = CLK7 ? (CLK6 ? az[11] : 1'bz) : 1'bz;		// -> ALU_Out1  -- Skip branch
	assign azo_latched[12] = CLK7 ? (CLK6 ? az[12] : 1'bz) : 1'bz;		// -> bc3   -- Flag Z

	// A storage element is required at least for the asymmetric dynamic logic (the CLK7/CLK6 group,
	// i.e. flags and cc_check); the CLK2 group does not need one but gets it anyway for uniformity.
	// The asymmetric clocking is evidently related to overlapped instruction execution in SM83
	// (CLK6 = writeback; CLK7 = writeback_ext).
	BusKeeper latched_results [13:0] ( .d(azo_latched), .q(azo) );

endmodule // LargeComb1
343

344
// Flag storage cell (bc3=Z / bc2=N / bc5=H / bc1=C). The name "bc" is a leftover from the early stages of research.
// Two stages: val_in follows ~nd while CLK and Load are both high and holds otherwise;
// val_out takes over val_in on the falling edge of Load. CCLK is not used by this model.
module bc (
	input nd,
	input CLK,
	input CCLK,
	input Load,
	output q,
	output nq
);

	reg val_in;
	reg val_out;

	initial begin
		val_in = 1'b0;
		val_out = 1'b0;
	end

	// First stage: level-sensitive capture of the inverted input (no else branch, so the value is held).
	always @(*) begin
		if (CLK && Load)
			val_in = ~nd;
	end

	// Second stage: publish the captured value when Load goes low.
	always @(negedge Load) begin
		val_out <= val_in;
	end

	assign q = val_out;
	assign nq = ~val_out;

endmodule // bc
372

373
// Single-instance storage cell that holds the zbus msb (i.e. the sign).
// Two stages: val_in follows d while CLK and Load are both high and holds otherwise;
// val_out takes over val_in on the falling edge of Load. CCLK is not used by this model.
module ALU_to_bot_latch (
	input d,
	input CLK,
	input CCLK,
	input Load,
	output q
);

	reg val_in;
	reg val_out;

	initial begin
		val_in = 1'b0;
		val_out = 1'b0;
	end

	// First stage: level-sensitive capture of the input (no else branch, so the value is held).
	always @(*) begin
		if (CLK && Load)
			val_in = d;
	end

	// Second stage: publish the captured value when Load goes low.
	always @(negedge Load) begin
		val_out <= val_in;
	end

	assign q = val_out;

endmodule // ALU_to_bot_latch
399
dmgcpu

Использование cookies