3
module ALU ( CLK2, CLK4, CLK5, CLK6, CLK7, DV, Res, AllZeros, d42, d58, w, x, bc, alu, bq4, bq5, bq7, ALU_to_Thingy,
4
Temp_C, Temp_H, Temp_N, Temp_Z, ALU_Out1, IR, nIR );
7
input CLK4; // Used as LoadEnable for ALU_to_bot latch.
12
input [7:0] DV; // ALU Operand2
13
output [7:0] Res; // ALU Result
14
input AllZeros; // Res == 0
15
input d42; // Gekkio: s1_cb_00_to_3f
16
input d58; // Gekkio: s1_op_pop_sx10
17
input [40:0] w; // Decoder2 outputs
18
input [68:0] x; // Decoder3 outputs
20
input [7:0] alu; // ALU Operand1
24
output ALU_to_Thingy; // ALU Carry Out
25
input Temp_C; // Flag C from temp Z register (zbus[4])
26
input Temp_H; // Flag H from temp Z register (zbus[5])
27
input Temp_N; // Flag N from temp Z register (zbus[6])
28
input Temp_Z; // Flag Z from temp Z register / zbus msb (zbus[7])
35
// Internal nets of the ALU module.
wire [7:0] e; // Operand1 processing results for SET/RES opcodes (complement of selected azo bits); module2 e in
36
wire [7:0] f; // module2 f out; optionally complemented Operand2
37
wire [7:0] ca; // Shifter (comb1-3) out (active-low)
38
wire [7:0] bx; // module2 x out
39
wire [7:0] bm; // module2 m out (G-terms)
40
wire [7:0] bh; // module2 h out (P-terms)
41
wire [7:0] logic_op; // module2 w out; the result of the logical operation AND/OR/permutation of Operand2 bits.
42
wire [7:0] ao; // G/P AND outputs (bh & bx) to module6 (logic xor)
43
wire [7:1] na; // CLA carry outputs (complement of q[6:0]); inverter outputs to module6
44
wire [7:0] q; // CLA carry complement outputs (bits 0-3: topologically left, bits 4-7: topologically right)
45
wire [5:0] nbc; // Complement of bc (#bc)
46
wire [13:0] azo; // Random logic results
47
wire ALU_to_top; // Carry In (driven as ~azo[13])
48
wire ALU_L0; // ~Carry7 (complement of the carry out, ALU_to_Thingy)
49
wire ALU_L3; // ~Carry4 (complement of na[4], the half carry out)
50
wire ALU_L5; // Carry4 (na[4], the half carry out)
51
wire ALU_to_bot; // Derived from zbus[7] (output of the zbus msb latch). As a result of optimizing and relocating the `bc` derivation circuit, this signal became internal.
53
// Top part (CLA + Sum)
56
.a({na[7:1],ALU_to_top}),
63
assign ALU_L0 = ~ALU_to_Thingy; // ~cout
64
assign ALU_L3 = ~na[4]; // ~half cout
65
assign ALU_L5 = na[4]; // half cout
66
assign {ALU_to_Thingy, na[7:1]} = ~q;
67
assign ao = bh & bx; // ands
69
module5 cla_low ( .m(bm[3:0]), .h(bh[3:0]), .c(ALU_to_top), .q(q[3:0]) );
70
module5 cla_high ( .m(bm[7:4]), .h(bh[7:4]), .c(na[4]), .q(q[7:4]) );
74
module2 GP_Terms [7:0] (
76
.b({8{`s3_alu_logic_and}}),
77
.c({8{`s3_alu_logic_or}}),
80
.g({8{`s3_alu_b_complement}}),
89
Comb3 bit_lsb ( .clk(CLK2), .x(ca[0]), .a({`s3_alu_rlc,DV[7]}), .b({`s3_alu_rotate_shift_right,DV[1]}), .c({`s3_alu_swap,DV[4]}), .d({`s3_alu_rl,bc[1]}) );
90
Comb2 bits_mid [6:1] ( .clk({6{CLK2}}), .x(ca[6:1]),
91
.a({{`s3_alu_rotate_shift_left,DV[5]},{`s3_alu_rotate_shift_left,DV[4]},{`s3_alu_rotate_shift_left,DV[3]},{`s3_alu_rotate_shift_left,DV[2]},{`s3_alu_rotate_shift_left,DV[1]},{`s3_alu_rotate_shift_left,DV[0]}}),
92
.b({{`s3_alu_rotate_shift_right,DV[7]},{`s3_alu_rotate_shift_right,DV[6]},{`s3_alu_rotate_shift_right,DV[5]},{`s3_alu_rotate_shift_right,DV[4]},{`s3_alu_rotate_shift_right,DV[3]},{`s3_alu_rotate_shift_right,DV[2]}}),
93
.c({{`s3_alu_swap,DV[2]},{`s3_alu_swap,DV[1]},{`s3_alu_swap,DV[0]},{`s3_alu_swap,DV[7]},{`s3_alu_swap,DV[6]},{`s3_alu_swap,DV[5]}}) );
94
Comb1 bit_msb ( .clk(CLK2), .x(ca[7]), .a({`s3_alu_rotate_shift_left,DV[6]}), .b({`s3_alu_rr,bc[1]}), .c({`s3_alu_sra,DV[7]}), .d({`s3_alu_rrc,DV[0]}), .e({`s3_alu_swap,DV[3]}) );
96
// Random logic (large spaghetti at the bottom)
98
LargeComb1 rand_logic (
114
.ALU_to_Thingy(ALU_to_Thingy),
126
// Flags (part of the circuit below spaghetti, some FF and domino inverters)
128
assign e = ~{azo[10],azo[9],azo[8],azo[6],azo[5],azo[4],azo[3],azo[0]};
129
assign ALU_to_top = ~azo[13];
130
assign ALU_Out1 = ~azo[11];
132
bc bc5 ( .nd(azo[1]), .CLK(CLK6), .CCLK(CLK5), .Load(`s3_wren_hf_nf_zf), .q(bc[5]), .nq(nbc[5]) ); // Flag H
133
bc bc1 ( .nd(azo[2]), .CLK(CLK6), .CCLK(CLK5), .Load(`s3_wren_cf), .q(bc[1]), .nq(nbc[1]) ); // Flag C
134
bc bc2 ( .nd(azo[7]), .CLK(CLK6), .CCLK(CLK5), .Load(`s3_wren_hf_nf_zf), .q(bc[2]), .nq(nbc[2]) ); // Flag N
135
bc bc3 ( .nd(azo[12]), .CLK(CLK6), .CCLK(CLK5), .Load(`s3_wren_hf_nf_zf), .q(bc[3]), .nq(nbc[3]) ); // Flag Z
136
ALU_to_bot_latch zbus_msb ( .d( Temp_Z /* =zbus[7] */ ), .CLK(CLK6), .CCLK(CLK5), .Load(CLK4), .q(ALU_to_bot) ); // zbus msb latch
138
// Regarding "bc": I tend to think that even though bc0/bc4 are located at the bottom, they are still part of the ALU.
139
// In my HDL this circuit has been moved inside the ALU instead of being left at the bottom. As a consequence, `wire [5:0] bc;` becomes an output.
141
assign bc[0] = (IR[4] & IR[5] & `s2_op_push_sx10);
142
assign bc[4] = ALU_to_bot & `s2_op_sp_e_sx10;
143
assign nbc[0] = ~bc[0];
144
assign nbc[4] = ~bc[4];
148
// Carry lookahead generator
149
// 4-bit carry chain. In the ALU it is instantiated twice (low and high nibble) with
// m = G-terms (bm), h = P-terms (bh) and c = carry into the nibble.
// Each stage computes Carry[i+1] = m[i] | (h[i] & Carry[i]) and outputs its complement on q[i].
module module5 ( m, h, c, q );
154
// q[i] is computed from q[i-1] of the same vector; presumably this self-dependency is why UNOPTFLAT is silenced here.
/* verilator lint_off UNOPTFLAT */
155
output [3:0] q; // C1...C4 (inverted)
157
assign q[0] = ~(m[0] | (h[0] & c)); // ~Carry1 out; uses the external carry in directly
158
assign q[1] = ~(m[1] | (h[1] & ~q[0])); // ~Carry2 out; ~q[0] is Carry1
159
assign q[2] = ~(m[2] | (h[2] & ~q[1])); // ~Carry3 out; ~q[1] is Carry2
160
assign q[3] = ~(m[3] | (h[3] & ~q[2])); // ~Carry4 out; ~q[2] is Carry3
165
// Per-bit result selector: ORs together the xor path, the sum path and the logic/shifter path.
module module6 ( a, b, c, d, e, x );
169
input c; // x18 (s3_alu_xor); enables the `b` term
170
input d; // x3 (s3_alu_sum); enables the `a ^ b` term
171
input e; // The result of the logical operation AND/OR/permutation of Operand2 bits; passed through unconditionally.
174
// The three terms are simply OR-ed (no priority): b when c=1, a^b when d=1, plus e.
assign x = ( (b & c) | ((a ^ b) & d) | (e) );
179
// The module "hybridizes" the computation of G/P terms by reusing them for logical AND/OR operations. It also contains a Shifter result bypass.
180
module module2 ( a, b, c, e, f, g, h, k, m, x, w );
182
input a; // Result of permutation(shift/rotate/swap) of Operand2 bits; [!] active low input
183
input b; // x19 (s3_alu_logic_and)
184
input c; // x4 (s3_alu_logic_or)
185
input e; // Large Comb results; Result of executing SET/RES opcodes for operand1
186
output f; // To Large Comb NAND trees; Operand2 optionally complemented
187
input g; // x25 (s3_alu_b_complement)
188
output h; // To CLA Generator (P-terms)
189
input k; // Operand2: DV[n]
190
output m; // To CLA Generator (G-terms)
191
output x; // To ands near CLA
192
output w; // To Sums; The result of the logical operation AND/OR/permutation of Operand2 bits.
194
// Missing transparent DLatch that stores the result of the shifter (permutation result). This DLatch is critically needed, for example, when shifting DV to the left; in that case the following happens (get ready, it's complicated):
195
// During CLK2 the dynamic comb of the shifter pre-charges its output to 1 - this is the complement of the shifter result bit (i.e. the result bit is 0). At the same time, the s3_alu_rotate_shift_left command does not select any input of the dynamic comb for the lsb;
196
// Therefore, the output for the lsb will be 0 (more precisely, the complement of the pre-charged 1, which is the value stored in the DLatch).
197
wire shift_res_q; // <-- active low
198
BusKeeper perm_ff (.d(a), .q(shift_res_q) );
204
assign w = ~(shift_res_q & (~(b&m)) & (~(c&h))); // or simply 3-OR, if you demorganize the operation.
208
// AOI-22222 dynamic (5 ANDs to OR Inverted)
// Each of a..e is a 2-bit pair; the two bits of a pair are AND-ed together.
209
module Comb1 ( clk, x, a, b, c, d, e );
219
// While clk is 0 the output is forced to 1; while clk is 1 it is the NOR of the five pair-ANDs.
assign x = clk ? ~((a[0]&a[1]) | (b[0]&b[1]) | (c[0]&c[1]) | (d[0]&d[1]) | (e[0]&e[1])) : 1'b1;
223
// AOI-222 dynamic (3 ANDs to OR Inverted)
// Each of a..c is a 2-bit pair; the two bits of a pair are AND-ed together.
224
module Comb2 ( clk, x, a, b, c );
232
// While clk is 0 the output is forced to 1; while clk is 1 it is the NOR of the three pair-ANDs.
assign x = clk ? ~((a[0]&a[1]) | (b[0]&b[1]) | (c[0]&c[1]) ) : 1'b1;
236
// AOI-2222 dynamic (4 ANDs to OR Inverted)
// Each of a..d is a 2-bit pair; the two bits of a pair are AND-ed together.
237
module Comb3 ( clk, x, a, b, c, d );
246
// While clk is 0 the output is forced to 1; while clk is 1 it is the NOR of the four pair-ANDs.
assign x = clk ? ~((a[0]&a[1]) | (b[0]&b[1]) | (c[0]&c[1]) | (d[0]&d[1]) ) : 1'b1;
251
module LargeComb1 ( CLK2, CLK6, CLK7, Temp_Z, AllZeros, d42, d58, w, x, alu, IR, nIR, f, bc, nbc, ALU_to_Thingy, ALU_L0, Temp_H, Temp_C, ALU_L3, Temp_N, ALU_L5, bq4, bq5, bq7, azo );
278
output [13:0] azo; // "azo" means absolutely nothing, just the name of the random logic results
280
wire [13:0] azo_latched; // Inputs to DLatch from dynamic logic
281
wire [13:0] az; // random logic results (non-dynamic)
283
// ALU Trees (by hand); Tree numbering is topological (how they are arranged on the chip)
284
// Random logic in SM83 is organized in such a way that all related calculations are performed in one place (topologically). On the one hand this is very convenient (the logic is isolated); on the other hand it results in a very confusing tangle ("doshirak", i.e. instant noodles).
286
// ALU trees 0,3-6,8-10 are responsible for preprocessing operand 1 for SET/RES opcodes (CB table) as well as DAA (decimal correction)
287
// Because of the topological numbering of the trees, they don't go in order, which is a bit ugly.
288
// ALU tree 11 deals with code checking for conditional instructions (NZ/Z/NC/C)
290
assign az[0] = ~( alu[0] | (`s2_alu_set&nIR[3]&nIR[4]&nIR[5]) | (`s2_alu_res&(IR[3]|IR[4]|IR[5])) );
291
assign az[1] = ~( (ALU_L5&((nIR[0]&`s2_op_incdec8)|`s3_alu_sum_pos_hf_cf)) | (ALU_L3&`s3_alu_sum_neg_hf_nf) | `s3_alu_cpl | `s2_cb_bit | `s3_alu_logic_and | (Temp_H&d58) );
292
assign az[2] = ~( (f[0]&`s3_alu_rotate_shift_right) | (Temp_C&d58) | (nbc[1]&IR[3]&`s3_alu_ccf_scf) | (`s3_alu_ccf_scf&nIR[3]) | (`s3_alu_sum_pos_hf_cf&ALU_to_Thingy) | (`s3_alu_daa&(bc[1]|(nbc[2]&ALU_to_Thingy))) | (bc[1]&`s3_alu_cpl) | (f[7]&`s3_alu_rotate_shift_left) | (ALU_L0&`s3_alu_sum_neg_cf) );
293
assign az[3] = ~( alu[1] | (`s2_alu_set&IR[3]&nIR[4]&nIR[5]) | (`s2_alu_res&(nIR[3]|IR[4]|IR[5])) | (`s3_alu_daa&(bc[5]|(nbc[2]&bq4))) );
294
assign az[4] = ~( alu[2] | (`s2_alu_set&nIR[3]&IR[4]&nIR[5]) | (`s2_alu_res&(IR[3]|nIR[4]|IR[5])) | (`s3_alu_daa&nbc[2]&(bq4|bc[5])) );
295
assign az[5] = ~( alu[3] | (`s2_alu_set&IR[3]&IR[4]&nIR[5]) | (`s2_alu_res&(nIR[3]|nIR[4]|IR[5])) | (`s3_alu_daa&bc[2]&bc[5]) );
296
assign az[6] = ~( alu[4] | (`s2_alu_set&nIR[3]&nIR[4]&IR[5]) | (`s2_alu_res&(IR[3]|IR[4]|nIR[5])) | (`s3_alu_daa&bc[2]&bc[5]) );
297
assign az[7] = ~( (bc[2]&`s3_alu_daa) | `s3_alu_sum_neg_hf_nf | `s3_alu_cpl | (Temp_N&d58) );
298
assign az[8] = ~( alu[5] | (`s2_alu_set&IR[3]&nIR[4]&IR[5]) | (`s2_alu_res&(nIR[3]|IR[4]|nIR[5])) | (bc[2]&`s3_alu_daa&((bc[1]&nbc[5])|(nbc[1]&bc[5]))) | (nbc[2]&`s3_alu_daa&((bq5)|(bc[1])|(bq4&bq7))) );
299
assign az[9] = ~( alu[6] | (`s2_alu_set&nIR[3]&IR[4]&IR[5]) | (`s2_alu_res&(IR[3]|nIR[4]|nIR[5])) | (bc[2]&`s3_alu_daa&(nbc[1]&bc[5])) | (nbc[2]&`s3_alu_daa&((bq4&bq7)|(bc[1])|(bq5))) );
300
assign az[10] = ~( alu[7] | (`s2_alu_set&IR[3]&IR[4]&IR[5]) | (`s2_alu_res&(nIR[3]|nIR[4]|nIR[5])) | (bc[2]&`s3_alu_daa&(bc[1]|bc[5])) );
302
`s2_cc_check & ( // inverted condition check
303
(nIR[3]&nIR[4] & bc[3]) | // 00 | Z
304
( IR[3]&nIR[4] &nbc[3]) | // 10 | NZ
305
(nIR[3]& IR[4] & bc[1]) | // 01 | C
306
( IR[3]& IR[4] &nbc[1]) // 11 | NC
309
(f[0]&`s2_cb_bit&nIR[3]&nIR[4]&nIR[5]) |
310
(f[1]&`s2_cb_bit&IR[3]&nIR[4]&nIR[5]) |
311
(f[2]&`s2_cb_bit&nIR[3]&IR[4]&nIR[5]) |
312
(f[3]&`s2_cb_bit&IR[3]&IR[4]&nIR[5]) |
313
(f[4]&`s2_cb_bit&nIR[3]&nIR[4]&IR[5]) |
314
(f[5]&`s2_cb_bit&IR[3]&nIR[4]&IR[5]) |
315
(f[6]&`s2_cb_bit&nIR[3]&IR[4]&IR[5]) |
316
(f[7]&`s2_cb_bit&IR[3]&IR[4]&IR[5]) |
317
(AllZeros&(d42|`s2_op_alu8|`s2_op_incdec8|`s3_alu_daa)) | (d58&Temp_Z) | (bc[3]&(`s3_alu_cpl|`s2_op_add_hl_sxx0|`s3_alu_ccf_scf|`s2_op_add_hl_sx01)) );
318
assign az[13] = ~( `s3_alu_cp | (`s2_op_incdec8&nIR[0]) | (`s2_op_sp_e_sx10&bc[1]) | (`s3_alu_sub_sbc&(nIR[3]|nbc[1])) | (`s2_op_add_hl_sx01&bc[1]) | (`s3_alu_add_adc&IR[3]) );
322
assign azo_latched[0] = CLK2 ? az[0] : 1'bz;
323
assign azo_latched[1] = CLK7 ? (CLK6 ? az[1] : 1'bz) : 1'bz; // -> bc5 -- Flag H
324
assign azo_latched[2] = CLK7 ? (CLK6 ? az[2] : 1'bz) : 1'bz; // -> bc1 -- Flag C
325
assign azo_latched[3] = CLK2 ? az[3] : 1'bz;
326
assign azo_latched[4] = CLK2 ? az[4] : 1'bz;
327
assign azo_latched[5] = CLK2 ? az[5] : 1'bz;
328
assign azo_latched[6] = CLK2 ? az[6] : 1'bz;
329
assign azo_latched[7] = CLK7 ? (CLK6 ? az[7] : 1'bz) : 1'bz; // -> bc2 -- Flag N
330
assign azo_latched[8] = CLK2 ? az[8] : 1'bz;
331
assign azo_latched[9] = CLK2 ? az[9] : 1'bz;
332
assign azo_latched[10] = CLK2 ? az[10] : 1'bz;
333
assign azo_latched[11] = CLK7 ? (CLK6 ? az[11] : 1'bz) : 1'bz; // -> ALU_Out1 -- Skip branch
334
assign azo_latched[12] = CLK7 ? (CLK6 ? az[12] : 1'bz) : 1'bz; // -> bc3 -- Flag Z
335
assign azo_latched[13] = CLK2 ? az[13] : 1'bz; // -> ALU_to_top aka CarryIn
337
// A transparent DLatch is required at least for the asymmetric dynamic logic (which uses CLK7/CLK6, i.e. for flags and cc_check);
338
// The other outputs don't require a DLatch, but have one anyway for uniformity.
339
// The use of asymmetric CLK is obviously related to the peculiarities of overlapped instruction execution in SM83 (CLK6 = writeback; CLK7 = writeback_ext)
340
BusKeeper latched_results [13:0] ( .d(azo_latched), .q(azo) );
342
endmodule // LargeComb1
344
// This latch is used to hold flags (bc3=Z / bc2=N / bc5=H / bc1=C); The silly name `bc` is just from the early stages of research
345
module bc ( nd, CLK, CCLK, Load, q, nq );
356
initial val_in = 1'b0;
357
initial val_out = 1'b0;
364
always @(negedge Load) begin
373
// This latch exists in a single instance and is used to hold the zbus msb (i.e. the sign)
374
module ALU_to_bot_latch ( d, CLK, CCLK, Load, q );
384
initial val_in = 1'b0;
385
initial val_out = 1'b0;
392
always @(negedge Load) begin
398
endmodule // ALU_to_bot_latch