1`timescale 1ns / 1ps
2/*
3 * This software is Copyright (c) 2018 Denis Burykin
4 * [denis_burykin yahoo com], [denis-burykin2014 yandex ru]
5 * and it is hereby released to the general public under the following terms:
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted.
8 *
9 */
10`include "../md5.vh"
11
12
// *****************************************************************
//
// cpu: multi-threaded instruction execution unit.
//
// Instructions are supplied by the 'instruction' submodule and pass
// through the stages STAGE_RD0 -> STAGE_RD1 -> STAGE_EXEC (bits 1..3
// of stage_allow). At STAGE_EXEC an instruction can:
// - write a register with the result from 'integer_ops',
// - update flags (kept per thread in 'cpu_flags'),
// - request a read from the memory (MV_R_MEM_*),
// - write into the unit's output buffer (UOB),
// - write records into comp_buf / procb_buf,
// - jump, halt or force a switch to the next thread.
//
// Parameters:
//   WIDTH         - width of a register / integer operand
//   N_CORES       - forwarded to the 'instruction' submodule
//   N_THREADS     - number of threads (default 4 per core)
//   N_THREADS_MSB - MSB of a thread number
//
// Macros (`MSB, `OP_CODE_*, `OP_TYPE_*, `FLAG_*, ...) come from
// "../md5.vh", included at the top of the file.
//
// *****************************************************************
module cpu #(
	parameter WIDTH = 16,
	parameter N_CORES = 3,
	parameter N_THREADS = 4 * N_CORES,
	parameter N_THREADS_MSB = `MSB(N_THREADS-1)
	)(
	input CLK,
	input [`ENTRY_PT_MSB:0] entry_pt_curr,
	// thread_state (ts)
	output [N_THREADS_MSB :0] ts_rd_num, ts_wr_num, // Thread #
	output reg ts_wr_en = 0,
	output reg [`THREAD_STATE_MSB :0] ts_wr,
	input [`THREAD_STATE_MSB :0] ts_rd,

	// comp_buf & procb_buf
	output reg comp_wr_en = 0, procb_wr_en = 0,
	output reg [N_THREADS_MSB :0] comp_procb_wr_thread_num,
	input [`PROCB_A_WIDTH-1 :0] procb_wr_cnt,
	output reg [`COMP_DATA1_MSB + `COMP_DATA2_MSB+1 :0] comp_dout,
	output reg [`PROCB_D_WIDTH-1 :0] procb_dout,

	// input from the memory
	output reg mem_rd_request = 0,
	output reg [`MEM_TOTAL_MSB :0] mem_rd_addr,
	input mem_rd_valid,
	input [2*WIDTH-1:0] mem_din,

	// unit_output_buf
	output [15:0] uob_dout,
	output uob_wr_en, uob_set_input_complete,
	output [`UOB_ADDR_MSB :0] uob_wr_addr,
	input uob_ready, uob_full,
	output err
	);

	// Write-only register: assigned in the STAGE_RD1 decode block and
	// in the STAGE_EXEC block below, never read in this module.
	// NOTE(review): purpose is not evident from this file (possibly
	// a simulation/debug hook); it has procedural drivers in two
	// always blocks - confirm it is still wanted.
	reg z;

	wire INVALIDATE_eqn, INSTR_WAIT_eqn;
	reg EXECUTED = 0;
	reg NEXT_THREAD = 0;
	reg INSTR_WAIT = 0;
	reg JUMP = 0;
	reg [`IADDR_LEN-1 :0] jump_addr;

	// 1-bit nets that were previously created implicitly. They are
	// declared here, before their first use, so that the module also
	// builds with `default_nettype none and with strict tools.
	wire thread_almost_switched; // connected to 'instruction'
	wire thread_state_change;    // assigned at the end of the module
	wire condition_is_true;      // connected to 'cpu_flags'
	// flag_{zf,of,cf}_in connect to 'integer_ops';
	// flag_uf_in is computed in this module.
	wire flag_zf_in, flag_of_in, flag_cf_in, flag_uf_in;
	wire uob_eqn;
	wire procb_eqn, procb_full, procb_afull;


	// Thread State Changed flag.
	// A thread runs only in WR_RDY state; when thread_state
	// changes, the current thread has to be switched (that takes
	// 4 cycles).
	//
	// This flag allows the thread to continue running until JMP
	// or until an instruction with EXEC_OPT_TS_WR_RDY (that requires
	// WR_RDY state).
	//
	// Cleared when thread_almost_switched asserts (clear has priority),
	// set when thread_state_change asserts.
	reg ts_changed = 0;
	always @(posedge CLK)
		if (thread_almost_switched)
			ts_changed <= 0;
		else if (thread_state_change)
			ts_changed <= 1;


	// *****************************************************************
	//
	// Instruction Execution
	// - the instruction right out from memory is available here.
	//
	// *****************************************************************
	wire [N_THREADS_MSB :0] thread_num;

	wire [`N_STAGES-1:0] stage_allow;
	//wire STAGE_INSTR_AVAIL = stage_allow[];
	wire STAGE_RD0 = stage_allow[1];
	wire STAGE_RD1 = stage_allow[2];
	wire STAGE_EXEC = stage_allow[3];

	// Fields of the instruction as it comes from 'instruction'.
	wire [`FIELD_A_LEN-1 :0] field_a_in;
	wire [`EXEC_OPT_LEN-1: 0] exec_opt_in;
	wire [`PARTIAL_INSTR_LEN-1 :0] partial_instruction;

	instruction #( .N_CORES(N_CORES)
	) instruction(
		.CLK(CLK),
		.entry_pt_curr(entry_pt_curr),
		.ts_rd_num(ts_rd_num), .ts_rd(ts_rd),
		.thread_num(thread_num),
		// Asserts for 1 cycle at STAGE_RD1
		.thread_almost_switched(thread_almost_switched),
		.instruction({ field_a_in, exec_opt_in, partial_instruction }),
		.INVALIDATE(INVALIDATE_eqn),
		// Wait is signalled combinationally on the cycle it starts
		// (INSTR_WAIT_eqn) and held by the INSTR_WAIT register.
		.INSTR_WAIT(INSTR_WAIT_eqn | INSTR_WAIT),
		.NEXT_THREAD(NEXT_THREAD), .EXECUTED(EXECUTED),
		.JUMP(JUMP), .jump_addr(jump_addr),
		.stage_allow(stage_allow),
		.err(err),
		// dummy 2nd port
		.wr_en_dummy(1'b0), .wr_addr_dummy(1'b0)
	);

	// thread_state is written for the currently running thread.
	assign ts_wr_num = thread_num;

	// Evaluated on the not yet registered op_code (the low
	// OP_CODE_LEN bits of partial_instruction); used for rd_en0.
	wire op_type_use_reg0
		= `OP_TYPE_USE_REG(partial_instruction [`OP_CODE_LEN-1 :0]);


	// *****************************************************************
	//
	// Input from the memory
	//
	// mem_din is 2*WIDTH wide. It is registered into mem_r when
	// mem_rd_valid asserts; the register file is written on the
	// following cycle(s).
	//
	// *****************************************************************
	reg mem_rd_valid_r = 0, mem_wr_lower = 0;

`ifdef CPU_MV_R_MEM_2X
	// support for MV_R_MEM_2X
	reg [2*WIDTH-1:0] mem_r;
	reg mem_wr_2x = 0, mem_rd_valid_r2 = 0;

	always @(posedge CLK) begin
		if (mem_rd_valid)
			mem_r <= mem_din;
		mem_rd_valid_r <= mem_rd_valid;
		mem_rd_valid_r2 <= mem_rd_valid_r;
	end

	// Register write: 1 cycle after mem_rd_valid and, for 2X,
	// one more cycle after that.
	wire mem_rd_wr_en = mem_rd_valid_r | (mem_wr_2x & mem_rd_valid_r2);
	// 0: lower half, 1: upper half
	wire mem_wr_select = ~(mem_rd_valid_r & (mem_wr_2x | mem_wr_lower));
	// writing upper half into the next register
	wire mem_rd_wr_2x = mem_wr_2x & mem_rd_valid_r2;
	wire mem_rd_complete = mem_wr_2x ? mem_rd_valid_r2 : mem_rd_valid_r;

`else
	reg [WIDTH-1:0] mem_r;

	always @(posedge CLK) begin
		if (mem_rd_valid)
			// Slices are expressed via WIDTH so they follow the
			// mem_din port width ([15:0] / [31:16] when WIDTH=16).
			mem_r <= mem_wr_lower
				? mem_din[WIDTH-1:0] : mem_din[2*WIDTH-1:WIDTH];
		mem_rd_valid_r <= mem_rd_valid;
	end

	wire mem_rd_wr_en = mem_rd_valid_r;
	wire mem_rd_complete = mem_rd_valid_r;

`endif


	// *****************************************************************
	//
	// Going to stage STAGE_RD0.
	// The instruction (except for field_a) is split into fields
	// and registered.
	//
	// *****************************************************************
	reg [`EXEC_OPT_LEN-1: 0] exec_opt;
	reg [`CONDITION_LEN-1 :0] op_condition;
	reg [`FIELD_B_LEN-1 :0] field_b;
	reg [`FIELD_C_LEN-1 :0] field_c;
	reg [`OP_CODE_LEN-1:0] op_code;

	always @(posedge CLK)
		if (STAGE_RD0) begin
			exec_opt <= exec_opt_in;
			{ op_condition, field_b, field_c, op_code }
				<= partial_instruction;
		end

	wire op_type_use_reg = `OP_TYPE_USE_REG(op_code);


	// *****************************************************************
	//
	// Registers
	// - load is 2 cycles (rd_en0, rd_en1)
	// - 4 inputs (controlled with reg_din_select)
	//
	// *****************************************************************
	wire [WIDTH-1:0] reg_din1, reg_din2, reg_din3, reg_dout;
	wire [1:0] reg_din_select;
	// Write enable, for stages 3,4 respectively
	(* SHREG_EXTRACT="no" *)
	reg reg_wr_en3 = 0, reg_wr_en4 = 0;
	reg [`REG_ADDR_MSB :0] reg_wr_addr4;
	reg [N_THREADS_MSB :0] reg_wr_thread4;

	registers_bram #( .WIDTH(WIDTH), .N_THREADS(N_THREADS)
	) registers(
		.CLK(CLK),
		.din1(reg_din1), .din2(reg_din2), .din3(reg_din3),

`ifdef CPU_MV_R_MEM_2X
		.mem_din(mem_wr_select ? mem_r[2*WIDTH-1:WIDTH] : mem_r[WIDTH-1:0]),
		// On the 2nd write of MV_R_MEM_2X the LSB of the address
		// is forced to 1.
		.wr_addr( { reg_wr_addr4[`REG_ADDR_MSB:1],
				mem_rd_wr_2x ? 1'b1 : reg_wr_addr4[0] } ),
`else
		.mem_din(mem_r), .wr_addr(reg_wr_addr4),
`endif

		.mem_wr_en(mem_rd_wr_en), .wr_en(reg_wr_en4),
		.reg_din_select(reg_din_select),
		.wr_thread_num(reg_wr_thread4),

		.rd_addr(field_a_in),
		.rd_en0(STAGE_RD0 & op_type_use_reg0),
		.rd_en1(STAGE_RD1 & op_type_use_reg),
		.rd_thread_num(thread_num), .dout(reg_dout)
	);


	// *****************************************************************
	//
	// Going to stage STAGE_RD1.
	//
	// *****************************************************************
	reg [`EXEC_OPT_LEN-1: 0] exec_opt1;
	reg [`CONDITION_LEN-1 :0] op_condition1;
	reg [`FIELD_B_LEN-1 :0] field_b1;
	reg [`FIELD_C_LEN-1 :0] field_c1;
	reg [N_THREADS_MSB :0] thread_num1;
	reg [`OP_CODE_LEN-1:0] op_code1;

	always @(posedge CLK)
		if (STAGE_RD1) begin
			exec_opt1 <= exec_opt;
			op_condition1 <= op_condition;
			field_b1 <= field_b;
			field_c1 <= field_c;
			thread_num1 <= thread_num;
			op_code1 <= op_code;
		end

	// Destination of the register write: the register address is
	// taken from the low bits of field_b.
	always @(posedge CLK)
		if (STAGE_EXEC) begin
			reg_wr_addr4 <= field_b1 [`REG_ADDR_MSB :0];
			reg_wr_thread4 <= thread_num1;
		end


	// *****************************************************************
	//
	// Integer Operations
	//
	// - no "reg <- reg (op) reg" operations so far
	//
	// *****************************************************************
	wire [`N_FLAGS-1 :0] flags;

	// iops (controls for integer operations)
	reg iop_addsub = 0, iop_sub = 0, iop_use_cf = 0,
		iop_grp2 = 0, iop_grp3 = 0, iop_shr1 = 0;
	reg [1:0] iop_grp2_select = 0;

	integer_ops #( .WIDTH(WIDTH) ) integer_ops(
		.CLK(CLK),
		.dina(reg_dout), .dinb(field_c1), .en(STAGE_EXEC),
		.in_cf(`FLAG_CARRY(flags)),
		.iops({ iop_addsub, iop_sub, iop_use_cf,
				iop_grp2, iop_grp3, iop_shr1 }),
		.iop_grp2_select(iop_grp2_select),

		.dout_select(reg_din_select),
		.dout1(reg_din1), .dout2(reg_din2), .dout3(reg_din3),
		.flag_zf(flag_zf_in), .flag_of(flag_of_in), .flag_cf(flag_cf_in)
	);


	// *****************************************************************
	//
	// Internal CPU Operations - STAGE_RD1
	//
	// - op_code is available
	// - op_code is decoded into 1-bit controls, used at STAGE_EXEC
	//
	// *****************************************************************
	reg iop_sets_uf = 0, iop_sets_cf = 0, iop_sets_of = 0, iop_sets_zf = 0;
	reg [1:0] iop_flag_code = 0;
	reg iop_jmp = 0, iop_halt = 0;

	always @(posedge CLK)
		if (STAGE_RD1) begin
			iop_addsub <=
				op_code == `OP_CODE_ADD_R_C | op_code == `OP_CODE_SUB_R_C
				| op_code == `OP_CODE_ADDC_R_C | op_code == `OP_CODE_SUBB_R_C;
			iop_sub <= op_code == `OP_CODE_SUB_R_C
				| op_code == `OP_CODE_SUBB_R_C;
			iop_use_cf <= `OP_TYPE_USE_CF(op_code);

			iop_grp2 <= op_code == `OP_CODE_INC_RST
				| op_code == `OP_CODE_MV_R_C | op_code == `OP_CODE_AND;
			iop_grp2_select <=
				op_code == `OP_CODE_INC_RST ? 2'd1 :
				op_code == `OP_CODE_AND ? 2'd2 :
				2'd0;
			if (op_code == `OP_CODE_INC_RST)
				z <= 1;

			iop_grp3 <= op_code == `OP_CODE_SHR1 | op_code == `OP_CODE_MV_R_R;
			iop_shr1 <= op_code == `OP_CODE_SHR1;

			// This op. potentially writes into a register
			// (write might not be performed dependent on conditions).
			reg_wr_en3 <= `OP_TYPE_WRITE_REG(op_code);

			// It's hardcoded(hardwired?) when instruction checks conditions.
			//op_checks_conditions <= OP_TYPE_CHECK_CONDITION(op_code);

			iop_sets_zf <= `OP_TYPE_SETS_ZF(op_code);
			iop_sets_of <= `OP_TYPE_SETS_OF(op_code);
			// Nonblocking, like every other register in this block:
			// iop_sets_cf is read by 'cpu_flags' on the same clock,
			// a blocking assignment here would create a simulation race.
			iop_sets_cf <= `OP_TYPE_SETS_CF(op_code);

			// OP_CODE_FLAG: applicable to UF only
			iop_sets_uf <= op_code == `OP_CODE_FLAG;
			if (op_code == `OP_CODE_FLAG)
				z <= 1;
			iop_flag_code <= field_b[1:0];

			iop_jmp <= op_code == `OP_CODE_JMP;
			if (op_code == `OP_CODE_JMP)
				z <= 1;

			iop_halt <= op_code == `OP_CODE_HALT;
			if (op_code == `OP_CODE_HALT)
				z <= 1;
		end


	// *****************************************************************
	//
	// Internal CPU Operations - STAGE_EXEC
	//
	// - writes integer_ops.dout
	// - checks conditions
	// - sets flags
	//
	// *****************************************************************
	cpu_flags #( .N(`N_FLAGS), .N_THREADS(N_THREADS)
	) cpu_flags(
		.CLK(CLK),
		.thread_num(thread_num),
		.load_en(thread_almost_switched), .save_en(NEXT_THREAD),
		.flags(flags),
		.op_condition(op_condition1),	.condition_is_true(condition_is_true),

		.set_flags(STAGE_EXEC),
		.iop_flag_mask({ iop_sets_uf, iop_sets_cf, iop_sets_of, iop_sets_zf }),
		.flags_in({ flag_uf_in, flag_cf_in, flag_of_in, flag_zf_in })
	);

	// New value for the user flag (UF), selected by iop_flag_code:
	// 00: unchanged, 01: set, 10: clear, 11: invert.
	assign flag_uf_in =
			iop_flag_code == 2'b00 ? `FLAG_USER(flags) :
			iop_flag_code == 2'b01 ? 1'b1 :
			iop_flag_code == 2'b10 ? 1'b0 :
			~`FLAG_USER(flags);


	always @(posedge CLK) begin
		// Check conditions for all integer operations that write registers
		reg_wr_en4 <= STAGE_EXEC & reg_wr_en3 & condition_is_true;

		// Jump address is taken from the low bits of field_c.
		if (iop_jmp)
			jump_addr <= field_c1 [`IADDR_LEN-1 :0];

		if (op_condition1 == `IF_CARRY)
			z <= 1;
	end


	// *****************************************************************
	//
	// Input/Output Operations.
	//
	// - MV_R_MEM_{2X|L|U} (Reg <- Memory)
	// - MV_UOB_R (Unit output buf. <- Reg)
	// - SET_OUTPUT_COMPLETE
	//
	// *****************************************************************
	reg op_mv_r_mem = 0, op_mv_r_mem2x = 0, op_mv_r_mem_lower = 0;
	// Declared before the decode block below that assigns them.
	reg op_mv_uob_r = 0, op_set_output_complete = 0;

	always @(posedge CLK)
		if (STAGE_RD1) begin
			op_mv_r_mem <= `OP_TYPE_MV_R_MEM(op_code);
`ifdef CPU_MV_R_MEM_2X
			op_mv_r_mem2x <= op_code == `OP_CODE_MV_R_MEM_2X;
`endif
			op_mv_r_mem_lower <= op_code == `OP_CODE_MV_R_MEM_L;
			op_mv_uob_r <= op_code == `OP_CODE_MV_UOB_R;
			op_set_output_complete <= op_code == `OP_CODE_SET_OUTPUT_COMPLETE;
		end

	// Memory read request: raised at STAGE_EXEC of MV_R_MEM_*,
	// dropped when mem_rd_valid asserts.
	always @(posedge CLK) //!
		if (STAGE_EXEC & op_mv_r_mem) begin
			mem_rd_request <= 1;
			mem_rd_addr <= { thread_num, field_c1 [`MEM_ADDR_MSB :0] };
`ifdef CPU_MV_R_MEM_2X
			mem_wr_2x <= op_mv_r_mem2x;
`endif
			mem_wr_lower <= op_mv_r_mem_lower;
		end
		else if (mem_rd_valid)
			mem_rd_request <= 0; // data on mem_r; mem_rd_valid_r asserts


	// UOB (unit's output buffer). (-)Output takes 2 cycles.
	// uob_wr_en asserts only on the 1st cycle.
	reg [N_THREADS_MSB :0] uob_thread_num;
	//reg mv_uob_r_cycle2 = 0;
	reg uob_thread_num_eq_thread_num = 0;

	// uob_thread_num: the thread that performed a write while
	// uob_ready was asserted. The comparison with the current thread
	// is registered when the thread gets switched; a successful
	// write (uob_eqn) sets it, overriding the comparison.
	always @(posedge CLK) begin
		if (thread_almost_switched)
			uob_thread_num_eq_thread_num <= uob_thread_num == thread_num;
		if (uob_eqn) begin
			if (uob_ready)
				uob_thread_num <= thread_num;
			//mv_uob_r_cycle2 <= 1;
			uob_thread_num_eq_thread_num <= 1;
		end
		//else
		//	mv_uob_r_cycle2 <= 0;
	end

	assign uob_dout = reg_dout;
	assign uob_wr_addr = field_c1 [`UOB_ADDR_MSB :0];
	assign uob_wr_en = uob_eqn;

	// Write into UOB is performed if it's ready, or if this thread
	// wrote into it before and it isn't full.
	assign uob_eqn = STAGE_EXEC & op_mv_uob_r
		& (uob_ready | (uob_thread_num_eq_thread_num & ~uob_full));


	assign uob_set_input_complete = STAGE_EXEC & op_set_output_complete;


	// *****************************************************************
	//
	// Integrated (Cryptographic core) Operations - STAGE_RD1
	//
	// *****************************************************************
	(* SHREG_EXTRACT="no" *)
	reg op_init_ctx = 0, op_init_new = 0, op_procb = 0, op_procb_r = 0;
	reg op_procb_flags = 0;

	always @(posedge CLK)
		if (STAGE_RD1) begin
			op_init_ctx <= `OP_TYPE_INIT_CTX(op_code);
			//op_init_new <= op_code[0];//op_code == `OP_CODE_NEW_CTX;

			op_procb <= `OP_TYPE_PROCB(op_code);
			op_procb_r <= `OP_TYPE_PROCB_R(op_code);
			op_procb_flags <= op_code[1];
		end


	// Local copy of the procb_buf write counter: loaded from
	// procb_wr_cnt when the thread gets switched, then advanced
	// when procb_eqn is asserted.
	// NOTE(review): procb_eqn is not qualified with STAGE_EXEC here,
	// unlike procb_wr_en below - confirm the counter can't advance
	// on cycles other than STAGE_EXEC.
	reg [`PROCB_A_WIDTH-1 :0] procb_wr_cnt_r;
	always @(posedge CLK) begin
		comp_procb_wr_thread_num <= thread_num;
		if (thread_almost_switched)
			procb_wr_cnt_r <= procb_wr_cnt;
		else if (procb_eqn)
			procb_wr_cnt_r <= procb_wr_cnt_r + 1'b1;
	end
	assign procb_full = procb_wr_cnt_r == `PROCB_N_RECORDS;
	assign procb_afull = procb_wr_cnt_r == `PROCB_N_RECORDS - 1;


	// *****************************************************************
	//
	// Integrated (Cryptographic core) Operations - STAGE_EXEC
	//
	// *****************************************************************
	// thread_state disables execution (applicable at STAGE_EXEC)
	wire ts_disable_exec
		= ((exec_opt1 & `EXEC_OPT_TS_WR_RDY) != 0) & ts_changed;

	// procb_eqn: writes procb_wr_en if thread_state allows
	assign procb_eqn = op_procb & condition_is_true & ~ts_disable_exec;// & ~procb_full;

	always @(posedge CLK) begin
		comp_wr_en <= STAGE_EXEC & op_init_ctx & ~ts_disable_exec;
		if (op_init_ctx)
			comp_dout <= { 1'b1, field_b1, field_c1[2:0] };

		procb_wr_en <= STAGE_EXEC & procb_eqn;
		if (op_procb)
			//procb_dout <= { field_b1, reg_dout[7:0], op_procb_flags };
			// Allow constant 'cnt' (length)
			procb_dout <= { field_b1, (op_procb_r
				? reg_dout[`PROCB_CNT_MSB:0] : field_c1[`PROCB_CNT_MSB:0]),
				op_procb_flags };
	end



	// *****************************************************************
	//
	// Control of the instruction flow
	//
	// *****************************************************************

	wire JUMP_eqn = STAGE_EXEC & (iop_jmp & condition_is_true);

	wire NEXT_THREAD_eqn = STAGE_EXEC & (1'b0
		| iop_halt
		| (ts_disable_exec & condition_is_true)

		// Switch to the next thread when:
		// - Successful PROCESS_BYTES with fin/stop or procb_buf full
		//| procb_eqn & (op_procb_flags != 0 | procb_afull)

		// - move to UOB, UOB is full or used by other thread
		| (op_mv_uob_r
			& ~(uob_ready | (uob_thread_num_eq_thread_num & ~uob_full)))
		//
		//| op_set_output_complete
		//
		// - JUMP_eqn forces NEXT_THREAD
	);


	// Invalidate loaded instructions, start loading from the beginning
	// when:
	// - Successful jump is performed
	// - Thread is switched
	assign INVALIDATE_eqn = NEXT_THREAD_eqn | JUMP_eqn;

	// Oops. On INSTR_WAIT, it doesn't preserve reg_dout, field_[b|c]1 etc.
	assign INSTR_WAIT_eqn = STAGE_EXEC & (1'b0
		| op_mv_r_mem
		//| uob_eqn
	);

	wire INSTR_CONTINUE_eqn = 1'b0
		| mem_rd_complete
		//| mv_uob_r_cycle2
	;


	// *****************************************************************

	always @(posedge CLK) begin

		// TODO: improve condition?
		// Wait starts with INSTR_WAIT_eqn and ends with
		// INSTR_CONTINUE_eqn (the latter has priority).
		if (INSTR_CONTINUE_eqn)
			INSTR_WAIT <= 0;
		else if (INSTR_WAIT_eqn)
			INSTR_WAIT <= 1;


		NEXT_THREAD <= NEXT_THREAD_eqn | JUMP_eqn;

		JUMP <= JUMP_eqn;

		EXECUTED <= STAGE_EXEC & (1'b0
			// Instruction typically executed when:
			// - No Invalidate condition, no Wait condition
			| (~INVALIDATE_eqn & ~INSTR_WAIT_eqn)
			// Exceptions:
			// - execution disabled because of wrong thread_state
				& ~(ts_disable_exec & condition_is_true)
			//| procb_eqn & (op_procb_flags != 0 | procb_afull)

		) | (1'b0
			// - It continues after Wait condition
			| (INSTR_WAIT & INSTR_CONTINUE_eqn)
		);


		// ts_wr_en: 1-cycle pulse on the cycle after
		// thread_state_change.
		if (ts_wr_en)
			ts_wr_en <= 0;
		else if (thread_state_change)
			ts_wr_en <= 1;

		// New thread_state: NONE after SET_OUTPUT_COMPLETE,
		// RD_RDY otherwise.
		ts_wr <=
			op_set_output_complete ? `THREAD_STATE_NONE :
			`THREAD_STATE_RD_RDY
		;

	end

	assign thread_state_change = STAGE_EXEC & (1'b0
		// Successful PROCESS_BYTES with fin/stop or procb_buf becoming full
		| (procb_eqn & ((op_procb_flags != 0) | procb_afull))
		// Sending UOB content for output
		| op_set_output_complete
	);


`ifdef SIMULATION
	// Statistics for simulation: count thread switches and jumps.
	reg [23:0] X_THREAD_SWITCHES = 0;
	reg [23:0] X_JUMPS = 0;

	always @(posedge CLK) begin
		if (NEXT_THREAD)
			X_THREAD_SWITCHES <= X_THREAD_SWITCHES + 1'b1;
		if (JUMP)
			X_JUMPS <= X_JUMPS + 1'b1;
	end
`endif

endmodule
601