1/*
2 * This software is Copyright (c) 2018-2019 Denis Burykin
3 * [denis_burykin yahoo com], [denis-burykin2014 yandex ru]
4 * and it is hereby released to the general public under the following terms:
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted.
7 *
8 */
9
10// Include Verilog code for "blackbox" modules
11// (ISE Project Navigator requires restart after switching this)
12//`define	SIMULATION
13
14
15// ===== Algorithm constants and operations =====
16//
// Rotate w right by s bit positions within a 64-bit word
// (the complementary left shift is 64 - s).
// Both arguments are parenthesized so that compound expressions,
// e.g. CYCLIC(a ^ b, n), expand with the intended precedence.
`define	CYCLIC(w,s)	(((w) >> (s)) | ((w) << (64 - (s))))
18
// Reverse the byte order of a 64-bit value: every byte of x is moved to
// the mirrored byte position (shifts by 56/40/24/8 bits in both directions).
// NOTE(review): the masks are narrower than 64 bits, so the result relies
// on the expression being evaluated at 64-bit width - confirm at use sites.
`define	SWAP(x)	(((x) << 56)	\
   | (((x) & 16'hff00) << 40)	\
   | (((x) & 24'hff0000) << 24)	\
   | (((x) & 32'hff000000) << 8)	\
   | (((x) >> 8) & 32'hff000000)	\
   | (((x) >> 24) & 24'hff0000)	\
   | (((x) >> 40) & 16'hff00)	\
   | ((x) >> 56))
27
// 512-bit concatenation of the eight 64-bit SHA-512 initial hash words.
// The last word (5be0cd19137e2179) occupies the most significant bits,
// the first word (6a09e667f3bcc908) the least significant bits.
// The literals start with a hex digit: an underscore directly after the
// base specifier is not accepted by the Verilog number grammar.
`define	SHA512_IV	{ 128'h5be0cd19137e2179_1f83d9abfb41bd6b, \
	192'h9b05688c2b3e6c1f_510e527fade682d1_a54ff53a5f1d36f1, \
	192'h3c6ef372fe94f82b_bb67ae8584caa73b_6a09e667f3bcc908 }
31
32
33// =====================================================
34//
// Number of cores and number of threads.
// NOTE(review): the scope of these counts (per engine / per unit) is not
// visible in this file - confirm against the instantiating modules.
`define	N_CORES		4
`define	N_THREADS	16
37
38
39// ===== Block processing options (transferred to cores) =====
40//
// MSB index of the block-operation options vector (bits [1:0]).
`define	BLK_OP_MSB	1
// Bit 0 of options vector r: 1 - new context, 0 - load context
`define	BLK_OP_IF_NEW_CTX(r)		r[0]
// Bit 1 of options vector r; it has two uses:
// 1) Output computed result;
// 2) Used to set thread state.
`define	BLK_OP_END_COMP_OUTPUT(r)	r[1]
47
48
49// ===== engine (services several cores) =====
50//
// Width of one memory word, in bits.
`define	MEM_WIDTH	64
// "Main" memory (per thread; in MEM_WIDTH-bit words):
// MSB index of the per-thread word address.
`define	MEM_ADDR_MSB	4
// "Main" memory (per engine) [0: 2**(`MEM_TOTAL_MSB+1)-1]:
// MSB index of the engine-wide word address (4 bits more than per thread).
`define	MEM_TOTAL_MSB	(`MEM_ADDR_MSB + 4)

// process_bytes (in bytes)
// max.key_len=64 (comp.len <8k)
// MSB indices of the two process_bytes byte counters.
// NOTE(review): presumably CNT is the per-instruction count and TOTAL the
// per-computation total - confirm against the process_bytes logic.
`define	PROCB_CNT_MSB		6
`define	PROCB_TOTAL_MSB	12

// unit's I/O: input and output port widths, in bits
`define	UNIT_INPUT_WIDTH		8
`define	UNIT_OUTPUT_WIDTH		2
// MSB index of an address in Unit's output buffer (UOB) memory,
// counted in 16-bit words
`define	UOB_ADDR_MSB	5
67
68
69// ===== computation state (per thread) =====
70//
// MSB index of the per-thread state value (2-bit encoding, bits [1:0]).
`define	THREAD_STATE_MSB		1

// Encodings of the per-thread computation state.
`define	THREAD_STATE_NONE		2'd0
`define	THREAD_STATE_WR_RDY	2'd1
`define	THREAD_STATE_RD_RDY	2'd2
`define	THREAD_STATE_BUSY		2'd3
77
78
79// ===== comp_buf, procb_buf, saved_procb_state =====
80//
// MSB indices of two data fields.
// NOTE(review): presumably the comp_buf fields named in the section
// header - confirm against the module that declares them.
`define	COMP_DATA1_MSB		0
// The whole expression is wrapped in parentheses so the value stays intact
// when the macro is used inside a larger expression (previously the
// trailing "-1" was left outside any parentheses).
`define	COMP_DATA2_MSB		((`MEM_ADDR_MSB+1 + 4) - 1)

// address width for procb records (per thread)
`define	PROCB_N_RECORDS	4
`define	PROCB_A_WIDTH		3
// width of each procb record:
// (MEM_ADDR_MSB+1) + (PROCB_CNT_MSB+1) + 1 bits
`define	PROCB_D_WIDTH		(`MEM_ADDR_MSB+1 + `PROCB_CNT_MSB+1 + 1)

// Width of the saved procb state, in bits (sum of the field widths below).
`define	PROCB_SAVE_WIDTH		(4 + `MEM_ADDR_MSB+3+1 + `PROCB_CNT_MSB+1 \
	+ `PROCB_TOTAL_MSB+1 + 4)
92
93
94// ===== CPU =====
95//
// Width of one CPU word, in bits.
`define	CPU_WIDTH		16
// Number of CPU words that make up one memory word.
`define	MEM_CPU_RATIO	(`MEM_WIDTH / `CPU_WIDTH)
`define	N_STAGES			4
// 16 registers (MSB index of a 4-bit register address)
`define	REG_ADDR_MSB	3
// Program entry points
`define	ENTRY_PTS_EN
`define	ENTRY_PT_MSB	0
//
// Allow ADDC/SUBB instructions (slow; 210-220 max.)
//`define	INSTR_SUBB_EN
//
// Each instruction consists of:
// - an operation code; OP_CODE is a literal prefix that must be followed
//   by a decimal number (e.g. "OP_CODE 8" expands to "5'd 8").
`define	OP_CODE_LEN		5
`define	OP_CODE			5'd
// Field A contains exclusively register to read from
// (FIELD_A is the matching 4-bit literal prefix).
`define	FIELD_A_LEN		(`REG_ADDR_MSB+1)
`define	FIELD_A			4'd
// Field B contains register for write, memory address or other data
`define	FIELD_B_LEN		(`MEM_ADDR_MSB+1)
// The literal prefix below is hard-coded to 5 bits:
// adjust this if MEM_ADDR_MSB changes
`define	FIELD_B			5'd
// Field C typically contains a constant.
`define	FIELD_C_LEN		8
`define	FIELD_C			8'd
// Conditions determine the result of instruction execution.
`define	CONDITION_LEN	4
// Instruction execution options.
`define	EXEC_OPT_LEN	1

// Instruction width without field A and the execution option.
`define	PARTIAL_INSTR_LEN	(`CONDITION_LEN \
	+ `FIELD_B_LEN + `FIELD_C_LEN + `OP_CODE_LEN)
// Full instruction width, in bits.
`define	INSTR_LEN	(`FIELD_A_LEN + `EXEC_OPT_LEN + `PARTIAL_INSTR_LEN)
129
130
131// Instruction Address length: must fit into field_c
// Instruction address width, in bits (equal to FIELD_C_LEN in this file).
`define	IADDR_LEN		8

// *** Instruction execution options ***
// (1-bit values, matching EXEC_OPT_LEN)
// EXEC_OPT_NONE - no additional requirement
`define	EXEC_OPT_NONE			1'b0
// EXEC_OPT_TS_WR_RDY - requires thread_state to be WR_RDY
`define	EXEC_OPT_TS_WR_RDY	1'b1
138
139// *** CPU flags ***
// Number of CPU flags; the accessors below select one bit of flag vector r.
`define	N_FLAGS		4
`define	FLAG_ZERO(r)	r[0]
`define	FLAG_ONE(r)		r[1]
`define	FLAG_CARRY(r)	r[2]
// User flag ("UF" in the condition and FLAG-instruction names)
`define	FLAG_USER(r)	r[3]
145
146// *** Conditions ***
// 4-bit condition codes (CONDITION_LEN bits). In every pair listed below
// the least significant bit distinguishes a condition from its negation.
`define	IF_NONE		4'b0000

`define	IF_ONE			4'b0010
`define	IF_NOT_ONE		4'b0011
`define	IF_ZERO			4'b0100
`define	IF_NOT_ZERO		4'b0101
`define	IF_CARRY			4'b0110
`define	IF_NOT_CARRY	4'b0111
`define	IF_UF				4'b1000
`define	IF_NOT_UF		4'b1001
//`define	IF

// CONDITION is the condition code embedded by the instruction macros that
// reference it; it starts out as IF_NONE.
// IF(cond) expands to an undef/define pair that replaces CONDITION with
// cond, so instruction macros expanded afterwards pick up the new value.
// NOTE(review): this relies on the preprocessor honouring compiler
// directives produced by macro expansion - confirm tool support.
`define	CONDITION `IF_NONE
`define	IF(cond) \
`undef	CONDITION \
`define	CONDITION cond
163
164// *** Operation codes ***
// Process-bytes opcodes. The PROCESS_BYTES_C* instruction macros pass a
// byte count in field C; the PROCESS_BYTES_R* macros pass a register in
// field A.
`define	OP_CODE_PROCB_C		`OP_CODE 8
`define	OP_CODE_PROCB_C_FIN	`OP_CODE 10
`define	OP_CODE_PROCB_C_STOP	`OP_CODE 9
`define	OP_CODE_PROCB_R		`OP_CODE 12
`define	OP_CODE_PROCB_R_FIN	`OP_CODE 14
`define	OP_CODE_PROCB_R_STOP	`OP_CODE 13

// Opcode class predicates. The argument is parenthesized so that a
// compound expression passed as c is compared as a whole.
// True for any opcode in 8..15 (all process-bytes opcodes).
`define	OP_TYPE_PROCB(c)		((c) >= 8 & (c) <= 15)
// True for any opcode in 12..15 (register-count process-bytes opcodes).
`define	OP_TYPE_PROCB_R(c)	((c) >= 12 & (c) <= 15)

// Context initialisation opcodes.
`define	OP_CODE_NEW_CTX		`OP_CODE 7
`define	OP_CODE_LOAD_CTX		`OP_CODE 6

// True for LOAD_CTX (6) or NEW_CTX (7).
`define	OP_TYPE_INIT_CTX(c)	((c) == 6 | (c) == 7)
179
// Execution control.
`define	OP_CODE_NOP			`OP_CODE 0
`define	OP_CODE_HALT		`OP_CODE 1

// Integer / register operations.
`define	OP_CODE_ADD_R_C	`OP_CODE 16
`define	OP_CODE_ADDC_R_C	`OP_CODE 17
`define	OP_CODE_SUB_R_C	`OP_CODE 18
`define	OP_CODE_SUBB_R_C	`OP_CODE 19
`define	OP_CODE_INC_RST	`OP_CODE 20
`define	OP_CODE_MV_R_C		`OP_CODE 21
`define	OP_CODE_SHR1		`OP_CODE 22
`define	OP_CODE_MV_R_R		`OP_CODE 23
`define	OP_CODE_AND			`OP_CODE 24

// op:a[1:0](1-set, 2-reset, 3-invert) mask:b
// currently applicable to UF only
// NOTE(review): the SET_UF / RST_UF / INV_UF macros in this file put the
// 1/2/3 selector into field B and 0 into field A, which does not match
// the field assignment stated above - confirm against the CPU decoder.
`define	OP_CODE_FLAG		`OP_CODE 25

// Output buffer (UOB) operations.
`define	OP_CODE_MV_UOB_R	`OP_CODE 26
`define	OP_CODE_SET_OUTPUT_COMPLETE	`OP_CODE 27

// Memory-to-register moves.
`define	OP_CODE_MV_R_MEM_X	`OP_CODE 30

// OP_TYPE_MV_R_MEM(c) is true for any memory-to-register move opcode of
// the selected variant. The argument is parenthesized so that a compound
// expression passed as c is compared as a whole.
`ifndef	CPU_MEM_4X
`define	OP_CODE_MV_R_MEM_U	`OP_CODE 28
`define	OP_CODE_MV_R_MEM_L	`OP_CODE 29
//`define	OP_CODE_MV_R_MEM_2X	`OP_CODE 30
`define	OP_TYPE_MV_R_MEM(c)	((c) == 28 | (c) == 29 | (c) == 30)
`else
`define	OP_CODE_MV_R_MEM0	`OP_CODE 2
`define	OP_CODE_MV_R_MEM1	`OP_CODE 3
`define	OP_CODE_MV_R_MEM2	`OP_CODE 4
`define	OP_CODE_MV_R_MEM3	`OP_CODE 5
`define	OP_TYPE_MV_R_MEM(c)	((c) >= 2 & (c) <= 5)
`endif

`define	OP_CODE_JMP			`OP_CODE 31
216
217//`define	OP_CODE_ILLEGAL(c) ( ~( \
218//	c ==
219
// Opcode class predicates. In all of them the argument is parenthesized so
// that a compound expression passed as c is compared as a whole.

// True for SUB_R_C, INC_RST and SHR1.
`define	OP_TYPE_SETS_ZF(c) ( \
	(c) == `OP_CODE_SUB_R_C | (c) == `OP_CODE_INC_RST | (c) == `OP_CODE_SHR1)

// True for SHR1 only.
`define	OP_TYPE_SETS_OF(c) ( \
	(c) == `OP_CODE_SHR1)

// True for the add/subtract opcodes, with or without carry/borrow.
`define	OP_TYPE_SETS_CF(c) ( \
	(c) == `OP_CODE_ADD_R_C | (c) == `OP_CODE_SUB_R_C \
	| (c) == `OP_CODE_ADDC_R_C | (c) == `OP_CODE_SUBB_R_C)

// True for ADDC_R_C and SUBB_R_C.
`define	OP_TYPE_USE_CF(c) ( \
	(c) == `OP_CODE_ADDC_R_C | (c) == `OP_CODE_SUBB_R_C)


// This op. potentially writes into a register
// (write might not be performed because of conditions).
// MV_R_MEM_* processed independently.
`define	OP_TYPE_WRITE_REG(c) ( \
	(c) == `OP_CODE_ADD_R_C | (c) == `OP_CODE_SUB_R_C \
	| (c) == `OP_CODE_ADDC_R_C | (c) == `OP_CODE_SUBB_R_C \
	| (c) == `OP_CODE_INC_RST \
	| (c) == `OP_CODE_MV_R_C | (c) == `OP_CODE_SHR1 | (c) == `OP_CODE_MV_R_R \
	| (c) == `OP_CODE_AND)

// Reads a register: every register-writing op except MV_R_C, plus
// PROCB_R, PROCB_R_FIN and MV_UOB_R. The grouping of the "&" term is
// spelled out explicitly; it matches the operator precedence.
`define	OP_TYPE_USE_REG(c) ( \
	(`OP_TYPE_WRITE_REG(c) & ((c) != `OP_CODE_MV_R_C)) \
	| (c) == `OP_CODE_PROCB_R | (c) == `OP_CODE_PROCB_R_FIN \
	| (c) == `OP_CODE_MV_UOB_R)
249
250// read register only from field_a (timing issue)
251
252// These ops check conditions (hardcoded)
253//`define	OP_TYPE_CHECK_CONDITION(c) ( \
254//	`OP_TYPE_WRITE_REG(c) | `OP_TYPE_PROCB(c) | c == `OP_CODE_JMP)
255
256
257// ===== Instructions =====
258//
// *** Instructions - SHA512 subsystem ***
// Every instruction below is a concatenation, MSB first, of:
//   field A, execution option, condition, field B, field C, opcode.
// All of them carry EXEC_OPT_TS_WR_RDY (thread_state must be WR_RDY).

// NEW_CTX: unconditional; save_addr goes to field B, save_len to field C.
`define	NEW_CTX(save_addr,save_len) { \
	`FIELD_A 0, `EXEC_OPT_TS_WR_RDY, `IF_NONE, \
	`FIELD_B save_addr, `FIELD_C save_len, \
	`OP_CODE_NEW_CTX }

// Process cnt bytes (constant in field C) starting at addr (field B),
// subject to the current CONDITION.
`define	PROCESS_BYTES_C(addr,cnt) { \
	`FIELD_A 0, `EXEC_OPT_TS_WR_RDY, `CONDITION, \
	`FIELD_B addr, `FIELD_C cnt, \
	`OP_CODE_PROCB_C }
// Same operands as PROCESS_BYTES_C, encoded with the PROCB_C_FIN opcode.
`define	PROCESS_BYTES_C_FINISH_CTX(addr,cnt) { \
	`FIELD_A 0, `EXEC_OPT_TS_WR_RDY, `CONDITION, \
	`FIELD_B addr, `FIELD_C cnt, \
	`OP_CODE_PROCB_C_FIN }

// Register variants: register r goes to field A, field C is 0.
`define	PROCESS_BYTES_R(addr,r) { \
	`FIELD_A r, `EXEC_OPT_TS_WR_RDY, `CONDITION, \
	`FIELD_B addr, `FIELD_C 0, \
	`OP_CODE_PROCB_R }
`define	PROCESS_BYTES_R_FINISH_CTX(addr,r) { \
	`FIELD_A r, `EXEC_OPT_TS_WR_RDY, `CONDITION, \
	`FIELD_B addr, `FIELD_C 0, \
	`OP_CODE_PROCB_R_FIN }

// Shorthand: PROCESS_BYTES_C_FINISH_CTX with address 0 and count 0.
`define	FINISH_CTX	`PROCESS_BYTES_C_FINISH_CTX(0,0)
278
279
280// *** Instructions - integer ***
// Integer instructions. Field A names the register that is read, field B
// the register that is written, field C holds the constant operand.
// All of them use EXEC_OPT_NONE and the current CONDITION.
`define	ADD_R_C(r,imm) { \
	`FIELD_A r, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B r, `FIELD_C imm, \
	`OP_CODE_ADD_R_C }
`define	SUB_R_C(dst,src,imm) { \
	`FIELD_A src, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B dst, `FIELD_C imm, \
	`OP_CODE_SUB_R_C }
`define	INC_RST(r,imm) { \
	`FIELD_A r, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B r, `FIELD_C imm, \
	`OP_CODE_INC_RST }
`define	MV_R_C(r,imm) { \
	`FIELD_A r, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B r, `FIELD_C imm, \
	`OP_CODE_MV_R_C }
`define	SHR1(r) { \
	`FIELD_A r, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B r, `FIELD_C 0, \
	`OP_CODE_SHR1 }
// Source and destination may be different registers.
`define	MV_R_R(dst,src) { \
	`FIELD_A src, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B dst, `FIELD_C 0, \
	`OP_CODE_MV_R_R }
`define	AND_R_C(dst,src,imm) { \
	`FIELD_A src, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B dst, `FIELD_C imm, \
	`OP_CODE_AND }

// Carry/borrow variants exist only when INSTR_SUBB_EN is defined.
`ifdef	INSTR_SUBB_EN
`define	ADDC_R_C(r,imm) { \
	`FIELD_A r, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B r, `FIELD_C imm, \
	`OP_CODE_ADDC_R_C }
`define	SUBB_R_C(dst,src,imm) { \
	`FIELD_A src, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B dst, `FIELD_C imm, \
	`OP_CODE_SUBB_R_C }
`endif
303
304// *** Instructions - I/O ***
// I/O instructions. All are unconditional (IF_NONE).
// Memory-to-register moves: register in field B, address in field C,
// field A is 0, no execution option.
`define	MV_R_MEM_X(base_r,addr) { \
	`FIELD_A 0, `EXEC_OPT_NONE, `IF_NONE, \
	`FIELD_B base_r, `FIELD_C addr, \
	`OP_CODE_MV_R_MEM_X }

`ifndef	CPU_MEM_4X
//`define	MV_R_MEM_2X(r,addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
//	`IF_NONE, `FIELD_B r, `FIELD_C addr, `OP_CODE_MV_R_MEM_2X}
`define	MV_R_MEM_L(r,addr) { \
	`FIELD_A 0, `EXEC_OPT_NONE, `IF_NONE, \
	`FIELD_B r, `FIELD_C addr, \
	`OP_CODE_MV_R_MEM_L }
`define	MV_R_MEM_U(r,addr) { \
	`FIELD_A 0, `EXEC_OPT_NONE, `IF_NONE, \
	`FIELD_B r, `FIELD_C addr, \
	`OP_CODE_MV_R_MEM_U }
`else
`define	MV_R_MEM0(r,addr) { \
	`FIELD_A 0, `EXEC_OPT_NONE, `IF_NONE, \
	`FIELD_B r, `FIELD_C addr, \
	`OP_CODE_MV_R_MEM0 }
`define	MV_R_MEM1(r,addr) { \
	`FIELD_A 0, `EXEC_OPT_NONE, `IF_NONE, \
	`FIELD_B r, `FIELD_C addr, \
	`OP_CODE_MV_R_MEM1 }
`define	MV_R_MEM2(r,addr) { \
	`FIELD_A 0, `EXEC_OPT_NONE, `IF_NONE, \
	`FIELD_B r, `FIELD_C addr, \
	`OP_CODE_MV_R_MEM2 }
`define	MV_R_MEM3(r,addr) { \
	`FIELD_A 0, `EXEC_OPT_NONE, `IF_NONE, \
	`FIELD_B r, `FIELD_C addr, \
	`OP_CODE_MV_R_MEM3 }
`endif

// Output buffer instructions; both carry EXEC_OPT_TS_WR_RDY
// (thread_state must be WR_RDY).
// MV_UOB_R: register r in field A, UOB address in field C.
`define	MV_UOB_R(uob_addr,r) { \
	`FIELD_A r, `EXEC_OPT_TS_WR_RDY, `IF_NONE, \
	`FIELD_B 0, `FIELD_C uob_addr, \
	`OP_CODE_MV_UOB_R }
`define	SET_OUTPUT_COMPLETE { \
	`FIELD_A 0, `EXEC_OPT_TS_WR_RDY, `IF_NONE, \
	`FIELD_B 0, `FIELD_C 0, \
	`OP_CODE_SET_OUTPUT_COMPLETE }
330
331
332// *** Instructions - execution control ***
// NOP and HALT are unconditional and carry no operands.
`define	NOP	{ \
	`FIELD_A 0, `EXEC_OPT_NONE, `IF_NONE, \
	`FIELD_B 0, `FIELD_C 0, \
	`OP_CODE_NOP }
`define	HALT	{ \
	`FIELD_A 0, `EXEC_OPT_NONE, `IF_NONE, \
	`FIELD_B 0, `FIELD_C 0, \
	`OP_CODE_HALT }

// Jump to instruction address addr (field C), subject to the current
// CONDITION.
`define	JMP(addr) { \
	`FIELD_A 0, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B 0, `FIELD_C addr, \
	`OP_CODE_JMP }

// User flag operations, subject to the current CONDITION.
// Field B carries the selector: 1 for SET_UF, 2 for RST_UF, 3 for INV_UF.
`define	SET_UF { \
	`FIELD_A 0, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B 1, `FIELD_C 0, \
	`OP_CODE_FLAG }
`define	RST_UF { \
	`FIELD_A 0, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B 2, `FIELD_C 0, \
	`OP_CODE_FLAG }
`define	INV_UF { \
	`FIELD_A 0, `EXEC_OPT_NONE, `CONDITION, \
	`FIELD_B 3, `FIELD_C 0, \
	`OP_CODE_FLAG }
347
348
349// *** Registers - BRAM ***
// Symbolic names for the 16 register numbers (0..15); they are plain
// decimal numbers meant to follow the FIELD_A / FIELD_B literal prefixes
// inside the instruction macros.
`define	R0		0
`define	R1		1
`define	R2		2
`define	R3		3
`define	R4		4
`define	R5		5
`define	R6		6
`define	R7		7
`define	R8		8
`define	R9		9
`define	R10	10
`define	R11	11
`define	R12	12
`define	R13	13
`define	R14	14
`define	R15	15
366
367