1`timescale 1ns / 1ps
2/*
3 * This software is Copyright (c) 2016,2019 Denis Burykin
4 * [denis_burykin yahoo com], [denis-burykin2014 yandex ru]
5 * and it is hereby released to the general public under the following terms:
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted.
8 *
9 */
10`include "bcrypt.vh"
11
//
// bcrypt_arbiter
//
// Sits between the data source (bcrypt_data), NUM_CORES computing cores
// and the output side.
//
//  - Write side (state_wr): broadcasts initialization data to every core
//    that reports init_ready, then hands candidates one at a time to a
//    core that reports crypt_ready. Cores are scanned in round-robin order.
//
//  - Read side (state_rd): polls cores for output in round-robin order,
//    deserializes the 1-bit stream of the selected core into 16 words
//    of 16 bits (output_r), optionally waits for the comparator (mode_cmp)
//    and does per-packet accounting.
//
// All logic is clocked by CLK; there is no reset input, registers rely
// on their declared initial values.
//
module bcrypt_arbiter #(
	// Number of attached computing cores. The default (-1) is not a usable
	// value; the instantiating module is expected to override it.
	parameter NUM_CORES = -1
	)(
	input CLK,
	// 1: results go through the comparator, candidates are counted and
	//    a "packet done" output packet is produced;
	// 0: every result is output, no accounting, no "packet done" packet.
	input mode_cmp,

	// Packages of data from bcrypt_data for cores
	input [7:0] din,
	input [1:0] ctrl,

	// Control exchange with bcrypt_data
	input data_ready, init_ready,
	output reg start_data_tx = 0, start_init_tx = 0,
	input [15:0] bcdata_pkt_id,
	input bcdata_gen_end,

	// Comparator
	output reg [31:0] cmp_data,
	output reg cmp_start = 0,
	input cmp_found, cmp_finished,
	input [`HASH_NUM_MSB:0] cmp_hash_num,

	// Output using memory 16x16
	output reg [`OUTPKT_TYPE_MSB:0] outpkt_type,
	output [15:0] dout,
	input [3:0] rd_addr,

	output reg [15:0] pkt_id,
	output reg [31:0] num_processed = 0,
	output reg [`HASH_NUM_MSB:0] hash_num,
	input rd_en,
	output reg empty = 1,
	output [3:0] error,
	output idle,

	// Cores are moved to top-level module.
	output reg [7:0] core_din,
	output reg [1:0] core_ctrl,
	output reg [NUM_CORES-1:0] core_wr_en = 0,
	input [NUM_CORES-1:0] core_init_ready_in, core_crypt_ready_in,
	output reg [NUM_CORES-1:0] core_rd_en = 0,
	input [NUM_CORES-1:0] core_empty_in,
	input [NUM_CORES-1:0] core_dout_in
	);


	//
	// *******************************************************
	//
	// Error reporting.
	// NOTE: err_core_output is never assigned anywhere in this module,
	// so `error` currently always reads 4'b0000.
	//
	reg err_core_output = 0;

	assign error = {
		1'b0, 1'b0, 1'b0, err_core_output
	};


	//
	// *******************************************************
	//
	// Register stage on the core-facing signals.
	// Data/control from bcrypt_data reach the cores one cycle late;
	// status and output bits from the cores are seen by the FSMs
	// one cycle late.
	//
	reg [NUM_CORES-1:0] core_init_ready = 0, core_crypt_ready = 0;
	reg [NUM_CORES-1:0] core_empty = {NUM_CORES{1'b1}}, core_dout = 0;

	always @(posedge CLK) begin
		core_din <= din;
		core_ctrl <= ctrl;
		core_init_ready <= core_init_ready_in;
		core_crypt_ready <= core_crypt_ready_in;
		core_empty <= core_empty_in;
		core_dout <= core_dout_in;
	end

	// Registered "at least one core wants initialization" flag.
	reg some_cores_init_ready = 0;
	always @(posedge CLK)
		some_cores_init_ready <= |core_init_ready;

	//
	// Count candidates in packets
	// Unlike in descrypt, doing the easiest thing:
	// candidates from only 1 packet can be "in flight"
	//
	// num_processed_in: candidates of the current packet handed to cores.
	// It is compared with num_processed (candidates accounted for on the
	// read side) to detect that the packet is complete.
	//
	reg [31:0] num_processed_in = 0;

	reg inpkt_done = 0; // All cand's from the packet were sent to cores
	reg outpkt_done = 0; // All cand's processed


	// *************************************************************
	//
	// Write data to cores
	//
	// *************************************************************

	// Core currently examined by the round-robin scan / being written to.
	reg [`MSB(NUM_CORES-1):0] wr_core_num = 0;

	// Startup delay: a 1 is shifted in while the FSM sits in IDLE;
	// the FSM leaves IDLE when it reaches bit 31 (about 32 cycles).
	// Afterwards zeroes are shifted in until the register drains.
	reg [31:0] delay_shr = 0;

	// Output of data_ready_timeout_inst (instantiated after the FSM).
	// Declared explicitly: it is read inside the FSM below, ahead of the
	// instantiation, and must not depend on implicit net creation.
	wire data_ready_timeout;

	localparam STATE_WR_IDLE = 0,
				STATE_WR_CHECK_CORE_INIT_READY = 1,
				STATE_WR_INIT_TX_START = 2,
				STATE_WR_CHECK_CORE_DATA_READY = 3,
				STATE_WR_DATA_TX_START = 4,
				STATE_WR_TX = 5,
				STATE_WR_TX_END = 6,
				STATE_WR_WAIT_PKT = 7,
				STATE_WR_WAIT1 = 8;

	(* FSM_EXTRACT="true" *)
	reg [3:0] state_wr = STATE_WR_IDLE;

	always @(posedge CLK) begin
		if (state_wr == STATE_WR_IDLE | delay_shr[31])
			delay_shr <= { delay_shr[30:0], state_wr == STATE_WR_IDLE };

		case (state_wr)
		// Entered only at startup; nothing transitions back here.
		STATE_WR_IDLE: if (delay_shr[31])
			state_wr <= STATE_WR_CHECK_CORE_INIT_READY;

		// Broadcast initialization of cores with P, MW, S data.
		STATE_WR_CHECK_CORE_INIT_READY: if (some_cores_init_ready) begin
			start_init_tx <= 1;
			state_wr <= STATE_WR_INIT_TX_START;
		end
		else
			state_wr <= STATE_WR_CHECK_CORE_DATA_READY;

		// start_init_tx is a 1-cycle pulse. Wait for the start marker
		// on ctrl, then enable writing on every core that asked for
		// initialization. core_wr_en rises on the same clock edge that
		// copies the marker into core_ctrl.
		STATE_WR_INIT_TX_START: begin
			start_init_tx <= 0;
			if (ctrl == `CTRL_INIT_START) begin
				core_wr_en <= core_init_ready;
				state_wr <= STATE_WR_TX;
			end
		end

		STATE_WR_CHECK_CORE_DATA_READY: begin
			// It goes the last "dummy" candidate in the packet.
			if (data_ready & bcdata_gen_end) begin
				inpkt_done <= 1;
				// Set flag for bcdata; actually transmit doesn't start
				start_data_tx <= 1;
				// ~mode_cmp: don't do accounting, skip last "dummy" candidate
				if (mode_cmp)
					state_wr <= STATE_WR_WAIT_PKT;
				else
					state_wr <= STATE_WR_WAIT1;
			end

			// A candidate is available and the examined core can take it.
			else if (data_ready & core_crypt_ready[wr_core_num]) begin

				num_processed_in <= num_processed_in + 1'b1;

				// Data for cores over {din, ctrl} is going
				// to appear in a few cycles
				start_data_tx <= 1;
				state_wr <= STATE_WR_DATA_TX_START;
			end

			// Nothing to do with this core: advance the round-robin
			// scan. On wrap-around, if the timeout has expired, go back
			// and check whether some core needs initialization.
			else begin
				if (wr_core_num == NUM_CORES-1) begin
					wr_core_num <= 0;
					if (data_ready_timeout)
						state_wr <= STATE_WR_CHECK_CORE_INIT_READY;
				end
				else
					wr_core_num <= wr_core_num + 1'b1;
			end
		end

		// start_data_tx is a 1-cycle pulse. Wait for the start marker
		// on ctrl, then enable writing on the selected core only.
		STATE_WR_DATA_TX_START: begin
			start_data_tx <= 0;
			if (ctrl == `CTRL_DATA_START) begin
				core_wr_en[wr_core_num] <= 1;
				state_wr <= STATE_WR_TX;
			end
		end

		// Transfer in progress (init or data) until the end marker.
		STATE_WR_TX: if (ctrl == `CTRL_END)
			state_wr <= STATE_WR_TX_END;

		STATE_WR_TX_END: begin
			// wr_en deasserts after the last data byte
			core_wr_en <= 0;
			state_wr <= STATE_WR_CHECK_CORE_DATA_READY;
		end

		// Wait until data from previous packet is processed.
		// outpkt_done is a 1-cycle pulse from the read FSM.
		STATE_WR_WAIT_PKT: begin
			start_data_tx <= 0;
			if (outpkt_done) begin
				inpkt_done <= 0;
				num_processed_in <= 0;
				state_wr <= STATE_WR_CHECK_CORE_INIT_READY;
			end
		end

		// ~mode_cmp: just finish the start_data_tx pulse and continue.
		STATE_WR_WAIT1: begin
			start_data_tx <= 0;
			state_wr <= STATE_WR_CHECK_CORE_DATA_READY;
		end
		endcase
	end

	// `delay` is defined outside this file. Judging by the note on the
	// idle detector below (NBITS=10, "~1K cycles"), `out` presumably
	// asserts after `in` has been held for about 2**NBITS cycles
	// -- TODO confirm against the delay module.
	delay #(.NBITS(9)) data_ready_timeout_inst (.CLK(CLK),
			.in(state_wr == STATE_WR_CHECK_CORE_DATA_READY),
			.out(data_ready_timeout) );


	// Count number of keys currently in processing:
	// +1 on a start_data_tx pulse for a real candidate (not the
	// end-of-packet dummy), -1 when the read side picks up a result
	// (recv_item). When both happen in the same cycle the count is kept.
	localparam TOTAL_IN_PROCESSING = NUM_CORES + 3;

	reg recv_item = 0;
	reg [`MSB(TOTAL_IN_PROCESSING-1) :0] total_in_processing = 0;
	always @(posedge CLK)
		if (start_data_tx & ~bcdata_gen_end) begin
			if (~recv_item)
				total_in_processing <= total_in_processing + 1'b1;
		end
		else if (recv_item)
			total_in_processing <= total_in_processing - 1'b1;

	// idle: no keys in flight, no data/init transfer in ~1K cycles
	delay #(.INIT(1), .NBITS(10)) delay_idle_inst (.CLK(CLK),
		.in(total_in_processing == 0 & (1'b0
			| state_wr == STATE_WR_IDLE
			| state_wr == STATE_WR_CHECK_CORE_INIT_READY
			| state_wr == STATE_WR_CHECK_CORE_DATA_READY
		)),
		.out(idle) );


	// *************************************************************
	//
	// Read from cores (over 1-bit bus)
	//
	// *************************************************************

	// Result buffer: 16 words x 16 bits, written by the read FSM,
	// read asynchronously through rd_addr / dout.
	(* RAM_STYLE="DISTRIBUTED" *)
	reg [15:0] output_r [15:0];
	assign dout = output_r [rd_addr];

	reg [`MSB(NUM_CORES-1):0] rd_core_num = 0;	// core being polled / read
	reg [3:0] rd_count = 0;				// bit index inside current word
	reg [15:0] rd_tmp = 0;				// word being assembled
	reg [3:0] result_word_count = 0;	// index of current word

	localparam UNIT_OUTPUT_WIDTH = 1;
	localparam PKT_NUM_WORDS = 16;

	// Output bit of the selected core, registered once more.
	reg core_dout_r = 0;
	always @(posedge CLK)
		core_dout_r <= core_dout[rd_core_num];

	// Write port of output_r. rd_tmp_wr_en is a 1-cycle pulse raised
	// together with the last bit of a word, so rd_tmp holds the
	// complete word when the write takes place.
	reg rd_tmp_wr_en = 0;
	reg [`MSB(PKT_NUM_WORDS-1):0] rd_tmp_wr_addr = 0;
	always @(posedge CLK)
		if (rd_tmp_wr_en)
			output_r [rd_tmp_wr_addr] <= rd_tmp;

	// Selected words are also captured into dedicated registers,
	// using the same 1-cycle write-enable scheme.
	reg pkt_id_wr_en = 0, cmp0_wr_en = 0, cmp1_wr_en = 0;
	always @(posedge CLK) begin
		if (pkt_id_wr_en)
			pkt_id <= rd_tmp;
		if (cmp0_wr_en)
			cmp_data[15:0] <= rd_tmp;
		if (cmp1_wr_en)
			cmp_data[31:16] <= rd_tmp;
	end

	// Startup delay for the read FSM; same scheme as delay_shr.
	reg [31:0] delay_shr2 = 0;

	// NOTE: STATE_RD_ERROR is declared and has an (empty) case item,
	// but no transition in this module leads to it.
	localparam STATE_RD_IDLE = 0,
				STATE_RD_CHECK_NOT_EMPTY = 1,
				STATE_RD_HEADER = 2,
				STATE_RD_DATA = 3,
				STATE_RD_CMP = 4,
				STATE_RD_READ_COMPLETE = 5,
				STATE_RD_OUTPKT_RESULT = 6,
				STATE_RD_ACCOUNT = 7,
				STATE_RD_ACCOUNT2 = 8,
				STATE_RD_OUTPKT_PROCESSING_DONE = 9,
				STATE_RD_CLEANUP = 10,
				STATE_RD_ERROR = 11;

	(* FSM_EXTRACT="true" *)
	reg [3:0] state_rd = STATE_RD_IDLE;

	always @(posedge CLK) begin
		// The following flags are 1-cycle pulses: whatever was raised
		// in the previous cycle is cleared here unless the case
		// statement below raises it again (the later assignment wins).
		if (recv_item)
			recv_item <= 0;

		if (rd_tmp_wr_en)
			rd_tmp_wr_en <= 0;

		if (pkt_id_wr_en)
			pkt_id_wr_en <= 0;
		if (cmp0_wr_en)
			cmp0_wr_en <= 0;
		if (cmp1_wr_en)
			cmp1_wr_en <= 0;
		if (cmp_start)
			cmp_start <= 0;

		if (state_rd == STATE_RD_IDLE | delay_shr2[31])
			delay_shr2 <= { delay_shr2[30:0], state_rd == STATE_RD_IDLE };

		case(state_rd)
		// Entered only at startup; nothing transitions back here.
		STATE_RD_IDLE: if (delay_shr2[31])
			state_rd <= STATE_RD_CHECK_NOT_EMPTY;

		// Round-robin poll: start reading the first core found
		// with a non-empty output.
		STATE_RD_CHECK_NOT_EMPTY: begin
			if (~core_empty[rd_core_num]) begin
				core_rd_en[rd_core_num] <= 1;
				state_rd <= STATE_RD_HEADER;
			end
			else
				rd_core_num <= rd_core_num == NUM_CORES-1
						? {`MSB(NUM_CORES-1)+1{1'b0}} : rd_core_num + 1'b1;
		end

		// =======================================================
		// Output content:
		// - header (1 bit == 1'b1)
		// - 2x 32-bit IDs. IDs are sent along with encryption data
		//   for accounting purposes.
		// - 6x 32-bit Blowfish encryption result.
		// =======================================================
		STATE_RD_HEADER: begin
			// It requires to assert rd_en for 1 cycle.
			core_rd_en[rd_core_num] <= 0;

			result_word_count <= 0;

			rd_count <= 0;
			// header (1 bit == 1'b1)
			// The FSM stays here until the header bit shows up.
			if (core_dout_r) begin
				recv_item <= 1;
				state_rd <= STATE_RD_DATA;
			end
		end

		// Collect PKT_NUM_WORDS words X 16 bit in output_r
		// Bits arrive least significant first (bit rd_count of rd_tmp).
		STATE_RD_DATA: begin
			rd_tmp [rd_count * UNIT_OUTPUT_WIDTH +:UNIT_OUTPUT_WIDTH]
				<= core_dout_r;
			rd_count <= rd_count + 1'b1;
			// Last bit of a word: schedule its write into output_r.
			if (rd_count == (16 / UNIT_OUTPUT_WIDTH) -1) begin
				rd_tmp_wr_en <= 1;
				rd_tmp_wr_addr <= result_word_count;
				result_word_count <= result_word_count + 1'b1;
				// Last word of the result.
				if (result_word_count == PKT_NUM_WORDS-1) begin
					if (mode_cmp)
						state_rd <= STATE_RD_CMP;
					else
						state_rd <= STATE_RD_READ_COMPLETE;
				end
			end

			// 2nd 16-bit word: pkt_id
			if (result_word_count == 1 & rd_count == (16 / UNIT_OUTPUT_WIDTH) -1)
				pkt_id_wr_en <= 1;

			// externalize comparator data, start comparison
			// before all the data received from a computing unit
			// Words 4 and 5 are the first 32 bits that follow the two
			// 32-bit IDs; cmp_start is pulsed while word 6 is being
			// received, after both halves of cmp_data were written.
			if (result_word_count == 4 & rd_count == (16 / UNIT_OUTPUT_WIDTH) -1)
				cmp0_wr_en <= 1;
			if (result_word_count == 5 & rd_count == (16 / UNIT_OUTPUT_WIDTH) -1)
				cmp1_wr_en <= 1;
			if (mode_cmp & result_word_count == 6 & rd_count == 3)
				cmp_start <= 1;
		end

		// mode_cmp: wait for the comparator verdict. A match is
		// output as a CMP_RESULT packet; otherwise only accounting.
		STATE_RD_CMP: begin
			if (cmp_found) begin
				outpkt_type <= `OUTPKT_TYPE_CMP_RESULT;
				hash_num <= cmp_hash_num;
				empty <= 0;
				state_rd <= STATE_RD_OUTPKT_RESULT;
			end
			else if (cmp_finished)
				state_rd <= STATE_RD_ACCOUNT;
		end

		// ~mode_cmp: every result is output as a RESULT packet.
		STATE_RD_READ_COMPLETE: begin
			outpkt_type <= `OUTPKT_TYPE_RESULT;
			empty <= 0;
			state_rd <= STATE_RD_OUTPKT_RESULT;
		end

		// Hold the packet (empty == 0) until the reader asserts rd_en.
		STATE_RD_OUTPKT_RESULT: if (rd_en) begin // output PKT_RESULT or PKT_CMP_RESULT
			empty <= 1;
			// ~mode_cmp: no accounting, no output of PKT_DONE
			if (mode_cmp)
				state_rd <= STATE_RD_ACCOUNT;
			else
				state_rd <= STATE_RD_CHECK_NOT_EMPTY;
		end

		// Accounting takes 2 cycles: count the candidate first,
		// then compare the updated count.
		STATE_RD_ACCOUNT: begin
			num_processed <= num_processed + 1'b1;
			state_rd <= STATE_RD_ACCOUNT2;
		end

		// The packet is complete when the write side has seen its end
		// (inpkt_done) and every candidate sent has been accounted for.
		STATE_RD_ACCOUNT2: begin
			outpkt_type <= `OUTPKT_TYPE_PACKET_DONE;
			if (inpkt_done & num_processed == num_processed_in) begin
				empty <= 0;
				state_rd <= STATE_RD_OUTPKT_PROCESSING_DONE;
			end
			else
				state_rd <= STATE_RD_CHECK_NOT_EMPTY;
		end

		// num_processed remains valid on the output until rd_en.
		STATE_RD_OUTPKT_PROCESSING_DONE: begin // output PKT_PROCESSING_DONE
			if (rd_en) begin
				outpkt_done <= 1;
				empty <= 1;
				num_processed <= 0;
				state_rd <= STATE_RD_CLEANUP;
			end
		end

		// Ends the 1-cycle outpkt_done pulse seen by the write FSM.
		STATE_RD_CLEANUP: begin
			outpkt_done <= 0;
			state_rd <= STATE_RD_CHECK_NOT_EMPTY;
		end

		STATE_RD_ERROR: begin
		end
		endcase
	end

endmodule
444