`timescale 1ns / 1ps
/*
 * This software is Copyright (c) 2016,2019 Denis Burykin
 * [denis_burykin yahoo com], [denis-burykin2014 yandex ru]
 * and it is hereby released to the general public under the following terms:
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 */
`include "bcrypt.vh"

//
// bcrypt_arbiter
//
// Sits between the data source (bcrypt_data), NUM_CORES computing cores,
// the comparator and the output packet reader. It contains two independent
// state machines:
//
// - Write FSM (state_wr): asks bcrypt_data to start an "init" transfer
//   (broadcast to every core that reports init_ready) or a "data" transfer
//   (to one core that reports crypt_ready), and drives core_wr_en for the
//   duration of the transfer, which is framed by values on 'ctrl'.
//
// - Read FSM (state_rd): polls cores for output, receives a serial
//   (1 bit per cycle) result of PKT_NUM_WORDS 16-bit words into output_r,
//   optionally waits for the comparator, then exposes the result via
//   {empty, outpkt_type, dout/rd_addr, pkt_id, hash_num, num_processed}.
//
// Parameters:
//   NUM_CORES - number of cores served. The default (-1) is not usable;
//               the instantiating module must override it.
//
// Notes grounded in this file:
//   - 'error' is always 4'b0000 here: err_core_output is never set and
//     STATE_RD_ERROR is never entered.
//   - Inputs 'init_ready' and 'bcdata_pkt_id' are not referenced in this module.
//   - `MSB, `HASH_NUM_MSB, `OUTPKT_TYPE_*, `CTRL_* come from bcrypt.vh;
//     module 'delay' is defined elsewhere in the project.
//
module bcrypt_arbiter #(
	parameter NUM_CORES = -1
	)(
	input CLK,
	input mode_cmp,

	// Packages of data from bcrypt_data for cores
	input [7:0] din,
	input [1:0] ctrl,

	// Control exchange with bcrypt_data
	input data_ready, init_ready,
	output reg start_data_tx = 0, start_init_tx = 0,
	input [15:0] bcdata_pkt_id,
	input bcdata_gen_end,

	// Comparator
	output reg [31:0] cmp_data,
	output reg cmp_start = 0,
	input cmp_found, cmp_finished,
	input [`HASH_NUM_MSB:0] cmp_hash_num,

	// Output using memory 16x16
	output reg [`OUTPKT_TYPE_MSB:0] outpkt_type,
	output [15:0] dout,
	input [3:0] rd_addr,

	output reg [15:0] pkt_id,
	output reg [31:0] num_processed = 0,
	output reg [`HASH_NUM_MSB:0] hash_num,
	input rd_en,
	output reg empty = 1,
	output [3:0] error,
	output idle,

	// Cores are moved to top-level module.
	output reg [7:0] core_din,
	output reg [1:0] core_ctrl,
	output reg [NUM_CORES-1:0] core_wr_en = 0,
	input [NUM_CORES-1:0] core_init_ready_in, core_crypt_ready_in,
	output reg [NUM_CORES-1:0] core_rd_en = 0,
	input [NUM_CORES-1:0] core_empty_in,
	input [NUM_CORES-1:0] core_dout_in
	);


	//
	// *******************************************************
	//
	// Error reporting. Only bit 0 is wired to a flag, and nothing in this
	// module ever sets that flag, so 'error' stays at zero.
	reg err_core_output = 0;

	assign error = {
		1'b0, 1'b0, 1'b0, err_core_output
	};


	//
	// *******************************************************
	//
	// Register everything that crosses the boundary to/from the cores.
	// Consequently core_din/core_ctrl lag din/ctrl by one cycle and the
	// FSMs below see core status signals one cycle late.
	reg [NUM_CORES-1:0] core_init_ready = 0, core_crypt_ready = 0;
	reg [NUM_CORES-1:0] core_empty = {NUM_CORES{1'b1}}, core_dout = 0;

	always @(posedge CLK) begin
		core_din <= din;
		core_ctrl <= ctrl;
		core_init_ready <= core_init_ready_in;
		core_crypt_ready <= core_crypt_ready_in;
		core_empty <= core_empty_in;
		core_dout <= core_dout_in;
	end

	// Registered OR-reduction: at least one core asks for initialization.
	reg some_cores_init_ready = 0;
	always @(posedge CLK)
		some_cores_init_ready <= |core_init_ready;

	//
	// Count candidates in packets
	// Unlike in descrypt, doing the easiest thing:
	// candidates from only 1 packet can be "in flight"
	//
	// Number of candidates of the current packet handed over to cores.
	reg [31:0] num_processed_in = 0;

	reg inpkt_done = 0; // All candidates from the packet were sent to cores
	reg outpkt_done = 0; // All candidates processed


	// *************************************************************
	//
	// Write data to cores
	//
	// *************************************************************
	// Index of the core currently examined / written to.
	reg [`MSB(NUM_CORES-1):0] wr_core_num = 0;

	// Shift register that holds the write FSM in IDLE after startup:
	// ones are shifted in while in IDLE, the FSM leaves IDLE when bit 31
	// becomes set. No transition back into IDLE exists in this module.
	reg [31:0] delay_shr = 0;

	// Output of data_ready_timeout_inst (instantiated after the FSM).
	// Declared explicitly, ahead of its first use in the FSM, instead of
	// relying on an implicit net created by the instance port connection.
	wire data_ready_timeout;

	localparam STATE_WR_IDLE = 0,
				STATE_WR_CHECK_CORE_INIT_READY = 1,
				STATE_WR_INIT_TX_START = 2,
				STATE_WR_CHECK_CORE_DATA_READY = 3,
				STATE_WR_DATA_TX_START = 4,
				STATE_WR_TX = 5,
				STATE_WR_TX_END = 6,
				STATE_WR_WAIT_PKT = 7,
				STATE_WR_WAIT1 = 8;

	(* FSM_EXTRACT="true" *)
	reg [3:0] state_wr = STATE_WR_IDLE;

	always @(posedge CLK) begin
		// Shift while in IDLE; afterwards keep shifting (zeroes) only
		// until bit 31 clears.
		if (state_wr == STATE_WR_IDLE | delay_shr[31])
			delay_shr <= { delay_shr[30:0], state_wr == STATE_WR_IDLE };

		case (state_wr)
		STATE_WR_IDLE: if (delay_shr[31])
			state_wr <= STATE_WR_CHECK_CORE_INIT_READY;

		// Broadcast initialization of cores with P, MW, S data.
		// Initialization has priority over sending candidates.
		STATE_WR_CHECK_CORE_INIT_READY: if (some_cores_init_ready) begin
			start_init_tx <= 1;
			state_wr <= STATE_WR_INIT_TX_START;
		end
		else
			state_wr <= STATE_WR_CHECK_CORE_DATA_READY;

		// start_init_tx is a 1-cycle pulse. Wait for the start marker
		// on 'ctrl', then enable writing on every core that asked for
		// initialization.
		STATE_WR_INIT_TX_START: begin
			start_init_tx <= 0;
			if (ctrl == `CTRL_INIT_START) begin
				core_wr_en <= core_init_ready;
				state_wr <= STATE_WR_TX;
			end
		end

		STATE_WR_CHECK_CORE_DATA_READY: begin
			// The last "dummy" candidate in the packet is pending.
			if (data_ready & bcdata_gen_end) begin
				inpkt_done <= 1;
				// Set flag for bcdata; actual transmission doesn't start
				start_data_tx <= 1;
				// ~mode_cmp: don't do accounting, skip last "dummy" candidate
				if (mode_cmp)
					state_wr <= STATE_WR_WAIT_PKT;
				else
					state_wr <= STATE_WR_WAIT1;
			end

			// A candidate is pending and the examined core accepts data.
			else if (data_ready & core_crypt_ready[wr_core_num]) begin

				num_processed_in <= num_processed_in + 1'b1;

				// Data for cores over {din, ctrl} is going
				// to appear in a few cycles
				start_data_tx <= 1;
				state_wr <= STATE_WR_DATA_TX_START;
			end

			// Nothing to do with this core: advance to the next one.
			// After a full round, go back to check for initialization
			// requests if data_ready_timeout is asserted.
			else begin
				if (wr_core_num == NUM_CORES-1) begin
					wr_core_num <= 0;
					if (data_ready_timeout)
						state_wr <= STATE_WR_CHECK_CORE_INIT_READY;
				end
				else
					wr_core_num <= wr_core_num + 1'b1;
			end
		end

		// start_data_tx is a 1-cycle pulse. Wait for the start marker
		// on 'ctrl', then enable writing on the selected core only.
		STATE_WR_DATA_TX_START: begin
			start_data_tx <= 0;
			if (ctrl == `CTRL_DATA_START) begin
				core_wr_en[wr_core_num] <= 1;
				state_wr <= STATE_WR_TX;
			end
		end

		// Transfer in progress (init or data) until the end marker.
		STATE_WR_TX: if (ctrl == `CTRL_END)
			state_wr <= STATE_WR_TX_END;

		STATE_WR_TX_END: begin
			// wr_en deasserts after the last data byte
			core_wr_en <= 0;
			state_wr <= STATE_WR_CHECK_CORE_DATA_READY;
		end

		// Wait until data from previous packet is processed
		// (the read FSM pulses outpkt_done), then reset accounting.
		STATE_WR_WAIT_PKT: begin
			start_data_tx <= 0;
			if (outpkt_done) begin
				inpkt_done <= 0;
				num_processed_in <= 0;
				state_wr <= STATE_WR_CHECK_CORE_INIT_READY;
			end
		end

		// ~mode_cmp: only deassert start_data_tx and continue.
		// NOTE: inpkt_done is not cleared on this path.
		STATE_WR_WAIT1: begin
			start_data_tx <= 0;
			state_wr <= STATE_WR_CHECK_CORE_DATA_READY;
		end
		endcase
	end

	// Timeout derived from the time spent in STATE_WR_CHECK_CORE_DATA_READY.
	// Exact timing is defined by module 'delay' (not in this file).
	delay #(.NBITS(9)) data_ready_timeout_inst (.CLK(CLK),
		.in(state_wr == STATE_WR_CHECK_CORE_DATA_READY),
		.out(data_ready_timeout) );


	// Count number of keys currently in processing:
	// +1 on a start_data_tx pulse for a real candidate (not the "dummy"
	// end-of-packet one), -1 when the read FSM receives a result header
	// (recv_item). When both happen in the same cycle the count is kept.
	localparam TOTAL_IN_PROCESSING = NUM_CORES + 3;

	reg recv_item = 0;
	reg [`MSB(TOTAL_IN_PROCESSING-1) :0] total_in_processing = 0;
	always @(posedge CLK)
		if (start_data_tx & ~bcdata_gen_end) begin
			if (~recv_item)
				total_in_processing <= total_in_processing + 1'b1;
		end
		else if (recv_item)
			total_in_processing <= total_in_processing - 1'b1;

	// idle: no keys in flight, no data/init transfer in ~1K cycles
	// (the cycle count follows from 'delay' with NBITS=10 - defined elsewhere)
	delay #(.INIT(1), .NBITS(10)) delay_idle_inst (.CLK(CLK),
		.in(total_in_processing == 0 & (1'b0
			| state_wr == STATE_WR_IDLE
			| state_wr == STATE_WR_CHECK_CORE_INIT_READY
			| state_wr == STATE_WR_CHECK_CORE_DATA_READY
		)),
		.out(idle) );


	// *************************************************************
	//
	// Read from cores (over 1-bit bus)
	//
	// *************************************************************
	// Result storage: 16 words x 16 bit, read asynchronously via rd_addr.
	(* RAM_STYLE="DISTRIBUTED" *)
	reg [15:0] output_r [15:0];
	assign dout = output_r [rd_addr];

	reg [`MSB(NUM_CORES-1):0] rd_core_num = 0; // core being polled / read
	reg [3:0] rd_count = 0;           // bit index inside current 16-bit word
	reg [15:0] rd_tmp = 0;            // word being assembled from serial bits
	reg [3:0] result_word_count = 0;  // index of the word being assembled

	localparam UNIT_OUTPUT_WIDTH = 1;
	localparam PKT_NUM_WORDS = 16;

	// Registered output bit of the selected core.
	reg core_dout_r = 0;
	always @(posedge CLK)
		core_dout_r <= core_dout[rd_core_num];

	// Write of an assembled word into output_r, one cycle after the read
	// FSM raises rd_tmp_wr_en (rd_tmp is complete by then).
	reg rd_tmp_wr_en = 0;
	reg [`MSB(PKT_NUM_WORDS-1):0] rd_tmp_wr_addr = 0;
	always @(posedge CLK)
		if (rd_tmp_wr_en)
			output_r [rd_tmp_wr_addr] <= rd_tmp;

	// Selected words are additionally copied to dedicated registers:
	// word 1 -> pkt_id, word 4 -> cmp_data[15:0], word 5 -> cmp_data[31:16].
	reg pkt_id_wr_en = 0, cmp0_wr_en = 0, cmp1_wr_en = 0;
	always @(posedge CLK) begin
		if (pkt_id_wr_en)
			pkt_id <= rd_tmp;
		if (cmp0_wr_en)
			cmp_data[15:0] <= rd_tmp;
		if (cmp1_wr_en)
			cmp_data[31:16] <= rd_tmp;
	end

	// Holds the read FSM in IDLE after startup (same scheme as delay_shr).
	reg [31:0] delay_shr2 = 0;

	localparam STATE_RD_IDLE = 0,
				STATE_RD_CHECK_NOT_EMPTY = 1,
				STATE_RD_HEADER = 2,
				STATE_RD_DATA = 3,
				STATE_RD_CMP = 4,
				STATE_RD_READ_COMPLETE = 5,
				STATE_RD_OUTPKT_RESULT = 6,
				STATE_RD_ACCOUNT = 7,
				STATE_RD_ACCOUNT2 = 8,
				STATE_RD_OUTPKT_PROCESSING_DONE = 9,
				STATE_RD_CLEANUP = 10,
				STATE_RD_ERROR = 11;

	(* FSM_EXTRACT="true" *)
	reg [3:0] state_rd = STATE_RD_IDLE;

	always @(posedge CLK) begin
		// The strobes below are 1-cycle pulses: each one is cleared here
		// unless it is set again further down in the same cycle.
		if (recv_item)
			recv_item <= 0;

		if (rd_tmp_wr_en)
			rd_tmp_wr_en <= 0;

		if (pkt_id_wr_en)
			pkt_id_wr_en <= 0;
		if (cmp0_wr_en)
			cmp0_wr_en <= 0;
		if (cmp1_wr_en)
			cmp1_wr_en <= 0;
		if (cmp_start)
			cmp_start <= 0;

		if (state_rd == STATE_RD_IDLE | delay_shr2[31])
			delay_shr2 <= { delay_shr2[30:0], state_rd == STATE_RD_IDLE };

		case(state_rd)
		STATE_RD_IDLE: if (delay_shr2[31])
			state_rd <= STATE_RD_CHECK_NOT_EMPTY;

		// Poll cores in round-robin order; start reading from the
		// first one that has output.
		STATE_RD_CHECK_NOT_EMPTY: begin
			if (~core_empty[rd_core_num]) begin
				core_rd_en[rd_core_num] <= 1;
				state_rd <= STATE_RD_HEADER;
			end
			else
				rd_core_num <= rd_core_num == NUM_CORES-1
					? {`MSB(NUM_CORES-1)+1{1'b0}} : rd_core_num + 1'b1;
		end

		// =======================================================
		// Output content:
		// - header (1 bit == 1'b1)
		// - 2x 32-bit IDs. IDs are sent along with encryption data
		//   for accounting purposes.
		// - 6x 32-bit Blowfish encryption result.
		// =======================================================
		STATE_RD_HEADER: begin
			// rd_en is required to be asserted for 1 cycle only.
			core_rd_en[rd_core_num] <= 0;

			result_word_count <= 0;

			rd_count <= 0;
			// header (1 bit == 1'b1); there is no timeout while
			// waiting for it
			if (core_dout_r) begin
				recv_item <= 1;
				state_rd <= STATE_RD_DATA;
			end
		end

		// Collect PKT_NUM_WORDS words X 16 bit in output_r
		STATE_RD_DATA: begin
			rd_tmp [rd_count * UNIT_OUTPUT_WIDTH +:UNIT_OUTPUT_WIDTH]
				<= core_dout_r;
			rd_count <= rd_count + 1'b1;
			// Last bit of a word: schedule the write into output_r
			if (rd_count == (16 / UNIT_OUTPUT_WIDTH) -1) begin
				rd_tmp_wr_en <= 1;
				rd_tmp_wr_addr <= result_word_count;
				result_word_count <= result_word_count + 1'b1;
				// Last word of the result
				if (result_word_count == PKT_NUM_WORDS-1) begin
					if (mode_cmp)
						state_rd <= STATE_RD_CMP;
					else
						state_rd <= STATE_RD_READ_COMPLETE;
				end
			end

			// 2nd 16-bit word: pkt_id
			if (result_word_count == 1 & rd_count == (16 / UNIT_OUTPUT_WIDTH) -1)
				pkt_id_wr_en <= 1;

			// externalize comparator data, start comparison
			// before all the data is received from a computing unit
			if (result_word_count == 4 & rd_count == (16 / UNIT_OUTPUT_WIDTH) -1)
				cmp0_wr_en <= 1;
			if (result_word_count == 5 & rd_count == (16 / UNIT_OUTPUT_WIDTH) -1)
				cmp1_wr_en <= 1;
			// cmp_data is complete a couple of cycles into word 6
			if (mode_cmp & result_word_count == 6 & rd_count == 3)
				cmp_start <= 1;
		end

		// mode_cmp: wait for the comparator verdict. A match produces
		// an output packet; a miss goes straight to accounting.
		STATE_RD_CMP: begin
			if (cmp_found) begin
				outpkt_type <= `OUTPKT_TYPE_CMP_RESULT;
				hash_num <= cmp_hash_num;
				empty <= 0;
				state_rd <= STATE_RD_OUTPKT_RESULT;
			end
			else if (cmp_finished)
				state_rd <= STATE_RD_ACCOUNT;
		end

		// ~mode_cmp: every result is output.
		STATE_RD_READ_COMPLETE: begin
			outpkt_type <= `OUTPKT_TYPE_RESULT;
			empty <= 0;
			state_rd <= STATE_RD_OUTPKT_RESULT;
		end

		// Wait until the reader takes the packet (rd_en).
		STATE_RD_OUTPKT_RESULT: if (rd_en) begin // output PKT_RESULT or PKT_CMP_RESULT
			empty <= 1;
			// ~mode_cmp: no accounting, no output of PKT_DONE
			if (mode_cmp)
				state_rd <= STATE_RD_ACCOUNT;
			else
				state_rd <= STATE_RD_CHECK_NOT_EMPTY;
		end

		STATE_RD_ACCOUNT: begin
			num_processed <= num_processed + 1'b1;
			state_rd <= STATE_RD_ACCOUNT2;
		end

		// The whole packet is done when the write FSM has sent all its
		// candidates and as many results have been accounted for.
		STATE_RD_ACCOUNT2: begin
			outpkt_type <= `OUTPKT_TYPE_PACKET_DONE;
			if (inpkt_done & num_processed == num_processed_in) begin
				empty <= 0;
				state_rd <= STATE_RD_OUTPKT_PROCESSING_DONE;
			end
			else
				state_rd <= STATE_RD_CHECK_NOT_EMPTY;
		end

		STATE_RD_OUTPKT_PROCESSING_DONE: begin // output PKT_PROCESSING_DONE
			if (rd_en) begin
				outpkt_done <= 1;
				empty <= 1;
				num_processed <= 0;
				state_rd <= STATE_RD_CLEANUP;
			end
		end

		// outpkt_done is a 1-cycle pulse towards the write FSM.
		STATE_RD_CLEANUP: begin
			outpkt_done <= 0;
			state_rd <= STATE_RD_CHECK_NOT_EMPTY;
		end

		// Never entered: no transition into this state exists.
		STATE_RD_ERROR: begin
		end
		endcase
	end

endmodule