`timescale 1ns / 1ps
/*
 * This software is Copyright (c) 2018 Denis Burykin
 * [denis_burykin yahoo com], [denis-burykin2014 yandex ru]
 * and it is hereby released to the general public under the following terms:
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 */
`include "../md5.vh"


// *****************************************************************
//
// cpu
//
// Executes instructions supplied by the `instruction` submodule.
// An instruction is decoded over the stages STAGE_RD0, STAGE_RD1 and
// STAGE_EXEC (bits 1..3 of stage_allow). The module drives:
// - the register file (registers_bram), written from integer_ops
//   results or from data read from the memory (mem_*);
// - per-thread flags and condition checks (cpu_flags);
// - writes into comp_buf / procb_buf (comp_*, procb_*);
// - writes into the unit's output buffer (uob_*);
// - thread_state writes (ts_wr_*).
//
// Parameters:
// - WIDTH: register width in bits; mem_din is 2*WIDTH bits wide.
// - N_CORES: passed to the `instruction` submodule.
// - N_THREADS: number of threads (4 per core by default); passed to
//   registers_bram and cpu_flags.
// - N_THREADS_MSB: MSB of a thread number.
//
// NOTE(review): uob_dout is a fixed 16-bit port driven by the
// WIDTH-bit reg_dout; the widths agree only when WIDTH == 16.
//
// *****************************************************************
module cpu #(
    parameter WIDTH = 16,
    parameter N_CORES = 3,
    parameter N_THREADS = 4 * N_CORES,
    parameter N_THREADS_MSB = `MSB(N_THREADS-1)
    )(
    input CLK,
    input [`ENTRY_PT_MSB:0] entry_pt_curr,
    // thread_state (ts)
    output [N_THREADS_MSB :0] ts_rd_num, ts_wr_num, // Thread #
    output reg ts_wr_en = 0,
    output reg [`THREAD_STATE_MSB :0] ts_wr,
    input [`THREAD_STATE_MSB :0] ts_rd,

    // comp_buf & procb_buf
    output reg comp_wr_en = 0, procb_wr_en = 0,
    output reg [N_THREADS_MSB :0] comp_procb_wr_thread_num,
    input [`PROCB_A_WIDTH-1 :0] procb_wr_cnt,
    output reg [`COMP_DATA1_MSB + `COMP_DATA2_MSB+1 :0] comp_dout,
    output reg [`PROCB_D_WIDTH-1 :0] procb_dout,

    // input from the memory
    output reg mem_rd_request = 0,
    output reg [`MEM_TOTAL_MSB :0] mem_rd_addr,
    input mem_rd_valid,
    input [2*WIDTH-1:0] mem_din,

    // unit_output_buf
    output [15:0] uob_dout,
    output uob_wr_en, uob_set_input_complete,
    output [`UOB_ADDR_MSB :0] uob_wr_addr,
    input uob_ready, uob_full,
    output err
    );

    // Marker register: it is only ever written (with 1) and is not
    // read anywhere in this module.
    // NOTE(review): presumably a simulation/debug aid - confirm
    // before removing.
    reg z;

    wire INVALIDATE_eqn, INSTR_WAIT_eqn;
    reg EXECUTED = 0;
    reg NEXT_THREAD = 0;
    reg INSTR_WAIT = 0;
    reg JUMP = 0;
    reg [`IADDR_LEN-1 :0] jump_addr;

    // Single-bit nets that were previously created implicitly (by a
    // port connection or a continuous assignment). Declared here so
    // that every use follows a declaration.
    wire thread_almost_switched;    // driven by the `instruction` submodule
    wire thread_state_change;       // assigned near the end of the module
    wire condition_is_true;         // driven by cpu_flags
    wire flag_zf_in, flag_of_in, flag_cf_in;    // driven by integer_ops
    wire flag_uf_in;
    wire uob_eqn;
    wire procb_eqn, procb_full, procb_afull;


    // Thread State Changed flag.
    // A thread runs only in WR_RDY state, when thread_state
    // changes it has to switch current thread (that takes 4 cycles).
    //
    // The feature allows it to continue running until JMP
    // or until EXEC_OPT_TS_WR_RDY instruction (that requires WR_RDY state).
    //
    reg ts_changed = 0;
    always @(posedge CLK)
        if (thread_almost_switched)
            ts_changed <= 0;
        else if (thread_state_change)
            ts_changed <= 1;


    // *****************************************************************
    //
    // Instruction Execution
    // - the instruction right out from memory is available here.
    //
    // *****************************************************************
    wire [N_THREADS_MSB :0] thread_num;

    wire [`N_STAGES-1:0] stage_allow;
    //wire STAGE_INSTR_AVAIL = stage_allow[];
    wire STAGE_RD0 = stage_allow[1];
    wire STAGE_RD1 = stage_allow[2];
    wire STAGE_EXEC = stage_allow[3];

    wire [`FIELD_A_LEN-1 :0] field_a_in;
    wire [`EXEC_OPT_LEN-1: 0] exec_opt_in;
    wire [`PARTIAL_INSTR_LEN-1 :0] partial_instruction;

    instruction #( .N_CORES(N_CORES)
    ) instruction(
        .CLK(CLK),
        .entry_pt_curr(entry_pt_curr),
        .ts_rd_num(ts_rd_num), .ts_rd(ts_rd),
        .thread_num(thread_num),
        // Asserts for 1 cycle at STAGE_RD1
        .thread_almost_switched(thread_almost_switched),
        .instruction({ field_a_in, exec_opt_in, partial_instruction }),
        .INVALIDATE(INVALIDATE_eqn),
        .INSTR_WAIT(INSTR_WAIT_eqn | INSTR_WAIT),
        .NEXT_THREAD(NEXT_THREAD), .EXECUTED(EXECUTED),
        .JUMP(JUMP), .jump_addr(jump_addr),
        .stage_allow(stage_allow),
        .err(err),
        // dummy 2nd port
        .wr_en_dummy(1'b0), .wr_addr_dummy(1'b0)
    );

    assign ts_wr_num = thread_num;

    // Evaluated on the op. code bits of the incoming instruction,
    // before they are registered into op_code.
    wire op_type_use_reg0
        = `OP_TYPE_USE_REG(partial_instruction [`OP_CODE_LEN-1 :0]);


    // *****************************************************************
    //
    // Input from the memory
    //
    // *****************************************************************
    reg mem_rd_valid_r = 0, mem_wr_lower = 0;

`ifdef CPU_MV_R_MEM_2X
    // support for MV_R_MEM_2X
    reg [2*WIDTH-1:0] mem_r;
    reg mem_wr_2x = 0, mem_rd_valid_r2 = 0;

    always @(posedge CLK) begin
        if (mem_rd_valid)
            mem_r <= mem_din;
        mem_rd_valid_r <= mem_rd_valid;
        mem_rd_valid_r2 <= mem_rd_valid_r;
    end

    wire mem_rd_wr_en = mem_rd_valid_r | mem_wr_2x & mem_rd_valid_r2;
    // 0: lower half, 1: upper half
    wire mem_wr_select = ~(mem_rd_valid_r & (mem_wr_2x | mem_wr_lower));
    // writing upper half into the next register
    wire mem_rd_wr_2x = mem_wr_2x & mem_rd_valid_r2;
    wire mem_rd_complete = mem_wr_2x ? mem_rd_valid_r2 : mem_rd_valid_r;

`else
    reg [WIDTH-1:0] mem_r;

    // Latch the requested half of the memory word. The slices follow
    // WIDTH (mem_din is 2*WIDTH bits); with WIDTH == 16 they are
    // [15:0] and [31:16].
    always @(posedge CLK) begin
        if (mem_rd_valid)
            mem_r <= mem_wr_lower
                ? mem_din[WIDTH-1:0] : mem_din[2*WIDTH-1:WIDTH];
        mem_rd_valid_r <= mem_rd_valid;
    end

    wire mem_rd_wr_en = mem_rd_valid_r;
    wire mem_rd_complete = mem_rd_valid_r;

`endif


    // *****************************************************************
    //
    // Going to stage STAGE_RD0.
    //
    // *****************************************************************
    reg [`EXEC_OPT_LEN-1: 0] exec_opt;
    reg [`CONDITION_LEN-1 :0] op_condition;
    reg [`FIELD_B_LEN-1 :0] field_b;
    reg [`FIELD_C_LEN-1 :0] field_c;
    reg [`OP_CODE_LEN-1:0] op_code;

    always @(posedge CLK)
        if (STAGE_RD0) begin
            exec_opt <= exec_opt_in;
            { op_condition, field_b, field_c, op_code }
                <= partial_instruction;
        end

    wire op_type_use_reg = `OP_TYPE_USE_REG(op_code);


    // *****************************************************************
    //
    // Registers
    // - load is 2 cycles (rd_en0, rd_en1)
    // - 4 inputs (controlled with reg_din_select)
    //
    // *****************************************************************
    wire [WIDTH-1:0] reg_din1, reg_din2, reg_din3, reg_dout;
    wire [1:0] reg_din_select;
    (* SHREG_EXTRACT="no" *)
    // Write enable, for stages 3,4 respectively
    reg reg_wr_en3 = 0, reg_wr_en4 = 0;
    reg [`REG_ADDR_MSB :0] reg_wr_addr4;
    reg [N_THREADS_MSB :0] reg_wr_thread4;

    registers_bram #( .WIDTH(WIDTH), .N_THREADS(N_THREADS)
    ) registers(
        .CLK(CLK),
        .din1(reg_din1), .din2(reg_din2), .din3(reg_din3),

`ifdef CPU_MV_R_MEM_2X
        .mem_din(mem_wr_select ? mem_r[2*WIDTH-1:WIDTH] : mem_r[WIDTH-1:0]),
        .wr_addr( { reg_wr_addr4[`REG_ADDR_MSB:1],
            mem_rd_wr_2x ? 1'b1 : reg_wr_addr4[0] } ),
`else
        .mem_din(mem_r), .wr_addr(reg_wr_addr4),
`endif

        .mem_wr_en(mem_rd_wr_en), .wr_en(reg_wr_en4),
        .reg_din_select(reg_din_select),
        .wr_thread_num(reg_wr_thread4),

        .rd_addr(field_a_in),
        .rd_en0(STAGE_RD0 & op_type_use_reg0),
        .rd_en1(STAGE_RD1 & op_type_use_reg),
        .rd_thread_num(thread_num), .dout(reg_dout)
    );


    // *****************************************************************
    //
    // Going to stage STAGE_RD1.
    //
    // *****************************************************************
    reg [`EXEC_OPT_LEN-1: 0] exec_opt1;
    reg [`CONDITION_LEN-1 :0] op_condition1;
    reg [`FIELD_B_LEN-1 :0] field_b1;
    reg [`FIELD_C_LEN-1 :0] field_c1;
    reg [N_THREADS_MSB :0] thread_num1;
    reg [`OP_CODE_LEN-1:0] op_code1;

    always @(posedge CLK)
        if (STAGE_RD1) begin
            exec_opt1 <= exec_opt;
            op_condition1 <= op_condition;
            field_b1 <= field_b;
            field_c1 <= field_c;
            thread_num1 <= thread_num;
            op_code1 <= op_code;
        end

    // Register write address / thread, captured at STAGE_EXEC
    always @(posedge CLK)
        if (STAGE_EXEC) begin
            reg_wr_addr4 <= field_b1 [`REG_ADDR_MSB :0];
            reg_wr_thread4 <= thread_num1;
        end


    // *****************************************************************
    //
    // Integer Operations
    //
    // - no "reg <- reg (op) reg" operations so far
    //
    // *****************************************************************
    wire [`N_FLAGS-1 :0] flags;

    // iops (controls for integer operations)
    reg iop_addsub = 0, iop_sub = 0, iop_use_cf = 0,
        iop_grp2 = 0, iop_grp3 = 0, iop_shr1 = 0;
    reg [1:0] iop_grp2_select = 0;

    integer_ops #( .WIDTH(WIDTH) ) integer_ops(
        .CLK(CLK),
        .dina(reg_dout), .dinb(field_c1), .en(STAGE_EXEC),
        .in_cf(`FLAG_CARRY(flags)),
        .iops({ iop_addsub, iop_sub, iop_use_cf,
            iop_grp2, iop_grp3, iop_shr1 }),
        .iop_grp2_select(iop_grp2_select),

        .dout_select(reg_din_select),
        .dout1(reg_din1), .dout2(reg_din2), .dout3(reg_din3),
        .flag_zf(flag_zf_in), .flag_of(flag_of_in), .flag_cf(flag_cf_in)
    );


    // *****************************************************************
    //
    // Internal CPU Operations - STAGE_RD1
    //
    // - op_code is available
    //
    // *****************************************************************
    reg iop_sets_uf = 0, iop_sets_cf = 0, iop_sets_of = 0, iop_sets_zf = 0;
    reg [1:0] iop_flag_code = 0;
    reg iop_jmp = 0, iop_halt = 0;

    always @(posedge CLK)
        if (STAGE_RD1) begin
            iop_addsub <=
                op_code == `OP_CODE_ADD_R_C | op_code == `OP_CODE_SUB_R_C
                | op_code == `OP_CODE_ADDC_R_C | op_code == `OP_CODE_SUBB_R_C;
            iop_sub <= op_code == `OP_CODE_SUB_R_C
                | op_code == `OP_CODE_SUBB_R_C;
            iop_use_cf <= `OP_TYPE_USE_CF(op_code);

            iop_grp2 <= op_code == `OP_CODE_INC_RST
                | op_code == `OP_CODE_MV_R_C | op_code == `OP_CODE_AND;
            iop_grp2_select <=
                op_code == `OP_CODE_INC_RST ? 2'd1 :
                op_code == `OP_CODE_AND ? 2'd2 :
                2'd0;

            iop_grp3 <= op_code == `OP_CODE_SHR1 | op_code == `OP_CODE_MV_R_R;
            iop_shr1 <= op_code == `OP_CODE_SHR1;

            // This op. potentially writes into a register
            // (write might not be performed dependent on conditions).
            reg_wr_en3 <= `OP_TYPE_WRITE_REG(op_code);

            // It's hardcoded(hardwired?) when instruction checks conditions.
            //op_checks_conditions <= OP_TYPE_CHECK_CONDITION(op_code);

            iop_sets_zf <= `OP_TYPE_SETS_ZF(op_code);
            iop_sets_of <= `OP_TYPE_SETS_OF(op_code);
            // Non-blocking, like every other register in this block:
            // a blocking assignment here races in simulation with
            // the clocked logic that reads iop_flag_mask in cpu_flags.
            iop_sets_cf <= `OP_TYPE_SETS_CF(op_code);

            // OP_CODE_FLAG: applicable to UF only
            iop_sets_uf <= op_code == `OP_CODE_FLAG;
            iop_flag_code <= field_b[1:0];

            iop_jmp <= op_code == `OP_CODE_JMP;

            iop_halt <= op_code == `OP_CODE_HALT;
        end

    // All writes to the marker register 'z' are gathered in this one
    // block so that it has a single procedural driver. The conditions
    // are the same as before: selected op. codes seen at STAGE_RD1,
    // and op_condition1 equal to IF_CARRY on any clock.
    always @(posedge CLK) begin
        if (STAGE_RD1) begin
            if (op_code == `OP_CODE_INC_RST)
                z <= 1;
            if (op_code == `OP_CODE_FLAG)
                z <= 1;
            if (op_code == `OP_CODE_JMP)
                z <= 1;
            if (op_code == `OP_CODE_HALT)
                z <= 1;
        end

        if (op_condition1 == `IF_CARRY)
            z <= 1;
    end


    // *****************************************************************
    //
    // Internal CPU Operations - STAGE_EXEC
    //
    // - writes integer_ops.dout
    // - checks conditions
    // - sets flags
    //
    // *****************************************************************
    cpu_flags #( .N(`N_FLAGS), .N_THREADS(N_THREADS)
    ) cpu_flags(
        .CLK(CLK),
        .thread_num(thread_num),
        .load_en(thread_almost_switched), .save_en(NEXT_THREAD),
        .flags(flags),
        .op_condition(op_condition1), .condition_is_true(condition_is_true),

        .set_flags(STAGE_EXEC),
        .iop_flag_mask({ iop_sets_uf, iop_sets_cf, iop_sets_of, iop_sets_zf }),
        .flags_in({ flag_uf_in, flag_cf_in, flag_of_in, flag_zf_in })
    );

    // New value for the user flag, selected by iop_flag_code:
    // 00: keep, 01: set, 10: clear, 11: invert
    assign flag_uf_in =
        iop_flag_code == 2'b00 ? `FLAG_USER(flags) :
        iop_flag_code == 2'b01 ? 1'b1 :
        iop_flag_code == 2'b10 ? 1'b0 :
        ~`FLAG_USER(flags);


    always @(posedge CLK) begin
        // Check conditions for all integer operations that write registers
        reg_wr_en4 <= STAGE_EXEC & reg_wr_en3 & condition_is_true;

        if (iop_jmp)
            jump_addr <= field_c1 [`IADDR_LEN-1 :0];
    end


    // *****************************************************************
    //
    // Input/Output Operations.
    //
    // - MV_R_MEM_{2X|L|U} (Reg <- Memory)
    // - MV_UOB_R (Unit output buf. <- Reg)
    // - SET_OUTPUT_COMPLETE
    //
    // *****************************************************************
    reg op_mv_r_mem = 0, op_mv_r_mem2x = 0, op_mv_r_mem_lower = 0;
    // Declared ahead of the decode block below, which assigns them.
    reg op_mv_uob_r = 0, op_set_output_complete = 0;

    always @(posedge CLK)
        if (STAGE_RD1) begin
            op_mv_r_mem <= `OP_TYPE_MV_R_MEM(op_code);
`ifdef CPU_MV_R_MEM_2X
            op_mv_r_mem2x <= op_code == `OP_CODE_MV_R_MEM_2X;
`endif
            op_mv_r_mem_lower <= op_code == `OP_CODE_MV_R_MEM_L;
            op_mv_uob_r <= op_code == `OP_CODE_MV_UOB_R;
            op_set_output_complete <= op_code == `OP_CODE_SET_OUTPUT_COMPLETE;
        end

    // Memory read request: raised at STAGE_EXEC of a MV_R_MEM type
    // instruction, dropped when mem_rd_valid arrives.
    always @(posedge CLK) //!
        if (STAGE_EXEC & op_mv_r_mem) begin
            mem_rd_request <= 1;
            mem_rd_addr <= { thread_num, field_c1 [`MEM_ADDR_MSB :0] };
`ifdef CPU_MV_R_MEM_2X
            mem_wr_2x <= op_mv_r_mem2x;
`endif
            mem_wr_lower <= op_mv_r_mem_lower;
        end
        else if (mem_rd_valid)
            mem_rd_request <= 0; // data on mem_r; mem_rd_valid_r asserts


    // UOB (unit's output buffer). (-)Output takes 2 cycles.
    // uob_wr_en asserts only on the 1st cycle.
    reg [N_THREADS_MSB :0] uob_thread_num;
    //reg mv_uob_r_cycle2 = 0;
    reg uob_thread_num_eq_thread_num = 0;

    always @(posedge CLK) begin
        if (thread_almost_switched)
            uob_thread_num_eq_thread_num <= uob_thread_num == thread_num;
        // A later non-blocking assignment in the same block wins,
        // so a write to UOB overrides the comparison above.
        if (uob_eqn) begin
            if (uob_ready)
                uob_thread_num <= thread_num;
            //mv_uob_r_cycle2 <= 1;
            uob_thread_num_eq_thread_num <= 1;
        end
        //else
        //  mv_uob_r_cycle2 <= 0;
    end

    assign uob_dout = reg_dout;
    assign uob_wr_addr = field_c1 [`UOB_ADDR_MSB :0];
    assign uob_wr_en = uob_eqn;

    assign uob_eqn = STAGE_EXEC & op_mv_uob_r
        & (uob_ready | uob_thread_num_eq_thread_num & ~uob_full);


    assign uob_set_input_complete = STAGE_EXEC & op_set_output_complete;


    // *****************************************************************
    //
    // Integrated (Cryptographic core) Operations - STAGE_RD1
    //
    // *****************************************************************
    (* SHREG_EXTRACT="no" *)
    reg op_init_ctx = 0, op_init_new = 0, op_procb = 0, op_procb_r = 0;
    reg op_procb_flags = 0;

    always @(posedge CLK)
        if (STAGE_RD1) begin
            op_init_ctx <= `OP_TYPE_INIT_CTX(op_code);
            //op_init_new <= op_code[0];//op_code == `OP_CODE_NEW_CTX;

            op_procb <= `OP_TYPE_PROCB(op_code);
            op_procb_r <= `OP_TYPE_PROCB_R(op_code);
            op_procb_flags <= op_code[1];
        end


    // Local copy of the procb_buf write counter: reloaded from
    // procb_wr_cnt on thread_almost_switched, incremented while
    // procb_eqn is asserted.
    reg [`PROCB_A_WIDTH-1 :0] procb_wr_cnt_r;
    always @(posedge CLK) begin
        comp_procb_wr_thread_num <= thread_num;
        if (thread_almost_switched)
            procb_wr_cnt_r <= procb_wr_cnt;
        else if (procb_eqn)
            procb_wr_cnt_r <= procb_wr_cnt_r + 1'b1;
    end
    assign procb_full = procb_wr_cnt_r == `PROCB_N_RECORDS;
    assign procb_afull = procb_wr_cnt_r == `PROCB_N_RECORDS - 1;


    // *****************************************************************
    //
    // Integrated (Cryptographic core) Operations - STAGE_EXEC
    //
    // *****************************************************************
    // thread_state disables execution (applicable at STAGE_EXEC)
    wire ts_disable_exec = (exec_opt1 & `EXEC_OPT_TS_WR_RDY) != 0 & ts_changed;

    // procb_eqn: writes procb_wr_en if thread_state allows
    assign procb_eqn = op_procb & condition_is_true & ~ts_disable_exec;// & ~procb_full;

    always @(posedge CLK) begin
        comp_wr_en <= STAGE_EXEC & op_init_ctx & ~ts_disable_exec;
        if (op_init_ctx)
            comp_dout <= { 1'b1, field_b1, field_c1[2:0] };

        procb_wr_en <= STAGE_EXEC & procb_eqn;
        if (op_procb)
            //procb_dout <= { field_b1, reg_dout[7:0], op_procb_flags };
            // Allow constant 'cnt' (length)
            procb_dout <= { field_b1, (op_procb_r
                ? reg_dout[`PROCB_CNT_MSB:0] : field_c1[`PROCB_CNT_MSB:0]),
                op_procb_flags };
    end



    // *****************************************************************

    wire JUMP_eqn = STAGE_EXEC & (iop_jmp & condition_is_true);

    wire NEXT_THREAD_eqn = STAGE_EXEC & (1'b0
        | iop_halt
        | ts_disable_exec & condition_is_true

        // Switch to the next thread when:
        // - Successful PROCESS_BYTES with fin/stop or procb_buf full
        //| procb_eqn & (op_procb_flags != 0 | procb_afull)

        // - move to UOB, UOB is full or used by other thread
        | op_mv_uob_r & ~(uob_ready | uob_thread_num_eq_thread_num & ~uob_full)
        //
        //| op_set_output_complete
        //
        // - JUMP_eqn forces NEXT_THREAD
    );


    // Invalidate loaded instructions, start loading from the beginning
    // when:
    // - Successful jump is performed
    // - Thread is switched
    assign INVALIDATE_eqn = NEXT_THREAD_eqn | JUMP_eqn;

    // Oops. On INSTR_WAIT, it doesn't preserve reg_dout, field_[b|c]1 etc.
    assign INSTR_WAIT_eqn = STAGE_EXEC & (1'b0
        | op_mv_r_mem
        //| uob_eqn
    );

    wire INSTR_CONTINUE_eqn = 1'b0
        | mem_rd_complete
        //| mv_uob_r_cycle2
    ;


    // *****************************************************************

    always @(posedge CLK) begin

        // TODO: improve condition?
        if (INSTR_CONTINUE_eqn)
            INSTR_WAIT <= 0;
        else if (INSTR_WAIT_eqn)
            INSTR_WAIT <= 1;


        NEXT_THREAD <= NEXT_THREAD_eqn | JUMP_eqn;

        JUMP <= JUMP_eqn;

        EXECUTED <= STAGE_EXEC & (1'b0
            // Instruction typically executed when:
            // - No Invalidate condition, no Wait condition
            | (~INVALIDATE_eqn & ~INSTR_WAIT_eqn)
            // Exceptions:
            // - execution disabled because of wrong thread_state
            & ~(ts_disable_exec & condition_is_true)
            //| procb_eqn & (op_procb_flags != 0 | procb_afull)

        ) | (1'b0
            // - It continues after Wait condition
            | (INSTR_WAIT & INSTR_CONTINUE_eqn)
        );


        // ts_wr_en: asserts after thread_state_change, deasserts
        // on the cycle that follows its assertion.
        if (ts_wr_en)
            ts_wr_en <= 0;
        else if (thread_state_change)
            ts_wr_en <= 1;

        ts_wr <=
            op_set_output_complete ? `THREAD_STATE_NONE :
            `THREAD_STATE_RD_RDY
        ;

    end

    assign thread_state_change = STAGE_EXEC & (1'b0
        // Successful PROCESS_BYTES with fin/stop or procb_buf becoming full
        | procb_eqn & (op_procb_flags != 0 | procb_afull)
        // Sending UOB content for output
        | op_set_output_complete
    );


`ifdef SIMULATION
    // Simulation-only counters of thread switches and jumps
    reg [23:0] X_THREAD_SWITCHES = 0;
    reg [23:0] X_JUMPS = 0;

    always @(posedge CLK) begin
        if (NEXT_THREAD)
            X_THREAD_SWITCHES <= X_THREAD_SWITCHES + 1'b1;
        if (JUMP)
            X_JUMPS <= X_JUMPS + 1'b1;
    end
`endif

endmodule