/*
 * This software is Copyright (c) 2018-2019 Denis Burykin
 * [denis_burykin yahoo com], [denis-burykin2014 yandex ru]
 * and it is hereby released to the general public under the following terms:
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 */

// Include Verilog code for "blackbox" modules
// (ISE Project Navigator requires restart after switching this)
//`define SIMULATION


// ===== Algorithm constants and operations =====
//
// Rotate the 64-bit value w right by s bit positions.
// Both arguments are parenthesized so that an expression argument
// (e.g. a ^ b) is not split apart by operator precedence.
`define CYCLIC(w,s) (((w) >> (s)) | ((w) << (64 - (s))))

// Reverse the byte order of x. The shift amounts are those of
// a 64-bit byte swap, so x must be evaluated at 64 bits.
`define SWAP(x) (((x) << 56) \
    | (((x) & 16'hff00) << 40) \
    | (((x) & 24'hff0000) << 24) \
    | (((x) & 32'hff000000) << 8) \
    | (((x) >> 8) & 32'hff000000) \
    | (((x) >> 24) & 24'hff0000) \
    | (((x) >> 40) & 16'hff00) \
    | ((x) >> 56))

// SHA-512 initial hash value as one 512-bit concatenation.
// The word 6a09e667f3bcc908 occupies the least significant 64 bits,
// the word 5be0cd19137e2179 the most significant 64 bits.
// The digit strings start with a hex digit (not '_') so that the
// literals are accepted by strictly conforming Verilog parsers.
`define SHA512_IV { 128'h5be0cd19137e2179_1f83d9abfb41bd6b, \
    192'h9b05688c2b3e6c1f_510e527fade682d1_a54ff53a5f1d36f1, \
    192'h3c6ef372fe94f82b_bb67ae8584caa73b_6a09e667f3bcc908 }


// =====================================================
//
`define N_CORES 4
`define N_THREADS 16


// ===== Block processing options (transferred to cores) =====
//
// MSB of the block options vector (2 bits: [1:0])
`define BLK_OP_MSB 1
// 1 - new context, 0 - load context
`define BLK_OP_IF_NEW_CTX(r) r[0]
// 1) Output computed result;
// 2) Used to set thread state.
// Selects bit 1 of the block options vector r.
`define BLK_OP_END_COMP_OUTPUT(r) r[1]


// ===== engine (services several cores) =====
//
`define MEM_WIDTH 64
// "Main" memory (per thread; in MEM_WIDTH-bit words)
`define MEM_ADDR_MSB 4
// "Main" memory (per engine) [0: 2**(`MEM_TOTAL_MSB+1)-1]
`define MEM_TOTAL_MSB (`MEM_ADDR_MSB + 4)

// process_bytes (in bytes)
// max.key_len=64 (comp.len <8k)
`define PROCB_CNT_MSB 6
`define PROCB_TOTAL_MSB 12

// unit's I/O
`define UNIT_INPUT_WIDTH 8
`define UNIT_OUTPUT_WIDTH 2
// Address in Unit's output buffer (UOB) memory in 16-bit words
`define UOB_ADDR_MSB 5


// ===== computation state (per thread) =====
//
// Thread state is a 2-bit value: [`THREAD_STATE_MSB:0]
`define THREAD_STATE_MSB 1

`define THREAD_STATE_NONE 2'd0
`define THREAD_STATE_WR_RDY 2'd1
`define THREAD_STATE_RD_RDY 2'd2
`define THREAD_STATE_BUSY 2'd3


// ===== comp_buf, procb_buf, saved_procb_state =====
//
`define COMP_DATA1_MSB 0
// The whole expansion is parenthesized so that the macro can be used
// safely inside a larger expression (e.g. 2 * `COMP_DATA2_MSB).
`define COMP_DATA2_MSB ((`MEM_ADDR_MSB+1 + 4)-1)

// address width for procb records (per thread)
`define PROCB_N_RECORDS 4
`define PROCB_A_WIDTH 3
// width of each procb record
`define PROCB_D_WIDTH (`MEM_ADDR_MSB+1 + `PROCB_CNT_MSB+1 + 1)

// width of the saved procb state
`define PROCB_SAVE_WIDTH (4 + `MEM_ADDR_MSB+3+1 + `PROCB_CNT_MSB+1 \
    + `PROCB_TOTAL_MSB+1 + 4)


// ===== CPU =====
//
`define CPU_WIDTH 16
// Number of CPU words per memory word
`define MEM_CPU_RATIO (`MEM_WIDTH / `CPU_WIDTH)
`define N_STAGES 4
// 16 registers
`define REG_ADDR_MSB 3
// Program entry points
`define ENTRY_PTS_EN
`define ENTRY_PT_MSB 0
//
// Allow ADDC/SUBB instructions (slow; 210-220 max.)
//`define INSTR_SUBB_EN
//
// Each instruction consists of:
`define OP_CODE_LEN 5
// Literal prefix: `OP_CODE n expands to 5'd n
`define OP_CODE 5'd
// Field A contains exclusively register to read from
`define FIELD_A_LEN (`REG_ADDR_MSB+1)
`define FIELD_A 4'd
// Field B contains register for write, memory address or other data
`define FIELD_B_LEN (`MEM_ADDR_MSB+1)
// OMG - adjust this if MEM_ADDR_MSB changes
`define FIELD_B 5'd
// Field C typically contains a constant.
`define FIELD_C_LEN 8
`define FIELD_C 8'd
// Conditions determine the result of instruction execution.
`define CONDITION_LEN 4
// Instruction execution options.
`define EXEC_OPT_LEN 1

// Instruction length without field A and the execution option
`define PARTIAL_INSTR_LEN (`CONDITION_LEN \
    + `FIELD_B_LEN + `FIELD_C_LEN + `OP_CODE_LEN)
`define INSTR_LEN (`FIELD_A_LEN + `EXEC_OPT_LEN + `PARTIAL_INSTR_LEN)


// Instruction Address length: must fit into field_c
`define IADDR_LEN 8

// *** Instruction execution options ***
`define EXEC_OPT_NONE 1'b0
// EXEC_OPT_TS_WR_RDY - requires thread_state to be WR_RDY
`define EXEC_OPT_TS_WR_RDY 1'b1

// *** CPU flags ***
// Each FLAG_* macro selects one bit of the flags vector r.
`define N_FLAGS 4
`define FLAG_ZERO(r) r[0]
`define FLAG_ONE(r) r[1]
`define FLAG_CARRY(r) r[2]
`define FLAG_USER(r) r[3]

// *** Conditions ***
`define IF_NONE 4'b0000

`define IF_ONE 4'b0010
`define IF_NOT_ONE 4'b0011
`define IF_ZERO 4'b0100
`define IF_NOT_ZERO 4'b0101
`define IF_CARRY 4'b0110
`define IF_NOT_CARRY 4'b0111
`define IF_UF 4'b1000
`define IF_NOT_UF 4'b1001
//`define IF

// `CONDITION is the condition code inserted by the instruction macros
// that reference it; `IF(cond) redefines it.
// NOTE(review): `IF expands to compiler directives inside macro text;
// this depends on the tool re-scanning the expansion - confirm with
// the toolchain in use.
`define CONDITION `IF_NONE
`define IF(cond) \
`undef CONDITION \
`define CONDITION cond

// *** Operation codes ***
`define OP_CODE_PROCB_C `OP_CODE 8
`define OP_CODE_PROCB_C_FIN `OP_CODE 10
`define OP_CODE_PROCB_C_STOP `OP_CODE 9
`define OP_CODE_PROCB_R `OP_CODE 12
`define OP_CODE_PROCB_R_FIN `OP_CODE 14
`define OP_CODE_PROCB_R_STOP `OP_CODE 13

// The OP_TYPE_* classifiers below parenthesize their argument so that
// an expression (not only a plain signal name) can be passed as c.
`define OP_TYPE_PROCB(c) ((c) >= 8 & (c) <= 15)
`define OP_TYPE_PROCB_R(c) ((c) >= 12 & (c) <= 15)

`define OP_CODE_NEW_CTX `OP_CODE 7
`define OP_CODE_LOAD_CTX `OP_CODE 6

`define OP_TYPE_INIT_CTX(c) ((c) == 6 | (c) == 7)

`define OP_CODE_NOP `OP_CODE 0
`define OP_CODE_HALT `OP_CODE 1

`define OP_CODE_ADD_R_C `OP_CODE 16
`define OP_CODE_ADDC_R_C `OP_CODE 17
`define OP_CODE_SUB_R_C `OP_CODE 18
`define OP_CODE_SUBB_R_C `OP_CODE 19
`define OP_CODE_INC_RST `OP_CODE 20
`define OP_CODE_MV_R_C `OP_CODE 21
`define OP_CODE_SHR1 `OP_CODE 22
`define OP_CODE_MV_R_R `OP_CODE 23
`define OP_CODE_AND `OP_CODE 24

// op:a[1:0](1-set, 2-reset, 3-invert) mask:b
// currently applicable to UF only
`define OP_CODE_FLAG `OP_CODE 25

`define OP_CODE_MV_UOB_R `OP_CODE 26
`define OP_CODE_SET_OUTPUT_COMPLETE `OP_CODE 27

`define OP_CODE_MV_R_MEM_X `OP_CODE 30

`ifndef CPU_MEM_4X
`define OP_CODE_MV_R_MEM_U `OP_CODE 28
`define OP_CODE_MV_R_MEM_L `OP_CODE 29
//`define OP_CODE_MV_R_MEM_2X `OP_CODE 30
`define OP_TYPE_MV_R_MEM(c) ((c) == 28 | (c) == 29 | (c) == 30)
`else
`define OP_CODE_MV_R_MEM0 `OP_CODE 2
`define OP_CODE_MV_R_MEM1 `OP_CODE 3
`define OP_CODE_MV_R_MEM2 `OP_CODE 4
`define OP_CODE_MV_R_MEM3 `OP_CODE 5
`define OP_TYPE_MV_R_MEM(c) ((c) >= 2 & (c) <= 5)
`endif

`define OP_CODE_JMP `OP_CODE 31

//`define OP_CODE_ILLEGAL(c) ( ~( \
//	c ==

`define OP_TYPE_SETS_ZF(c) ( \
    (c) == `OP_CODE_SUB_R_C | (c) == `OP_CODE_INC_RST | (c) == `OP_CODE_SHR1)

// NOTE(review): "OF" presumably refers to FLAG_ONE - confirm.
`define OP_TYPE_SETS_OF(c) ( \
    (c) == `OP_CODE_SHR1)

`define OP_TYPE_SETS_CF(c) ( \
    (c) == `OP_CODE_ADD_R_C | (c) == `OP_CODE_SUB_R_C \
    | (c) == `OP_CODE_ADDC_R_C | (c) == `OP_CODE_SUBB_R_C)

`define OP_TYPE_USE_CF(c) ( \
    (c) == `OP_CODE_ADDC_R_C | (c) == `OP_CODE_SUBB_R_C)


// This op. potentially writes into a register
// (write might not be performed because of conditions).
// MV_R_MEM_* processed independently.
// The argument is parenthesized so that an expression can be passed as c.
`define OP_TYPE_WRITE_REG(c) ( \
    (c) == `OP_CODE_ADD_R_C | (c) == `OP_CODE_SUB_R_C \
    | (c) == `OP_CODE_ADDC_R_C | (c) == `OP_CODE_SUBB_R_C \
    | (c) == `OP_CODE_INC_RST \
    | (c) == `OP_CODE_MV_R_C | (c) == `OP_CODE_SHR1 | (c) == `OP_CODE_MV_R_R \
    | (c) == `OP_CODE_AND)

// Reads a register:
// every register-writing op except MV_R_C, plus PROCB_R, PROCB_R_FIN
// and MV_UOB_R. The grouping is written out explicitly instead of
// relying on '&' binding tighter than '|'.
`define OP_TYPE_USE_REG(c) ( \
    (`OP_TYPE_WRITE_REG(c) & (c) != `OP_CODE_MV_R_C) \
    | (c) == `OP_CODE_PROCB_R | (c) == `OP_CODE_PROCB_R_FIN \
    | (c) == `OP_CODE_MV_UOB_R)

// read register only from field_a (timing issue)

// These ops check conditions (hardcoded)
//`define OP_TYPE_CHECK_CONDITION(c) ( \
//	`OP_TYPE_WRITE_REG(c) | `OP_TYPE_PROCB(c) | c == `OP_CODE_JMP)


// ===== Instructions =====
//
// Every instruction macro builds one instruction word as the
// concatenation (most significant field first):
//   { field A, execution option, condition, field B, field C, op code }
// Field values are appended to a sized decimal prefix (`FIELD_A etc.),
// so the macro arguments must be plain decimal numbers.
// Macros that use `CONDITION pick up the condition set by the last `IF().
//
// *** Instructions - SHA512 subsystem ***
`define NEW_CTX(save_addr,save_len) {`FIELD_A 0, `EXEC_OPT_TS_WR_RDY, \
    `IF_NONE, `FIELD_B save_addr, `FIELD_C save_len, `OP_CODE_NEW_CTX}

`define PROCESS_BYTES_C(addr,cnt) \
    {`FIELD_A 0, `EXEC_OPT_TS_WR_RDY, \
    `CONDITION, `FIELD_B addr, `FIELD_C cnt, `OP_CODE_PROCB_C}
`define PROCESS_BYTES_C_FINISH_CTX(addr,cnt) \
    {`FIELD_A 0, `EXEC_OPT_TS_WR_RDY, \
    `CONDITION, `FIELD_B addr, `FIELD_C cnt, `OP_CODE_PROCB_C_FIN}

`define PROCESS_BYTES_R(addr,r) \
    {`FIELD_A r, `EXEC_OPT_TS_WR_RDY, \
    `CONDITION, `FIELD_B addr, `FIELD_C 0, `OP_CODE_PROCB_R}
`define PROCESS_BYTES_R_FINISH_CTX(addr,r) \
    {`FIELD_A r, `EXEC_OPT_TS_WR_RDY, \
    `CONDITION, `FIELD_B addr, `FIELD_C 0, `OP_CODE_PROCB_R_FIN}

`define FINISH_CTX `PROCESS_BYTES_C_FINISH_CTX(0,0)


// *** Instructions - integer ***
`define ADD_R_C(r,const) {`FIELD_A r, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B r, `FIELD_C const, `OP_CODE_ADD_R_C}
`define SUB_R_C(dst,src,const) {`FIELD_A src, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B dst, `FIELD_C const, `OP_CODE_SUB_R_C}
`define INC_RST(r,const) {`FIELD_A r, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B r, `FIELD_C const, `OP_CODE_INC_RST}
`define MV_R_C(r,const) {`FIELD_A r, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B r, `FIELD_C const, `OP_CODE_MV_R_C}
`define SHR1(r) {`FIELD_A r, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B r, `FIELD_C 0, `OP_CODE_SHR1}
// We can read from one register and store into other one
`define MV_R_R(dst,src) {`FIELD_A src, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B dst, `FIELD_C 0, `OP_CODE_MV_R_R}
`define AND_R_C(dst,src,const) {`FIELD_A src, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B dst, `FIELD_C const, `OP_CODE_AND}

`ifdef INSTR_SUBB_EN
`define ADDC_R_C(r,const) {`FIELD_A r, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B r, `FIELD_C const, `OP_CODE_ADDC_R_C}
`define SUBB_R_C(dst,src,const) {`FIELD_A src, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B dst, `FIELD_C const, `OP_CODE_SUBB_R_C}
`endif

// *** Instructions - I/O ***
`define MV_R_MEM_X(base_r,addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
    `IF_NONE, `FIELD_B base_r, `FIELD_C addr, `OP_CODE_MV_R_MEM_X}

`ifndef CPU_MEM_4X
//`define MV_R_MEM_2X(r,addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
//	`IF_NONE, `FIELD_B r, `FIELD_C addr, `OP_CODE_MV_R_MEM_2X}
`define MV_R_MEM_L(r,addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
    `IF_NONE, `FIELD_B r, `FIELD_C addr, `OP_CODE_MV_R_MEM_L}
`define MV_R_MEM_U(r,addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
    `IF_NONE, `FIELD_B r, `FIELD_C addr, `OP_CODE_MV_R_MEM_U}
`else
`define MV_R_MEM0(r,addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
    `IF_NONE, `FIELD_B r, `FIELD_C addr, `OP_CODE_MV_R_MEM0}
`define MV_R_MEM1(r,addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
    `IF_NONE, `FIELD_B r, `FIELD_C addr, `OP_CODE_MV_R_MEM1}
`define MV_R_MEM2(r,addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
    `IF_NONE, `FIELD_B r, `FIELD_C addr, `OP_CODE_MV_R_MEM2}
`define MV_R_MEM3(r,addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
    `IF_NONE, `FIELD_B r, `FIELD_C addr, `OP_CODE_MV_R_MEM3}
`endif

`define MV_UOB_R(uob_addr,r) {`FIELD_A r, `EXEC_OPT_TS_WR_RDY, \
    `IF_NONE, `FIELD_B 0, `FIELD_C uob_addr, `OP_CODE_MV_UOB_R}
`define SET_OUTPUT_COMPLETE {`FIELD_A 0, `EXEC_OPT_TS_WR_RDY, \
    `IF_NONE, `FIELD_B 0, `FIELD_C 0, `OP_CODE_SET_OUTPUT_COMPLETE}


// *** Instructions - execution control ***
`define NOP {`FIELD_A 0, `EXEC_OPT_NONE, \
    `IF_NONE, `FIELD_B 0, `FIELD_C 0, `OP_CODE_NOP}
`define HALT {`FIELD_A 0, `EXEC_OPT_NONE, \
    `IF_NONE, `FIELD_B 0, `FIELD_C 0, `OP_CODE_HALT}

`define JMP(addr) {`FIELD_A 0, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B 0, `FIELD_C addr, `OP_CODE_JMP}

// User flag (UF) operations: the value placed in field B
// is 1 for set, 2 for reset, 3 for invert.
`define SET_UF {`FIELD_A 0, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B 1, `FIELD_C 0, `OP_CODE_FLAG}
`define RST_UF {`FIELD_A 0, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B 2, `FIELD_C 0, `OP_CODE_FLAG}
`define INV_UF {`FIELD_A 0, `EXEC_OPT_NONE, \
    `CONDITION, `FIELD_B 3, `FIELD_C 0, `OP_CODE_FLAG}


// *** Registers - BRAM ***
`define R0 0
`define R1 1
`define R2 2
`define R3 3
`define R4 4
`define R5 5
`define R6 6
`define R7 7
`define R8 8
`define R9 9
`define R10 10
`define R11 11
`define R12 12
`define R13 13
`define R14 14
`define R15 15