// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: mult_add_clip
//
// Wraps a Xilinx DSP48E1 primitive to compute a signed, saturating
// multiply-add:
//
//   O = clip(A * B + C)
//
// A, B, C and O are signed fixed-point values. Each has its own width and
// binary point (number of fractional bits). C is aligned to the binary point
// of the A*B product before it is added, and O is extracted from the DSP
// result at the requested output binary point. If the result does not fit
// in O, O saturates to the most positive or most negative value.
//
// Parameters:
//   WIDTH_A,  BIN_PT_A : Width / fractional bits of A (WIDTH_A max 25)
//   WIDTH_B,  BIN_PT_B : Width / fractional bits of B (WIDTH_B max 18)
//   WIDTH_C,  BIN_PT_C : Width / fractional bits of C (WIDTH_C max 43)
//   WIDTH_O,  BIN_PT_O : Width / fractional bits of O
//   LATENCY            : Clock cycles from inputs to O (0 to 4)
//
// Ports:
//   clk   : Clock
//   reset : Drives every RST* input of the DSP48E1
//   CE    : Clock enable for all pipeline stages (ordinarily 1'b1)
//   A,B,C : Operands
//   O     : Clipped result (combinational function of the DSP output and
//           the delayed operand signs)

`default_nettype none

module mult_add_clip #(
  parameter WIDTH_A=25,   // Max 25
  parameter BIN_PT_A=24,
  parameter WIDTH_B=18,   // Max 18
  parameter BIN_PT_B=17,
  parameter WIDTH_C=43,   // Max 43

  // Min (47-WIDTH_C-1)+BIN_PT_A+BIN_PT_B,
  // Max WIDTH_C-1+BIN_PT_A+BIN_PT_B
  parameter BIN_PT_C=42,

  parameter WIDTH_O=43,   // Max 43-(BIN_PT_A+BIN_PT_B-BIN_PT_O)
  parameter BIN_PT_O=42,
  parameter LATENCY=2     // Maximum is 4
) (
  input wire clk,
  input wire reset,
  input wire CE,          // Ordinarily set to 1'b1
  input wire [WIDTH_A-1:0] A,
  input wire [WIDTH_B-1:0] B,
  input wire [WIDTH_C-1:0] C,
  output reg [WIDTH_O-1:0] O
);
  // DSP operations:
  //   O = clip(A * B + C)
  //
  // Mux settings:
  //   X,Y (01,01) = M
  //   Z   (011)   = C

  // Pipeline registers are enabled in this order as LATENCY grows:
  // M (and C, to stay aligned with M), then P, then A2/B2, then A1/B1.
  localparam MREG_IN  = (LATENCY >= 1) ? 1 : 0;
  localparam CREG_IN  = MREG_IN;
  localparam PREG_IN  = (LATENCY >= 2) ? 1 : 0;
  localparam A2REG_IN = (LATENCY >= 3) ? 1 : 0;
  localparam A1REG_IN = (LATENCY == 4) ? 1 : 0;
  localparam AREG_IN  = A1REG_IN + A2REG_IN;
  // See OPMODE Control Bits Settings, Table 2-7,2-8,2-9
  localparam ZMUX_C = 3'b011;
  localparam YMUX_M = 2'b01;
  localparam XMUX_M = 2'b01;
  localparam [6:0] OPMODE = {ZMUX_C, YMUX_M, XMUX_M};

  // A_IN is 25 bits; B_IN is 18 bits. Product M's binary point shifts:
  // A and B are left-justified in A_IN/B_IN, so every pad bit appended below
  // them adds one fractional bit to the product.
  localparam BIN_PT_M = BIN_PT_A+(25-WIDTH_A) + BIN_PT_B+(18-WIDTH_B);

  // Calculate shift for C to align binary point to A*B product (M)
  // Determine top and bottom indices of C (in C_IN), normalized to M
  // Divide by 2**BIN_PT_C then multiply up by 2**BIN_PT_M
  localparam C_TOP = WIDTH_C-1 - BIN_PT_C + BIN_PT_M;
  localparam C_BOT = 0 - BIN_PT_C + BIN_PT_M;
  // Determine number of sign-extended bits above C_TOP
  localparam C_EXT = 47 - C_TOP;

  // P is a 43-bit fixed point number with bin pt BIN_PT_M
  // O is extracted from those bits
  // Sign extend if more bits to left of bin pt
  localparam O_EXT = ((WIDTH_O-BIN_PT_O) > (43-BIN_PT_M)) ?
                     (WIDTH_O-BIN_PT_O) - (43-BIN_PT_M) : 0;
  // If extending, use highest bit of P, else extract bits based on bin pt
  localparam P_TOP = (O_EXT > 0) ? 42 :
                     (42 + (WIDTH_O-BIN_PT_O) - (43-BIN_PT_M));
  // Pad bottom of O if remaining P not enough bits
  localparam O_PAD = (WIDTH_O > P_TOP+1) ? (WIDTH_O-P_TOP-1) : 0;
  // If padding O, grab lowest bit of P, else determine based on O's width
  localparam P_BOT = (O_PAD > 0) ? 0 : (P_TOP+1-WIDTH_O);

  // Depth of the sign-tracking shift registers. At least one bit is kept so
  // the vectors stay well-formed when LATENCY is 0 (purely combinational).
  localparam SIGN_DEPTH = (LATENCY > 1) ? LATENCY : 1;

  //------------------------------------------------
  // Normalize C input to A*B product's binary point
  //------------------------------------------------
  // Returns c placed in a 48-bit word so that its binary point sits at
  // BIN_PT_M: sign-extended above C_TOP, and either truncated (C_BOT < 0)
  // or zero-padded (C_BOT > 0) at the bottom.
  function automatic [47:0] align_c;
    input [WIDTH_C-1:0] c;
    begin
      // Do sign extension
      if (C_EXT > 0) begin
        align_c[47 -: C_EXT] = {C_EXT{c[WIDTH_C-1]}};
      end
      if (C_BOT < 0) begin
        // Chop off lower bits of C
        align_c[C_TOP:0] = c[WIDTH_C-1:(-C_BOT)];
      end else begin
        // Place C and zero pad if necessary
        align_c[C_TOP:C_BOT] = c;
        if (C_BOT > 0) begin
          align_c[C_BOT-1:0] = {C_BOT{1'b0}};
        end
      end
    end
  endfunction

  // Left-justify A and B in the multiplier operands, zero-padding the LSBs
  wire [24:0] A_IN = (WIDTH_A < 25) ? { A, {(25-(WIDTH_A)){1'b0}}} : A;
  wire [17:0] B_IN = (WIDTH_B < 18) ? { B, {(18-(WIDTH_B)){1'b0}}} : B;
  wire [47:0] C_IN;
  wire [47:0] P_OUT;

  //--------------------------------------------------
  // C needs more pipeline registers at higher latency
  //--------------------------------------------------
  // The DSP48E1 is configured with at most one C register (CREG), while A/B
  // get up to two extra input stages (AREG/BREG). Delay C by the same number
  // of stages in fabric so it meets the product at the adder.
  // NOTE(review): this fabric pipeline is not cleared by reset.
  generate if (AREG_IN > 0) begin
    reg [AREG_IN*WIDTH_C-1:0] c_r;

    if (AREG_IN > 1) begin
      // Newest sample enters at the bottom; oldest sits in the top word
      always @ (posedge clk)
      begin
        if (CE) begin
          c_r <= {c_r[0 +: (AREG_IN-1)*WIDTH_C], C};
        end
      end
    end else begin
      always @ (posedge clk)
      begin
        if (CE) begin
          c_r <= C;
        end
      end
    end

    // Oldest entry of the delay line
    wire [WIDTH_C-1:0] c_pre = c_r[AREG_IN*WIDTH_C-1 -: WIDTH_C];
    assign C_IN = align_c(c_pre);
  end else begin
    assign C_IN = align_c(C);
  end endgenerate

  //----------------------------------------------
  // Track signs for overflow/underflow processing
  //----------------------------------------------
  // The sign of the product (sign(A) xor sign(B)) and the sign of C are
  // delayed by LATENCY cycles so that bit [SIGN_DEPTH-1] of each vector
  // lines up with the P_OUT value computed from the same operands.
  // NOTE(review): these trackers are not cleared by reset.
  reg  [SIGN_DEPTH-1:0] mult_sign;
  reg  [SIGN_DEPTH-1:0] c_sign;
  wire                  bin_pt_overflow;
  wire                  adder_overflow;
  wire [WIDTH_O-1:0]    p_extract;

  generate if (LATENCY > 1) begin
    always @ (posedge clk)
    begin
      if (CE) begin
        mult_sign <= {mult_sign[0 +: LATENCY-1], A[WIDTH_A-1] ^ B[WIDTH_B-1]};
        c_sign    <= {c_sign[0 +: LATENCY-1], C[WIDTH_C-1]};
      end
    end
  end else if (LATENCY == 1) begin
    always @ (posedge clk)
    begin
      if (CE) begin
        mult_sign <= A[WIDTH_A-1] ^ B[WIDTH_B-1];
        c_sign    <= C[WIDTH_C-1];
      end
    end
  end else begin
    // LATENCY == 0: every DSP register is disabled, so the signs must be
    // combinational as well or they would lag the data by one cycle.
    always @ (*)
    begin
      mult_sign = A[WIDTH_A-1] ^ B[WIDTH_B-1];
      c_sign    = C[WIDTH_C-1];
    end
  end endgenerate

  // The 43-bit add wrapped if both addends had the same sign but the sum's
  // sign bit disagrees with it.
  assign adder_overflow = (mult_sign[SIGN_DEPTH-1] == c_sign[SIGN_DEPTH-1]) &&
                          (P_OUT[42] != c_sign[SIGN_DEPTH-1]);

  //----------------------------------------------
  // Extract renormalized bits from P_OUT
  //----------------------------------------------
  generate
    if (P_TOP < 42) begin
      // Bits dropped above P_TOP must all equal the sign bit kept at P_TOP,
      // i.e. P_OUT[42:P_TOP] must be all ones or all zeros.
      assign bin_pt_overflow = (|P_OUT[42:P_TOP]) != (&P_OUT[42:P_TOP]);
    end else begin
      assign bin_pt_overflow = 1'b0;
    end

    if (O_EXT > 0) begin
      // O has more integer bits than P: sign-extend
      assign p_extract[WIDTH_O-1 -: O_EXT] = {O_EXT{P_OUT[42]}};
    end

    if (O_PAD > 0) begin
      // O has more fractional bits than P: zero-pad
      assign p_extract[O_PAD-1:0] = {O_PAD{1'b0}};
    end
  endgenerate

  assign p_extract[WIDTH_O-1-O_EXT:O_PAD] = P_OUT[P_TOP:P_BOT];

  //----------------------------------
  // Clip if underflowed or overflowed
  //----------------------------------
  // Sign of the true (unclipped) result, which selects the saturation rail.
  // If the add wrapped, P_OUT[42] is inverted, but both addends shared the
  // sign c_sign, so that is the true sign. Otherwise P_OUT[42] is already
  // correct. This matters when only bin_pt_overflow is set and the product
  // and C have opposite signs: c_sign alone would pick the wrong rail.
  wire clip_sign = adder_overflow ? c_sign[SIGN_DEPTH-1] : P_OUT[42];

  // Combinational logic: blocking assignments
  always @ (*)
  begin
    if (bin_pt_overflow || adder_overflow) begin
      // Most negative value if clip_sign is 1, most positive if 0
      O = {clip_sign, {(WIDTH_O-1){!clip_sign}}};
    end else begin
      O = p_extract;
    end
  end


  DSP48E1 #(
    .ACASCREG(AREG_IN),
    .AREG(AREG_IN),
    .ADREG(0),
    .DREG(0),
    .BCASCREG(AREG_IN),
    .BREG(AREG_IN),
    .MREG(MREG_IN),
    .CREG(CREG_IN),
    .PREG(PREG_IN)
  ) DSP48_inst (
    // Outputs
    .ACOUT(),
    .BCOUT(),
    .CARRYCASCOUT(),
    .CARRYOUT(),
    .MULTSIGNOUT(),
    .OVERFLOW(),
    .P(P_OUT),
    .PATTERNBDETECT(),
    .PATTERNDETECT(),
    .PCOUT(),
    .UNDERFLOW(),

    // Inputs
    .A({5'b0,A_IN}),
    .ACIN(30'b0),
    .ALUMODE(4'b0000),
    .B(B_IN),
    .BCIN(18'b0),
    .C(C_IN),
    .CARRYCASCIN(1'b0),
    .CARRYIN(1'b0),
    .CARRYINSEL(3'b0),
    .CEA1(CE),
    .CEA2(CE),
    .CEAD(1'b0),
    .CEALUMODE(1'b1),
    .CEB1(CE),
    .CEB2(CE),
    .CEC(CE),
    .CECARRYIN(CE),
    .CECTRL(CE),
    .CED(1'b0),
    .CEINMODE(CE),
    .CEM(CE),
    .CEP(CE),
    .CLK(clk),
    .D({25{1'b1}}),
    .INMODE(5'b0),
    .MULTSIGNIN(1'b0),
    .OPMODE(OPMODE),
    .PCIN(48'b0),
    .RSTA(reset),
    .RSTALLCARRYIN(reset),
    .RSTALUMODE(reset),
    .RSTB(reset),
    .RSTC(reset),
    .RSTD(reset),
    .RSTCTRL(reset),
    .RSTINMODE(reset),
    .RSTM(reset),
    .RSTP(reset)
  );

endmodule // mult_add_clip
`default_nettype wire