1// Copyright 2018 Ettus Research, a National Instruments Company
2//
3// SPDX-License-Identifier: LGPL-3.0-or-later
// Wrapper around the Xilinx DSP48E1 primitive implementing a signed
// multiply-add with clipping (saturation): O = clip(A * B + C)
5
6`default_nettype none
7
//
// mult_add_clip
//
// Signed fixed-point multiply-add with saturation, built on one DSP48E1:
//
//   O = clip(A * B + C)
//
// A, B and C are two's-complement fixed-point values whose binary points are
// given by BIN_PT_A/B/C. C is re-aligned to the binary point of the A*B
// product before being fed to the DSP adder, and the result is re-aligned to
// BIN_PT_O and saturated to WIDTH_O bits on the way out.
//
// Ports:
//   clk    - clock for the DSP registers and the local pipeline registers
//   reset  - drives every RST* pin of the DSP48E1; the local C and sign
//            pipeline registers in this module are NOT reset
//   CE     - clock enable for the DSP data registers and the local pipelines
//   A,B,C  - operands
//   O      - clipped result; combinationally derived from the DSP P output
//            and the delayed operand signs
//
module mult_add_clip #(
  parameter WIDTH_A=25,  // Max 25
  parameter BIN_PT_A=24,
  parameter WIDTH_B=18,  // Max 18
  parameter BIN_PT_B=17,
  parameter WIDTH_C=43,  // Max 43

  // With BIN_PT_M being the binary point of the product (see localparam
  // below), align_c requires C_TOP <= 47 and -C_BOT <= WIDTH_C-1, i.e.
  //   Min BIN_PT_M-(48-WIDTH_C),
  //   Max BIN_PT_M+WIDTH_C-1
  parameter BIN_PT_C=42,

  parameter WIDTH_O=43,  // Max 43-(BIN_PT_A+BIN_PT_B-BIN_PT_O)
  parameter BIN_PT_O=42,
  // Maximum is 4. The sign-tracking pipeline below always contains at least
  // one register, so LATENCY is expected to be in the range 1..4.
  parameter LATENCY=2
) (
  input wire clk,
  input wire reset,
  input wire CE, // Ordinarily set to 1'b1
  input wire [WIDTH_A-1:0] A,
  input wire [WIDTH_B-1:0] B,
  input wire [WIDTH_C-1:0] C,
  output reg  [WIDTH_O-1:0] O
);
  // DSP operations:
  // O = clip(A * B + C)
  //
  // Mux settings:
  // X,Y (01,01) = M
  // Z (011) = C

  // Pipeline register allocation as a function of LATENCY:
  //   LATENCY >= 1 : M register (and the matching C register)
  //   LATENCY >= 2 : P register
  //   LATENCY >= 3 : one A/B input register
  //   LATENCY == 4 : a second A/B input register
  localparam MREG_IN = (LATENCY >= 1) ? 1 : 0;
  localparam CREG_IN = MREG_IN;
  localparam PREG_IN = (LATENCY >= 2) ? 1 : 0;
  localparam A2REG_IN = (LATENCY >= 3) ? 1 : 0;
  localparam A1REG_IN = (LATENCY == 4) ? 1 : 0;
  localparam AREG_IN = A1REG_IN + A2REG_IN;
  // See OPMODE Control Bits Settings, Table 2-7,2-8,2-9
  localparam ZMUX_C = 3'b011;
  localparam YMUX_M = 2'b01;
  localparam XMUX_M = 2'b01;
  localparam [6:0] OPMODE = {ZMUX_C, YMUX_M, XMUX_M};

  // A_IN is 25 bits; B_IN is 18 bits. Narrower A/B are left-justified into
  // those widths (zero padded at the bottom), which moves each binary point
  // up by the number of pad bits. Product M's binary point is the sum:
  localparam BIN_PT_M = BIN_PT_A+(25-WIDTH_A) + BIN_PT_B+(18-WIDTH_B);

  // Calculate shift for C to align binary point to A*B product (M)
  // Determine top and bottom indices of C (in C_IN), normalized to M
  // Divide by 2**BIN_PT_C then multiply up by 2**BIN_PT_M
  localparam C_TOP = WIDTH_C-1 - BIN_PT_C + BIN_PT_M;
  localparam C_BOT = 0 - BIN_PT_C + BIN_PT_M;
  // Determine number of sign-extended bits above C_TOP
  localparam C_EXT = 47 - C_TOP;

  // P is a 43-bit fixed point number with bin pt BIN_PT_M
  // O is extracted from those bits
  // Sign extend if more bits to left of bin pt
  localparam O_EXT = ((WIDTH_O-BIN_PT_O) > (43-BIN_PT_M)) ?
                      (WIDTH_O-BIN_PT_O) - (43-BIN_PT_M) : 0;
  // If extending, use highest bit of P, else extract bits based on bin pt
  localparam P_TOP = (O_EXT > 0) ? 42 :
                     (42 + (WIDTH_O-BIN_PT_O) - (43-BIN_PT_M));
  // Pad bottom of O if remaining P not enough bits
  localparam O_PAD = (WIDTH_O > P_TOP+1) ? (WIDTH_O-P_TOP-1) : 0;
  // If padding O, grab lowest bit of P, else determine based on O's width
  localparam P_BOT = (O_PAD > 0) ? 0 : (P_TOP+1-WIDTH_O);

  //------------------------------------------------
  // Normalize C input to A*B product's binary point
  //------------------------------------------------
  // Returns c placed in a 48-bit word so that its binary point sits at
  // BIN_PT_M: sign-extended above C_TOP, and either truncated at the bottom
  // (C has more fractional bits than M) or zero-padded (C has fewer).
  function automatic [47:0] align_c;
    input [WIDTH_C-1:0] c;
    begin
      // Do sign extension
      if (C_EXT > 0) begin
        align_c[47 -: C_EXT] = {C_EXT{c[WIDTH_C-1]}};
      end
      if (C_BOT < 0) begin
        // Chop off lower bits of C
        align_c[C_TOP:0] = c[WIDTH_C-1:(-C_BOT)];
      end else begin
        // Place C and zero pad if necessary
        align_c[C_TOP:C_BOT] = c;
        if (C_BOT > 0) begin
          align_c[C_BOT-1:0] = {C_BOT{1'b0}};
        end
      end
    end
  endfunction

  // Left-justify A and B into the multiplier input widths
  wire [24:0] A_IN = (WIDTH_A < 25) ? { A, {(25-(WIDTH_A)){1'b0}}} : A;
  wire [17:0] B_IN = (WIDTH_B < 18) ? { B, {(18-(WIDTH_B)){1'b0}}} : B;
  wire [47:0] C_IN;
  wire [47:0] P_OUT;

  //--------------------------------------------------
  // C needs more pipeline registers at higher latency
  //--------------------------------------------------
  // The A/B inputs pick up AREG_IN extra register stages inside the DSP.
  // C is delayed by the same number of stages here, so that all three
  // operands of one sample meet at the adder together.
  generate if (AREG_IN > 0) begin
    reg [AREG_IN*WIDTH_C-1:0] c_r;

    if (AREG_IN > 1) begin
      // Shift register: newest C enters at the bottom, oldest leaves the top
      always @ (posedge clk)
      begin
        if (CE) begin
          c_r <= {c_r[0 +: (AREG_IN-1)*WIDTH_C], C};
        end
      end
    end else begin
      // Single stage; kept separate because the shift form above would need
      // a zero-width part-select
      always @ (posedge clk)
      begin
        if (CE) begin
          c_r <= C;
        end
      end
    end

    // Oldest entry of the delay line feeds the DSP C port
    wire [WIDTH_C-1:0] c_pre = c_r[AREG_IN*WIDTH_C-1 -: WIDTH_C];
    assign C_IN = align_c(c_pre);
  end else begin
    assign C_IN = align_c(C);
  end endgenerate

  //----------------------------------------------
  // Track signs for overflow/underflow processing
  //----------------------------------------------
  // mult_sign carries sign(A) ^ sign(B) and c_sign carries sign(C), each
  // delayed by LATENCY register stages so that bit [LATENCY-1] lines up with
  // the sample currently visible on P_OUT.
  reg  [LATENCY-1:0]     mult_sign;
  reg  [LATENCY-1:0]     c_sign;
  wire                   bin_pt_overflow;
  wire                   adder_overflow;
  wire [WIDTH_O-1:0]     p_extract;

  generate if (LATENCY > 1) begin
    always @ (posedge clk)
    begin
      if (CE) begin
        mult_sign <= {mult_sign[0 +: LATENCY-1], A[WIDTH_A-1] ^ B[WIDTH_B-1]};
        c_sign <= {c_sign[0 +: LATENCY-1], C[WIDTH_C-1]};
      end
    end
  end else begin
    always @ (posedge clk)
    begin
      if (CE) begin
        mult_sign <= A[WIDTH_A-1] ^ B[WIDTH_B-1];
        c_sign <= C[WIDTH_C-1];
      end
    end
  end endgenerate

  // Adder overflow: both addends had the same sign but the sign bit of the
  // 43-bit result disagrees with it.
  assign adder_overflow = (mult_sign[LATENCY-1] == c_sign[LATENCY-1]) &&
                          (P_OUT[42] != c_sign[LATENCY-1]);

  // Sign of the true (unclipped) result, used to choose the saturation rail.
  //  - Same-sign addends: the result has that sign, even if the adder
  //    wrapped, so use the tracked sign.
  //  - Opposite-sign addends: a 48-bit two's-complement sum of values with
  //    opposite signs cannot wrap, so the MSB of P is the true sign.
  wire sat_sign = (mult_sign[LATENCY-1] == c_sign[LATENCY-1]) ?
                  c_sign[LATENCY-1] : P_OUT[47];

  //----------------------------------------------
  // Extract renormalized bits from P_OUT
  //----------------------------------------------
  generate
    // If O has fewer integer bits than P, the dropped upper bits of P must
    // all equal the retained sign bit; otherwise the value does not fit.
    if (P_TOP < 42) begin
      assign bin_pt_overflow = (|P_OUT[42:P_TOP]) != (&P_OUT[42:P_TOP]);
    end else begin
      assign bin_pt_overflow = 1'b0;
    end

    // O has more integer bits than P: sign extend at the top
    if (O_EXT > 0) begin
      assign p_extract[WIDTH_O-1 -: O_EXT] = {O_EXT{P_OUT[42]}};
    end

    // O has more fractional bits than P provides: zero pad at the bottom
    if (O_PAD > 0) begin
      assign p_extract[O_PAD-1:0] = {O_PAD{1'b0}};
    end
  endgenerate

  assign p_extract[WIDTH_O-1-O_EXT:O_PAD] = P_OUT[P_TOP:P_BOT];

  //----------------------------------
  // Clip if underflowed or overflowed
  //----------------------------------
  // Purely combinational, hence blocking assignments. On overflow the output
  // saturates to the most negative value (1000...0) when the true result is
  // negative and to the most positive value (0111...1) when it is positive.
  always @ (*)
  begin
    if (bin_pt_overflow || adder_overflow) begin
      O = {sat_sign, {(WIDTH_O-1){!sat_sign}}};
    end else begin
      O = p_extract;
    end
  end


  DSP48E1 #(
    .ACASCREG(AREG_IN),
    .AREG(AREG_IN),
    .ADREG(0),
    .DREG(0),
    .BCASCREG(AREG_IN),
    .BREG(AREG_IN),
    .MREG(MREG_IN),
    .CREG(CREG_IN),
    .PREG(PREG_IN)
  ) DSP48_inst (
    // Outputs
    .ACOUT(),
    .BCOUT(),
    .CARRYCASCOUT(),
    .CARRYOUT(),
    .MULTSIGNOUT(),
    .OVERFLOW(),
    .P(P_OUT),
    .PATTERNBDETECT(),
    .PATTERNDETECT(),
    .PCOUT(),
    .UNDERFLOW(),

    // Inputs
    .A({5'b0,A_IN}),
    .ACIN(30'b0),
    .ALUMODE(4'b0000),
    .B(B_IN),
    .BCIN(18'b0),
    .C(C_IN),
    .CARRYCASCIN(1'b0),
    .CARRYIN(1'b0),
    .CARRYINSEL(3'b0),
    .CEA1(CE),
    .CEA2(CE),
    .CEAD(1'b0),
    .CEALUMODE(1'b1),
    .CEB1(CE),
    .CEB2(CE),
    .CEC(CE),
    .CECARRYIN(CE),
    .CECTRL(CE),
    .CED(1'b0),
    .CEINMODE(CE),
    .CEM(CE),
    .CEP(CE),
    .CLK(clk),
    .D({25{1'b1}}),
    .INMODE(5'b0),
    .MULTSIGNIN(1'b0),
    .OPMODE(OPMODE),
    .PCIN(48'b0),
    .RSTA(reset),
    .RSTALLCARRYIN(reset),
    .RSTALUMODE(reset),
    .RSTB(reset),
    .RSTC(reset),
    .RSTD(reset),
    .RSTCTRL(reset),
    .RSTINMODE(reset),
    .RSTM(reset),
    .RSTP(reset)
  );

endmodule // mult_add_clip
260`default_nettype wire
261