1--------------------------------------------------------------------------------
2--! @file
3--! @brief pp_fir_filter.
4--!        This implements a poly-phase fir filter that can be used for
5--!        rational resampling or rational sample delay.
6--!        The taps of the FIR filter are generated at compile time and start
7--!        as a Hann-windowed sinc function.  0-phase offset is then normalized
8--!        to be 0.98 amplitude.
--!        The generics determine the resolution of the fir-filter, as well as
--!        the number of phases.
11--------------------------------------------------------------------------------
12library ieee;
13    use ieee.std_logic_1164.all;
14    use ieee.numeric_std.all;
15    use ieee.math_real.all;
16library work;
17    use work.er_pack.all;
18
entity pp_fir_filter is
    generic (
        --! The width of each tap in bits.  The same width is used for every
        --! sample word packed into data_i / data_o.
        taps_width_g    : natural :=  16;
        --! The number of lobes.  This is basically the number of taps per filter
        --! (it sets the depth of the sample delay line and the number of
        --! multiply-accumulates performed per output).
        num_lobes_g     : natural :=   8;
        --! The number of parallel channels.  All channels share the same
        --! coefficient and are packed side by side in data_i / data_o.
        num_channels_g  : natural :=   1;
        --! The number of taps per lobe.  The coefficient table holds
        --! taps_per_lobe_g * num_lobes_g entries.
        taps_per_lobe_g : natural := 512;
        --! The number of taps to skip to get to the next tap (the address
        --! increment applied to the coefficient table between two MACs).
        step_size_g     : natural := 512);
    port (
        -- standard ports
        --! Clock; every register in the design is clocked on the rising edge
        clk_i : in  std_logic;
        --! Synchronous reset, active high
        rst_i : in  std_logic;

        -- input data ports
        --! Run the filter without taking another sample
        run_i     : in  std_logic;
        --! Phase offset, sampled together with data_en_i / run_i while the
        --! filter is idle.  It is added to the internal centre offset to form
        --! the table address of the first tap.
        --! NOTE(review): log2 is presumably the integer helper from er_pack;
        --! the value is added to the tap address without a range check, so
        --! large phases look able to address past the end of the table --
        --! confirm the intended range with the callers.
        phase_i   : in  std_logic_vector(log2(taps_per_lobe_g) downto 0);
        --! Take data_i as a new sample and compute one output.  Only honoured
        --! while the filter is idle.
        data_en_i : in  std_logic;
        --! Input samples; channel n occupies bits
        --! (n+1)*taps_width_g-1 downto n*taps_width_g
        data_i    : in  std_logic_vector(num_channels_g*taps_width_g-1 downto 0);

        -- output data ports
        --! Filtered samples, packed per channel like data_i
        data_o    : out std_logic_vector(num_channels_g*taps_width_g-1 downto 0);
        --! One-clock strobe marking a freshly updated data_o
        data_en_o : out std_logic);
end entity pp_fir_filter;
47
48architecture behavior of pp_fir_filter is
49    ----------------------------------------------------------------------------
50    -- Types, Subtypes, and Constants
51    ----------------------------------------------------------------------------
    -- One sample or one coefficient: taps_width_g bits, signed
    subtype word_t  is signed(1*taps_width_g-1 downto 0);
    -- Full-width product / accumulator: 2*taps_width_g bits, signed
    subtype dword_t is signed(2*taps_width_g-1 downto 0);
    -- Slice of the accumulator that becomes the output word.  It is
    -- taps_width_g bits long; the accumulator MSB and the low taps_width_g-1
    -- bits are not part of it.
    subtype save_range is natural range 2*taps_width_g-2 downto 1*taps_width_g-1;
    type word_vector_t  is array (integer range <>) of word_t;
    type dword_vector_t is array (integer range <>) of dword_t;
    -- One entry holds all channels packed exactly like data_i.  Despite the
    -- name, this type is used for the sample delay line (z_ram).
    type rom_t          is array (integer range <>) of signed(data_i'range);

    -- The state machine deals with the MACCs
    type state_type is (
        idle_state,  -- Waiting for input signal
        load_state,  -- Load the sample into the input ram
        mult_state,  -- First multiply does not accumulate product
        macc_state,  -- P += A*B
        save_state); -- Save the output
    -- Operation executed by the accumulator process.  The state machine in
    -- this file only ever issues clear, mult and macc; hold is decoded by the
    -- accumulator but never requested.
    type dsp_opcode_type is (
        clear,       -- P  = 0
        mult,        -- P  = A*B
        macc,        -- P += A*B
        hold);       -- P  = P
    -- Half of the output LSB (only bit taps_width_g-2 set).  It is added to
    -- the accumulator before the save_range slice is taken, so the output is
    -- rounded instead of truncated.
    constant round_val  : dword_t := shift_left(to_signed(1, dword_t'length), taps_width_g-2);

    -- We want the phase offset to be in relation to the middle of the center
    -- lobe.  For this reason, we will need to determine the offset of the first
    -- sample in relation to the step_size, taps_per_lobe, and the number of
    -- lobes.  With step_size_g = taps_per_lobe_g (the defaults) this is 0.
    constant phase_offset_c : natural :=
--      (num_lobes_g * (taps_per_lobe_g - step_size_g+1)) mod taps_per_lobe_g;
        (num_lobes_g/2 * (taps_per_lobe_g - step_size_g));
    -- Depth of the sample delay line, which is also the number of taps summed
    -- for every output sample.
    constant num_regs_c : natural :=
--      (num_lobes_g * (taps_per_lobe_g / step_size_g));
        (num_lobes_g);
83
84    ----------------------------------------------------------------------------
85    -- functions
86    ----------------------------------------------------------------------------
87    function load_sinc_rom (
88        taps_per_lobe : natural;
89        num_lobes     : natural)
90    return word_vector_t is
91        -- The returned ram
92        variable rom      : word_vector_t(0 to taps_per_lobe * num_lobes-1);
93
94        -- Stuff for the actual sinc calculation
95        variable real_rom : real_vector(rom'range);
96        variable half     : real := real(rom'length/2);
97        variable nm1      : real := real(rom'length-1);
98        variable phase    : real;
99        variable sinc     : real;
100        variable hann     : real;
101
102        -- for power calculation
103        variable power : real;
104    begin
105        ------------------------------------------------------------------------
106        -- Tap generation
107        ------------------------------------------------------------------------
108        for idx in real_rom'range loop
109            -- Determine the phase, but multiply it by PI to get the correct
110            -- phase shift
111            phase := math_pi * (real(idx) - half) / real(taps_per_lobe);
112
113            -- Don't divide by zero
114            if phase = 0.0 then
115                sinc := 1.0;
116            else
117                sinc := sin(phase) / phase;
118            end if;
119
120            -- Multiply it by a hann window
121            hann := 0.5 * (1.0 - cos(2.0*math_pi*real(idx)/nm1));
122
123            -- Put it in the rom
124            real_rom(idx) := sinc*hann;
125        end loop;
126
127        ------------------------------------------------------------------------
128        -- Energy measurement
129        ------------------------------------------------------------------------
130        -- Now that the ram is complete, we still need to make sure that we
131        -- scale everything to be a power of one.  This is to make sure that we
132        -- don't overflow during the actual addition.
133        power := 0.0;
134        for idx in 0 to num_regs_c-1 loop
135            power := power + real_rom(phase_offset_c + idx*step_size_g);
136        end loop;
137
138        ------------------------------------------------------------------------
139        -- Normalization
140        ------------------------------------------------------------------------
141        -- Now put it in the actual ram
142        for idx in rom'range loop
143            real_rom(idx) := real_rom(idx) * (0.98 / power);
144            rom     (idx) := signed(to_slv(real_rom(idx), word_t'length));
145        end loop;
146
147        -- return it
148        return rom;
149    end function load_sinc_rom;
150
151    -----------------------------------------------------------------------------
    -- Coefficient table, computed once when this constant is elaborated
    constant taps_rom : word_vector_t := load_sinc_rom(taps_per_lobe_g, num_lobes_g);

    ----------------------------------------------------------------------------
    -- Signals
    ----------------------------------------------------------------------------
    -- Request captured while idle: table address of the first tap
    -- (phase_offset_c + phase_i) and the raw input word
    signal phase_reg : natural;
    signal data_reg  : std_logic_vector(data_i'range);

    -- Current FSM state and the operation the accumulator executes on the
    -- next clock edge
    signal state      : state_type;
    signal dsp_opcode : dsp_opcode_type;

    -- DSP Signals
    signal a : word_vector_t (0 to num_channels_g-1);  -- sample operand, per channel
    signal b : word_t;                                 -- coefficient operand, shared
    signal p : dword_vector_t(0 to num_channels_g-1);  -- accumulator, per channel
    signal r : word_vector_t (0 to num_channels_g-1);  -- registered result driving data_o

    -- RAM/ROM Signals
    -- NOTE(review): the address signals are plain naturals without a range
    -- constraint, so their declared range is the full natural range.
    signal taps_addr      : natural;                   -- table entry presented on b
    signal next_taps_addr : natural;                   -- running address of the following tap
    signal z_addr         : natural;                   -- delay-line entry presented on a
    signal z_ram          : rom_t(0 to num_regs_c-1);  -- sample delay line, newest at index 0
    signal z_ram_en       : std_logic;                 -- one-clock strobe: shift data_reg in

    -- Quantization signals
    -- Accumulator plus round_val; the output word is sliced from this
    signal q : dword_vector_t(0 to num_channels_g-1);

    -- for internal testing
    -- Only driven by the simulation-only table sweep at the end of the
    -- architecture
    signal rom_data_test : word_t;
    signal rom_addr_test : natural;
182
183--------------------------------------------------------------------------------
184begin
185--------------------------------------------------------------------------------
186    -- The actual fir filter part
187    -----------------------------------------------------------------------------
188    -- Direct signal assignments
189    -----------------------------------------------------------------------------
190    a_gen : for idx in 0 to num_channels_g-1 generate
191        -- Get the input for the multiplication
192        a(idx) <= z_ram(z_addr)((idx+1)*taps_width_g-1 downto idx*taps_width_g);
193
194        -- Since the rounding is combinational, we can sum it up here
195        q(idx) <= p(idx) + round_val;
196
197        -- Now the data out
198        data_o((idx+1)*taps_width_g-1 downto idx*taps_width_g) <=
199            std_logic_vector(r(idx));
200    end generate a_gen;
201
202    -- This one is easy
203    b <= taps_rom(taps_addr);      -- Select MUX
204
205    -----------------------------------------------------------------------------
206    -- FIR process controls the main state machine behind the serial FIR
207    -----------------------------------------------------------------------------
208    fsm_proc : process(clk_i)
209        variable idx_hi : natural;
210        variable idx_lo : natural;
211    begin
212        if rising_edge(clk_i) then
213            if rst_i = '1' then
214                state          <= idle_state;
215                dsp_opcode     <= clear;
216                z_ram_en       <= '0';
217                z_addr         <=  0 ;
218                taps_addr      <=  0 ;
219                next_taps_addr <=  0 ;
220                data_en_o      <= '0';
221--              data_o         <= (others => '0');
222            else
223                -- Default cases
224                z_ram_en  <= '0';
225                data_en_o <= '0';
226                next_taps_addr <= next_taps_addr + step_size_g;
227
228                -- Other cases
229                case state is
230                    -----------------------------------------------------------------
231                    when idle_state =>
232                        dsp_opcode <= clear;
233                        z_addr     <=  0 ;
234                        taps_addr  <=  0 ;
235                        if data_en_i = '1' or run_i = '1' then
236                            z_ram_en  <= data_en_i;
237                            state     <= load_state;
238                            phase_reg <= phase_offset_c + to_integer(unsigned(phase_i));
239                            data_reg  <= data_i;
240                        end if;
241                    -----------------------------------------------------------------
242                    when load_state =>
243                        dsp_opcode     <= clear;
244                        z_addr         <=  0 ;
245                        taps_addr      <= phase_reg;
246                        next_taps_addr <= phase_reg;
247                        state          <= mult_state;
248                    -----------------------------------------------------------------
249                    when mult_state =>
250                        dsp_opcode <= mult;
251                        z_addr     <=  0 ;
252                        taps_addr  <= phase_reg;
253                        state      <= macc_state;
254                    -----------------------------------------------------------------
255                    when macc_state =>
256                        dsp_opcode <= macc;
257
258                        -- The delayed version of the incoming signal
259--                      if next_taps_addr >= taps_rom'length then
260                        if z_addr = z_ram'high then
261                            state <= save_state;
262                        else
263                            z_addr    <= z_addr + 1;
264                            taps_addr <= next_taps_addr;
265                        end if;
266                    -----------------------------------------------------------------
267                    when save_state =>
268                        dsp_opcode <= macc;
269                        z_addr     <=  0 ;
270                        data_en_o  <= '1';
271                        state      <= idle_state;
272                        for idx in q'range loop
273                            r(idx) <= q(idx)(save_range);
274                        end loop;
275                    -----------------------------------------------------------------
276                end case;
277            end if;
278        end if;
279    end process fsm_proc;
280
281    -----------------------------------------------------------------------------
282    -- DSP48 process emulates a DSP48 (partially)
283    -----------------------------------------------------------------------------
284    alu_proc : process(clk_i)
285    begin
286        if rising_edge(clk_i) then
287            if rst_i = '1' then
288                p <= (others => (others => '0'));
289            else
290                case dsp_opcode is
291                    ------------------------------------------------------------
292                    when clear =>
293                        p <= (others => (others => '0'));
294                    ------------------------------------------------------------
295                    when mult =>
296                        for idx in p'range loop
297                            p(idx) <= a(idx) * b;
298                        end loop;
299                    ------------------------------------------------------------
300                    when macc =>
301                        for idx in p'range loop
302                            p(idx) <= p(idx) + a(idx) * b;
303                        end loop;
304                    ------------------------------------------------------------
305                    when hold =>
306                        null;
307                    ------------------------------------------------------------
308               end case;
309           end if;
310        end if;
311    end process alu_proc;
312
313    -----------------------------------------------------------------------------
314    -- Shift RAM
315    -----------------------------------------------------------------------------
316    -- I'm calling it the z ram, since it is the z delay of the incoming signal
317    shift_ram_proc : process(clk_i)
318    begin
319        if rising_edge(clk_i) then
320            if rst_i = '1' then
321                z_ram <= (others => (others => '0'));
322            elsif z_ram_en = '1' then
323                z_ram <= signed(data_reg) & z_ram(0 to z_ram'length-2);
324            end if;
325        end if;
326    end process shift_ram_proc;
327
328    ----------------------------------------------------------------------------
329    -- tests
330    ----------------------------------------------------------------------------
331    -- synthesis off
332    -- Test the rom by iterating through the rom
333    rom_test_proc : process(clk_i)
334    begin
335        if rising_edge(clk_i) then
336            if rst_i = '1' then
337                rom_addr_test <= 0;
338            else
339                if rom_addr_test >= taps_rom'length-1 then
340                    rom_addr_test <= 0;
341                else
342                    rom_addr_test <= rom_addr_test + 1;
343                end if;
344            end if;
345        end if;
346    end process rom_test_proc;
347
348    -- combinational read
349    rom_data_test <= taps_rom(rom_addr_test);
350    -- synthesis on
351
352end architecture behavior;
353