--------------------------------------------------------------------------------
--! @file
--! @brief pp_fir_filter.
--! This implements a poly-phase FIR filter that can be used for
--! rational resampling or rational sample delay.
--! The taps of the FIR filter are generated at compile time and start
--! as a Hann-windowed sinc function. The 0-phase offset is then normalized
--! to be 0.98 amplitude.
--! The generics determine the resolution of the FIR filter, as well
--! as the number of phases.
--------------------------------------------------------------------------------
library ieee;
    use ieee.std_logic_1164.all;
    use ieee.numeric_std.all;
    use ieee.math_real.all;
library work;
    use work.er_pack.all;

entity pp_fir_filter is
    generic (
        --! The width of each tap in bits
        taps_width_g    : natural := 16;
        --! The number of lobes. This is basically the number of taps per filter
        num_lobes_g     : natural := 8;
        --! The number of parallel channels
        num_channels_g  : natural := 1;
        --! The number of taps per lobe
        taps_per_lobe_g : natural := 512;
        --! The number of taps to skip to get to the next tap
        step_size_g     : natural := 512);
    port (
        -- standard ports
        clk_i     : in  std_logic;
        rst_i     : in  std_logic;

        -- input data ports
        --! Run the filter without taking another sample
        run_i     : in  std_logic;
        phase_i   : in  std_logic_vector(log2(taps_per_lobe_g) downto 0);
        data_en_i : in  std_logic;
        data_i    : in  std_logic_vector(num_channels_g*taps_width_g-1 downto 0);

        -- output data ports
        data_o    : out std_logic_vector(num_channels_g*taps_width_g-1 downto 0);
        data_en_o : out std_logic);
end entity pp_fir_filter;

architecture behavior of pp_fir_filter is
    ----------------------------------------------------------------------------
    -- Types, Subtypes, and Constants
    ----------------------------------------------------------------------------
    subtype word_t     is signed(1*taps_width_g-1 downto 0);
    subtype dword_t    is signed(2*taps_width_g-1 downto 0);
    -- Slice of the double-width accumulator that is kept as the output word
    subtype save_range is natural range 2*taps_width_g-2 downto 1*taps_width_g-1;
    type word_vector_t  is array (integer range <>) of word_t;
    type dword_vector_t is array (integer range <>) of dword_t;
    type rom_t          is array (integer range <>) of signed(data_i'range);

    -- The state machine deals with the MACCs
    type state_type is (
        idle_state,                     -- Waiting for input signal
        load_state,                     -- Load the sample into the input ram
        mult_state,                     -- First multiply does not accumulate product
        macc_state,                     -- P += A*B
        save_state);                    -- Save the output
    type dsp_opcode_type is (
        clear,                          -- P  = 0
        mult,                           -- P  = A*B
        macc,                           -- P += A*B
        hold);                          -- P  = P

    -- Half of the least significant kept bit (bit taps_width_g-1 is the lowest
    -- bit of save_range), added before truncation to round the result
    constant round_val : dword_t :=
        shift_left(to_signed(1, dword_t'length), taps_width_g-2);

    -- We want the phase offset to be in relation to the middle of the center
    -- lobe. For this reason, we will need to determine the offset of the first
    -- sample in relation to the step_size, taps_per_lobe, and the number of
    -- lobes
    constant phase_offset_c : natural :=
--      (num_lobes_g * (taps_per_lobe_g - step_size_g+1)) mod taps_per_lobe_g;
        (num_lobes_g/2 * (taps_per_lobe_g - step_size_g));
    constant num_regs_c     : natural :=
--      (num_lobes_g * (taps_per_lobe_g / step_size_g));
        (num_lobes_g);

    ----------------------------------------------------------------------------
    -- functions
    ----------------------------------------------------------------------------
    --! Build the tap ROM: a Hann-windowed sinc with taps_per_lobe*num_lobes
    --! entries, scaled so that the taps used at phase 0 sum to 0.98.
    --! @param taps_per_lobe number of ROM entries per sinc lobe
    --! @param num_lobes     number of lobes covered by the ROM
    --! @return the quantized taps, indexed 0 to taps_per_lobe*num_lobes-1
    function load_sinc_rom (
        taps_per_lobe : natural;
        num_lobes     : natural)
        return word_vector_t is
        -- The returned rom
        variable rom      : word_vector_t(0 to taps_per_lobe * num_lobes-1);

        -- Stuff for the actual sinc calculation
        variable real_rom : real_vector(rom'range);
        variable half     : real := real(rom'length/2);
        variable nm1      : real := real(rom'length-1);
        variable phase    : real;
        variable sinc     : real;
        variable hann     : real;

        -- Sum of the taps selected at phase 0 (used as the scale reference)
        variable power    : real;
    begin
        ------------------------------------------------------------------------
        -- Tap generation
        ------------------------------------------------------------------------
        for idx in real_rom'range loop
            -- Determine the phase, but multiply it by PI to get the correct
            -- phase shift
            phase := math_pi * (real(idx) - half) / real(taps_per_lobe);

            -- Don't divide by zero
            if phase = 0.0 then
                sinc := 1.0;
            else
                sinc := sin(phase) / phase;
            end if;

            -- Multiply it by a hann window
            hann := 0.5 * (1.0 - cos(2.0*math_pi*real(idx)/nm1));

            -- Put it in the rom
            real_rom(idx) := sinc*hann;
        end loop;

        ------------------------------------------------------------------------
        -- Gain measurement
        ------------------------------------------------------------------------
        -- Now that the rom is complete, sum the taps that are used when the
        -- requested phase is 0 (phase_offset_c + idx*step_size_g). This sum is
        -- the reference that the whole table is scaled against, so that the
        -- accumulation does not overflow.
        power := 0.0;
        for idx in 0 to num_regs_c-1 loop
            power := power + real_rom(phase_offset_c + idx*step_size_g);
        end loop;

        ------------------------------------------------------------------------
        -- Normalization
        ------------------------------------------------------------------------
        -- Scale so the phase-0 sum becomes 0.98, then quantize into the rom
        for idx in rom'range loop
            real_rom(idx) := real_rom(idx) * (0.98 / power);
            rom     (idx) := signed(to_slv(real_rom(idx), word_t'length));
        end loop;

        -- return it
        return rom;
    end function load_sinc_rom;

    -----------------------------------------------------------------------------
    constant taps_rom : word_vector_t := load_sinc_rom(taps_per_lobe_g, num_lobes_g);

    ----------------------------------------------------------------------------
    -- Signals
    ----------------------------------------------------------------------------
    signal phase_reg      : natural;
    signal data_reg       : std_logic_vector(data_i'range);

    signal state          : state_type;
    signal dsp_opcode     : dsp_opcode_type;

    -- DSP Signals
    signal a              : word_vector_t (0 to num_channels_g-1);
    signal b              : word_t;
    signal p              : dword_vector_t(0 to num_channels_g-1);
    signal r              : word_vector_t (0 to num_channels_g-1);

    -- RAM/ROM Signals
    signal taps_addr      : natural;
    signal next_taps_addr : natural;
    signal z_addr         : natural;
    signal z_ram          : rom_t(0 to num_regs_c-1);
    signal z_ram_en       : std_logic;

    -- Quantization signals
    signal q              : dword_vector_t(0 to num_channels_g-1);

    -- for internal testing
    signal rom_data_test  : word_t;
    signal rom_addr_test  : natural;

--------------------------------------------------------------------------------
begin
--------------------------------------------------------------------------------
    -- The actual fir filter part
    -----------------------------------------------------------------------------
    -- Direct signal assignments
    -----------------------------------------------------------------------------
    a_gen : for idx in 0 to num_channels_g-1 generate
        -- Get the input for the multiplication
        a(idx) <= z_ram(z_addr)((idx+1)*taps_width_g-1 downto idx*taps_width_g);

        -- Since the rounding is combinational, we can sum it up here
        q(idx) <= p(idx) + round_val;

        -- Now the data out
        data_o((idx+1)*taps_width_g-1 downto idx*taps_width_g) <=
            std_logic_vector(r(idx));
    end generate a_gen;

    -- Select MUX for the coefficient. An address past the end of the ROM lies
    -- outside the windowed impulse response, so it contributes a zero tap
    -- instead of causing an out-of-range index.
    b <= taps_rom(taps_addr) when taps_addr <= taps_rom'high
         else (others => '0');

    -----------------------------------------------------------------------------
    -- FIR process controls the main state machine behind the serial FIR
    -----------------------------------------------------------------------------
    fsm_proc : process(clk_i)
    begin
        if rising_edge(clk_i) then
            if rst_i = '1' then
                state          <= idle_state;
                dsp_opcode     <= clear;
                z_ram_en       <= '0';
                z_addr         <=  0 ;
                taps_addr      <=  0 ;
                next_taps_addr <=  0 ;
                data_en_o      <= '0';
--              data_o         <= (others => '0');
            else
                -- Default cases
                z_ram_en       <= '0';
                data_en_o      <= '0';
                next_taps_addr <= next_taps_addr + step_size_g;

                -- Other cases
                case state is
                    -----------------------------------------------------------------
                    when idle_state =>
                        dsp_opcode     <= clear;
                        z_addr         <=  0 ;
                        taps_addr      <=  0 ;
                        -- Override the default increment: the look-ahead
                        -- address is an unconstrained natural and would
                        -- otherwise keep growing while the filter is idle
                        -- until it overflows.
                        next_taps_addr <=  0 ;
                        if data_en_i = '1' or run_i = '1' then
                            -- Only shift a new sample in when one is offered;
                            -- run_i alone re-runs the filter on the stored
                            -- samples.
                            z_ram_en  <= data_en_i;
                            state     <= load_state;
                            phase_reg <= phase_offset_c + to_integer(unsigned(phase_i));
                            data_reg  <= data_i;
                        end if;
                    -----------------------------------------------------------------
                    when load_state =>
                        dsp_opcode     <= clear;
                        z_addr         <=  0 ;
                        taps_addr      <= phase_reg;
                        next_taps_addr <= phase_reg;
                        state          <= mult_state;
                    -----------------------------------------------------------------
                    when mult_state =>
                        dsp_opcode <= mult;
                        z_addr     <=  0 ;
                        taps_addr  <= phase_reg;
                        state      <= macc_state;
                    -----------------------------------------------------------------
                    when macc_state =>
                        dsp_opcode <= macc;

                        -- The delayed version of the incoming signal
--                      if next_taps_addr >= taps_rom'length then
                        if z_addr = z_ram'high then
                            state <= save_state;
                        else
                            z_addr    <= z_addr + 1;
                            taps_addr <= next_taps_addr;
                        end if;
                    -----------------------------------------------------------------
                    when save_state =>
                        -- The sum is complete; freeze the accumulator rather
                        -- than accumulating the last product again.
                        dsp_opcode <= hold;
                        z_addr     <=  0 ;
                        data_en_o  <= '1';
                        state      <= idle_state;
                        -- Keep the rounded upper part of each accumulator
                        for idx in q'range loop
                            r(idx) <= q(idx)(save_range);
                        end loop;
                    -----------------------------------------------------------------
                end case;
            end if;
        end if;
    end process fsm_proc;

    -----------------------------------------------------------------------------
    -- DSP48 process emulates a DSP48 (partially)
    -----------------------------------------------------------------------------
    alu_proc : process(clk_i)
    begin
        if rising_edge(clk_i) then
            if rst_i = '1' then
                p <= (others => (others => '0'));
            else
                case dsp_opcode is
                    ------------------------------------------------------------
                    when clear =>
                        p <= (others => (others => '0'));
                    ------------------------------------------------------------
                    when mult =>
                        for idx in p'range loop
                            p(idx) <= a(idx) * b;
                        end loop;
                    ------------------------------------------------------------
                    when macc =>
                        for idx in p'range loop
                            p(idx) <= p(idx) + a(idx) * b;
                        end loop;
                    ------------------------------------------------------------
                    when hold =>
                        null;
                    ------------------------------------------------------------
                end case;
            end if;
        end if;
    end process alu_proc;

    -----------------------------------------------------------------------------
    -- Shift RAM
    -----------------------------------------------------------------------------
    -- I'm calling it the z ram, since it is the z delay of the incoming signal
    shift_ram_proc : process(clk_i)
    begin
        if rising_edge(clk_i) then
            if rst_i = '1' then
                z_ram <= (others => (others => '0'));
            elsif z_ram_en = '1' then
                -- Newest sample goes to index 0; the oldest one falls off the end
                z_ram <= signed(data_reg) & z_ram(0 to z_ram'length-2);
            end if;
        end if;
    end process shift_ram_proc;

    ----------------------------------------------------------------------------
    -- tests
    ----------------------------------------------------------------------------
    -- synthesis off
    -- Test the rom by iterating through the rom
    rom_test_proc : process(clk_i)
    begin
        if rising_edge(clk_i) then
            if rst_i = '1' then
                rom_addr_test <= 0;
            else
                if rom_addr_test >= taps_rom'length-1 then
                    rom_addr_test <= 0;
                else
                    rom_addr_test <= rom_addr_test + 1;
                end if;
            end if;
        end if;
    end process rom_test_proc;

    -- combinational read
    rom_data_test <= taps_rom(rom_addr_test);
    -- synthesis on

end architecture behavior;