1-- Copyright (c) 2002-2009 Tampere University.
2--
3-- This file is part of TTA-Based Codesign Environment (TCE).
4--
5-- Permission is hereby granted, free of charge, to any person obtaining a
6-- copy of this software and associated documentation files (the "Software"),
7-- to deal in the Software without restriction, including without limitation
8-- the rights to use, copy, modify, merge, publish, distribute, sublicense,
9-- and/or sell copies of the Software, and to permit persons to whom the
10-- Software is furnished to do so, subject to the following conditions:
11--
12-- The above copyright notice and this permission notice shall be included in
13-- all copies or substantial portions of the Software.
14--
15-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20-- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21-- DEALINGS IN THE SOFTWARE.
22
23library IEEE;
24use IEEE.Std_Logic_1164.all;
25use IEEE.numeric_std.all;
26use work.ffaccel_globals.all;
27use work.ffaccel_gcu_opcodes.all;
28use work.ffaccel_imem_mau.all;
29
30use work.tce_util.all;
31
entity ffaccel_ifetch is

  generic (
    -- OR-ed with fetch_en when forming glock; '1' keeps a low fetch_en from
    -- raising the global lock.
    no_glock_loopback_g        : std_logic := '0';
    -- true: fetchblock is driven straight from imem_data (no fetch register).
    bypass_fetchblock_register : boolean   := false;
    -- true: pc_wire follows pc_in directly on a taken jump/call.
    bypass_pc_register         : boolean   := false;
    -- Only consulted when choosing the value stored into pc_prev_reg.
    bypass_decoder_registers   : boolean   := false;
    -- Additional fetch pipeline stages on top of the base latency of one.
    extra_fetch_cycles         : integer   := 0;
    -- false: rstx acts asynchronously; true: rstx is sampled on rising clk.
    sync_reset_g               : boolean   := false;
    -- Enables the debugger counters and the db_pc output.
    debug_logic_g              : boolean   := false;
    -- Enable the loop buffer / infinite-loop buffer control logic.
    enable_loop_buffer_g       : boolean   := false;
    enable_infloop_buffer_g    : boolean   := false;
    -- Instruction register file support; irf_size_g is not referenced by the
    -- rtl_andor architecture.
    enable_irf_g               : boolean   := false;
    irf_size_g                 : integer   := 0;

    -- Value loaded into the program counter by the asynchronous reset.
    pc_init_g : std_logic_vector(IMEMADDRWIDTH-1 downto 0) := (others => '0'));

  port (
    -- Branch target supplied together with pc_load.
    pc_in      : in  std_logic_vector(IMEMADDRWIDTH-1 downto 0);
    -- Current contents of the return address register.
    ra_out     : out std_logic_vector(IMEMADDRWIDTH-1 downto 0);
    -- Value written to the return address register when ra_load = '1'.
    ra_in      : in  std_logic_vector(IMEMADDRWIDTH-1 downto 0);
    -- Control-flow operation strobes and opcode.
    pc_load    : in  std_logic;
    ra_load    : in  std_logic;
    pc_opcode  : in  std_logic_vector(0 downto 0);
    -- Instruction memory interface (imem_en_x is active low).
    imem_data  : in  std_logic_vector(IMEMWIDTHINMAUS*IMEMMAUWIDTH-1 downto 0);
    imem_addr  : out std_logic_vector(IMEMADDRWIDTH-1 downto 0);
    imem_en_x  : out std_logic;
    fetchblock : out std_logic_vector(IMEMWIDTHINMAUS*IMEMMAUWIDTH-1 downto 0);
    -- Stall request; contributes to both the internal lock and glock.
    busy       : in  std_logic;

    -- Global lock output.
    glock : out std_logic;

    -- External control interface: fetch enable.
    fetch_en : in std_logic;

    -- Debugger interface. db_rstx is an active-low synchronous reset;
    -- db_lockreq = '1' freezes the debug counters.
    db_lockreq  : in  std_logic;
    db_rstx     : in  std_logic;
    db_pc       : out std_logic_vector(IMEMADDRWIDTH-1 downto 0);
    db_cyclecnt : out std_logic_vector(63 downto 0);
    db_lockcnt  : out std_logic_vector(63 downto 0);

    -- Clock and active-low reset.
    clk  : in std_logic;
    rstx : in std_logic);
end ffaccel_ifetch;
85
86architecture rtl_andor of ffaccel_ifetch is
87
88  -- signals for program counter.
89  signal pc_reg      : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
90  signal pc_wire     : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
91  signal pc_prev_reg : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
92  signal next_pc     : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
93
94  signal increased_pc    : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
95  signal return_addr_reg : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
96
97  -- internal signals for initializing and locking execution.
98  signal lock          : std_logic;
99  signal mem_en_lock_r : std_logic;
100
101  -- Delay/latency from retrieving instruction block from instruction memory.
102  constant IFETCH_DELAY : integer := 1 + extra_fetch_cycles;
103  -- Delay/latency from pc register to dispatching instruction.
104  constant PC_TO_DISPATCH_DELAY : integer :=
105    to_int(not bypass_fetchblock_register) +
106    IFETCH_DELAY;
107  -- Delay/latency from control flow operation to dispatching instruction.
108  constant NEXT_TO_DISPATCH_DELAY : integer :=
109    PC_TO_DISPATCH_DELAY + to_int(not bypass_pc_register);
110
111  signal   reset_cntr   : integer range 0 to IFETCH_DELAY;
112  signal   reset_lock   : std_logic;
113
114  -- Loopbuffer signals, or placeholders if lb is not enabled
115  -- Placeholder signals for loop buffer ports/constants
116  constant LBUFMAXITER  : integer := 1;
117  constant LBUFMAXDEPTH : integer := 1;
118  constant IFE_LBUFS    : integer := 1;
119  constant IFE_INFLOOP  : integer := 1;
120
121  signal o1data : std_logic_vector(LBUFMAXITER-1 downto 0);
122  signal o1load : std_logic;
123  signal loop_start_out : std_logic;
124  signal loop_len_out   : std_logic_vector(bit_width(LBUFMAXDEPTH+1)-1 downto 0);
125  signal loop_iter_out  : std_logic_vector(LBUFMAXITER-1 downto 0);
126
127  signal iteration_count    : std_logic_vector(LBUFMAXITER-1 downto 0);
128  signal pc_after_loop      : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
129  signal lockcnt_r, cyclecnt_r   : unsigned(64 - 1 downto 0);
130  signal db_pc_next       : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
131  constant db_pc_start : std_logic_vector(IMEMADDRWIDTH-1 downto 0)
132                         := (others => '0');
133
134
135
136
137begin
138
139  -- enable instruction memory.
140  imem_en_x <= '0'    when (fetch_en = '1' and mem_en_lock_r = '0') else '1';
141  -- do not fetch new instruction when processor is locked.
142  imem_addr <= pc_wire;
143
144  -- propagate lock to global lock
145
146  glock  <= busy or reset_lock or (not (fetch_en or no_glock_loopback_g));
147  ra_out <= return_addr_reg;
148  lock   <= not fetch_en or busy or mem_en_lock_r;
149
150
151  pc_update_generate_0  :  if not enable_irf_g generate
152    pc_update_proc : process (clk)
153    begin
154      if not sync_reset_g and rstx = '0' then
155        pc_reg      <= pc_init_g;
156        pc_prev_reg <= (others => '0');
157      elsif clk'event and clk = '1' then    -- rising clock edge.
158        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
159          pc_reg      <= db_pc_start;
160          pc_prev_reg <= (others => '0');
161        elsif lock = '0' then
162          pc_reg      <= next_pc;
163          if bypass_pc_register and bypass_fetchblock_register
164            and bypass_decoder_registers and pc_load = '1' then
165            pc_prev_reg <= pc_in;
166          else
167            pc_prev_reg <= pc_reg;
168          end if;
169        end if;
170      end if;
171    end process pc_update_proc;
172  end generate pc_update_generate_0;
173
174
175
176
177  -----------------------------------------------------------------------------
178  ra_block : block
179    signal ra_source : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
180  begin  -- block ra_block
181
182    -- Default choice generate
183    ra_source_select_generate_0 : if not enable_irf_g and not bypass_pc_register generate
184      ra_source <= increased_pc;
185    end generate ra_source_select_generate_0;
186
187    -- Choice enabled by generic
188    ra_source_select_generate_1 : if not enable_irf_g and bypass_pc_register generate
189      ra_source <= pc_reg;
190    end generate ra_source_select_generate_1;
191
192    -- When using IRF
193    ra_source_select_generate_2 : if enable_irf_g generate
194      ra_source <= pc_prev_reg;
195    end generate ra_source_select_generate_2;
196
197    ra_update_proc : process (clk)
198    begin  -- process ra_update_proc
199      if not sync_reset_g and rstx = '0' then -- asynchronous reset (active low)
200        return_addr_reg <= (others => '0');
201      elsif clk'event and clk = '1' then  -- rising clock edge
202        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
203          return_addr_reg <= (others => '0');
204        elsif lock = '0' then
205          -- return address
206          if (ra_load = '1') then
207            return_addr_reg <= ra_in;
208          elsif (pc_load = '1' and unsigned(pc_opcode) = IFE_CALL) then
209            -- return address transformed to same form as all others addresses
210            -- provided as input
211            return_addr_reg <= ra_source;
212          end if;
213
214        end if;
215      end if;
216    end process ra_update_proc;
217  end block ra_block;
218
219  -----------------------------------------------------------------------------
220  -- Keeps memory enable inactive during reset
221  imem_lock_proc : process (clk)
222  begin
223    if not sync_reset_g and rstx = '0' then
224      mem_en_lock_r <= '1';
225    elsif clk'event and clk = '1' then  -- rising clock edge
226        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
227        mem_en_lock_r <= '1';
228      else
229        mem_en_lock_r <= '0';
230      end if;
231    end if;
232  end process imem_lock_proc;
233
234  -----------------------------------------------------------------------------
235  -- Default fetch implementation
236  fetch_block_registered_generate : if
237    not bypass_fetchblock_register generate
238    fetch_block : block
239      signal instruction_reg : std_logic_vector(IMEMWIDTHINMAUS*IMEMMAUWIDTH*
240                                                (extra_fetch_cycles+1)-1 downto 0);
241    begin  -- block fetch_block
242
243      fetch_block_proc : process (clk)
244      begin  -- process fetch_block_proc
245        if not sync_reset_g and rstx = '0' then   -- asynchronous reset (active low)
246          instruction_reg <= (others => '0');
247          reset_cntr      <= 0;
248          reset_lock      <= '1';
249        elsif clk'event and clk = '1' then  -- rising clock edge
250          if (sync_reset_g and rstx = '0') or db_rstx = '0' then
251            instruction_reg <= (others => '0');
252            reset_cntr      <= 0;
253            reset_lock      <= '1';
254          elsif lock = '0' then
255            if reset_cntr < IFETCH_DELAY then
256              reset_cntr <= reset_cntr + 1;
257            else
258              reset_lock <= '0';
259            end if;
260            if (extra_fetch_cycles > 0) then
261              instruction_reg(instruction_reg'length-fetchblock'length-1 downto 0)
262                   <= instruction_reg(instruction_reg'length-1 downto fetchblock'length);
263            end if;
264            instruction_reg(instruction_reg'length-1
265                            downto instruction_reg'length - fetchblock'length)
266            <= imem_data;
267
268          end if;
269        end if;
270      end process fetch_block_proc;
271      fetchblock <= instruction_reg(fetchblock'length-1 downto 0);
272    end block fetch_block;
273  end generate fetch_block_registered_generate;
274
275  -- Fetch implementation without fetch register.
276  fetch_block_bypassed_generate : if
277    not (not bypass_fetchblock_register) generate
278    fetch_block : block
279    begin  -- block fetch_block
280      fetch_block_proc : process (clk)
281      begin  -- process fetch_block_proc
282        if not sync_reset_g and rstx = '0' then -- asynchronous reset (active low)
283          reset_lock <= '1';
284        elsif clk'event and clk = '1' then  -- rising clock edge
285          if (sync_reset_g and rstx = '0') or db_rstx = '0' then
286            reset_lock <= '1';
287          elsif lock = '0' then
288            reset_lock <= '0';
289          end if;
290        end if;
291      end process fetch_block_proc;
292      fetchblock <= imem_data;
293    end block fetch_block;
294
295  end generate fetch_block_bypassed_generate;
296
297  -----------------------------------------------------------------------------
298
299  loopbuf_logic : if enable_loop_buffer_g generate
300    -- Loop buffer signals --
301    signal start_looping      : std_logic;
302    signal start_looping_r    : std_logic_vector(NEXT_TO_DISPATCH_DELAY-1
303                                                 downto 0);
304    signal loop_length, loop_length_reg
305            : std_logic_vector(bit_width(LBUFMAXDEPTH+1)-1 downto 0);
306    signal loop_iter_reg      : std_logic_vector(LBUFMAXITER-1 downto 0);
307    signal loop_iter_temp_reg : std_logic_vector(LBUFMAXITER-1 downto 0);
308  begin
309    assert not enable_irf_g
310      report "IRF is not supported with loop buffer!"
311      severity failure;
312
313    -- Loop buffer setup operation logic --
314    start_looping <= '1' when (pc_load = '1' and
315                               unsigned(pc_opcode) = IFE_LBUFS) else
316                     '0';
317
318    iteration_count <= o1data(LBUFMAXITER-1 downto 0)
319                       when o1load = '1' else
320                       loop_iter_temp_reg;
321    loop_length <= pc_in(bit_width(LBUFMAXDEPTH+1)-1 downto 0);
322
323    process (clk)
324    begin
325      if not sync_reset_g and rstx = '0' then
326        start_looping_r    <= (others => '0');
327        loop_length_reg    <= (others => '0');
328        loop_iter_reg      <= (others => '0');
329        loop_iter_temp_reg <= (others => '0');
330      elsif clk'event and clk = '1' then    -- rising clock edge
331        -- Loop buffer control --
332        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
333          start_looping_r    <= (others => '0');
334          loop_length_reg    <= (others => '0');
335          loop_iter_reg      <= (others => '0');
336          loop_iter_temp_reg <= (others => '0');
337        elsif lock = '0' then
338          if (start_looping = '1' and
339              unsigned(iteration_count) /= 0) then
340            loop_length_reg    <= loop_length;
341            loop_iter_reg      <= iteration_count;
342            start_looping_r(0) <= '1';
343          else
344            start_looping_r(0) <= '0';
345          end if;
346
347          if o1load = '1' then
348            loop_iter_temp_reg <= o1data(LBUFMAXITER-1 downto 0);
349          end if;
350          -- Delay slots for lbufs are introduced to avoid need of pipeline
351          -- flushing in case the loop is skipped with iteration count of zero.
352          start_looping_r(start_looping_r'left downto 1) <=
353            start_looping_r(start_looping_r'left-1 downto 0);
354        end if;
355      end if;
356    end process;
357
358    loop_start_out <= start_looping_r(start_looping_r'left);
359    loop_iter_out  <= loop_iter_reg;
360    loop_len_out   <= loop_length_reg;
361    pc_after_loop <= std_logic_vector(
362      unsigned(increased_pc) + unsigned(loop_length));
363  end generate;
364
365  infloop_logic : if enable_infloop_buffer_g generate
366    signal start_looping : std_logic;
367    signal start_looping_r
368                : std_logic_vector(NEXT_TO_DISPATCH_DELAY-1 downto 0);
369    signal loop_length, loop_length_reg
370                : std_logic_vector(bit_width(LBUFMAXDEPTH+1)-1 downto 0);
371  begin
372    -- infinity loop operation control logic --
373    start_looping <= '1' when (pc_load = '1' and
374                               unsigned(pc_opcode) = IFE_INFLOOP) else
375                     '0';
376    loop_length <= pc_in(bit_width(LBUFMAXDEPTH+1)-1 downto 0);
377
378    process (clk)
379    begin
380      if not sync_reset_g and rstx = '0' then
381        start_looping_r    <= (others => '0');
382        loop_length_reg    <= (others => '0');
383      elsif clk'event and clk = '1' then    -- rising clock edge
384        -- Loop buffer control --
385        if sync_reset_g and rstx = '0' then
386          start_looping_r    <= (others => '0');
387          loop_length_reg    <= (others => '0');
388        elsif lock = '0' then
389          if (start_looping = '1' and to_uint(loop_length) /= 0) then
390            assert to_uint(loop_length) <= LBUFMAXDEPTH
391              report "The loop body size exceeds loop buffer capacity!"
392              severity failure;
393            loop_length_reg    <= loop_length;
394            start_looping_r(0) <= '1';
395          else
396            start_looping_r(0) <= '0';
397          end if;
398
399          -- Delay slots for lbufs are introduced to avoid need of pipeline
400          -- flushing in case the loop is skipped with iteration count of
401          -- zero.
402          start_looping_r(start_looping_r'left downto 1) <=
403            start_looping_r(start_looping_r'left-1 downto 0);
404        end if;
405      end if;
406    end process;
407
408    loop_start_out <= start_looping_r(start_looping_r'left);
409    loop_len_out   <= loop_length_reg;
410
411
412
413
414  end generate infloop_logic;
415--------------------------------------------------------------------------------
416
417
418
419
420
421
422
423
424
425
426  default_pc_generate: if not bypass_pc_register  generate
427    pc_wire <= pc_reg when (lock = '0') else pc_prev_reg;
428    -- increase program counter
429    increased_pc <= std_logic_vector(unsigned(pc_wire) + IMEMWIDTHINMAUS);
430
431    sel_next_pc : process (pc_load, pc_in, increased_pc, pc_opcode)
432    begin
433      if pc_load = '1' and (unsigned(pc_opcode) = IFE_CALL or unsigned(pc_opcode) = IFE_JUMP) then
434        next_pc <= pc_in;
435
436      else -- no branch
437        next_pc <= increased_pc;
438      end if;
439    end process sel_next_pc;
440  end generate default_pc_generate;
441
442  bypass_pc_register_generate: if bypass_pc_register generate
443    -- increase program counter
444    increased_pc <= std_logic_vector(unsigned(pc_wire) + IMEMWIDTHINMAUS);
445
446    sel_next_pc : process (pc_in, pc_reg, increased_pc        ,
447     pc_load, pc_opcode)
448    begin
449      if pc_load = '1' and (unsigned(pc_opcode) = IFE_CALL or unsigned(pc_opcode) = IFE_JUMP) then
450        pc_wire <= pc_in;
451        next_pc      <= increased_pc;
452
453      else -- no branch
454        pc_wire <= pc_reg;
455        next_pc      <= increased_pc;
456      end if;
457    end process sel_next_pc;
458
459  end generate bypass_pc_register_generate;
460
461  -----------------------------------------------------------------------------
462  debug_counters : if debug_logic_g generate
463  -----------------------------------------------------------------------------
464  -- Debugger processes and signal assignments
465  -----------------------------------------------------------------------------
466    db_counters : process(clk)
467    begin
468      if not sync_reset_g and rstx = '0' then -- async reset (active low)
469        lockcnt_r  <= (others => '0');
470        cyclecnt_r <= (others => '0');
471      elsif rising_edge(clk) then
472        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
473          lockcnt_r  <= (others => '0');
474          cyclecnt_r <= (others => '0');
475        elsif db_lockreq = '0' then
476          if lock = '1' then
477            lockcnt_r  <= lockcnt_r  + 1;
478          else
479            cyclecnt_r <= cyclecnt_r + 1;
480          end if;
481        end if;
482      end if;
483    end process;
484
485    db_cyclecnt <= std_logic_vector(cyclecnt_r);
486    db_lockcnt  <= std_logic_vector(lockcnt_r);
487    db_pc       <= pc_reg;
488    db_pc_next  <= next_pc;
489  end generate debug_counters;
490
491
492
493end rtl_andor;
494
495