-- Copyright (c) 2002-2009 Tampere University.
--
-- This file is part of TTA-Based Codesign Environment (TCE).
--
-- Permission is hereby granted, free of charge, to any person obtaining a
-- copy of this software and associated documentation files (the "Software"),
-- to deal in the Software without restriction, including without limitation
-- the rights to use, copy, modify, merge, publish, distribute, sublicense,
-- and/or sell copies of the Software, and to permit persons to whom the
-- Software is furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in
-- all copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-- DEALINGS IN THE SOFTWARE.
library IEEE;
use IEEE.Std_Logic_1164.all;
use IEEE.numeric_std.all;
use work.ffaccel_globals.all;
use work.ffaccel_gcu_opcodes.all;
use work.ffaccel_imem_mau.all;

use work.tce_util.all;

-------------------------------------------------------------------------------
-- Instruction fetch unit.
--
-- Maintains the program counter and the return address register, drives the
-- instruction memory address/enable, forwards the fetched instruction block
-- and produces the global lock signal.
--
-- Generics:
--   no_glock_loopback_g        : OR-ed with fetch_en before the result is
--                                inverted into glock; '1' therefore keeps a
--                                de-asserted fetch_en from raising glock.
--   bypass_fetchblock_register : when true, imem_data is passed straight to
--                                fetchblock instead of through a register.
--   bypass_pc_register         : when true, a taken jump/call drives pc_in
--                                directly onto the instruction memory address.
--   bypass_decoder_registers   : only used in the pc_prev_reg update condition.
--   extra_fetch_cycles         : additional instruction memory latency; adds
--                                pipeline stages to the fetch register.
--   sync_reset_g               : true  -> rstx is sampled on the rising clock
--                                         edge (synchronous reset),
--                                false -> rstx resets asynchronously.
--   debug_logic_g              : enables the cycle/lock counters and db_pc.
--   enable_loop_buffer_g       : enables the loop buffer control logic.
--   enable_infloop_buffer_g    : enables the infinite loop control logic.
--   enable_irf_g               : selects the IRF variants of the generate
--                                statements.
--   irf_size_g                 : not referenced in this architecture.
--   pc_init_g                  : pc_reg value loaded by the asynchronous reset.
-------------------------------------------------------------------------------
entity ffaccel_ifetch is

  generic (
    no_glock_loopback_g        : std_logic := '0';
    bypass_fetchblock_register : boolean   := false;
    bypass_pc_register         : boolean   := false;
    bypass_decoder_registers   : boolean   := false;
    extra_fetch_cycles         : integer   := 0;
    sync_reset_g               : boolean   := false;
    debug_logic_g              : boolean   := false;
    enable_loop_buffer_g       : boolean   := false;
    enable_infloop_buffer_g    : boolean   := false;
    enable_irf_g               : boolean   := false;
    irf_size_g                 : integer   := 0;

    pc_init_g : std_logic_vector(IMEMADDRWIDTH-1 downto 0) := (others => '0'));

  port (
    -- program counter in
    pc_in      : in  std_logic_vector (IMEMADDRWIDTH-1 downto 0);
    --return address out
    ra_out     : out std_logic_vector (IMEMADDRWIDTH-1 downto 0);
    -- return address in
    ra_in      : in  std_logic_vector(IMEMADDRWIDTH-1 downto 0);
    -- ifetch control signals
    pc_load    : in  std_logic;
    ra_load    : in  std_logic;
    pc_opcode  : in  std_logic_vector(0 downto 0);
    --instruction memory interface
    imem_data  : in  std_logic_vector(IMEMWIDTHINMAUS*IMEMMAUWIDTH-1 downto 0);
    imem_addr  : out std_logic_vector(IMEMADDRWIDTH-1 downto 0);
    imem_en_x  : out std_logic;
    fetchblock : out std_logic_vector(IMEMWIDTHINMAUS*IMEMMAUWIDTH-1 downto 0);
    busy       : in  std_logic;

    -- global lock
    glock : out std_logic;

    -- external control interface
    fetch_en : in std_logic;            --fetch_enable

    -- debugger signals
    db_lockreq  : in  std_logic;
    db_rstx     : in  std_logic;
    db_pc       : out std_logic_vector(IMEMADDRWIDTH-1 downto 0);
    db_cyclecnt : out std_logic_vector(64-1 downto 0);
    db_lockcnt  : out std_logic_vector(64-1 downto 0);

    clk  : in std_logic;
    rstx : in std_logic);
end ffaccel_ifetch;

architecture rtl_andor of ffaccel_ifetch is

  -- signals for program counter.
  signal pc_reg      : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
  signal pc_wire     : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
  signal pc_prev_reg : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
  signal next_pc     : std_logic_vector(IMEMADDRWIDTH-1 downto 0);

  signal increased_pc    : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
  signal return_addr_reg : std_logic_vector(IMEMADDRWIDTH-1 downto 0);

  -- internal signals for initializing and locking execution.
  signal lock          : std_logic;
  signal mem_en_lock_r : std_logic;

  -- Delay/latency from retrieving instruction block from instruction memory.
  constant IFETCH_DELAY : integer := 1 + extra_fetch_cycles;
  -- Delay/latency from pc register to dispatching instruction.
  constant PC_TO_DISPATCH_DELAY : integer :=
    to_int(not bypass_fetchblock_register) +
    IFETCH_DELAY;
  -- Delay/latency from control flow operation to dispatching instruction.
  constant NEXT_TO_DISPATCH_DELAY : integer :=
    PC_TO_DISPATCH_DELAY + to_int(not bypass_pc_register);

  signal reset_cntr : integer range 0 to IFETCH_DELAY;
  signal reset_lock : std_logic;

  -- Loopbuffer signals, or placeholders if lb is not enabled
  -- Placeholder signals for loop buffer ports/constants
  constant LBUFMAXITER  : integer := 1;
  constant LBUFMAXDEPTH : integer := 1;
  constant IFE_LBUFS    : integer := 1;
  constant IFE_INFLOOP  : integer := 1;

  -- NOTE(review): o1data and o1load have no driver in this file; they are
  -- only read by the loop buffer logic.
  signal o1data         : std_logic_vector(LBUFMAXITER-1 downto 0);
  signal o1load         : std_logic;
  signal loop_start_out : std_logic;
  signal loop_len_out   : std_logic_vector(bit_width(LBUFMAXDEPTH+1)-1 downto 0);
  signal loop_iter_out  : std_logic_vector(LBUFMAXITER-1 downto 0);

  signal iteration_count       : std_logic_vector(LBUFMAXITER-1 downto 0);
  signal pc_after_loop         : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
  signal lockcnt_r, cyclecnt_r : unsigned(64 - 1 downto 0);
  signal db_pc_next            : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
  constant db_pc_start : std_logic_vector(IMEMADDRWIDTH-1 downto 0)
    := (others => '0');

begin

  -- enable instruction memory (imem_en_x is active low).
  imem_en_x <= '0' when (fetch_en = '1' and mem_en_lock_r = '0') else '1';
  -- do not fetch new instruction when processor is locked.
  imem_addr <= pc_wire;

  -- propagate lock to global lock
  glock  <= busy or reset_lock or (not (fetch_en or no_glock_loopback_g));
  ra_out <= return_addr_reg;
  lock   <= not fetch_en or busy or mem_en_lock_r;

  -----------------------------------------------------------------------------
  -- Program counter register (variant without IRF).
  -- NOTE(review): no alternative driver for pc_reg/pc_prev_reg is visible in
  -- this file for enable_irf_g = true.
  -----------------------------------------------------------------------------
  pc_update_generate_0 : if not enable_irf_g generate
    -- rstx is in the sensitivity list so that the asynchronous reset branch
    -- is evaluated when rstx changes, not only on clock events.
    pc_update_proc : process (clk, rstx)
    begin
      if not sync_reset_g and rstx = '0' then
        pc_reg      <= pc_init_g;
        pc_prev_reg <= (others => '0');
      elsif clk'event and clk = '1' then  -- rising clock edge.
        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
          -- NOTE(review): the synchronous/debugger reset loads db_pc_start
          -- (all zeros) whereas the asynchronous reset loads pc_init_g;
          -- confirm this difference is intended for a non-zero pc_init_g.
          pc_reg      <= db_pc_start;
          pc_prev_reg <= (others => '0');
        elsif lock = '0' then
          pc_reg <= next_pc;
          if bypass_pc_register and bypass_fetchblock_register
            and bypass_decoder_registers and pc_load = '1' then
            pc_prev_reg <= pc_in;
          else
            pc_prev_reg <= pc_reg;
          end if;
        end if;
      end if;
    end process pc_update_proc;
  end generate pc_update_generate_0;

  -----------------------------------------------------------------------------
  -- Return address register.
  -----------------------------------------------------------------------------
  ra_block : block
    -- Value stored into the return address register on a call.
    signal ra_source : std_logic_vector(IMEMADDRWIDTH-1 downto 0);
  begin  -- block ra_block

    -- Default choice generate
    ra_source_select_generate_0 : if not enable_irf_g and not bypass_pc_register generate
      ra_source <= increased_pc;
    end generate ra_source_select_generate_0;

    -- Choice enabled by generic
    ra_source_select_generate_1 : if not enable_irf_g and bypass_pc_register generate
      ra_source <= pc_reg;
    end generate ra_source_select_generate_1;

    -- When using IRF
    ra_source_select_generate_2 : if enable_irf_g generate
      ra_source <= pc_prev_reg;
    end generate ra_source_select_generate_2;

    ra_update_proc : process (clk, rstx)
    begin  -- process ra_update_proc
      if not sync_reset_g and rstx = '0' then  -- asynchronous reset (active low)
        return_addr_reg <= (others => '0');
      elsif clk'event and clk = '1' then  -- rising clock edge
        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
          return_addr_reg <= (others => '0');
        elsif lock = '0' then
          -- An explicit return address load takes priority over a call.
          if (ra_load = '1') then
            return_addr_reg <= ra_in;
          elsif (pc_load = '1' and unsigned(pc_opcode) = IFE_CALL) then
            -- return address transformed to same form as all others addresses
            -- provided as input
            return_addr_reg <= ra_source;
          end if;
        end if;
      end if;
    end process ra_update_proc;
  end block ra_block;

  -----------------------------------------------------------------------------
  -- Keeps memory enable inactive during reset
  -----------------------------------------------------------------------------
  imem_lock_proc : process (clk, rstx)
  begin
    if not sync_reset_g and rstx = '0' then
      mem_en_lock_r <= '1';
    elsif clk'event and clk = '1' then  -- rising clock edge
      if (sync_reset_g and rstx = '0') or db_rstx = '0' then
        mem_en_lock_r <= '1';
      else
        mem_en_lock_r <= '0';
      end if;
    end if;
  end process imem_lock_proc;

  -----------------------------------------------------------------------------
  -- Default fetch implementation
  -----------------------------------------------------------------------------
  fetch_block_registered_generate : if
    not bypass_fetchblock_register generate
    fetch_block : block
      -- Shift register of (extra_fetch_cycles + 1) instruction blocks. New
      -- data enters at the top; the lowest block drives fetchblock.
      signal instruction_reg : std_logic_vector(IMEMWIDTHINMAUS*IMEMMAUWIDTH*
                                                (extra_fetch_cycles+1)-1 downto 0);
    begin  -- block fetch_block

      fetch_block_proc : process (clk, rstx)
      begin  -- process fetch_block_proc
        if not sync_reset_g and rstx = '0' then  -- asynchronous reset (active low)
          instruction_reg <= (others => '0');
          reset_cntr      <= 0;
          reset_lock      <= '1';
        elsif clk'event and clk = '1' then  -- rising clock edge
          if (sync_reset_g and rstx = '0') or db_rstx = '0' then
            instruction_reg <= (others => '0');
            reset_cntr      <= 0;
            reset_lock      <= '1';
          elsif lock = '0' then
            -- reset_lock is released once reset_cntr has reached
            -- IFETCH_DELAY, counting only unlocked cycles.
            if reset_cntr < IFETCH_DELAY then
              reset_cntr <= reset_cntr + 1;
            else
              reset_lock <= '0';
            end if;
            if (extra_fetch_cycles > 0) then
              -- Move the older blocks one position towards the output.
              instruction_reg(instruction_reg'length-fetchblock'length-1 downto 0)
                <= instruction_reg(instruction_reg'length-1 downto fetchblock'length);
            end if;
            instruction_reg(instruction_reg'length-1
                            downto instruction_reg'length - fetchblock'length)
              <= imem_data;
          end if;
        end if;
      end process fetch_block_proc;
      fetchblock <= instruction_reg(fetchblock'length-1 downto 0);
    end block fetch_block;
  end generate fetch_block_registered_generate;

  -- Fetch implementation without fetch register.
  fetch_block_bypassed_generate : if
    bypass_fetchblock_register generate
    fetch_block : block
    begin  -- block fetch_block
      fetch_block_proc : process (clk, rstx)
      begin  -- process fetch_block_proc
        if not sync_reset_g and rstx = '0' then  -- asynchronous reset (active low)
          reset_lock <= '1';
        elsif clk'event and clk = '1' then  -- rising clock edge
          if (sync_reset_g and rstx = '0') or db_rstx = '0' then
            reset_lock <= '1';
          elsif lock = '0' then
            reset_lock <= '0';
          end if;
        end if;
      end process fetch_block_proc;
      fetchblock <= imem_data;
    end block fetch_block;

  end generate fetch_block_bypassed_generate;

  -----------------------------------------------------------------------------
  -- Loop buffer control logic
  -----------------------------------------------------------------------------
  loopbuf_logic : if enable_loop_buffer_g generate
    -- Loop buffer signals --
    signal start_looping   : std_logic;
    signal start_looping_r : std_logic_vector(NEXT_TO_DISPATCH_DELAY-1
                                              downto 0);
    signal loop_length, loop_length_reg
      : std_logic_vector(bit_width(LBUFMAXDEPTH+1)-1 downto 0);
    signal loop_iter_reg      : std_logic_vector(LBUFMAXITER-1 downto 0);
    signal loop_iter_temp_reg : std_logic_vector(LBUFMAXITER-1 downto 0);
  begin
    assert not enable_irf_g
      report "IRF is not supported with loop buffer!"
      severity failure;

    -- Loop buffer setup operation logic --
    start_looping <= '1' when (pc_load = '1' and
                               unsigned(pc_opcode) = IFE_LBUFS) else
                     '0';

    -- Use the operand being loaded this cycle, otherwise the stored one.
    iteration_count <= o1data(LBUFMAXITER-1 downto 0)
                       when o1load = '1' else
                       loop_iter_temp_reg;
    loop_length <= pc_in(bit_width(LBUFMAXDEPTH+1)-1 downto 0);

    process (clk, rstx)
    begin
      if not sync_reset_g and rstx = '0' then
        start_looping_r    <= (others => '0');
        loop_length_reg    <= (others => '0');
        loop_iter_reg      <= (others => '0');
        loop_iter_temp_reg <= (others => '0');
      elsif clk'event and clk = '1' then  -- rising clock edge
        -- Loop buffer control --
        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
          start_looping_r    <= (others => '0');
          loop_length_reg    <= (others => '0');
          loop_iter_reg      <= (others => '0');
          loop_iter_temp_reg <= (others => '0');
        elsif lock = '0' then
          if (start_looping = '1' and
              unsigned(iteration_count) /= 0) then
            loop_length_reg    <= loop_length;
            loop_iter_reg      <= iteration_count;
            start_looping_r(0) <= '1';
          else
            start_looping_r(0) <= '0';
          end if;

          if o1load = '1' then
            loop_iter_temp_reg <= o1data(LBUFMAXITER-1 downto 0);
          end if;
          -- Delay slots for lbufs are introduced to avoid need of pipeline
          -- flushing in case the loop is skipped with iteration count of zero.
          start_looping_r(start_looping_r'left downto 1) <=
            start_looping_r(start_looping_r'left-1 downto 0);
        end if;
      end if;
    end process;

    loop_start_out <= start_looping_r(start_looping_r'left);
    loop_iter_out  <= loop_iter_reg;
    loop_len_out   <= loop_length_reg;
    pc_after_loop  <= std_logic_vector(
      unsigned(increased_pc) + unsigned(loop_length));
  end generate loopbuf_logic;

  -----------------------------------------------------------------------------
  -- Infinite loop control logic
  -----------------------------------------------------------------------------
  infloop_logic : if enable_infloop_buffer_g generate
    signal start_looping : std_logic;
    signal start_looping_r
      : std_logic_vector(NEXT_TO_DISPATCH_DELAY-1 downto 0);
    signal loop_length, loop_length_reg
      : std_logic_vector(bit_width(LBUFMAXDEPTH+1)-1 downto 0);
  begin
    -- infinity loop operation control logic --
    start_looping <= '1' when (pc_load = '1' and
                               unsigned(pc_opcode) = IFE_INFLOOP) else
                     '0';
    loop_length <= pc_in(bit_width(LBUFMAXDEPTH+1)-1 downto 0);

    process (clk, rstx)
    begin
      if not sync_reset_g and rstx = '0' then
        start_looping_r <= (others => '0');
        loop_length_reg <= (others => '0');
      elsif clk'event and clk = '1' then  -- rising clock edge
        -- Loop buffer control --
        -- The debugger reset (db_rstx) clears these registers too, matching
        -- every other register process in this architecture.
        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
          start_looping_r <= (others => '0');
          loop_length_reg <= (others => '0');
        elsif lock = '0' then
          if (start_looping = '1' and to_uint(loop_length) /= 0) then
            assert to_uint(loop_length) <= LBUFMAXDEPTH
              report "The loop body size exceeds loop buffer capacity!"
              severity failure;
            loop_length_reg    <= loop_length;
            start_looping_r(0) <= '1';
          else
            start_looping_r(0) <= '0';
          end if;

          -- Delay slots for lbufs are introduced to avoid need of pipeline
          -- flushing in case the loop is skipped with iteration count of
          -- zero.
          start_looping_r(start_looping_r'left downto 1) <=
            start_looping_r(start_looping_r'left-1 downto 0);
        end if;
      end if;
    end process;

    loop_start_out <= start_looping_r(start_looping_r'left);
    loop_len_out   <= loop_length_reg;

  end generate infloop_logic;

  -----------------------------------------------------------------------------
  -- Next program counter selection
  -----------------------------------------------------------------------------
  default_pc_generate : if not bypass_pc_register generate
    -- While locked, keep addressing the previous program counter value.
    pc_wire      <= pc_reg when (lock = '0') else pc_prev_reg;
    -- increase program counter
    increased_pc <= std_logic_vector(unsigned(pc_wire) + IMEMWIDTHINMAUS);

    sel_next_pc : process (pc_load, pc_in, increased_pc, pc_opcode)
    begin
      if pc_load = '1' and (unsigned(pc_opcode) = IFE_CALL
                            or unsigned(pc_opcode) = IFE_JUMP) then
        next_pc <= pc_in;
      else                              -- no branch
        next_pc <= increased_pc;
      end if;
    end process sel_next_pc;
  end generate default_pc_generate;

  bypass_pc_register_generate : if bypass_pc_register generate
    -- increase program counter
    increased_pc <= std_logic_vector(unsigned(pc_wire) + IMEMWIDTHINMAUS);

    -- A taken jump/call puts pc_in directly on pc_wire; next_pc is always
    -- the incremented pc_wire.
    sel_next_pc : process (pc_in, pc_reg, increased_pc,
                           pc_load, pc_opcode)
    begin
      if pc_load = '1' and (unsigned(pc_opcode) = IFE_CALL
                            or unsigned(pc_opcode) = IFE_JUMP) then
        pc_wire <= pc_in;
        next_pc <= increased_pc;
      else                              -- no branch
        pc_wire <= pc_reg;
        next_pc <= increased_pc;
      end if;
    end process sel_next_pc;

  end generate bypass_pc_register_generate;

  -----------------------------------------------------------------------------
  -- NOTE(review): db_cyclecnt, db_lockcnt and db_pc are driven only when
  -- debug_logic_g is true.
  -----------------------------------------------------------------------------
  debug_counters : if debug_logic_g generate
    ---------------------------------------------------------------------------
    -- Debugger processes and signal assignments
    ---------------------------------------------------------------------------
    -- While db_lockreq is low, counts locked cycles in lockcnt_r and all
    -- other cycles in cyclecnt_r.
    db_counters : process (clk, rstx)
    begin
      if not sync_reset_g and rstx = '0' then  -- async reset (active low)
        lockcnt_r  <= (others => '0');
        cyclecnt_r <= (others => '0');
      elsif rising_edge(clk) then
        if (sync_reset_g and rstx = '0') or db_rstx = '0' then
          lockcnt_r  <= (others => '0');
          cyclecnt_r <= (others => '0');
        elsif db_lockreq = '0' then
          if lock = '1' then
            lockcnt_r <= lockcnt_r + 1;
          else
            cyclecnt_r <= cyclecnt_r + 1;
          end if;
        end if;
      end if;
    end process;

    db_cyclecnt <= std_logic_vector(cyclecnt_r);
    db_lockcnt  <= std_logic_vector(lockcnt_r);
    db_pc       <= pc_reg;
    db_pc_next  <= next_pc;
  end generate debug_counters;

end rtl_andor;