1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements.  See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership.  The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License.  You may obtain a copy of the License at
9 *
10 *   http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied.  See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20package vta.core
21
22import chisel3._
23import vta.util.config._
24import vta.shell._
25
/** Core parameters.
  *
  * Every field must be strictly positive, and `uopBits` must additionally be a
  * multiple of 8. Both conditions are enforced with `require`, so an invalid
  * configuration fails with an `IllegalArgumentException` as soon as the case
  * class is constructed (including through `copy`).
  *
  * NOTE(review): the descriptions below are inferred from the field names
  * only — confirm against the modules that consume these values.
  *
  * @param batch presumably the batch dimension of the compute block
  * @param blockOut presumably the output-channel dimension of the compute block
  * @param blockIn presumably the input-channel dimension of the compute block
  * @param inpBits presumably the bit width of one input element
  * @param wgtBits presumably the bit width of one weight element
  * @param uopBits presumably the bit width of one micro-op; must be a multiple of 8
  * @param accBits presumably the bit width of one accumulator element
  * @param outBits presumably the bit width of one output element
  * @param uopMemDepth presumably the number of entries in the micro-op memory
  * @param inpMemDepth presumably the number of entries in the input memory
  * @param wgtMemDepth presumably the number of entries in the weight memory
  * @param accMemDepth presumably the number of entries in the accumulator memory
  * @param outMemDepth presumably the number of entries in the output memory
  * @param instQueueEntries presumably the number of entries in each instruction queue
  */
case class CoreParams(
    batch: Int = 1,
    blockOut: Int = 16,
    blockIn: Int = 16,
    inpBits: Int = 8,
    wgtBits: Int = 8,
    uopBits: Int = 32,
    accBits: Int = 32,
    outBits: Int = 8,
    uopMemDepth: Int = 512,
    inpMemDepth: Int = 512,
    wgtMemDepth: Int = 512,
    accMemDepth: Int = 512,
    outMemDepth: Int = 512,
    instQueueEntries: Int = 32
) {
  // Reject zero and negative values up front. Without this, `uopBits = 0` or
  // `uopBits = -8` would slip through the alignment check below because
  // `0 % 8 == 0` and `-8 % 8 == 0`.
  Seq(
    "batch" -> batch,
    "blockOut" -> blockOut,
    "blockIn" -> blockIn,
    "inpBits" -> inpBits,
    "wgtBits" -> wgtBits,
    "uopBits" -> uopBits,
    "accBits" -> accBits,
    "outBits" -> outBits,
    "uopMemDepth" -> uopMemDepth,
    "inpMemDepth" -> inpMemDepth,
    "wgtMemDepth" -> wgtMemDepth,
    "accMemDepth" -> accMemDepth,
    "outMemDepth" -> outMemDepth,
    "instQueueEntries" -> instQueueEntries
  ).foreach { case (name, value) =>
    require(value > 0,
            s"\n\n[VTA] [CoreParams] $name must be positive, got $value\n\n")
  }

  require(uopBits % 8 == 0,
          s"\n\n[VTA] [CoreParams] uopBits must be byte aligned\n\n")
}
46
/** Configuration key typed as a `Field` of [[CoreParams]].
  *
  * NOTE(review): presumably used to look up the core configuration from an
  * implicit `Parameters` instance (e.g. `p(CoreKey)`) — confirm against
  * `vta.util.config`.
  */
case object CoreKey extends Field[CoreParams]
48
/** Core.
  *
  * Top-level wiring of the current VTA architecture: the fetch, load, compute
  * and store modules, together with the event counters, are instantiated here
  * and connected to one another. Connections are kept as bulk connections (<>)
  * wherever possible, because that keeps the wiring easy to follow; please
  * preserve that when extending the core.
  *
  * The core has to be instantiated by a shell, which talks to it through the
  * VTA Control Register (VCR) and VTA Memory Engine (VME) interfaces. See the
  * shell directory for details on those interfaces and modules.
  */
class Core(implicit p: Parameters) extends Module {
  val io = IO(new Bundle {
    val vcr = new VCRClient
    val vme = new VMEMaster
  })

  val fetch = Module(new Fetch)
  val load = Module(new Load)
  val compute = Module(new Compute)
  val store = Module(new Store)
  val ecounters = Module(new EventCounters)

  // ---- Control: the VCR launch signal fans out to fetch and the counters
  fetch.io.launch := io.vcr.launch
  ecounters.io.launch := io.vcr.launch

  // ---- VCR values and pointers: instruction count and DRAM base addresses
  fetch.io.ins_count := io.vcr.vals(0)
  fetch.io.ins_baddr := io.vcr.ptrs(0)
  compute.io.uop_baddr := io.vcr.ptrs(1)
  load.io.inp_baddr := io.vcr.ptrs(2)
  load.io.wgt_baddr := io.vcr.ptrs(3)
  compute.io.acc_baddr := io.vcr.ptrs(4)
  store.io.out_baddr := io.vcr.ptrs(5)

  // ---- Instructions (tasks): fetch reads them from memory (DRAM) into
  // queues (SRAMs) and hands one stream to each of load, compute and store
  load.io.inst <> fetch.io.inst.ld
  compute.io.inst <> fetch.io.inst.co
  store.io.inst <> fetch.io.inst.st

  // ---- Post signals: compute exchanges them with load on index 0 and
  // with store on index 1
  load.io.i_post := compute.io.o_post(0)
  compute.io.i_post(0) := load.io.o_post
  compute.io.i_post(1) := store.io.o_post
  store.io.i_post := compute.io.o_post(1)

  // ---- Scratchpad data paths
  // Load brings inputs and weights from memory (DRAM) into scratchpads
  // (SRAMs); compute consumes them for dense and ALU instructions (tasks).
  compute.io.inp <> load.io.inp
  compute.io.wgt <> load.io.wgt
  // Store takes the results from compute into scratchpads (SRAMs) and writes
  // them back to memory (DRAM).
  store.io.out <> compute.io.out

  // ---- Memory (DRAM) ports: five read (rd) clients and one write (wr) client
  io.vme.rd(0) <> fetch.io.vme_rd
  io.vme.rd(1) <> compute.io.vme_rd(0) // compute loads uops and accumulations (acc)
  io.vme.rd(2) <> load.io.vme_rd(0)
  io.vme.rd(3) <> load.io.vme_rd(1)
  io.vme.rd(4) <> compute.io.vme_rd(1)
  io.vme.wr(0) <> store.io.vme_wr

  // ---- Completion and event counters
  ecounters.io.finish := compute.io.finish
  io.vcr.ecnt <> ecounters.io.ecnt

  // Once the finish instruction has executed, the VCR finish flag is asserted
  // through a register, i.e. one cycle after compute reports it.
  val finish = RegNext(compute.io.finish)
  io.vcr.finish := finish
}
119