1 /* tce_common.h - common code for the different TCE/TTA device drivers.
2 
3    Copyright (c) 2012 Pekka Jääskeläinen / Tampere University of Technology
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 #ifndef POCL_TCE_COMMON_H
24 #define POCL_TCE_COMMON_H
25 
26 
27 #include "bufalloc.h"
28 
29 #ifdef __cplusplus
30 
31 #include <string>
32 
33 #include "TCEString.hh"
34 #include "pocl_device.h"
35 
/* Forward declarations of the TCE types this header refers to only via
   pointers and references; their full definitions are not needed here. */
namespace TTAMachine { class Machine; class AddressSpace; }

namespace TTAProgram { class Program; }
44 
45 class TCEDevice {
46  public:
47   TCEDevice(cl_device_id dev, const char* adfName);
48   virtual ~TCEDevice();
49 
50   void initMemoryManagement(const TTAMachine::Machine& mach);
51 
52   /* Block read/write (no byteswaps). */
53   virtual void copyHostToDevice
54     (const void *host_ptr, uint32_t dest_addr, size_t count) = 0;
55 
56   virtual void copyDeviceToHost
57     (uint32_t src_addr, const void *host_ptr, size_t count) = 0;
58 
59   virtual void copyDeviceToDevice
60     (uint32_t src_addr, uint32_t dst_addr, size_t count) = 0;
61 
62   virtual void loadProgramToDevice(const std::string& asmFileName) = 0;
63   /* Restarts the device to start the program from the beginning. */
64   virtual void restartProgram() = 0;
65 
66   /* Write/read word with a host->device / device->host byteswap. */
67   virtual void writeWordToDevice(uint32_t dest_addr, uint32_t word);
68   virtual uint32_t readWordFromDevice(uint32_t addr);
69 
70   /* Read the device time stamp for the profiling queue. */
71   virtual uint64_t timeStamp() = 0;
72 
73   /* Finds the global data memory addresses needed for device->host
74      communication from the loaded program. */
75   virtual void findDataMemoryAddresses();
76 
77   /* Initializes the global communication structures in the device's
78      global memory. Should be called once after loading the program to
79      the device.*/
80   virtual void initDataMemory();
81 
82   virtual void setMachine(const TTAMachine::Machine& machine);
83 
notifyKernelRunCommandSent(__kernel_exec_cmd &,_cl_command_run *)84   virtual void notifyKernelRunCommandSent
85       (__kernel_exec_cmd& /*dev_cmd*/, _cl_command_run* /*run_cmd*/) {};
86 
87   virtual bool isNewKernel(const _cl_command_run* runCmd);
88 
89   void updateCurrentKernel
90     (const _cl_command_run* runCmd, uint32_t kernelAddr);
91 
92   /* Generates the command line string to execute tcecc to produce the
93      kernel binary. */
94   TCEString tceccCommandLine(_cl_command_run *run_cmd, const TCEString &tempDir,
95                              const TCEString &inputSrc,
96                              const TCEString &outputTpef,
97                              const TCEString extraParams = TCEString(""));
98 
99   bool isMultiCoreMachine() const;
100 
101   /* The bufalloc memory regions for device memory allocation book
102      keeping. */
103   struct memory_region local_mem;
104   struct memory_region global_mem;
105 
106   TTAMachine::AddressSpace *local_as;
107   TTAMachine::AddressSpace *global_as;
108   TTAMachine::AddressSpace *private_as;
109   std::string machine_file;
110 
111   cl_device_id parent;
112 
113   bool needsByteSwap;
114 
115   const TTAProgram::Program* currentProgram;
116   const TTAMachine::Machine* machine_;
117 
118   uint32_t commandQueueAddr;
119 
120   uint32_t curKernelAddr;
121   cl_kernel curKernel;
122 
123   size_t curLocalX;
124   size_t curLocalY;
125   size_t curLocalZ;
126 
127   uint64_t globalCycleCount;
128 
129   pthread_mutex_t cq_lock;
130   pocl_lock_t tce_compile_lock;
131   _cl_command_node *ready_list;
132   _cl_command_node *command_list;
133 };
134 
135 #endif
136 
/* Address space ids as they appear in the ADFs, in numeric order. */
#define TTA_ASID_PRIVATE  0
#define TTA_ASID_GLOBAL   1
#define TTA_ASID_CONSTANT 2
#define TTA_ASID_LOCAL    3

/* Bytes left unallocated in the device local memory. */
#define TTA_UNALLOCATED_LOCAL_SPACE (1*1024)

/* Bytes reserved in the device global memory for the command queue and
   related structures: they start from address 0 and buffer storage
   begins after them. TODO: check from the symbol table of the produced
   program.

   This value is also the global memory offset at which a struct
   kernel_exe_cmd lives (presumably the __kernel_exec_cmd type -- confirm),
   so memory allocations should start after this + sizeof(kernel_exe_cmd). */
#define TTA_UNALLOCATED_GLOBAL_SPACE 2048
152 
153 #ifdef __cplusplus
154 extern "C" {
155 #endif
156 
157 #include "prototypes.inc"
158 
159 GEN_PROTOTYPES (tce)
160 
161 #ifdef __cplusplus
162 }
163 #endif
164 
165 #endif
166