1 /*  RetroArch - A frontend for libretro.
2  *  Copyright (C) 2014-2017 - Ali Bouhlel
3  *
4  *  RetroArch is free software: you can redistribute it and/or modify it under the terms
5  *  of the GNU General Public License as published by the Free Software Found-
6  *  ation, either version 3 of the License, or (at your option) any later version.
7  *
8  *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9  *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10  *  PURPOSE.  See the GNU General Public License for more details.
11  *
12  *  You should have received a copy of the GNU General Public License along with RetroArch.
13  *  If not, see <http://www.gnu.org/licenses/>.
14  */
15 
/* This file mostly contains modified functions from the ctrulib SDK. */
17 
18 #ifndef CTR_GU_H
19 #define CTR_GU_H
20 
21 #include <3ds.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include <retro_inline.h>
25 
26 #include "ctr/ctr_debug.h"
27 
/* Translates a virtual address into a (void*) by range:
 *   [0x14000000, 0x1c000000) -> vaddr + 0x0c000000
 *   [0x1F000000, 0x1F600000) -> vaddr - 0x07000000
 *   [0x1FF00000, 0x1FF80000) -> vaddr (unchanged)
 *   [0x30000000, 0x40000000) -> vaddr - 0x10000000
 * Any other address yields a null pointer.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand (e.g. in `!VIRT_TO_PHYS(p)` or `VIRT_TO_PHYS(p) == q`).
 * Note: vaddr is evaluated more than once; do not pass expressions with
 * side effects. */
#define VIRT_TO_PHYS(vaddr) \
   ((((u32)(vaddr)) >= 0x14000000 && ((u32)(vaddr)) < 0x1c000000)?(void*)((u32)(vaddr) + 0x0c000000):\
   (((u32)(vaddr)) >= 0x1F000000 && ((u32)(vaddr)) < 0x1F600000)?(void*)((u32)(vaddr) - 0x07000000):\
   (((u32)(vaddr)) >= 0x1FF00000 && ((u32)(vaddr)) < 0x1FF80000)?(void*)(vaddr):\
   (((u32)(vaddr)) >= 0x30000000 && ((u32)(vaddr)) < 0x40000000)?(void*)((u32)(vaddr) - 0x10000000):(void*)0)
33 
/* Packs W into bits 0-15 and H into bits 16-31 of a single u32. */
#define CTRGU_SIZE(W,H)            (((u32)(W)&0xFFFF)|((u32)(H)<<16))

/* DMA flags */
/* Bit flags OR-ed into the flags word built by ctrGuCopyImage.
 * CTRGU_DMA_T_TO_L is zero, i.e. it is the absence of the L_TO_T bit. */
#define CTRGU_DMA_VFLIP            (1 << 0)
#define CTRGU_DMA_L_TO_T           (1 << 1)
#define CTRGU_DMA_T_TO_L           (0 << 1)
#define CTRGU_DMA_TRUNCATE         (1 << 2)
#define CTRGU_DMA_CONVERT_NONE     (1 << 3)

/* Pixel format codes; presumably the values passed as src_fmt / dst_fmt
 * to ctrGuCopyImage and ctrGuDisplayTransfer (confirm against callers). */
#define CTRGU_RGBA8     (0)
#define CTRGU_RGB8      (1)
#define CTRGU_RGB565    (2)
#define CTRGU_RGBA5551  (3)
#define CTRGU_RGBA4444  (4)

/* Multisample selectors, pre-shifted to bit 24; presumably passed as
 * multisample_lvl to ctrGuDisplayTransfer (confirm against callers). */
#define CTRGU_MULTISAMPLE_NONE      (0 << 24)
#define CTRGU_MULTISAMPLE_2x1       (1 << 24)
#define CTRGU_MULTISAMPLE_2x2       (2 << 24)

/* CPU tick counts; the per-frame value corresponds to roughly
 * 59.83 frames per second at the per-second rate. */
#define CTR_CPU_TICKS_PER_SECOND    268123480
#define CTR_CPU_TICKS_PER_FRAME     4481134
55 
/* Globals defined elsewhere. ctrGuFlushAndRun submits gpuCmdBuf with a size
 * of gpuCmdBufOffset*4, and passes __linear_heap (cast to u32*) together
 * with __linear_heap_size as the second buffer. */
extern u32* gpuCmdBuf;
extern u32 gpuCmdBufOffset;
extern u32 __linear_heap_size;
extern u32 __linear_heap;
60 
61 #ifdef USE_CTRULIB_2
62 __attribute__((always_inline))
ctrGspSubmitGxCommand(u32 gxCommand[0x8])63 static INLINE Result ctrGspSubmitGxCommand(u32 gxCommand[0x8])
64 {
65    return gspSubmitGxCommand(gxCommand);
66 }
67 #else
68 __attribute__((always_inline))
ctrGspSubmitGxCommand(u32 gxCommand[0x8])69 static INLINE Result ctrGspSubmitGxCommand(u32 gxCommand[0x8])
70 {
71    return gspSubmitGxCommand(gxCmdBuf, gxCommand);
72 }
73 #endif
74 
75 __attribute__((always_inline))
ctr_set_parallax_layer(bool state)76 static INLINE Result ctr_set_parallax_layer(bool state)
77 {
78    u32 reg_state = state? 0x00010001: 0x0;
79    return GSPGPU_WriteHWRegs(0x202000, &reg_state, 4);
80 }
81 
82 __attribute__((always_inline))
ctrGuSetTexture(GPU_TEXUNIT unit,u32 * data,u16 width,u16 height,u32 param,GPU_TEXCOLOR colorType)83 static INLINE void ctrGuSetTexture(GPU_TEXUNIT unit, u32* data,
84       u16 width, u16 height, u32 param, GPU_TEXCOLOR colorType)
85 {
86    switch (unit)
87    {
88    case GPU_TEXUNIT0:
89       GPUCMD_AddWrite(GPUREG_TEXUNIT0_TYPE, colorType);
90       GPUCMD_AddWrite(GPUREG_TEXUNIT0_ADDR1, ((u32)data)>>3);
91       GPUCMD_AddWrite(GPUREG_TEXUNIT0_DIM, (height)|(width<<16));
92       GPUCMD_AddWrite(GPUREG_TEXUNIT0_PARAM, param);
93       break;
94 
95    case GPU_TEXUNIT1:
96       GPUCMD_AddWrite(GPUREG_TEXUNIT1_TYPE, colorType);
97       GPUCMD_AddWrite(GPUREG_TEXUNIT1_ADDR, ((u32)data)>>3);
98       GPUCMD_AddWrite(GPUREG_TEXUNIT1_DIM, (height)|(width<<16));
99       GPUCMD_AddWrite(GPUREG_TEXUNIT1_PARAM, param);
100       break;
101 
102    case GPU_TEXUNIT2:
103       GPUCMD_AddWrite(GPUREG_TEXUNIT2_TYPE, colorType);
104       GPUCMD_AddWrite(GPUREG_TEXUNIT2_ADDR, ((u32)data)>>3);
105       GPUCMD_AddWrite(GPUREG_TEXUNIT2_DIM, (height)|(width<<16));
106       GPUCMD_AddWrite(GPUREG_TEXUNIT2_PARAM, param);
107       break;
108    }
109 }
110 
111 __attribute__((always_inline))
ctrGuSetCommandList_First(bool queued,u32 * buf0a,u32 buf0s,u32 * buf1a,u32 buf1s,u32 * buf2a,u32 buf2s)112 static INLINE Result ctrGuSetCommandList_First(bool queued, u32* buf0a, u32 buf0s, u32* buf1a, u32 buf1s, u32* buf2a, u32 buf2s)
113 {
114    u32 gxCommand[0x8];
115    gxCommand[0]=0x05 | (queued? 0x01000000 : 0x0); //CommandID
116    gxCommand[1]=(u32)buf0a; //buf0 address
117    gxCommand[2]=(u32)buf0s; //buf0 size
118    gxCommand[3]=(u32)buf1a; //buf1 address
119    gxCommand[4]=(u32)buf1s; //buf1 size
120    gxCommand[5]=(u32)buf2a; //buf2 address
121    gxCommand[6]=(u32)buf2s; //buf2 size
122    gxCommand[7]=0x0;
123 
124    return ctrGspSubmitGxCommand(gxCommand);
125 }
126 
127 __attribute__((always_inline))
ctrGuSetCommandList_Last(bool queued,u32 * buf0a,u32 buf0s,u8 flags)128 static INLINE Result ctrGuSetCommandList_Last(bool queued, u32* buf0a, u32 buf0s, u8 flags)
129 {
130    u32 gxCommand[0x8];
131    gxCommand[0]=0x01 | (queued? 0x01000000 : 0x0); //CommandID
132    gxCommand[1]=(u32)buf0a; //buf0 address
133    gxCommand[2]=(u32)buf0s; //buf0 size
134    gxCommand[3]=flags&1; //written to GSP module state
135    gxCommand[4]=gxCommand[5]=gxCommand[6]=0x0;
136    gxCommand[7]=(flags>>1)&1; //when non-zero, call svcFlushProcessDataCache() with the specified buffer
137 
138    return ctrGspSubmitGxCommand(gxCommand);
139 }
140 
141 __attribute__((always_inline))
ctrGuFlushAndRun(bool queued)142 static INLINE void ctrGuFlushAndRun(bool queued)
143 {
144    //take advantage of GX_SetCommandList_First to flush gsp heap
145    ctrGuSetCommandList_First(queued, gpuCmdBuf, gpuCmdBufOffset*4, (u32*)__linear_heap, __linear_heap_size, NULL, 0);
146    ctrGuSetCommandList_Last(queued, gpuCmdBuf, gpuCmdBufOffset*4, 0x0);
147 }
148 
149 __attribute__((always_inline))
ctrGuSetMemoryFill(bool queued,u32 * buf0a,u32 buf0v,u32 * buf0e,u16 width0,u32 * buf1a,u32 buf1v,u32 * buf1e,u16 width1)150 static INLINE Result ctrGuSetMemoryFill(bool queued, u32* buf0a, u32 buf0v, u32* buf0e, u16 width0, u32* buf1a, u32 buf1v, u32* buf1e, u16 width1)
151 {
152    u32 gxCommand[0x8];
153    gxCommand[0]=0x02 | (queued? 0x01000000 : 0x0); //CommandID
154    gxCommand[1]=(u32)buf0a; //buf0 address
155    gxCommand[2]=buf0v; //buf0 value
156    gxCommand[3]=(u32)buf0e; //buf0 end addr
157    gxCommand[4]=(u32)buf1a; //buf1 address
158    gxCommand[5]=buf1v; //buf1 value
159    gxCommand[6]=(u32)buf1e; //buf1 end addr
160    gxCommand[7]=(width0)|(width1<<16);
161 
162    return ctrGspSubmitGxCommand(gxCommand);
163 }
164 
165 __attribute__((always_inline))
ctrGuCopyImage(bool queued,const void * src,int src_w,int src_h,int src_fmt,bool src_is_tiled,void * dst,int dst_w,int dst_fmt,bool dst_is_tiled)166 static INLINE Result ctrGuCopyImage
167       (bool queued,
168        const void* src, int src_w, int src_h, int src_fmt, bool src_is_tiled,
169              void* dst, int dst_w,            int dst_fmt, bool dst_is_tiled)
170 {
171    u32 gxCommand[0x8];
172    gxCommand[0]=0x03 | (queued? 0x01000000 : 0x0); //CommandID
173    gxCommand[1]=(u32)src;
174    gxCommand[2]=(u32)dst;
175    gxCommand[3]=dst_w&0xFF8;
176    gxCommand[4]=CTRGU_SIZE(src_w, src_h);
177    gxCommand[5]=(src_fmt << 8)|(dst_fmt << 12)
178                 | ((src_is_tiled == dst_is_tiled)? CTRGU_DMA_CONVERT_NONE
179                      : src_is_tiled? CTRGU_DMA_T_TO_L
180                      : CTRGU_DMA_L_TO_T)
181                 | ((dst_w > src_w) ? CTRGU_DMA_TRUNCATE : 0);
182    gxCommand[6]=gxCommand[7]=0x0;
183 
184    return ctrGspSubmitGxCommand(gxCommand);
185 
186 }
187 
188 __attribute__((always_inline))
ctrGuDisplayTransfer(bool queued,void * src,int src_w,int src_h,int src_fmt,void * dst,int dst_w,int dst_fmt,int multisample_lvl)189 static INLINE Result ctrGuDisplayTransfer
190      (bool queued,
191       void* src, int src_w, int src_h, int src_fmt,
192       void* dst, int dst_w,            int dst_fmt, int multisample_lvl)
193 {
194    u32 gxCommand[0x8];
195    gxCommand[0]=0x03 | (queued? 0x01000000 : 0x0); //CommandID
196    gxCommand[1]=(u32)src;
197    gxCommand[2]=(u32)dst;
198    gxCommand[3]=CTRGU_SIZE(dst_w, 0);
199    gxCommand[4]=CTRGU_SIZE(src_w, src_h);
200    gxCommand[5]=(src_fmt << 8) | (dst_fmt << 12) | multisample_lvl;
201    gxCommand[6]=gxCommand[7]=0x0;
202 
203    return ctrGspSubmitGxCommand(gxCommand);
204 
205 }
206 
207 __attribute__((always_inline))
ctrGuSetVertexShaderFloatUniform(int id,float * data,int count)208 static INLINE void ctrGuSetVertexShaderFloatUniform(int id, float* data, int count)
209 {
210    GPUCMD_AddWrite(GPUREG_VSH_FLOATUNIFORM_CONFIG, 0x80000000|(u32)id);
211    GPUCMD_AddWrites(GPUREG_VSH_FLOATUNIFORM_DATA, (u32*)data, (u32)count * 4);
212 }
213 
/* Packs an attribute format: the low two bits of f go into bits 0-1 and
 * (n - 1) goes into bits 2 and up. */
#define CTRGU_ATTRIBFMT(f, n) ((((n)-1)<<2)|((f)&3))
215 
216 __attribute__((always_inline))
ctrGuSetAttributeBuffers(u32 total_attributes,void * base_address,u64 attribute_formats,u32 buffer_size)217 static INLINE void ctrGuSetAttributeBuffers(u32 total_attributes,
218       void* base_address, u64 attribute_formats, u32 buffer_size)
219 {
220    u32 param[0x28];
221 
222    memset(param, 0x00, sizeof(param));
223 
224    param[0x0]=((u32)base_address)>>3;
225    param[0x1]=attribute_formats & 0xFFFFFFFF;
226    param[0x2]=((total_attributes-1)<<28)|0xFFF0000|((attribute_formats>>32)&0xFFFF);
227    param[0x4]=0x76543210;
228    param[0x5]=(total_attributes<<28)|((buffer_size&0xFFF)<<16)|0xBA98;
229 
230    GPUCMD_AddIncrementalWrites(GPUREG_ATTRIBBUFFERS_LOC, param, 0x00000027);
231    GPUCMD_AddMaskedWrite(GPUREG_VSH_INPUTBUFFER_CONFIG, 0xB, 0xA0000000|(total_attributes-1));
232    GPUCMD_AddWrite(GPUREG_VSH_NUM_ATTR, (total_attributes-1));
233    GPUCMD_AddIncrementalWrites(GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){0x76543210, 0xBA98}), 2);
234 }
235 
236 __attribute__((always_inline))
ctrGuSetAttributeBuffersAddress(u32 * baseAddress)237 static INLINE void ctrGuSetAttributeBuffersAddress(u32* baseAddress)
238 {
239    GPUCMD_AddWrite(GPUREG_ATTRIBBUFFERS_LOC, ((u32)baseAddress)>>3);
240 }
241 
242 __attribute__((always_inline))
ctrGuSetVshGsh(shaderProgram_s * sp,DVLB_s * dvlb,u32 vsh_output_count,u32 gsh_input_count)243 static INLINE void ctrGuSetVshGsh(shaderProgram_s* sp, DVLB_s* dvlb, u32 vsh_output_count, u32 gsh_input_count)
244 {
245    dvlb->DVLE[0].outmapData[0] = vsh_output_count;
246    dvlb->DVLE[0].outmapMask = (1 << vsh_output_count) - 1;
247    shaderProgramInit(sp);
248    shaderProgramSetVsh(sp, &dvlb->DVLE[0]);
249    shaderProgramSetGsh(sp, &dvlb->DVLE[1], gsh_input_count);
250 }
251 
/* Maps pixel coordinates (x, y) in an image of the given width to an index
 * in an 8x8-tiled layout. Inside a tile the low three bits of x and y are
 * interleaved (x in even bit positions, y in odd ones); tiles hold 64
 * entries each and are laid out row by row, (width >> 3) tiles per row. */
__attribute__((always_inline))
static INLINE int ctrgu_swizzle_coords(int x, int y, int width)
{
   int tile_col      = x >> 3;
   int tile_row      = y >> 3;
   int row_stride    = (width >> 3) << 6;
   int in_tile       = 0;

   /* x bits 0..2 -> result bits 0, 2, 4 */
   in_tile |= (x & 0x1) << 0;
   in_tile |= (x & 0x2) << 1;
   in_tile |= (x & 0x4) << 2;

   /* y bits 0..2 -> result bits 1, 3, 5 */
   in_tile |= (y & 0x1) << 1;
   in_tile |= (y & 0x2) << 2;
   in_tile |= (y & 0x4) << 3;

   return (tile_col << 6) + (tile_row * row_stride) + in_tile;
}
261 
262 #endif // CTR_GU_H
263