1 #ifndef H_GAPI_GXM
2 #define H_GAPI_GXM
3 
4 #include "core.h"
5 
6 #include <psp2/display.h>
7 #include <psp2/gxm.h>
8 #include <psp2/gxt.h>
9 
10 #define PROFILE_MARKER(title)
11 #define PROFILE_LABEL(id, name, label)
12 #define PROFILE_TIMING(time)
13 
14 #define DISPLAY_WIDTH           960
15 #define DISPLAY_HEIGHT          544
16 #define DISPLAY_STRIDE          1024
17 #define DISPLAY_BUFFER_COUNT    2
18 #define DISPLAY_COLOR_FORMAT    SCE_GXM_COLOR_FORMAT_A8B8G8R8
19 #define DISPLAY_PIXEL_FORMAT    SCE_DISPLAY_PIXELFORMAT_A8B8G8R8
20 
21 namespace GAPI {
22     #define SHADER_BUFF_SIZE    (64 * 1024)
23     #define SHADER_VERT_SIZE    (64 * 1024)
24     #define SHADER_FRAG_SIZE    (64 * 1024)
25 
// Shader patcher used by Shader to register programs and create vertex/fragment programs.
SceGxmShaderPatcher *shaderPatcher;
// Memory block handles and base pointers — presumably the patcher buffer, vertex USSE and
// fragment USSE blocks sized by SHADER_BUFF_SIZE / SHADER_VERT_SIZE / SHADER_FRAG_SIZE;
// they are set up outside this part of the file (TODO confirm).
SceUID shaderBuffUID, shaderVertUID, shaderFragUID;
void *shaderBuffPtr, *shaderVertPtr, *shaderFragPtr;
29 
30     using namespace Core;
31 
32     #define PASS_CLEAR Core::Pass(0xFF)
33 
34     typedef ::Vertex Vertex;
35 
// Per-frame payload: SwapChain::present() fills it in and displayCallback() reads it back.
struct DisplayData {
    void *addr; // color buffer memory, used as the frame buffer base address
};
39 
displayCallback(const void * callbackData)40     void displayCallback(const void *callbackData) {
41         SceDisplayFrameBuf display_fb;
42         const DisplayData *cb_data = (DisplayData*)callbackData;
43 
44         memset(&display_fb, 0, sizeof(display_fb));
45         display_fb.size        = sizeof(display_fb);
46         display_fb.base        = cb_data->addr;
47         display_fb.pitch       = DISPLAY_STRIDE;
48         display_fb.pixelformat = DISPLAY_PIXEL_FORMAT;
49         display_fb.width       = DISPLAY_WIDTH;
50         display_fb.height      = DISPLAY_HEIGHT;
51 
52         sceDisplaySetFrameBuf(&display_fb, SCE_DISPLAY_SETBUF_NEXTFRAME);
53 
54         if (Core::settings.detail.vsync) {
55             sceDisplayWaitVblankStart();
56         }
57     }
58 
59     namespace Context {
SceGxmContext *gxmContext; // rendering context created by init() and destroyed by deinit()

// memory block handles of the context ring buffers (written by init(), released by deinit())
SceUID vdmRingBufferUID;
SceUID vertexRingBufferUID;
SceUID fragmentRingBufferUID;
SceUID fragmentUsseRingBufferUID;

// namespace-level pointers for the ring buffer memory
void *vdmRingBuffer;
void *vertexRingBuffer;
void *fragmentRingBuffer;
void *fragmentUsseRingBuffer;
71 
// GPU memory block whose release was postponed through freeGPU(uid, true).
struct PendingResource {
    int    frameIndex; // Core::stats.frameIndex at the moment the release was requested
    SceUID uid;        // handle of the memory block
};

Array<PendingResource> pendings; // postponed releases; filled by freeGPU(), walked by checkPendings() and deinit()
78 
// CPU allocation hook: forwards the request to malloc(). user_data is not used.
void* allocCPU(void *user_data, unsigned int size) {
    void *block = malloc(size);
    return block;
}
82 
// CPU release hook: forwards the pointer to free(). user_data is not used.
void freeCPU(void *user_data, void *mem) {
    free(mem);
}
86 
allocGPU(SceKernelMemBlockType type,unsigned int size,SceGxmMemoryAttribFlags attribs,SceUID * uid)87         void* allocGPU(SceKernelMemBlockType type, unsigned int size, SceGxmMemoryAttribFlags attribs, SceUID *uid) {
88             void *mem;
89 
90             if (type == SCE_KERNEL_MEMBLOCK_TYPE_USER_CDRAM_RW) {
91                 size = ALIGNADDR(size, 256 * 1024);
92             } else {
93                 size = ALIGNADDR(size, 4 * 1024);
94             }
95 
96             *uid = sceKernelAllocMemBlock("gpu_mem", type, size, NULL);
97 
98             if (sceKernelGetMemBlockBase(*uid, &mem) < 0)
99                 return NULL;
100             if (sceGxmMapMemory(mem, size, attribs) < 0)
101                 return NULL;
102 
103             return mem;
104         }
105 
106         void freeGPU(SceUID uid, bool pending = false) {
107             if (pending) {
108                 PendingResource res;
109                 res.frameIndex = Core::stats.frameIndex;
110                 res.uid        = uid;
111                 pendings.push(res);
112                 return;
113             }
114 
115             void *mem = NULL;
116             if (sceKernelGetMemBlockBase(uid, &mem) < 0)
117                 return;
118 
119             SceKernelMemBlockInfo Info;
120             Info.size = sizeof(Info);
121             sceKernelGetMemBlockInfoByAddr(mem, &Info);
122 
123             sceGxmUnmapMemory(mem);
124             sceKernelFreeMemBlock(uid);
125         }
126 
allocVertexUSSE(unsigned int size,SceUID * uid,unsigned int * usse_offset)127         void* allocVertexUSSE(unsigned int size, SceUID *uid, unsigned int *usse_offset) {
128             void *mem = NULL;
129 
130             size = ALIGNADDR(size, 4096);
131 
132             *uid = sceKernelAllocMemBlock("vertex_usse", SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE, size, NULL);
133 
134             if (sceKernelGetMemBlockBase(*uid, &mem) < 0)
135                 return NULL;
136             if (sceGxmMapVertexUsseMemory(mem, size, usse_offset) < 0)
137                 return NULL;
138 
139             return mem;
140         }
141 
freeVertexUSSE(SceUID uid)142         void freeVertexUSSE(SceUID uid) {
143             void *mem = NULL;
144             if (sceKernelGetMemBlockBase(uid, &mem) < 0)
145                 return;
146 
147             SceKernelMemBlockInfo Info;
148             Info.size = sizeof(Info);
149             sceKernelGetMemBlockInfoByAddr(mem, &Info);
150 
151             sceGxmUnmapVertexUsseMemory(mem);
152             sceKernelFreeMemBlock(uid);
153         }
154 
allocFragmentUSSE(unsigned int size,SceUID * uid,unsigned int * usse_offset)155         void* allocFragmentUSSE(unsigned int size, SceUID *uid, unsigned int *usse_offset) {
156             void *mem = NULL;
157 
158             size = ALIGNADDR(size, 4096);
159 
160             *uid = sceKernelAllocMemBlock("fragment_usse", SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE, size, NULL);
161 
162             if (sceKernelGetMemBlockBase(*uid, &mem) < 0)
163                 return NULL;
164             if (sceGxmMapFragmentUsseMemory(mem, size, usse_offset) < 0)
165                 return NULL;
166 
167             return mem;
168         }
169 
freeFragmentUSSE(SceUID uid)170         void freeFragmentUSSE(SceUID uid) {
171             void *mem = NULL;
172             if (sceKernelGetMemBlockBase(uid, &mem) < 0)
173                 return;
174 
175             SceKernelMemBlockInfo Info;
176             Info.size = sizeof(Info);
177             sceKernelGetMemBlockInfoByAddr(mem, &Info);
178 
179             sceGxmUnmapFragmentUsseMemory(mem);
180             sceKernelFreeMemBlock(uid);
181         }
182 
// Lookup table with 4096 entries built by initSwizzleTable(); every entry only has bits of
// the 0x55555555 mask set.
uint32 *SWIZZE_TABLE = NULL;
// Offset of texel (x, y) inside a swizzled tile: the table entry for x shifted left by one
// bit, combined with the table entry for y.
#define SWIZZLE(x, y) ((SWIZZE_TABLE[(x)] << 1) | (SWIZZE_TABLE[(y)]))
185 
initSwizzleTable()186         void initSwizzleTable() {
187             SWIZZE_TABLE = new uint32[4096];
188             uint32 value = 0;
189             for (int i = 0; i < 4096; i++) {
190                 SWIZZE_TABLE[i] = value;
191                 value += 0x2AAAAAAB;
192                 value &= 0x55555555;
193             }
194         }
195 
freeSwizzleTable()196         void freeSwizzleTable() {
197             delete[] SWIZZE_TABLE;
198         }
199 
init()200         void init() {
201             void *vdmRingBuffer = allocGPU(
202                 SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE, SCE_GXM_DEFAULT_VDM_RING_BUFFER_SIZE,
203                 SCE_GXM_MEMORY_ATTRIB_READ, &vdmRingBufferUID);
204 
205             void *vertexRingBuffer = allocGPU(
206                 SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE, SCE_GXM_DEFAULT_VERTEX_RING_BUFFER_SIZE,
207                 SCE_GXM_MEMORY_ATTRIB_READ, &vertexRingBufferUID);
208 
209             void *fragmentRingBuffer = allocGPU(
210                 SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE, SCE_GXM_DEFAULT_FRAGMENT_RING_BUFFER_SIZE,
211                 SCE_GXM_MEMORY_ATTRIB_READ, &fragmentRingBufferUID);
212 
213             unsigned int offset;
214             void *fragmentUsseRingBuffer = allocFragmentUSSE(SCE_GXM_DEFAULT_FRAGMENT_USSE_RING_BUFFER_SIZE,
215                 &fragmentUsseRingBufferUID, &offset);
216 
217             SceGxmContextParams params;
218             memset(&params, 0, sizeof(params));
219             params.hostMem                       = malloc(SCE_GXM_MINIMUM_CONTEXT_HOST_MEM_SIZE);
220             params.hostMemSize                   = SCE_GXM_MINIMUM_CONTEXT_HOST_MEM_SIZE;
221             params.vdmRingBufferMem              = vdmRingBuffer;
222             params.vdmRingBufferMemSize          = SCE_GXM_DEFAULT_VDM_RING_BUFFER_SIZE;
223             params.vertexRingBufferMem           = vertexRingBuffer;
224             params.vertexRingBufferMemSize       = SCE_GXM_DEFAULT_VERTEX_RING_BUFFER_SIZE;
225             params.fragmentRingBufferMem         = fragmentRingBuffer;
226             params.fragmentRingBufferMemSize     = SCE_GXM_DEFAULT_FRAGMENT_RING_BUFFER_SIZE;
227             params.fragmentUsseRingBufferMem     = fragmentUsseRingBuffer;
228             params.fragmentUsseRingBufferMemSize = SCE_GXM_DEFAULT_FRAGMENT_USSE_RING_BUFFER_SIZE;
229             params.fragmentUsseRingBufferOffset  = offset;
230 
231             sceGxmCreateContext(&params, &gxmContext);
232 
233             initSwizzleTable();
234         }
235 
deinit()236         void deinit() {
237             for (int i = 0; i < pendings.length; i++)
238                 freeGPU(pendings[i].uid);
239             pendings.clear();
240             freeGPU(vdmRingBufferUID);
241             freeGPU(vertexRingBufferUID);
242             freeGPU(fragmentRingBufferUID);
243             freeFragmentUSSE(fragmentUsseRingBufferUID);
244             sceGxmDestroyContext(gxmContext);
245             freeSwizzleTable();
246         }
247 
checkPendings()248         void checkPendings() {
249             int i = 0;
250             while (i < pendings.length) {
251                 if (pendings[i].frameIndex + DISPLAY_BUFFER_COUNT <= Core::stats.frameIndex) {
252                     pendings.removeFast(i);
253                 } else {
254                     i++;
255                 }
256             }
257         }
258 
259         template <typename T>
swizzleTiles(T * dst,T * src,int width,int tilesX,int tilesY)260         void swizzleTiles(T *dst, T *src, int width, int tilesX, int tilesY) {
261             int tileSize = width / tilesX;
262             int tileArea = SQR(tileSize);
263 
264             for (int j = 0; j < tilesY; j++) {
265                 for (int i = 0; i < tilesX; i++) {
266                     T *tilePtr = dst + ((tileArea * tilesX) * j) + (tileArea * i);
267 
268                     for (int y = 0; y < tileSize; y++) {
269                         T *ptr = src + (width * (tileSize * j)) + (tileSize * i) + width * y;
270 
271                         for (int x = 0; x < tileSize; x++)
272                             *(tilePtr + SWIZZLE(x, y)) = *ptr++;
273                     }
274                 }
275             }
276         }
277 
swizzleImage(void * dst,void * src,int width,int height,int bpp)278         void swizzleImage(void *dst, void *src, int width, int height, int bpp) {
279             ASSERT(SWIZZLE_TABLE);
280 
281             int tilesX, tilesY;
282 
283             if (width > height) {
284                 tilesX = width / height;
285                 tilesY = 1;
286             } else {
287                 tilesX = 1;
288                 tilesY = height / width;
289             }
290 
291             switch (bpp) {
292                 case  8 : swizzleTiles(  (uint8*) dst,  (uint8*) src, width, tilesX, tilesY ); break;
293                 case 16 : swizzleTiles( (uint16*) dst, (uint16*) src, width, tilesX, tilesY ); break;
294                 case 32 : swizzleTiles( (uint32*) dst, (uint32*) src, width, tilesX, tilesY ); break;
295             }
296         }
297 
298         #define TILE_SIZE 32
299 
tileImage(void * dst,void * src,int width,int height,int bpp)300         void tileImage(void *dst, void *src, int width, int height, int bpp) {
301             int tilesX = width  / TILE_SIZE;
302             int tilesY = height / TILE_SIZE;
303 
304             uint8 *tilePtr = (uint8*)dst;
305             for (int y = 0; y < tilesY; y++) {
306                 for (int x = 0; x < tilesX; x++) {
307                     uint8 *ptr = (uint8*)src + (width * y + x) * TILE_SIZE * bpp / 8;
308 
309                     for (int i = 0; i < TILE_SIZE; i++) {
310                         memcpy(tilePtr, ptr, TILE_SIZE * bpp / 8);
311                         ptr += width * bpp / 8;
312                         tilePtr += TILE_SIZE * bpp / 8;
313                     }
314                 }
315             }
316         }
317 
318         #undef TILE_SIZE
319     };
320 
321     namespace SwapChain {
322 
// The depth/stencil buffer of the swap chain (a single instance).
struct DepthBuffer {
    SceUID                    uid;     // memory block handle filled in by Context::allocGPU
    SceGxmDepthStencilSurface surface; // surface description set up in init()
    void                      *data;   // buffer memory returned by Context::allocGPU
} depthBuffer;
328 
// One displayable color buffer; DISPLAY_BUFFER_COUNT of them exist.
struct ColorBuffer {
    SceUID             uid;      // memory block handle filled in by Context::allocGPU
    SceGxmColorSurface surface;  // surface description set up in init()
    void               *data;    // buffer memory returned by Context::allocGPU
    SceGxmSyncObject   *syncObj; // sync object created together with the buffer
} colorBuffers[DISPLAY_BUFFER_COUNT];
335 
// bufferIndex: color buffer used by the getters below; bufferIndexLast: value bufferIndex had
// before the most recent present()
uint32 bufferIndex, bufferIndexLast;
SceGxmRenderTarget *defaultTarget; // render target created in init() with the display size
338 
init()339         void init() {
340             bufferIndex = bufferIndexLast = 0;
341 
342             SceGxmRenderTargetParams params;
343             memset(&params, 0, sizeof(params));
344             params.width           = DISPLAY_WIDTH;
345             params.height          = DISPLAY_HEIGHT;
346             params.scenesPerFrame  = 1;
347             params.multisampleMode = SCE_GXM_MULTISAMPLE_NONE;
348             params.driverMemBlock  = -1;
349 
350             sceGxmCreateRenderTarget(&params, &defaultTarget);
351 
352             for (int i = 0; i < DISPLAY_BUFFER_COUNT; i++) {
353                 ColorBuffer &color = colorBuffers[i];
354 
355                 color.data = Context::allocGPU(
356                     SCE_KERNEL_MEMBLOCK_TYPE_USER_CDRAM_RW,
357                     4 * DISPLAY_STRIDE * DISPLAY_HEIGHT,
358                     SCE_GXM_MEMORY_ATTRIB_RW,
359                     &color.uid);
360 
361                 sceGxmColorSurfaceInit(&color.surface,
362                     DISPLAY_COLOR_FORMAT,
363                     SCE_GXM_COLOR_SURFACE_LINEAR,
364                     SCE_GXM_COLOR_SURFACE_SCALE_NONE,
365                     SCE_GXM_OUTPUT_REGISTER_SIZE_32BIT,
366                     DISPLAY_WIDTH,
367                     DISPLAY_HEIGHT,
368                     DISPLAY_STRIDE,
369                     color.data);
370 
371                 sceGxmSyncObjectCreate(&color.syncObj);
372             }
373 
374             uint32 dsWidth   = ALIGNADDR(DISPLAY_WIDTH,  SCE_GXM_TILE_SIZEX);
375             uint32 dsHeight  = ALIGNADDR(DISPLAY_HEIGHT, SCE_GXM_TILE_SIZEY);
376 
377             depthBuffer.data = Context::allocGPU(
378                 SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE,
379                 4 * dsWidth * dsHeight,
380                 SCE_GXM_MEMORY_ATTRIB_RW,
381                 &depthBuffer.uid);
382 
383             sceGxmDepthStencilSurfaceInit(&depthBuffer.surface,
384                 SCE_GXM_DEPTH_STENCIL_FORMAT_S8D24, SCE_GXM_DEPTH_STENCIL_SURFACE_TILED,
385                 dsWidth, depthBuffer.data, NULL);
386         }
387 
deinit()388         void deinit() {
389             Context::freeGPU(depthBuffer.uid);
390 
391             for (int i = 0; i < DISPLAY_BUFFER_COUNT; i++) {
392                 ColorBuffer &color = colorBuffers[i];
393                 Context::freeGPU(color.uid);
394                 sceGxmSyncObjectDestroy(color.syncObj);
395             }
396             sceGxmDestroyRenderTarget(defaultTarget);
397         }
398 
getSyncObj()399         SceGxmSyncObject *getSyncObj() {
400             return colorBuffers[bufferIndex].syncObj;
401         }
402 
getLastSyncObj()403         SceGxmSyncObject *getLastSyncObj() {
404             return colorBuffers[bufferIndexLast].syncObj;
405         }
406 
getColorSurface()407         SceGxmColorSurface* getColorSurface() {
408             return &colorBuffers[bufferIndex].surface;
409         }
410 
getColorSurfaceData()411         void* getColorSurfaceData() {
412             return colorBuffers[bufferIndex].data;
413         }
414 
getDepthSurface()415         SceGxmDepthStencilSurface* getDepthSurface() {
416             return &depthBuffer.surface;
417         }
418 
present()419         void present() {
420             sceGxmPadHeartbeat(getColorSurface(), getSyncObj());
421 
422             DisplayData displayData;
423             displayData.addr = getColorSurfaceData();
424             sceGxmDisplayQueueAddEntry(getLastSyncObj(), getSyncObj(), &displayData);
425 
426             bufferIndexLast = bufferIndex;
427             bufferIndex = (bufferIndex + 1) % DISPLAY_BUFFER_COUNT;
428         }
429     };
430 
431 // Shader
432     #include "shaders/gxm/shaders.h"
433 
// For each uniform (indexed by UniformType): index of its first vec4 inside Shader::cbMem.
static const int bindings[uMAX] = {
     0, // uParam
     1, // uTexParam
     2, // uViewProj
     6, // uBasis
    70, // uLightProj
    74, // uMaterial
    75, // uAmbient
    81, // uFogParams
    82, // uViewPos
    83, // uLightPos
    87, // uLightColor
    91, // uRoomSize
    92, // uPosScale
    98, // uContacts
};
450 
451     struct Shader {
SceGxmVertexProgram    *vp;            // vertex program created by init()
SceGxmShaderPatcherId  vpUID;          // patcher id of the registered vertex program
SceGxmProgram          *vpPtr, *fpPtr; // vertex / fragment program binaries selected by init()

// Fragment program for one blend state, created on first use by setBlendInfo().
struct PSO {
    SceGxmFragmentProgram  *fp;    // NULL until created
    SceGxmShaderPatcherId  fpUID;  // patcher id registered for fp
} pso[2 * bmMAX]; // first bmMAX slots: color mask SCE_GXM_COLOR_MASK_ALL, remaining slots: any other mask

// per-uniform parameter handles looked up by name in init(); checked for NULL before use
const SceGxmProgramParameter *vParams[uMAX];
const SceGxmProgramParameter *fParams[uMAX];

vec4  cbMem[98 + MAX_CONTACTS]; // CPU copy of the uniform values, addressed through bindings[]
int   cbCount[uMAX];            // vec4 count stored per uniform by setParam(); 0 = skipped by validate()

SceGxmOutputRegisterFormat outputFmt; // output register format used when creating fragment programs

int colorMask, blendMode; // state of the last setBlendInfo() call; -1 after init()
int psoIndex;             // slot of pso[] selected by setBlendInfo()

bool rebind;              // set when validate() has to set the programs on the context again
473 
initShader474         void init(Pass pass, int type, int *def, int defCount) {
475             memset(pso, 0, sizeof(pso));
476 
477             outputFmt = SCE_GXM_OUTPUT_REGISTER_FORMAT_UCHAR4;
478 
479             bool underwater = false;
480             bool alphatest  = false;
481 
482             for (int i = 0; i < defCount; i++) {
483                 switch (def[i]) {
484                     case SD_UNDERWATER : underwater = true; break;
485                     case SD_ALPHA_TEST : alphatest  = true; break;
486                 }
487             }
488 
489             #define SHADER(S,P)    S##_##P
490             #define SHADER_A(S,P)  (alphatest  ? SHADER(S##_a,P) : SHADER(S,P))
491             #define SHADER_U(S,P)  (underwater ? SHADER(S##_u,P) : SHADER(S,P))
492             #define SHADER_AU(S,P) ((underwater && alphatest) ? SHADER(S##_au,P) : (alphatest ? SHADER(S##_a,P) : SHADER_U(S,P)))
493 
494             const uint8 *vSrc = NULL, *fSrc = NULL;
495             switch (pass) {
496                 case passCompose :
497                     switch (type) {
498                         case 0  : vSrc = SHADER_U ( compose_sprite, v );  fSrc = SHADER_AU ( compose_sprite, f ); break;
499                         case 1  : vSrc = SHADER   ( compose_flash,  v );  fSrc = SHADER    ( compose_flash,  f ); break;
500                         case 2  : vSrc = SHADER_U ( compose_room,   v );  fSrc = SHADER_AU ( compose_room,   f ); break;
501                         case 3  : vSrc = SHADER_U ( compose_entity, v );  fSrc = SHADER_AU ( compose_entity, f ); break;
502                         case 4  : vSrc = SHADER   ( compose_mirror, v );  fSrc = SHADER    ( compose_mirror, f ); break;
503                         default : ASSERT(false);
504                     }
505                     break;
506                 case passShadow :
507                     switch (type) {
508                         case 3  :
509                         case 4  : vSrc = SHADER ( shadow_entity, v );  fSrc = SHADER ( shadow_entity, f ); break;
510                         default : ASSERT(false);
511                     }
512                     break;
513                 case passAmbient :
514                     switch (type) {
515                         case 0  : vSrc = SHADER ( ambient_sprite, v );  fSrc = SHADER_A ( ambient_sprite, f ); break;
516                         case 1  : vSrc = SHADER ( ambient_room,   v );  fSrc = SHADER   ( ambient_room,   f ); break; // TYPE_FLASH (sky)
517                         case 2  : vSrc = SHADER ( ambient_room,   v );  fSrc = SHADER_A ( ambient_room,   f ); break;
518                         default : ASSERT(false);
519                     }
520                     break;
521                 case passWater :
522                     switch (type) {
523                         case 0  : vSrc = SHADER ( water_drop,     v );  fSrc = SHADER ( water_drop,     f ); break;
524                         case 1  : vSrc = SHADER ( water_simulate, v );  fSrc = SHADER ( water_simulate, f ); break;
525                         case 2  : vSrc = SHADER ( water_caustics, v );  fSrc = SHADER ( water_caustics, f ); break;
526                         case 3  : vSrc = SHADER ( water_rays,     v );  fSrc = SHADER ( water_rays,     f ); break;
527                         case 4  : vSrc = SHADER ( water_mask,     v );  fSrc = SHADER ( water_mask,     f ); break;
528                         case 5  : vSrc = SHADER ( water_compose,  v );  fSrc = SHADER ( water_compose,  f ); break;
529                         default : ASSERT(false);
530                     }
531                     break;
532                 case passFilter :
533                     switch (type) {
534                         case 0  : vSrc = SHADER ( filter_upscale,    v );  fSrc = SHADER ( filter_upscale,    f ); break;
535                         case 1  : vSrc = SHADER ( filter_downsample, v );  fSrc = SHADER ( filter_downsample, f ); break;
536                         case 3  : vSrc = SHADER ( filter_grayscale,  v );  fSrc = SHADER ( filter_grayscale,  f ); break;
537                         case 4  : vSrc = SHADER ( filter_blur,       v );  fSrc = SHADER ( filter_blur,       f ); break;
538                         case 5  : vSrc = SHADER ( filter_blur,       v );  fSrc = SHADER ( filter_blur,       f ); break;
539                         default : ASSERT(false);
540                     }
541                     break;
542                 case passGUI    : vSrc = SHADER ( gui,   v );  fSrc = SHADER ( gui,   f ); break;
543                 case passSky    : vSrc = SHADER ( gui,   v );  fSrc = SHADER ( gui,   f ); break;
544                 case PASS_CLEAR : vSrc = SHADER ( clear, v );  fSrc = SHADER ( clear, f ); break;
545                 default         : ASSERT(false); LOG("! wrong pass id\n"); return;
546             }
547 
548             #undef SHADER_A
549             #undef SHADER_U
550             #undef SHADER_AU
551 
552             if (pass == passWater && (type == 0 || type == 1)) { // water_simulate & water_drop use half2 render target
553                 outputFmt = SCE_GXM_OUTPUT_REGISTER_FORMAT_HALF2;
554             }
555 
556             vpPtr = (SceGxmProgram*)vSrc;
557             fpPtr = (SceGxmProgram*)fSrc;
558 
559             sceGxmShaderPatcherRegisterProgram(shaderPatcher, vpPtr, &vpUID);
560 
561             SceGxmVertexStream vStream;
562             vStream.stride      = sizeof(Vertex);
563             vStream.indexSource = SCE_GXM_INDEX_SOURCE_INDEX_16BIT;
564 
565             SceGxmVertexAttribute vAttrib[5];
566             int vAttribCount = 0;
567 
568             Vertex *v = NULL;
569 
570             struct AttribDesc {
571                 int index;
572                 int offset;
573                 SceGxmParameterSemantic semantic;
574                 SceGxmAttributeFormat   format;
575             } attribDesc[] = {
576                 { 0, OFFSETOF(Vertex, coord),    SCE_GXM_PARAMETER_SEMANTIC_POSITION, SCE_GXM_ATTRIBUTE_FORMAT_S16  },
577                 { 0, OFFSETOF(Vertex, normal),   SCE_GXM_PARAMETER_SEMANTIC_NORMAL,   SCE_GXM_ATTRIBUTE_FORMAT_S16N },
578                 { 0, OFFSETOF(Vertex, texCoord), SCE_GXM_PARAMETER_SEMANTIC_TEXCOORD, SCE_GXM_ATTRIBUTE_FORMAT_S16  },
579                 { 0, OFFSETOF(Vertex, color),    SCE_GXM_PARAMETER_SEMANTIC_COLOR,    SCE_GXM_ATTRIBUTE_FORMAT_U8N  },
580                 { 1, OFFSETOF(Vertex, light),    SCE_GXM_PARAMETER_SEMANTIC_COLOR,    SCE_GXM_ATTRIBUTE_FORMAT_U8N  },
581             };
582 
583             for (int i = 0; i < COUNT(vAttrib); i++) {
584                 AttribDesc &desc = attribDesc[i];
585                 const SceGxmProgramParameter *param = sceGxmProgramFindParameterBySemantic(vpPtr, desc.semantic, desc.index);
586                 if (!param) continue;
587                 SceGxmVertexAttribute &attrib = vAttrib[vAttribCount++];
588                 attrib.streamIndex    = 0;
589                 attrib.offset         = desc.offset;
590                 attrib.format         = desc.format;
591                 attrib.componentCount = 4;
592                 attrib.regIndex       = sceGxmProgramParameterGetResourceIndex(param);
593             }
594 
595             sceGxmShaderPatcherCreateVertexProgram(shaderPatcher, vpUID, vAttrib, vAttribCount, &vStream, 1, &vp);
596 
597             for (int ut = 0; ut < uMAX; ut++) {
598                 vParams[ut] = sceGxmProgramFindParameterByName(vpPtr, UniformName[ut]);
599                 fParams[ut] = sceGxmProgramFindParameterByName(fpPtr, UniformName[ut]);
600             }
601 
602             colorMask = blendMode = -1;
603         }
604 
deinitShader605         void deinit() {
606             sceGxmDisplayQueueFinish();
607 
608             sceGxmShaderPatcherReleaseVertexProgram(shaderPatcher, vp);
609             sceGxmShaderPatcherUnregisterProgram(shaderPatcher, vpUID);
610 
611             for (int i = 0; i < COUNT(pso); i++) {
612                 if (pso[i].fp) {
613                     sceGxmShaderPatcherReleaseFragmentProgram(shaderPatcher, pso[i].fp);
614                     sceGxmShaderPatcherUnregisterProgram(shaderPatcher, pso[i].fpUID);
615                 }
616             }
617         }
618 
setBlendInfoShader619         void setBlendInfo(int colorMask, int blendMode) {
620             if (this->colorMask == colorMask && this->blendMode == blendMode)
621                 return;
622             this->colorMask = colorMask;
623             this->blendMode = blendMode;
624 
625             psoIndex = 0;
626             switch (blendMode) {
627                 case RS_BLEND_ALPHA   : psoIndex = bmAlpha;   break;
628                 case RS_BLEND_ADD     : psoIndex = bmAdd;     break;
629                 case RS_BLEND_MULT    : psoIndex = bmMult;    break;
630                 case RS_BLEND_PREMULT : psoIndex = bmPremult; break;
631                 default               : psoIndex = bmNone;
632             }
633 
634             if (outputFmt != SCE_GXM_OUTPUT_REGISTER_FORMAT_UCHAR4) {
635                 psoIndex = 0;
636             }
637 
638             if (colorMask != SCE_GXM_COLOR_MASK_ALL) {
639                 psoIndex += bmMAX;
640             }
641 
642             PSO &p = pso[psoIndex];
643 
644             if (!p.fp) {
645                 SceGxmBlendInfo blendInfo;
646                 blendInfo.colorMask = SceGxmColorMask(colorMask);
647                 blendInfo.colorFunc = SCE_GXM_BLEND_FUNC_ADD;
648                 blendInfo.alphaFunc = SCE_GXM_BLEND_FUNC_ADD;
649                 blendInfo.alphaSrc  = SCE_GXM_BLEND_FACTOR_ONE;
650                 blendInfo.alphaDst  = SCE_GXM_BLEND_FACTOR_ZERO;
651 
652                 switch (blendMode) {
653                     case RS_BLEND_ALPHA   :
654                         blendInfo.colorSrc = SCE_GXM_BLEND_FACTOR_SRC_ALPHA;
655                         blendInfo.colorDst = SCE_GXM_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
656                         break;
657                     case RS_BLEND_ADD     :
658                         blendInfo.colorSrc = SCE_GXM_BLEND_FACTOR_ONE;
659                         blendInfo.colorDst = SCE_GXM_BLEND_FACTOR_ONE;
660                         break;
661                     case RS_BLEND_MULT    :
662                         blendInfo.colorSrc = SCE_GXM_BLEND_FACTOR_DST_COLOR;
663                         blendInfo.colorDst = SCE_GXM_BLEND_FACTOR_ZERO;
664                         break;
665                     case RS_BLEND_PREMULT :
666                         blendInfo.colorSrc = SCE_GXM_BLEND_FACTOR_ONE;
667                         blendInfo.colorDst = SCE_GXM_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
668                         break;
669                     default               :
670                         blendInfo.colorSrc = SCE_GXM_BLEND_FACTOR_ONE;
671                         blendInfo.colorDst = SCE_GXM_BLEND_FACTOR_ZERO;
672                 }
673                 createFP(p, &blendInfo);
674             }
675 
676             rebind = true;
677         }
678 
createFPShader679         void createFP(PSO &p, SceGxmBlendInfo *blendInfo) {
680             sceGxmShaderPatcherRegisterProgram(shaderPatcher, fpPtr, &p.fpUID);
681             sceGxmShaderPatcherCreateFragmentProgram(shaderPatcher, p.fpUID, outputFmt, SCE_GXM_MULTISAMPLE_NONE, blendInfo, vpPtr, &p.fp);
682         }
683 
bindShader684         void bind() {
685             if (active.shader != this) {
686                 active.shader = this;
687 
688                 memset(cbCount, 0, sizeof(cbCount));
689 
690                 rebind = true;
691             }
692         }
693 
validateShader694         void validate() {
695             if (rebind) {
696                 sceGxmSetVertexProgram(Context::gxmContext, vp);
697                 sceGxmSetFragmentProgram(Context::gxmContext, pso[psoIndex].fp);
698                 rebind = false;
699             }
700 
701             for (int uType = 0; uType < uMAX; uType++) {
702                 if (!cbCount[uType]) continue;
703                 void *buff;
704                 if (vParams[uType]) {
705                     sceGxmReserveVertexDefaultUniformBuffer(Context::gxmContext, &buff);
706                     sceGxmSetUniformDataF(buff, vParams[uType], 0, cbCount[uType] * 4, (float*)(cbMem + bindings[uType]));
707                 }
708                 if (fParams[uType]) {
709                     sceGxmReserveFragmentDefaultUniformBuffer(Context::gxmContext, &buff);
710                     sceGxmSetUniformDataF(buff, fParams[uType], 0, cbCount[uType] * 4, (float*)(cbMem + bindings[uType]));
711                 }
712             }
713         }
714 
setParamShader715         void setParam(UniformType uType, float *value, int count) {
716             cbCount[uType] = count;
717             memcpy(cbMem + bindings[uType], value, count * 16);
718         }
719 
720         void setParam(UniformType uType, const vec4 &value, int count = 1) {
721             setParam(uType, (float*)&value, count);
722         }
723 
724         void setParam(UniformType uType, const mat4  &value, int count = 1) {
725             setParam(uType, (float*)&value, count * 4);
726         }
727     };
728 
729 // Texture
// Per texture format (one row per format up to FMT_MAX, in the order given by the row
// comments): bits per pixel, GXM texture format, and the GXM format used when rendering into it.
static const struct FormatDesc {
    uint32 bpp, textureFormat, targetFormat;
} formats[FMT_MAX] = {
    {  8, SCE_GXM_TEXTURE_FORMAT_U8_1RRR       , SCE_GXM_COLOR_FORMAT_U8_R          }, // LUMINANCE
    { 32, SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_ABGR , SCE_GXM_COLOR_FORMAT_A8B8G8R8      }, // RGBA
    { 16, SCE_GXM_TEXTURE_FORMAT_U5U6U5_BGR    , SCE_GXM_COLOR_FORMAT_U5U6U5_BGR    }, // RGB16
    { 16, SCE_GXM_TEXTURE_FORMAT_U1U5U5U5_ABGR , SCE_GXM_COLOR_FORMAT_U1U5U5U5_ABGR }, // RGBA16
    { 64, SCE_GXM_TEXTURE_FORMAT_F32F32_GR     , SCE_GXM_COLOR_FORMAT_F32F32_GR     }, // RG_FLOAT  // not supported
    { 32, SCE_GXM_TEXTURE_FORMAT_F16F16_GR     , SCE_GXM_COLOR_FORMAT_F16F16_GR     }, // RG_HALF
    { 32, SCE_GXM_TEXTURE_FORMAT_F32M_R        , SCE_GXM_DEPTH_STENCIL_FORMAT_DF32  }, // DEPTH  (target is a depth/stencil format)
    { 32, SCE_GXM_TEXTURE_FORMAT_F32M_R        , SCE_GXM_DEPTH_STENCIL_FORMAT_DF32  }, // SHADOW (target is a depth/stencil format)
};
742 
743     struct Texture {
744         SceGxmTexture ID;
745         uint8         *data;
746         SceUID        uid;
747 
748         int           width, height, depth, origWidth, origHeight, origDepth, aWidth, aHeight;
749         TexFormat     fmt;
750         uint32        opt;
751         int           mipCount;
752 
753         SceGxmColorSurface colorSurface;
754         SceGxmRenderTarget *renderTarget;
755 
756         SceUID                    depthBufferUID;
757         SceGxmDepthStencilSurface depthSurface;
758         void                      *depthBufferData;
759 
TextureTexture760         Texture(int width, int height, int depth, uint32 opt) : width(width), height(height), depth(depth), origWidth(width), origHeight(height), origDepth(depth), fmt(FMT_RGBA), opt(opt) { opt |= OPT_NEAREST; }
761 
initTexture762         void init(void *data) {
763             ASSERT((opt & OPT_PROXY) == 0);
764 
765             bool filter     = (opt & OPT_NEAREST) == 0;
766             bool mipmaps    = (opt & OPT_MIPMAPS) != 0;
767             bool isCube     = (opt & OPT_CUBEMAP) != 0;
768             bool isTarget   = (opt & OPT_TARGET)  != 0;
769             bool isDynamic  = (opt & OPT_DYNAMIC) != 0;
770             bool isShadow   = fmt == FMT_SHADOW;
771             bool isTiled    = isTarget;
772             bool isSwizzled = !isDynamic && !isTiled && filter;
773 
774             FormatDesc desc = formats[fmt];
775 
776             if (isSwizzled) {
777                 aWidth  = width  = nextPow2(width);
778                 aHeight = height = nextPow2(height);
779             } else if (isTiled) {
780                 aWidth  = ALIGNADDR(width,  SCE_GXM_TILE_SIZEX);
781                 aHeight = ALIGNADDR(height, SCE_GXM_TILE_SIZEY);
782             } else {
783                 aWidth  = ALIGNADDR(width, 8);
784                 aHeight = height;
785             }
786 
787             int size = 0;
788 
789             if (isCube || isTiled || fmt != FMT_RGBA) {
790                 mipmaps = false;
791             }
792 
793             mipCount = 0;
794             if (mipmaps) {
795                 int w = width;
796                 int h = height;
797                 while (w > 15 && h > 15 && mipCount < 4) {
798                     size += ALIGNADDR(w, 8) * h;
799                     w /= 2;
800                     h /= 2;
801                     mipCount++;
802                 }
803             } else {
804                 size += aWidth * aHeight;
805             }
806 
807             if (mipCount > 1) {
808                 isSwizzled = false;
809             }
810 
811             size *= desc.bpp / 8;
812 
813             if (isCube) {
814                 size *= 6;
815             }
816 
817             SceGxmMemoryAttribFlags flags = (isTarget || mipCount > 1) ? SCE_GXM_MEMORY_ATTRIB_RW : SCE_GXM_MEMORY_ATTRIB_READ;
818             this->data = (uint8*)Context::allocGPU(SCE_KERNEL_MEMBLOCK_TYPE_USER_CDRAM_RW, size, flags, &uid);
819 
820             if (data && this->data) {
821                 if (isSwizzled || isTiled) {
822                     if (aWidth != origWidth || aHeight != origHeight) {
823                         uint8 *tmp = new uint8[aWidth * aHeight * desc.bpp / 8];
824                         swap(this->data, tmp);
825                         updateData(data);
826                         swap(this->data, tmp);
827                         if (isSwizzled) {
828                             Context::swizzleImage(this->data, tmp, aWidth, aHeight, desc.bpp);
829                         } else {
830                             Context::tileImage(this->data, tmp, aWidth, aHeight, desc.bpp);
831                         }
832                         delete[] tmp;
833                     } else {
834                         if (isSwizzled) {
835                             Context::swizzleImage(this->data, data, aWidth, aHeight, desc.bpp);
836                         } else {
837                             Context::tileImage(this->data, data, aWidth, aHeight, desc.bpp);
838                         }
839                     }
840                 } else {
841                     updateData(data);
842                 }
843             }
844 
845             //generateMipMap();
846 
847             if (isCube) {
848                 sceGxmTextureInitCube(&ID, this->data, SceGxmTextureFormat(desc.textureFormat), width, height, mipCount);
849             } else if (isSwizzled) {
850                 sceGxmTextureInitSwizzled(&ID, this->data, SceGxmTextureFormat(desc.textureFormat), width, height, mipCount);
851             } else if (isTiled) {
852                 sceGxmTextureInitTiled(&ID, this->data, SceGxmTextureFormat(desc.textureFormat), width, height, mipCount);
853             } else {
854                 sceGxmTextureInitLinear(&ID, this->data, SceGxmTextureFormat(desc.textureFormat), width, height, mipCount);
855             }
856 
857             SceGxmTextureAddrMode addrMode;
858             if (opt & OPT_REPEAT) {
859                 addrMode = SCE_GXM_TEXTURE_ADDR_REPEAT;
860             } else {
861                 addrMode = (isShadow && support.texBorder) ? SCE_GXM_TEXTURE_ADDR_CLAMP_FULL_BORDER : SCE_GXM_TEXTURE_ADDR_CLAMP;
862             }
863 
864             sceGxmTextureSetUAddrMode(&ID, addrMode);
865             sceGxmTextureSetUAddrMode(&ID, addrMode);
866 
867             sceGxmTextureSetMinFilter(&ID, filter ? SCE_GXM_TEXTURE_FILTER_LINEAR : SCE_GXM_TEXTURE_FILTER_POINT);
868             sceGxmTextureSetMagFilter(&ID, filter ? SCE_GXM_TEXTURE_FILTER_LINEAR : SCE_GXM_TEXTURE_FILTER_POINT);
869 
870             if (opt & OPT_TARGET) {
871 
872                 if (fmt == FMT_DEPTH || fmt == FMT_SHADOW) {
873                     depthBufferData = this->data;
874                     depthBufferUID  = uid;
875 
876                     sceGxmDepthStencilSurfaceInit(&depthSurface,
877                         SceGxmDepthStencilFormat(desc.targetFormat),
878                         SCE_GXM_DEPTH_STENCIL_SURFACE_TILED,
879                         aWidth, this->data, NULL);
880 
881                 } else {
882                     sceGxmColorSurfaceInit(&colorSurface,
883                         SceGxmColorFormat(desc.targetFormat),
884                         isSwizzled ? SCE_GXM_COLOR_SURFACE_SWIZZLED : (isTiled ? SCE_GXM_COLOR_SURFACE_TILED : SCE_GXM_COLOR_SURFACE_LINEAR),
885                         SCE_GXM_COLOR_SURFACE_SCALE_NONE,
886                         desc.bpp > 32 ? SCE_GXM_OUTPUT_REGISTER_SIZE_64BIT : SCE_GXM_OUTPUT_REGISTER_SIZE_32BIT,
887                         aWidth, aHeight, aWidth, this->data);
888 
889                     uint32 dsWidth  = ALIGNADDR(width,  SCE_GXM_TILE_SIZEX);
890                     uint32 dsHeight = ALIGNADDR(height, SCE_GXM_TILE_SIZEY);
891 
892                     depthBufferData = Context::allocGPU(
893                         SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE,
894                         4 * dsWidth * dsHeight,
895                         SCE_GXM_MEMORY_ATTRIB_RW,
896                         &depthBufferUID);
897 
898                     sceGxmDepthStencilSurfaceInit(&depthSurface,
899                         SCE_GXM_DEPTH_STENCIL_FORMAT_D16,
900                         SCE_GXM_DEPTH_STENCIL_SURFACE_TILED,
901                         dsWidth, depthBufferData, NULL);
902                 }
903 
904                 SceGxmRenderTargetParams params;
905                 memset(&params, 0, sizeof(params));
906                 params.width           = aWidth;
907                 params.height          = aHeight;
908                 params.scenesPerFrame  = 1;
909                 params.multisampleMode = SCE_GXM_MULTISAMPLE_NONE;
910                 params.driverMemBlock  = -1;
911 
912                 sceGxmCreateRenderTarget(&params, &renderTarget);
913             }
914         }
915 
deinitTexture916         void deinit() {
917             if (opt & OPT_TARGET) {
918                 sceGxmDestroyRenderTarget(renderTarget);
919                 if (depthBufferUID != uid) {
920                     Context::freeGPU(depthBufferUID, true);
921                 }
922             }
923             Context::freeGPU(uid, true);
924         }
925 
generateMipMapTexture926         void generateMipMap() { // TODO: cubemap
927             if (mipCount <= 1) return;
928 
929             int w = width;
930             int h = height;
931 
932             uint8 *src = this->data;
933             int srcStride = ALIGNADDR(w, 8) * 4;
934 
935             for (int i = 0; i < mipCount - 1; i++) {
936                 uint8 *dst = src + srcStride * h;
937                 int dstStride = ALIGNADDR(w / 2, 8) * 4;
938 
939                 // TODO: check for NPOT
940                 if (w > 1024 || h > 1024) { // sceGxmTransferDownscale supports blocks less than 1024
941                     int blocksX = max(1, w / 1024);
942                     int blocksY = max(1, h / 1024);
943                     for (int y = 0; y < blocksY; y++) {
944                         for (int x = 0; x < blocksX; x++) {
945                             int blockWidth  = min(1024, w - x * 1024);
946                             int blockHeight = min(1024, h - y * 1024);
947                             sceGxmTransferDownscale(
948                                 SCE_GXM_TRANSFER_FORMAT_U8U8U8U8_ABGR, src, x * 1024, y * 1024, blockWidth, blockHeight, srcStride,
949                                 SCE_GXM_TRANSFER_FORMAT_U8U8U8U8_ABGR, dst, x * 512,  y * 512,  dstStride,
950                                 NULL, SCE_GXM_TRANSFER_FRAGMENT_SYNC, NULL);
951                         }
952                     }
953                 } else {
954                     sceGxmTransferDownscale(
955                         SCE_GXM_TRANSFER_FORMAT_U8U8U8U8_ABGR, src, 0, 0, w, h, srcStride,
956                         SCE_GXM_TRANSFER_FORMAT_U8U8U8U8_ABGR, dst, 0, 0, dstStride,
957                         NULL, SCE_GXM_TRANSFER_FRAGMENT_SYNC, NULL);
958                 }
959 
960                 w /= 2;
961                 h /= 2;
962                 src = dst;
963                 srcStride = dstStride;
964             }
965 
966             sceGxmTextureSetMipFilter(&ID, SCE_GXM_TEXTURE_MIP_FILTER_ENABLED);
967         }
968 
updateDataTexture969         void updateData(void *data) {
970             FormatDesc desc = formats[fmt];
971 
972             if (aWidth != origWidth || aHeight != origHeight) {
973                 uint8 *dst = (uint8*)this->data;
974                 uint8 *src = (uint8*)data;
975                 for (int y = 0; y < origHeight; y++) {
976                     memcpy(dst, src, origWidth * desc.bpp / 8);
977                     src += origWidth * desc.bpp / 8;
978                     dst += aWidth * desc.bpp / 8;
979                 }
980             } else {
981                 memcpy(this->data, data, aWidth * aHeight * desc.bpp / 8);
982             }
983         }
984 
updateTexture985         void update(void *data) {
986             if (data) {
987                 updateData(data);
988             }
989         }
990 
bindTexture991         void bind(int sampler) {
992             if (opt & OPT_PROXY) return;
993             ASSERT(ID);
994 
995             if (active.textures[sampler] != this) {
996                 active.textures[sampler] = this;
997                 sceGxmSetFragmentTexture(Context::gxmContext, sampler, &ID);
998 
999                 if (opt & OPT_VERTEX) {
1000                     sceGxmSetVertexTexture(Context::gxmContext, sampler, &ID);
1001                 }
1002             }
1003         }
1004 
unbindTexture1005         void unbind(int sampler) {
1006             if (active.textures[sampler]) {
1007                 active.textures[sampler] = NULL;
1008                 sceGxmSetFragmentTexture(Context::gxmContext, sampler, NULL);
1009             }
1010         }
1011 
setFilterQualityTexture1012         void setFilterQuality(int value) {
1013             bool filter  = (opt & OPT_NEAREST) == 0 && (value > Settings::LOW);
1014             bool mipmaps = (opt & OPT_MIPMAPS) != 0;
1015 
1016             sceGxmTextureSetMinFilter(&ID, filter ? SCE_GXM_TEXTURE_FILTER_LINEAR : SCE_GXM_TEXTURE_FILTER_POINT);
1017             sceGxmTextureSetMagFilter(&ID, filter ? SCE_GXM_TEXTURE_FILTER_LINEAR : SCE_GXM_TEXTURE_FILTER_POINT);
1018         }
1019     };
1020 
1021 // Mesh
1022     struct Mesh {
1023         Index        *iBuffer;
1024         GAPI::Vertex *vBuffer;
1025 
1026         SceUID       iBufferUID;
1027         SceUID       vBufferUID;
1028 
1029         bool         dynamic;
1030 
1031         struct Chunk {
1032             int frameIndex;
1033             int iBase, iStart, iCount;
1034             int vBase, vStart, vCount;
1035         } chunks[DISPLAY_BUFFER_COUNT];
1036 
MeshMesh1037         Mesh(bool dynamic) : iBuffer(NULL), vBuffer(NULL), dynamic(dynamic) {}
1038 
initMesh1039         void init(Index *indices, int iCount, ::Vertex *vertices, int vCount, int aCount) {
1040             ASSERT(sizeof(GAPI::Vertex) == sizeof(::Vertex));
1041 
1042             memset(chunks, 0, sizeof(chunks));
1043 
1044             for (int i = 0; i < COUNT(chunks); i++) {
1045                 chunks[i].frameIndex = -1;
1046                 chunks[i].iBase = i * iCount;
1047                 chunks[i].vBase = i * vCount;
1048             }
1049 
1050             if (dynamic) {
1051                 iCount *= COUNT(chunks);
1052                 vCount *= COUNT(chunks);
1053             }
1054 
1055             iBuffer = (Index*)  Context::allocGPU(SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE, iCount * sizeof(Index),  SCE_GXM_MEMORY_ATTRIB_READ, &iBufferUID);
1056             vBuffer = (Vertex*) Context::allocGPU(SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE, vCount * sizeof(Vertex), SCE_GXM_MEMORY_ATTRIB_READ, &vBufferUID);
1057 
1058             if (!dynamic) {
1059                 update(indices, iCount, vertices, vCount);
1060             }
1061         }
1062 
deinitMesh1063         void deinit() {
1064             Context::freeGPU(iBufferUID, true);
1065             Context::freeGPU(vBufferUID, true);
1066         }
1067 
getChunkMesh1068         Chunk& getChunk() {
1069             return dynamic ? chunks[Core::stats.frameIndex % COUNT(chunks)] : chunks[0];
1070         }
1071 
updateMesh1072         void update(Index *indices, int iCount, ::Vertex *vertices, int vCount) {
1073             Chunk &chunk = getChunk();
1074             if (chunk.frameIndex != Core::stats.frameIndex) {
1075                 chunk.frameIndex = Core::stats.frameIndex;
1076                 chunk.iStart = chunk.iCount = chunk.iBase;
1077                 chunk.vStart = chunk.vCount = chunk.vBase;
1078             }
1079 
1080             if (indices && iCount) {
1081                 chunk.iStart = chunk.iCount;
1082                 chunk.iCount += iCount;
1083                 memcpy(iBuffer + chunk.iStart, indices, iCount * sizeof(Index));
1084             }
1085 
1086             if (vertices && vCount) {
1087                 chunk.vStart = chunk.vCount;
1088                 chunk.vCount += vCount;
1089                 memcpy(vBuffer + chunk.vStart, vertices, vCount * sizeof(Vertex));
1090             }
1091         }
1092 
bindMesh1093         void bind(const MeshRange &range) {
1094             active.vBuffer = vBuffer + getChunk().vStart + range.vStart;
1095             sceGxmSetVertexStream(Context::gxmContext, 0, active.vBuffer);
1096         }
1097 
initNextRangeMesh1098         void initNextRange(MeshRange &range, int &aIndex) const {
1099             range.aIndex = -1;
1100         }
1101     };
1102 
1103 
1104     int cullMode, blendMode, colorMask;
1105     bool depthTest, depthWrite;
1106     Shader clearShader;
1107     Mesh   clearMesh(false);
1108     vec4   clearColor;
1109 
init()1110     void init() {
1111         LOG("Vendor   : %s\n", "Sony");
1112         LOG("Renderer : %s\n", "SCE GXM");
1113         LOG("Version  : %s\n", "1.0");
1114 
1115        // EDRAM_SIZE = sceGeEdramGetSize();
1116        // LOG("VRAM     : %d\n", EDRAM_SIZE);
1117        // freeEDRAM();
1118 
1119         support.shaderBinary   = true;
1120         support.depthTexture   = true;
1121         support.shadowSampler  = true;
1122         support.texNPOT        = true;
1123         support.texRG          = true;
1124         support.colorHalf      = true;
1125         support.texHalfLinear  = true;
1126         support.texHalf        = true;
1127 
1128         Core::width  = DISPLAY_WIDTH;
1129         Core::height = DISPLAY_HEIGHT;
1130 
1131         { // gxm
1132             SceGxmInitializeParams params;
1133             memset(&params, 0, sizeof(params));
1134             params.displayQueueMaxPendingCount  = DISPLAY_BUFFER_COUNT - 1;
1135             params.displayQueueCallback         = displayCallback;
1136             params.displayQueueCallbackDataSize = sizeof(DisplayData);
1137             params.parameterBufferSize          = SCE_GXM_DEFAULT_PARAMETER_BUFFER_SIZE;
1138 
1139             sceGxmInitialize(&params);
1140         }
1141 
1142         Context::init();
1143         SwapChain::init();
1144 
1145         { // shader patcher
1146             uint32 vertOffset, fragOffset;
1147 
1148             shaderBuffPtr = Context::allocGPU(SCE_KERNEL_MEMBLOCK_TYPE_USER_RW_UNCACHE, SHADER_BUFF_SIZE, SCE_GXM_MEMORY_ATTRIB_RW, &shaderBuffUID);
1149             shaderVertPtr = Context::allocVertexUSSE(SHADER_VERT_SIZE, &shaderVertUID, &vertOffset);
1150             shaderFragPtr = Context::allocFragmentUSSE(SHADER_FRAG_SIZE, &shaderFragUID, &fragOffset);
1151 
1152             SceGxmShaderPatcherParams params;
1153             memset(&params, 0, sizeof(params));
1154             params.hostAllocCallback   = Context::allocCPU;
1155             params.hostFreeCallback    = Context::freeCPU;
1156             params.bufferMem           = shaderBuffPtr;
1157             params.bufferMemSize       = SHADER_BUFF_SIZE;
1158             params.vertexUsseMem       = shaderVertPtr;
1159             params.vertexUsseMemSize   = SHADER_VERT_SIZE;
1160             params.vertexUsseOffset    = vertOffset;
1161             params.fragmentUsseMem     = shaderFragPtr;
1162             params.fragmentUsseMemSize = SHADER_FRAG_SIZE;
1163             params.fragmentUsseOffset  = fragOffset;
1164 
1165             sceGxmShaderPatcherCreate(&params, &shaderPatcher);
1166         }
1167 
1168         clearShader.init(PASS_CLEAR, 0, NULL, 0);
1169 
1170         Index indices[] = { 0, 1, 2 };
1171         Vertex vertices[3];
1172         vertices[0].coord = short4{-1, -1, 1, 1};
1173         vertices[1].coord = short4{ 3, -1, 1, 1};
1174         vertices[2].coord = short4{-1,  3, 1, 1};
1175         clearMesh.init(indices, 3, vertices, 3, 0);
1176 
1177         clearColor = vec4(0.0f);
1178 
1179         colorMask = SCE_GXM_COLOR_MASK_ALL;
1180         blendMode = 0;
1181     }
1182 
deinit()1183     void deinit() {
1184         sceGxmFinish(Context::gxmContext);
1185         sceGxmDisplayQueueFinish();
1186 
1187         clearShader.deinit();
1188         clearMesh.deinit();
1189 
1190         SwapChain::deinit();
1191         Context::deinit();
1192 
1193         sceGxmTerminate();
1194     }
1195 
getProjRange()1196     inline mat4::ProjRange getProjRange() {
1197         return mat4::PROJ_NEG_POS;
1198     }
1199 
ortho(float l,float r,float b,float t,float znear,float zfar)1200     mat4 ortho(float l, float r, float b, float t, float znear, float zfar) {
1201         mat4 m;
1202         m.ortho(getProjRange(), l, r, b, t, znear, zfar);
1203         return m;
1204     }
1205 
perspective(float fov,float aspect,float znear,float zfar,float eye)1206     mat4 perspective(float fov, float aspect, float znear, float zfar, float eye) {
1207         mat4 m;
1208         m.perspective(getProjRange(), fov, aspect, znear, zfar, eye);
1209         return m;
1210     }
1211 
beginFrame()1212     bool beginFrame() {
1213         return true;
1214     }
1215 
endFrame()1216     void endFrame() {
1217     }
1218 
1219     bool hasScene;
1220     Texture defTarget(DISPLAY_WIDTH, DISPLAY_HEIGHT, 1, OPT_TARGET);
1221 
resetState()1222     void resetState() {
1223         hasScene = false;
1224         Core::defaultTarget = &defTarget;
1225     }
1226 
cacheRenderTarget(bool depth,int width,int height)1227     int cacheRenderTarget(bool depth, int width, int height) {
1228         /*
1229         RenderTargetCache &cache = rtCache[depth];
1230 
1231         for (int i = 0; i < cache.count; i++)
1232             if (cache.items[i].width == width && cache.items[i].height == height)
1233                 return i;
1234 
1235         ASSERT(cache.count < MAX_RENDER_BUFFERS);
1236 
1237         RenderTargetCache::Item &item = cache.items[cache.count];
1238         item.width  = width;
1239         item.height = height;
1240 
1241         if (depth)
1242             device->CreateDepthStencilSurface(width, height, D3DFMT_D16, D3DMULTISAMPLE_NONE, 0, true, &item.surface, NULL);
1243         else
1244             device->CreateRenderTarget(width, height, D3DFMT_R5G6B5, D3DMULTISAMPLE_NONE, 0, false, &item.surface, NULL);
1245 
1246         return cache.count++;
1247         */
1248         return 0;
1249     }
1250 
bindTarget(Texture * target,int face)1251     void bindTarget(Texture *target, int face) {
1252         if (hasScene) {
1253             sceGxmEndScene(Context::gxmContext, NULL, NULL);
1254         }
1255         hasScene = true;
1256 
1257         if (!target || target == &defTarget) {
1258             sceGxmBeginScene(Context::gxmContext, 0, SwapChain::defaultTarget, NULL, NULL,
1259                 SwapChain::getSyncObj(),
1260                 SwapChain::getColorSurface(),
1261                 SwapChain::getDepthSurface());
1262         } else {
1263             ASSERT(target->opt & OPT_TARGET);
1264 
1265             uint32 flags = SCE_GXM_SCENE_VERTEX_TRANSFER_SYNC;
1266             if (target->opt & OPT_VERTEX) flags |= SCE_GXM_SCENE_FRAGMENT_SET_DEPENDENCY;
1267             if (target->opt & OPT_DEPEND) flags |= SCE_GXM_SCENE_VERTEX_WAIT_FOR_DEPENDENCY;
1268 
1269             SceGxmColorSurface *colorSurface = (target->fmt == FMT_DEPTH || target->fmt == FMT_SHADOW) ? NULL : &target->colorSurface;
1270 
1271             bool loadDepth  = (Core::reqTarget.op & RT_LOAD_DEPTH);
1272             bool storeDepth = (Core::reqTarget.op & RT_STORE_DEPTH);
1273 
1274             sceGxmDepthStencilSurfaceSetForceLoadMode  ( &target->depthSurface, loadDepth  ? SCE_GXM_DEPTH_STENCIL_FORCE_LOAD_ENABLED  : SCE_GXM_DEPTH_STENCIL_FORCE_LOAD_DISABLED  );
1275             sceGxmDepthStencilSurfaceSetForceStoreMode ( &target->depthSurface, storeDepth ? SCE_GXM_DEPTH_STENCIL_FORCE_STORE_ENABLED : SCE_GXM_DEPTH_STENCIL_FORCE_STORE_DISABLED );
1276 
1277             sceGxmBeginScene(Context::gxmContext, flags, target->renderTarget, NULL, NULL, NULL, colorSurface, &target->depthSurface);
1278         }
1279         active.viewport = short4(0, 0, 0, 0); // forcing viewport reset
1280     }
1281 
discardTarget(bool color,bool depth)1282     void discardTarget(bool color, bool depth) {}
1283 
copyTarget(Texture * dst,int xOffset,int yOffset,int x,int y,int width,int height)1284     void copyTarget(Texture *dst, int xOffset, int yOffset, int x, int y, int width, int height) {
1285         //
1286     }
1287 
setVSync(bool enable)1288     void setVSync(bool enable) {}
1289 
present()1290     void present() {
1291         if (hasScene) {
1292             sceGxmEndScene(Context::gxmContext, NULL, NULL);
1293         }
1294 
1295         SwapChain::present();
1296         Context::checkPendings();
1297     }
1298 
setViewport(const short4 & v)1299     void setViewport(const short4 &v) {
1300         int vh = active.target ? active.target->height : Core::height;
1301         int sw = v.z / 2;
1302         int sh = v.w / 2;
1303         sceGxmSetViewport(Context::gxmContext, float(v.x + sw), float(sw), float(vh - v.y - sh), float(-sh), 0.0f, 1.0f);
1304     }
1305 
setScissor(const short4 & s)1306     void setScissor(const short4 &s) {
1307         //int vh = active.target ? active.target->height : Core::height;
1308         //sceGxmSetRegionClip(Context::gxmContext, SCE_GXM_REGION_CLIP_OUTSIDE, 0, 0, 256, 256);
1309     }
1310 
setDepthTest(bool enable)1311     void setDepthTest(bool enable) {
1312         depthTest = enable;
1313         sceGxmSetFrontDepthFunc(Context::gxmContext, enable ? SCE_GXM_DEPTH_FUNC_LESS_EQUAL : SCE_GXM_DEPTH_FUNC_ALWAYS);
1314         sceGxmSetBackDepthFunc(Context::gxmContext,  enable ? SCE_GXM_DEPTH_FUNC_LESS_EQUAL : SCE_GXM_DEPTH_FUNC_ALWAYS);
1315         sceGxmSetFrontDepthWriteEnable(Context::gxmContext, (depthWrite && depthTest) ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
1316         sceGxmSetBackDepthWriteEnable(Context::gxmContext,  (depthWrite && depthTest) ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
1317     }
1318 
setDepthWrite(bool enable)1319     void setDepthWrite(bool enable) {
1320         depthWrite = enable;
1321         if (depthTest) {
1322             sceGxmSetFrontDepthWriteEnable(Context::gxmContext, enable ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
1323             sceGxmSetBackDepthWriteEnable(Context::gxmContext,  enable ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
1324         }
1325     }
1326 
setColorWrite(bool r,bool g,bool b,bool a)1327     void setColorWrite(bool r, bool g, bool b, bool a) {
1328         colorMask = SCE_GXM_COLOR_MASK_NONE;
1329         if (r) colorMask |= SCE_GXM_COLOR_MASK_R;
1330         if (g) colorMask |= SCE_GXM_COLOR_MASK_G;
1331         if (b) colorMask |= SCE_GXM_COLOR_MASK_B;
1332         if (a) colorMask |= SCE_GXM_COLOR_MASK_A;
1333     }
1334 
setAlphaTest(bool enable)1335     void setAlphaTest(bool enable) {}
1336 
setCullMode(int rsMask)1337     void setCullMode(int rsMask) {
1338         cullMode = rsMask;
1339         switch (rsMask) {
1340             case RS_CULL_BACK  : sceGxmSetCullMode(Context::gxmContext, SCE_GXM_CULL_CW);  break;
1341             case RS_CULL_FRONT : sceGxmSetCullMode(Context::gxmContext, SCE_GXM_CULL_CCW); break;
1342             default            : sceGxmSetCullMode(Context::gxmContext, SCE_GXM_CULL_NONE);
1343         }
1344     }
1345 
setBlendMode(int rsMask)1346     void setBlendMode(int rsMask) {
1347         blendMode = rsMask;
1348     }
1349 
setViewProj(const mat4 & mView,const mat4 & mProj)1350     void setViewProj(const mat4 &mView, const mat4 &mProj) {}
1351 
DIP(Mesh * mesh,const MeshRange & range)1352     void DIP(Mesh *mesh, const MeshRange &range) {
1353         if (!active.shader) return;
1354 
1355         active.shader->setBlendInfo(colorMask, blendMode);
1356         active.shader->validate();
1357         sceGxmDraw(Context::gxmContext, SCE_GXM_PRIMITIVE_TRIANGLES, SCE_GXM_INDEX_FORMAT_U16, mesh->iBuffer + mesh->getChunk().iStart + range.iStart, range.iCount);
1358     }
1359 
updateLights(vec4 * lightPos,vec4 * lightColor,int count)1360     void updateLights(vec4 *lightPos, vec4 *lightColor, int count) {
1361         if (active.shader) {
1362             active.shader->setParam(uLightColor, lightColor[0], count);
1363             active.shader->setParam(uLightPos,   lightPos[0],   count);
1364         }
1365     }
1366 
clear(bool color,bool depth)1367     void clear(bool color, bool depth) {
1368         int  oColorMask  = colorMask;
1369         int  oBlendMode  = blendMode;
1370         bool oDepthTest  = depthTest;
1371         int  oCullMode   = cullMode;
1372         Vertex *oBuffer  = active.vBuffer;
1373         Shader *oShader  = Core::active.shader;
1374 
1375         sceGxmSetFrontDepthFunc(Context::gxmContext, SCE_GXM_DEPTH_FUNC_ALWAYS);
1376         sceGxmSetBackDepthFunc(Context::gxmContext,  SCE_GXM_DEPTH_FUNC_ALWAYS);
1377         sceGxmSetFrontDepthWriteEnable(Context::gxmContext, depth ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
1378         sceGxmSetBackDepthWriteEnable(Context::gxmContext,  depth ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
1379 
1380         setColorWrite(color, color, color, color);
1381         setBlendMode(0);
1382         setCullMode(cmNone);
1383 
1384         active.shader = &clearShader;
1385         active.shader->setParam(uMaterial, clearColor);
1386 
1387         MeshRange range;
1388         range.iStart =  0;
1389         range.iCount =  3;
1390         range.vStart =  0;
1391         range.aIndex = -1;
1392 
1393         clearMesh.bind(range);
1394         DIP(&clearMesh, range);
1395 
1396         colorMask = oColorMask;
1397         setBlendMode(oBlendMode);
1398         setCullMode(oCullMode);
1399         setDepthTest(oDepthTest);
1400 
1401         active.shader = oShader;
1402 
1403         sceGxmSetVertexStream(Context::gxmContext, 0, oBuffer);
1404     }
1405 
setClearColor(const vec4 & color)1406     void setClearColor(const vec4 &color) {
1407         clearColor = color;
1408     }
1409 
copyPixel(int x,int y)1410     vec4 copyPixel(int x, int y) {
1411 //        GAPI::Texture *t = Core::active.target;
1412 //        Color32 *color = (Color32*)t->data;
1413 //        return vec4(color->r, color->g, color->b, 255.0f) * (1.0f / 255.0f);
1414         return vec4(0.0f); // TODO: read from framebuffer
1415     }
1416 }
1417 
1418 #endif