/*
 * Copyright (C) 2019 Connor Abbott <cwabbott0@gmail.com>
 * Copyright (C) 2019 Lyude Paul <thatslyude@gmail.com>
 * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * Packed bit-field layouts and opcode constants describing how Bifrost
 * clauses, clause headers, register blocks and FMA/ADD instructions are
 * encoded.
 *
 * Every struct below declares its fields lowest bit first and relies on
 * __attribute__((packed)) so that the declared widths add up to the encoded
 * size without padding. Bit-field allocation order is implementation-defined;
 * the layouts assume the GCC/Clang little-endian convention.
 *
 * Many opcode constants are written as a wider value shifted right. In the
 * cases visible here the shift appears to equal the number of bits that the
 * matching struct places between src0 and its `op` field -- confirm before
 * adding new ones.
 */

#ifndef __bifrost_h__
#define __bifrost_h__

#include <stdint.h>
#include <stdbool.h>

/* Bit flags; presumably tested against bifrost_debug -- confirm with users */
#define BIFROST_DBG_MSGS        0x0001
#define BIFROST_DBG_SHADERS     0x0002

/* Declared here, defined in exactly one translation unit elsewhere */
extern int bifrost_debug;

/* Values stored in the 4-bit clause_type / next_clause_type header fields */
enum bifrost_clause_type {
        BIFROST_CLAUSE_NONE       = 0,
        BIFROST_CLAUSE_LOAD_VARY  = 1,
        BIFROST_CLAUSE_UBO        = 2,
        BIFROST_CLAUSE_TEX        = 3,
        BIFROST_CLAUSE_SSBO_LOAD  = 5,
        BIFROST_CLAUSE_SSBO_STORE = 6,
        BIFROST_CLAUSE_BLEND      = 9,
        BIFROST_CLAUSE_FRAGZ      = 12,
        BIFROST_CLAUSE_ATEST      = 13,
        BIFROST_CLAUSE_64BIT      = 15
};

/* Clause header. The field widths sum to 45 bits, matching the `header`
 * field of struct bifrost_fmt1. */
struct bifrost_header {
        unsigned unk0 : 7;
        // If true, convert any infinite result of any floating-point operation to
        // the biggest representable number.
        unsigned suppress_inf: 1;
        // Convert any NaN results to 0.
        unsigned suppress_nan : 1;
        unsigned unk1 : 2;
        // true if the execution mask of the next clause is the same as the mask of
        // the current clause.
        unsigned back_to_back : 1;
        unsigned no_end_of_shader: 1;
        unsigned unk2 : 2;
        // Set to true for fragment shaders, to implement this bit of spec text
        // from section 7.1.5 of the GLSL ES spec:
        //
        // "Stores to image and buffer variables performed by helper invocations
        // have no effect on the underlying image or buffer memory."
        //
        // Helper invocations are threads (invocations) corresponding to pixels in
        // a quad that aren't actually part of the triangle, but are included to
        // make derivatives work correctly. They're usually turned on, but they
        // need to be masked off for GLSL-level stores. This bit seems to be the
        // only bit that's actually different between fragment shaders and other
        // shaders, so this is probably what it's doing.
        unsigned elide_writes : 1;
        // If back_to_back is off:
        // - true for conditional branches and fallthrough
        // - false for unconditional branches
        // The blob seems to always set it to true if back_to_back is on.
        unsigned branch_cond : 1;
        // This bit is set when the next clause writes to the data register of some
        // previous clause.
        unsigned datareg_writebarrier: 1;
        unsigned datareg : 6;
        unsigned scoreboard_deps: 8;
        unsigned scoreboard_index: 3;
        enum bifrost_clause_type clause_type: 4;
        unsigned unk3 : 1; // part of clause_type?
        enum bifrost_clause_type next_clause_type: 4;
        unsigned unk4 : 1; // part of next_clause_type?
} __attribute__((packed));

/* Values for the 3-bit srcN fields of the instruction structs below */
enum bifrost_packed_src {
        BIFROST_SRC_PORT0    = 0,
        BIFROST_SRC_PORT1    = 1,
        BIFROST_SRC_PORT3    = 2,
        BIFROST_SRC_STAGE    = 3,
        BIFROST_SRC_CONST_LO = 4,
        BIFROST_SRC_CONST_HI = 5,
        BIFROST_SRC_PASS_FMA = 6,
        BIFROST_SRC_PASS_ADD = 7,
};

/* The whole expansion of each macro is parenthesized so that the constants can
 * be used safely inside comparisons and masks (`|` binds looser than `==`,
 * `&`, `<<`, ...). */
#define BIFROST_FMA_EXT (0xe0000)
#define BIFROST_FMA_OP_MOV (BIFROST_FMA_EXT | 0x32d)
#define BIFROST_FMA_OP_FREXPE_LOG (BIFROST_FMA_EXT | 0x3c5)
#define BIFROST_FMA_OP_ADD_FREXPM ((BIFROST_FMA_EXT | 0x1e80) >> 3)
#define BIFROST_FMA_SEL_16(swiz) (((BIFROST_FMA_EXT | 0x1e00) >> 3) | (swiz))

#define BIFROST_FMA_ROUND_16(mode, swiz) (BIFROST_FMA_EXT | 0x1800 | (swiz) | ((mode) << 6))
#define BIFROST_FMA_ROUND_32(mode) (BIFROST_FMA_EXT | 0x1805 | ((mode) << 6))

/* Generic one-source FMA instruction: 3 + 20 = 23 bits */
struct bifrost_fma_inst {
        unsigned src0 : 3;
        unsigned op   : 20;
} __attribute__((packed));

#define BIFROST_FMA_IADD_32 (0x4ff98 >> 3)
#define BIFROST_FMA_ISUB_32 (0x4ffd8 >> 3)
#define BIFROST_FMA_IMUL_32 ((BIFROST_FMA_EXT | 0x7818) >> 3)

/* Generic two-source FMA instruction */
struct bifrost_fma_2src {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned op   : 17;
} __attribute__((packed));

#define BIFROST_FMA_OP_SEL8 (0x71)

struct bifrost_fma_sel8 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned src3 : 3;
        unsigned swizzle : 4;
        unsigned op   : 7;
} __attribute__((packed));

#define BIFROST_FMA_OP_MSCALE (0x50 >> 3)

struct bifrost_fma_mscale {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned src3 : 3;

        /* If mscale_mode is set - an MSCALE specific mode. If it is not set, a
         * regular outmod */
        unsigned mode : 2;
        unsigned mscale_mode : 1;

        unsigned src0_abs : 1;
        unsigned src1_neg : 1;
        unsigned src2_neg : 1;
        unsigned op : 5;
} __attribute__((packed));

#define BIFROST_ADD_OP_BLEND            (0x1952c)
#define BIFROST_ADD_OP_FRCP_FAST_F32    (0x0cc00)
#define BIFROST_ADD_OP_FRCP_FAST_F16_X  (0x0ce10)
#define BIFROST_ADD_OP_FRCP_FAST_F16_Y  (0x0ce30)
#define BIFROST_ADD_OP_FRSQ_FAST_F32    (0x0cc20)
#define BIFROST_ADD_OP_FRSQ_FAST_F16_X  (0x0ce50)
#define BIFROST_ADD_OP_FRSQ_FAST_F16_Y  (0x0ce70)
#define BIFROST_ADD_OP_LOG2_HELP        (0x0cc68)
#define BIFROST_ADD_OP_IABS_32          (0x07bd4)

/* Generic one-source ADD instruction: 3 + 17 = 20 bits */
struct bifrost_add_inst {
        unsigned src0 : 3;
        unsigned op   : 17;
} __attribute__((packed));

#define BIFROST_ADD_OP_DISCARD (0x19100 >> 8)

enum bifrost_discard_cond {
        BIFROST_DISCARD_FEQ = 0,
        BIFROST_DISCARD_FNE = 1,
        BIFROST_DISCARD_FLE = 2,
        BIFROST_DISCARD_FLT = 3,
};

struct bifrost_add_discard {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_discard_cond cond : 2;
        /* Zero for fp32 */
        unsigned src0_select : 1;
        unsigned src1_select : 1;
        unsigned fp32 : 1;
        unsigned op : 9;
} __attribute__((packed));

#define BIFROST_ADD_OP_LD_UBO_1 (0x0c1a0 >> 3)
#define BIFROST_ADD_OP_LD_UBO_2 (0x0c1e0 >> 3)
#define BIFROST_ADD_OP_LD_UBO_3 (0x0caa0 >> 3)
#define BIFROST_ADD_OP_LD_UBO_4 (0x0c220 >> 3)
#define BIFROST_ADD_SEL_16(swiz) ((0xea60 >> 3) | (swiz))

#define BIFROST_ADD_IADD_8  (0x17880 >> 3)
#define BIFROST_ADD_IADD_16 (0x17900 >> 3)
#define BIFROST_ADD_IADD_32 (0x178c0 >> 3)
#define BIFROST_ADD_ISUB_8  (0x17a80 >> 3)
#define BIFROST_ADD_ISUB_16 (0x17b00 >> 3)
#define BIFROST_ADD_ISUB_32 (0x17ac0 >> 3)
#define BIFROST_ADD_OP_FEXP2_FAST (0x0cd58 >> 3)

/* Generic two-source ADD instruction */
struct bifrost_add_2src {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned op   : 14;
} __attribute__((packed));

#define BIFROST_ADD_OP_FMAX32 (0x00)
#define BIFROST_ADD_OP_FMIN32 (0x01)
#define BIFROST_ADD_OP_FADD32 (0x02)

#define BIFROST_ADD_OP_FADD16 (0x0A)

struct bifrost_add_faddmin {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src1_abs : 1;
        unsigned src0_neg : 1;
        unsigned src1_neg : 1;
        unsigned select : 2; /* swizzle_0 for fp16 */
        unsigned outmod : 2; /* swizzle_1 for fp16 */
        unsigned mode : 2;
        unsigned src0_abs : 1;
        unsigned op   : 4;
} __attribute__((packed));

#define BIFROST_ADD_OP_FMAX16 (0x10)
#define BIFROST_ADD_OP_FMIN16 (0x12)

struct bifrost_add_fmin16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        /* abs2 inferred as with FMA */
        unsigned abs1 : 1;
        unsigned src0_neg : 1;
        unsigned src1_neg : 1;
        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;
        unsigned mode : 2;
        unsigned op   : 5;
} __attribute__((packed));

#define BIFROST_ADD_OP_ST_VAR (0x19300 >> 8)

struct bifrost_st_vary {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned channels : 2;
        unsigned op : 9;
} __attribute__((packed));

#define BIFROST_ADD_OP_ATEST (0xc8f)

struct bifrost_add_atest {
        /* gl_SampleMask (R60) */
        unsigned src0 : 3;

        /* Alpha value */
        unsigned src1 : 3;

        /* If half, X/Y select. If !half, always set */
        unsigned component : 1;
        unsigned half : 1;

        unsigned op : 12;
} __attribute__((packed));

enum bifrost_outmod {
        BIFROST_NONE = 0x0,
        BIFROST_POS = 0x1,
        BIFROST_SAT_SIGNED = 0x2,
        BIFROST_SAT = 0x3,
};

enum bifrost_roundmode {
        BIFROST_RTE = 0x0, /* round to even */
        BIFROST_RTP = 0x1, /* round to positive */
        BIFROST_RTN = 0x2, /* round to negative */
        BIFROST_RTZ = 0x3  /* round to zero */
};

/* NONE: Same as fmax() and fmin() -- return the other
 * number if any number is NaN.  Also always return +0 if
 * one argument is +0 and the other is -0.
 *
 * NAN_WINS: Instead of never returning a NaN, always return
 * one. The "greater"/"lesser" NaN is always returned, first
 * by checking the sign and then the mantissa bits.
 *
 * SRC1_WINS: For max, implement src0 > src1 ? src0 : src1.
 * For min, implement src0 < src1 ? src0 : src1.  This
 * includes handling NaN's and signedness of 0 differently
 * from above, since +0 and -0 compare equal and comparisons
 * always return false for NaN's. As a result, this mode is
 * *not* commutative.
 *
 * SRC0_WINS: For max, implement src0 < src1 ? src1 : src0
 * For min, implement src0 > src1 ? src1 : src0
 */

enum bifrost_minmax_mode {
        BIFROST_MINMAX_NONE = 0x0,
        BIFROST_NAN_WINS    = 0x1,
        BIFROST_SRC1_WINS   = 0x2,
        BIFROST_SRC0_WINS   = 0x3,
};

#define BIFROST_FMA_OP_FADD32 (0x58 >> 2)
#define BIFROST_FMA_OP_FMAX32 (0x40 >> 2)
#define BIFROST_FMA_OP_FMIN32 (0x44 >> 2)

struct bifrost_fma_add {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src1_abs : 1;
        unsigned src0_neg : 1;
        unsigned src1_neg : 1;
        unsigned unk : 3;
        unsigned src0_abs : 1;
        enum bifrost_roundmode roundmode : 2;
        enum bifrost_outmod outmod : 2;
        unsigned op : 6;
} __attribute__((packed));

#define BIFROST_FMA_OP_FMAX16 (0xC0 >> 2)
#define BIFROST_FMA_OP_FMIN16 (0xCC >> 2)
#define BIFROST_FMA_OP_FADD16 (0xD8 >> 2)

struct bifrost_fma_add_minmax16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        /* abs2 inferred as (src1 < src0) */
        unsigned abs1 : 1;
        unsigned src0_neg : 1;
        unsigned src1_neg : 1;
        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;
        unsigned mode : 2;
        enum bifrost_outmod outmod : 2;
        /* roundmode for add, min/max mode for min/max */
        /* NOTE(review): the comment above most likely describes `mode`
         * rather than `op` -- confirm */
        unsigned op : 6;
} __attribute__((packed));

#define BIFROST_FMA_OP_FMA (0x00)

struct bifrost_fma_fma {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned src_expand : 3;
        unsigned src0_abs : 1;
        enum bifrost_roundmode roundmode : 2;
        enum bifrost_outmod outmod : 2;
        unsigned src0_neg : 1; /* 14 */
        unsigned src2_neg : 1;
        unsigned src1_abs : 1;
        unsigned src2_abs : 1; /* 17 */
        unsigned op : 2;
} __attribute__((packed));

#define BIFROST_FMA_OP_FMA16 (0x2)

struct bifrost_fma_fma16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned swizzle_0 : 2;
        unsigned swizzle_1 : 2;
        enum bifrost_roundmode roundmode : 2;
        enum bifrost_outmod outmod : 2;
        unsigned src0_neg : 1;
        unsigned src2_neg : 1;
        unsigned swizzle_2 : 2;
        unsigned op : 2;
} __attribute__((packed));

enum bifrost_csel_cond {
        BIFROST_FEQ_F = 0x0,
        BIFROST_FGT_F = 0x1,
        BIFROST_FGE_F = 0x2,
        BIFROST_IEQ_F = 0x3,
        BIFROST_IGT_I = 0x4,
        BIFROST_IGE_I = 0x5,
        BIFROST_UGT_I = 0x6,
        BIFROST_UGE_I = 0x7
};

#define BIFROST_FMA_OP_CSEL4     (0x5c)
#define BIFROST_FMA_OP_CSEL4_V16 (0xdc)

struct bifrost_csel4 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned src3 : 3;
        enum bifrost_csel_cond cond : 3;
        unsigned op   : 8;
} __attribute__((packed));

#define BIFROST_FMA_OP_RSHIFT_NAND      (0x60000 >> 12)
#define BIFROST_FMA_OP_RSHIFT_AND       (0x61000 >> 12)
#define BIFROST_FMA_OP_LSHIFT_NAND      (0x62000 >> 12)
#define BIFROST_FMA_OP_LSHIFT_AND       (0x63000 >> 12)
#define BIFROST_FMA_OP_RSHIFT_XOR       (0x64000 >> 12)
#define BIFROST_FMA_OP_LSHIFT_ADD_32    (0x65200 >> 6)
#define BIFROST_FMA_OP_LSHIFT_SUB_32    (0x65600 >> 6)
#define BIFROST_FMA_OP_LSHIFT_RSUB_32   (0x65a00 >> 6)
#define BIFROST_FMA_OP_RSHIFT_ADD_32    (0x65e00 >> 6)
#define BIFROST_FMA_OP_RSHIFT_SUB_32    (0x66200 >> 6)
#define BIFROST_FMA_OP_RSHIFT_RSUB_32   (0x66600 >> 6)

struct bifrost_shift_fma {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned half : 3;
        unsigned unk  : 1; /* always set? */
        unsigned invert_1 : 1; /* Inverts sources to combining op */
        /* For XOR, switches RSHIFT to LSHIFT since only one invert needed */
        unsigned invert_2 : 1;
        unsigned op : 8;
} __attribute__((packed));

struct bifrost_shift_add {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned zero : 2;

        unsigned invert_1 : 1;
        unsigned invert_2 : 1;

        unsigned op : 7;
} __attribute__((packed));

enum bifrost_fcmp_cond {
        BIFROST_OEQ = 0,
        BIFROST_OGT = 1,
        BIFROST_OGE = 2,
        BIFROST_UNE = 3,
        BIFROST_OLT = 4,
        BIFROST_OLE = 5,
};

/* "gl" version produces 0/1. "d3d" version produces 0/~0 */
#define BIFROST_FMA_OP_FCMP_GL  (0x48000 >> 13)
#define BIFROST_FMA_OP_FCMP_D3D (0x4c000 >> 13)

struct bifrost_fma_fcmp {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src1_abs : 1;
        unsigned unk1 : 1;
        unsigned src1_neg : 1;
        unsigned src_expand : 3;
        unsigned src0_abs : 1;
        enum bifrost_fcmp_cond cond : 3;
        unsigned op : 7;
} __attribute__((packed));

struct bifrost_add_fcmp {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_fcmp_cond cond : 3;
        unsigned src_expand : 2;
        unsigned src0_abs : 1;
        unsigned src1_abs : 1;
        unsigned src1_neg : 1;
        unsigned op : 6;
} __attribute__((packed));

/* "gl" version produces 0/1. "d3d" version produces 0/~0 */
#define BIFROST_FMA_OP_FCMP_GL_16  (0xc8000 >> 13)
#define BIFROST_FMA_OP_FCMP_D3D_16 (0xcc000 >> 13)

struct bifrost_fma_fcmp16 {
        unsigned src0 : 3;
        unsigned src1 : 3;

        /* abs2 inferred */
        unsigned abs1 : 1;
        unsigned unk : 2;

        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;

        enum bifrost_fcmp_cond cond : 3;
        unsigned op : 7;
} __attribute__((packed));

struct bifrost_add_fcmp16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_fcmp_cond cond : 3;

        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;

        /* No abs mods */
        unsigned src0_neg : 1;

        unsigned op : 6;
} __attribute__((packed));

enum bifrost_icmp_cond {
        BIFROST_ICMP_IGT = 0,
        BIFROST_ICMP_IGE = 1, /* swapped for 16-bit */
        BIFROST_ICMP_UGT = 2, /* swapped for 16-bit */
        BIFROST_ICMP_UGE = 3,
        BIFROST_ICMP_EQ  = 4,
        BIFROST_ICMP_NEQ = 5,
        BIFROST_ICMP_32_OR_8 = 6, /* nested */
        BIFROST_ICMP_64 = 7, /* nested */
};

struct bifrost_fma_icmp32 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_icmp_cond cond : 3;
        unsigned unk1 : 1; /* set */
        unsigned d3d : 1; /* if set, true is ~0. otherwise, true is 1 */
        unsigned op : 12;
} __attribute__((packed));

struct bifrost_fma_icmp16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned unk : 5; /* 11010 */
        enum bifrost_icmp_cond cond : 3;
        unsigned op : 9;
} __attribute__((packed));

#define BIFROST_ADD_OP_ICMP_32 (0x0f600 >> 8)
#define BIFROST_ADD_OP_ICMP_16 (0x0f000 >> 11)

struct bifrost_add_icmp {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_icmp_cond cond : 3;
        unsigned sz : 1; /* 1 for 32, 0 for 8 */
        unsigned d3d : 1; /* if set, true is ~0. otherwise, true is 1 */
        unsigned op : 9;
} __attribute__((packed));

struct bifrost_add_icmp16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;
        unsigned d3d : 1; /* if set, true is ~0. otherwise, true is 1 */
        enum bifrost_icmp_cond cond : 3;
        unsigned op : 6;
} __attribute__((packed));

/* Two sources for vectorization */
#define BIFROST_FMA_FLOAT32_TO_16 (0xdd000 >> 3)
#define BIFROST_ADD_FLOAT32_TO_16 (0x0EC00 >> 3)

enum bifrost_convert_mode {
        BIFROST_CONV_UNK0 = 0,
        BIFROST_CONV_F32_TO_I32 = 1,
        BIFROST_CONV_F16_TO_I16 = 2,
        BIFROST_CONV_I32_TO_F32 = 3,
        BIFROST_CONV_I16_TO_X32 = 4,
        BIFROST_CONV_F16_TO_F32 = 5,
        BIFROST_CONV_I16_TO_F16 = 6,
        BIFROST_CONV_UNK7 = 7
};

/* The conversion helpers below mask each argument to its field width. Every
 * argument is parenthesized before masking so that compound expressions (for
 * example `a | b`) are masked as a whole. */

/* i16 to x32 */
#define BIFROST_CONVERT_4(is_unsigned, component, to_float) \
        (((is_unsigned) & 1) | (((component) & 1) << 1) | (((to_float) & 1) << 2) | \
         ((0x3) << 3) | ((4) << 5) | 0x100)

/* f16 to f32 */
#define BIFROST_CONVERT_5(component) \
        (((component) & 1) | ((1) << 1) | ((5) << 5) | 0x100)

/* Other conversions */
#define BIFROST_CONVERT(is_unsigned, roundmode, swizzle, mode) \
        (((is_unsigned) & 1) | (((roundmode) & 3) << 1) | (((swizzle) & 3) << 3) | (((mode) & 7) << 5))

#define BIFROST_FMA_CONVERT (0xe0000)
#define BIFROST_ADD_CONVERT (0x07800)

enum bifrost_ldst_type {
        BIFROST_LDST_F16 = 0,
        BIFROST_LDST_F32 = 1,
        BIFROST_LDST_I32 = 2,
        BIFROST_LDST_U32 = 3
};

#define BIFROST_ADD_OP_LD_VAR_ADDR (0x18000 >> 10)

struct bifrost_ld_var_addr {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned location : 5;
        enum bifrost_ldst_type type : 2;
        unsigned op : 7;
} __attribute__((packed));

#define BIFROST_ADD_OP_LD_ATTR (0x08000 >> 12)

struct bifrost_ld_attr {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned location : 5;
        unsigned channels : 2; /* MALI_POSITIVE */
        enum bifrost_ldst_type type : 2;
        unsigned op : 5;
} __attribute__((packed));

enum bifrost_interp_mode {
        BIFROST_INTERP_PER_FRAG = 0x0,
        BIFROST_INTERP_CENTROID = 0x1,
        BIFROST_INTERP_DEFAULT  = 0x2,
        BIFROST_INTERP_EXPLICIT = 0x3
};

#define BIFROST_ADD_OP_LD_VAR_16 (0x1a << 1)
#define BIFROST_ADD_OP_LD_VAR_32 (0x0a << 1)

/* Fixed location for gl_FragCoord.zw */
#define BIFROST_FRAGZ (23)
#define BIFROST_FRAGW (22)

struct bifrost_ld_var {
        unsigned src0 : 3;

        /* If top two bits set, indirect with src in bottom three */
        unsigned addr : 5;

        unsigned channels : 2; /* MALI_POSITIVE */
        enum bifrost_interp_mode interp_mode : 2;
        unsigned reuse : 1;
        unsigned flat : 1;
        unsigned op : 6;
} __attribute__((packed));

/* Texture control word; the field widths sum to 32 bits */
struct bifrost_tex_ctrl {
        unsigned sampler_index : 4; // also used to signal indirects
        unsigned tex_index : 7;
        bool no_merge_index : 1; // whether to merge (direct) sampler & texture indices
        bool filter : 1; // use the usual filtering pipeline (0 for texelFetch & textureGather)
        unsigned unk0 : 2;
        bool texel_offset : 1; // *Offset()
        bool is_shadow : 1;
        bool is_array : 1;
        unsigned tex_type : 2; // 2D, 3D, Cube, Buffer
        bool compute_lod : 1; // 0 for *Lod()
        bool not_supply_lod : 1; // 0 for *Lod() or when a bias is applied
        bool calc_gradients : 1; // 0 for *Grad()
        unsigned unk1 : 1;
        unsigned result_type : 4; // integer, unsigned, float TODO: why is this 4 bits?
        unsigned unk2 : 4;
} __attribute__((packed));

/* Dual-texture control word; the field widths sum to 32 bits */
struct bifrost_dual_tex_ctrl {
        unsigned sampler_index0 : 2;
        unsigned unk0 : 2;
        unsigned tex_index0 : 2;
        unsigned sampler_index1 : 2;
        unsigned tex_index1 : 2;
        unsigned unk1 : 22;
} __attribute__((packed));

/* A non-zero `vtx` additionally sets bit 0x400 before the shift */
#define BIFROST_ADD_OP_TEX_COMPACT_F32(vtx) ((0x0b000 | ((vtx) ? (0x400) : (0))) >> 10)
#define BIFROST_ADD_OP_TEX_COMPACT_F16(vtx) ((0x1b000 | ((vtx) ? (0x400) : (0))) >> 10)

struct bifrost_tex_compact {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned tex_index : 3;
        unsigned compute_lod : 1;
        unsigned sampler_index : 3;
        unsigned op : 7;
} __attribute__((packed));

enum branch_bit_size {
        BR_SIZE_32 = 0,
        BR_SIZE_16XX = 1,
        BR_SIZE_16YY = 2,
        // For the above combinations of bitsize and location, an extra bit is
        // encoded via comparing the sources. The only possible source of ambiguity
        // would be if the sources were the same, but then the branch condition
        // would be always true or always false anyways, so we can ignore it. But
        // this no longer works when comparing the y component to the x component,
        // since it's valid to compare the y component of a source against its own
        // x component. Instead, the extra bit is encoded via an extra bitsize.
        BR_SIZE_16YX0 = 3,
        BR_SIZE_16YX1 = 4,
        BR_SIZE_32_AND_16X = 5,
        BR_SIZE_32_AND_16Y = 6,
        // Used for comparisons with zero and always-true, see below. I think this
        // only works for integer comparisons.
        BR_SIZE_ZERO = 7,
};

enum bifrost_reg_write_unit {
        REG_WRITE_NONE = 0, // don't write
        REG_WRITE_TWO, // write using reg2
        REG_WRITE_THREE, // write using reg3
};

/* Register block; the field widths sum to 35 bits. Note that reg0 is one bit
 * narrower than the other register fields. */
struct bifrost_regs {
        unsigned uniform_const : 8;
        unsigned reg2 : 6;
        unsigned reg3 : 6;
        unsigned reg0 : 5;
        unsigned reg1 : 6;
        unsigned ctrl : 4;
} __attribute__((packed));

enum bifrost_branch_cond {
        BR_COND_LT = 0,
        BR_COND_LE = 1,
        BR_COND_GE = 2,
        BR_COND_GT = 3,
        // Equal vs. not-equal determined by src0/src1 comparison
        BR_COND_EQ = 4,
        // floating-point comparisons
        // Becomes UNE when you flip the arguments
        BR_COND_OEQ = 5,
        // TODO what happens when you flip the arguments?
        BR_COND_OGT = 6,
        BR_COND_OLT = 7,
};

enum bifrost_branch_code {
        BR_ALWAYS = 63,
};

#define BIFROST_ADD_OP_BRANCH (0x0d000 >> 12)

/* NOTE(review): unlike every other instruction struct in this header, this one
 * is not declared __attribute__((packed)). Adding the attribute would change
 * sizeof(struct bifrost_branch), so it is left as-is -- confirm whether the
 * omission is intentional. */
struct bifrost_branch {
        unsigned src0 : 3;

        /* For BR_SIZE_ZERO, upper two bits become ctrl */
        unsigned src1 : 3;

        /* Offset source -- always uniform/const but
         * theoretically could support indirect jumps? */
        unsigned src2 : 3;

        enum bifrost_branch_cond cond : 3;
        enum branch_bit_size size : 3;

        unsigned op : 5;
};

/* Clause packing */

#define BIFROST_FMA_NOP (0x701960 | BIFROST_SRC_STAGE)
#define BIFROST_ADD_NOP (0x3D960 | BIFROST_SRC_STAGE)

/* 128-bit clause word: 3 + 5 + 64 + 11 + 45 bits */
struct bifrost_fmt1 {
        unsigned ins_0 : 3;
        unsigned tag : 5;
        uint64_t ins_1 : 64;
        unsigned ins_2 : 11;
        uint64_t header : 45;
} __attribute__((packed));

/* Values for the 5-bit `tag` field of struct bifrost_fmt1 */
#define BIFROST_FMT1_INSTRUCTIONS    0b00101
#define BIFROST_FMT1_FINAL           0b01001
#define BIFROST_FMT1_CONSTANTS       0b00001

/* Values for the 4-bit `tag` field of struct bifrost_fmt_constant */
#define BIFROST_FMTC_CONSTANTS       0b0011
#define BIFROST_FMTC_FINAL           0b0111

/* 128-bit constant word: 4 + 4 + 60 + 60 bits */
struct bifrost_fmt_constant {
        unsigned pos : 4;
        unsigned tag : 4;
        uint64_t imm_1 : 60;
        uint64_t imm_2 : 60;
} __attribute__((packed));

/* Presumably the values of the 4-bit `ctrl` field of struct bifrost_regs --
 * confirm against the packing code. */
enum bifrost_reg_control {
        BIFROST_WRITE_FMA_P2 = 1,
        BIFROST_WRITE_FMA_P2_READ_P3 = 2,
        BIFROST_FIRST_WRITE_FMA_P2_READ_P3 = 3,
        BIFROST_READ_P3 = 4,
        BIFROST_WRITE_ADD_P2 = 5,
        BIFROST_WRITE_ADD_P2_READ_P3 = 6,
        BIFROST_WRITE_ADD_P2_FMA_P3 = 7,

        BIFROST_FIRST_NONE = 8,
        BIFROST_FIRST_WRITE_FMA_P2 = 9,
        /* INSTR_INVALID_ENC */
        BIFROST_REG_NONE = 11,
        BIFROST_FIRST_READ_P3 = 12,
        BIFROST_FIRST_WRITE_ADD_P2 = 13,
        BIFROST_FIRST_WRITE_ADD_P2_READ_P3 = 14,
        BIFROST_FIRST_WRITE_ADD_P2_FMA_P3 = 15
};

#endif