1 /*
2 * Copyright (C) 2019-2020 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef __BI_QUIRKS_H
25 #define __BI_QUIRKS_H
26
/* Model-specific quirks requiring compiler workarounds/etc. Quirks
 * may be errata requiring a workaround, or features. We're trying to be
 * quirk-positive here; quirky is the best!
 *
 * Each quirk below occupies its own bit, so a set of quirks is expressed by
 * OR-ing the flags together and tested with a bitwise AND. */

/* Whether this GPU lacks support for the preload mechanism. New GPUs can have
 * varyings and textures preloaded into the fragment shader to amortize the I/O
 * cost; early Bifrost models lacked this feature. */

#define BIFROST_NO_PRELOAD (1 << 0)

/* Whether this GPU lacks support for fp32 transcendentals, requiring backend
 * lowering to low-precision lookup tables and polynomial approximation */

#define BIFROST_NO_FP32_TRANSCENDENTALS (1 << 1)

/* Whether this GPU lacks support for the full form of the CLPER instruction.
 * These GPUs use a simple encoding of CLPER that does not support
 * inactive_result, subgroup_size, or lane_op. Using those features requires
 * lowering to additional ALU instructions. The encoding forces inactive_result
 * = zero, subgroup_size = subgroup4, and lane_op = none. */

#define BIFROST_LIMITED_CLPER (1 << 2)
49
50 static inline unsigned
bifrost_get_quirks(unsigned product_id)51 bifrost_get_quirks(unsigned product_id)
52 {
53 switch (product_id >> 8) {
54 case 0x60:
55 return BIFROST_NO_PRELOAD | BIFROST_NO_FP32_TRANSCENDENTALS |
56 BIFROST_LIMITED_CLPER;
57 case 0x62:
58 return BIFROST_NO_PRELOAD | BIFROST_LIMITED_CLPER;
59 case 0x70: /* G31 */
60 return BIFROST_LIMITED_CLPER;
61 case 0x71:
62 case 0x72:
63 case 0x73:
64 case 0x74:
65 return 0;
66 case 0x90:
67 case 0x91:
68 case 0x92:
69 case 0x93:
70 case 0x94:
71 case 0x95:
72 return BIFROST_NO_PRELOAD;
73 default:
74 unreachable("Unknown Bifrost/Valhall GPU ID");
75 }
76 }
77
/* How many lanes per architectural warp (subgroup)? Used to lower divergent
 * indirects.
 *
 * product_id: the GPU's product ID; only the GPU major (product_id >> 12)
 *             is consulted.
 *
 * Returns 4, 8 or 16 for majors 6, 7 and 9 respectively. Any other major
 * ends up in unreachable(), which must be supplied by the including code. */

static inline unsigned
bifrost_lanes_per_warp(unsigned product_id)
{
        switch (product_id >> 12) {
        case 6: return 4;
        case 7: return 8;
        case 9: return 16;
        default: unreachable("Invalid Bifrost/Valhall GPU major");
        }
}
91
92 #endif
93