1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 template <typename S>
textureLinearPackedRGBA8(S sampler,ivec2 i,int zoffset)6 static PackedRGBA8 textureLinearPackedRGBA8(S sampler, ivec2 i, int zoffset) {
7 assert(sampler->format == TextureFormat::RGBA8);
8 ivec2 frac = i & 0x7F;
9 i >>= 7;
10
11 I32 row0 = clampCoord(i.x, sampler->width) +
12 clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
13 I32 row1 = row0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
14 I32(sampler->stride));
15 I16 fracx =
16 CONVERT(frac.x & (i.x >= 0 && i.x < int32_t(sampler->width) - 1), I16);
17 I16 fracy = CONVERT(frac.y, I16);
18
19 auto a0 =
20 CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.x]), V8<int16_t>);
21 auto a1 =
22 CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row1.x]), V8<int16_t>);
23 a0 += ((a1 - a0) * fracy.x) >> 7;
24
25 auto b0 =
26 CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.y]), V8<int16_t>);
27 auto b1 =
28 CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row1.y]), V8<int16_t>);
29 b0 += ((b1 - b0) * fracy.y) >> 7;
30
31 auto abl = combine(lowHalf(a0), lowHalf(b0));
32 auto abh = combine(highHalf(a0), highHalf(b0));
33 abl += ((abh - abl) * fracx.xxxxyyyy) >> 7;
34
35 auto c0 =
36 CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.z]), V8<int16_t>);
37 auto c1 =
38 CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row1.z]), V8<int16_t>);
39 c0 += ((c1 - c0) * fracy.z) >> 7;
40
41 auto d0 =
42 CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.w]), V8<int16_t>);
43 auto d1 =
44 CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row1.w]), V8<int16_t>);
45 d0 += ((d1 - d0) * fracy.w) >> 7;
46
47 auto cdl = combine(lowHalf(c0), lowHalf(d0));
48 auto cdh = combine(highHalf(c0), highHalf(d0));
49 cdl += ((cdh - cdl) * fracx.zzzzwwww) >> 7;
50
51 return pack(combine(HalfRGBA8(abl), HalfRGBA8(cdl)));
52 }
53
54 template <typename S>
textureLinearCommit4(S sampler,ivec2 i,int zoffset,uint32_t * buf)55 static inline void textureLinearCommit4(S sampler, ivec2 i, int zoffset,
56 uint32_t* buf) {
57 commit_span(buf, textureLinearPackedRGBA8(sampler, i, zoffset));
58 }
59
60 template <typename S>
textureLinearCommit8(S sampler,ivec2_scalar i,int zoffset,uint32_t * buf)61 static void textureLinearCommit8(S sampler, ivec2_scalar i, int zoffset,
62 uint32_t* buf) {
63 assert(sampler->format == TextureFormat::RGBA8);
64 ivec2_scalar frac = i & 0x7F;
65 i >>= 7;
66
67 uint32_t* row0 =
68 &sampler
69 ->buf[clampCoord(i.x, sampler->width) +
70 clampCoord(i.y, sampler->height) * sampler->stride + zoffset];
71 uint32_t* row1 =
72 row0 +
73 ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) ? sampler->stride : 0);
74 int16_t fracx = i.x >= 0 && i.x < int32_t(sampler->width) - 1 ? frac.x : 0;
75 int16_t fracy = frac.y;
76
77 U32 pix0 = unaligned_load<U32>(row0);
78 U32 pix0n = unaligned_load<U32>(row0 + 4);
79 uint32_t pix0x = row0[8];
80 U32 pix1 = unaligned_load<U32>(row1);
81 U32 pix1n = unaligned_load<U32>(row1 + 4);
82 uint32_t pix1x = row1[8];
83
84 {
85 auto ab0 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0, pix0, 0, 1, 1, 2)),
86 V16<int16_t>);
87 auto ab1 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1, pix1, 0, 1, 1, 2)),
88 V16<int16_t>);
89 ab0 += ((ab1 - ab0) * fracy) >> 7;
90
91 auto cd0 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0, pix0n, 2, 3, 3, 4)),
92 V16<int16_t>);
93 auto cd1 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1, pix1n, 2, 3, 3, 4)),
94 V16<int16_t>);
95 cd0 += ((cd1 - cd0) * fracy) >> 7;
96
97 auto abcdl = combine(lowHalf(ab0), lowHalf(cd0));
98 auto abcdh = combine(highHalf(ab0), highHalf(cd0));
99 abcdl += ((abcdh - abcdl) * fracx) >> 7;
100
101 commit_span(buf, pack(WideRGBA8(abcdl)));
102 }
103
104 {
105 auto ab0 =
106 CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0n, pix0n, 0, 1, 1, 2)),
107 V16<int16_t>);
108 auto ab1 =
109 CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1n, pix1n, 0, 1, 1, 2)),
110 V16<int16_t>);
111 ab0 += ((ab1 - ab0) * fracy) >> 7;
112
113 auto cd0 =
114 CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0n, U32(pix0x), 2, 3, 3, 4)),
115 V16<int16_t>);
116 auto cd1 =
117 CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1n, U32(pix1x), 2, 3, 3, 4)),
118 V16<int16_t>);
119 cd0 += ((cd1 - cd0) * fracy) >> 7;
120
121 auto abcdl = combine(lowHalf(ab0), lowHalf(cd0));
122 auto abcdh = combine(highHalf(ab0), highHalf(cd0));
123 abcdl += ((abcdh - abcdl) * fracx) >> 7;
124
125 commit_span(buf + 4, pack(WideRGBA8(abcdl)));
126 }
127 }
128