1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 template <typename S>
textureLinearPackedRGBA8(S sampler,ivec2 i,int zoffset)6 static PackedRGBA8 textureLinearPackedRGBA8(S sampler, ivec2 i, int zoffset) {
7   assert(sampler->format == TextureFormat::RGBA8);
8   ivec2 frac = i & 0x7F;
9   i >>= 7;
10 
11   I32 row0 = clampCoord(i.x, sampler->width) +
12              clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
13   I32 row1 = row0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
14                      I32(sampler->stride));
15   I16 fracx =
16       CONVERT(frac.x & (i.x >= 0 && i.x < int32_t(sampler->width) - 1), I16);
17   I16 fracy = CONVERT(frac.y, I16);
18 
19   auto a0 =
20       CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.x]), V8<int16_t>);
21   auto a1 =
22       CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row1.x]), V8<int16_t>);
23   a0 += ((a1 - a0) * fracy.x) >> 7;
24 
25   auto b0 =
26       CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.y]), V8<int16_t>);
27   auto b1 =
28       CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row1.y]), V8<int16_t>);
29   b0 += ((b1 - b0) * fracy.y) >> 7;
30 
31   auto abl = combine(lowHalf(a0), lowHalf(b0));
32   auto abh = combine(highHalf(a0), highHalf(b0));
33   abl += ((abh - abl) * fracx.xxxxyyyy) >> 7;
34 
35   auto c0 =
36       CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.z]), V8<int16_t>);
37   auto c1 =
38       CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row1.z]), V8<int16_t>);
39   c0 += ((c1 - c0) * fracy.z) >> 7;
40 
41   auto d0 =
42       CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.w]), V8<int16_t>);
43   auto d1 =
44       CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row1.w]), V8<int16_t>);
45   d0 += ((d1 - d0) * fracy.w) >> 7;
46 
47   auto cdl = combine(lowHalf(c0), lowHalf(d0));
48   auto cdh = combine(highHalf(c0), highHalf(d0));
49   cdl += ((cdh - cdl) * fracx.zzzzwwww) >> 7;
50 
51   return pack(combine(HalfRGBA8(abl), HalfRGBA8(cdl)));
52 }
53 
54 template <typename S>
textureLinearCommit4(S sampler,ivec2 i,int zoffset,uint32_t * buf)55 static inline void textureLinearCommit4(S sampler, ivec2 i, int zoffset,
56                                         uint32_t* buf) {
57   commit_span(buf, textureLinearPackedRGBA8(sampler, i, zoffset));
58 }
59 
60 template <typename S>
textureLinearCommit8(S sampler,ivec2_scalar i,int zoffset,uint32_t * buf)61 static void textureLinearCommit8(S sampler, ivec2_scalar i, int zoffset,
62                                  uint32_t* buf) {
63   assert(sampler->format == TextureFormat::RGBA8);
64   ivec2_scalar frac = i & 0x7F;
65   i >>= 7;
66 
67   uint32_t* row0 =
68       &sampler
69            ->buf[clampCoord(i.x, sampler->width) +
70                  clampCoord(i.y, sampler->height) * sampler->stride + zoffset];
71   uint32_t* row1 =
72       row0 +
73       ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) ? sampler->stride : 0);
74   int16_t fracx = i.x >= 0 && i.x < int32_t(sampler->width) - 1 ? frac.x : 0;
75   int16_t fracy = frac.y;
76 
77   U32 pix0 = unaligned_load<U32>(row0);
78   U32 pix0n = unaligned_load<U32>(row0 + 4);
79   uint32_t pix0x = row0[8];
80   U32 pix1 = unaligned_load<U32>(row1);
81   U32 pix1n = unaligned_load<U32>(row1 + 4);
82   uint32_t pix1x = row1[8];
83 
84   {
85     auto ab0 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0, pix0, 0, 1, 1, 2)),
86                        V16<int16_t>);
87     auto ab1 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1, pix1, 0, 1, 1, 2)),
88                        V16<int16_t>);
89     ab0 += ((ab1 - ab0) * fracy) >> 7;
90 
91     auto cd0 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0, pix0n, 2, 3, 3, 4)),
92                        V16<int16_t>);
93     auto cd1 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1, pix1n, 2, 3, 3, 4)),
94                        V16<int16_t>);
95     cd0 += ((cd1 - cd0) * fracy) >> 7;
96 
97     auto abcdl = combine(lowHalf(ab0), lowHalf(cd0));
98     auto abcdh = combine(highHalf(ab0), highHalf(cd0));
99     abcdl += ((abcdh - abcdl) * fracx) >> 7;
100 
101     commit_span(buf, pack(WideRGBA8(abcdl)));
102   }
103 
104   {
105     auto ab0 =
106         CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0n, pix0n, 0, 1, 1, 2)),
107                 V16<int16_t>);
108     auto ab1 =
109         CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1n, pix1n, 0, 1, 1, 2)),
110                 V16<int16_t>);
111     ab0 += ((ab1 - ab0) * fracy) >> 7;
112 
113     auto cd0 =
114         CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0n, U32(pix0x), 2, 3, 3, 4)),
115                 V16<int16_t>);
116     auto cd1 =
117         CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1n, U32(pix1x), 2, 3, 3, 4)),
118                 V16<int16_t>);
119     cd0 += ((cd1 - cd0) * fracy) >> 7;
120 
121     auto abcdl = combine(lowHalf(ab0), lowHalf(cd0));
122     auto abcdh = combine(highHalf(ab0), highHalf(cd0));
123     abcdl += ((abcdh - abcdl) * fracx) >> 7;
124 
125     commit_span(buf + 4, pack(WideRGBA8(abcdl)));
126   }
127 }
128