1 // Copyright 2008-present Contributors to the OpenImageIO project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/OpenImageIO/oiio/blob/master/LICENSE.md
4 
5 
6 #include <cmath>
7 #include <list>
8 #include <sstream>
9 #include <string>
10 
11 #include <OpenImageIO/dassert.h>
12 #include <OpenImageIO/fmath.h>
13 #include <OpenImageIO/imagecache.h>
14 #include <OpenImageIO/imageio.h>
15 #include <OpenImageIO/strutil.h>
16 #include <OpenImageIO/texture.h>
17 #include <OpenImageIO/typedesc.h>
18 #include <OpenImageIO/ustring.h>
19 #include <OpenImageIO/varyingref.h>
20 
21 #include "../field3d.imageio/field3d_backdoor.h"
22 #include "imagecache_pvt.h"
23 #include "texture_pvt.h"
24 
25 OIIO_NAMESPACE_BEGIN
26 using namespace pvt;
27 using namespace f3dpvt;
28 
29 namespace {  // anonymous
30 
// Callable converter used by the sampling code in this file to turn an
// 8-bit unsigned texel value into a float, e.g. uchar2float(texel[c]).
static EightBitConverter<float> uchar2float;
// File format name compared against TextureFile::fileformat() to detect
// Field3D files, which get their world-to-local transform through the
// Field3D back door rather than from a stored matrix.
static ustring s_field3d("field3d");
33 
34 // OIIO_FORCEINLINE float uchar2float (unsigned char val) {
35 //     return float(val) * (1.0f/255.0f);
36 // }
37 
// Convert a 16-bit unsigned texel value to float by scaling with
// 1/65535, so 0 maps to 0.0 and 65535 maps to (approximately) 1.0.
OIIO_FORCEINLINE float
ushort2float(unsigned short val)
{
    const float inv_max = 1.0f / 65535.0f;
    return static_cast<float>(val) * inv_max;
}
43 
44 OIIO_FORCEINLINE float
half2float(half val)45 half2float(half val)
46 {
47     return float(val);
48 }
49 
50 
51 }  // end anonymous namespace
52 
53 namespace pvt {  // namespace pvt
54 
55 
56 
57 bool
texture3d(ustring filename,TextureOpt & options,const Imath::V3f & P,const Imath::V3f & dPdx,const Imath::V3f & dPdy,const Imath::V3f & dPdz,int nchannels,float * result,float * dresultds,float * dresultdt,float * dresultdr)58 TextureSystemImpl::texture3d(ustring filename, TextureOpt& options,
59                              const Imath::V3f& P, const Imath::V3f& dPdx,
60                              const Imath::V3f& dPdy, const Imath::V3f& dPdz,
61                              int nchannels, float* result, float* dresultds,
62                              float* dresultdt, float* dresultdr)
63 {
64     PerThreadInfo* thread_info = m_imagecache->get_perthread_info();
65     TextureFile* texturefile   = find_texturefile(filename, thread_info);
66     return texture3d((TextureHandle*)texturefile, (Perthread*)thread_info,
67                      options, P, dPdx, dPdy, dPdz, nchannels, result, dresultds,
68                      dresultdt, dresultdr);
69 }
70 
71 
72 
73 bool
texture3d(TextureHandle * texture_handle_,Perthread * thread_info_,TextureOpt & options,const Imath::V3f & P,const Imath::V3f & dPdx,const Imath::V3f & dPdy,const Imath::V3f & dPdz,int nchannels,float * result,float * dresultds,float * dresultdt,float * dresultdr)74 TextureSystemImpl::texture3d(TextureHandle* texture_handle_,
75                              Perthread* thread_info_, TextureOpt& options,
76                              const Imath::V3f& P, const Imath::V3f& dPdx,
77                              const Imath::V3f& dPdy, const Imath::V3f& dPdz,
78                              int nchannels, float* result, float* dresultds,
79                              float* dresultdt, float* dresultdr)
80 {
81     // Handle >4 channel lookups by recursion.
82     if (nchannels > 4) {
83         int save_firstchannel = options.firstchannel;
84         while (nchannels) {
85             int n   = std::min(nchannels, 4);
86             bool ok = texture3d(texture_handle_, thread_info_, options, P, dPdx,
87                                 dPdy, dPdz, n, result, dresultds, dresultdt,
88                                 dresultdr);
89             if (!ok)
90                 return false;
91             result += n;
92             if (dresultds)
93                 dresultds += n;
94             if (dresultdt)
95                 dresultdt += n;
96             if (dresultdr)
97                 dresultdr += n;
98             options.firstchannel += n;
99             nchannels -= n;
100         }
101         options.firstchannel = save_firstchannel;  // restore what we changed
102         return true;
103     }
104 
105 #if 0
106     // FIXME: currently, no support of actual MIPmapping.  No rush,
107     // since the only volume format we currently support, Field3D,
108     // doesn't support MIPmapping.
109     static const texture3d_lookup_prototype lookup_functions[] = {
110         // Must be in the same order as Mipmode enum
111         &TextureSystemImpl::texture3d_lookup,
112         &TextureSystemImpl::texture3d_lookup_nomip,
113         &TextureSystemImpl::texture3d_lookup_trilinear_mipmap,
114         &TextureSystemImpl::texture3d_lookup_trilinear_mipmap,
115         &TextureSystemImpl::texture3d_lookup
116     };
117     texture3d_lookup_prototype lookup = lookup_functions[(int)options.mipmode];
118 #else
119     texture3d_lookup_prototype lookup
120         = &TextureSystemImpl::texture3d_lookup_nomip;
121 #endif
122 
123     PerThreadInfo* thread_info = m_imagecache->get_perthread_info(
124         (PerThreadInfo*)thread_info_);
125     TextureFile* texturefile = verify_texturefile((TextureFile*)texture_handle_,
126                                                   thread_info);
127     ImageCacheStatistics& stats(thread_info->m_stats);
128     ++stats.texture3d_batches;
129     ++stats.texture3d_queries;
130 
131     if (!texturefile || texturefile->broken())
132         return missing_texture(options, nchannels, result, dresultds, dresultdt,
133                                dresultdr);
134 
135     if (!options.subimagename.empty()) {
136         // If subimage was specified by name, figure out its index.
137         int s = m_imagecache->subimage_from_name(texturefile,
138                                                  options.subimagename);
139         if (s < 0) {
140             error("Unknown subimage \"{}\" in texture \"{}\"",
141                   options.subimagename, texturefile->filename());
142             return missing_texture(options, nchannels, result, dresultds,
143                                    dresultdt, dresultdr);
144         }
145         options.subimage = s;
146         options.subimagename.clear();
147     }
148     if (options.subimage < 0 || options.subimage >= texturefile->subimages()) {
149         error("Unknown subimage \"{}\" in texture \"{}\"", options.subimagename,
150               texturefile->filename());
151         return missing_texture(options, nchannels, result, dresultds, dresultdt,
152                                dresultdr);
153     }
154 
155     const ImageSpec& spec(texturefile->spec(options.subimage, 0));
156 
157     // Figure out the wrap functions
158     if (options.swrap == TextureOpt::WrapDefault)
159         options.swrap = (TextureOpt::Wrap)texturefile->swrap();
160     if (options.swrap == TextureOpt::WrapPeriodic && ispow2(spec.width))
161         options.swrap = TextureOpt::WrapPeriodicPow2;
162     if (options.twrap == TextureOpt::WrapDefault)
163         options.twrap = (TextureOpt::Wrap)texturefile->twrap();
164     if (options.twrap == TextureOpt::WrapPeriodic && ispow2(spec.height))
165         options.twrap = TextureOpt::WrapPeriodicPow2;
166     if (options.rwrap == TextureOpt::WrapDefault)
167         options.rwrap = (TextureOpt::Wrap)texturefile->rwrap();
168     if (options.rwrap == TextureOpt::WrapPeriodic && ispow2(spec.depth))
169         options.rwrap = TextureOpt::WrapPeriodicPow2;
170 
171     int actualchannels = Imath::clamp(spec.nchannels - options.firstchannel, 0,
172                                       nchannels);
173 
174     // Do the volume lookup in local space.
175     Imath::V3f Plocal;
176     const auto& si(texturefile->subimageinfo(options.subimage));
177     if (si.Mlocal) {
178         // See if there is a world-to-local transform stored in the cache
179         // entry. If so, use it to transform the input point.
180         si.Mlocal->multVecMatrix(P, Plocal);
181     } else if (texturefile->fileformat() == s_field3d) {
182         // Field3d is special -- it allows nonlinear or time-varying
183         // transforms procedurally, but we have to use a back door.
184         auto input                   = texturefile->open(thread_info);
185         Field3DInput_Interface* f3di = (Field3DInput_Interface*)input.get();
186         if (!f3di) {
187             errorf("Unable to open texture \"%s\"", texturefile->filename());
188             return false;
189         }
190         f3di->worldToLocal(P, Plocal, options.time);
191     } else {
192         // If no world-to-local matrix could be discerned, just use the
193         // input point directly.
194         Plocal = P;
195     }
196 
197     // FIXME: we don't bother with this for dPdx, dPdy, and dPdz only
198     // because we know that we don't currently filter volume lookups and
199     // therefore don't actually use the derivs.  If/when we do, we'll
200     // need to transform them into local space as well.
201 
202     bool ok = (this->*lookup)(*texturefile, thread_info, options, nchannels,
203                               actualchannels, Plocal, dPdx, dPdy, dPdz, result,
204                               dresultds, dresultdt, dresultdr);
205 
206     if (actualchannels < nchannels && options.firstchannel == 0
207         && m_gray_to_rgb)
208         fill_gray_channels(spec, nchannels, result, dresultds, dresultdt,
209                            dresultdr);
210     return ok;
211 }
212 
213 
214 
215 bool
texture3d(ustring filename,TextureOptions & options,Runflag * runflags,int beginactive,int endactive,VaryingRef<Imath::V3f> P,VaryingRef<Imath::V3f> dPdx,VaryingRef<Imath::V3f> dPdy,VaryingRef<Imath::V3f> dPdz,int nchannels,float * result,float * dresultds,float * dresultdt,float * dresultdr)216 TextureSystemImpl::texture3d(ustring filename, TextureOptions& options,
217                              Runflag* runflags, int beginactive, int endactive,
218                              VaryingRef<Imath::V3f> P,
219                              VaryingRef<Imath::V3f> dPdx,
220                              VaryingRef<Imath::V3f> dPdy,
221                              VaryingRef<Imath::V3f> dPdz, int nchannels,
222                              float* result, float* dresultds, float* dresultdt,
223                              float* dresultdr)
224 {
225     Perthread* thread_info        = get_perthread_info();
226     TextureHandle* texture_handle = get_texture_handle(filename, thread_info);
227     return texture3d(texture_handle, thread_info, options, runflags,
228                      beginactive, endactive, P, dPdx, dPdy, dPdz, nchannels,
229                      result, dresultds, dresultdt, dresultdr);
230 }
231 
232 
233 
234 bool
texture3d(TextureHandle * texture_handle,Perthread * thread_info,TextureOptions & options,Runflag * runflags,int beginactive,int endactive,VaryingRef<Imath::V3f> P,VaryingRef<Imath::V3f> dPdx,VaryingRef<Imath::V3f> dPdy,VaryingRef<Imath::V3f> dPdz,int nchannels,float * result,float * dresultds,float * dresultdt,float * dresultdr)235 TextureSystemImpl::texture3d(
236     TextureHandle* texture_handle, Perthread* thread_info,
237     TextureOptions& options, Runflag* runflags, int beginactive, int endactive,
238     VaryingRef<Imath::V3f> P, VaryingRef<Imath::V3f> dPdx,
239     VaryingRef<Imath::V3f> dPdy, VaryingRef<Imath::V3f> dPdz, int nchannels,
240     float* result, float* dresultds, float* dresultdt, float* dresultdr)
241 {
242     bool ok = true;
243     result += beginactive * nchannels;
244     if (dresultds) {
245         dresultds += beginactive * nchannels;
246         dresultdt += beginactive * nchannels;
247     }
248     for (int i = beginactive; i < endactive; ++i) {
249         if (runflags[i]) {
250             TextureOpt opt(options, i);
251             ok &= texture3d(texture_handle, thread_info, opt, P[i], dPdx[i],
252                             dPdy[i], dPdz[i], 4, result, dresultds, dresultdt,
253                             dresultdr);
254         }
255         result += nchannels;
256         if (dresultds) {
257             dresultds += nchannels;
258             dresultdt += nchannels;
259             dresultdr += nchannels;
260         }
261     }
262     return ok;
263 }
264 
265 
266 
267 bool
texture3d_lookup_nomip(TextureFile & texturefile,PerThreadInfo * thread_info,TextureOpt & options,int nchannels_result,int actualchannels,const Imath::V3f & P,const Imath::V3f &,const Imath::V3f &,const Imath::V3f &,float * result,float * dresultds,float * dresultdt,float * dresultdr)268 TextureSystemImpl::texture3d_lookup_nomip(
269     TextureFile& texturefile, PerThreadInfo* thread_info, TextureOpt& options,
270     int nchannels_result, int actualchannels, const Imath::V3f& P,
271     const Imath::V3f& /*dPdx*/, const Imath::V3f& /*dPdy*/,
272     const Imath::V3f& /*dPdz*/, float* result, float* dresultds,
273     float* dresultdt, float* dresultdr)
274 {
275     // Initialize results to 0.  We'll add from here on as we sample.
276     for (int c = 0; c < nchannels_result; ++c)
277         result[c] = 0;
278     if (dresultds) {
279         OIIO_DASSERT(dresultdt && dresultdr);
280         for (int c = 0; c < nchannels_result; ++c)
281             dresultds[c] = 0;
282         for (int c = 0; c < nchannels_result; ++c)
283             dresultdt[c] = 0;
284         for (int c = 0; c < nchannels_result; ++c)
285             dresultdr[c] = 0;
286     }
287     // If the user only provided us with one pointer, clear all to simplify
288     // the rest of the code, but only after we zero out the data for them so
289     // they know something went wrong.
290     if (!(dresultds && dresultdt && dresultdr))
291         dresultds = dresultdt = dresultdr = NULL;
292 
293     static const accum3d_prototype accum_functions[] = {
294         // Must be in the same order as InterpMode enum
295         &TextureSystemImpl::accum3d_sample_closest,
296         &TextureSystemImpl::accum3d_sample_bilinear,
297         &TextureSystemImpl::accum3d_sample_bilinear,  // FIXME: bicubic,
298         &TextureSystemImpl::accum3d_sample_bilinear,
299     };
300     accum3d_prototype accumer = accum_functions[(int)options.interpmode];
301     bool ok = (this->*accumer)(P, 0, texturefile, thread_info, options,
302                                nchannels_result, actualchannels, 1.0f, result,
303                                dresultds, dresultdt, dresultdr);
304 
305     // Update stats
306     ImageCacheStatistics& stats(thread_info->m_stats);
307     ++stats.aniso_queries;
308     ++stats.aniso_probes;
309     switch (options.interpmode) {
310     case TextureOpt::InterpClosest: ++stats.closest_interps; break;
311     case TextureOpt::InterpBilinear: ++stats.bilinear_interps; break;
312     case TextureOpt::InterpBicubic: ++stats.cubic_interps; break;
313     case TextureOpt::InterpSmartBicubic: ++stats.bilinear_interps; break;
314     }
315     return ok;
316 }
317 
318 
319 
320 bool
accum3d_sample_closest(const Imath::V3f & P,int miplevel,TextureFile & texturefile,PerThreadInfo * thread_info,TextureOpt & options,int nchannels_result,int actualchannels,float weight,float * accum,float * daccumds,float * daccumdt,float * daccumdr)321 TextureSystemImpl::accum3d_sample_closest(
322     const Imath::V3f& P, int miplevel, TextureFile& texturefile,
323     PerThreadInfo* thread_info, TextureOpt& options, int nchannels_result,
324     int actualchannels, float weight, float* accum, float* daccumds,
325     float* daccumdt, float* daccumdr)
326 {
327     const ImageSpec& spec(texturefile.spec(options.subimage, miplevel));
328     const ImageCacheFile::LevelInfo& levelinfo(
329         texturefile.levelinfo(options.subimage, miplevel));
330     TypeDesc::BASETYPE pixeltype = texturefile.pixeltype(options.subimage);
331     // As passed in, (s,t) map the texture to (0,1).  Remap to texel coords.
332     float s = P[0] * spec.full_width + spec.full_x;
333     float t = P[1] * spec.full_height + spec.full_y;
334     float r = P[2] * spec.full_depth + spec.full_z;
335     int stex, ttex, rtex;       // Texel coordinates
336     (void)floorfrac(s, &stex);  // don't need fractional result
337     (void)floorfrac(t, &ttex);
338     (void)floorfrac(r, &rtex);
339 
340     wrap_impl swrap_func = wrap_functions[(int)options.swrap];
341     wrap_impl twrap_func = wrap_functions[(int)options.twrap];
342     wrap_impl rwrap_func = wrap_functions[(int)options.rwrap];
343     bool svalid, tvalid, rvalid;  // Valid texels?  false means black border
344     svalid = swrap_func(stex, spec.x, spec.width);
345     tvalid = twrap_func(ttex, spec.y, spec.height);
346     rvalid = rwrap_func(rtex, spec.z, spec.depth);
347     if (!levelinfo.full_pixel_range) {
348         svalid &= (stex >= spec.x
349                    && stex < (spec.x + spec.width));  // data window
350         tvalid &= (ttex >= spec.y && ttex < (spec.y + spec.height));
351         rvalid &= (rtex >= spec.z && rtex < (spec.z + spec.depth));
352     }
353     if (!(svalid & tvalid & rvalid)) {
354         // All texels we need were out of range and using 'black' wrap.
355         return true;
356     }
357 
358     int tile_chbegin = 0, tile_chend = spec.nchannels;
359     if (spec.nchannels > m_max_tile_channels) {
360         // For files with many channels, narrow the range we cache
361         tile_chbegin = options.firstchannel;
362         tile_chend   = options.firstchannel + actualchannels;
363     }
364     int tile_s = (stex - spec.x) % spec.tile_width;
365     int tile_t = (ttex - spec.y) % spec.tile_height;
366     int tile_r = (rtex - spec.z) % spec.tile_depth;
367     TileID id(texturefile, options.subimage, miplevel, stex - tile_s,
368               ttex - tile_t, rtex - tile_r, tile_chbegin, tile_chend);
369     bool ok = find_tile(id, thread_info, true);
370     if (!ok)
371         error("{}", m_imagecache->geterror());
372     TileRef& tile(thread_info->tile);
373     if (!tile || !ok)
374         return false;
375     int tilepel = (tile_r * spec.tile_height + tile_t) * spec.tile_width
376                   + tile_s;
377     int startchan_in_tile = options.firstchannel - id.chbegin();
378     int offset            = spec.nchannels * tilepel + startchan_in_tile;
379     OIIO_DASSERT((size_t)offset < spec.nchannels * spec.tile_pixels());
380     if (pixeltype == TypeDesc::UINT8) {
381         const unsigned char* texel = tile->bytedata() + offset;
382         for (int c = 0; c < actualchannels; ++c)
383             accum[c] += weight * uchar2float(texel[c]);
384     } else if (pixeltype == TypeDesc::UINT16) {
385         const unsigned short* texel = tile->ushortdata() + offset;
386         for (int c = 0; c < actualchannels; ++c)
387             accum[c] += weight * ushort2float(texel[c]);
388     } else if (pixeltype == TypeDesc::HALF) {
389         const half* texel = tile->halfdata() + offset;
390         for (int c = 0; c < actualchannels; ++c)
391             accum[c] += weight * half2float(texel[c]);
392     } else {
393         OIIO_DASSERT(pixeltype == TypeDesc::FLOAT);
394         const float* texel = tile->floatdata() + offset;
395         for (int c = 0; c < actualchannels; ++c)
396             accum[c] += weight * texel[c];
397     }
398 
399     // Add appropriate amount of "fill" color to extra channels in
400     // non-"black"-wrapped regions.
401     if (nchannels_result > actualchannels && options.fill) {
402         float f = weight * options.fill;
403         for (int c = actualchannels; c < nchannels_result; ++c)
404             accum[c] += f;
405         if (OIIO_UNLIKELY(daccumds)) {
406             OIIO_DASSERT(daccumdt && daccumdr);
407             for (int c = actualchannels; c < nchannels_result; ++c) {
408                 daccumds[c] = 0.0f;
409                 daccumdt[c] = 0.0f;
410                 daccumdr[c] = 0.0f;
411             }
412         }
413     }
414     return true;
415 }
416 
417 
418 
419 bool
accum3d_sample_bilinear(const Imath::V3f & P,int miplevel,TextureFile & texturefile,PerThreadInfo * thread_info,TextureOpt & options,int nchannels_result,int actualchannels,float weight,float * accum,float * daccumds,float * daccumdt,float * daccumdr)420 TextureSystemImpl::accum3d_sample_bilinear(
421     const Imath::V3f& P, int miplevel, TextureFile& texturefile,
422     PerThreadInfo* thread_info, TextureOpt& options, int nchannels_result,
423     int actualchannels, float weight, float* accum, float* daccumds,
424     float* daccumdt, float* daccumdr)
425 {
426     const ImageSpec& spec(texturefile.spec(options.subimage, miplevel));
427     const ImageCacheFile::LevelInfo& levelinfo(
428         texturefile.levelinfo(options.subimage, miplevel));
429     TypeDesc::BASETYPE pixeltype = texturefile.pixeltype(options.subimage);
430     // As passed in, (s,t) map the texture to (0,1).  Remap to texel coords
431     // and subtract 0.5 because samples are at texel centers.
432     float s = P[0] * spec.full_width + spec.full_x - 0.5f;
433     float t = P[1] * spec.full_height + spec.full_y - 0.5f;
434     float r = P[2] * spec.full_depth + spec.full_z - 0.5f;
435     int sint, tint, rint;
436     float sfrac = floorfrac(s, &sint);
437     float tfrac = floorfrac(t, &tint);
438     float rfrac = floorfrac(r, &rint);
439     // Now (sint,tint,rint) are the integer coordinates of the texel to the
440     // immediate "upper left" of the lookup point, and (sfrac,tfrac,rfrac) are
441     // the amount that the lookup point is actually offset from the
442     // texel center (with (1,1) being all the way to the next texel down
443     // and to the right).
444 
445     // Wrap
446     wrap_impl swrap_func = wrap_functions[(int)options.swrap];
447     wrap_impl twrap_func = wrap_functions[(int)options.twrap];
448     wrap_impl rwrap_func = wrap_functions[(int)options.rwrap];
449 
450     int stex[2], ttex[2], rtex[2];  // Texel coords
451     stex[0] = sint;
452     stex[1] = sint + 1;
453     ttex[0] = tint;
454     ttex[1] = tint + 1;
455     rtex[0] = rint;
456     rtex[1] = rint + 1;
457     //    bool svalid[2], tvalid[2], rvalid[2];  // Valid texels?  false means black border
458     union {
459         bool bvalid[6];
460         unsigned long long ivalid;
461     } valid_storage;
462     valid_storage.ivalid = 0;
463     OIIO_DASSERT(sizeof(valid_storage) == 8);
464     const unsigned long long none_valid = 0;
465     const unsigned long long all_valid  = littleendian() ? 0x010101010101LL
466                                                          : 0x01010101010100LL;
467 
468     bool* svalid = valid_storage.bvalid;
469     bool* tvalid = valid_storage.bvalid + 2;
470     bool* rvalid = valid_storage.bvalid + 4;
471 
472     svalid[0] = swrap_func(stex[0], spec.x, spec.width);
473     svalid[1] = swrap_func(stex[1], spec.x, spec.width);
474     tvalid[0] = twrap_func(ttex[0], spec.y, spec.height);
475     tvalid[1] = twrap_func(ttex[1], spec.y, spec.height);
476     rvalid[0] = rwrap_func(rtex[0], spec.z, spec.depth);
477     rvalid[1] = rwrap_func(rtex[1], spec.z, spec.depth);
478     // Account for crop windows
479     if (!levelinfo.full_pixel_range) {
480         svalid[0] &= (stex[0] >= spec.x && stex[0] < spec.x + spec.width);
481         svalid[1] &= (stex[1] >= spec.x && stex[1] < spec.x + spec.width);
482         tvalid[0] &= (ttex[0] >= spec.y && ttex[0] < spec.y + spec.height);
483         tvalid[1] &= (ttex[1] >= spec.y && ttex[1] < spec.y + spec.height);
484         rvalid[0] &= (rtex[0] >= spec.z && rtex[0] < spec.z + spec.depth);
485         rvalid[1] &= (rtex[1] >= spec.z && rtex[1] < spec.z + spec.depth);
486     }
487     //    if (! (svalid[0] | svalid[1] | tvalid[0] | tvalid[1] | rvalid[0] | rvalid[1]))
488     if (valid_storage.ivalid == none_valid)
489         return true;  // All texels we need were out of range and using 'black' wrap
490 
491     int tilewidthmask  = spec.tile_width - 1;  // e.g. 63
492     int tileheightmask = spec.tile_height - 1;
493     int tiledepthmask  = spec.tile_depth - 1;
494     const unsigned char* texel[2][2][2];
495     TileRef savetile[2][2][2];
496     static float black[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
497     int tile_s            = (stex[0] - spec.x) % spec.tile_width;
498     int tile_t            = (ttex[0] - spec.y) % spec.tile_height;
499     int tile_r            = (rtex[0] - spec.z) % spec.tile_depth;
500     bool s_onetile     = (tile_s != tilewidthmask) & (stex[0] + 1 == stex[1]);
501     bool t_onetile     = (tile_t != tileheightmask) & (ttex[0] + 1 == ttex[1]);
502     bool r_onetile     = (tile_r != tiledepthmask) & (rtex[0] + 1 == rtex[1]);
503     bool onetile       = (s_onetile & t_onetile & r_onetile);
504     size_t channelsize = texturefile.channelsize(options.subimage);
505     size_t pixelsize   = texturefile.pixelsize(options.subimage);
506     int tile_chbegin = 0, tile_chend = spec.nchannels;
507     if (spec.nchannels > m_max_tile_channels) {
508         // For files with many channels, narrow the range we cache
509         tile_chbegin = options.firstchannel;
510         tile_chend   = options.firstchannel + actualchannels;
511     }
512     TileID id(texturefile, options.subimage, miplevel, 0, 0, 0, tile_chbegin,
513               tile_chend);
514     int startchan_in_tile = options.firstchannel - id.chbegin();
515     if (onetile && valid_storage.ivalid == all_valid) {
516         // Shortcut if all the texels we need are on the same tile
517         id.xyz(stex[0] - tile_s, ttex[0] - tile_t, rtex[0] - tile_r);
518         bool ok = find_tile(id, thread_info, true);
519         if (!ok)
520             error("{}", m_imagecache->geterror());
521         TileRef& tile(thread_info->tile);
522         if (!tile->valid())
523             return false;
524         size_t tilepel = (tile_r * spec.tile_height + tile_t) * spec.tile_width
525                          + tile_s;
526         size_t offset = (spec.nchannels * tilepel + startchan_in_tile)
527                         * channelsize;
528         OIIO_DASSERT((size_t)offset < spec.tile_width * spec.tile_height
529                                           * spec.tile_depth * pixelsize);
530 
531         const unsigned char* b = tile->bytedata() + offset;
532         texel[0][0][0]         = b;
533         texel[0][0][1]         = b + pixelsize;
534         texel[0][1][0]         = b + pixelsize * spec.tile_width;
535         texel[0][1][1]         = b + pixelsize * spec.tile_width + pixelsize;
536         b += pixelsize * spec.tile_width * spec.tile_height;
537         texel[1][0][0] = b;
538         texel[1][0][1] = b + pixelsize;
539         texel[1][1][0] = b + pixelsize * spec.tile_width;
540         texel[1][1][1] = b + pixelsize * spec.tile_width + pixelsize;
541     } else {
542         bool firstsample = true;
543         for (int k = 0; k < 2; ++k) {
544             for (int j = 0; j < 2; ++j) {
545                 for (int i = 0; i < 2; ++i) {
546                     if (!(svalid[i] && tvalid[j] && rvalid[k])) {
547                         texel[k][j][i] = (unsigned char*)black;
548                         continue;
549                     }
550                     tile_s = (stex[i] - spec.x) % spec.tile_width;
551                     tile_t = (ttex[j] - spec.y) % spec.tile_height;
552                     tile_r = (rtex[k] - spec.z) % spec.tile_depth;
553                     id.xyz(stex[i] - tile_s, ttex[j] - tile_t,
554                            rtex[k] - tile_r);
555                     bool ok = find_tile(id, thread_info, firstsample);
556                     if (!ok)
557                         error("{}", m_imagecache->geterror());
558                     firstsample = false;
559                     TileRef& tile(thread_info->tile);
560                     if (!tile->valid())
561                         return false;
562                     savetile[k][j][i] = tile;
563                     size_t tilepel    = (tile_r * spec.tile_height + tile_t)
564                                          * spec.tile_width
565                                      + tile_s;
566                     size_t offset = (spec.nchannels * tilepel
567                                      + startchan_in_tile)
568                                     * channelsize;
569 #ifndef NDEBUG
570                     if ((size_t)offset >= spec.tile_width * spec.tile_height
571                                               * spec.tile_depth * pixelsize)
572                         std::cerr << "offset=" << offset << ", whd "
573                                   << spec.tile_width << ' ' << spec.tile_height
574                                   << ' ' << spec.tile_depth << " pixsize "
575                                   << pixelsize << "\n";
576 #endif
577                     OIIO_DASSERT((size_t)offset
578                                  < spec.tile_width * spec.tile_height
579                                        * spec.tile_depth * pixelsize);
580                     texel[k][j][i] = tile->bytedata() + offset;
581                     OIIO_DASSERT(tile->id() == id);
582                 }
583             }
584         }
585     }
586     // FIXME -- optimize the above loop by unrolling
587 
588     // clang-format off
589     if (pixeltype == TypeDesc::UINT8) {
590         for (int c = 0; c < actualchannels; ++c)
591             accum[c] += weight
592                         * trilerp(uchar2float(texel[0][0][0][c]),
593                                   uchar2float(texel[0][0][1][c]),
594                                   uchar2float(texel[0][1][0][c]),
595                                   uchar2float(texel[0][1][1][c]),
596                                   uchar2float(texel[1][0][0][c]),
597                                   uchar2float(texel[1][0][1][c]),
598                                   uchar2float(texel[1][1][0][c]),
599                                   uchar2float(texel[1][1][1][c]), sfrac, tfrac,
600                                   rfrac);
601         if (daccumds) {
602             float scalex = weight * spec.full_width;
603             float scaley = weight * spec.full_height;
604             float scalez = weight * spec.full_depth;
605             for (int c = 0; c < actualchannels; ++c) {
606                 daccumds[c] += scalex
607                                * bilerp(uchar2float(texel[0][0][1][c])
608                                             - uchar2float(texel[0][0][0][c]),
609                                         uchar2float(texel[0][1][1][c])
610                                             - uchar2float(texel[0][1][0][c]),
611                                         uchar2float(texel[1][0][1][c])
612                                             - uchar2float(texel[1][0][0][c]),
613                                         uchar2float(texel[1][1][1][c])
614                                             - uchar2float(texel[1][1][0][c]),
615                                         tfrac, rfrac);
616                 daccumdt[c] += scaley
617                                * bilerp(uchar2float(texel[0][1][0][c])
618                                             - uchar2float(texel[0][0][0][c]),
619                                         uchar2float(texel[0][1][1][c])
620                                             - uchar2float(texel[0][0][1][c]),
621                                         uchar2float(texel[1][1][0][c])
622                                             - uchar2float(texel[1][0][0][c]),
623                                         uchar2float(texel[1][1][1][c])
624                                             - uchar2float(texel[1][0][1][c]),
625                                         sfrac, rfrac);
626                 daccumdr[c] += scalez
627                                * bilerp(uchar2float(texel[0][1][0][c])
628                                             - uchar2float(texel[1][1][0][c]),
629                                         uchar2float(texel[0][1][1][c])
630                                             - uchar2float(texel[1][1][1][c]),
631                                         uchar2float(texel[0][0][1][c])
632                                             - uchar2float(texel[1][0][0][c]),
633                                         uchar2float(texel[0][1][1][c])
634                                             - uchar2float(texel[1][1][1][c]),
635                                         sfrac, tfrac);
636             }
637         }
638     } else if (pixeltype == TypeDesc::UINT16) {
639         for (int c = 0; c < actualchannels; ++c)
640             accum[c]
641                 += weight
642                    * trilerp(ushort2float(((const uint16_t*)texel[0][0][0])[c]),
643                              ushort2float(((const uint16_t*)texel[0][0][1])[c]),
644                              ushort2float(((const uint16_t*)texel[0][1][0])[c]),
645                              ushort2float(((const uint16_t*)texel[0][1][1])[c]),
646                              ushort2float(((const uint16_t*)texel[1][0][0])[c]),
647                              ushort2float(((const uint16_t*)texel[1][0][1])[c]),
648                              ushort2float(((const uint16_t*)texel[1][1][0])[c]),
649                              ushort2float(((const uint16_t*)texel[1][1][1])[c]),
650                              sfrac, tfrac, rfrac);
651         if (daccumds) {
652             float scalex = weight * spec.full_width;
653             float scaley = weight * spec.full_height;
654             float scalez = weight * spec.full_depth;
655             for (int c = 0; c < actualchannels; ++c) {
656                 daccumds[c] += scalex * bilerp(
657                              ushort2float(((const uint16_t*)texel[0][0][1])[c])
658                                  - ushort2float(
659                                        ((const uint16_t*)texel[0][0][0])[c]),
660                              ushort2float(((const uint16_t*)texel[0][1][1])[c])
661                                  - ushort2float(
662                                        ((const uint16_t*)texel[0][1][0])[c]),
663                              ushort2float(((const uint16_t*)texel[1][0][1])[c])
664                                  - ushort2float(
665                                        ((const uint16_t*)texel[1][0][0])[c]),
666                              ushort2float(((const uint16_t*)texel[1][1][1])[c])
667                                  - ushort2float(
668                                        ((const uint16_t*)texel[1][1][0])[c]),
669                              tfrac, rfrac);
670                 daccumdt[c] += scaley * bilerp(
671                              ushort2float(((const uint16_t*)texel[0][1][0])[c])
672                                  - ushort2float(
673                                        ((const uint16_t*)texel[0][0][0])[c]),
674                              ushort2float(((const uint16_t*)texel[0][1][1])[c])
675                                  - ushort2float(
676                                        ((const uint16_t*)texel[0][0][1])[c]),
677                              ushort2float(((const uint16_t*)texel[1][1][0])[c])
678                                  - ushort2float(
679                                        ((const uint16_t*)texel[1][0][0])[c]),
680                              ushort2float(((const uint16_t*)texel[1][1][1])[c])
681                                  - ushort2float(
682                                        ((const uint16_t*)texel[1][0][1])[c]),
683                              sfrac, rfrac);
684                 daccumdr[c] += scalez * bilerp(
685                              ushort2float(((const uint16_t*)texel[0][1][0])[c])
686                                  - ushort2float(
687                                        ((const uint16_t*)texel[1][1][0])[c]),
688                              ushort2float(((const uint16_t*)texel[0][1][1])[c])
689                                  - ushort2float(
690                                        ((const uint16_t*)texel[1][1][1])[c]),
691                              ushort2float(((const uint16_t*)texel[0][0][1])[c])
692                                  - ushort2float(
693                                        ((const uint16_t*)texel[1][0][0])[c]),
694                              ushort2float(((const uint16_t*)texel[0][1][1])[c])
695                                  - ushort2float(
696                                        ((const uint16_t*)texel[1][1][1])[c]),
697                              sfrac, tfrac);
698             }
699         }
700     } else if (pixeltype == TypeDesc::HALF) {
701         for (int c = 0; c < actualchannels; ++c)
702             accum[c] += weight
703                         * trilerp(half2float(((const half*)texel[0][0][0])[c]),
704                                   half2float(((const half*)texel[0][0][1])[c]),
705                                   half2float(((const half*)texel[0][1][0])[c]),
706                                   half2float(((const half*)texel[0][1][1])[c]),
707                                   half2float(((const half*)texel[1][0][0])[c]),
708                                   half2float(((const half*)texel[1][0][1])[c]),
709                                   half2float(((const half*)texel[1][1][0])[c]),
710                                   half2float(((const half*)texel[1][1][1])[c]),
711                                   sfrac, tfrac, rfrac);
712         if (daccumds) {
713             float scalex = weight * spec.full_width;
714             float scaley = weight * spec.full_height;
715             float scalez = weight * spec.full_depth;
716             for (int c = 0; c < actualchannels; ++c) {
717                 daccumds[c] += scalex * bilerp(
718                              half2float(((const half*)texel[0][0][1])[c])
719                                  - half2float(((const half*)texel[0][0][0])[c]),
720                              half2float(((const half*)texel[0][1][1])[c])
721                                  - half2float(((const half*)texel[0][1][0])[c]),
722                              half2float(((const half*)texel[1][0][1])[c])
723                                  - half2float(((const half*)texel[1][0][0])[c]),
724                              half2float(((const half*)texel[1][1][1])[c])
725                                  - half2float(((const half*)texel[1][1][0])[c]),
726                              tfrac, rfrac);
727                 daccumdt[c] += scaley * bilerp(
728                              half2float(((const half*)texel[0][1][0])[c])
729                                  - half2float(((const half*)texel[0][0][0])[c]),
730                              half2float(((const half*)texel[0][1][1])[c])
731                                  - half2float(((const half*)texel[0][0][1])[c]),
732                              half2float(((const half*)texel[1][1][0])[c])
733                                  - half2float(((const half*)texel[1][0][0])[c]),
734                              half2float(((const half*)texel[1][1][1])[c])
735                                  - half2float(((const half*)texel[1][0][1])[c]),
736                              sfrac, rfrac);
737                 daccumdr[c] += scalez * bilerp(
738                              half2float(((const half*)texel[0][1][0])[c])
739                                  - half2float(((const half*)texel[1][1][0])[c]),
740                              half2float(((const half*)texel[0][1][1])[c])
741                                  - half2float(((const half*)texel[1][1][1])[c]),
742                              half2float(((const half*)texel[0][0][1])[c])
743                                  - half2float(((const half*)texel[1][0][0])[c]),
744                              half2float(((const half*)texel[0][1][1])[c])
745                                  - half2float(((const half*)texel[1][1][1])[c]),
746                              sfrac, tfrac);
747             }
748         }
749     } else {
750         // General case for float tiles
751         trilerp_mad((const float*)texel[0][0][0], (const float*)texel[0][0][1],
752                     (const float*)texel[0][1][0], (const float*)texel[0][1][1],
753                     (const float*)texel[1][0][0], (const float*)texel[1][0][1],
754                     (const float*)texel[1][1][0], (const float*)texel[1][1][1],
755                     sfrac, tfrac, rfrac, weight, actualchannels, accum);
756         if (daccumds) {
757             float scalex = weight * spec.full_width;
758             float scaley = weight * spec.full_height;
759             float scalez = weight * spec.full_depth;
760             for (int c = 0; c < actualchannels; ++c) {
761                 daccumds[c] += scalex
762                                * bilerp(((const float*)texel[0][0][1])[c]
763                                             - ((const float*)texel[0][0][0])[c],
764                                         ((const float*)texel[0][1][1])[c]
765                                             - ((const float*)texel[0][1][0])[c],
766                                         ((const float*)texel[1][0][1])[c]
767                                             - ((const float*)texel[1][0][0])[c],
768                                         ((const float*)texel[1][1][1])[c]
769                                             - ((const float*)texel[1][1][0])[c],
770                                         tfrac, rfrac);
771                 daccumdt[c] += scaley
772                                * bilerp(((const float*)texel[0][1][0])[c]
773                                             - ((const float*)texel[0][0][0])[c],
774                                         ((const float*)texel[0][1][1])[c]
775                                             - ((const float*)texel[0][0][1])[c],
776                                         ((const float*)texel[1][1][0])[c]
777                                             - ((const float*)texel[1][0][0])[c],
778                                         ((const float*)texel[1][1][1])[c]
779                                             - ((const float*)texel[1][0][1])[c],
780                                         sfrac, rfrac);
781                 daccumdr[c] += scalez
782                                * bilerp(((const float*)texel[0][1][0])[c]
783                                             - ((const float*)texel[1][1][0])[c],
784                                         ((const float*)texel[0][1][1])[c]
785                                             - ((const float*)texel[1][1][1])[c],
786                                         ((const float*)texel[0][0][1])[c]
787                                             - ((const float*)texel[1][0][0])[c],
788                                         ((const float*)texel[0][1][1])[c]
789                                             - ((const float*)texel[1][1][1])[c],
790                                         sfrac, tfrac);
791             }
792         }
793     }
794     // clang-format on
795 
796     // Add appropriate amount of "fill" color to extra channels in
797     // non-"black"-wrapped regions.
798     if (nchannels_result > actualchannels && options.fill) {
799         float f = trilerp(1.0f * (rvalid[0] * tvalid[0] * svalid[0]),
800                           1.0f * (rvalid[0] * tvalid[0] * svalid[1]),
801                           1.0f * (rvalid[0] * tvalid[1] * svalid[0]),
802                           1.0f * (rvalid[0] * tvalid[1] * svalid[1]),
803                           1.0f * (rvalid[1] * tvalid[0] * svalid[0]),
804                           1.0f * (rvalid[1] * tvalid[0] * svalid[1]),
805                           1.0f * (rvalid[1] * tvalid[1] * svalid[0]),
806                           1.0f * (rvalid[1] * tvalid[1] * svalid[1]), sfrac,
807                           tfrac, rfrac);
808         f *= weight * options.fill;
809         for (int c = actualchannels; c < nchannels_result; ++c)
810             accum[c] += f;
811     }
812     return true;
813 }
814 
815 
816 
817 bool
texture3d(TextureHandle * texture_handle,Perthread * thread_info,TextureOptBatch & options,Tex::RunMask mask,const float * P,const float * dPdx,const float * dPdy,const float * dPdz,int nchannels,float * result,float * dresultds,float * dresultdt,float * dresultdr)818 TextureSystemImpl::texture3d(TextureHandle* texture_handle,
819                              Perthread* thread_info, TextureOptBatch& options,
820                              Tex::RunMask mask, const float* P,
821                              const float* dPdx, const float* dPdy,
822                              const float* dPdz, int nchannels, float* result,
823                              float* dresultds, float* dresultdt,
824                              float* dresultdr)
825 {
826     // (FIXME) CHEAT! Texture points individually
827     TextureOpt opt;
828     opt.firstchannel        = options.firstchannel;
829     opt.subimage            = options.subimage;
830     opt.subimagename        = options.subimagename;
831     opt.swrap               = (TextureOpt::Wrap)options.swrap;
832     opt.twrap               = (TextureOpt::Wrap)options.twrap;
833     opt.mipmode             = (TextureOpt::MipMode)options.mipmode;
834     opt.interpmode          = (TextureOpt::InterpMode)options.interpmode;
835     opt.anisotropic         = options.anisotropic;
836     opt.conservative_filter = options.conservative_filter;
837     opt.fill                = options.fill;
838     opt.missingcolor        = options.missingcolor;
839     opt.rwrap               = (TextureOpt::Wrap)options.rwrap;
840 
841     bool ok          = true;
842     Tex::RunMask bit = 1;
843     for (int i = 0; i < Tex::BatchWidth; ++i, bit <<= 1) {
844         float r[4], drds[4], drdt[4], drdr[4];  // temp result
845         if (mask & bit) {
846             opt.sblur  = options.sblur[i];
847             opt.tblur  = options.tblur[i];
848             opt.rblur  = options.rblur[i];
849             opt.swidth = options.swidth[i];
850             opt.twidth = options.twidth[i];
851             opt.rwidth = options.rwidth[i];
852             Imath::V3f P_(P[i], P[i + Tex::BatchWidth],
853                           P[i + 2 * Tex::BatchWidth]);
854             Imath::V3f dPdx_(dPdx[i], dPdx[i + Tex::BatchWidth],
855                              dPdx[i + 2 * Tex::BatchWidth]);
856             Imath::V3f dPdy_(dPdy[i], dPdy[i + Tex::BatchWidth],
857                              dPdy[i + 2 * Tex::BatchWidth]);
858             Imath::V3f dPdz_(dPdz[i], dPdz[i + Tex::BatchWidth],
859                              dPdz[i + 2 * Tex::BatchWidth]);
860             if (dresultds) {
861                 ok &= texture3d(texture_handle, thread_info, opt, P_, dPdx_,
862                                 dPdy_, dPdz_, nchannels, r, drds, drdt, drdr);
863                 for (int c = 0; c < nchannels; ++c) {
864                     result[c * Tex::BatchWidth + i]    = r[c];
865                     dresultds[c * Tex::BatchWidth + i] = drds[c];
866                     dresultdt[c * Tex::BatchWidth + i] = drdt[c];
867                     dresultdr[c * Tex::BatchWidth + i] = drdr[c];
868                 }
869             } else {
870                 ok &= texture3d(texture_handle, thread_info, opt, P_, dPdx_,
871                                 dPdy_, dPdz_, nchannels, r);
872                 for (int c = 0; c < nchannels; ++c) {
873                     result[c * Tex::BatchWidth + i] = r[c];
874                 }
875             }
876         }
877     }
878     return ok;
879 }
880 
881 
882 
883 bool
texture3d(ustring filename,TextureOptBatch & options,Tex::RunMask mask,const float * P,const float * dPdx,const float * dPdy,const float * dPdz,int nchannels,float * result,float * dresultds,float * dresultdt,float * dresultdr)884 TextureSystemImpl::texture3d(ustring filename, TextureOptBatch& options,
885                              Tex::RunMask mask, const float* P,
886                              const float* dPdx, const float* dPdy,
887                              const float* dPdz, int nchannels, float* result,
888                              float* dresultds, float* dresultdt,
889                              float* dresultdr)
890 {
891     Perthread* thread_info        = get_perthread_info();
892     TextureHandle* texture_handle = get_texture_handle(filename, thread_info);
893     return texture3d(texture_handle, thread_info, options, mask, P, dPdx, dPdy,
894                      dPdz, nchannels, result, dresultds, dresultdt, dresultdr);
895 }
896 
897 
898 }  // end namespace pvt
899 
900 OIIO_NAMESPACE_END
901