1 // Copyright 2008-present Contributors to the OpenImageIO project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/OpenImageIO/oiio/blob/master/LICENSE.md
4
5
6 #include <cmath>
7 #include <cstring>
8 #include <list>
9 #include <random>
10 #include <sstream>
11 #include <string>
12
13 #include <OpenImageIO/dassert.h>
14 #include <OpenImageIO/filter.h>
15 #include <OpenImageIO/fmath.h>
16 #include <OpenImageIO/imagebuf.h>
17 #include <OpenImageIO/imagebufalgo.h>
18 #include <OpenImageIO/imagecache.h>
19 #include <OpenImageIO/imageio.h>
20 #include <OpenImageIO/optparser.h>
21 #include <OpenImageIO/simd.h>
22 #include <OpenImageIO/strutil.h>
23 #include <OpenImageIO/sysutil.h>
24 #include <OpenImageIO/texture.h>
25 #include <OpenImageIO/thread.h>
26 #include <OpenImageIO/typedesc.h>
27 #include <OpenImageIO/ustring.h>
28 #include <OpenImageIO/varyingref.h>
29
30 #include "imagecache_pvt.h"
31 #include "texture_pvt.h"
32
33 #define TEX_FAST_MATH 1
34
35
36 OIIO_NAMESPACE_BEGIN
37 using namespace pvt;
38 using namespace simd;
39
40 namespace { // anonymous
41
42 // We would like shared_texturesys to be a shared_ptr so that it is
43 // automatically deleted when the app exists, but because it contains a
44 // reference to an ImageCache, we get into the "destruction order fiasco."
45 // The only easy way to fix this is to make shared_texturesys be an ordinary
46 // pointer and just let it leak (who cares? the app is done, and it only
47 // contains a few hundred bytes).
48 static TextureSystemImpl* shared_texturesys = NULL;
49 static spin_mutex shared_texturesys_mutex;
50 static bool do_unit_test_texture = false;
51 static float unit_test_texture_blur = 0.0f;
52
53 static EightBitConverter<float> uchar2float;
54 static vfloat4 u8scale(1.0f / 255.0f);
55 static vfloat4 u16scale(1.0f / 65535.0f);
56
57 OIIO_FORCEINLINE vfloat4
uchar2float4(const unsigned char * c)58 uchar2float4(const unsigned char* c)
59 {
60 return vfloat4(c) * u8scale;
61 }
62
63
64 OIIO_FORCEINLINE vfloat4
ushort2float4(const unsigned short * s)65 ushort2float4(const unsigned short* s)
66 {
67 return vfloat4(s) * u16scale;
68 }
69
70
71 OIIO_FORCEINLINE vfloat4
half2float4(const half * h)72 half2float4(const half* h)
73 {
74 return vfloat4(h);
75 }
76
77
78 static const OIIO_SIMD4_ALIGN vbool4 channel_masks[5] = {
79 vbool4(false, false, false, false), vbool4(true, false, false, false),
80 vbool4(true, true, false, false), vbool4(true, true, true, false),
81 vbool4(true, true, true, true),
82 };
83
84 } // end anonymous namespace
85
86
87 TextureSystem*
create(bool shared,ImageCache * imagecache)88 TextureSystem::create(bool shared, ImageCache* imagecache)
89 {
90 if (shared) {
91 // They requested a shared texture system. If a shared one already
92 // exists, just return it, otherwise record the new texture system
93 // as the shared one.
94 spin_lock guard(shared_texturesys_mutex);
95 if (!shared_texturesys)
96 shared_texturesys = new TextureSystemImpl(ImageCache::create(true));
97 #if 0
98 std::cerr << " shared TextureSystem is "
99 << (void *)shared_texturesys << "\n";
100 #endif
101 return shared_texturesys;
102 }
103
104 // Doesn't need a shared cache
105 bool own_ic = false; // presume the caller owns the IC it passes
106 if (!imagecache) { // If no IC supplied, make one and we own it
107 imagecache = ImageCache::create(false);
108 own_ic = true;
109 }
110 TextureSystemImpl* ts = new TextureSystemImpl(imagecache);
111 ts->m_imagecache_owner = own_ic;
112 #if 0
113 std::cerr << "creating new ImageCache " << (void *)ts << "\n";
114 #endif
115 return ts;
116 }
117
118
119
120 void
destroy(TextureSystem * x,bool teardown_imagecache)121 TextureSystem::destroy(TextureSystem* x, bool teardown_imagecache)
122 {
123 // std::cerr << "Destroying TS " << (void *)x << "\n";
124 if (!x)
125 return;
126 TextureSystemImpl* impl = (TextureSystemImpl*)x;
127 if (teardown_imagecache) {
128 if (impl->m_imagecache_owner)
129 ImageCache::destroy(impl->m_imagecache, true);
130 impl->m_imagecache = nullptr;
131 }
132
133 spin_lock guard(shared_texturesys_mutex);
134 if (impl == shared_texturesys) {
135 // This is the shared TS, so don't really delete it.
136 } else {
137 // Not a shared cache, we are the only owner, so truly destroy it.
138 delete x;
139 }
140 }
141
142
143
144 namespace pvt { // namespace pvt
145
146
147
148 // Wrap functions wrap 'coord' around 'width', and return true if the
149 // result is a valid pixel coordinate, false if black should be used
150 // instead.
151
152
153 bool
wrap_periodic_sharedborder(int & coord,int origin,int width)154 TextureSystemImpl::wrap_periodic_sharedborder(int& coord, int origin, int width)
155 {
156 // Like periodic, but knowing that the first column and last are
157 // actually the same position, so we essentially skip the last
158 // column in the next cycle.
159 width = std::max(width, 2); // avoid %0 for width=1
160 coord -= origin;
161 coord = safe_mod(coord, width - 1);
162 if (coord < 0) // Fix negative values
163 coord += width;
164 coord += origin;
165 return true;
166 }
167
168
169 const TextureSystemImpl::wrap_impl TextureSystemImpl::wrap_functions[] = {
170 // Must be in same order as Wrap enum
171 wrap_black,
172 wrap_black,
173 wrap_clamp,
174 wrap_periodic,
175 wrap_mirror,
176 wrap_periodic_pow2,
177 wrap_periodic_sharedborder
178 };
179
180
181
182 simd::vbool4
wrap_black_simd(simd::vint4 & coord_,const simd::vint4 & origin,const simd::vint4 & width)183 wrap_black_simd(simd::vint4& coord_, const simd::vint4& origin,
184 const simd::vint4& width)
185 {
186 simd::vint4 coord(coord_);
187 return (coord >= origin) & (coord < (width + origin));
188 }
189
190
191 simd::vbool4
wrap_clamp_simd(simd::vint4 & coord_,const simd::vint4 & origin,const simd::vint4 & width)192 wrap_clamp_simd(simd::vint4& coord_, const simd::vint4& origin,
193 const simd::vint4& width)
194 {
195 simd::vint4 coord(coord_);
196 coord = simd::blend(coord, origin, coord < origin);
197 coord = simd::blend(coord, (origin + width - 1), coord >= (origin + width));
198 coord_ = coord;
199 return simd::vbool4::True();
200 }
201
202
203 simd::vbool4
wrap_periodic_simd(simd::vint4 & coord_,const simd::vint4 & origin,const simd::vint4 & width)204 wrap_periodic_simd(simd::vint4& coord_, const simd::vint4& origin,
205 const simd::vint4& width)
206 {
207 simd::vint4 coord(coord_);
208 coord = coord - origin;
209 coord = coord % width;
210 coord = simd::blend(coord, coord + width, coord < 0);
211 coord = coord + origin;
212 coord_ = coord;
213 return simd::vbool4::True();
214 }
215
216
217 simd::vbool4
wrap_periodic_pow2_simd(simd::vint4 & coord_,const simd::vint4 & origin,const simd::vint4 & width)218 wrap_periodic_pow2_simd(simd::vint4& coord_, const simd::vint4& origin,
219 const simd::vint4& width)
220 {
221 simd::vint4 coord(coord_);
222 // OIIO_DASSERT (ispow2(width));
223 coord = coord - origin;
224 coord = coord
225 & (width - 1); // Shortcut periodic if we're sure it's a pow of 2
226 coord = coord + origin;
227 coord_ = coord;
228 return simd::vbool4::True();
229 }
230
231
232 simd::vbool4
wrap_mirror_simd(simd::vint4 & coord_,const simd::vint4 & origin,const simd::vint4 & width)233 wrap_mirror_simd(simd::vint4& coord_, const simd::vint4& origin,
234 const simd::vint4& width)
235 {
236 simd::vint4 coord(coord_);
237 coord = coord - origin;
238 coord = simd::blend(coord, -1 - coord, coord < 0);
239 simd::vint4 iter = coord / width; // Which iteration of the pattern?
240 coord -= iter * width;
241 // Odd iterations -- flip the sense
242 coord = blend(coord, (width - 1) - coord, (iter & 1) != 0);
243 // OIIO_DASSERT_MSG (coord >= 0 && coord < width,
244 // "width=%d, origin=%d, result=%d", width, origin, coord);
245 coord += origin;
246 coord_ = coord;
247 return simd::vbool4::True();
248 }
249
250
251 simd::vbool4
wrap_periodic_sharedborder_simd(simd::vint4 & coord_,const simd::vint4 & origin,const simd::vint4 & width)252 wrap_periodic_sharedborder_simd(simd::vint4& coord_, const simd::vint4& origin,
253 const simd::vint4& width)
254 {
255 // Like periodic, but knowing that the first column and last are
256 // actually the same position, so we essentially skip the last
257 // column in the next cycle.
258 simd::vint4 coord(coord_);
259 coord = coord - origin;
260 coord = safe_mod(coord, (width - 1));
261 coord += blend(simd::vint4(origin), width + origin,
262 coord < 0); // Fix negative values
263 coord_ = coord;
264 return true;
265 }
266
267
268
269 typedef simd::vbool4 (*wrap_impl_simd)(simd::vint4& coord,
270 const simd::vint4& origin,
271 const simd::vint4& width);
272
273
274 const wrap_impl_simd wrap_functions_simd[] = {
275 // Must be in same order as Wrap enum
276 wrap_black_simd,
277 wrap_black_simd,
278 wrap_clamp_simd,
279 wrap_periodic_simd,
280 wrap_mirror_simd,
281 wrap_periodic_pow2_simd,
282 wrap_periodic_sharedborder_simd
283 };
284
285
286
287 const char*
texture_format_name(TexFormat f)288 texture_format_name(TexFormat f)
289 {
290 static const char* texture_format_names[] = {
291 // MUST match the order of TexFormat
292 "unknown",
293 "Plain Texture",
294 "Volume Texture",
295 "Shadow",
296 "CubeFace Shadow",
297 "Volume Shadow",
298 "LatLong Environment",
299 "CubeFace Environment",
300 ""
301 };
302 return texture_format_names[(int)f];
303 }
304
305
306
307 const char*
texture_type_name(TexFormat f)308 texture_type_name(TexFormat f)
309 {
310 static const char* texture_type_names[] = {
311 // MUST match the order of TexFormat
312 "unknown", "Plain Texture", "Volume Texture", "Shadow", "Shadow",
313 "Shadow", "Environment", "Environment", ""
314 };
315 return texture_type_names[(int)f];
316 }
317
318
319
TextureSystemImpl(ImageCache * imagecache)320 TextureSystemImpl::TextureSystemImpl(ImageCache* imagecache)
321 : hq_filter(NULL)
322 {
323 m_imagecache = (ImageCacheImpl*)imagecache;
324 init();
325 }
326
327
328
329 void
init()330 TextureSystemImpl::init()
331 {
332 m_Mw2c.makeIdentity();
333 m_gray_to_rgb = false;
334 m_flip_t = false;
335 m_max_tile_channels = 6;
336 delete hq_filter;
337 hq_filter = Filter1D::create("b-spline", 4);
338 m_statslevel = 0;
339
340 // Allow environment variable to override default options
341 const char* options = getenv("OPENIMAGEIO_TEXTURE_OPTIONS");
342 if (options)
343 attribute("options", options);
344
345 if (do_unit_test_texture)
346 unit_test_texture();
347 }
348
349
350
~TextureSystemImpl()351 TextureSystemImpl::~TextureSystemImpl()
352 {
353 printstats();
354 delete hq_filter;
355 }
356
357
358
359 std::string
getstats(int level,bool icstats) const360 TextureSystemImpl::getstats(int level, bool icstats) const
361 {
362 // Merge all the threads
363 ImageCacheStatistics stats;
364 m_imagecache->mergestats(stats);
365
366 std::ostringstream out;
367 out.imbue(std::locale::classic()); // Force "C" locale with '.' decimal
368 bool anytexture = (stats.texture_queries + stats.texture3d_queries
369 + stats.shadow_queries + stats.environment_queries
370 + stats.imageinfo_queries);
371 if (level > 0 && anytexture) {
372 out << "OpenImageIO Texture statistics\n";
373
374 std::string opt;
375 #define BOOLOPT(name) \
376 if (m_##name) \
377 opt += #name " "
378 #define INTOPT(name) opt += Strutil::sprintf(#name "=%d ", m_##name)
379 #define STROPT(name) \
380 if (m_##name.size()) \
381 opt += Strutil::sprintf(#name "=\"%s\" ", m_##name)
382 INTOPT(gray_to_rgb);
383 INTOPT(flip_t);
384 INTOPT(max_tile_channels);
385 #undef BOOLOPT
386 #undef INTOPT
387 #undef STROPT
388 out << " Options: " << Strutil::wordwrap(opt, 75, 12) << "\n";
389
390 out << " Queries/batches : \n";
391 out << " texture : " << stats.texture_queries << " queries in "
392 << stats.texture_batches << " batches\n";
393 out << " texture 3d : " << stats.texture3d_queries
394 << " queries in " << stats.texture3d_batches << " batches\n";
395 out << " shadow : " << stats.shadow_queries << " queries in "
396 << stats.shadow_batches << " batches\n";
397 out << " environment : " << stats.environment_queries
398 << " queries in " << stats.environment_batches << " batches\n";
399 out << " gettextureinfo : " << stats.imageinfo_queries
400 << " queries\n";
401 out << " Interpolations :\n";
402 out << " closest : " << stats.closest_interps << "\n";
403 out << " bilinear : " << stats.bilinear_interps << "\n";
404 out << " bicubic : " << stats.cubic_interps << "\n";
405 if (stats.aniso_queries)
406 out << Strutil::sprintf(" Average anisotropic probes : %.3g\n",
407 (double)stats.aniso_probes
408 / (double)stats.aniso_queries);
409 else
410 out << Strutil::sprintf(" Average anisotropic probes : 0\n");
411 out << Strutil::sprintf(" Max anisotropy in the wild : %.3g\n",
412 stats.max_aniso);
413 if (icstats)
414 out << "\n";
415 }
416 if (icstats)
417 out << m_imagecache->getstats(level);
418 return out.str();
419 }
420
421
422
423 void
printstats() const424 TextureSystemImpl::printstats() const
425 {
426 if (m_statslevel == 0)
427 return;
428 std::cout << getstats(m_statslevel, false) << "\n\n";
429 }
430
431
432
433 void
reset_stats()434 TextureSystemImpl::reset_stats()
435 {
436 OIIO_DASSERT(m_imagecache);
437 m_imagecache->reset_stats();
438 }
439
440
441
442 bool
attribute(string_view name,TypeDesc type,const void * val)443 TextureSystemImpl::attribute(string_view name, TypeDesc type, const void* val)
444 {
445 if (name == "options" && type == TypeDesc::STRING) {
446 return optparser(*this, *(const char**)val);
447 }
448 if (name == "worldtocommon"
449 && (type == TypeMatrix || type == TypeDesc(TypeDesc::FLOAT, 16))) {
450 m_Mw2c = *(const Imath::M44f*)val;
451 m_Mc2w = m_Mw2c.inverse();
452 return true;
453 }
454 if (name == "commontoworld"
455 && (type == TypeMatrix || type == TypeDesc(TypeDesc::FLOAT, 16))) {
456 m_Mc2w = *(const Imath::M44f*)val;
457 m_Mw2c = m_Mc2w.inverse();
458 return true;
459 }
460 if ((name == "gray_to_rgb" || name == "grey_to_rgb") && (type == TypeInt)) {
461 m_gray_to_rgb = *(const int*)val;
462 return true;
463 }
464 if (name == "flip_t" && type == TypeInt) {
465 m_flip_t = *(const int*)val;
466 return true;
467 }
468 if (name == "m_max_tile_channels" && type == TypeInt) {
469 m_max_tile_channels = *(const int*)val;
470 return true;
471 }
472 if (name == "statistics:level" && type == TypeInt) {
473 m_statslevel = *(const int*)val;
474 // DO NOT RETURN! pass the same message to the image cache
475 }
476 if (name == "unit_test" && type == TypeInt) {
477 do_unit_test_texture = *(const int*)val;
478 return true;
479 }
480 if (name == "unit_test_blur" && type == TypeFloat) {
481 unit_test_texture_blur = *(const float*)val;
482 return true;
483 }
484
485 // Maybe it's meant for the cache?
486 return m_imagecache->attribute(name, type, val);
487 }
488
489
490
491 bool
getattribute(string_view name,TypeDesc type,void * val) const492 TextureSystemImpl::getattribute(string_view name, TypeDesc type,
493 void* val) const
494 {
495 if (name == "worldtocommon"
496 && (type == TypeMatrix || type == TypeDesc(TypeDesc::FLOAT, 16))) {
497 *(Imath::M44f*)val = m_Mw2c;
498 return true;
499 }
500 if (name == "commontoworld"
501 && (type == TypeMatrix || type == TypeDesc(TypeDesc::FLOAT, 16))) {
502 *(Imath::M44f*)val = m_Mc2w;
503 return true;
504 }
505 if ((name == "gray_to_rgb" || name == "grey_to_rgb") && (type == TypeInt)) {
506 *(int*)val = m_gray_to_rgb;
507 return true;
508 }
509 if (name == "flip_t" && type == TypeInt) {
510 *(int*)val = m_flip_t;
511 return true;
512 }
513 if (name == "m_max_tile_channels" && type == TypeInt) {
514 *(int*)val = m_max_tile_channels;
515 return true;
516 }
517
518 // If not one of these, maybe it's an attribute meant for the image cache?
519 return m_imagecache->getattribute(name, type, val);
520
521 return false;
522 }
523
524
525
526 std::string
resolve_filename(const std::string & filename) const527 TextureSystemImpl::resolve_filename(const std::string& filename) const
528 {
529 return m_imagecache->resolve_filename(filename);
530 }
531
532
533
534 bool
get_texture_info(ustring filename,int subimage,ustring dataname,TypeDesc datatype,void * data)535 TextureSystemImpl::get_texture_info(ustring filename, int subimage,
536 ustring dataname, TypeDesc datatype,
537 void* data)
538 {
539 bool ok = m_imagecache->get_image_info(filename, subimage, 0, dataname,
540 datatype, data);
541 if (!ok) {
542 std::string err = m_imagecache->geterror();
543 if (!err.empty())
544 error("{}", err);
545 }
546 return ok;
547 }
548
549
550
551 bool
get_texture_info(TextureHandle * texture_handle,Perthread * thread_info,int subimage,ustring dataname,TypeDesc datatype,void * data)552 TextureSystemImpl::get_texture_info(TextureHandle* texture_handle,
553 Perthread* thread_info, int subimage,
554 ustring dataname, TypeDesc datatype,
555 void* data)
556 {
557 bool ok
558 = m_imagecache->get_image_info((ImageCache::ImageHandle*)texture_handle,
559 (ImageCache::Perthread*)thread_info,
560 subimage, 0, dataname, datatype, data);
561 if (!ok) {
562 std::string err = m_imagecache->geterror();
563 if (!err.empty())
564 error("{}", err);
565 }
566 return ok;
567 }
568
569
570
571 bool
get_imagespec(ustring filename,int subimage,ImageSpec & spec)572 TextureSystemImpl::get_imagespec(ustring filename, int subimage,
573 ImageSpec& spec)
574 {
575 bool ok = m_imagecache->get_imagespec(filename, spec, subimage);
576 if (!ok) {
577 std::string err = m_imagecache->geterror();
578 if (!err.empty())
579 error("{}", err);
580 }
581 return ok;
582 }
583
584
585
586 bool
get_imagespec(TextureHandle * texture_handle,Perthread * thread_info,int subimage,ImageSpec & spec)587 TextureSystemImpl::get_imagespec(TextureHandle* texture_handle,
588 Perthread* thread_info, int subimage,
589 ImageSpec& spec)
590 {
591 bool ok
592 = m_imagecache->get_imagespec((ImageCache::ImageHandle*)texture_handle,
593 (ImageCache::Perthread*)thread_info, spec,
594 subimage);
595 if (!ok) {
596 std::string err = m_imagecache->geterror();
597 if (!err.empty())
598 error("{}", err);
599 }
600 return ok;
601 }
602
603
604
605 const ImageSpec*
imagespec(ustring filename,int subimage)606 TextureSystemImpl::imagespec(ustring filename, int subimage)
607 {
608 const ImageSpec* spec = m_imagecache->imagespec(filename, subimage);
609 if (!spec)
610 error("{}", m_imagecache->geterror());
611 return spec;
612 }
613
614
615
616 const ImageSpec*
imagespec(TextureHandle * texture_handle,Perthread * thread_info,int subimage)617 TextureSystemImpl::imagespec(TextureHandle* texture_handle,
618 Perthread* thread_info, int subimage)
619 {
620 const ImageSpec* spec
621 = m_imagecache->imagespec((ImageCache::ImageHandle*)texture_handle,
622 (ImageCache::Perthread*)thread_info,
623 subimage);
624 if (!spec) {
625 std::string err = m_imagecache->geterror();
626 if (!err.empty())
627 error("{}", err);
628 }
629 return spec;
630 }
631
632
633
634 bool
get_texels(ustring filename,TextureOpt & options,int miplevel,int xbegin,int xend,int ybegin,int yend,int zbegin,int zend,int chbegin,int chend,TypeDesc format,void * result)635 TextureSystemImpl::get_texels(ustring filename, TextureOpt& options,
636 int miplevel, int xbegin, int xend, int ybegin,
637 int yend, int zbegin, int zend, int chbegin,
638 int chend, TypeDesc format, void* result)
639 {
640 PerThreadInfo* thread_info = m_imagecache->get_perthread_info();
641 TextureFile* texfile = find_texturefile(filename, thread_info);
642 if (!texfile) {
643 error("Texture file \"{}\" not found", filename);
644 return false;
645 }
646 return get_texels((TextureHandle*)texfile, (Perthread*)thread_info, options,
647 miplevel, xbegin, xend, ybegin, yend, zbegin, zend,
648 chbegin, chend, format, result);
649 }
650
651
652
653 bool
get_texels(TextureHandle * texture_handle_,Perthread * thread_info_,TextureOpt & options,int miplevel,int xbegin,int xend,int ybegin,int yend,int zbegin,int zend,int chbegin,int chend,TypeDesc format,void * result)654 TextureSystemImpl::get_texels(TextureHandle* texture_handle_,
655 Perthread* thread_info_, TextureOpt& options,
656 int miplevel, int xbegin, int xend, int ybegin,
657 int yend, int zbegin, int zend, int chbegin,
658 int chend, TypeDesc format, void* result)
659 {
660 PerThreadInfo* thread_info = m_imagecache->get_perthread_info(
661 (PerThreadInfo*)thread_info_);
662 TextureFile* texfile = verify_texturefile((TextureFile*)texture_handle_,
663 thread_info);
664 if (!texfile) {
665 error("Invalid texture handle NULL");
666 return false;
667 }
668
669 if (texfile->broken()) {
670 if (texfile->errors_should_issue())
671 error("Invalid texture file \"{}\"", texfile->filename());
672 return false;
673 }
674 int subimage = options.subimage;
675 if (subimage < 0 || subimage >= texfile->subimages()) {
676 error("get_texel asked for nonexistent subimage {} of \"{}\"", subimage,
677 texfile->filename());
678 return false;
679 }
680 if (miplevel < 0 || miplevel >= texfile->miplevels(subimage)) {
681 if (texfile->errors_should_issue())
682 error("get_texel asked for nonexistent MIP level {} of \"{}\"",
683 miplevel, texfile->filename());
684 return false;
685 }
686 const ImageSpec& spec(texfile->spec(subimage, miplevel));
687
688 // FIXME -- this could be WAY more efficient than starting from
689 // scratch for each pixel within the rectangle. Instead, we should
690 // grab a whole tile at a time and memcpy it rapidly. But no point
691 // doing anything more complicated (not to mention bug-prone) until
692 // somebody reports this routine as being a bottleneck.
693 int nchannels = chend - chbegin;
694 int actualchannels = Imath::clamp(spec.nchannels - chbegin, 0, nchannels);
695 int tile_chbegin = 0, tile_chend = spec.nchannels;
696 if (spec.nchannels > m_max_tile_channels) {
697 // For files with many channels, narrow the range we cache
698 tile_chbegin = chbegin;
699 tile_chend = chbegin + actualchannels;
700 }
701 TileID tileid(*texfile, subimage, miplevel, 0, 0, 0, tile_chbegin,
702 tile_chend);
703 size_t formatchannelsize = format.size();
704 size_t formatpixelsize = nchannels * formatchannelsize;
705 size_t scanlinesize = (xend - xbegin) * formatpixelsize;
706 size_t zplanesize = (yend - ybegin) * scanlinesize;
707 imagesize_t npixelsread = 0;
708 bool ok = true;
709 for (int z = zbegin; z < zend; ++z) {
710 if (z < spec.z || z >= (spec.z + std::max(spec.depth, 1))) {
711 // nonexistent planes
712 memset(result, 0, zplanesize);
713 result = (void*)((char*)result + zplanesize);
714 continue;
715 }
716 tileid.z(z - ((z - spec.z) % std::max(1, spec.tile_depth)));
717 for (int y = ybegin; y < yend; ++y) {
718 if (y < spec.y || y >= (spec.y + spec.height)) {
719 // nonexistent scanlines
720 memset(result, 0, scanlinesize);
721 result = (void*)((char*)result + scanlinesize);
722 continue;
723 }
724 tileid.y(y - ((y - spec.y) % spec.tile_height));
725 for (int x = xbegin; x < xend; ++x, ++npixelsread) {
726 if (x < spec.x || x >= (spec.x + spec.width)) {
727 // nonexistent columns
728 memset(result, 0, formatpixelsize);
729 result = (void*)((char*)result + formatpixelsize);
730 continue;
731 }
732 tileid.x(x - ((x - spec.x) % spec.tile_width));
733 ok &= find_tile(tileid, thread_info, npixelsread == 0);
734 TileRef& tile(thread_info->tile);
735 const char* data;
736 if (tile
737 && (data = (const char*)tile->data(x, y, z, chbegin))) {
738 convert_types(texfile->datatype(subimage), data, format,
739 result, actualchannels);
740 for (int c = actualchannels; c < nchannels; ++c)
741 convert_types(TypeDesc::FLOAT, &options.fill, format,
742 (char*)result + c * formatchannelsize, 1);
743 } else {
744 memset(result, 0, formatpixelsize);
745 }
746 result = (void*)((char*)result + formatpixelsize);
747 }
748 }
749 }
750 if (!ok) {
751 std::string err = m_imagecache->geterror();
752 if (!err.empty())
753 error("{}", err);
754 }
755 return ok;
756 }
757
758
759
760 std::string
geterror() const761 TextureSystemImpl::geterror() const
762 {
763 std::string e;
764 std::string* errptr = m_errormessage.get();
765 if (errptr) {
766 e = *errptr;
767 errptr->clear();
768 }
769 return e;
770 }
771
772
773
774 void
append_error(const std::string & message) const775 TextureSystemImpl::append_error(const std::string& message) const
776 {
777 std::string* errptr = m_errormessage.get();
778 if (!errptr) {
779 errptr = new std::string;
780 m_errormessage.reset(errptr);
781 }
782 OIIO_DASSERT(
783 errptr->size() < 1024 * 1024 * 16
784 && "Accumulated error messages > 16MB. Try checking return codes!");
785 if (errptr->size())
786 *errptr += '\n';
787 *errptr += message;
788 }
789
790
791
792 // Implementation of invalidate -- just invalidate the image cache.
793 void
invalidate(ustring filename,bool force)794 TextureSystemImpl::invalidate(ustring filename, bool force)
795 {
796 m_imagecache->invalidate(filename, force);
797 }
798
799
800
801 // Implementation of invalidate -- just invalidate the image cache.
802 void
invalidate_all(bool force)803 TextureSystemImpl::invalidate_all(bool force)
804 {
805 m_imagecache->invalidate_all(force);
806 }
807
808
809
810 void
close(ustring filename)811 TextureSystemImpl::close(ustring filename)
812 {
813 m_imagecache->close(filename);
814 }
815
816
817
818 void
close_all()819 TextureSystemImpl::close_all()
820 {
821 m_imagecache->close_all();
822 }
823
824
825
826 bool
missing_texture(TextureOpt & options,int nchannels,float * result,float * dresultds,float * dresultdt,float * dresultdr)827 TextureSystemImpl::missing_texture(TextureOpt& options, int nchannels,
828 float* result, float* dresultds,
829 float* dresultdt, float* dresultdr)
830 {
831 for (int c = 0; c < nchannels; ++c) {
832 if (options.missingcolor)
833 result[c] = options.missingcolor[c];
834 else
835 result[c] = options.fill;
836 if (dresultds)
837 dresultds[c] = 0;
838 if (dresultdt)
839 dresultdt[c] = 0;
840 if (dresultdr)
841 dresultdr[c] = 0;
842 }
843 if (options.missingcolor) {
844 // don't treat it as an error if missingcolor was supplied
845 (void)geterror(); // eat the error
846 return true;
847 } else {
848 return false;
849 }
850 }
851
852
853
854 void
fill_gray_channels(const ImageSpec & spec,int nchannels,float * result,float * dresultds,float * dresultdt,float * dresultdr)855 TextureSystemImpl::fill_gray_channels(const ImageSpec& spec, int nchannels,
856 float* result, float* dresultds,
857 float* dresultdt, float* dresultdr)
858 {
859 int specchans = spec.nchannels;
860 if (specchans == 1 && nchannels >= 3) {
861 // Asked for RGB or RGBA, texture was just R...
862 // copy the one channel to G and B
863 *(simd::vfloat4*)result = simd::shuffle<0, 0, 0, 3>(
864 *(simd::vfloat4*)result);
865 if (dresultds) {
866 *(simd::vfloat4*)dresultds = simd::shuffle<0, 0, 0, 3>(
867 *(simd::vfloat4*)dresultds);
868 *(simd::vfloat4*)dresultdt = simd::shuffle<0, 0, 0, 3>(
869 *(simd::vfloat4*)dresultdt);
870 if (dresultdr)
871 *(simd::vfloat4*)dresultdr = simd::shuffle<0, 0, 0, 3>(
872 *(simd::vfloat4*)dresultdr);
873 }
874 } else if (specchans == 2 && nchannels == 4 && spec.alpha_channel == 1) {
875 // Asked for RGBA, texture was RA
876 // Shuffle into RRRA
877 *(simd::vfloat4*)result = simd::shuffle<0, 0, 0, 1>(
878 *(simd::vfloat4*)result);
879 if (dresultds) {
880 *(simd::vfloat4*)dresultds = simd::shuffle<0, 0, 0, 1>(
881 *(simd::vfloat4*)dresultds);
882 *(simd::vfloat4*)dresultdt = simd::shuffle<0, 0, 0, 1>(
883 *(simd::vfloat4*)dresultdt);
884 if (dresultdr)
885 *(simd::vfloat4*)dresultdr = simd::shuffle<0, 0, 0, 1>(
886 *(simd::vfloat4*)dresultdr);
887 }
888 }
889 }
890
891
892
893 bool
texture(ustring filename,TextureOptions & options,Runflag * runflags,int beginactive,int endactive,VaryingRef<float> s,VaryingRef<float> t,VaryingRef<float> dsdx,VaryingRef<float> dtdx,VaryingRef<float> dsdy,VaryingRef<float> dtdy,int nchannels,float * result,float * dresultds,float * dresultdt)894 TextureSystemImpl::texture(ustring filename, TextureOptions& options,
895 Runflag* runflags, int beginactive, int endactive,
896 VaryingRef<float> s, VaryingRef<float> t,
897 VaryingRef<float> dsdx, VaryingRef<float> dtdx,
898 VaryingRef<float> dsdy, VaryingRef<float> dtdy,
899 int nchannels, float* result, float* dresultds,
900 float* dresultdt)
901 {
902 Perthread* thread_info = get_perthread_info();
903 TextureHandle* texture_handle = get_texture_handle(filename, thread_info);
904 return texture(texture_handle, thread_info, options, runflags, beginactive,
905 endactive, s, t, dsdx, dtdx, dsdy, dtdy, nchannels, result,
906 dresultds, dresultdt);
907 }
908
909
910
911 bool
texture(TextureHandle * texture_handle,Perthread * thread_info,TextureOptions & options,Runflag * runflags,int beginactive,int endactive,VaryingRef<float> s,VaryingRef<float> t,VaryingRef<float> dsdx,VaryingRef<float> dtdx,VaryingRef<float> dsdy,VaryingRef<float> dtdy,int nchannels,float * result,float * dresultds,float * dresultdt)912 TextureSystemImpl::texture(TextureHandle* texture_handle,
913 Perthread* thread_info, TextureOptions& options,
914 Runflag* runflags, int beginactive, int endactive,
915 VaryingRef<float> s, VaryingRef<float> t,
916 VaryingRef<float> dsdx, VaryingRef<float> dtdx,
917 VaryingRef<float> dsdy, VaryingRef<float> dtdy,
918 int nchannels, float* result, float* dresultds,
919 float* dresultdt)
920 {
921 if (!texture_handle)
922 return false;
923 bool ok = true;
924 result += beginactive * nchannels;
925 if (dresultds) {
926 dresultds += beginactive * nchannels;
927 dresultdt += beginactive * nchannels;
928 }
929 for (int i = beginactive; i < endactive; ++i) {
930 if (runflags[i]) {
931 TextureOpt opt(options, i);
932 ok &= texture(texture_handle, thread_info, opt, s[i], t[i], dsdx[i],
933 dtdx[i], dsdy[i], dtdy[i], nchannels, result,
934 dresultds, dresultdt);
935 }
936 result += nchannels;
937 if (dresultds) {
938 dresultds += nchannels;
939 dresultdt += nchannels;
940 }
941 }
942 return ok;
943 }
944
945
946
947 bool
texture(ustring filename,TextureOpt & options,float s,float t,float dsdx,float dtdx,float dsdy,float dtdy,int nchannels,float * result,float * dresultds,float * dresultdt)948 TextureSystemImpl::texture(ustring filename, TextureOpt& options, float s,
949 float t, float dsdx, float dtdx, float dsdy,
950 float dtdy, int nchannels, float* result,
951 float* dresultds, float* dresultdt)
952 {
953 PerThreadInfo* thread_info = m_imagecache->get_perthread_info();
954 TextureFile* texturefile = find_texturefile(filename, thread_info);
955 return texture((TextureHandle*)texturefile, (Perthread*)thread_info,
956 options, s, t, dsdx, dtdx, dsdy, dtdy, nchannels, result,
957 dresultds, dresultdt);
958 }
959
960
961
962 bool
texture(TextureHandle * texture_handle_,Perthread * thread_info_,TextureOpt & options,float s,float t,float dsdx,float dtdx,float dsdy,float dtdy,int nchannels,float * result,float * dresultds,float * dresultdt)963 TextureSystemImpl::texture(TextureHandle* texture_handle_,
964 Perthread* thread_info_, TextureOpt& options,
965 float s, float t, float dsdx, float dtdx, float dsdy,
966 float dtdy, int nchannels, float* result,
967 float* dresultds, float* dresultdt)
968 {
969 // Handle >4 channel lookups by recursion.
970 if (nchannels > 4) {
971 int save_firstchannel = options.firstchannel;
972 while (nchannels) {
973 int n = std::min(nchannels, 4);
974 bool ok = texture(texture_handle_, thread_info_, options, s, t,
975 dsdx, dtdx, dsdy, dtdy, n /* chans */, result,
976 dresultds, dresultdt);
977 if (!ok)
978 return false;
979 result += n;
980 if (dresultds)
981 dresultds += n;
982 if (dresultdt)
983 dresultdt += n;
984 options.firstchannel += n;
985 nchannels -= n;
986 }
987 options.firstchannel = save_firstchannel; // restore what we changed
988 return true;
989 }
990
991 static const texture_lookup_prototype lookup_functions[] = {
992 // Must be in the same order as Mipmode enum
993 &TextureSystemImpl::texture_lookup,
994 &TextureSystemImpl::texture_lookup_nomip,
995 &TextureSystemImpl::texture_lookup_trilinear_mipmap,
996 &TextureSystemImpl::texture_lookup_trilinear_mipmap,
997 &TextureSystemImpl::texture_lookup
998 };
999 texture_lookup_prototype lookup = lookup_functions[(int)options.mipmode];
1000
1001 PerThreadInfo* thread_info = m_imagecache->get_perthread_info(
1002 (PerThreadInfo*)thread_info_);
1003 TextureFile* texturefile = (TextureFile*)texture_handle_;
1004 if (texturefile->is_udim())
1005 texturefile = m_imagecache->resolve_udim(texturefile, thread_info, s,
1006 t);
1007
1008 texturefile = verify_texturefile(texturefile, thread_info);
1009
1010 ImageCacheStatistics& stats(thread_info->m_stats);
1011 ++stats.texture_batches;
1012 ++stats.texture_queries;
1013
1014 if (!texturefile || texturefile->broken())
1015 return missing_texture(options, nchannels, result, dresultds,
1016 dresultdt);
1017
1018 if (!options.subimagename.empty()) {
1019 // If subimage was specified by name, figure out its index.
1020 int s = m_imagecache->subimage_from_name(texturefile,
1021 options.subimagename);
1022 if (s < 0) {
1023 error("Unknown subimage \"{}\" in texture \"{}\"",
1024 options.subimagename, texturefile->filename());
1025 return missing_texture(options, nchannels, result, dresultds,
1026 dresultdt);
1027 }
1028 options.subimage = s;
1029 options.subimagename.clear();
1030 }
1031
1032 const ImageCacheFile::SubimageInfo& subinfo(
1033 texturefile->subimageinfo(options.subimage));
1034 const ImageSpec& spec(texturefile->spec(options.subimage, 0));
1035
1036 int actualchannels = Imath::clamp(spec.nchannels - options.firstchannel, 0,
1037 nchannels);
1038
1039 // Figure out the wrap functions
1040 if (options.swrap == TextureOpt::WrapDefault)
1041 options.swrap = (TextureOpt::Wrap)texturefile->swrap();
1042 if (options.swrap == TextureOpt::WrapPeriodic && ispow2(spec.width))
1043 options.swrap = TextureOpt::WrapPeriodicPow2;
1044 if (options.twrap == TextureOpt::WrapDefault)
1045 options.twrap = (TextureOpt::Wrap)texturefile->twrap();
1046 if (options.twrap == TextureOpt::WrapPeriodic && ispow2(spec.height))
1047 options.twrap = TextureOpt::WrapPeriodicPow2;
1048
1049 if (subinfo.is_constant_image && options.swrap != TextureOpt::WrapBlack
1050 && options.twrap != TextureOpt::WrapBlack) {
1051 // Lookup of constant color texture, non-black wrap -- skip all the
1052 // hard stuff.
1053 for (int c = 0; c < actualchannels; ++c)
1054 result[c] = subinfo.average_color[c + options.firstchannel];
1055 for (int c = actualchannels; c < nchannels; ++c)
1056 result[c] = options.fill;
1057 if (dresultds) {
1058 // Derivs are always 0 from a constant texture lookup
1059 for (int c = 0; c < nchannels; ++c) {
1060 dresultds[c] = 0.0f;
1061 dresultdt[c] = 0.0f;
1062 }
1063 }
1064 if (actualchannels < nchannels && options.firstchannel == 0
1065 && m_gray_to_rgb)
1066 fill_gray_channels(spec, nchannels, result, dresultds, dresultdt);
1067 return true;
1068 }
1069
1070 if (m_flip_t) {
1071 t = 1.0f - t;
1072 dtdx *= -1.0f;
1073 dtdy *= -1.0f;
1074 }
1075
1076 if (!subinfo.full_pixel_range) { // remap st for overscan or crop
1077 s = s * subinfo.sscale + subinfo.soffset;
1078 dsdx *= subinfo.sscale;
1079 dsdy *= subinfo.sscale;
1080 t = t * subinfo.tscale + subinfo.toffset;
1081 dtdx *= subinfo.tscale;
1082 dtdy *= subinfo.tscale;
1083 }
1084
1085 bool ok;
1086 // Everything from the lookup function on down will assume that there
1087 // is space for a vfloat4 in all of the result locations, so if that's
1088 // not the case (or it's not properly aligned), make a local copy and
1089 // then copy back when we're done.
1090 // FIXME -- is this necessary at all? Can we eliminate the conditional
1091 // and the duplicated code by always doing the simd copy thing? Come
1092 // back here and time whether for 4-channel textures it really matters.
1093 bool simd_copy = (nchannels != 4 || ((size_t)result & 0x0f)
1094 || ((size_t)dresultds & 0x0f) || /* FIXME -- necessary? */
1095 ((size_t)dresultdt & 0x0f));
1096 if (simd_copy) {
1097 simd::vfloat4 result_simd, dresultds_simd, dresultdt_simd;
1098 float* OIIO_RESTRICT saved_dresultds = dresultds;
1099 float* OIIO_RESTRICT saved_dresultdt = dresultdt;
1100 if (saved_dresultds) {
1101 dresultds = (float*)&dresultds_simd;
1102 dresultdt = (float*)&dresultdt_simd;
1103 }
1104 ok = (this->*lookup)(*texturefile, thread_info, options, nchannels,
1105 actualchannels, s, t, dsdx, dtdx, dsdy, dtdy,
1106 (float*)&result_simd, dresultds, dresultdt);
1107 if (actualchannels < nchannels && options.firstchannel == 0
1108 && m_gray_to_rgb)
1109 fill_gray_channels(spec, nchannels, (float*)&result_simd, dresultds,
1110 dresultdt);
1111 result_simd.store(result, nchannels);
1112 if (saved_dresultds) {
1113 if (m_flip_t)
1114 dresultdt_simd = -dresultdt_simd;
1115 dresultds_simd.store(saved_dresultds, nchannels);
1116 dresultdt_simd.store(saved_dresultdt, nchannels);
1117 }
1118 } else {
1119 // All provided output slots are aligned 4-floats, use them directly
1120 ok = (this->*lookup)(*texturefile, thread_info, options, nchannels,
1121 actualchannels, s, t, dsdx, dtdx, dsdy, dtdy,
1122 result, dresultds, dresultdt);
1123 if (actualchannels < nchannels && options.firstchannel == 0
1124 && m_gray_to_rgb)
1125 fill_gray_channels(spec, nchannels, result, dresultds, dresultdt);
1126 if (m_flip_t && dresultdt)
1127 *(vfloat4*)dresultdt = -(*(vfloat4*)dresultdt);
1128 }
1129
1130 return ok;
1131 }
1132
1133
1134
1135 bool
texture(ustring filename,TextureOptBatch & options,Tex::RunMask mask,const float * s,const float * t,const float * dsdx,const float * dtdx,const float * dsdy,const float * dtdy,int nchannels,float * result,float * dresultds,float * dresultdt)1136 TextureSystemImpl::texture(ustring filename, TextureOptBatch& options,
1137 Tex::RunMask mask, const float* s, const float* t,
1138 const float* dsdx, const float* dtdx,
1139 const float* dsdy, const float* dtdy, int nchannels,
1140 float* result, float* dresultds, float* dresultdt)
1141 {
1142 Perthread* thread_info = get_perthread_info();
1143 TextureHandle* texture_handle = get_texture_handle(filename, thread_info);
1144 return texture(texture_handle, thread_info, options, mask, s, t, dsdx, dtdx,
1145 dsdy, dtdy, nchannels, result, dresultds, dresultdt);
1146 }
1147
1148
1149 bool
texture(TextureHandle * texture_handle,Perthread * thread_info,TextureOptBatch & options,Tex::RunMask mask,const float * s,const float * t,const float * dsdx,const float * dtdx,const float * dsdy,const float * dtdy,int nchannels,float * result,float * dresultds,float * dresultdt)1150 TextureSystemImpl::texture(TextureHandle* texture_handle,
1151 Perthread* thread_info, TextureOptBatch& options,
1152 Tex::RunMask mask, const float* s, const float* t,
1153 const float* dsdx, const float* dtdx,
1154 const float* dsdy, const float* dtdy, int nchannels,
1155 float* result, float* dresultds, float* dresultdt)
1156 {
1157 // (FIXME) CHEAT! Texture points individually
1158 TextureOpt opt;
1159 opt.firstchannel = options.firstchannel;
1160 opt.subimage = options.subimage;
1161 opt.subimagename = options.subimagename;
1162 opt.swrap = (TextureOpt::Wrap)options.swrap;
1163 opt.twrap = (TextureOpt::Wrap)options.twrap;
1164 opt.mipmode = (TextureOpt::MipMode)options.mipmode;
1165 opt.interpmode = (TextureOpt::InterpMode)options.interpmode;
1166 opt.anisotropic = options.anisotropic;
1167 opt.conservative_filter = options.conservative_filter;
1168 opt.fill = options.fill;
1169 opt.missingcolor = options.missingcolor;
1170 // rwrap not needed for 2D texture
1171
1172 bool ok = true;
1173 Tex::RunMask bit = 1;
1174 for (int i = 0; i < Tex::BatchWidth; ++i, bit <<= 1) {
1175 float r[4], drds[4], drdt[4]; // temp result
1176 if (mask & bit) {
1177 opt.sblur = options.sblur[i];
1178 opt.tblur = options.tblur[i];
1179 opt.swidth = options.swidth[i];
1180 opt.twidth = options.twidth[i];
1181 // rblur, rwidth not needed for 2D texture
1182 if (dresultds) {
1183 ok &= texture(texture_handle, thread_info, opt, s[i], t[i],
1184 dsdx[i], dtdx[i], dsdy[i], dtdy[i], nchannels, r,
1185 drds, drdt);
1186 for (int c = 0; c < nchannels; ++c) {
1187 result[c * Tex::BatchWidth + i] = r[c];
1188 dresultds[c * Tex::BatchWidth + i] = drds[c];
1189 dresultdt[c * Tex::BatchWidth + i] = drdt[c];
1190 }
1191 } else {
1192 ok &= texture(texture_handle, thread_info, opt, s[i], t[i],
1193 dsdx[i], dtdx[i], dsdy[i], dtdy[i], nchannels, r);
1194 for (int c = 0; c < nchannels; ++c) {
1195 result[c * Tex::BatchWidth + i] = r[c];
1196 }
1197 }
1198 }
1199 }
1200 return ok;
1201 }
1202
1203
1204
1205 bool
texture_lookup_nomip(TextureFile & texturefile,PerThreadInfo * thread_info,TextureOpt & options,int nchannels_result,int actualchannels,float s,float t,float,float,float,float,float * result,float * dresultds,float * dresultdt)1206 TextureSystemImpl::texture_lookup_nomip(
1207 TextureFile& texturefile, PerThreadInfo* thread_info, TextureOpt& options,
1208 int nchannels_result, int actualchannels, float s, float t, float /*dsdx*/,
1209 float /*dtdx*/, float /*dsdy*/, float /*dtdy*/, float* result,
1210 float* dresultds, float* dresultdt)
1211 {
1212 // Initialize results to 0. We'll add from here on as we sample.
1213 OIIO_DASSERT((dresultds == NULL) == (dresultdt == NULL));
1214 ((simd::vfloat4*)result)->clear();
1215 if (dresultds) {
1216 ((simd::vfloat4*)dresultds)->clear();
1217 ((simd::vfloat4*)dresultdt)->clear();
1218 }
1219
1220 static const sampler_prototype sample_functions[] = {
1221 // Must be in the same order as InterpMode enum
1222 &TextureSystemImpl::sample_closest,
1223 &TextureSystemImpl::sample_bilinear,
1224 &TextureSystemImpl::sample_bicubic,
1225 &TextureSystemImpl::sample_bilinear,
1226 };
1227 sampler_prototype sampler = sample_functions[(int)options.interpmode];
1228 OIIO_SIMD4_ALIGN float sval[4] = { s, 0.0f, 0.0f, 0.0f };
1229 OIIO_SIMD4_ALIGN float tval[4] = { t, 0.0f, 0.0f, 0.0f };
1230 static OIIO_SIMD4_ALIGN float weight[4] = { 1.0f, 0.0f, 0.0f, 0.0f };
1231 ImageCacheFile::SubimageInfo& subinfo(
1232 texturefile.subimageinfo(options.subimage));
1233 int min_mip_level = subinfo.min_mip_level;
1234 bool ok = (this->*sampler)(1, sval, tval, min_mip_level, texturefile,
1235 thread_info, options, nchannels_result,
1236 actualchannels, weight, (vfloat4*)result,
1237 (vfloat4*)dresultds, (vfloat4*)dresultdt);
1238
1239 // Update stats
1240 ImageCacheStatistics& stats(thread_info->m_stats);
1241 ++stats.aniso_queries;
1242 ++stats.aniso_probes;
1243 switch (options.interpmode) {
1244 case TextureOpt::InterpClosest: ++stats.closest_interps; break;
1245 case TextureOpt::InterpBilinear: ++stats.bilinear_interps; break;
1246 case TextureOpt::InterpBicubic: ++stats.cubic_interps; break;
1247 case TextureOpt::InterpSmartBicubic: ++stats.bilinear_interps; break;
1248 }
1249 return ok;
1250 }
1251
1252
1253
1254 // Scale the derivs as dictated by 'width', and also make sure
1255 // they are all some minimum value to make the subsequent math clean.
1256 inline void
adjust_width(float & dsdx,float & dtdx,float & dsdy,float & dtdy,float swidth,float twidth)1257 adjust_width(float& dsdx, float& dtdx, float& dsdy, float& dtdy, float swidth,
1258 float twidth /*, float sblur=0, float tblur=0*/)
1259 {
1260 // Trust user not to use nonsensical width<0
1261 dsdx *= swidth;
1262 dtdx *= twidth;
1263 dsdy *= swidth;
1264 dtdy *= twidth;
1265 #if 0
1266 // You might think that blur should be added to the original derivs and
1267 // then just carry on. And sometimes it looks fine, but for some
1268 // situations the results are absurdly wrong, as revealed by the unit
1269 // test visualizations. I'm leaving this code here but disabled as a
1270 // reminder to myself not to try it again. It's wrong.
1271 if (sblur+tblur != 0.0f /* avoid the work when blur is zero */) {
1272 dsdx += std::copysign (sblur, dsdx);
1273 dsdy += std::copysign (sblur, dsdy);
1274 dtdx += std::copysign (tblur, dtdx);
1275 dtdy += std::copysign (tblur, dtdy);
1276 }
1277 #endif
1278
1279 // Clamp degenerate derivatives so they don't cause mathematical problems
1280 static const float eps = 1.0e-8f, eps2 = eps * eps;
1281 float dxlen2 = dsdx * dsdx + dtdx * dtdx;
1282 float dylen2 = dsdy * dsdy + dtdy * dtdy;
1283 if (dxlen2 < eps2) { // Tiny dx
1284 if (dylen2 < eps2) {
1285 // Tiny dx and dy: Essentially point sampling. Substitute a
1286 // tiny but finite filter.
1287 dsdx = eps;
1288 dsdy = 0;
1289 dtdx = 0;
1290 dtdy = eps;
1291 dxlen2 = dylen2 = eps2;
1292 } else {
1293 // Tiny dx, sane dy -- pick a small dx orthogonal to dy, but
1294 // of length eps.
1295 float scale = eps / sqrtf(dylen2);
1296 dsdx = dtdy * scale;
1297 dtdx = -dsdy * scale;
1298 dxlen2 = eps2;
1299 }
1300 } else if (dylen2 < eps2) {
1301 // Tiny dy, sane dx -- pick a small dy orthogonal to dx, but of
1302 // length eps.
1303 float scale = eps / sqrtf(dxlen2);
1304 dsdy = -dtdx * scale;
1305 dtdy = dsdx * scale;
1306 dylen2 = eps2;
1307 }
1308 }
1309
1310
1311
1312 // Adjust the ellipse major and minor axes based on the blur, if nonzero.
1313 // Trust user not to use nonsensical blur<0
1314 //
1315 // FIXME: This is not "correct," but it's probably good enough. There is
1316 // probably a more principled way to deal with blur (including anisotropic)
1317 // by figuring out how to transform/scale the ellipse or consider the blur
1318 // to be like a convolution of gaussians. Some day, somebody ought to come
1319 // back to this and solve it better.
1320 inline void
adjust_blur(float & majorlength,float & minorlength,float & theta,float sblur,float tblur)1321 adjust_blur(float& majorlength, float& minorlength, float& theta, float sblur,
1322 float tblur)
1323 {
1324 if (sblur + tblur != 0.0f /* avoid the work when blur is zero */) {
1325 // Carefully add blur to the right derivative components in the
1326 // right proportions -- merely adding the same amount of blur
1327 // to all four derivatives blurs too much at some angles.
1328 OIIO_DASSERT(majorlength > 0.0f && minorlength > 0.0f);
1329 float sintheta, costheta;
1330 #ifdef TEX_FAST_MATH
1331 fast_sincos(theta, &sintheta, &costheta);
1332 #else
1333 sincos(theta, &sintheta, &costheta);
1334 #endif
1335 sintheta = fabsf(sintheta);
1336 costheta = fabsf(costheta);
1337 majorlength += sblur * costheta + tblur * sintheta;
1338 minorlength += sblur * sintheta + tblur * costheta;
1339 #if 1
1340 if (minorlength > majorlength) {
1341 // Wildly uneven sblur and tblur values might swap which axis is
1342 // thicker. For example, if the major axis is vertical (thin
1343 // ellipse) but you have so much horizontal blur that it turns
1344 // into a wide ellipse. My hacky solution is to notice when this
1345 // happens and just swap the major and minor axes. I'm actually
1346 // not convinced this is the best solution -- in the unit test
1347 // visualizations, it looks great (and better than not doing it)
1348 // for most ordinary situations, but when the derivs indicate
1349 // extreme diagonal sheer (like when the dx and dy are almost
1350 // parallel, rather than the expected almost perpendicular),
1351 // it's less than fully satisfactory. But I don't know a better
1352 // solution. And, I dunno, maybe it's even right; it's very hard
1353 // to reason about what the right thing to do is for that case,
1354 // for very stretched blur.
1355 std::swap(minorlength, majorlength);
1356 theta += M_PI_2;
1357 }
1358 #endif
1359 }
1360 }
1361
1362
1363
1364 // For the given texture file, options, and ellipse major and minor
1365 // lengths and aspect ratio, compute the two MIPmap levels and
1366 // respective weights to use for a texture lookup. The general strategy
1367 // is that we choose the MIPmap level so that the minor axis length is
1368 // pixel-sized (and then we will sample several times along the major
1369 // axis in order to handle anisotropy), but we make adjustments in
1370 // corner cases where the ideal sampling is too high or too low resolution
1371 // given the MIPmap levels we have available.
1372 inline void
compute_miplevels(TextureSystemImpl::TextureFile & texturefile,TextureOpt & options,float majorlength,float minorlength,float & aspect,int * miplevel,float * levelweight)1373 compute_miplevels(TextureSystemImpl::TextureFile& texturefile,
1374 TextureOpt& options, float majorlength, float minorlength,
1375 float& aspect, int* miplevel, float* levelweight)
1376 {
1377 ImageCacheFile::SubimageInfo& subinfo(
1378 texturefile.subimageinfo(options.subimage));
1379 float levelblend = 0.0f;
1380 int nmiplevels = (int)subinfo.levels.size();
1381 int min_mip_level = subinfo.min_mip_level;
1382 for (int m = min_mip_level; m < nmiplevels; ++m) {
1383 // Compute the filter size (minor axis) in raster space at this
1384 // MIP level. We use the smaller of the two texture resolutions,
1385 // which is better than just using one, but a more principled
1386 // approach is desired but remains elusive. FIXME.
1387 float filtwidth_ras = minorlength
1388 * std::min(subinfo.spec(m).width,
1389 subinfo.spec(m).height);
1390 // Once the filter width is smaller than one texel at this level,
1391 // we've gone too far, so we know that we want to interpolate the
1392 // previous level and the current level. Note that filtwidth_ras
1393 // is expected to be >= 0.5, or would have stopped one level ago.
1394 if (filtwidth_ras <= 1.0f) {
1395 miplevel[0] = m - 1;
1396 miplevel[1] = m;
1397 levelblend = Imath::clamp(2.0f * filtwidth_ras - 1.0f, 0.0f, 1.0f);
1398 break;
1399 }
1400 }
1401
1402 if (miplevel[1] < 0) {
1403 // We'd like to blur even more, but make due with the coarsest
1404 // MIP level.
1405 miplevel[0] = nmiplevels - 1;
1406 miplevel[1] = miplevel[0];
1407 levelblend = 0;
1408 } else if (miplevel[0] < min_mip_level) {
1409 // We wish we had even more resolution than the finest MIP level,
1410 // but tough for us.
1411 miplevel[0] = min_mip_level;
1412 miplevel[1] = min_mip_level;
1413 levelblend = 0;
1414 // It's possible that minorlength is degenerate, giving an aspect
1415 // ratio that implies a huge nsamples, which is pointless if those
1416 // samples are too close. So if minorlength is less than 1/2 texel
1417 // at the finest resolution, clamp it and recalculate aspect.
1418 int r = std::max(subinfo.spec(0).full_width,
1419 subinfo.spec(0).full_height);
1420 if (minorlength * r < 0.5f) {
1421 aspect = Imath::clamp(majorlength * r * 2.0f, 1.0f,
1422 float(options.anisotropic));
1423 }
1424 }
1425 if (options.mipmode == TextureOpt::MipModeOneLevel) {
1426 miplevel[0] = miplevel[1];
1427 levelblend = 0;
1428 }
1429 levelweight[0] = 1.0f - levelblend;
1430 levelweight[1] = levelblend;
1431 }
1432
1433
1434
1435 bool
texture_lookup_trilinear_mipmap(TextureFile & texturefile,PerThreadInfo * thread_info,TextureOpt & options,int nchannels_result,int actualchannels,float s,float t,float dsdx,float dtdx,float dsdy,float dtdy,float * result,float * dresultds,float * dresultdt)1436 TextureSystemImpl::texture_lookup_trilinear_mipmap(
1437 TextureFile& texturefile, PerThreadInfo* thread_info, TextureOpt& options,
1438 int nchannels_result, int actualchannels, float s, float t, float dsdx,
1439 float dtdx, float dsdy, float dtdy, float* result, float* dresultds,
1440 float* dresultdt)
1441 {
1442 // Initialize results to 0. We'll add from here on as we sample.
1443 OIIO_DASSERT((dresultds == NULL) == (dresultdt == NULL));
1444 ((simd::vfloat4*)result)->clear();
1445 if (dresultds) {
1446 ((simd::vfloat4*)dresultds)->clear();
1447 ((simd::vfloat4*)dresultdt)->clear();
1448 }
1449
1450 adjust_width(dsdx, dtdx, dsdy, dtdy, options.swidth, options.twidth);
1451
1452 // Determine the MIP-map level(s) we need: we will blend
1453 // data(miplevel[0]) * (1-levelblend) + data(miplevel[1]) * levelblend
1454 int miplevel[2] = { -1, -1 };
1455 float levelweight[2] = { 0, 0 };
1456 float sfilt = std::max(fabsf(dsdx), fabsf(dsdy));
1457 float tfilt = std::max(fabsf(dtdx), fabsf(dtdy));
1458 float filtwidth = options.conservative_filter ? std::max(sfilt, tfilt)
1459 : std::min(sfilt, tfilt);
1460 // account for blur
1461 filtwidth += std::max(options.sblur, options.tblur);
1462 float aspect = 1.0f;
1463 compute_miplevels(texturefile, options, filtwidth, filtwidth, aspect,
1464 miplevel, levelweight);
1465
1466 static const sampler_prototype sample_functions[] = {
1467 // Must be in the same order as InterpMode enum
1468 &TextureSystemImpl::sample_closest,
1469 &TextureSystemImpl::sample_bilinear,
1470 &TextureSystemImpl::sample_bicubic,
1471 &TextureSystemImpl::sample_bilinear,
1472 };
1473 sampler_prototype sampler = sample_functions[(int)options.interpmode];
1474
1475 // FIXME -- support for smart cubic?
1476
1477 OIIO_SIMD4_ALIGN float sval[4] = { s, 0.0f, 0.0f, 0.0f };
1478 OIIO_SIMD4_ALIGN float tval[4] = { t, 0.0f, 0.0f, 0.0f };
1479 OIIO_SIMD4_ALIGN float weight[4] = { 1.0f, 0.0f, 0.0f, 0.0f };
1480 bool ok = true;
1481 int npointson = 0;
1482 vfloat4 r_sum, drds_sum, drdt_sum;
1483 r_sum.clear();
1484 if (dresultds) {
1485 drds_sum.clear();
1486 drdt_sum.clear();
1487 }
1488 for (int level = 0; level < 2; ++level) {
1489 if (!levelweight[level]) // No contribution from this level, skip it
1490 continue;
1491 vfloat4 r, drds, drdt;
1492 ok &= (this->*sampler)(1, sval, tval, miplevel[level], texturefile,
1493 thread_info, options, nchannels_result,
1494 actualchannels, weight, &r,
1495 dresultds ? &drds : NULL,
1496 dresultds ? &drdt : NULL);
1497 ++npointson;
1498 vfloat4 lw = levelweight[level];
1499 r_sum += lw * r;
1500 if (dresultds) {
1501 drds_sum += lw * drds;
1502 drdt_sum += lw * drdt;
1503 }
1504 }
1505
1506 *(simd::vfloat4*)(result) = r_sum;
1507 if (dresultds) {
1508 *(simd::vfloat4*)(dresultds) = drds_sum;
1509 *(simd::vfloat4*)(dresultdt) = drdt_sum;
1510 }
1511
1512 // Update stats
1513 ImageCacheStatistics& stats(thread_info->m_stats);
1514 stats.aniso_queries += npointson;
1515 stats.aniso_probes += npointson;
1516 switch (options.interpmode) {
1517 case TextureOpt::InterpClosest: stats.closest_interps += npointson; break;
1518 case TextureOpt::InterpBilinear: stats.bilinear_interps += npointson; break;
1519 case TextureOpt::InterpBicubic: stats.cubic_interps += npointson; break;
1520 case TextureOpt::InterpSmartBicubic:
1521 stats.bilinear_interps += npointson;
1522 break;
1523 }
1524 return ok;
1525 }
1526
1527
1528
1529 // Given pixel derivatives, calculate major and minor axis lengths and
1530 // major axis orientation angle of the ellipse. See Greene's EWA paper
1531 // or Mavridis (2011). If ABCF is non-NULL, it should point to space
1532 // for 4 floats, which will be used to store the ellipse parameters A,
1533 // B, C, F.
1534 inline void
ellipse_axes(float dsdx,float dtdx,float dsdy,float dtdy,float & majorlength,float & minorlength,float & theta,float * ABCF=NULL)1535 ellipse_axes(float dsdx, float dtdx, float dsdy, float dtdy, float& majorlength,
1536 float& minorlength, float& theta, float* ABCF = NULL)
1537 {
1538 float dsdx2 = dsdx * dsdx;
1539 float dtdx2 = dtdx * dtdx;
1540 float dsdy2 = dsdy * dsdy;
1541 float dtdy2 = dtdy * dtdy;
1542 double A = dtdx2 + dtdy2;
1543 double B = -2.0 * (dsdx * dtdx + dsdy * dtdy);
1544 double C = dsdx2 + dsdy2;
1545 double root = hypot(A - C, B); // equivalent: sqrt (A*A - 2AC + C*C + B*B)
1546 double Aprime = (A + C - root) * 0.5;
1547 double Cprime = (A + C + root) * 0.5;
1548 #if 0
1549 double F = A*C - B*B*0.25;
1550 majorlength = std::min(safe_sqrt (float(F / Aprime)), 1000.0f);
1551 minorlength = std::min(safe_sqrt (float(F / Cprime)), 1000.0f);
1552 #else
1553 // Wolfram says that this is equivalent to:
1554 majorlength = std::min(safe_sqrt(float(Cprime)), 1000.0f);
1555 minorlength = std::min(safe_sqrt(float(Aprime)), 1000.0f);
1556 #endif
1557 #ifdef TEX_FAST_MATH
1558 theta = fast_atan2(B, A - C) * 0.5f + float(M_PI_2);
1559 #else
1560 theta = atan2(B, A - C) * 0.5 + M_PI_2;
1561 #endif
1562 if (ABCF) {
1563 // Optionally store the ellipse equation parameters, the ellipse
1564 // is given by: A*u^2 + B*u*v + C*v^2 < 1
1565 double F = A * C - B * B * 0.25;
1566 double Finv = 1.0f / F;
1567 ABCF[0] = A * Finv;
1568 ABCF[1] = B * Finv;
1569 ABCF[2] = C * Finv;
1570 ABCF[3] = F;
1571 }
1572
1573 // N.B. If the derivs passed in are the full pixel-to-pixel
1574 // derivatives, then majorlength/minorlength are the (diameter) axes
1575 // of the ellipse; if the derivs are the "to edge of pixel" 1/2
1576 // length derivs, then majorlength/minorlength are the major and
1577 // minor radii of the ellipse. We do the former! So it's important
1578 // to consider that factor of 2 in compute_ellipse_sampling.
1579 }
1580
1581
1582
1583 // Given the aspect ratio, major axis orientation angle, and axis lengths,
1584 // calculate the smajor & tmajor values that give the orientation of the
1585 // line on which samples should be distributed. If there are n samples,
1586 // they should be positioned as:
1587 // p_i = 2*(i+0.5)/n - 1.0;
1588 // sample_i = (s + p_i*smajor, t + p_i*tmajor)
1589 // If a weights ptr is supplied, it will be filled in [0..nsamples-1] with
1590 // normalized weights for each sample.
1591 inline int
compute_ellipse_sampling(float aspect,float theta,float majorlength,float minorlength,float & smajor,float & tmajor,float & invsamples,float * weights=NULL)1592 compute_ellipse_sampling(float aspect, float theta, float majorlength,
1593 float minorlength, float& smajor, float& tmajor,
1594 float& invsamples, float* weights = NULL)
1595 {
1596 // Compute the sin and cos of the sampling direction, given major
1597 // axis angle
1598 sincos(theta, &tmajor, &smajor);
1599 float L = 2.0f * (majorlength - minorlength);
1600 smajor *= L;
1601 tmajor *= L;
1602 #if 1
1603 // This is the theoretically correct number of samples.
1604 int nsamples = std::max(1, int(2.0f * aspect - 1.0f));
1605 #else
1606 int nsamples = std::max(1, (int)ceilf(aspect - 0.3f));
1607 // This approach does fewer samples for high aspect ratios, but I see
1608 // artifacts.
1609 #endif
1610 invsamples = 1.0f / nsamples;
1611 if (weights) {
1612 if (nsamples == 1) {
1613 weights[0] = 1.0f;
1614 } else if (nsamples == 2) {
1615 weights[0] = 0.5f;
1616 weights[1] = 0.5f;
1617 } else {
1618 float scale = majorlength / L; // 1/(L/major)
1619 for (int i = 0, e = (nsamples + 1) / 2; i < e; ++i) {
1620 float x = (2.0f * (i + 0.5f) * invsamples - 1.0f) * scale;
1621 #ifdef TEX_FAST_MATH
1622 float w = fast_exp(-2.0f * x * x);
1623 #else
1624 float w = expf(-2.0f * x * x);
1625 #endif
1626 weights[nsamples - i - 1] = weights[i] = w;
1627 }
1628 float sumw = 0.0f;
1629 for (int i = 0; i < nsamples; ++i)
1630 sumw += weights[i];
1631 for (int i = 0; i < nsamples; ++i)
1632 weights[i] /= sumw;
1633 }
1634 }
1635 return nsamples;
1636 }
1637
1638
1639
1640 bool
texture_lookup(TextureFile & texturefile,PerThreadInfo * thread_info,TextureOpt & options,int nchannels_result,int actualchannels,float s,float t,float dsdx,float dtdx,float dsdy,float dtdy,float * result,float * dresultds,float * dresultdt)1641 TextureSystemImpl::texture_lookup(TextureFile& texturefile,
1642 PerThreadInfo* thread_info,
1643 TextureOpt& options, int nchannels_result,
1644 int actualchannels, float s, float t,
1645 float dsdx, float dtdx, float dsdy,
1646 float dtdy, float* result, float* dresultds,
1647 float* dresultdt)
1648 {
1649 OIIO_DASSERT((dresultds == NULL) == (dresultdt == NULL));
1650
1651 // Compute the natural resolution we want for the bare derivs, this
1652 // will be the threshold for knowing we're maxifying (and therefore
1653 // wanting cubic interpolation).
1654 float sfilt_noblur = std::max(std::max(fabsf(dsdx), fabsf(dsdy)), 1e-8f);
1655 float tfilt_noblur = std::max(std::max(fabsf(dtdx), fabsf(dtdy)), 1e-8f);
1656 int naturalsres = (int)(1.0f / sfilt_noblur);
1657 int naturaltres = (int)(1.0f / tfilt_noblur);
1658
1659 // Scale by 'width'
1660 adjust_width(dsdx, dtdx, dsdy, dtdy, options.swidth, options.twidth);
1661
1662 // Determine the MIP-map level(s) we need: we will blend
1663 // data(miplevel[0]) * (1-levelblend) + data(miplevel[1]) * levelblend
1664 float smajor, tmajor;
1665 float majorlength, minorlength;
1666 float theta;
1667
1668 // Do a bit more math and get the exact ellipse axis lengths, and
1669 // therefore a more accurate aspect ratio as well. Looks much MUCH
1670 // better, but for scenes with lots of grazing angles, it can greatly
1671 // increase the average anisotropy, therefore the number of bilinear
1672 // or bicubic texture probes, and therefore runtime!
1673 ellipse_axes(dsdx, dtdx, dsdy, dtdy, majorlength, minorlength, theta);
1674
1675 adjust_blur(majorlength, minorlength, theta, options.sblur, options.tblur);
1676
1677 float aspect, trueaspect;
1678 aspect = anisotropic_aspect(majorlength, minorlength, options, trueaspect);
1679
1680 int miplevel[2] = { -1, -1 };
1681 float levelweight[2] = { 0, 0 };
1682 compute_miplevels(texturefile, options, majorlength, minorlength, aspect,
1683 miplevel, levelweight);
1684
1685 float* lineweight
1686 = OIIO_ALLOCA(float,
1687 round_to_multiple_of_pow2(2 * options.anisotropic, 4));
1688 float invsamples;
1689 int nsamples = compute_ellipse_sampling(aspect, theta, majorlength,
1690 minorlength, smajor, tmajor,
1691 invsamples, lineweight);
1692 // All the computations were done assuming full diametric axes of
1693 // the ellipse, but our derivatives are pixel-to-pixel, yielding
1694 // semi-major and semi-minor lengths, so we need to scale everything
1695 // by 1/2.
1696 smajor *= 0.5f;
1697 tmajor *= 0.5f;
1698
1699 bool ok = true;
1700 int npointson = 0;
1701 int closestprobes = 0, bilinearprobes = 0, bicubicprobes = 0;
1702 int nsamples_padded = round_to_multiple_of_pow2(nsamples, 4);
1703 float* sval = OIIO_ALLOCA(float, nsamples_padded);
1704 float* tval = OIIO_ALLOCA(float, nsamples_padded);
1705
1706 // Compute the s and t positions of the samples along the major axis.
1707 #if OIIO_SIMD
1708 // Do the computations in batches of 4, with SIMD ops.
1709 static OIIO_SIMD4_ALIGN float iota_start[4] = { 0.5f, 1.5f, 2.5f, 3.5f };
1710 vfloat4 iota = *(const vfloat4*)iota_start;
1711 for (int sample = 0; sample < nsamples; sample += 4) {
1712 vfloat4 pos = 2.0f * (iota * invsamples - 0.5f);
1713 vfloat4 ss = s + pos * smajor;
1714 vfloat4 tt = t + pos * tmajor;
1715 ss.store(sval + sample);
1716 tt.store(tval + sample);
1717 iota += 4.0f;
1718 }
1719 #else
1720 // Non-SIMD, reference code
1721 for (int sample = 0; sample < nsamples; ++sample) {
1722 float pos = 2.0f * ((sample + 0.5f) * invsamples - 0.5f);
1723 sval[sample] = s + pos * smajor;
1724 tval[sample] = t + pos * tmajor;
1725 }
1726 #endif
1727
1728 vfloat4 r_sum, drds_sum, drdt_sum;
1729 r_sum.clear();
1730 if (dresultds) {
1731 drds_sum.clear();
1732 drdt_sum.clear();
1733 }
1734 for (int level = 0; level < 2; ++level) {
1735 if (!levelweight[level]) // No contribution from this level, skip it
1736 continue;
1737 ++npointson;
1738 vfloat4 r, drds, drdt;
1739 int lev = miplevel[level];
1740 switch (options.interpmode) {
1741 case TextureOpt::InterpClosest:
1742 ok &= sample_closest(nsamples, sval, tval, lev, texturefile,
1743 thread_info, options, nchannels_result,
1744 actualchannels, lineweight, &r, NULL, NULL);
1745 ++closestprobes;
1746 break;
1747 case TextureOpt::InterpBilinear:
1748 ok &= sample_bilinear(nsamples, sval, tval, lev, texturefile,
1749 thread_info, options, nchannels_result,
1750 actualchannels, lineweight, &r,
1751 dresultds ? &drds : NULL,
1752 dresultds ? &drdt : NULL);
1753 ++bilinearprobes;
1754 break;
1755 case TextureOpt::InterpBicubic:
1756 ok &= sample_bicubic(nsamples, sval, tval, lev, texturefile,
1757 thread_info, options, nchannels_result,
1758 actualchannels, lineweight, &r,
1759 dresultds ? &drds : NULL,
1760 dresultds ? &drdt : NULL);
1761 ++bicubicprobes;
1762 break;
1763 case TextureOpt::InterpSmartBicubic:
1764 if (lev == 0
1765 || (texturefile.spec(options.subimage, lev).width
1766 < naturalsres / 2)
1767 || (texturefile.spec(options.subimage, lev).height
1768 < naturaltres / 2)) {
1769 ok &= sample_bicubic(nsamples, sval, tval, lev, texturefile,
1770 thread_info, options, nchannels_result,
1771 actualchannels, lineweight, &r,
1772 dresultds ? &drds : NULL,
1773 dresultds ? &drdt : NULL);
1774 ++bicubicprobes;
1775 } else {
1776 ok &= sample_bilinear(nsamples, sval, tval, lev, texturefile,
1777 thread_info, options, nchannels_result,
1778 actualchannels, lineweight, &r,
1779 dresultds ? &drds : NULL,
1780 dresultds ? &drdt : NULL);
1781 ++bilinearprobes;
1782 }
1783 break;
1784 }
1785
1786 vfloat4 lw = levelweight[level];
1787 r_sum += lw * r;
1788 if (dresultds) {
1789 drds_sum += lw * drds;
1790 drdt_sum += lw * drdt;
1791 }
1792 }
1793
1794 *(simd::vfloat4*)(result) = r_sum;
1795 if (dresultds) {
1796 *(simd::vfloat4*)(dresultds) = drds_sum;
1797 *(simd::vfloat4*)(dresultdt) = drdt_sum;
1798 }
1799
1800 // Update stats
1801 ImageCacheStatistics& stats(thread_info->m_stats);
1802 stats.aniso_queries += npointson;
1803 stats.aniso_probes += npointson * nsamples;
1804 if (trueaspect > stats.max_aniso)
1805 stats.max_aniso = trueaspect; // FIXME?
1806 stats.closest_interps += closestprobes * nsamples;
1807 stats.bilinear_interps += bilinearprobes * nsamples;
1808 stats.cubic_interps += bicubicprobes * nsamples;
1809
1810 return ok;
1811 }
1812
1813
1814
1815 const float*
pole_color(TextureFile & texturefile,PerThreadInfo *,const ImageCacheFile::LevelInfo & levelinfo,TileRef & tile,int subimage,int,int pole)1816 TextureSystemImpl::pole_color(TextureFile& texturefile,
1817 PerThreadInfo* /*thread_info*/,
1818 const ImageCacheFile::LevelInfo& levelinfo,
1819 TileRef& tile, int subimage, int /*miplevel*/,
1820 int pole)
1821 {
1822 if (!levelinfo.onetile)
1823 return NULL; // Only compute color for one-tile MIP levels
1824 const ImageSpec& spec(levelinfo.spec);
1825 if (!levelinfo.polecolorcomputed) {
1826 static spin_mutex mutex; // Protect everybody's polecolor
1827 spin_lock lock(mutex);
1828 if (!levelinfo.polecolorcomputed) {
1829 OIIO_DASSERT(levelinfo.polecolor.size() == 0);
1830 levelinfo.polecolor.resize(2 * spec.nchannels);
1831 OIIO_DASSERT(tile->id().nchannels() == spec.nchannels
1832 && "pole_color doesn't work for channel subsets");
1833 int pixelsize = tile->pixelsize();
1834 TypeDesc::BASETYPE pixeltype = texturefile.pixeltype(subimage);
1835 // We store north and south poles adjacently in polecolor
1836 float* p = &(levelinfo.polecolor[0]);
1837 int width = spec.width;
1838 float scale = 1.0f / width;
1839 for (int pole = 0; pole <= 1; ++pole, p += spec.nchannels) {
1840 int y = pole * (spec.height - 1); // 0 or height-1
1841 for (int c = 0; c < spec.nchannels; ++c)
1842 p[c] = 0.0f;
1843 const unsigned char* texel = tile->bytedata()
1844 + y * spec.tile_width * pixelsize;
1845 for (int i = 0; i < width; ++i, texel += pixelsize)
1846 for (int c = 0; c < spec.nchannels; ++c) {
1847 if (pixeltype == TypeDesc::UINT8)
1848 p[c] += uchar2float(texel[c]);
1849 else if (pixeltype == TypeDesc::UINT16)
1850 p[c] += convert_type<uint16_t, float>(
1851 ((const uint16_t*)texel)[c]);
1852 else if (pixeltype == TypeDesc::HALF)
1853 p[c] += ((const half*)texel)[c];
1854 else {
1855 OIIO_DASSERT(pixeltype == TypeDesc::FLOAT);
1856 p[c] += ((const float*)texel)[c];
1857 }
1858 }
1859 for (int c = 0; c < spec.nchannels; ++c)
1860 p[c] *= scale;
1861 }
1862 levelinfo.polecolorcomputed = true;
1863 }
1864 }
1865 return &(levelinfo.polecolor[pole * spec.nchannels]);
1866 }
1867
1868
1869
1870 void
fade_to_pole(float t,float * accum,float & weight,TextureFile & texturefile,PerThreadInfo * thread_info,const ImageCacheFile::LevelInfo & levelinfo,TextureOpt & options,int miplevel,int nchannels)1871 TextureSystemImpl::fade_to_pole(float t, float* accum, float& weight,
1872 TextureFile& texturefile,
1873 PerThreadInfo* thread_info,
1874 const ImageCacheFile::LevelInfo& levelinfo,
1875 TextureOpt& options, int miplevel,
1876 int nchannels)
1877 {
1878 // N.B. We want to fade to pole colors right at texture
1879 // boundaries t==0 and t==height, but at the very top of this
1880 // function, we subtracted another 0.5 from t, so we need to
1881 // undo that here.
1882 float pole;
1883 const float* polecolor;
1884 if (t < 1.0f) {
1885 pole = (1.0f - t);
1886 polecolor = pole_color(texturefile, thread_info, levelinfo,
1887 thread_info->tile, options.subimage, miplevel,
1888 0);
1889 } else {
1890 pole = t - floorf(t);
1891 polecolor = pole_color(texturefile, thread_info, levelinfo,
1892 thread_info->tile, options.subimage, miplevel,
1893 1);
1894 }
1895 pole = Imath::clamp(pole, 0.0f, 1.0f);
1896 pole *= pole; // squaring makes more pleasing appearance
1897 polecolor += options.firstchannel;
1898 for (int c = 0; c < nchannels; ++c)
1899 accum[c] += weight * pole * polecolor[c];
1900 weight *= 1.0f - pole;
1901 }
1902
1903
1904
1905 bool
sample_closest(int nsamples,const float * s_,const float * t_,int miplevel,TextureFile & texturefile,PerThreadInfo * thread_info,TextureOpt & options,int nchannels_result,int actualchannels,const float * weight_,vfloat4 * accum_,vfloat4 * daccumds_,vfloat4 * daccumdt_)1906 TextureSystemImpl::sample_closest(
1907 int nsamples, const float* s_, const float* t_, int miplevel,
1908 TextureFile& texturefile, PerThreadInfo* thread_info, TextureOpt& options,
1909 int nchannels_result, int actualchannels, const float* weight_,
1910 vfloat4* accum_, vfloat4* daccumds_, vfloat4* daccumdt_)
1911 {
1912 bool allok = true;
1913 const ImageSpec& spec(texturefile.spec(options.subimage, miplevel));
1914 const ImageCacheFile::LevelInfo& levelinfo(
1915 texturefile.levelinfo(options.subimage, miplevel));
1916 TypeDesc::BASETYPE pixeltype = texturefile.pixeltype(options.subimage);
1917 wrap_impl swrap_func = wrap_functions[(int)options.swrap];
1918 wrap_impl twrap_func = wrap_functions[(int)options.twrap];
1919 vfloat4 accum;
1920 accum.clear();
1921 float nonfill = 0.0f;
1922 int firstchannel = options.firstchannel;
1923 int tile_chbegin = 0, tile_chend = spec.nchannels;
1924 if (spec.nchannels > m_max_tile_channels) {
1925 // For files with many channels, narrow the range we cache
1926 tile_chbegin = options.firstchannel;
1927 tile_chend = options.firstchannel + actualchannels;
1928 }
1929 TileID id(texturefile, options.subimage, miplevel, 0, 0, 0, tile_chbegin,
1930 tile_chend);
1931 for (int sample = 0; sample < nsamples; ++sample) {
1932 float s = s_[sample], t = t_[sample];
1933 float weight = weight_[sample];
1934
1935 int stex, ttex; // Texel coordinates
1936 float sfrac, tfrac;
1937 st_to_texel(s, t, texturefile, spec, stex, ttex, sfrac, tfrac);
1938
1939 if (sfrac > 0.5f)
1940 ++stex;
1941 if (tfrac > 0.5f)
1942 ++ttex;
1943
1944 // Wrap
1945 bool svalid, tvalid; // Valid texels? false means black border
1946 svalid = swrap_func(stex, spec.x, spec.width);
1947 tvalid = twrap_func(ttex, spec.y, spec.height);
1948 if (!levelinfo.full_pixel_range) {
1949 svalid &= (stex >= spec.x
1950 && stex < (spec.x + spec.width)); // data window
1951 tvalid &= (ttex >= spec.y && ttex < (spec.y + spec.height));
1952 }
1953 if (!(svalid & tvalid)) {
1954 // All texels we need were out of range and using 'black' wrap.
1955 nonfill += weight;
1956 continue;
1957 }
1958
1959 int tile_s = (stex - spec.x) % spec.tile_width;
1960 int tile_t = (ttex - spec.y) % spec.tile_height;
1961 id.xy(stex - tile_s, ttex - tile_t);
1962 bool ok = find_tile(id, thread_info, sample == 0);
1963 if (!ok)
1964 error("{}", m_imagecache->geterror());
1965 TileRef& tile(thread_info->tile);
1966 if (!tile || !ok) {
1967 allok = false;
1968 continue;
1969 }
1970 int offset = id.nchannels() * (tile_t * spec.tile_width + tile_s)
1971 + (firstchannel - id.chbegin());
1972 OIIO_DASSERT((size_t)offset < spec.nchannels * spec.tile_pixels());
1973 simd::vfloat4 texel_simd;
1974 if (pixeltype == TypeDesc::UINT8) {
1975 // special case for 8-bit tiles
1976 texel_simd = uchar2float4(tile->bytedata() + offset);
1977 } else if (pixeltype == TypeDesc::UINT16) {
1978 texel_simd = ushort2float4(tile->ushortdata() + offset);
1979 } else if (pixeltype == TypeDesc::HALF) {
1980 texel_simd = half2float4(tile->halfdata() + offset);
1981 } else {
1982 OIIO_DASSERT(pixeltype == TypeDesc::FLOAT);
1983 texel_simd.load(tile->floatdata() + offset);
1984 }
1985
1986 accum += weight * texel_simd;
1987 }
1988 simd::vbool4 channel_mask = channel_masks[actualchannels];
1989 accum = blend0(accum, channel_mask);
1990 if (nonfill < 1.0f && nchannels_result > actualchannels && options.fill) {
1991 // Add the weighted fill color
1992 accum += blend0not(vfloat4((1.0f - nonfill) * options.fill),
1993 channel_mask);
1994 }
1995
1996 *accum_ = accum;
1997 if (daccumds_) {
1998 daccumds_->clear(); // constant interp has 0 derivatives
1999 daccumdt_->clear();
2000 }
2001 return allok;
2002 }
2003
2004
2005
2006 /// Convert texture coordinates (s,t), which range on 0-1 for the "full"
2007 /// image boundary, to texel coordinates (i+ifrac,j+jfrac) where (i,j) is
2008 /// the texel to the immediate upper left of the sample position, and ifrac
2009 /// and jfrac are the fractional (0-1) portion of the way to the next texel
2010 /// to the right or down, respectively. Do this for 4 s,t values at a time.
2011 inline void
st_to_texel_simd(const vfloat4 & s_,const vfloat4 & t_,TextureSystemImpl::TextureFile & texturefile,const ImageSpec & spec,vint4 & i,vint4 & j,vfloat4 & ifrac,vfloat4 & jfrac)2012 st_to_texel_simd(const vfloat4& s_, const vfloat4& t_,
2013 TextureSystemImpl::TextureFile& texturefile,
2014 const ImageSpec& spec, vint4& i, vint4& j, vfloat4& ifrac,
2015 vfloat4& jfrac)
2016 {
2017 vfloat4 s, t;
2018 // As passed in, (s,t) map the texture to (0,1). Remap to texel coords.
2019 // Note that we have two modes, depending on the m_sample_border.
2020 if (texturefile.sample_border() == 0) {
2021 // texel samples are at 0.5/res, 1.5/res, ..., (res-0.5)/res,
2022 s = s_ * float(spec.width) + (spec.x - 0.5f);
2023 t = t_ * float(spec.height) + (spec.y - 0.5f);
2024 } else {
2025 // first and last rows/columns are *exactly* on the boundary,
2026 // so samples are at 0, 1/(res-1), ..., 1.
2027 s = s_ * float(spec.width - 1) + float(spec.x);
2028 t = t_ * float(spec.height - 1) + float(spec.y);
2029 }
2030 ifrac = floorfrac(s, &i);
2031 jfrac = floorfrac(t, &j);
2032 // Now (i,j) are the integer coordinates of the texel to the
2033 // immediate "upper left" of the lookup point, and (ifrac,jfrac) are
2034 // the amount that the lookup point is actually offset from the
2035 // texel center (with (1,1) being all the way to the next texel down
2036 // and to the right).
2037 }
2038
2039
2040
2041 bool
sample_bilinear(int nsamples,const float * s_,const float * t_,int miplevel,TextureFile & texturefile,PerThreadInfo * thread_info,TextureOpt & options,int nchannels_result,int actualchannels,const float * weight_,vfloat4 * accum_,vfloat4 * daccumds_,vfloat4 * daccumdt_)2042 TextureSystemImpl::sample_bilinear(
2043 int nsamples, const float* s_, const float* t_, int miplevel,
2044 TextureFile& texturefile, PerThreadInfo* thread_info, TextureOpt& options,
2045 int nchannels_result, int actualchannels, const float* weight_,
2046 vfloat4* accum_, vfloat4* daccumds_, vfloat4* daccumdt_)
2047 {
2048 const ImageSpec& spec(texturefile.spec(options.subimage, miplevel));
2049 const ImageCacheFile::LevelInfo& levelinfo(
2050 texturefile.levelinfo(options.subimage, miplevel));
2051 TypeDesc::BASETYPE pixeltype = texturefile.pixeltype(options.subimage);
2052 wrap_impl swrap_func = wrap_functions[(int)options.swrap];
2053 wrap_impl twrap_func = wrap_functions[(int)options.twrap];
2054 wrap_impl_simd wrap_func = (swrap_func == twrap_func)
2055 ? wrap_functions_simd[(int)options.swrap]
2056 : NULL;
2057 simd::vint4 xy(spec.x, spec.y);
2058 simd::vint4 widthheight(spec.width, spec.height);
2059 simd::vint4 tilewh(spec.tile_width, spec.tile_height);
2060 simd::vint4 tilewhmask = tilewh - 1;
2061 bool use_fill = (nchannels_result > actualchannels && options.fill);
2062 bool tilepow2 = ispow2(spec.tile_width) && ispow2(spec.tile_height);
2063 size_t channelsize = texturefile.channelsize(options.subimage);
2064 int firstchannel = options.firstchannel;
2065 int tile_chbegin = 0, tile_chend = spec.nchannels;
2066 // need_pole: do we potentially need to fade to special pole color?
2067 // If we do, can't restrict channel range or fade_to_pole won't work.
2068 bool need_pole = (options.envlayout == LayoutLatLong && levelinfo.onetile);
2069 if (spec.nchannels > m_max_tile_channels && !need_pole) {
2070 // For files with many channels, narrow the range we cache
2071 tile_chbegin = options.firstchannel;
2072 tile_chend = options.firstchannel + actualchannels;
2073 }
2074 TileID id(texturefile, options.subimage, miplevel, 0, 0, 0, tile_chbegin,
2075 tile_chend);
2076 float nonfill = 0.0f; // The degree to which we DON'T need fill
2077 // N.B. What's up with "nofill"? We need to consider fill only when we
2078 // are inside the valid texture region. Outside, i.e. in the black wrap
2079 // region, black takes precedence over fill. By keeping track of when
2080 // we don't need to worry about fill, which is the comparatively rare
2081 // case, we do a lot less math and have fewer rounding errors.
2082
2083 vfloat4 accum, daccumds, daccumdt;
2084 accum.clear();
2085 if (daccumds_) {
2086 daccumds.clear();
2087 daccumdt.clear();
2088 }
2089 vfloat4 s_simd, t_simd;
2090 vint4 sint_simd, tint_simd;
2091 vfloat4 sfrac_simd, tfrac_simd;
2092 for (int sample = 0; sample < nsamples; ++sample) {
2093 // To utilize SIMD ops in an inherently scalar loop, every fourth
2094 // step, we compute the st_to_texel for the next four samples.
2095 int sample4 = sample & 3;
2096 if (sample4 == 0) {
2097 s_simd.load(s_ + sample);
2098 t_simd.load(t_ + sample);
2099 st_to_texel_simd(s_simd, t_simd, texturefile, spec, sint_simd,
2100 tint_simd, sfrac_simd, tfrac_simd);
2101 }
2102 int sint = sint_simd[sample4], tint = tint_simd[sample4];
2103 float sfrac = sfrac_simd[sample4], tfrac = tfrac_simd[sample4];
2104 float weight = weight_[sample];
2105
2106 // SIMD-ize the indices. We have four texels, fit them into one SIMD
2107 // 4-vector as S0,S1,T0,T1.
2108 enum { S0 = 0, S1 = 1, T0 = 2, T1 = 3 };
2109
2110 simd::vint4 sttex(sint, sint + 1, tint,
2111 tint + 1); // Texel coords: s0,s1,t0,t1
2112 simd::vbool4 stvalid;
2113 if (wrap_func) {
2114 // Both directions use the same wrap function, call in parallel.
2115 stvalid = wrap_func(sttex, xy, widthheight);
2116 } else {
2117 stvalid.load(swrap_func(sttex[S0], spec.x, spec.width),
2118 swrap_func(sttex[S1], spec.x, spec.width),
2119 twrap_func(sttex[T0], spec.y, spec.height),
2120 twrap_func(sttex[T1], spec.y, spec.height));
2121 }
2122
2123 // Account for crop windows
2124 if (!levelinfo.full_pixel_range) {
2125 stvalid &= (sttex >= xy) & (sttex < (xy + widthheight));
2126 }
2127 if (none(stvalid)) {
2128 nonfill += weight;
2129 continue; // All texels we need were out of range and using 'black' wrap
2130 }
2131
2132 simd::vfloat4 texel_simd[2][2];
2133 simd::vint4 tile_st = (simd::vint4(simd::shuffle<S0, S0, T0, T0>(sttex))
2134 - xy);
2135 if (tilepow2)
2136 tile_st &= tilewhmask;
2137 else
2138 tile_st %= tilewh;
2139 bool s_onetile = (tile_st[S0] != tilewhmask[S0])
2140 & (sttex[S0] + 1 == sttex[S1]);
2141 bool t_onetile = (tile_st[T0] != tilewhmask[T0])
2142 & (sttex[T0] + 1 == sttex[T1]);
2143 bool onetile = (s_onetile & t_onetile);
2144 if (onetile && all(stvalid)) {
2145 // Shortcut if all the texels we need are on the same tile
2146 id.xy(sttex[S0] - tile_st[S0], sttex[T0] - tile_st[T0]);
2147 bool ok = find_tile(id, thread_info, sample == 0);
2148 if (!ok)
2149 error("{}", m_imagecache->geterror());
2150 TileRef& tile(thread_info->tile);
2151 if (!tile->valid())
2152 return false;
2153 int pixelsize = tile->pixelsize();
2154 int offset = pixelsize
2155 * (tile_st[T0] * spec.tile_width + tile_st[S0]);
2156 const unsigned char* p = tile->bytedata() + offset
2157 + channelsize
2158 * (firstchannel - id.chbegin());
2159 if (pixeltype == TypeDesc::UINT8) {
2160 texel_simd[0][0] = uchar2float4(p);
2161 texel_simd[0][1] = uchar2float4(p + pixelsize);
2162 p += pixelsize * spec.tile_width;
2163 texel_simd[1][0] = uchar2float4(p);
2164 texel_simd[1][1] = uchar2float4(p + pixelsize);
2165 } else if (pixeltype == TypeDesc::UINT16) {
2166 texel_simd[0][0] = ushort2float4((uint16_t*)p);
2167 texel_simd[0][1] = ushort2float4((uint16_t*)(p + pixelsize));
2168 p += pixelsize * spec.tile_width;
2169 texel_simd[1][0] = ushort2float4((uint16_t*)p);
2170 texel_simd[1][1] = ushort2float4((uint16_t*)(p + pixelsize));
2171 } else if (pixeltype == TypeDesc::HALF) {
2172 texel_simd[0][0] = half2float4((half*)p);
2173 texel_simd[0][1] = half2float4((half*)(p + pixelsize));
2174 p += pixelsize * spec.tile_width;
2175 texel_simd[1][0] = half2float4((half*)p);
2176 texel_simd[1][1] = half2float4((half*)(p + pixelsize));
2177 } else {
2178 OIIO_DASSERT(pixeltype == TypeDesc::FLOAT);
2179 texel_simd[0][0].load((const float*)p);
2180 texel_simd[0][1].load((const float*)(p + pixelsize));
2181 p += pixelsize * spec.tile_width;
2182 texel_simd[1][0].load((const float*)p);
2183 texel_simd[1][1].load((const float*)(p + pixelsize));
2184 }
2185 } else {
2186 bool noreusetile = (options.swrap == TextureOpt::WrapMirror);
2187 simd::vint4 tile_st = (sttex - xy) % tilewh;
2188 simd::vint4 tile_edge = sttex - tile_st;
2189 for (int j = 0; j < 2; ++j) {
2190 if (!stvalid[T0 + j]) {
2191 texel_simd[j][0].clear();
2192 texel_simd[j][1].clear();
2193 continue;
2194 }
2195 int tile_t = tile_st[T0 + j];
2196 for (int i = 0; i < 2; ++i) {
2197 if (!stvalid[S0 + i]) {
2198 texel_simd[j][i].clear();
2199 continue;
2200 }
2201 int tile_s = tile_st[S0 + i];
2202 // Trick: we only need to find a tile if i == 0 or if we
2203 // just crossed a tile bouncary (if tile_s == 0).
2204 // Otherwise, we are still on the same tile as the last
2205 // iteration, as long as we aren't using mirror wrap mode!
2206 if (i == 0 || tile_s == 0 || noreusetile) {
2207 id.xy(tile_edge[S0 + i], tile_edge[T0 + j]);
2208 bool ok = find_tile(id, thread_info, sample == 0);
2209 if (!ok)
2210 error("{}", m_imagecache->geterror());
2211 if (!thread_info->tile->valid()) {
2212 return false;
2213 }
2214 OIIO_DASSERT(thread_info->tile->id() == id);
2215 }
2216 TileRef& tile(thread_info->tile);
2217 int pixelsize = tile->pixelsize();
2218 int offset = pixelsize
2219 * (tile_t * spec.tile_width + tile_s);
2220 offset += (firstchannel - id.chbegin()) * channelsize;
2221 OIIO_DASSERT(offset < spec.tile_width * spec.tile_height
2222 * spec.tile_depth * pixelsize);
2223 if (pixeltype == TypeDesc::UINT8)
2224 texel_simd[j][i] = uchar2float4(
2225 (const unsigned char*)(tile->bytedata() + offset));
2226 else if (pixeltype == TypeDesc::UINT16)
2227 texel_simd[j][i] = ushort2float4(
2228 (const unsigned short*)(tile->bytedata() + offset));
2229 else if (pixeltype == TypeDesc::HALF)
2230 texel_simd[j][i] = half2float4(
2231 (const half*)(tile->bytedata() + offset));
2232 else {
2233 OIIO_DASSERT(pixeltype == TypeDesc::FLOAT);
2234 texel_simd[j][i].load(
2235 (const float*)(tile->bytedata() + offset));
2236 }
2237 }
2238 }
2239 }
2240
2241 // When we're on the lowest res mipmap levels, it's more pleasing if
2242 // we converge to a single pole color right at the pole. Fade to
2243 // the average color over the texel height right next to the pole.
2244 if (need_pole) {
2245 float height = spec.height;
2246 if (texturefile.m_sample_border)
2247 height -= 1.0f;
2248 float tt = t_[sample] * height;
2249 if (tt < 1.0f || tt > (height - 1.0f))
2250 fade_to_pole(tt, (float*)&accum, weight, texturefile,
2251 thread_info, levelinfo, options, miplevel,
2252 actualchannels);
2253 }
2254
2255 simd::vfloat4 weight_simd = weight;
2256 accum += weight_simd
2257 * bilerp(texel_simd[0][0], texel_simd[0][1], texel_simd[1][0],
2258 texel_simd[1][1], sfrac, tfrac);
2259 if (daccumds_) {
2260 simd::vfloat4 scalex = weight_simd * float(spec.width);
2261 simd::vfloat4 scaley = weight_simd * float(spec.height);
2262 daccumds += scalex
2263 * lerp(texel_simd[0][1] - texel_simd[0][0],
2264 texel_simd[1][1] - texel_simd[1][0], tfrac);
2265 daccumdt += scaley
2266 * lerp(texel_simd[1][0] - texel_simd[0][0],
2267 texel_simd[1][1] - texel_simd[0][1], sfrac);
2268 }
2269 if (use_fill && !all(stvalid)) {
2270 // Compute appropriate amount of "fill" color to extra channels in
2271 // non-"black"-wrapped regions.
2272 float f = bilerp(float(stvalid[S0] * stvalid[T0]),
2273 float(stvalid[S1] * stvalid[T0]),
2274 float(stvalid[S0] * stvalid[T1]),
2275 float(stvalid[S1] * stvalid[T1]), sfrac, tfrac);
2276 nonfill += (1.0f - f) * weight;
2277 }
2278 }
2279
2280 simd::vbool4 channel_mask = channel_masks[actualchannels];
2281 accum = blend0(accum, channel_mask);
2282 if (use_fill) {
2283 // Add the weighted fill color
2284 accum += blend0not(vfloat4((1.0f - nonfill) * options.fill),
2285 channel_mask);
2286 }
2287
2288 *accum_ = accum;
2289 if (daccumds_) {
2290 *daccumds_ = blend0(daccumds, channel_mask);
2291 *daccumdt_ = blend0(daccumdt, channel_mask);
2292 }
2293 return true;
2294 }
2295
2296
2297 namespace {
2298
2299 // Evaluate Bspline weights for both value and derivatives (if dw is not
2300 // NULL) into w[0..3] and dw[0..3]. This is the canonical version for
2301 // reference, but we don't actually call it, instead favoring the much
2302 // harder to read SIMD versions below.
2303 template<typename T>
evalBSplineWeights_and_derivs(T * w,T fraction,T * dw=NULL)2304 inline void evalBSplineWeights_and_derivs(T* w, T fraction, T* dw = NULL)
2305 {
2306 T one_frac = 1.0 - fraction;
2307 w[0] = T(1.0 / 6.0) * one_frac * one_frac * one_frac;
2308 w[1] = T(2.0 / 3.0)
2309 - T(0.5) * fraction * fraction * (T(2.0) - fraction);
2310 w[2] = T(2.0 / 3.0)
2311 - T(0.5) * one_frac * one_frac * (T(2.0) - one_frac);
2312 w[3] = T(1.0 / 6.0) * fraction * fraction * fraction;
2313 if (dw) {
2314 dw[0] = T(-0.5) * one_frac * one_frac;
2315 dw[1] = T(0.5) * fraction * (T(3.0) * fraction - T(4.0));
2316 dw[2] = T(-0.5) * one_frac * (T(3.0) * one_frac - T(4.0));
2317 dw[3] = T(0.5) * fraction * fraction;
2318 }
2319 }
2320
2321 // Evaluate the 4 Bspline weights (no derivs), returing them as a vfloat4.
2322 // The fraction also comes in as a vfloat4 (assuming the same value in all 4
2323 // slots).
evalBSplineWeights(const vfloat4 & fraction)2324 inline vfloat4 evalBSplineWeights(const vfloat4& fraction)
2325 {
2326 #if 0
2327 // Version that's easy to read and understand:
2328 float one_frac = 1.0f - fraction;
2329 vfloat4 w;
2330 w[0] = 0.0f + (1.0f / 6.0f) * one_frac * one_frac * one_frac;
2331 w[1] = (2.0f / 3.0f) + (-0.5f) * fraction * fraction * (2.0f - fraction);
2332 w[2] = (2.0f / 3.0f) + (-0.5f) * one_frac * one_frac * (2.0f - one_frac);
2333 w[3] = 0.0f + (1.0f / 6.0f) * fraction * fraction * fraction;
2334 return w;
2335 #else
2336 // Not as clear, but fastest version I've been able to achieve:
2337 OIIO_SIMD_FLOAT4_CONST4(A, 0.0f, 2.0f / 3.0f, 2.0f / 3.0f, 0.0f);
2338 OIIO_SIMD_FLOAT4_CONST4(B, 1.0f / 6.0f, -0.5f, -0.5f, 1.0f / 6.0f);
2339 OIIO_SIMD_FLOAT4_CONST4(om1m1o, 1.0f, -1.0f, -1.0f, 1.0f);
2340 OIIO_SIMD_FLOAT4_CONST4(z22z, 0.0f, 2.0f, 2.0f, 0.0f);
2341 simd::vfloat4 one_frac = vfloat4::One() - fraction;
2342 simd::vfloat4 ofof = AxBxAyBy(one_frac,
2343 fraction); // 1-frac, frac, 1-frac, frac
2344 simd::vfloat4 C = (*(vfloat4*)&om1m1o) * ofof + (*(vfloat4*)&z22z);
2345 return (*(vfloat4*)&A) + (*(vfloat4*)&B) * ofof * ofof * C;
2346 #endif
2347 }
2348
2349 // Evaluate Bspline weights for both value and derivatives (if dw is not
2350 // NULL), returning the 4 coefficients for each as vfloat4's.
evalBSplineWeights_and_derivs(simd::vfloat4 * w,float fraction,simd::vfloat4 * dw=NULL)2351 inline void evalBSplineWeights_and_derivs(simd::vfloat4* w, float fraction,
2352 simd::vfloat4* dw = NULL)
2353 {
2354 #if 0
2355 // Version that's easy to read and understand:
2356 float one_frac = 1.0f - fraction;
2357 (*w)[0] = 0.0f + (1.0f / 6.0f) * one_frac * one_frac * one_frac;
2358 (*w)[1] = (2.0f / 3.0f) + (-0.5f) * fraction * fraction * (2.0f - fraction);
2359 (*w)[2] = (2.0f / 3.0f) + (-0.5f) * one_frac * one_frac * (2.0f - one_frac);
2360 (*w)[3] = 0.0f + (1.0f / 6.0f) * fraction * fraction * fraction;
2361 if (dw) {
2362 (*dw)[0] = -0.5f * one_frac * (1.0f * one_frac - 0.0f);
2363 (*dw)[1] = 0.5f * fraction * (3.0f * fraction - 4.0f);
2364 (*dw)[2] = -0.5f * one_frac * (3.0f * one_frac - 4.0f);
2365 (*dw)[3] = 0.5f * fraction * (1.0f * fraction - 0.0f);
2366 }
2367 #else
2368 // Not as clear, but fastest version I've been able to achieve:
2369 OIIO_SIMD_FLOAT4_CONST4(A, 0.0f, 2.0f / 3.0f, 2.0f / 3.0f, 0.0f);
2370 OIIO_SIMD_FLOAT4_CONST4(B, 1.0f / 6.0f, -0.5f, -0.5f, 1.0f / 6.0f);
2371 float one_frac = 1.0f - fraction;
2372 simd::vfloat4 ofof(one_frac, fraction, one_frac, fraction);
2373 simd::vfloat4 C(one_frac, 2.0f - fraction, 2.0f - one_frac, fraction);
2374 *w = (*(vfloat4*)&A) + (*(vfloat4*)&B) * ofof * ofof * C;
2375 if (dw) {
2376 const simd::vfloat4 D(-0.5f, 0.5f, -0.5f, 0.5f);
2377 const simd::vfloat4 E(1.0f, 3.0f, 3.0f, 1.0f);
2378 const simd::vfloat4 F(0.0f, 4.0f, 4.0f, 0.0f);
2379 *dw = D * ofof * (E * ofof - F);
2380 }
2381 #endif
2382 }
2383
2384 } // anonymous namespace
2385
2386
2387
2388 bool
sample_bicubic(int nsamples,const float * s_,const float * t_,int miplevel,TextureFile & texturefile,PerThreadInfo * thread_info,TextureOpt & options,int nchannels_result,int actualchannels,const float * weight_,vfloat4 * accum_,vfloat4 * daccumds_,vfloat4 * daccumdt_)2389 TextureSystemImpl::sample_bicubic(
2390 int nsamples, const float* s_, const float* t_, int miplevel,
2391 TextureFile& texturefile, PerThreadInfo* thread_info, TextureOpt& options,
2392 int nchannels_result, int actualchannels, const float* weight_,
2393 vfloat4* accum_, vfloat4* daccumds_, vfloat4* daccumdt_)
2394 {
2395 const ImageSpec& spec(texturefile.spec(options.subimage, miplevel));
2396 const ImageCacheFile::LevelInfo& levelinfo(
2397 texturefile.levelinfo(options.subimage, miplevel));
2398 TypeDesc::BASETYPE pixeltype = texturefile.pixeltype(options.subimage);
2399 wrap_impl_simd swrap_func_simd = wrap_functions_simd[(int)options.swrap];
2400 wrap_impl_simd twrap_func_simd = wrap_functions_simd[(int)options.twrap];
2401
2402 vint4 spec_x_simd(spec.x);
2403 vint4 spec_y_simd(spec.y);
2404 vint4 spec_width_simd(spec.width);
2405 vint4 spec_height_simd(spec.height);
2406 vint4 spec_x_plus_width_simd = spec_x_simd + spec_width_simd;
2407 vint4 spec_y_plus_height_simd = spec_y_simd + spec_height_simd;
2408 bool use_fill = (nchannels_result > actualchannels && options.fill);
2409 bool tilepow2 = ispow2(spec.tile_width) && ispow2(spec.tile_height);
2410 int tilewidthmask = spec.tile_width - 1; // e.g. 63
2411 int tileheightmask = spec.tile_height - 1;
2412 size_t channelsize = texturefile.channelsize(options.subimage);
2413 int firstchannel = options.firstchannel;
2414 float nonfill = 0.0f; // The degree to which we DON'T need fill
2415 // N.B. What's up with "nofill"? We need to consider fill only when we
2416 // are inside the valid texture region. Outside, i.e. in the black wrap
2417 // region, black takes precedence over fill. By keeping track of when
2418 // we don't need to worry about fill, which is the comparatively rare
2419 // case, we do a lot less math and have fewer rounding errors.
2420
2421 // need_pole: do we potentially need to fade to special pole color?
2422 // If we do, can't restrict channel range or fade_to_pole won't work.
2423 bool need_pole = (options.envlayout == LayoutLatLong && levelinfo.onetile);
2424 int tile_chbegin = 0, tile_chend = spec.nchannels;
2425 if (spec.nchannels > m_max_tile_channels) {
2426 // For files with many channels, narrow the range we cache
2427 tile_chbegin = options.firstchannel;
2428 tile_chend = options.firstchannel + actualchannels;
2429 }
2430 TileID id(texturefile, options.subimage, miplevel, 0, 0, 0, tile_chbegin,
2431 tile_chend);
2432 size_t pixelsize = channelsize * id.nchannels();
2433 size_t firstchannel_offset_bytes = channelsize
2434 * (firstchannel - id.chbegin());
2435 vfloat4 accum, daccumds, daccumdt;
2436 accum.clear();
2437 if (daccumds_) {
2438 daccumds.clear();
2439 daccumdt.clear();
2440 }
2441
2442 vfloat4 s_simd, t_simd;
2443 vint4 sint_simd, tint_simd;
2444 vfloat4 sfrac_simd, tfrac_simd;
2445 for (int sample = 0; sample < nsamples; ++sample) {
2446 // To utilize SIMD ops in an inherently scalar loop, every fourth
2447 // step, we compute the st_to_texel for the next four samples.
2448 int sample4 = sample & 3;
2449 if (sample4 == 0) {
2450 s_simd.load(s_ + sample);
2451 t_simd.load(t_ + sample);
2452 st_to_texel_simd(s_simd, t_simd, texturefile, spec, sint_simd,
2453 tint_simd, sfrac_simd, tfrac_simd);
2454 }
2455 int sint = sint_simd[sample4], tint = tint_simd[sample4];
2456 float sfrac = sfrac_simd[sample4], tfrac = tfrac_simd[sample4];
2457 float weight = weight_[sample];
2458
2459 // We're gathering 4x4 samples and 4x weights. Indices: texels 0,
2460 // 1, 2, 3. The sample lies between samples 1 and 2.
2461
2462 static const OIIO_SIMD4_ALIGN int iota[4] = { 0, 1, 2, 3 };
2463 static const OIIO_SIMD4_ALIGN int iota_1[4] = { -1, 0, 1, 2 };
2464 simd::vint4 stex, ttex; // Texel coords for each row and column
2465 stex = sint + (*(vint4*)iota_1);
2466 ttex = tint + (*(vint4*)iota_1);
2467 simd::vbool4 svalid = swrap_func_simd(stex, spec_x_simd,
2468 spec_width_simd);
2469 simd::vbool4 tvalid = twrap_func_simd(ttex, spec_y_simd,
2470 spec_height_simd);
2471 bool allvalid = reduce_and(svalid & tvalid);
2472 bool anyvalid = reduce_or(svalid | tvalid);
2473 if (!levelinfo.full_pixel_range && anyvalid) {
2474 // Handle case of crop windows or overscan
2475 svalid &= (stex >= spec_x_simd) & (stex < spec_x_plus_width_simd);
2476 tvalid &= (ttex >= spec_y_simd) & (ttex < spec_y_plus_height_simd);
2477 allvalid = reduce_and(svalid & tvalid);
2478 anyvalid = reduce_or(svalid | tvalid);
2479 }
2480 if (!anyvalid) {
2481 // All texels we need were out of range and using 'black' wrap.
2482 nonfill += weight;
2483 continue;
2484 }
2485
2486 simd::vfloat4 texel_simd[4][4];
2487 // int tile_s = (stex[0] - spec.x) % spec.tile_width;
2488 // int tile_t = (ttex[0] - spec.y) % spec.tile_height;
2489 int tile_s = (stex[0] - spec.x);
2490 int tile_t = (ttex[0] - spec.y);
2491 if (tilepow2) {
2492 tile_s &= tilewidthmask;
2493 tile_t &= tileheightmask;
2494 } else {
2495 tile_s %= spec.tile_width;
2496 tile_t %= spec.tile_height;
2497 }
2498 bool s_onetile = (tile_s <= tilewidthmask - 3);
2499 bool t_onetile = (tile_t <= tileheightmask - 3);
2500 if (s_onetile & t_onetile) {
2501 // If we thought it was one tile, realize that it isn't unless
2502 // it's ascending.
2503 s_onetile &= all(stex
2504 == (simd::shuffle<0>(stex) + (*(vint4*)iota)));
2505 t_onetile &= all(ttex
2506 == (simd::shuffle<0>(ttex) + (*(vint4*)iota)));
2507 }
2508 bool onetile = (s_onetile & t_onetile);
2509 if (onetile & allvalid) {
2510 // Shortcut if all the texels we need are on the same tile
2511 id.xy(stex[0] - tile_s, ttex[0] - tile_t);
2512 bool ok = find_tile(id, thread_info, sample == 0);
2513 if (!ok)
2514 error("{}", m_imagecache->geterror());
2515 TileRef& tile(thread_info->tile);
2516 if (!tile) {
2517 return false;
2518 }
2519 // N.B. thread_info->tile will keep holding a ref-counted pointer
2520 // to the tile for the duration that we're using the tile data.
2521 int offset = pixelsize * (tile_t * spec.tile_width + tile_s);
2522 const unsigned char* base = tile->bytedata() + offset
2523 + firstchannel_offset_bytes;
2524 OIIO_DASSERT(tile->data());
2525 if (pixeltype == TypeDesc::UINT8) {
2526 for (int j = 0, j_offset = 0; j < 4;
2527 ++j, j_offset += pixelsize * spec.tile_width)
2528 for (int i = 0, i_offset = j_offset; i < 4;
2529 ++i, i_offset += pixelsize)
2530 texel_simd[j][i] = uchar2float4(base + i_offset);
2531 } else if (pixeltype == TypeDesc::UINT16) {
2532 for (int j = 0, j_offset = 0; j < 4;
2533 ++j, j_offset += pixelsize * spec.tile_width)
2534 for (int i = 0, i_offset = j_offset; i < 4;
2535 ++i, i_offset += pixelsize)
2536 texel_simd[j][i] = ushort2float4(
2537 (const uint16_t*)(base + i_offset));
2538 } else if (pixeltype == TypeDesc::HALF) {
2539 for (int j = 0, j_offset = 0; j < 4;
2540 ++j, j_offset += pixelsize * spec.tile_width)
2541 for (int i = 0, i_offset = j_offset; i < 4;
2542 ++i, i_offset += pixelsize)
2543 texel_simd[j][i] = half2float4(
2544 (const half*)(base + i_offset));
2545 } else {
2546 for (int j = 0, j_offset = 0; j < 4;
2547 ++j, j_offset += pixelsize * spec.tile_width)
2548 for (int i = 0, i_offset = j_offset; i < 4;
2549 ++i, i_offset += pixelsize)
2550 texel_simd[j][i].load((const float*)(base + i_offset));
2551 }
2552 } else {
2553 simd::vint4 tile_s, tile_t; // texel offset WITHIN its tile
2554 simd::vint4 tile_s_edge,
2555 tile_t_edge; // coordinate of the tile edge
2556 tile_s = (stex - spec_x_simd) % spec.tile_width;
2557 tile_t = (ttex - spec_y_simd) % spec.tile_height;
2558 tile_s_edge = stex - tile_s;
2559 tile_t_edge = ttex - tile_t;
2560 simd::vint4 column_offset_bytes = tile_s * pixelsize
2561 + firstchannel_offset_bytes;
2562 for (int j = 0; j < 4; ++j) {
2563 if (!tvalid[j]) {
2564 for (int i = 0; i < 4; ++i)
2565 texel_simd[j][i].clear();
2566 continue;
2567 }
2568 int row_offset_bytes = tile_t[j]
2569 * (spec.tile_width * pixelsize);
2570 for (int i = 0; i < 4; ++i) {
2571 if (!svalid[i]) {
2572 texel_simd[j][i].clear();
2573 continue;
2574 }
2575 // Trick: we only need to find a tile if i == 0 or if we
2576 // just crossed a tile boundary (if tile_s[i] == 0).
2577 // Otherwise, we are still on the same tile as the last
2578 // iteration, as long as we aren't using mirror wrap mode!
2579 if (i == 0 || tile_s[i] == 0
2580 || options.swrap == TextureOpt::WrapMirror) {
2581 id.xy(tile_s_edge[i], tile_t_edge[j]);
2582 bool ok = find_tile(id, thread_info, sample == 0);
2583 if (!ok)
2584 error("{}", m_imagecache->geterror());
2585 DASSERT(thread_info->tile->id() == id);
2586 if (!thread_info->tile->valid())
2587 return false;
2588 }
2589 TileRef& tile(thread_info->tile);
2590 OIIO_DASSERT(tile->data());
2591 int offset = row_offset_bytes + column_offset_bytes[i];
2592 // const unsigned char *pixelptr = tile->bytedata() + offset[i];
2593 if (pixeltype == TypeDesc::UINT8)
2594 texel_simd[j][i] = uchar2float4(tile->bytedata()
2595 + offset);
2596 else if (pixeltype == TypeDesc::UINT16)
2597 texel_simd[j][i] = ushort2float4(
2598 (const uint16_t*)(tile->bytedata() + offset));
2599 else if (pixeltype == TypeDesc::HALF)
2600 texel_simd[j][i] = half2float4(
2601 (const half*)(tile->bytedata() + offset));
2602 else
2603 texel_simd[j][i].load(
2604 (const float*)(tile->bytedata() + offset));
2605 }
2606 }
2607 }
2608
2609 // When we're on the lowest res mipmap levels, it's more pleasing if
2610 // we converge to a single pole color right at the pole. Fade to
2611 // the average color over the texel height right next to the pole.
2612 if (need_pole) {
2613 float height = spec.height;
2614 if (texturefile.m_sample_border)
2615 height -= 1.0f;
2616 float tt = t_[sample] * height;
2617 if (tt < 1.0f || tt > (height - 1.0f))
2618 fade_to_pole(tt, (float*)&accum, weight, texturefile,
2619 thread_info, levelinfo, options, miplevel,
2620 actualchannels);
2621 }
2622
2623 // We use a formulation of cubic B-spline evaluation that reduces to
2624 // lerps. It's tricky to follow, but the references are:
2625 // * Ruijters, Daniel et al, "Efficient GPU-Based Texture
2626 // Interpolation using Uniform B-Splines", Journal of Graphics
2627 // Tools 13(4), pp. 61-69, 2008.
2628 // http://jgt.akpeters.com/papers/RuijtersEtAl08/
2629 // * Sigg, Christian and Markus Hadwiger, "Fast Third-Order Texture
2630 // Filtering", in GPU Gems 2 (Chapter 20), Pharr and Fernando, ed.
2631 // http://http.developer.nvidia.com/GPUGems2/gpugems2_chapter20.html
2632 // We like this formulation because it's slightly faster than any of
2633 // the other B-spline evaluation routines we tried, and also the lerp
2634 // guarantees that the filtered results will be non-negative for
2635 // non-negative texel values (which we had trouble with before due to
2636 // numerical imprecision).
2637 vfloat4 wx, dwx;
2638 vfloat4 wy, dwy;
2639 if (daccumds_) {
2640 evalBSplineWeights_and_derivs(&wx, sfrac, &dwx);
2641 evalBSplineWeights_and_derivs(&wy, tfrac, &dwy);
2642 } else {
2643 wx = evalBSplineWeights(vfloat4(sfrac));
2644 wy = evalBSplineWeights(vfloat4(tfrac));
2645 #if (defined(__i386__) && !defined(__x86_64__)) || defined(__aarch64__)
2646 // Some platforms complain here about these being uninitialized,
2647 // so initialize them. Don't waste the cycles for platforms that
2648 // don't seem to have that error. It's a false positive -- this
2649 // code really is safe without the initialization, but that
2650 // doesn't seem to get figured out on some platforms (even when
2651 // running the same gcc version).
2652 dwx = vfloat4::Zero();
2653 dwy = vfloat4::Zero();
2654 #endif
2655 }
2656
2657 // figure out lerp weights so we can turn the filter into a sequence of lerp's
2658 // Here's the obvious scalar code:
2659 // float g0x = wx[0] + wx[1]; float h0x = (wx[1] / g0x);
2660 // float g1x = wx[2] + wx[3]; float h1x = (wx[3] / g1x);
2661 // float g0y = wy[0] + wy[1]; float h0y = (wy[1] / g0y);
2662 // float g1y = wy[2] + wy[3]; float h1y = (wy[3] / g1y);
2663 // But instead, we convolutedly (but quickly!) compute the four g
2664 // and four h values using SIMD ops:
2665 vfloat4 wx_0213 = simd::shuffle<0, 2, 1, 3>(wx);
2666 vfloat4 wx_1302 = simd::shuffle<1, 3, 0, 2>(wx);
2667 vfloat4 wx_01_23_01_23 = wx_0213
2668 + wx_1302; // wx[0]+wx[1] wx[2]+wx[3] ..
2669 vfloat4 wy_0213 = simd::shuffle<0, 2, 1, 3>(wy);
2670 vfloat4 wy_1302 = simd::shuffle<1, 3, 0, 2>(wy);
2671 vfloat4 wy_01_23_01_23 = wy_0213
2672 + wy_1302; // wy[0]+wy[1] wy[2]+wy[3] ..
2673 vfloat4 g = AxyBxy(
2674 wx_01_23_01_23,
2675 wy_01_23_01_23); // wx[0]+wx[1] wx[2]+wx[3] wy[0]+wy[1] wy[2]+wy[3]
2676 vfloat4 wx13_wy13 = AxyBxy(wx_1302, wy_1302);
2677 vfloat4 h = wx13_wy13 / g; // [ h0x h1x h0y h1y ]
2678
2679 simd::vfloat4 col[4];
2680 for (int j = 0; j < 4; ++j) {
2681 simd::vfloat4 lx = lerp(texel_simd[j][0], texel_simd[j][1],
2682 shuffle<0>(h) /*h0x*/);
2683 simd::vfloat4 rx = lerp(texel_simd[j][2], texel_simd[j][3],
2684 shuffle<1>(h) /*h1x*/);
2685 col[j] = lerp(lx, rx, shuffle<1>(g) /*g1x*/);
2686 }
2687 simd::vfloat4 ly = lerp(col[0], col[1], shuffle<2>(h) /*h0y*/);
2688 simd::vfloat4 ry = lerp(col[2], col[3], shuffle<3>(h) /*h1y*/);
2689 simd::vfloat4 weight_simd = weight;
2690 accum += weight_simd * lerp(ly, ry, shuffle<3>(g) /*g1y*/);
2691 if (daccumds_) {
2692 simd::vfloat4 scalex = weight_simd * float(spec.width);
2693 simd::vfloat4 scaley = weight_simd * float(spec.height);
2694 daccumds += scalex
2695 * (dwx[0]
2696 * (wy[0] * texel_simd[0][0]
2697 + wy[1] * texel_simd[1][0]
2698 + wy[2] * texel_simd[2][0]
2699 + wy[3] * texel_simd[3][0])
2700 + dwx[1]
2701 * (wy[0] * texel_simd[0][1]
2702 + wy[1] * texel_simd[1][1]
2703 + wy[2] * texel_simd[2][1]
2704 + wy[3] * texel_simd[3][1])
2705 + dwx[2]
2706 * (wy[0] * texel_simd[0][2]
2707 + wy[1] * texel_simd[1][2]
2708 + wy[2] * texel_simd[2][2]
2709 + wy[3] * texel_simd[3][2])
2710 + dwx[3]
2711 * (wy[0] * texel_simd[0][3]
2712 + wy[1] * texel_simd[1][3]
2713 + wy[2] * texel_simd[2][3]
2714 + wy[3] * texel_simd[3][3]));
2715 daccumdt += scaley
2716 * (dwy[0]
2717 * (wx[0] * texel_simd[0][0]
2718 + wx[1] * texel_simd[0][1]
2719 + wx[2] * texel_simd[0][2]
2720 + wx[3] * texel_simd[0][3])
2721 + dwy[1]
2722 * (wx[0] * texel_simd[1][0]
2723 + wx[1] * texel_simd[1][1]
2724 + wx[2] * texel_simd[1][2]
2725 + wx[3] * texel_simd[1][3])
2726 + dwy[2]
2727 * (wx[0] * texel_simd[2][0]
2728 + wx[1] * texel_simd[2][1]
2729 + wx[2] * texel_simd[2][2]
2730 + wx[3] * texel_simd[2][3])
2731 + dwy[3]
2732 * (wx[0] * texel_simd[3][0]
2733 + wx[1] * texel_simd[3][1]
2734 + wx[2] * texel_simd[3][2]
2735 + wx[3] * texel_simd[3][3]));
2736 }
2737
2738 // Compute appropriate amount of "fill" color to extra channels in
2739 // non-"black"-wrapped regions.
2740 if (!allvalid && use_fill) {
2741 float col[4];
2742 for (int j = 0; j < 4; ++j) {
2743 float lx = lerp(1.0f * tvalid[j] * svalid[0],
2744 1.0f * tvalid[j] * svalid[1],
2745 extract<0>(h) /*h0x*/);
2746 float rx = lerp(1.0f * tvalid[j] * svalid[2],
2747 1.0f * tvalid[j] * svalid[3],
2748 extract<1>(h) /*h1x*/);
2749 col[j] = lerp(lx, rx, extract<1>(g) /*g1x*/);
2750 }
2751 float ly = lerp(col[0], col[1], extract<2>(h) /*h0y*/);
2752 float ry = lerp(col[2], col[3], extract<3>(h) /*h1y*/);
2753 nonfill += weight * (1.0f - lerp(ly, ry, extract<3>(g) /*g1y*/));
2754 }
2755 }
2756
2757 simd::vbool4 channel_mask = channel_masks[actualchannels];
2758 accum = blend0(accum, channel_mask);
2759 if (use_fill) {
2760 // Add the weighted fill color
2761 accum += blend0not(vfloat4((1.0f - nonfill) * options.fill),
2762 channel_mask);
2763 }
2764
2765 *accum_ = accum;
2766 if (daccumds_) {
2767 *daccumds_ = blend0(daccumds, channel_mask);
2768 *daccumdt_ = blend0(daccumdt, channel_mask);
2769 }
2770 return true;
2771 }
2772
2773
2774
2775 void
visualize_ellipse(const std::string & name,float dsdx,float dtdx,float dsdy,float dtdy,float sblur,float tblur)2776 TextureSystemImpl::visualize_ellipse(const std::string& name, float dsdx,
2777 float dtdx, float dsdy, float dtdy,
2778 float sblur, float tblur)
2779 {
2780 std::cout << name << " derivs dx " << dsdx << ' ' << dtdx << ", dt " << dtdx
2781 << ' ' << dtdy << "\n";
2782 adjust_width(dsdx, dtdx, dsdy, dtdy, 1.0f, 1.0f /*, sblur, tblur */);
2783 float majorlength, minorlength, theta;
2784 float ABCF[4];
2785 ellipse_axes(dsdx, dtdx, dsdy, dtdy, majorlength, minorlength, theta, ABCF);
2786 std::cout << " ellipse major " << majorlength << ", minor " << minorlength
2787 << ", theta " << theta << "\n";
2788 adjust_blur(majorlength, minorlength, theta, sblur, tblur);
2789 std::cout << " post " << sblur << ' ' << tblur << " blur: major "
2790 << majorlength << ", minor " << minorlength << "\n\n";
2791
2792 TextureOpt options;
2793 float trueaspect;
2794 float aspect = TextureSystemImpl::anisotropic_aspect(majorlength,
2795 minorlength, options,
2796 trueaspect);
2797 float* lineweight = OIIO_ALLOCA(float, 2 * options.anisotropic);
2798 float smajor, tmajor, invsamples;
2799 int nsamples = compute_ellipse_sampling(aspect, theta, majorlength,
2800 minorlength, smajor, tmajor,
2801 invsamples, lineweight);
2802
2803 // Make an ImageBuf to hold our visualization image, set it to grey
2804 float scale = 100;
2805 int w = 256, h = 256;
2806 ImageSpec spec(w, h, 3);
2807 ImageBuf ib(spec);
2808 static float dark[3] = { 0.2f, 0.2f, 0.2f };
2809 static float white[3] = { 1, 1, 1 };
2810 static float grey[3] = { 0.5, 0.5, 0.5 };
2811 static float red[3] = { 1, 0, 0 };
2812 static float green[3] = { 0, 1, 0 };
2813 ImageBufAlgo::fill(ib, grey);
2814
2815 // scan all the pixels, darken the ellipse interior (no blur considered)
2816 for (int j = 0; j < h; ++j) {
2817 float y = (j - h / 2) / scale;
2818 for (int i = 0; i < w; ++i) {
2819 float x = (i - w / 2) / scale;
2820 float d2 = ABCF[0] * x * x + ABCF[1] * x * y + ABCF[2] * y * y;
2821 if (d2 < 1.0f)
2822 ib.setpixel(i, h - 1 - j, dark);
2823 }
2824 }
2825
2826 // Draw red and green axes for the dx and dy derivatives, respectively
2827 ImageBufAlgo::render_line(ib, w / 2, h / 2, w / 2 + int(dsdx * scale),
2828 h / 2 - int(dtdx * scale), red);
2829 ImageBufAlgo::render_line(ib, w / 2, h / 2, w / 2 + int(dsdy * scale),
2830 h / 2 - int(dtdy * scale), green);
2831
2832 // Draw yellow and blue axes for the ellipse axes, with blur
2833 ImageBufAlgo::render_line(ib, w / 2, h / 2,
2834 w / 2 + int(scale * majorlength * cosf(theta)),
2835 h / 2 - int(scale * majorlength * sinf(theta)),
2836 { 1.0f, 1.0f, 0.0f });
2837 ImageBufAlgo::render_line(ib, w / 2, h / 2,
2838 w / 2 + int(scale * minorlength * -sinf(theta)),
2839 h / 2 - int(scale * minorlength * cosf(theta)),
2840 { 0.0f, 0.0f, 1.0f });
2841
2842 float bigweight = 0;
2843 for (int i = 0; i < nsamples; ++i)
2844 bigweight = std::max(lineweight[i], bigweight);
2845
2846 // Plop white dots at the sample positions
2847 int rad = int(scale * minorlength);
2848 for (int sample = 0; sample < nsamples; ++sample) {
2849 float pos = 1.0f * (sample + 0.5f) * invsamples - 0.5f;
2850 float x = pos * smajor, y = pos * tmajor;
2851 int xx = w / 2 + int(x * scale), yy = h / 2 - int(y * scale);
2852 int size = int(5 * lineweight[sample] / bigweight);
2853 ImageBufAlgo::render_box(ib, xx - rad, yy - rad, xx + rad, yy + rad,
2854 { 0.65f, 0.65f, 0.65f });
2855 ImageBufAlgo::render_box(ib, xx - size / 2, yy - size / 2,
2856 xx + size / 2, yy + size / 2, white, true);
2857 }
2858
2859 ib.write(name);
2860 }
2861
2862
2863
2864 void
unit_test_texture()2865 TextureSystemImpl::unit_test_texture()
2866 {
2867 // Just blur in s, it is a harder case
2868 float sblur = unit_test_texture_blur, tblur = 0.0f;
2869 float dsdx, dtdx, dsdy, dtdy;
2870
2871 dsdx = 0.4;
2872 dtdx = 0.0;
2873 dsdy = 0.0;
2874 dtdy = 0.2;
2875 visualize_ellipse("0.tif", dsdx, dtdx, dsdy, dtdy, sblur, tblur);
2876
2877 dsdx = 0.2;
2878 dtdx = 0.0;
2879 dsdy = 0.0;
2880 dtdy = 0.4;
2881 visualize_ellipse("1.tif", dsdx, dtdx, dsdy, dtdy, sblur, tblur);
2882
2883 dsdx = 0.2;
2884 dtdx = 0.2;
2885 dsdy = -0.2;
2886 dtdy = 0.2;
2887 visualize_ellipse("2.tif", dsdx, dtdx, dsdy, dtdy, sblur, tblur);
2888
2889 dsdx = 0.35;
2890 dtdx = 0.27;
2891 dsdy = 0.1;
2892 dtdy = 0.35;
2893 visualize_ellipse("3.tif", dsdx, dtdx, dsdy, dtdy, sblur, tblur);
2894
2895 dsdx = 0.35;
2896 dtdx = 0.27;
2897 dsdy = 0.1;
2898 dtdy = -0.35;
2899 visualize_ellipse("4.tif", dsdx, dtdx, dsdy, dtdy, sblur, tblur);
2900
2901 // Major axis starts vertical, but blur make it minor?
2902 dsdx = 0.2;
2903 dtdx = 0.0;
2904 dsdy = 0.0;
2905 dtdy = 0.3;
2906 visualize_ellipse("5.tif", dsdx, dtdx, dsdy, dtdy, 0.5, 0.0);
2907 dsdx = 0.3;
2908 dtdx = 0.0;
2909 dsdy = 0.0;
2910 dtdy = 0.2;
2911 visualize_ellipse("6.tif", dsdx, dtdx, dsdy, dtdy, 0.0, 0.5);
2912
2913 std::mt19937 gen;
2914 std::uniform_real_distribution<float> rnd(0.0f, 1.0f);
2915 for (int i = 0; i < 100; ++i) {
2916 dsdx = 1.5f * (rnd(gen) - 0.5f);
2917 dtdx = 1.5f * (rnd(gen) - 0.5f);
2918 dsdy = 1.5f * (rnd(gen) - 0.5f);
2919 dtdy = 1.5f * (rnd(gen) - 0.5f);
2920 visualize_ellipse(Strutil::sprintf("%04d.tif", 100 + i), dsdx, dtdx,
2921 dsdy, dtdy, sblur, tblur);
2922 }
2923 }
2924
2925
2926
2927 } // end namespace pvt
2928
2929 OIIO_NAMESPACE_END
2930