1 /****************************************************************************** 2 * 3 * Project: High Performance Image Reprojector 4 * Purpose: Implementation of the GDALWarpKernel class. Implements the actual 5 * image warping for a "chunk" of input and output imagery already 6 * loaded into memory. 7 * Author: Frank Warmerdam, warmerdam@pobox.com 8 * 9 ****************************************************************************** 10 * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com> 11 * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com> 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a 14 * copy of this software and associated documentation files (the "Software"), 15 * to deal in the Software without restriction, including without limitation 16 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 17 * and/or sell copies of the Software, and to permit persons to whom the 18 * Software is furnished to do so, subject to the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included 21 * in all copies or substantial portions of the Software. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 24 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 26 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 29 * DEALINGS IN THE SOFTWARE. 30 ****************************************************************************/ 31 32 #include "cpl_port.h" 33 #include "gdalwarper.h" 34 35 #include <cfloat> 36 #include <cmath> 37 #include <cstddef> 38 #include <cstdlib> 39 #include <cstring> 40 41 #include <algorithm> 42 #include <limits> 43 #include <new> 44 #include <vector> 45 46 #include "cpl_atomic_ops.h" 47 #include "cpl_conv.h" 48 #include "cpl_error.h" 49 #include "cpl_multiproc.h" 50 #include "cpl_progress.h" 51 #include "cpl_string.h" 52 #include "cpl_vsi.h" 53 #include "cpl_worker_thread_pool.h" 54 #include "gdal.h" 55 #include "gdal_alg.h" 56 #include "gdal_alg_priv.h" 57 #include "gdal_thread_pool.h" 58 #include "gdalwarpkernel_opencl.h" 59 60 // We restrict to 64bit processors because they are guaranteed to have SSE2. 61 // Could possibly be used too on 32bit, but we would need to check at runtime. 62 #if defined(__x86_64) || defined(_M_X64) 63 #include "gdalsse_priv.h" 64 65 #if __SSE4_1__ 66 #include <smmintrin.h> 67 #endif 68 69 #if __SSE3__ 70 #include <pmmintrin.h> 71 #endif 72 73 #endif 74 75 CPL_CVSID("$Id: gdalwarpkernel.cpp 5cf9a59b0286589b642bb9c229bbb0b4966a88c8 2021-08-13 18:39:08 +0200 Even Rouault $") 76 77 constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001; 78 constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f; 79 80 // #define INSTANTIATE_FLOAT64_SSE2_IMPL 81 82 static const int anGWKFilterRadius[] = 83 { 84 0, // Nearest neighbour 85 1, // Bilinear 86 2, // Cubic Convolution (Catmull-Rom) 87 2, // Cubic B-Spline 88 3, // Lanczos windowed sinc 89 0, // Average 90 0, // Mode 91 0, // Reserved GRA_Gauss=7 92 0, // Max 93 0, // Min 94 0, // Med 95 0, // Q1 96 0, // Q3 97 0, // Sum 98 0, // RMS 99 }; 100 101 static double GWKBilinear(double dfX); 102 static double GWKCubic(double dfX); 103 static double GWKBSpline(double dfX); 104 static double GWKLanczosSinc(double dfX); 105 106 static const FilterFuncType apfGWKFilter[] = 107 { 108 nullptr, // Nearest neighbour 109 GWKBilinear, // Bilinear 110 GWKCubic, // Cubic Convolution (Catmull-Rom) 111 GWKBSpline, // Cubic B-Spline 112 GWKLanczosSinc, // Lanczos windowed sinc 113 nullptr, // Average 114 nullptr, // Mode 115 nullptr, // Reserved GRA_Gauss=7 116 nullptr, // Max 117 nullptr, // Min 118 nullptr, // Med 119 nullptr, // Q1 120 nullptr, // Q3 121 nullptr, // Sum 122 nullptr, // RMS 123 }; 124 125 // TODO(schwehr): Can we make these functions have a const * const arg? 126 static double GWKBilinear4Values(double* padfVals); 127 static double GWKCubic4Values(double* padfVals); 128 static double GWKBSpline4Values(double* padfVals); 129 static double GWKLanczosSinc4Values(double* padfVals); 130 131 static const FilterFunc4ValuesType apfGWKFilter4Values[] = 132 { 133 nullptr, // Nearest neighbour 134 GWKBilinear4Values, // Bilinear 135 GWKCubic4Values, // Cubic Convolution (Catmull-Rom) 136 GWKBSpline4Values, // Cubic B-Spline 137 GWKLanczosSinc4Values, // Lanczos windowed sinc 138 nullptr, // Average 139 nullptr, // Mode 140 nullptr, // Reserved GRA_Gauss=7 141 nullptr, // Max 142 nullptr, // Min 143 nullptr, // Med 144 nullptr, // Q1 145 nullptr, // Q3 146 nullptr, // Sum 147 nullptr, // RMS 148 }; 149 150 int GWKGetFilterRadius(GDALResampleAlg eResampleAlg) 151 { 152 static_assert( CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1, "Bad size of anGWKFilterRadius" ); 153 return anGWKFilterRadius[eResampleAlg]; 154 } 155 156 FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg) 157 { 158 static_assert( CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1, "Bad size of apfGWKFilter" ); 159 return apfGWKFilter[eResampleAlg]; 160 } 161 162 FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg) 163 { 164 static_assert( CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1, "Bad size of apfGWKFilter4Values" ); 165 return apfGWKFilter4Values[eResampleAlg]; 166 } 167 168 #ifdef HAVE_OPENCL 169 static CPLErr GWKOpenCLCase( GDALWarpKernel * ); 170 #endif 171 172 static CPLErr GWKGeneralCase( GDALWarpKernel * ); 173 static CPLErr GWKRealCase( GDALWarpKernel *poWK ); 174 static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ); 175 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ); 176 static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ); 177 static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ); 178 #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL 179 static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK ); 180 #endif 181 static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ); 182 static CPLErr GWKNearestByte( GDALWarpKernel *poWK ); 183 static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ); 184 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ); 185 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ); 186 #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL 187 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK ); 188 #endif 189 static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ); 190 static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ); 191 static CPLErr GWKNearestShort( GDALWarpKernel *poWK ); 192 static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ); 193 static CPLErr GWKNearestFloat( GDALWarpKernel *poWK ); 194 static CPLErr GWKAverageOrMode( GDALWarpKernel * ); 195 static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort( GDALWarpKernel * ); 196 static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort( GDALWarpKernel * ); 197 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort( GDALWarpKernel * ); 198 199 /************************************************************************/ 200 /* GWKJobStruct */ 201 /************************************************************************/ 202 203 typedef struct _GWKJobStruct GWKJobStruct; 204 205 struct _GWKJobStruct 206 { 207 GDALWarpKernel *poWK; 208 int iYMin; 209 int iYMax; 210 volatile int *pnCounter; 211 volatile int *pbStop; 212 CPLCond *hCond; 213 CPLMutex *hCondMutex; 214 int (*pfnProgress)(GWKJobStruct* psJob); 215 void *pTransformerArg; 216 217 void (*pfnFunc)(void*); // used by GWKRun() to assign the proper pTransformerArg 218 } ; 219 220 struct GWKThreadData 221 { 222 std::unique_ptr<CPLJobQueue> poJobQueue{}; 223 GWKJobStruct* pasThreadJob = nullptr; 224 int nThreads = 0; 225 CPLCond* hCond = nullptr; 226 CPLMutex* hCondMutex = nullptr; 227 bool bTransformerArgInputAssignedToThread = false; 228 void* pTransformerArgInput = nullptr; // owned by calling layer. Not to be destroyed 229 std::map<GIntBig, void*> mapThreadToTransformerArg{}; 230 }; 231 232 /************************************************************************/ 233 /* GWKProgressThread() */ 234 /************************************************************************/ 235 236 // Return TRUE if the computation must be interrupted. 237 static int GWKProgressThread( GWKJobStruct* psJob ) 238 { 239 CPLAcquireMutex(psJob->hCondMutex, 1.0); 240 (*(psJob->pnCounter))++; 241 CPLCondSignal(psJob->hCond); 242 int bStop = *(psJob->pbStop); 243 CPLReleaseMutex(psJob->hCondMutex); 244 245 return bStop; 246 } 247 248 /************************************************************************/ 249 /* GWKProgressMonoThread() */ 250 /************************************************************************/ 251 252 // Return TRUE if the computation must be interrupted. 253 static int GWKProgressMonoThread( GWKJobStruct* psJob ) 254 { 255 GDALWarpKernel *poWK = psJob->poWK; 256 int nCounter = ++(*(psJob->pnCounter)); 257 if( !poWK->pfnProgress( poWK->dfProgressBase + poWK->dfProgressScale * 258 (nCounter / static_cast<double>(psJob->iYMax)), 259 "", poWK->pProgress ) ) 260 { 261 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); 262 *(psJob->pbStop) = TRUE; 263 return TRUE; 264 } 265 return FALSE; 266 } 267 268 /************************************************************************/ 269 /* GWKGenericMonoThread() */ 270 /************************************************************************/ 271 272 static CPLErr GWKGenericMonoThread( GDALWarpKernel *poWK, 273 void (*pfnFunc) (void *pUserData) ) 274 { 275 volatile int bStop = FALSE; 276 volatile int nCounter = 0; 277 278 GWKJobStruct sThreadJob; 279 sThreadJob.poWK = poWK; 280 sThreadJob.pnCounter = &nCounter; 281 sThreadJob.iYMin = 0; 282 sThreadJob.iYMax = poWK->nDstYSize; 283 sThreadJob.pbStop = &bStop; 284 sThreadJob.hCond = nullptr; 285 sThreadJob.hCondMutex = nullptr; 286 sThreadJob.pfnProgress = GWKProgressMonoThread; 287 sThreadJob.pTransformerArg = poWK->pTransformerArg; 288 289 pfnFunc(&sThreadJob); 290 291 return !bStop ? CE_None : CE_Failure; 292 } 293 294 /************************************************************************/ 295 /* GWKThreadsCreate() */ 296 /************************************************************************/ 297 298 void* GWKThreadsCreate( char** papszWarpOptions, 299 GDALTransformerFunc /* pfnTransformer */, 300 void* pTransformerArg ) 301 { 302 const char* pszWarpThreads = 303 CSLFetchNameValue(papszWarpOptions, "NUM_THREADS"); 304 if( pszWarpThreads == nullptr ) 305 pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1"); 306 307 int nThreads = 0; 308 if( EQUAL(pszWarpThreads, "ALL_CPUS") ) 309 nThreads = CPLGetNumCPUs(); 310 else 311 nThreads = atoi(pszWarpThreads); 312 if( nThreads <= 1 ) 313 nThreads = 0; 314 if( nThreads > 128 ) 315 nThreads = 128; 316 317 GWKThreadData* psThreadData = new GWKThreadData(); 318 CPLCond* hCond = nullptr; 319 if( nThreads ) 320 hCond = CPLCreateCond(); 321 auto poThreadPool = nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr; 322 if( nThreads && hCond && poThreadPool ) 323 { 324 psThreadData->nThreads = nThreads; 325 psThreadData->hCond = hCond; 326 psThreadData->pasThreadJob = static_cast<GWKJobStruct *>( 327 VSI_CALLOC_VERBOSE(sizeof(GWKJobStruct), nThreads)); 328 if( psThreadData->pasThreadJob == nullptr ) 329 { 330 GWKThreadsEnd(psThreadData); 331 return nullptr; 332 } 333 334 psThreadData->hCondMutex = CPLCreateMutex(); 335 if( psThreadData->hCondMutex == nullptr ) 336 { 337 GWKThreadsEnd(psThreadData); 338 return nullptr; 339 } 340 CPLReleaseMutex(psThreadData->hCondMutex); 341 342 for( int i = 0; i < nThreads; i++ ) 343 { 344 psThreadData->pasThreadJob[i].hCond = psThreadData->hCond; 345 psThreadData->pasThreadJob[i].hCondMutex = psThreadData->hCondMutex; 346 } 347 348 psThreadData->poJobQueue = poThreadPool->CreateJobQueue(); 349 psThreadData->pTransformerArgInput = pTransformerArg; 350 } 351 else if( hCond ) 352 CPLDestroyCond(hCond); 353 354 return psThreadData; 355 } 356 357 /************************************************************************/ 358 /* GWKThreadsEnd() */ 359 /************************************************************************/ 360 361 void GWKThreadsEnd( void* psThreadDataIn ) 362 { 363 if( psThreadDataIn == nullptr ) 364 return; 365 366 GWKThreadData* psThreadData = static_cast<GWKThreadData *>(psThreadDataIn); 367 if( psThreadData->poJobQueue ) 368 { 369 for( auto& pair: psThreadData->mapThreadToTransformerArg ) 370 { 371 if( pair.second != psThreadData->pTransformerArgInput ) 372 GDALDestroyTransformer(pair.second); 373 } 374 psThreadData->poJobQueue.reset(); 375 } 376 CPLFree(psThreadData->pasThreadJob); 377 if( psThreadData->hCond ) 378 CPLDestroyCond(psThreadData->hCond); 379 if( psThreadData->hCondMutex ) 380 CPLDestroyMutex(psThreadData->hCondMutex); 381 delete psThreadData; 382 } 383 384 /************************************************************************/ 385 /* ThreadFuncAdapter() */ 386 /************************************************************************/ 387 388 static void ThreadFuncAdapter(void* pData) 389 { 390 GWKJobStruct* psJob = static_cast<GWKJobStruct *>(pData); 391 GWKThreadData* psThreadData = 392 static_cast<GWKThreadData*>(psJob->poWK->psThreadData); 393 394 // Look if we have already a per-thread transformer 395 void* pTransformerArg = nullptr; 396 const GIntBig nThreadId = CPLGetPID(); 397 CPLAcquireMutex(psThreadData->hCondMutex, 1.0); 398 auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId); 399 if (oIter != psThreadData->mapThreadToTransformerArg.end()) 400 { 401 pTransformerArg = oIter->second; 402 } 403 else if( !psThreadData->bTransformerArgInputAssignedToThread ) 404 { 405 // Borrow the original transformer, as it has not already been done 406 psThreadData->bTransformerArgInputAssignedToThread = true; 407 pTransformerArg = psThreadData->pTransformerArgInput; 408 psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; 409 } 410 CPLReleaseMutex(psThreadData->hCondMutex); 411 412 // If no transformer assigned to current thread, instantiate one 413 if( pTransformerArg == nullptr ) 414 { 415 // This somehow assumes that GDALCloneTransformer() is thread-safe 416 // which should normally be the case. 417 pTransformerArg = 418 GDALCloneTransformer(psThreadData->pTransformerArgInput); 419 if( !pTransformerArg ) 420 { 421 *(psJob->pbStop) = TRUE; 422 return; 423 } 424 425 // register in map 426 CPLAcquireMutex(psThreadData->hCondMutex, 1.0); 427 psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; 428 CPLReleaseMutex(psThreadData->hCondMutex); 429 } 430 431 psJob->pTransformerArg = pTransformerArg; 432 psJob->pfnFunc(pData); 433 } 434 435 436 /************************************************************************/ 437 /* GWKRun() */ 438 /************************************************************************/ 439 440 static CPLErr GWKRun( GDALWarpKernel *poWK, 441 const char* pszFuncName, 442 void (*pfnFunc) (void *pUserData) ) 443 444 { 445 const int nDstYSize = poWK->nDstYSize; 446 447 CPLDebug("GDAL", "GDALWarpKernel()::%s() " 448 "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d", 449 pszFuncName, 450 poWK->nSrcXOff, poWK->nSrcYOff, 451 poWK->nSrcXSize, poWK->nSrcYSize, 452 poWK->nDstXOff, poWK->nDstYOff, 453 poWK->nDstXSize, poWK->nDstYSize); 454 455 if( !poWK->pfnProgress( poWK->dfProgressBase, "", poWK->pProgress ) ) 456 { 457 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); 458 return CE_Failure; 459 } 460 461 GWKThreadData* psThreadData = 462 static_cast<GWKThreadData*>(poWK->psThreadData); 463 if( psThreadData == nullptr || psThreadData->poJobQueue == nullptr ) 464 { 465 return GWKGenericMonoThread(poWK, pfnFunc); 466 } 467 468 int nThreads = std::min(psThreadData->nThreads, nDstYSize / 2); 469 // Config option mostly useful for tests to be able to test multithreading 470 // with small rasters 471 const int nWarpChunkSize = atoi( 472 CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536")); 473 if( nWarpChunkSize > 0 ) 474 { 475 GIntBig nChunks = 476 static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize; 477 if( nThreads > nChunks ) 478 nThreads = static_cast<int>(nChunks); 479 } 480 if( nThreads <= 0 ) 481 nThreads = 1; 482 483 CPLDebug("WARP", "Using %d threads", nThreads); 484 485 volatile int bStop = FALSE; 486 volatile int nCounter = 0; 487 488 CPLAcquireMutex(psThreadData->hCondMutex, 1000); 489 490 /* -------------------------------------------------------------------- */ 491 /* Submit jobs */ 492 /* -------------------------------------------------------------------- */ 493 for( int i = 0; i < nThreads; i++ ) 494 { 495 psThreadData->pasThreadJob[i].poWK = poWK; 496 psThreadData->pasThreadJob[i].pnCounter = &nCounter; 497 psThreadData->pasThreadJob[i].iYMin = 498 static_cast<int>((static_cast<GIntBig>(i)) * nDstYSize / nThreads); 499 psThreadData->pasThreadJob[i].iYMax = 500 static_cast<int>((static_cast<GIntBig>(i + 1)) * 501 nDstYSize / nThreads); 502 psThreadData->pasThreadJob[i].pbStop = &bStop; 503 if( poWK->pfnProgress != GDALDummyProgress ) 504 psThreadData->pasThreadJob[i].pfnProgress = GWKProgressThread; 505 else 506 psThreadData->pasThreadJob[i].pfnProgress = nullptr; 507 psThreadData->pasThreadJob[i].pfnFunc = pfnFunc; 508 psThreadData->poJobQueue->SubmitJob( ThreadFuncAdapter, 509 static_cast<void*>(&psThreadData->pasThreadJob[i]) ); 510 } 511 512 /* -------------------------------------------------------------------- */ 513 /* Report progress. */ 514 /* -------------------------------------------------------------------- */ 515 if( poWK->pfnProgress != GDALDummyProgress ) 516 { 517 while( nCounter < nDstYSize ) 518 { 519 CPLCondWait(psThreadData->hCond, psThreadData->hCondMutex); 520 521 if( !poWK->pfnProgress( 522 poWK->dfProgressBase + poWK->dfProgressScale * 523 (nCounter / static_cast<double>(nDstYSize)), 524 "", poWK->pProgress ) ) 525 { 526 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); 527 bStop = TRUE; 528 break; 529 } 530 } 531 } 532 533 /* Release mutex before joining threads, otherwise they will dead-lock */ 534 /* forever in GWKProgressThread() */ 535 CPLReleaseMutex(psThreadData->hCondMutex); 536 537 /* -------------------------------------------------------------------- */ 538 /* Wait for all jobs to complete. */ 539 /* -------------------------------------------------------------------- */ 540 psThreadData->poJobQueue->WaitCompletion(); 541 542 return !bStop ? CE_None : CE_Failure; 543 } 544 545 /************************************************************************/ 546 /* ==================================================================== */ 547 /* GDALWarpKernel */ 548 /* ==================================================================== */ 549 /************************************************************************/ 550 551 /** 552 * \class GDALWarpKernel "gdalwarper.h" 553 * 554 * Low level image warping class. 555 * 556 * This class is responsible for low level image warping for one 557 * "chunk" of imagery. The class is essentially a structure with all 558 * data members public - primarily so that new special-case functions 559 * can be added without changing the class declaration. 560 * 561 * Applications are normally intended to interactive with warping facilities 562 * through the GDALWarpOperation class, though the GDALWarpKernel can in 563 * theory be used directly if great care is taken in setting up the 564 * control data. 565 * 566 * <h3>Design Issues</h3> 567 * 568 * The intention is that PerformWarp() would analyze the setup in terms 569 * of the datatype, resampling type, and validity/density mask usage and 570 * pick one of many specific implementations of the warping algorithm over 571 * a continuum of optimization vs. generality. At one end there will be a 572 * reference general purpose implementation of the algorithm that supports 573 * any data type (working internally in double precision complex), all three 574 * resampling types, and any or all of the validity/density masks. At the 575 * other end would be highly optimized algorithms for common cases like 576 * nearest neighbour resampling on GDT_Byte data with no masks. 577 * 578 * The full set of optimized versions have not been decided but we should 579 * expect to have at least: 580 * - One for each resampling algorithm for 8bit data with no masks. 581 * - One for each resampling algorithm for float data with no masks. 582 * - One for each resampling algorithm for float data with any/all masks 583 * (essentially the generic case for just float data). 584 * - One for each resampling algorithm for 8bit data with support for 585 * input validity masks (per band or per pixel). This handles the common 586 * case of nodata masking. 587 * - One for each resampling algorithm for float data with support for 588 * input validity masks (per band or per pixel). This handles the common 589 * case of nodata masking. 590 * 591 * Some of the specializations would operate on all bands in one pass 592 * (especially the ones without masking would do this), while others might 593 * process each band individually to reduce code complexity. 594 * 595 * <h3>Masking Semantics</h3> 596 * 597 * A detailed explanation of the semantics of the validity and density masks, 598 * and their effects on resampling kernels is needed here. 599 */ 600 601 /************************************************************************/ 602 /* GDALWarpKernel Data Members */ 603 /************************************************************************/ 604 605 /** 606 * \var GDALResampleAlg GDALWarpKernel::eResample; 607 * 608 * Resampling algorithm. 609 * 610 * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear, 611 * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS, 612 * GRA_Mode or GRA_Sum. 613 * 614 * This field is required. GDT_NearestNeighbour may be used as a default 615 * value. 616 */ 617 618 /** 619 * \var GDALDataType GDALWarpKernel::eWorkingDataType; 620 * 621 * Working pixel data type. 622 * 623 * The datatype of pixels in the source image (papabySrcimage) and 624 * destination image (papabyDstImage) buffers. Note that operations on 625 * some data types (such as GDT_Byte) may be much better optimized than other 626 * less common cases. 627 * 628 * This field is required. It may not be GDT_Unknown. 629 */ 630 631 /** 632 * \var int GDALWarpKernel::nBands; 633 * 634 * Number of bands. 635 * 636 * The number of bands (layers) of imagery being warped. Determines the 637 * number of entries in the papabySrcImage, papanBandSrcValid, 638 * and papabyDstImage arrays. 639 * 640 * This field is required. 641 */ 642 643 /** 644 * \var int GDALWarpKernel::nSrcXSize; 645 * 646 * Source image width in pixels. 647 * 648 * This field is required. 649 */ 650 651 /** 652 * \var int GDALWarpKernel::nSrcYSize; 653 * 654 * Source image height in pixels. 655 * 656 * This field is required. 657 */ 658 659 /** 660 * \var double GDALWarpKernel::dfSrcXExtraSize; 661 * 662 * Number of pixels included in nSrcXSize that are present on the edges of 663 * the area of interest to take into account the width of the kernel. 664 * 665 * This field is required. 666 */ 667 668 /** 669 * \var double GDALWarpKernel::dfSrcYExtraSize; 670 * 671 * Number of pixels included in nSrcYExtraSize that are present on the edges of 672 * the area of interest to take into account the height of the kernel. 673 * 674 * This field is required. 675 */ 676 677 /** 678 * \var int GDALWarpKernel::papabySrcImage; 679 * 680 * Array of source image band data. 681 * 682 * This is an array of pointers (of size GDALWarpKernel::nBands) pointers 683 * to image data. Each individual band of image data is organized as a single 684 * block of image data in left to right, then bottom to top order. The actual 685 * type of the image data is determined by GDALWarpKernel::eWorkingDataType. 686 * 687 * To access the pixel value for the (x=3, y=4) pixel (zero based) of 688 * the second band with eWorkingDataType set to GDT_Float32 use code like 689 * this: 690 * 691 * \code 692 * float dfPixelValue; 693 * int nBand = 2-1; // Band indexes are zero based. 694 * int nPixel = 3; // Zero based. 695 * int nLine = 4; // Zero based. 696 * 697 * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize ); 698 * assert( nLine >= 0 && nLine < poKern->nSrcYSize ); 699 * assert( nBand >= 0 && nBand < poKern->nBands ); 700 * dfPixelValue = ((float *) poKern->papabySrcImage[nBand]) 701 * [nPixel + nLine * poKern->nSrcXSize]; 702 * \endcode 703 * 704 * This field is required. 705 */ 706 707 /** 708 * \var GUInt32 **GDALWarpKernel::papanBandSrcValid; 709 * 710 * Per band validity mask for source pixels. 711 * 712 * Array of pixel validity mask layers for each source band. Each of 713 * the mask layers is the same size (in pixels) as the source image with 714 * one bit per pixel. Note that it is legal (and common) for this to be 715 * NULL indicating that none of the pixels are invalidated, or for some 716 * band validity masks to be NULL in which case all pixels of the band are 717 * valid. The following code can be used to test the validity of a particular 718 * pixel. 719 * 720 * \code 721 * int bIsValid = TRUE; 722 * int nBand = 2-1; // Band indexes are zero based. 723 * int nPixel = 3; // Zero based. 724 * int nLine = 4; // Zero based. 725 * 726 * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize ); 727 * assert( nLine >= 0 && nLine < poKern->nSrcYSize ); 728 * assert( nBand >= 0 && nBand < poKern->nBands ); 729 * 730 * if( poKern->papanBandSrcValid != NULL 731 * && poKern->papanBandSrcValid[nBand] != NULL ) 732 * { 733 * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand]; 734 * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize; 735 * 736 * bIsValid = panBandMask[iPixelOffset>>5] 737 * & (0x01 << (iPixelOffset & 0x1f)); 738 * } 739 * \endcode 740 */ 741 742 /** 743 * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid; 744 * 745 * Per pixel validity mask for source pixels. 746 * 747 * A single validity mask layer that applies to the pixels of all source 748 * bands. It is accessed similarly to papanBandSrcValid, but without the 749 * extra level of band indirection. 750 * 751 * This pointer may be NULL indicating that all pixels are valid. 752 * 753 * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available, 754 * the pixel isn't considered to be valid unless both arrays indicate it is 755 * valid. 756 */ 757 758 /** 759 * \var float *GDALWarpKernel::pafUnifiedSrcDensity; 760 * 761 * Per pixel density mask for source pixels. 762 * 763 * A single density mask layer that applies to the pixels of all source 764 * bands. It contains values between 0.0 and 1.0 indicating the degree to 765 * which this pixel should be allowed to contribute to the output result. 766 * 767 * This pointer may be NULL indicating that all pixels have a density of 1.0. 768 * 769 * The density for a pixel may be accessed like this: 770 * 771 * \code 772 * float fDensity = 1.0; 773 * int nPixel = 3; // Zero based. 774 * int nLine = 4; // Zero based. 775 * 776 * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize ); 777 * assert( nLine >= 0 && nLine < poKern->nSrcYSize ); 778 * if( poKern->pafUnifiedSrcDensity != NULL ) 779 * fDensity = poKern->pafUnifiedSrcDensity 780 * [nPixel + nLine * poKern->nSrcXSize]; 781 * \endcode 782 */ 783 784 /** 785 * \var int GDALWarpKernel::nDstXSize; 786 * 787 * Width of destination image in pixels. 788 * 789 * This field is required. 790 */ 791 792 /** 793 * \var int GDALWarpKernel::nDstYSize; 794 * 795 * Height of destination image in pixels. 796 * 797 * This field is required. 798 */ 799 800 /** 801 * \var GByte **GDALWarpKernel::papabyDstImage; 802 * 803 * Array of destination image band data. 804 * 805 * This is an array of pointers (of size GDALWarpKernel::nBands) pointers 806 * to image data. Each individual band of image data is organized as a single 807 * block of image data in left to right, then bottom to top order. The actual 808 * type of the image data is determined by GDALWarpKernel::eWorkingDataType. 809 * 810 * To access the pixel value for the (x=3, y=4) pixel (zero based) of 811 * the second band with eWorkingDataType set to GDT_Float32 use code like 812 * this: 813 * 814 * \code 815 * float dfPixelValue; 816 * int nBand = 2-1; // Band indexes are zero based. 817 * int nPixel = 3; // Zero based. 818 * int nLine = 4; // Zero based. 819 * 820 * assert( nPixel >= 0 && nPixel < poKern->nDstXSize ); 821 * assert( nLine >= 0 && nLine < poKern->nDstYSize ); 822 * assert( nBand >= 0 && nBand < poKern->nBands ); 823 * dfPixelValue = ((float *) poKern->papabyDstImage[nBand]) 824 * [nPixel + nLine * poKern->nSrcYSize]; 825 * \endcode 826 * 827 * This field is required. 828 */ 829 830 /** 831 * \var GUInt32 *GDALWarpKernel::panDstValid; 832 * 833 * Per pixel validity mask for destination pixels. 834 * 835 * A single validity mask layer that applies to the pixels of all destination 836 * bands. It is accessed similarly to papanUnitifiedSrcValid, but based 837 * on the size of the destination image. 838 * 839 * This pointer may be NULL indicating that all pixels are valid. 840 */ 841 842 /** 843 * \var float *GDALWarpKernel::pafDstDensity; 844 * 845 * Per pixel density mask for destination pixels. 846 * 847 * A single density mask layer that applies to the pixels of all destination 848 * bands. It contains values between 0.0 and 1.0. 849 * 850 * This pointer may be NULL indicating that all pixels have a density of 1.0. 851 * 852 * The density for a pixel may be accessed like this: 853 * 854 * \code 855 * float fDensity = 1.0; 856 * int nPixel = 3; // Zero based. 857 * int nLine = 4; // Zero based. 858 * 859 * assert( nPixel >= 0 && nPixel < poKern->nDstXSize ); 860 * assert( nLine >= 0 && nLine < poKern->nDstYSize ); 861 * if( poKern->pafDstDensity != NULL ) 862 * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize]; 863 * \endcode 864 */ 865 866 /** 867 * \var int GDALWarpKernel::nSrcXOff; 868 * 869 * X offset to source pixel coordinates for transformation. 870 * 871 * See pfnTransformer. 872 * 873 * This field is required. 874 */ 875 876 /** 877 * \var int GDALWarpKernel::nSrcYOff; 878 * 879 * Y offset to source pixel coordinates for transformation. 880 * 881 * See pfnTransformer. 882 * 883 * This field is required. 884 */ 885 886 /** 887 * \var int GDALWarpKernel::nDstXOff; 888 * 889 * X offset to destination pixel coordinates for transformation. 890 * 891 * See pfnTransformer. 892 * 893 * This field is required. 894 */ 895 896 /** 897 * \var int GDALWarpKernel::nDstYOff; 898 * 899 * Y offset to destination pixel coordinates for transformation. 900 * 901 * See pfnTransformer. 902 * 903 * This field is required. 904 */ 905 906 /** 907 * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer; 908 * 909 * Source/destination location transformer. 910 * 911 * The function to call to transform coordinates between source image 912 * pixel/line coordinates and destination image pixel/line coordinates. 913 * See GDALTransformerFunc() for details of the semantics of this function. 914 * 915 * The GDALWarpKern algorithm will only ever use this transformer in 916 * "destination to source" mode (bDstToSrc=TRUE), and will always pass 917 * partial or complete scanlines of points in the destination image as 918 * input. This means, among other things, that it is safe to the the 919 * approximating transform GDALApproxTransform() as the transformation 920 * function. 921 * 922 * Source and destination images may be subsets of a larger overall image. 923 * The transformation algorithms will expect and return pixel/line coordinates 924 * in terms of this larger image, so coordinates need to be offset by 925 * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before 926 * passing to pfnTransformer, and after return from it. 927 * 928 * The GDALWarpKernel::pfnTransformerArg value will be passed as the callback 929 * data to this function when it is called. 930 * 931 * This field is required. 932 */ 933 934 /** 935 * \var void *GDALWarpKernel::pTransformerArg; 936 * 937 * Callback data for pfnTransformer. 938 * 939 * This field may be NULL if not required for the pfnTransformer being used. 940 */ 941 942 /** 943 * \var GDALProgressFunc GDALWarpKernel::pfnProgress; 944 * 945 * The function to call to report progress of the algorithm, and to check 946 * for a requested termination of the operation. It operates according to 947 * GDALProgressFunc() semantics. 948 * 949 * Generally speaking the progress function will be invoked for each 950 * scanline of the destination buffer that has been processed. 951 * 952 * This field may be NULL (internally set to GDALDummyProgress()). 953 */ 954 955 /** 956 * \var void *GDALWarpKernel::pProgress; 957 * 958 * Callback data for pfnProgress. 959 * 960 * This field may be NULL if not required for the pfnProgress being used. 961 */ 962 963 /************************************************************************/ 964 /* GDALWarpKernel() */ 965 /************************************************************************/ 966 967 GDALWarpKernel::GDALWarpKernel() : 968 papszWarpOptions(nullptr), 969 eResample(GRA_NearestNeighbour), 970 eWorkingDataType(GDT_Unknown), 971 nBands(0), 972 nSrcXSize(0), 973 nSrcYSize(0), 974 dfSrcXExtraSize(0.0), 975 dfSrcYExtraSize(0.0), 976 papabySrcImage(nullptr), 977 papanBandSrcValid(nullptr), 978 panUnifiedSrcValid(nullptr), 979 pafUnifiedSrcDensity(nullptr), 980 nDstXSize(0), 981 nDstYSize(0), 982 papabyDstImage(nullptr), 983 panDstValid(nullptr), 984 pafDstDensity(nullptr), 985 dfXScale(1.0), 986 dfYScale(1.0), 987 dfXFilter(0.0), 988 dfYFilter(0.0), 989 nXRadius(0), 990 nYRadius(0), 991 nFiltInitX(0), 992 nFiltInitY(0), 993 nSrcXOff(0), 994 nSrcYOff(0), 995 nDstXOff(0), 996 nDstYOff(0), 997 pfnTransformer(nullptr), 998 pTransformerArg(nullptr), 999 pfnProgress(GDALDummyProgress), 1000 pProgress(nullptr), 1001 dfProgressBase(0.0), 1002 dfProgressScale(1.0), 1003 padfDstNoDataReal(nullptr), 1004 psThreadData(nullptr) 1005 {} 1006 1007 /************************************************************************/ 1008 /* ~GDALWarpKernel() */ 1009 /************************************************************************/ 1010 1011 GDALWarpKernel::~GDALWarpKernel() {} 1012 1013 /************************************************************************/ 1014 /* PerformWarp() */ 1015 /************************************************************************/ 1016 1017 /** 1018 * \fn CPLErr GDALWarpKernel::PerformWarp(); 1019 * 1020 * This method performs the warp described in the GDALWarpKernel. 1021 * 1022 * @return CE_None on success or CE_Failure if an error occurs. 1023 */ 1024 1025 CPLErr GDALWarpKernel::PerformWarp() 1026 1027 { 1028 const CPLErr eErr = Validate(); 1029 1030 if( eErr != CE_None ) 1031 return eErr; 1032 1033 // See #2445 and #3079. 1034 if( nSrcXSize <= 0 || nSrcYSize <= 0 ) 1035 { 1036 if( !pfnProgress( dfProgressBase + dfProgressScale, 1037 "", pProgress ) ) 1038 { 1039 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); 1040 return CE_Failure; 1041 } 1042 return CE_None; 1043 } 1044 1045 /* -------------------------------------------------------------------- */ 1046 /* Pre-calculate resampling scales and window sizes for filtering. */ 1047 /* -------------------------------------------------------------------- */ 1048 1049 dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize); 1050 dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize); 1051 if( nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize ) 1052 dfXScale = 1.0; 1053 if( nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize ) 1054 dfYScale = 1.0; 1055 if( dfXScale < 1.0 ) 1056 { 1057 double dfXReciprocalScale = 1.0 / dfXScale; 1058 const int nXReciprocalScale = 1059 static_cast<int>(dfXReciprocalScale + 0.5); 1060 if( fabs(dfXReciprocalScale-nXReciprocalScale) < 0.05 ) 1061 dfXScale = 1.0 / nXReciprocalScale; 1062 } 1063 if( dfYScale < 1.0 ) 1064 { 1065 double dfYReciprocalScale = 1.0 / dfYScale; 1066 const int nYReciprocalScale = 1067 static_cast<int>(dfYReciprocalScale + 0.5); 1068 if( fabs(dfYReciprocalScale-nYReciprocalScale) < 0.05 ) 1069 dfYScale = 1.0 / nYReciprocalScale; 1070 } 1071 1072 // XSCALE and YSCALE undocumented for now. Can help in some cases. 1073 // Best would probably be a per-pixel scale computation. 1074 const char* pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE"); 1075 if( pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING") ) 1076 dfXScale = CPLAtof(pszXScale); 1077 const char* pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE"); 1078 if( pszYScale != nullptr ) 1079 dfYScale = CPLAtof(pszYScale); 1080 1081 // If the xscale is significantly lower than the yscale, this is highly 1082 // suspicious of a situation of wrapping a very large virtual file in 1083 // geographic coordinates with left and right parts being close to the 1084 // antimeridian. In that situation, the xscale computed by the above method 1085 // is completely wrong. Prefer doing an average of a few sample points 1086 // instead 1087 if( (dfYScale / dfXScale > 100 || 1088 (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))) ) 1089 { 1090 // Sample points along a grid 1091 const int nPointsX = std::min(10, nDstXSize); 1092 const int nPointsY = std::min(10, nDstYSize); 1093 const int nPoints = 3 * nPointsX * nPointsY; 1094 std::vector<double> padfX; 1095 std::vector<double> padfY; 1096 std::vector<double> padfZ(nPoints); 1097 std::vector<int> pabSuccess(nPoints); 1098 for(int iY = 0; iY < nPointsY; iY++) 1099 { 1100 for(int iX = 0; iX < nPointsX; iX++) 1101 { 1102 const double dfX = nPointsX == 1 ? 0.0 : 1103 static_cast<double>(iX) * nDstXSize / (nPointsX - 1); 1104 const double dfY = nPointsY == 1 ? 0.0 : 1105 static_cast<double>(iY) * nDstYSize / (nPointsY - 1); 1106 1107 // Reproject each destination sample point and its neighbours 1108 // at (x+1,y) and (x,y+1), so as to get the local scale. 1109 padfX.push_back( dfX ); 1110 padfY.push_back( dfY ); 1111 1112 padfX.push_back( (iX == nPointsX - 1) ? dfX - 1 : dfX + 1 ); 1113 padfY.push_back( dfY); 1114 1115 padfX.push_back( dfX ); 1116 padfY.push_back( (iY == nPointsY - 1) ? dfY - 1 : dfY + 1 ); 1117 } 1118 } 1119 pfnTransformer(pTransformerArg, TRUE, nPoints, 1120 &padfX[0], &padfY[0], &padfZ[0], &pabSuccess[0]); 1121 1122 // Compute the xscale at each sampling point 1123 std::vector<double> adfXScales; 1124 for( int i = 0; i < nPoints; i+=3 ) 1125 { 1126 if( pabSuccess[i] && pabSuccess[i+1] && pabSuccess[i+2] ) 1127 { 1128 const double dfPointXScale = 1.0 / std::max( 1129 std::abs(padfX[i+1] - padfX[i]), std::abs(padfX[i+2] - padfX[i]) ); 1130 adfXScales.push_back(dfPointXScale); 1131 } 1132 } 1133 1134 // Sort by increasing xcale 1135 std::sort(adfXScales.begin(), adfXScales.end()); 1136 1137 if( !adfXScales.empty() ) 1138 { 1139 // Compute the average of scales, but eliminate outliers small 1140 // scales, if some samples are just along the discontinuity. 1141 const double dfMaxPointXScale = adfXScales.back(); 1142 double dfSumPointXScale = 0; 1143 int nCountPointScale = 0; 1144 for( double dfPointXScale : adfXScales ) 1145 { 1146 if( dfPointXScale > dfMaxPointXScale / 10 ) 1147 { 1148 dfSumPointXScale += dfPointXScale; 1149 nCountPointScale ++; 1150 } 1151 } 1152 if( nCountPointScale > 0 ) // should always be true 1153 { 1154 const double dfXScaleFromSampling = dfSumPointXScale / nCountPointScale; 1155 #if DEBUG_VERBOSE 1156 CPLDebug("WARP", "Correcting dfXScale from %f to %f", 1157 dfXScale, dfXScaleFromSampling); 1158 #endif 1159 dfXScale = dfXScaleFromSampling; 1160 } 1161 } 1162 } 1163 1164 #if DEBUG_VERBOSE 1165 CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale); 1166 #endif 1167 1168 const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95; 1169 1170 // Safety check for callers that would use GDALWarpKernel without using 1171 // GDALWarpOperation. 1172 if( (eResample == GRA_CubicSpline || eResample == GRA_Lanczos || 1173 ((eResample == GRA_Cubic || eResample == GRA_Bilinear) && 1174 !bUse4SamplesFormula)) && 1175 atoi(CSLFetchNameValueDef(papszWarpOptions, 1176 "EXTRA_ELTS", "0") ) != WARP_EXTRA_ELTS ) 1177 { 1178 CPLError( CE_Failure, CPLE_AppDefined, 1179 "Source arrays must have WARP_EXTRA_ELTS extra elements at " 1180 "their end. " 1181 "See GDALWarpKernel class definition. If this condition is " 1182 "fulfilled, define a EXTRA_ELTS=%d warp options", 1183 WARP_EXTRA_ELTS); 1184 return CE_Failure; 1185 } 1186 1187 dfXFilter = anGWKFilterRadius[eResample]; 1188 dfYFilter = anGWKFilterRadius[eResample]; 1189 1190 nXRadius = 1191 dfXScale < 1.0 1192 ? static_cast<int>(ceil(dfXFilter / dfXScale)) 1193 : static_cast<int>(dfXFilter); 1194 nYRadius = 1195 dfYScale < 1.0 1196 ? static_cast<int>(ceil(dfYFilter / dfYScale)) 1197 : static_cast<int>(dfYFilter); 1198 1199 // Filter window offset depends on the parity of the kernel radius. 1200 nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius; 1201 nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius; 1202 1203 /* -------------------------------------------------------------------- */ 1204 /* Set up resampling functions. */ 1205 /* -------------------------------------------------------------------- */ 1206 if( CPLFetchBool( papszWarpOptions, "USE_GENERAL_CASE", false ) ) 1207 return GWKGeneralCase( this ); 1208 1209 #if defined(HAVE_OPENCL) 1210 if( (eWorkingDataType == GDT_Byte 1211 || eWorkingDataType == GDT_CInt16 1212 || eWorkingDataType == GDT_UInt16 1213 || eWorkingDataType == GDT_Int16 1214 || eWorkingDataType == GDT_CFloat32 1215 || eWorkingDataType == GDT_Float32) && 1216 (eResample == GRA_Bilinear 1217 || eResample == GRA_Cubic 1218 || eResample == GRA_CubicSpline 1219 || eResample == GRA_Lanczos) && 1220 CPLFetchBool( papszWarpOptions, "USE_OPENCL", true ) ) 1221 { 1222 const CPLErr eResult = GWKOpenCLCase( this ); 1223 1224 // CE_Warning tells us a suitable OpenCL environment was not available 1225 // so we fall through to other CPU based methods. 1226 if( eResult != CE_Warning ) 1227 return eResult; 1228 } 1229 #endif // defined HAVE_OPENCL 1230 1231 const bool bNoMasksOrDstDensityOnly = 1232 papanBandSrcValid == nullptr 1233 && panUnifiedSrcValid == nullptr 1234 && pafUnifiedSrcDensity == nullptr 1235 && panDstValid == nullptr; 1236 1237 if( eWorkingDataType == GDT_Byte 1238 && eResample == GRA_NearestNeighbour 1239 && bNoMasksOrDstDensityOnly ) 1240 return GWKNearestNoMasksOrDstDensityOnlyByte( this ); 1241 1242 if( eWorkingDataType == GDT_Byte 1243 && eResample == GRA_Bilinear 1244 && bNoMasksOrDstDensityOnly ) 1245 return GWKBilinearNoMasksOrDstDensityOnlyByte( this ); 1246 1247 if( eWorkingDataType == GDT_Byte 1248 && eResample == GRA_Cubic 1249 && bNoMasksOrDstDensityOnly ) 1250 return GWKCubicNoMasksOrDstDensityOnlyByte( this ); 1251 1252 if( eWorkingDataType == GDT_Byte 1253 && eResample == GRA_CubicSpline 1254 && bNoMasksOrDstDensityOnly ) 1255 return GWKCubicSplineNoMasksOrDstDensityOnlyByte( this ); 1256 1257 if( eWorkingDataType == GDT_Byte 1258 && eResample == GRA_NearestNeighbour ) 1259 return GWKNearestByte( this ); 1260 1261 if( (eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) 1262 && eResample == GRA_NearestNeighbour 1263 && bNoMasksOrDstDensityOnly ) 1264 return GWKNearestNoMasksOrDstDensityOnlyShort( this ); 1265 1266 if( (eWorkingDataType == GDT_Int16 ) 1267 && eResample == GRA_Cubic 1268 && bNoMasksOrDstDensityOnly ) 1269 return GWKCubicNoMasksOrDstDensityOnlyShort( this ); 1270 1271 if( (eWorkingDataType == GDT_Int16 ) 1272 && eResample == GRA_CubicSpline 1273 && bNoMasksOrDstDensityOnly ) 1274 return GWKCubicSplineNoMasksOrDstDensityOnlyShort( this ); 1275 1276 if( (eWorkingDataType == GDT_Int16 ) 1277 && eResample == GRA_Bilinear 1278 && bNoMasksOrDstDensityOnly ) 1279 return GWKBilinearNoMasksOrDstDensityOnlyShort( this ); 1280 1281 if( (eWorkingDataType == GDT_UInt16 ) 1282 && eResample == GRA_Cubic 1283 && bNoMasksOrDstDensityOnly ) 1284 return GWKCubicNoMasksOrDstDensityOnlyUShort( this ); 1285 1286 if( (eWorkingDataType == GDT_UInt16 ) 1287 && eResample == GRA_CubicSpline 1288 && bNoMasksOrDstDensityOnly ) 1289 return GWKCubicSplineNoMasksOrDstDensityOnlyUShort( this ); 1290 1291 if( (eWorkingDataType == GDT_UInt16 ) 1292 && eResample == GRA_Bilinear 1293 && bNoMasksOrDstDensityOnly ) 1294 return GWKBilinearNoMasksOrDstDensityOnlyUShort( this ); 1295 1296 if( (eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) 1297 && eResample == GRA_NearestNeighbour ) 1298 return GWKNearestShort( this ); 1299 1300 if( eWorkingDataType == GDT_Float32 1301 && eResample == GRA_NearestNeighbour 1302 && bNoMasksOrDstDensityOnly ) 1303 return GWKNearestNoMasksOrDstDensityOnlyFloat( this ); 1304 1305 if( eWorkingDataType == GDT_Float32 1306 && eResample == GRA_NearestNeighbour ) 1307 return GWKNearestFloat( this ); 1308 1309 if( eWorkingDataType == GDT_Float32 1310 && eResample == GRA_Bilinear 1311 && bNoMasksOrDstDensityOnly ) 1312 return GWKBilinearNoMasksOrDstDensityOnlyFloat( this ); 1313 1314 if( eWorkingDataType == GDT_Float32 1315 && eResample == GRA_Cubic 1316 && bNoMasksOrDstDensityOnly ) 1317 return GWKCubicNoMasksOrDstDensityOnlyFloat( this ); 1318 1319 #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL 1320 if( eWorkingDataType == GDT_Float64 1321 && eResample == GRA_Bilinear 1322 && bNoMasksOrDstDensityOnly ) 1323 return GWKBilinearNoMasksOrDstDensityOnlyDouble( this ); 1324 1325 if( eWorkingDataType == GDT_Float64 1326 && eResample == GRA_Cubic 1327 && bNoMasksOrDstDensityOnly ) 1328 return GWKCubicNoMasksOrDstDensityOnlyDouble( this ); 1329 #endif 1330 1331 if( eResample == GRA_Average ) 1332 return GWKAverageOrMode( this ); 1333 1334 if( eResample == GRA_RMS ) 1335 return GWKAverageOrMode( this ); 1336 1337 if( eResample == GRA_Mode ) 1338 return GWKAverageOrMode( this ); 1339 1340 if( eResample == GRA_Max ) 1341 return GWKAverageOrMode( this ); 1342 1343 if( eResample == GRA_Min ) 1344 return GWKAverageOrMode( this ); 1345 1346 if( eResample == GRA_Med ) 1347 return GWKAverageOrMode( this ); 1348 1349 if( eResample == GRA_Q1 ) 1350 return GWKAverageOrMode( this ); 1351 1352 if( eResample == GRA_Q3 ) 1353 return GWKAverageOrMode( this ); 1354 1355 if( eResample == GRA_Sum ) 1356 return GWKAverageOrMode( this ); 1357 1358 if (!GDALDataTypeIsComplex(eWorkingDataType)) 1359 { 1360 return GWKRealCase( this ); 1361 } 1362 1363 return GWKGeneralCase( this ); 1364 } 1365 1366 /************************************************************************/ 1367 /* Validate() */ 1368 /************************************************************************/ 1369 1370 /** 1371 * \fn CPLErr GDALWarpKernel::Validate() 1372 * 1373 * Check the settings in the GDALWarpKernel, and issue a CPLError() 1374 * (and return CE_Failure) if the configuration is considered to be 1375 * invalid for some reason. 1376 * 1377 * This method will also do some standard defaulting such as setting 1378 * pfnProgress to GDALDummyProgress() if it is NULL. 1379 * 1380 * @return CE_None on success or CE_Failure if an error is detected. 1381 */ 1382 1383 CPLErr GDALWarpKernel::Validate() 1384 1385 { 1386 if( static_cast<size_t>(eResample) >= 1387 (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])) ) 1388 { 1389 CPLError( CE_Failure, CPLE_AppDefined, 1390 "Unsupported resampling method %d.", 1391 static_cast<int>(eResample) ); 1392 return CE_Failure; 1393 } 1394 1395 return CE_None; 1396 } 1397 1398 /************************************************************************/ 1399 /* GWKOverlayDensity() */ 1400 /* */ 1401 /* Compute the final density for the destination pixel. This */ 1402 /* is a function of the overlay density (passed in) and the */ 1403 /* original density. */ 1404 /************************************************************************/ 1405 1406 static void GWKOverlayDensity( const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset, 1407 double dfDensity ) 1408 { 1409 if( dfDensity < 0.0001 || poWK->pafDstDensity == nullptr ) 1410 return; 1411 1412 poWK->pafDstDensity[iDstOffset] = static_cast<float> 1413 ( 1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]) ); 1414 } 1415 1416 /************************************************************************/ 1417 /* GWKRoundValueT() */ 1418 /************************************************************************/ 1419 1420 template<class T, EMULATED_BOOL is_signed> struct sGWKRoundValueT 1421 { 1422 static T eval(double); 1423 }; 1424 1425 template<class T> struct sGWKRoundValueT<T, true> /* signed */ 1426 { 1427 static T eval(double dfValue) { return static_cast<T>(floor(dfValue + 0.5)); } 1428 }; 1429 1430 template<class T> struct sGWKRoundValueT<T, false> /* unsigned */ 1431 { 1432 static T eval(double dfValue) { return static_cast<T>(dfValue + 0.5); } 1433 }; 1434 1435 template<class T> static T GWKRoundValueT(double dfValue) 1436 { 1437 return sGWKRoundValueT<T, std::numeric_limits<T>::is_signed>::eval(dfValue); 1438 } 1439 1440 template<> float GWKRoundValueT<float>(double dfValue) 1441 { 1442 return static_cast<float>(dfValue); 1443 } 1444 1445 #ifdef notused 1446 template<> double GWKRoundValueT<double>(double dfValue) 1447 { 1448 return dfValue; 1449 } 1450 #endif 1451 1452 /************************************************************************/ 1453 /* GWKClampValueT() */ 1454 /************************************************************************/ 1455 1456 template<class T> 1457 static CPL_INLINE T GWKClampValueT(double dfValue) 1458 { 1459 if( dfValue < std::numeric_limits<T>::min() ) 1460 return std::numeric_limits<T>::min(); 1461 else if( dfValue > std::numeric_limits<T>::max() ) 1462 return std::numeric_limits<T>::max(); 1463 else 1464 return GWKRoundValueT<T>(dfValue); 1465 } 1466 1467 template<> float GWKClampValueT<float>(double dfValue) 1468 { 1469 return static_cast<float>(dfValue); 1470 } 1471 1472 #ifdef notused 1473 template<> double GWKClampValueT<double>(double dfValue) 1474 { 1475 return dfValue; 1476 } 1477 #endif 1478 1479 /************************************************************************/ 1480 /* GWKSetPixelValueRealT() */ 1481 /************************************************************************/ 1482 1483 template<class T> 1484 static bool GWKSetPixelValueRealT( const GDALWarpKernel *poWK, int iBand, 1485 GPtrDiff_t iDstOffset, double dfDensity, 1486 T value) 1487 { 1488 T *pDst = reinterpret_cast<T*>(poWK->papabyDstImage[iBand]); 1489 1490 /* -------------------------------------------------------------------- */ 1491 /* If the source density is less than 100% we need to fetch the */ 1492 /* existing destination value, and mix it with the source to */ 1493 /* get the new "to apply" value. Also compute composite */ 1494 /* density. */ 1495 /* */ 1496 /* We avoid mixing if density is very near one or risk mixing */ 1497 /* in very extreme nodata values and causing odd results (#1610) */ 1498 /* -------------------------------------------------------------------- */ 1499 if( dfDensity < 0.9999 ) 1500 { 1501 if( dfDensity < 0.0001 ) 1502 return true; 1503 1504 double dfDstDensity = 1.0; 1505 1506 if( poWK->pafDstDensity != nullptr ) 1507 dfDstDensity = poWK->pafDstDensity[iDstOffset]; 1508 else if( poWK->panDstValid != nullptr 1509 && !((poWK->panDstValid[iDstOffset>>5] 1510 & (0x01 << (iDstOffset & 0x1f))) ) ) 1511 dfDstDensity = 0.0; 1512 1513 // It seems like we also ought to be testing panDstValid[] here! 1514 1515 const double dfDstReal = pDst[iDstOffset]; 1516 1517 // The destination density is really only relative to the portion 1518 // not occluded by the overlay. 1519 const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity; 1520 1521 const double dfReal = 1522 (value * dfDensity + dfDstReal * dfDstInfluence) 1523 / (dfDensity + dfDstInfluence); 1524 1525 /* -------------------------------------------------------------------- */ 1526 /* Actually apply the destination value. */ 1527 /* */ 1528 /* Avoid using the destination nodata value for integer datatypes */ 1529 /* if by chance it is equal to the computed pixel value. */ 1530 /* -------------------------------------------------------------------- */ 1531 pDst[iDstOffset] = GWKClampValueT<T>(dfReal); 1532 } 1533 else 1534 { 1535 pDst[iDstOffset] = value; 1536 } 1537 1538 if( poWK->padfDstNoDataReal != nullptr && 1539 poWK->padfDstNoDataReal[iBand] == 1540 static_cast<double>(pDst[iDstOffset]) ) 1541 { 1542 if( pDst[iDstOffset] == std::numeric_limits<T>::min() ) 1543 pDst[iDstOffset] = std::numeric_limits<T>::min() + 1; 1544 else 1545 pDst[iDstOffset]--; 1546 } 1547 1548 return true; 1549 } 1550 1551 /************************************************************************/ 1552 /* GWKSetPixelValue() */ 1553 /************************************************************************/ 1554 1555 static bool GWKSetPixelValue( const GDALWarpKernel *poWK, int iBand, 1556 GPtrDiff_t iDstOffset, double dfDensity, 1557 double dfReal, double dfImag ) 1558 1559 { 1560 GByte *pabyDst = poWK->papabyDstImage[iBand]; 1561 1562 /* -------------------------------------------------------------------- */ 1563 /* If the source density is less than 100% we need to fetch the */ 1564 /* existing destination value, and mix it with the source to */ 1565 /* get the new "to apply" value. Also compute composite */ 1566 /* density. */ 1567 /* */ 1568 /* We avoid mixing if density is very near one or risk mixing */ 1569 /* in very extreme nodata values and causing odd results (#1610) */ 1570 /* -------------------------------------------------------------------- */ 1571 if( dfDensity < 0.9999 ) 1572 { 1573 if( dfDensity < 0.0001 ) 1574 return true; 1575 1576 double dfDstDensity = 1.0; 1577 if( poWK->pafDstDensity != nullptr ) 1578 dfDstDensity = poWK->pafDstDensity[iDstOffset]; 1579 else if( poWK->panDstValid != nullptr 1580 && !((poWK->panDstValid[iDstOffset>>5] 1581 & (0x01 << (iDstOffset & 0x1f))) ) ) 1582 dfDstDensity = 0.0; 1583 1584 double dfDstReal = 0.0; 1585 double dfDstImag = 0.0; 1586 // It seems like we also ought to be testing panDstValid[] here! 1587 1588 // TODO(schwehr): Factor out this repreated type of set. 1589 switch( poWK->eWorkingDataType ) 1590 { 1591 case GDT_Byte: 1592 dfDstReal = pabyDst[iDstOffset]; 1593 dfDstImag = 0.0; 1594 break; 1595 1596 case GDT_Int16: 1597 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset]; 1598 dfDstImag = 0.0; 1599 break; 1600 1601 case GDT_UInt16: 1602 dfDstReal = reinterpret_cast<GUInt16*>(pabyDst)[iDstOffset]; 1603 dfDstImag = 0.0; 1604 break; 1605 1606 case GDT_Int32: 1607 dfDstReal = reinterpret_cast<GInt32*>(pabyDst)[iDstOffset]; 1608 dfDstImag = 0.0; 1609 break; 1610 1611 case GDT_UInt32: 1612 dfDstReal = reinterpret_cast<GUInt32*>(pabyDst)[iDstOffset]; 1613 dfDstImag = 0.0; 1614 break; 1615 1616 case GDT_Float32: 1617 dfDstReal = reinterpret_cast<float*>(pabyDst)[iDstOffset]; 1618 dfDstImag = 0.0; 1619 break; 1620 1621 case GDT_Float64: 1622 dfDstReal = reinterpret_cast<double*>(pabyDst)[iDstOffset]; 1623 dfDstImag = 0.0; 1624 break; 1625 1626 case GDT_CInt16: 1627 dfDstReal = reinterpret_cast<GInt16*>(pabyDst)[iDstOffset*2]; 1628 dfDstImag = reinterpret_cast<GInt16*>(pabyDst)[iDstOffset*2+1]; 1629 break; 1630 1631 case GDT_CInt32: 1632 dfDstReal = reinterpret_cast<GInt32*>(pabyDst)[iDstOffset*2]; 1633 dfDstImag = reinterpret_cast<GInt32*>(pabyDst)[iDstOffset*2+1]; 1634 break; 1635 1636 case GDT_CFloat32: 1637 dfDstReal = reinterpret_cast<float*>(pabyDst)[iDstOffset*2]; 1638 dfDstImag = reinterpret_cast<float*>(pabyDst)[iDstOffset*2+1]; 1639 break; 1640 1641 case GDT_CFloat64: 1642 dfDstReal = reinterpret_cast<double*>(pabyDst)[iDstOffset*2]; 1643 dfDstImag = reinterpret_cast<double*>(pabyDst)[iDstOffset*2+1]; 1644 break; 1645 1646 default: 1647 CPLAssert( false ); 1648 return false; 1649 } 1650 1651 // The destination density is really only relative to the portion 1652 // not occluded by the overlay. 1653 const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity; 1654 1655 dfReal = 1656 (dfReal * dfDensity + dfDstReal * dfDstInfluence) 1657 / (dfDensity + dfDstInfluence); 1658 1659 dfImag = 1660 (dfImag * dfDensity + dfDstImag * dfDstInfluence) 1661 / (dfDensity + dfDstInfluence); 1662 } 1663 1664 /* -------------------------------------------------------------------- */ 1665 /* Actually apply the destination value. */ 1666 /* */ 1667 /* Avoid using the destination nodata value for integer datatypes */ 1668 /* if by chance it is equal to the computed pixel value. */ 1669 /* -------------------------------------------------------------------- */ 1670 1671 // TODO(schwehr): Can we make this a template? 1672 #define CLAMP(type) \ 1673 do { \ 1674 type* _pDst = reinterpret_cast<type*>(pabyDst); \ 1675 if( dfReal < static_cast<double>(std::numeric_limits<type>::min()) ) \ 1676 _pDst[iDstOffset] = static_cast<type>(std::numeric_limits<type>::min()); \ 1677 else if( dfReal > static_cast<double>(std::numeric_limits<type>::max()) ) \ 1678 _pDst[iDstOffset] = static_cast<type>(std::numeric_limits<type>::max()); \ 1679 else \ 1680 _pDst[iDstOffset] = \ 1681 (std::numeric_limits<type>::is_signed) ? \ 1682 static_cast<type>(floor(dfReal + 0.5)) : \ 1683 static_cast<type>(dfReal + 0.5); \ 1684 if( poWK->padfDstNoDataReal != nullptr && \ 1685 poWK->padfDstNoDataReal[iBand] == static_cast<double>(_pDst[iDstOffset]) ) \ 1686 { \ 1687 if( _pDst[iDstOffset] == static_cast<type>(std::numeric_limits<type>::min()) ) \ 1688 _pDst[iDstOffset] = static_cast<type>(std::numeric_limits<type>::min() + 1); \ 1689 else \ 1690 _pDst[iDstOffset]--; \ 1691 } } while( false ) 1692 1693 switch( poWK->eWorkingDataType ) 1694 { 1695 case GDT_Byte: 1696 CLAMP(GByte); 1697 break; 1698 1699 case GDT_Int16: 1700 CLAMP(GInt16); 1701 break; 1702 1703 case GDT_UInt16: 1704 CLAMP(GUInt16); 1705 break; 1706 1707 case GDT_UInt32: 1708 CLAMP(GUInt32); 1709 break; 1710 1711 case GDT_Int32: 1712 CLAMP(GInt32); 1713 break; 1714 1715 case GDT_Float32: 1716 reinterpret_cast<float*>(pabyDst)[iDstOffset] = static_cast<float>(dfReal); 1717 break; 1718 1719 case GDT_Float64: 1720 reinterpret_cast<double*>(pabyDst)[iDstOffset] = dfReal; 1721 break; 1722 1723 case GDT_CInt16: 1724 { 1725 typedef GInt16 T; 1726 if( dfReal < static_cast<double>(std::numeric_limits<T>::min()) ) 1727 reinterpret_cast<T*>(pabyDst)[iDstOffset*2] = std::numeric_limits<T>::min(); 1728 else if( dfReal > static_cast<double>(std::numeric_limits<T>::max()) ) 1729 reinterpret_cast<T*>(pabyDst)[iDstOffset*2] = std::numeric_limits<T>::max(); 1730 else 1731 reinterpret_cast<T*>(pabyDst)[iDstOffset*2] = static_cast<T>(floor(dfReal+0.5)); 1732 if( dfImag < static_cast<double>(std::numeric_limits<T>::min()) ) 1733 reinterpret_cast<T*>(pabyDst)[iDstOffset*2+1] = std::numeric_limits<T>::min(); 1734 else if( dfImag > static_cast<double>(std::numeric_limits<T>::max()) ) 1735 reinterpret_cast<T*>(pabyDst)[iDstOffset*2+1] = std::numeric_limits<T>::max(); 1736 else 1737 reinterpret_cast<T*>(pabyDst)[iDstOffset*2+1] = static_cast<T>(floor(dfImag+0.5)); 1738 break; 1739 } 1740 1741 case GDT_CInt32: 1742 { 1743 typedef GInt32 T; 1744 if( dfReal < static_cast<double>(std::numeric_limits<T>::min()) ) 1745 reinterpret_cast<T*>(pabyDst)[iDstOffset*2] = std::numeric_limits<T>::min(); 1746 else if( dfReal > static_cast<double>(std::numeric_limits<T>::max()) ) 1747 reinterpret_cast<T*>(pabyDst)[iDstOffset*2] = std::numeric_limits<T>::max(); 1748 else 1749 reinterpret_cast<T*>(pabyDst)[iDstOffset*2] = static_cast<T>(floor(dfReal+0.5)); 1750 if( dfImag < static_cast<double>(std::numeric_limits<T>::min()) ) 1751 reinterpret_cast<T*>(pabyDst)[iDstOffset*2+1] = std::numeric_limits<T>::min(); 1752 else if( dfImag > static_cast<double>(std::numeric_limits<T>::max()) ) 1753 reinterpret_cast<T*>(pabyDst)[iDstOffset*2+1] = std::numeric_limits<T>::max(); 1754 else 1755 reinterpret_cast<T*>(pabyDst)[iDstOffset*2+1] = static_cast<T>(floor(dfImag+0.5)); 1756 break; 1757 } 1758 1759 case GDT_CFloat32: 1760 reinterpret_cast<float*>(pabyDst)[iDstOffset*2] = static_cast<float>(dfReal); 1761 reinterpret_cast<float*>(pabyDst)[iDstOffset*2+1] = static_cast<float>(dfImag); 1762 break; 1763 1764 case GDT_CFloat64: 1765 reinterpret_cast<double*>(pabyDst)[iDstOffset*2] = dfReal; 1766 reinterpret_cast<double*>(pabyDst)[iDstOffset*2+1] = dfImag; 1767 break; 1768 1769 default: 1770 return false; 1771 } 1772 1773 return true; 1774 } 1775 1776 /************************************************************************/ 1777 /* GWKSetPixelValueReal() */ 1778 /************************************************************************/ 1779 1780 static bool GWKSetPixelValueReal( const GDALWarpKernel *poWK, int iBand, 1781 GPtrDiff_t iDstOffset, double dfDensity, 1782 double dfReal ) 1783 1784 { 1785 GByte *pabyDst = poWK->papabyDstImage[iBand]; 1786 1787 /* -------------------------------------------------------------------- */ 1788 /* If the source density is less than 100% we need to fetch the */ 1789 /* existing destination value, and mix it with the source to */ 1790 /* get the new "to apply" value. Also compute composite */ 1791 /* density. */ 1792 /* */ 1793 /* We avoid mixing if density is very near one or risk mixing */ 1794 /* in very extreme nodata values and causing odd results (#1610) */ 1795 /* -------------------------------------------------------------------- */ 1796 if( dfDensity < 0.9999 ) 1797 { 1798 if( dfDensity < 0.0001 ) 1799 return true; 1800 1801 double dfDstReal = 0.0; 1802 double dfDstDensity = 1.0; 1803 1804 if( poWK->pafDstDensity != nullptr ) 1805 dfDstDensity = poWK->pafDstDensity[iDstOffset]; 1806 else if( poWK->panDstValid != nullptr 1807 && !((poWK->panDstValid[iDstOffset>>5] 1808 & (0x01 << (iDstOffset & 0x1f))) ) ) 1809 dfDstDensity = 0.0; 1810 1811 // It seems like we also ought to be testing panDstValid[] here! 1812 1813 switch( poWK->eWorkingDataType ) 1814 { 1815 case GDT_Byte: 1816 dfDstReal = pabyDst[iDstOffset]; 1817 break; 1818 1819 case GDT_Int16: 1820 dfDstReal = reinterpret_cast<GInt16*>(pabyDst)[iDstOffset]; 1821 break; 1822 1823 case GDT_UInt16: 1824 dfDstReal = reinterpret_cast<GUInt16*>(pabyDst)[iDstOffset]; 1825 break; 1826 1827 case GDT_Int32: 1828 dfDstReal = reinterpret_cast<GInt32*>(pabyDst)[iDstOffset]; 1829 break; 1830 1831 case GDT_UInt32: 1832 dfDstReal = reinterpret_cast<GUInt32*>(pabyDst)[iDstOffset]; 1833 break; 1834 1835 case GDT_Float32: 1836 dfDstReal = reinterpret_cast<float*>(pabyDst)[iDstOffset]; 1837 break; 1838 1839 case GDT_Float64: 1840 dfDstReal = reinterpret_cast<double*>(pabyDst)[iDstOffset]; 1841 break; 1842 1843 default: 1844 CPLAssert( false ); 1845 return false; 1846 } 1847 1848 // The destination density is really only relative to the portion 1849 // not occluded by the overlay. 1850 const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity; 1851 1852 dfReal = 1853 (dfReal * dfDensity + dfDstReal * dfDstInfluence) 1854 / (dfDensity + dfDstInfluence); 1855 } 1856 1857 /* -------------------------------------------------------------------- */ 1858 /* Actually apply the destination value. */ 1859 /* */ 1860 /* Avoid using the destination nodata value for integer datatypes */ 1861 /* if by chance it is equal to the computed pixel value. */ 1862 /* -------------------------------------------------------------------- */ 1863 1864 switch( poWK->eWorkingDataType ) 1865 { 1866 case GDT_Byte: 1867 CLAMP(GByte); 1868 break; 1869 1870 case GDT_Int16: 1871 CLAMP(GInt16); 1872 break; 1873 1874 case GDT_UInt16: 1875 CLAMP(GUInt16); 1876 break; 1877 1878 case GDT_UInt32: 1879 CLAMP(GUInt32); 1880 break; 1881 1882 case GDT_Int32: 1883 CLAMP(GInt32); 1884 break; 1885 1886 case GDT_Float32: 1887 reinterpret_cast<float*>(pabyDst)[iDstOffset] = static_cast<float>(dfReal); 1888 break; 1889 1890 case GDT_Float64: 1891 reinterpret_cast<double*>(pabyDst)[iDstOffset] = dfReal; 1892 break; 1893 1894 default: 1895 return false; 1896 } 1897 1898 return true; 1899 } 1900 1901 /************************************************************************/ 1902 /* GWKGetPixelValue() */ 1903 /************************************************************************/ 1904 1905 /* It is assumed that panUnifiedSrcValid has been checked before */ 1906 1907 static bool GWKGetPixelValue( const GDALWarpKernel *poWK, int iBand, 1908 GPtrDiff_t iSrcOffset, double *pdfDensity, 1909 double *pdfReal, double *pdfImag ) 1910 1911 { 1912 GByte *pabySrc = poWK->papabySrcImage[iBand]; 1913 1914 if( poWK->papanBandSrcValid != nullptr 1915 && poWK->papanBandSrcValid[iBand] != nullptr 1916 && !((poWK->papanBandSrcValid[iBand][iSrcOffset>>5] 1917 & (0x01 << (iSrcOffset & 0x1f)))) ) 1918 { 1919 *pdfDensity = 0.0; 1920 return false; 1921 } 1922 1923 // TODO(schwehr): Fix casting. 1924 switch( poWK->eWorkingDataType ) 1925 { 1926 case GDT_Byte: 1927 *pdfReal = pabySrc[iSrcOffset]; 1928 *pdfImag = 0.0; 1929 break; 1930 1931 case GDT_Int16: 1932 *pdfReal = reinterpret_cast<GInt16*>(pabySrc)[iSrcOffset]; 1933 *pdfImag = 0.0; 1934 break; 1935 1936 case GDT_UInt16: 1937 *pdfReal = reinterpret_cast<GUInt16*>(pabySrc)[iSrcOffset]; 1938 *pdfImag = 0.0; 1939 break; 1940 1941 case GDT_Int32: 1942 *pdfReal = reinterpret_cast<GInt32*>(pabySrc)[iSrcOffset]; 1943 *pdfImag = 0.0; 1944 break; 1945 1946 case GDT_UInt32: 1947 *pdfReal = reinterpret_cast<GUInt32*>(pabySrc)[iSrcOffset]; 1948 *pdfImag = 0.0; 1949 break; 1950 1951 case GDT_Float32: 1952 *pdfReal = reinterpret_cast<float*>(pabySrc)[iSrcOffset]; 1953 *pdfImag = 0.0; 1954 break; 1955 1956 case GDT_Float64: 1957 *pdfReal = reinterpret_cast<double*>(pabySrc)[iSrcOffset]; 1958 *pdfImag = 0.0; 1959 break; 1960 1961 case GDT_CInt16: 1962 *pdfReal = reinterpret_cast<GInt16*>(pabySrc)[iSrcOffset*2]; 1963 *pdfImag = reinterpret_cast<GInt16*>(pabySrc)[iSrcOffset*2+1]; 1964 break; 1965 1966 case GDT_CInt32: 1967 *pdfReal = reinterpret_cast<GInt32*>(pabySrc)[iSrcOffset*2]; 1968 *pdfImag = reinterpret_cast<GInt32*>(pabySrc)[iSrcOffset*2+1]; 1969 break; 1970 1971 case GDT_CFloat32: 1972 *pdfReal = reinterpret_cast<float*>(pabySrc)[iSrcOffset*2]; 1973 *pdfImag = reinterpret_cast<float*>(pabySrc)[iSrcOffset*2+1]; 1974 break; 1975 1976 case GDT_CFloat64: 1977 *pdfReal = reinterpret_cast<double*>(pabySrc)[iSrcOffset*2]; 1978 *pdfImag = reinterpret_cast<double*>(pabySrc)[iSrcOffset*2+1]; 1979 break; 1980 1981 default: 1982 *pdfDensity = 0.0; 1983 return false; 1984 } 1985 1986 if( poWK->pafUnifiedSrcDensity != nullptr ) 1987 *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; 1988 else 1989 *pdfDensity = 1.0; 1990 1991 return *pdfDensity != 0.0; 1992 } 1993 1994 /************************************************************************/ 1995 /* GWKGetPixelValueReal() */ 1996 /************************************************************************/ 1997 1998 static bool GWKGetPixelValueReal( const GDALWarpKernel *poWK, int iBand, 1999 GPtrDiff_t iSrcOffset, double *pdfDensity, 2000 double *pdfReal ) 2001 2002 { 2003 GByte *pabySrc = poWK->papabySrcImage[iBand]; 2004 2005 if( poWK->papanBandSrcValid != nullptr 2006 && poWK->papanBandSrcValid[iBand] != nullptr 2007 && !((poWK->papanBandSrcValid[iBand][iSrcOffset>>5] 2008 & (0x01 << (iSrcOffset & 0x1f)))) ) 2009 { 2010 *pdfDensity = 0.0; 2011 return false; 2012 } 2013 2014 switch( poWK->eWorkingDataType ) 2015 { 2016 case GDT_Byte: 2017 *pdfReal = pabySrc[iSrcOffset]; 2018 break; 2019 2020 case GDT_Int16: 2021 *pdfReal = reinterpret_cast<GInt16*>(pabySrc)[iSrcOffset]; 2022 break; 2023 2024 case GDT_UInt16: 2025 *pdfReal = reinterpret_cast<GUInt16*>(pabySrc)[iSrcOffset]; 2026 break; 2027 2028 case GDT_Int32: 2029 *pdfReal = reinterpret_cast<GInt32*>(pabySrc)[iSrcOffset]; 2030 break; 2031 2032 case GDT_UInt32: 2033 *pdfReal = reinterpret_cast<GUInt32*>(pabySrc)[iSrcOffset]; 2034 break; 2035 2036 case GDT_Float32: 2037 *pdfReal = reinterpret_cast<float*>(pabySrc)[iSrcOffset]; 2038 break; 2039 2040 case GDT_Float64: 2041 *pdfReal = reinterpret_cast<double*>(pabySrc)[iSrcOffset]; 2042 break; 2043 2044 default: 2045 CPLAssert(false); 2046 return false; 2047 } 2048 2049 if( poWK->pafUnifiedSrcDensity != nullptr ) 2050 *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; 2051 else 2052 *pdfDensity = 1.0; 2053 2054 return *pdfDensity != 0.0; 2055 } 2056 2057 /************************************************************************/ 2058 /* GWKGetPixelRow() */ 2059 /************************************************************************/ 2060 2061 /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */ 2062 /* data-types. */ 2063 2064 static bool GWKGetPixelRow( const GDALWarpKernel *poWK, int iBand, 2065 GPtrDiff_t iSrcOffset, int nHalfSrcLen, 2066 double* padfDensity, 2067 double adfReal[], 2068 double* padfImag ) 2069 { 2070 // We know that nSrcLen is even, so we can *always* unroll loops 2x. 2071 const int nSrcLen = nHalfSrcLen * 2; 2072 bool bHasValid = false; 2073 2074 if( padfDensity != nullptr ) 2075 { 2076 // Init the density. 2077 for( int i = 0; i < nSrcLen; i += 2 ) 2078 { 2079 padfDensity[i] = 1.0; 2080 padfDensity[i+1] = 1.0; 2081 } 2082 2083 if( poWK->panUnifiedSrcValid != nullptr ) 2084 { 2085 for( int i = 0; i < nSrcLen; i += 2 ) 2086 { 2087 if( poWK->panUnifiedSrcValid[(iSrcOffset+i)>>5] 2088 & (0x01 << ((iSrcOffset+i) & 0x1f)) ) 2089 bHasValid = true; 2090 else 2091 padfDensity[i] = 0.0; 2092 2093 if( poWK->panUnifiedSrcValid[(iSrcOffset+i+1)>>5] 2094 & (0x01 << ((iSrcOffset+i+1) & 0x1f)) ) 2095 bHasValid = true; 2096 else 2097 padfDensity[i+1] = 0.0; 2098 } 2099 2100 // Reset or fail as needed. 2101 if( bHasValid ) 2102 bHasValid = false; 2103 else 2104 return false; 2105 } 2106 2107 if( poWK->papanBandSrcValid != nullptr 2108 && poWK->papanBandSrcValid[iBand] != nullptr ) 2109 { 2110 for( int i = 0; i < nSrcLen; i += 2 ) 2111 { 2112 if( poWK->papanBandSrcValid[iBand][(iSrcOffset+i)>>5] 2113 & (0x01 << ((iSrcOffset+i) & 0x1f)) ) 2114 bHasValid = true; 2115 else 2116 padfDensity[i] = 0.0; 2117 2118 if( poWK->papanBandSrcValid[iBand][(iSrcOffset+i+1)>>5] 2119 & (0x01 << ((iSrcOffset+i+1) & 0x1f)) ) 2120 bHasValid = true; 2121 else 2122 padfDensity[i+1] = 0.0; 2123 } 2124 2125 // Reset or fail as needed. 2126 if( bHasValid ) 2127 bHasValid = false; 2128 else 2129 return false; 2130 } 2131 } 2132 2133 // TODO(schwehr): Fix casting. 2134 // Fetch data. 2135 switch( poWK->eWorkingDataType ) 2136 { 2137 case GDT_Byte: 2138 { 2139 GByte* pSrc = reinterpret_cast<GByte*>(poWK->papabySrcImage[iBand]); 2140 pSrc += iSrcOffset; 2141 for( int i = 0; i < nSrcLen; i += 2 ) 2142 { 2143 adfReal[i] = pSrc[i]; 2144 adfReal[i+1] = pSrc[i+1]; 2145 } 2146 break; 2147 } 2148 2149 case GDT_Int16: 2150 { 2151 GInt16* pSrc = reinterpret_cast<GInt16*>(poWK->papabySrcImage[iBand]); 2152 pSrc += iSrcOffset; 2153 for( int i = 0; i < nSrcLen; i += 2 ) 2154 { 2155 adfReal[i] = pSrc[i]; 2156 adfReal[i+1] = pSrc[i+1]; 2157 } 2158 break; 2159 } 2160 2161 case GDT_UInt16: 2162 { 2163 GUInt16* pSrc = reinterpret_cast<GUInt16*>(poWK->papabySrcImage[iBand]); 2164 pSrc += iSrcOffset; 2165 for( int i = 0; i < nSrcLen; i += 2 ) 2166 { 2167 adfReal[i] = pSrc[i]; 2168 adfReal[i+1] = pSrc[i+1]; 2169 } 2170 break; 2171 } 2172 2173 case GDT_Int32: 2174 { 2175 GInt32* pSrc = reinterpret_cast<GInt32*>(poWK->papabySrcImage[iBand]); 2176 pSrc += iSrcOffset; 2177 for( int i = 0; i < nSrcLen; i += 2 ) 2178 { 2179 adfReal[i] = pSrc[i]; 2180 adfReal[i+1] = pSrc[i+1]; 2181 } 2182 break; 2183 } 2184 2185 case GDT_UInt32: 2186 { 2187 GUInt32* pSrc = reinterpret_cast<GUInt32*>(poWK->papabySrcImage[iBand]); 2188 pSrc += iSrcOffset; 2189 for( int i = 0; i < nSrcLen; i += 2 ) 2190 { 2191 adfReal[i] = pSrc[i]; 2192 adfReal[i+1] = pSrc[i+1]; 2193 } 2194 break; 2195 } 2196 2197 case GDT_Float32: 2198 { 2199 float* pSrc = reinterpret_cast<float*>(poWK->papabySrcImage[iBand]); 2200 pSrc += iSrcOffset; 2201 for( int i = 0; i < nSrcLen; i += 2 ) 2202 { 2203 adfReal[i] = pSrc[i]; 2204 adfReal[i+1] = pSrc[i+1]; 2205 } 2206 break; 2207 } 2208 2209 case GDT_Float64: 2210 { 2211 double* pSrc = reinterpret_cast<double*>(poWK->papabySrcImage[iBand]); 2212 pSrc += iSrcOffset; 2213 for( int i = 0; i < nSrcLen; i += 2 ) 2214 { 2215 adfReal[i] = pSrc[i]; 2216 adfReal[i+1] = pSrc[i+1]; 2217 } 2218 break; 2219 } 2220 2221 case GDT_CInt16: 2222 { 2223 GInt16* pSrc = reinterpret_cast<GInt16*>(poWK->papabySrcImage[iBand]); 2224 pSrc += 2 * iSrcOffset; 2225 for( int i = 0; i < nSrcLen; i += 2 ) 2226 { 2227 adfReal[i] = pSrc[2*i]; 2228 padfImag[i] = pSrc[2*i+1]; 2229 2230 adfReal[i+1] = pSrc[2*i+2]; 2231 padfImag[i+1] = pSrc[2*i+3]; 2232 } 2233 break; 2234 } 2235 2236 case GDT_CInt32: 2237 { 2238 GInt32* pSrc = reinterpret_cast<GInt32*>(poWK->papabySrcImage[iBand]); 2239 pSrc += 2 * iSrcOffset; 2240 for( int i = 0; i < nSrcLen; i += 2 ) 2241 { 2242 adfReal[i] = pSrc[2*i]; 2243 padfImag[i] = pSrc[2*i+1]; 2244 2245 adfReal[i+1] = pSrc[2*i+2]; 2246 padfImag[i+1] = pSrc[2*i+3]; 2247 } 2248 break; 2249 } 2250 2251 case GDT_CFloat32: 2252 { 2253 float* pSrc = reinterpret_cast<float*>(poWK->papabySrcImage[iBand]); 2254 pSrc += 2 * iSrcOffset; 2255 for( int i = 0; i < nSrcLen; i += 2 ) 2256 { 2257 adfReal[i] = pSrc[2*i]; 2258 padfImag[i] = pSrc[2*i+1]; 2259 2260 adfReal[i+1] = pSrc[2*i+2]; 2261 padfImag[i+1] = pSrc[2*i+3]; 2262 } 2263 break; 2264 } 2265 2266 case GDT_CFloat64: 2267 { 2268 double* pSrc = reinterpret_cast<double*>(poWK->papabySrcImage[iBand]); 2269 pSrc += 2 * iSrcOffset; 2270 for( int i = 0; i < nSrcLen; i += 2 ) 2271 { 2272 adfReal[i] = pSrc[2*i]; 2273 padfImag[i] = pSrc[2*i+1]; 2274 2275 adfReal[i+1] = pSrc[2*i+2]; 2276 padfImag[i+1] = pSrc[2*i+3]; 2277 } 2278 break; 2279 } 2280 2281 default: 2282 CPLAssert(false); 2283 if( padfDensity ) 2284 memset( padfDensity, 0, nSrcLen * sizeof(double) ); 2285 return false; 2286 } 2287 2288 if( padfDensity == nullptr ) 2289 return true; 2290 2291 if( poWK->pafUnifiedSrcDensity == nullptr ) 2292 { 2293 for( int i = 0; i < nSrcLen; i += 2 ) 2294 { 2295 // Take into account earlier calcs. 2296 if( padfDensity[i] > SRC_DENSITY_THRESHOLD ) 2297 { 2298 padfDensity[i] = 1.0; 2299 bHasValid = true; 2300 } 2301 2302 if( padfDensity[i+1] > SRC_DENSITY_THRESHOLD ) 2303 { 2304 padfDensity[i+1] = 1.0; 2305 bHasValid = true; 2306 } 2307 } 2308 } 2309 else 2310 { 2311 for( int i = 0; i < nSrcLen; i += 2 ) 2312 { 2313 if( padfDensity[i] > SRC_DENSITY_THRESHOLD ) 2314 padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset+i]; 2315 if( padfDensity[i] > SRC_DENSITY_THRESHOLD ) 2316 bHasValid = true; 2317 2318 if( padfDensity[i+1] > SRC_DENSITY_THRESHOLD ) 2319 padfDensity[i+1] = poWK->pafUnifiedSrcDensity[iSrcOffset+i+1]; 2320 if( padfDensity[i+1] > SRC_DENSITY_THRESHOLD ) 2321 bHasValid = true; 2322 } 2323 } 2324 2325 return bHasValid; 2326 } 2327 2328 /************************************************************************/ 2329 /* GWKGetPixelT() */ 2330 /************************************************************************/ 2331 2332 template<class T> 2333 static bool GWKGetPixelT( const GDALWarpKernel *poWK, int iBand, 2334 GPtrDiff_t iSrcOffset, double *pdfDensity, 2335 T *pValue ) 2336 2337 { 2338 T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]); 2339 2340 if( ( poWK->panUnifiedSrcValid != nullptr 2341 && !((poWK->panUnifiedSrcValid[iSrcOffset>>5] 2342 & (0x01 << (iSrcOffset & 0x1f))) ) ) 2343 || ( poWK->papanBandSrcValid != nullptr 2344 && poWK->papanBandSrcValid[iBand] != nullptr 2345 && !((poWK->papanBandSrcValid[iBand][iSrcOffset>>5] 2346 & (0x01 << (iSrcOffset & 0x1f)))) ) ) 2347 { 2348 *pdfDensity = 0.0; 2349 return false; 2350 } 2351 2352 *pValue = pSrc[iSrcOffset]; 2353 2354 if( poWK->pafUnifiedSrcDensity == nullptr ) 2355 *pdfDensity = 1.0; 2356 else 2357 *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; 2358 2359 return *pdfDensity != 0.0; 2360 } 2361 2362 /************************************************************************/ 2363 /* GWKBilinearResample() */ 2364 /* Set of bilinear interpolators */ 2365 /************************************************************************/ 2366 2367 static bool GWKBilinearResample4Sample( const GDALWarpKernel *poWK, int iBand, 2368 double dfSrcX, double dfSrcY, 2369 double *pdfDensity, 2370 double *pdfReal, double *pdfImag ) 2371 2372 { 2373 // Save as local variables to avoid following pointers. 2374 const int nSrcXSize = poWK->nSrcXSize; 2375 const int nSrcYSize = poWK->nSrcYSize; 2376 2377 int iSrcX = static_cast<int>(floor(dfSrcX - 0.5)); 2378 int iSrcY = static_cast<int>(floor(dfSrcY - 0.5)); 2379 double dfRatioX = 1.5 - (dfSrcX - iSrcX); 2380 double dfRatioY = 1.5 - (dfSrcY - iSrcY); 2381 bool bShifted = false; 2382 2383 if( iSrcX == -1 ) 2384 { 2385 iSrcX = 0; 2386 dfRatioX = 1; 2387 } 2388 if( iSrcY == -1 ) 2389 { 2390 iSrcY = 0; 2391 dfRatioY = 1; 2392 } 2393 GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 2394 2395 // Shift so we don't overrun the array. 2396 if( static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 2397 || static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + nSrcXSize + 1 ) 2398 { 2399 bShifted = true; 2400 --iSrcOffset; 2401 } 2402 2403 double adfDensity[2] = { 0.0, 0.0 }; 2404 double adfReal[2] = { 0.0, 0.0 }; 2405 double adfImag[2] = { 0.0, 0.0 }; 2406 double dfAccumulatorReal = 0.0; 2407 double dfAccumulatorImag = 0.0; 2408 double dfAccumulatorDensity = 0.0; 2409 double dfAccumulatorDivisor = 0.0; 2410 2411 const GPtrDiff_t nSrcPixels = static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize; 2412 // Get pixel row. 2413 if( iSrcY >= 0 && iSrcY < nSrcYSize 2414 && iSrcOffset >= 0 && iSrcOffset < nSrcPixels 2415 && GWKGetPixelRow( poWK, iBand, iSrcOffset, 1, 2416 adfDensity, adfReal, adfImag ) ) 2417 { 2418 double dfMult1 = dfRatioX * dfRatioY; 2419 double dfMult2 = (1.0-dfRatioX) * dfRatioY; 2420 2421 // Shifting corrected. 2422 if( bShifted ) 2423 { 2424 adfReal[0] = adfReal[1]; 2425 adfImag[0] = adfImag[1]; 2426 adfDensity[0] = adfDensity[1]; 2427 } 2428 2429 // Upper Left Pixel. 2430 if( iSrcX >= 0 && iSrcX < nSrcXSize 2431 && adfDensity[0] > SRC_DENSITY_THRESHOLD ) 2432 { 2433 dfAccumulatorDivisor += dfMult1; 2434 2435 dfAccumulatorReal += adfReal[0] * dfMult1; 2436 dfAccumulatorImag += adfImag[0] * dfMult1; 2437 dfAccumulatorDensity += adfDensity[0] * dfMult1; 2438 } 2439 2440 // Upper Right Pixel. 2441 if( iSrcX+1 >= 0 && iSrcX+1 < nSrcXSize 2442 && adfDensity[1] > SRC_DENSITY_THRESHOLD ) 2443 { 2444 dfAccumulatorDivisor += dfMult2; 2445 2446 dfAccumulatorReal += adfReal[1] * dfMult2; 2447 dfAccumulatorImag += adfImag[1] * dfMult2; 2448 dfAccumulatorDensity += adfDensity[1] * dfMult2; 2449 } 2450 } 2451 2452 // Get pixel row. 2453 if( iSrcY+1 >= 0 && iSrcY+1 < nSrcYSize 2454 && iSrcOffset+nSrcXSize >= 0 2455 && iSrcOffset+nSrcXSize < nSrcPixels 2456 && GWKGetPixelRow( poWK, iBand, iSrcOffset+nSrcXSize, 1, 2457 adfDensity, adfReal, adfImag ) ) 2458 { 2459 double dfMult1 = dfRatioX * (1.0-dfRatioY); 2460 double dfMult2 = (1.0-dfRatioX) * (1.0-dfRatioY); 2461 2462 // Shifting corrected 2463 if ( bShifted ) 2464 { 2465 adfReal[0] = adfReal[1]; 2466 adfImag[0] = adfImag[1]; 2467 adfDensity[0] = adfDensity[1]; 2468 } 2469 2470 // Lower Left Pixel 2471 if ( iSrcX >= 0 && iSrcX < nSrcXSize 2472 && adfDensity[0] > SRC_DENSITY_THRESHOLD ) 2473 { 2474 dfAccumulatorDivisor += dfMult1; 2475 2476 dfAccumulatorReal += adfReal[0] * dfMult1; 2477 dfAccumulatorImag += adfImag[0] * dfMult1; 2478 dfAccumulatorDensity += adfDensity[0] * dfMult1; 2479 } 2480 2481 // Lower Right Pixel. 2482 if( iSrcX+1 >= 0 && iSrcX+1 < nSrcXSize 2483 && adfDensity[1] > SRC_DENSITY_THRESHOLD ) 2484 { 2485 dfAccumulatorDivisor += dfMult2; 2486 2487 dfAccumulatorReal += adfReal[1] * dfMult2; 2488 dfAccumulatorImag += adfImag[1] * dfMult2; 2489 dfAccumulatorDensity += adfDensity[1] * dfMult2; 2490 } 2491 } 2492 2493 /* -------------------------------------------------------------------- */ 2494 /* Return result. */ 2495 /* -------------------------------------------------------------------- */ 2496 if( dfAccumulatorDivisor == 1.0 ) 2497 { 2498 *pdfReal = dfAccumulatorReal; 2499 *pdfImag = dfAccumulatorImag; 2500 *pdfDensity = dfAccumulatorDensity; 2501 return false; 2502 } 2503 else if( dfAccumulatorDivisor < 0.00001 ) 2504 { 2505 *pdfReal = 0.0; 2506 *pdfImag = 0.0; 2507 *pdfDensity = 0.0; 2508 return false; 2509 } 2510 else 2511 { 2512 *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor; 2513 *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor; 2514 *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor; 2515 return true; 2516 } 2517 } 2518 2519 template<class T> 2520 static bool GWKBilinearResampleNoMasks4SampleT( const GDALWarpKernel *poWK, int iBand, 2521 double dfSrcX, double dfSrcY, 2522 T *pValue ) 2523 2524 { 2525 2526 const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5)); 2527 const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5)); 2528 GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize; 2529 const double dfRatioX = 1.5 - (dfSrcX - iSrcX); 2530 const double dfRatioY = 1.5 - (dfSrcY - iSrcY); 2531 2532 const T* const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]); 2533 2534 2535 if( iSrcX >= 0 && iSrcX+1 < poWK->nSrcXSize 2536 && iSrcY >= 0 && iSrcY+1 < poWK->nSrcYSize ) 2537 { 2538 const double dfAccumulator = 2539 (pSrc[iSrcOffset] * dfRatioX + 2540 pSrc[iSrcOffset+1] * (1.0 - dfRatioX)) * 2541 dfRatioY + 2542 (pSrc[iSrcOffset+poWK->nSrcXSize] * dfRatioX + 2543 pSrc[iSrcOffset+1+poWK->nSrcXSize] * 2544 (1.0 - dfRatioX)) * (1.0-dfRatioY); 2545 2546 *pValue = GWKRoundValueT<T>(dfAccumulator); 2547 2548 return true; 2549 } 2550 2551 double dfAccumulatorDivisor = 0.0; 2552 double dfAccumulator = 0.0; 2553 2554 // Upper Left Pixel. 2555 if( iSrcX >= 0 && iSrcX < poWK->nSrcXSize 2556 && iSrcY >= 0 && iSrcY < poWK->nSrcYSize ) 2557 { 2558 const double dfMult = dfRatioX * dfRatioY; 2559 2560 dfAccumulatorDivisor += dfMult; 2561 2562 dfAccumulator += pSrc[iSrcOffset] * dfMult; 2563 } 2564 2565 // Upper Right Pixel. 2566 if( iSrcX+1 >= 0 && iSrcX+1 < poWK->nSrcXSize 2567 && iSrcY >= 0 && iSrcY < poWK->nSrcYSize ) 2568 { 2569 const double dfMult = (1.0 - dfRatioX) * dfRatioY; 2570 2571 dfAccumulatorDivisor += dfMult; 2572 2573 dfAccumulator += pSrc[iSrcOffset+1] * dfMult; 2574 } 2575 2576 // Lower Right Pixel. 2577 if( iSrcX+1 >= 0 && iSrcX+1 < poWK->nSrcXSize 2578 && iSrcY+1 >= 0 && iSrcY+1 < poWK->nSrcYSize ) 2579 { 2580 const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY); 2581 2582 dfAccumulatorDivisor += dfMult; 2583 2584 dfAccumulator += pSrc[iSrcOffset+1+poWK->nSrcXSize] * dfMult; 2585 } 2586 2587 // Lower Left Pixel. 2588 if( iSrcX >= 0 && iSrcX < poWK->nSrcXSize 2589 && iSrcY+1 >= 0 && iSrcY+1 < poWK->nSrcYSize ) 2590 { 2591 const double dfMult = dfRatioX * (1.0 - dfRatioY); 2592 2593 dfAccumulatorDivisor += dfMult; 2594 2595 dfAccumulator += pSrc[iSrcOffset+poWK->nSrcXSize] * dfMult; 2596 } 2597 2598 /* -------------------------------------------------------------------- */ 2599 /* Return result. */ 2600 /* -------------------------------------------------------------------- */ 2601 double dfValue = 0.0; 2602 2603 if( dfAccumulatorDivisor < 0.00001 ) 2604 { 2605 *pValue = 0; 2606 return false; 2607 } 2608 else if( dfAccumulatorDivisor == 1.0 ) 2609 { 2610 dfValue = dfAccumulator; 2611 } 2612 else 2613 { 2614 dfValue = dfAccumulator / dfAccumulatorDivisor; 2615 } 2616 2617 *pValue = GWKRoundValueT<T>(dfValue); 2618 2619 return true; 2620 } 2621 2622 /************************************************************************/ 2623 /* GWKCubicResample() */ 2624 /* Set of bicubic interpolators using cubic convolution. */ 2625 /************************************************************************/ 2626 2627 // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18 2628 // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2) 2629 // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation 2630 2631 // TODO(schwehr): Use an inline function. 2632 #define CubicConvolution(distance1,distance2,distance3,f0,f1,f2,f3) \ 2633 ( f1 \ 2634 + 0.5 * (distance1*(f2 - f0) \ 2635 + distance2*(2.0*f0 - 5.0*f1 + 4.0*f2 - f3) \ 2636 + distance3*(3.0*(f1 - f2) + f3 - f0))) 2637 2638 /************************************************************************/ 2639 /* GWKCubicComputeWeights() */ 2640 /************************************************************************/ 2641 2642 // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]); 2643 2644 // TODO(schwehr): Use an inline function. 2645 #define GWKCubicComputeWeights(dfX_, adfCoeffs) \ 2646 { \ 2647 const double dfX = dfX_; \ 2648 const double dfHalfX = 0.5 * dfX; \ 2649 const double dfThreeX = 3.0 * dfX; \ 2650 const double dfHalfX2 = dfHalfX * dfX; \ 2651 \ 2652 adfCoeffs[0] = dfHalfX * (-1 + dfX * (2 - dfX)); \ 2653 adfCoeffs[1] = 1 + dfHalfX2 * (-5 + dfThreeX); \ 2654 adfCoeffs[2] = dfHalfX * (1 + dfX * (4 - dfThreeX)); \ 2655 adfCoeffs[3] = dfHalfX2 * (-1 + dfX); \ 2656 } 2657 2658 // TODO(schwehr): Use an inline function. 2659 #define CONVOL4(v1, v2) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + \ 2660 (v1)[2] * (v2)[2] + (v1)[3] * (v2)[3]) 2661 2662 #if 0 2663 // Optimal (in theory...) for max 2 convolutions: 14 multiplications 2664 // instead of 17. 2665 // TODO(schwehr): Use an inline function. 2666 #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \ 2667 { \ 2668 const double dfX = dfX_; \ 2669 dfHalfX = 0.5 * dfX; \ 2670 const double dfThreeX = 3.0 * dfX; \ 2671 const double dfXMinus1 = dfX - 1; \ 2672 \ 2673 adfCoeffs[0] = -1 + dfX * (2 - dfX); \ 2674 adfCoeffs[1] = dfX * (-5 + dfThreeX); \ 2675 /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \ 2676 adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \ 2677 /*adfCoeffs[3] = dfX * (-1 + dfX); */ \ 2678 adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \ 2679 } 2680 2681 // TODO(schwehr): Use an inline function. 2682 #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \ 2683 ((v)[1] + (dfHalfX) * (\ 2684 (adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \ 2685 (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3])) 2686 #endif 2687 2688 static bool GWKCubicResample4Sample( const GDALWarpKernel *poWK, int iBand, 2689 double dfSrcX, double dfSrcY, 2690 double *pdfDensity, 2691 double *pdfReal, double *pdfImag ) 2692 2693 { 2694 const int iSrcX = static_cast<int>(dfSrcX - 0.5); 2695 const int iSrcY = static_cast<int>(dfSrcY - 0.5); 2696 GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize; 2697 const double dfDeltaX = dfSrcX - 0.5 - iSrcX; 2698 const double dfDeltaY = dfSrcY - 0.5 - iSrcY; 2699 double adfDensity[4] = {}; 2700 double adfReal[4] = {}; 2701 double adfImag[4] = {}; 2702 2703 // Get the bilinear interpolation at the image borders. 2704 if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize 2705 || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize ) 2706 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, 2707 pdfDensity, pdfReal, pdfImag ); 2708 2709 double adfValueDens[4] = {}; 2710 double adfValueReal[4] = {}; 2711 double adfValueImag[4] = {}; 2712 2713 double adfCoeffsX[4] = {}; 2714 GWKCubicComputeWeights(dfDeltaX, adfCoeffsX); 2715 2716 for( GPtrDiff_t i = -1; i < 3; i++ ) 2717 { 2718 if( !GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1, 2719 2, adfDensity, adfReal, adfImag) 2720 || adfDensity[0] < SRC_DENSITY_THRESHOLD 2721 || adfDensity[1] < SRC_DENSITY_THRESHOLD 2722 || adfDensity[2] < SRC_DENSITY_THRESHOLD 2723 || adfDensity[3] < SRC_DENSITY_THRESHOLD ) 2724 { 2725 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, 2726 pdfDensity, pdfReal, pdfImag ); 2727 } 2728 2729 adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity); 2730 adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal); 2731 adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag); 2732 } 2733 2734 /* -------------------------------------------------------------------- */ 2735 /* For now, if we have any pixels missing in the kernel area, */ 2736 /* we fallback on using bilinear interpolation. Ideally we */ 2737 /* should do "weight adjustment" of our results similarly to */ 2738 /* what is done for the cubic spline and lanc. interpolators. */ 2739 /* -------------------------------------------------------------------- */ 2740 2741 double adfCoeffsY[4] = {}; 2742 GWKCubicComputeWeights(dfDeltaY, adfCoeffsY); 2743 2744 *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens); 2745 *pdfReal = CONVOL4(adfCoeffsY, adfValueReal); 2746 *pdfImag = CONVOL4(adfCoeffsY, adfValueImag); 2747 2748 return true; 2749 } 2750 2751 // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero 2752 // perf benefit. 2753 2754 #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) 2755 2756 /************************************************************************/ 2757 /* XMMLoad4Values() */ 2758 /* */ 2759 /* Load 4 packed byte or uint16, cast them to float and put them in a */ 2760 /* m128 register. */ 2761 /************************************************************************/ 2762 2763 static CPL_INLINE __m128 XMMLoad4Values(const GByte* ptr) 2764 { 2765 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS 2766 unsigned int i; 2767 memcpy(&i, ptr, 4); 2768 __m128i xmm_i = _mm_cvtsi32_si128(s); 2769 #else 2770 __m128i xmm_i = _mm_cvtsi32_si128(*(unsigned int*)(ptr)); 2771 #endif 2772 // Zero extend 4 packed unsigned 8-bit integers in a to packed 2773 // 32-bit integers. 2774 #if __SSE4_1__ 2775 xmm_i = _mm_cvtepu8_epi32(xmm_i); 2776 #else 2777 xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128()); 2778 xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128()); 2779 #endif 2780 return _mm_cvtepi32_ps(xmm_i); 2781 } 2782 2783 static CPL_INLINE __m128 XMMLoad4Values(const GUInt16* ptr) 2784 { 2785 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS 2786 GUInt64 i; 2787 memcpy(&i, ptr, 8); 2788 __m128i xmm_i = _mm_cvtsi64_si128(s); 2789 #else 2790 __m128i xmm_i = _mm_cvtsi64_si128(*(GUInt64*)(ptr)); 2791 #endif 2792 // Zero extend 4 packed unsigned 16-bit integers in a to packed 2793 // 32-bit integers. 2794 #if __SSE4_1__ 2795 xmm_i = _mm_cvtepu16_epi32(xmm_i); 2796 #else 2797 xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128()); 2798 #endif 2799 return _mm_cvtepi32_ps(xmm_i); 2800 } 2801 2802 /************************************************************************/ 2803 /* XMMHorizontalAdd() */ 2804 /* */ 2805 /* Return the sum of the 4 floating points of the register. */ 2806 /************************************************************************/ 2807 2808 #if __SSE3__ 2809 static CPL_INLINE float XMMHorizontalAdd(__m128 v) 2810 { 2811 __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1) 2812 __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0) 2813 shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2) 2814 sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2) 2815 return _mm_cvtss_f32(sums); 2816 } 2817 #else 2818 static CPL_INLINE float XMMHorizontalAdd(__m128 v) 2819 { 2820 __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2) 2821 __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0) 2822 shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1) 2823 sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1) 2824 return _mm_cvtss_f32(sums); 2825 } 2826 #endif 2827 2828 #endif // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) 2829 2830 /************************************************************************/ 2831 /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */ 2832 /************************************************************************/ 2833 2834 // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16, 2835 // because there are a few assumptions above those types. 2836 2837 template<class T> 2838 static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT( 2839 const GDALWarpKernel *poWK, int iBand, 2840 double dfSrcX, double dfSrcY, 2841 double *pdfDensity, 2842 double *pdfReal ) 2843 { 2844 const int iSrcX = static_cast<int>(dfSrcX - 0.5); 2845 const int iSrcY = static_cast<int>(dfSrcY - 0.5); 2846 const GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize; 2847 2848 // Get the bilinear interpolation at the image borders. 2849 if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize 2850 || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize ) 2851 { 2852 double adfImagIgnored[4] = {}; 2853 return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, 2854 pdfDensity, pdfReal, adfImagIgnored); 2855 } 2856 2857 #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) 2858 const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX; 2859 const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY; 2860 2861 // TODO(schwehr): Explain the magic numbers. 2862 float afTemp[4 + 4 + 4 + 1]; 2863 float* pafAligned = 2864 reinterpret_cast<float*>(afTemp + ((size_t)afTemp & 0xf)); 2865 float* pafCoeffs = pafAligned; 2866 float* pafDensity = pafAligned + 4; 2867 float* pafValue = pafAligned + 8; 2868 2869 const float fHalfDeltaX = 0.5f * fDeltaX; 2870 const float fThreeDeltaX = 3.0f * fDeltaX; 2871 const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX; 2872 2873 pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX)); 2874 pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX); 2875 pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX)); 2876 pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX); 2877 __m128 xmmCoeffs = _mm_load_ps(pafCoeffs); 2878 const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD); 2879 2880 __m128 xmmMaskLowDensity = _mm_setzero_ps(); 2881 for( GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; 2882 i < 3; i++, iOffset += poWK->nSrcXSize ) 2883 { 2884 const __m128 xmmDensity = _mm_loadu_ps( 2885 poWK->pafUnifiedSrcDensity + iOffset); 2886 xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity, 2887 _mm_cmplt_ps(xmmDensity, xmmThreshold)); 2888 pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity)); 2889 2890 const __m128 xmmValues = XMMLoad4Values( 2891 ((T*) poWK->papabySrcImage[iBand]) + iOffset); 2892 pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues)); 2893 } 2894 if( _mm_movemask_ps(xmmMaskLowDensity) ) 2895 { 2896 double adfImagIgnored[4] = {}; 2897 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, 2898 pdfDensity, pdfReal, adfImagIgnored ); 2899 } 2900 2901 const float fHalfDeltaY = 0.5f * fDeltaY; 2902 const float fThreeDeltaY = 3.0f * fDeltaY; 2903 const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY; 2904 2905 pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY)); 2906 pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY); 2907 pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY)); 2908 pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY); 2909 2910 xmmCoeffs = _mm_load_ps(pafCoeffs); 2911 2912 const __m128 xmmDensity = _mm_load_ps(pafDensity); 2913 const __m128 xmmValue = _mm_load_ps(pafValue); 2914 *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity)); 2915 *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue)); 2916 2917 // We did all above computations on float32 whereas the general case is 2918 // float64. Not sure if one is fundamentally more correct than the other 2919 // one, but we want our optimization to give the same result as the 2920 // general case as much as possible, so if the resulting value is 2921 // close to some_int_value + 0.5, redo the computation with the general 2922 // case. 2923 // Note: If other types than Byte or UInt16, will need changes. 2924 if( fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007 ) 2925 return true; 2926 2927 #endif // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) 2928 2929 const double dfDeltaX = dfSrcX - 0.5 - iSrcX; 2930 const double dfDeltaY = dfSrcY - 0.5 - iSrcY; 2931 2932 double adfValueDens[4] = {}; 2933 double adfValueReal[4] = {}; 2934 2935 double adfCoeffsX[4] = {}; 2936 GWKCubicComputeWeights(dfDeltaX, adfCoeffsX); 2937 2938 double adfCoeffsY[4] = {}; 2939 GWKCubicComputeWeights(dfDeltaY, adfCoeffsY); 2940 2941 for( GPtrDiff_t i = -1; i < 3; i++ ) 2942 { 2943 const GPtrDiff_t iOffset = iSrcOffset+i*poWK->nSrcXSize - 1; 2944 #if !(defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))) 2945 if( poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD || 2946 poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD || 2947 poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD || 2948 poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD ) 2949 { 2950 double adfImagIgnored[4] = {}; 2951 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, 2952 pdfDensity, pdfReal, adfImagIgnored ); 2953 } 2954 #endif 2955 2956 adfValueDens[i + 1] = CONVOL4( 2957 adfCoeffsX, 2958 poWK->pafUnifiedSrcDensity + iOffset); 2959 2960 adfValueReal[i + 1] = CONVOL4( 2961 adfCoeffsX, 2962 reinterpret_cast<T*>(poWK->papabySrcImage[iBand]) + iOffset); 2963 } 2964 2965 *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens); 2966 *pdfReal = CONVOL4(adfCoeffsY, adfValueReal); 2967 2968 return true; 2969 } 2970 2971 /************************************************************************/ 2972 /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */ 2973 /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */ 2974 /************************************************************************/ 2975 2976 static bool GWKCubicResampleSrcMaskIsDensity4SampleReal( 2977 const GDALWarpKernel *poWK, int iBand, 2978 double dfSrcX, double dfSrcY, 2979 double *pdfDensity, 2980 double *pdfReal ) 2981 2982 { 2983 const int iSrcX = static_cast<int>(dfSrcX - 0.5); 2984 const int iSrcY = static_cast<int>(dfSrcY - 0.5); 2985 const GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize; 2986 const double dfDeltaX = dfSrcX - 0.5 - iSrcX; 2987 const double dfDeltaY = dfSrcY - 0.5 - iSrcY; 2988 2989 // Get the bilinear interpolation at the image borders. 2990 if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize 2991 || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize ) 2992 { 2993 double adfImagIgnored[4] = {}; 2994 return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, 2995 pdfDensity, pdfReal, adfImagIgnored); 2996 } 2997 2998 double adfCoeffsX[4] = {}; 2999 GWKCubicComputeWeights(dfDeltaX, adfCoeffsX); 3000 3001 double adfCoeffsY[4] = {}; 3002 GWKCubicComputeWeights(dfDeltaY, adfCoeffsY); 3003 3004 double adfValueDens[4] = {}; 3005 double adfValueReal[4] = {}; 3006 double adfDensity[4] = {}; 3007 double adfReal[4] = {}; 3008 double adfImagIgnored[4] = {}; 3009 3010 for( GPtrDiff_t i = -1; i < 3; i++ ) 3011 { 3012 if( !GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1, 3013 2, adfDensity, adfReal, adfImagIgnored) 3014 || adfDensity[0] < SRC_DENSITY_THRESHOLD 3015 || adfDensity[1] < SRC_DENSITY_THRESHOLD 3016 || adfDensity[2] < SRC_DENSITY_THRESHOLD 3017 || adfDensity[3] < SRC_DENSITY_THRESHOLD ) 3018 { 3019 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, 3020 pdfDensity, pdfReal, adfImagIgnored ); 3021 } 3022 3023 adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity); 3024 adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal); 3025 } 3026 3027 *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens); 3028 *pdfReal = CONVOL4(adfCoeffsY, adfValueReal); 3029 3030 return true; 3031 } 3032 3033 template<class T> 3034 static bool GWKCubicResampleNoMasks4SampleT( const GDALWarpKernel *poWK, int iBand, 3035 double dfSrcX, double dfSrcY, 3036 T *pValue ) 3037 3038 { 3039 const int iSrcX = static_cast<int>(dfSrcX - 0.5); 3040 const int iSrcY = static_cast<int>(dfSrcY - 0.5); 3041 const GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize; 3042 const double dfDeltaX = dfSrcX - 0.5 - iSrcX; 3043 const double dfDeltaY = dfSrcY - 0.5 - iSrcY; 3044 const double dfDeltaY2 = dfDeltaY * dfDeltaY; 3045 const double dfDeltaY3 = dfDeltaY2 * dfDeltaY; 3046 3047 // Get the bilinear interpolation at the image borders. 3048 if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize 3049 || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize ) 3050 return GWKBilinearResampleNoMasks4SampleT ( poWK, iBand, dfSrcX, dfSrcY, 3051 pValue ); 3052 3053 double adfCoeffs[4] = {}; 3054 GWKCubicComputeWeights(dfDeltaX, adfCoeffs); 3055 3056 double adfValue[4] = {}; 3057 3058 for( GPtrDiff_t i = -1; i < 3; i++ ) 3059 { 3060 const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1; 3061 3062 adfValue[i + 1] = CONVOL4( 3063 adfCoeffs, 3064 reinterpret_cast<T*>(poWK->papabySrcImage[iBand]) + iOffset); 3065 } 3066 3067 const double dfValue = CubicConvolution( 3068 dfDeltaY, dfDeltaY2, dfDeltaY3, 3069 adfValue[0], adfValue[1], adfValue[2], adfValue[3]); 3070 3071 *pValue = GWKClampValueT<T>(dfValue); 3072 3073 return true; 3074 } 3075 3076 /************************************************************************/ 3077 /* GWKLanczosSinc() */ 3078 /************************************************************************/ 3079 3080 /* 3081 * Lanczos windowed sinc interpolation kernel with radius r. 3082 * / 3083 * | sinc(x) * sinc(x/r), if |x| < r 3084 * L(x) = | 1, if x = 0 , 3085 * | 0, otherwise 3086 * \ 3087 * 3088 * where sinc(x) = sin(PI * x) / (PI * x). 3089 */ 3090 3091 static double GWKLanczosSinc( double dfX ) 3092 { 3093 if( dfX == 0.0 ) 3094 return 1.0; 3095 3096 const double dfPIX = M_PI * dfX; 3097 const double dfPIXoverR = dfPIX / 3; 3098 const double dfPIX2overR = dfPIX * dfPIXoverR; 3099 return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR; 3100 } 3101 3102 static double GWKLanczosSinc4Values( double* padfValues ) 3103 { 3104 for( int i = 0; i < 4; i++ ) 3105 { 3106 if( padfValues[i] == 0.0 ) 3107 { 3108 padfValues[i] = 1.0; 3109 } 3110 else 3111 { 3112 const double dfPIX = M_PI * padfValues[i]; 3113 const double dfPIXoverR = dfPIX / 3; 3114 const double dfPIX2overR = dfPIX * dfPIXoverR; 3115 padfValues[i] = sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR; 3116 } 3117 } 3118 return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3]; 3119 } 3120 3121 /************************************************************************/ 3122 /* GWKBilinear() */ 3123 /************************************************************************/ 3124 3125 static double GWKBilinear( double dfX ) 3126 { 3127 double dfAbsX = fabs(dfX); 3128 if( dfAbsX <= 1.0 ) 3129 return 1 - dfAbsX; 3130 else 3131 return 0.0; 3132 } 3133 3134 static double GWKBilinear4Values( double* padfValues ) 3135 { 3136 double dfAbsX0 = fabs(padfValues[0]); 3137 double dfAbsX1 = fabs(padfValues[1]); 3138 double dfAbsX2 = fabs(padfValues[2]); 3139 double dfAbsX3 = fabs(padfValues[3]); 3140 if( dfAbsX0 <= 1.0 ) 3141 padfValues[0] = 1 - dfAbsX0; 3142 else 3143 padfValues[0] = 0.0; 3144 if( dfAbsX1 <= 1.0 ) 3145 padfValues[1] = 1 - dfAbsX1; 3146 else 3147 padfValues[1] = 0.0; 3148 if( dfAbsX2 <= 1.0 ) 3149 padfValues[2] = 1 - dfAbsX2; 3150 else 3151 padfValues[2] = 0.0; 3152 if( dfAbsX3 <= 1.0 ) 3153 padfValues[3] = 1 - dfAbsX3; 3154 else 3155 padfValues[3] = 0.0; 3156 return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3]; 3157 } 3158 3159 /************************************************************************/ 3160 /* GWKCubic() */ 3161 /************************************************************************/ 3162 3163 static double GWKCubic( double dfX ) 3164 { 3165 // http://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm 3166 // W(x) formula with a = -0.5 (cubic hermite spline ) 3167 // or https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf 3168 // k(x) (formula 8) with (B,C)=(0,0.5) the Catmull-Rom spline 3169 double dfAbsX = fabs(dfX); 3170 if( dfAbsX <= 1.0 ) 3171 { 3172 double dfX2 = dfX * dfX; 3173 return dfX2 * (1.5 * dfAbsX - 2.5) + 1; 3174 } 3175 else if( dfAbsX <= 2.0 ) 3176 { 3177 double dfX2 = dfX * dfX; 3178 return dfX2 * (-0.5 * dfAbsX + 2.5) - 4 * dfAbsX + 2; 3179 } 3180 else 3181 return 0.0; 3182 } 3183 3184 static double GWKCubic4Values( double* padfValues ) 3185 { 3186 const double dfAbsX_0 = fabs(padfValues[0]); 3187 const double dfAbsX_1 = fabs(padfValues[1]); 3188 const double dfAbsX_2 = fabs(padfValues[2]); 3189 const double dfAbsX_3 = fabs(padfValues[3]); 3190 const double dfX2_0 = padfValues[0] * padfValues[0]; 3191 const double dfX2_1 = padfValues[1] * padfValues[1]; 3192 const double dfX2_2 = padfValues[2] * padfValues[2]; 3193 const double dfX2_3 = padfValues[3] * padfValues[3]; 3194 3195 double dfVal0 = 0.0; 3196 if( dfAbsX_0 <= 1.0 ) 3197 dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0; 3198 else if( dfAbsX_0 <= 2.0 ) 3199 dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0; 3200 3201 double dfVal1 = 0.0; 3202 if( dfAbsX_1 <= 1.0 ) 3203 dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0; 3204 else if( dfAbsX_1 <= 2.0 ) 3205 dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0; 3206 3207 double dfVal2 = 0.0; 3208 if( dfAbsX_2 <= 1.0 ) 3209 dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0; 3210 else if( dfAbsX_2 <= 2.0 ) 3211 dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0; 3212 3213 double dfVal3 = 0.0; 3214 if( dfAbsX_3 <= 1.0 ) 3215 dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0; 3216 else if( dfAbsX_3 <= 2.0 ) 3217 dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0; 3218 3219 padfValues[0] = dfVal0; 3220 padfValues[1] = dfVal1; 3221 padfValues[2] = dfVal2; 3222 padfValues[3] = dfVal3; 3223 return dfVal0 + dfVal1 + dfVal2 + dfVal3; 3224 } 3225 3226 /************************************************************************/ 3227 /* GWKBSpline() */ 3228 /************************************************************************/ 3229 3230 // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf 3231 // Equation 8 with (B,C)=(1,0) 3232 // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1 3233 // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2 3234 3235 static double GWKBSpline( double x ) 3236 { 3237 const double xp2 = x + 2.0; 3238 const double xp1 = x + 1.0; 3239 const double xm1 = x - 1.0; 3240 3241 // This will most likely be used, so we'll compute it ahead of time to 3242 // avoid stalling the processor. 3243 const double xp2c = xp2 * xp2 * xp2; 3244 3245 // Note that the test is computed only if it is needed. 3246 // TODO(schwehr): Make this easier to follow. 3247 return 3248 xp2 > 0.0 3249 ? ((xp1 > 0.0)?((x > 0.0)?((xm1 > 0.0)? 3250 -4.0 * xm1*xm1*xm1:0.0) + 3251 6.0 * x*x*x:0.0) + 3252 -4.0 * xp1*xp1*xp1:0.0) + xp2c 3253 : 0.0; // * 0.166666666666666666666 3254 } 3255 3256 static double GWKBSpline4Values( double* padfValues ) 3257 { 3258 for( int i = 0; i < 4; i++ ) 3259 { 3260 const double x = padfValues[i]; 3261 const double xp2 = x + 2.0; 3262 const double xp1 = x + 1.0; 3263 const double xm1 = x - 1.0; 3264 3265 // This will most likely be used, so we'll compute it ahead of time to 3266 // avoid stalling the processor. 3267 const double xp2c = xp2 * xp2 * xp2; 3268 3269 // Note that the test is computed only if it is needed. 3270 // TODO(schwehr): Make this easier to follow. 3271 padfValues[i] = (xp2 > 0.0)?((xp1 > 0.0)?((x > 0.0)?((xm1 > 0.0)? 3272 -4.0 * xm1*xm1*xm1:0.0) + 3273 6.0 * x*x*x:0.0) + 3274 -4.0 * xp1*xp1*xp1:0.0) + 3275 xp2c:0.0; // * 0.166666666666666666666 3276 } 3277 return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3]; 3278 } 3279 /************************************************************************/ 3280 /* GWKResampleWrkStruct */ 3281 /************************************************************************/ 3282 3283 typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct; 3284 3285 typedef bool (*pfnGWKResampleType) ( const GDALWarpKernel *poWK, int iBand, 3286 double dfSrcX, double dfSrcY, 3287 double *pdfDensity, 3288 double *pdfReal, double *pdfImag, 3289 GWKResampleWrkStruct* psWrkStruct ); 3290 3291 struct _GWKResampleWrkStruct 3292 { 3293 pfnGWKResampleType pfnGWKResample; 3294 3295 // Space for saved X weights. 3296 double *padfWeightsX; 3297 bool *pabCalcX; 3298 3299 double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos. 3300 int iLastSrcX; // Only used by GWKResampleOptimizedLanczos. 3301 int iLastSrcY; // Only used by GWKResampleOptimizedLanczos. 3302 double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos. 3303 double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos. 3304 3305 // Space for saving a row of pixels. 3306 double *padfRowDensity; 3307 double *padfRowReal; 3308 double *padfRowImag; 3309 }; 3310 3311 /************************************************************************/ 3312 /* GWKResampleCreateWrkStruct() */ 3313 /************************************************************************/ 3314 3315 static bool GWKResample( const GDALWarpKernel *poWK, int iBand, 3316 double dfSrcX, double dfSrcY, 3317 double *pdfDensity, 3318 double *pdfReal, double *pdfImag, 3319 GWKResampleWrkStruct* psWrkStruct ); 3320 3321 static bool GWKResampleOptimizedLanczos( const GDALWarpKernel *poWK, int iBand, 3322 double dfSrcX, double dfSrcY, 3323 double *pdfDensity, 3324 double *pdfReal, double *pdfImag, 3325 GWKResampleWrkStruct* psWrkStruct ); 3326 3327 static GWKResampleWrkStruct* GWKResampleCreateWrkStruct(GDALWarpKernel *poWK) 3328 { 3329 const int nXDist = ( poWK->nXRadius + 1 ) * 2; 3330 const int nYDist = ( poWK->nYRadius + 1 ) * 2; 3331 3332 GWKResampleWrkStruct* psWrkStruct = static_cast<GWKResampleWrkStruct *>( 3333 CPLMalloc(sizeof(GWKResampleWrkStruct))); 3334 3335 // Alloc space for saved X weights. 3336 psWrkStruct->padfWeightsX = 3337 static_cast<double *>(CPLCalloc( nXDist, sizeof(double))); 3338 psWrkStruct->pabCalcX = 3339 static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool))); 3340 3341 psWrkStruct->padfWeightsY = 3342 static_cast<double *>(CPLCalloc(nYDist, sizeof(double))); 3343 psWrkStruct->iLastSrcX = -10; 3344 psWrkStruct->iLastSrcY = -10; 3345 psWrkStruct->dfLastDeltaX = -10; 3346 psWrkStruct->dfLastDeltaY = -10; 3347 3348 // Alloc space for saving a row of pixels. 3349 if( poWK->pafUnifiedSrcDensity == nullptr && 3350 poWK->panUnifiedSrcValid == nullptr && 3351 poWK->papanBandSrcValid == nullptr ) 3352 { 3353 psWrkStruct->padfRowDensity = nullptr; 3354 } 3355 else 3356 { 3357 psWrkStruct->padfRowDensity = 3358 static_cast<double *>(CPLCalloc(nXDist, sizeof(double))); 3359 } 3360 psWrkStruct->padfRowReal = 3361 static_cast<double *>(CPLCalloc(nXDist, sizeof(double))); 3362 psWrkStruct->padfRowImag = 3363 static_cast<double *>(CPLCalloc(nXDist, sizeof(double))); 3364 3365 if( poWK->eResample == GRA_Lanczos ) 3366 { 3367 psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos; 3368 3369 const double dfXScale = poWK->dfXScale; 3370 if( dfXScale < 1.0 ) 3371 { 3372 int iMin = poWK->nFiltInitX; 3373 int iMax = poWK->nXRadius; 3374 while( iMin * dfXScale < -3.0 ) 3375 iMin++; 3376 while( iMax * dfXScale > 3.0 ) 3377 iMax--; 3378 3379 for( int i = iMin; i <= iMax; ++i ) 3380 { 3381 psWrkStruct->padfWeightsX[i-poWK->nFiltInitX] = 3382 GWKLanczosSinc(i * dfXScale); 3383 } 3384 } 3385 3386 const double dfYScale = poWK->dfYScale; 3387 if( dfYScale < 1.0 ) 3388 { 3389 int jMin = poWK->nFiltInitY; 3390 int jMax = poWK->nYRadius; 3391 while( jMin * dfYScale < -3.0 ) 3392 jMin++; 3393 while( jMax * dfYScale > 3.0 ) 3394 jMax--; 3395 3396 for( int j = jMin; j <= jMax; ++j ) 3397 { 3398 psWrkStruct->padfWeightsY[j-poWK->nFiltInitY] = 3399 GWKLanczosSinc(j * dfYScale); 3400 } 3401 } 3402 } 3403 else 3404 psWrkStruct->pfnGWKResample = GWKResample; 3405 3406 return psWrkStruct; 3407 } 3408 3409 /************************************************************************/ 3410 /* GWKResampleDeleteWrkStruct() */ 3411 /************************************************************************/ 3412 3413 static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct* psWrkStruct) 3414 { 3415 CPLFree( psWrkStruct->padfWeightsX ); 3416 CPLFree( psWrkStruct->padfWeightsY ); 3417 CPLFree( psWrkStruct->pabCalcX ); 3418 CPLFree( psWrkStruct->padfRowDensity ); 3419 CPLFree( psWrkStruct->padfRowReal ); 3420 CPLFree( psWrkStruct->padfRowImag ); 3421 CPLFree( psWrkStruct ); 3422 } 3423 3424 /************************************************************************/ 3425 /* GWKResample() */ 3426 /************************************************************************/ 3427 3428 static bool GWKResample( const GDALWarpKernel *poWK, int iBand, 3429 double dfSrcX, double dfSrcY, 3430 double *pdfDensity, 3431 double *pdfReal, double *pdfImag, 3432 GWKResampleWrkStruct* psWrkStruct ) 3433 3434 { 3435 // Save as local variables to avoid following pointers in loops. 3436 const int nSrcXSize = poWK->nSrcXSize; 3437 const int nSrcYSize = poWK->nSrcYSize; 3438 3439 double dfAccumulatorReal = 0.0; 3440 double dfAccumulatorImag = 0.0; 3441 double dfAccumulatorDensity = 0.0; 3442 double dfAccumulatorWeight = 0.0; 3443 const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5)); 3444 const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5)); 3445 const GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 3446 const double dfDeltaX = dfSrcX - 0.5 - iSrcX; 3447 const double dfDeltaY = dfSrcY - 0.5 - iSrcY; 3448 3449 const double dfXScale = poWK->dfXScale; 3450 const double dfYScale = poWK->dfYScale; 3451 3452 const int nXDist = ( poWK->nXRadius + 1 ) * 2; 3453 3454 // Space for saved X weights. 3455 double *padfWeightsX = psWrkStruct->padfWeightsX; 3456 bool *pabCalcX = psWrkStruct->pabCalcX; 3457 3458 // Space for saving a row of pixels. 3459 double *padfRowDensity = psWrkStruct->padfRowDensity; 3460 double *padfRowReal = psWrkStruct->padfRowReal; 3461 double *padfRowImag = psWrkStruct->padfRowImag; 3462 3463 // Mark as needing calculation (don't calculate the weights yet, 3464 // because a mask may render it unnecessary). 3465 memset( pabCalcX, false, nXDist * sizeof(bool) ); 3466 3467 FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample]; 3468 CPLAssert(pfnGetWeight); 3469 3470 // Skip sampling over edge of image. 3471 int j = poWK->nFiltInitY; 3472 int jMax= poWK->nYRadius; 3473 if( iSrcY + j < 0 ) 3474 j = -iSrcY; 3475 if( iSrcY + jMax >= nSrcYSize ) 3476 jMax = nSrcYSize - iSrcY - 1; 3477 3478 int iMin = poWK->nFiltInitX; 3479 int iMax = poWK->nXRadius; 3480 if( iSrcX + iMin < 0 ) 3481 iMin = -iSrcX; 3482 if( iSrcX + iMax >= nSrcXSize ) 3483 iMax = nSrcXSize - iSrcX - 1; 3484 3485 const int bXScaleBelow1 = ( dfXScale < 1.0 ); 3486 const int bYScaleBelow1 = ( dfYScale < 1.0 ); 3487 3488 GPtrDiff_t iRowOffset = iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin; 3489 3490 // Loop over pixel rows in the kernel. 3491 for( ; j <= jMax; ++j ) 3492 { 3493 iRowOffset += nSrcXSize; 3494 3495 // Get pixel values. 3496 // We can potentially read extra elements after the "normal" end of the 3497 // source arrays, but the contract of papabySrcImage[iBand], 3498 // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity 3499 // is to have WARP_EXTRA_ELTS reserved at their end. 3500 if( !GWKGetPixelRow( poWK, iBand, iRowOffset, (iMax-iMin+2)/2, 3501 padfRowDensity, padfRowReal, padfRowImag ) ) 3502 continue; 3503 3504 // Calculate the Y weight. 3505 double dfWeight1 = ( bYScaleBelow1 ) ? 3506 pfnGetWeight((j - dfDeltaY) * dfYScale): 3507 pfnGetWeight(j - dfDeltaY); 3508 3509 // Iterate over pixels in row. 3510 double dfAccumulatorRealLocal = 0.0; 3511 double dfAccumulatorImagLocal = 0.0; 3512 double dfAccumulatorDensityLocal = 0.0; 3513 double dfAccumulatorWeightLocal = 0.0; 3514 3515 for( int i = iMin; i <= iMax; ++i ) 3516 { 3517 // Skip sampling if pixel has zero density. 3518 if( padfRowDensity != nullptr && 3519 padfRowDensity[i-iMin] < SRC_DENSITY_THRESHOLD ) 3520 continue; 3521 3522 double dfWeight2 = 0.0; 3523 3524 // Make or use a cached set of weights for this row. 3525 if( pabCalcX[i-iMin] ) 3526 { 3527 // Use saved weight value instead of recomputing it. 3528 dfWeight2 = padfWeightsX[i-iMin]; 3529 } 3530 else 3531 { 3532 // Calculate & save the X weight. 3533 padfWeightsX[i-iMin] = dfWeight2 = ( bXScaleBelow1 ) ? 3534 pfnGetWeight((i - dfDeltaX) * dfXScale): 3535 pfnGetWeight(i - dfDeltaX); 3536 3537 pabCalcX[i-iMin] = true; 3538 } 3539 3540 // Accumulate! 3541 dfAccumulatorRealLocal += padfRowReal[i-iMin] * dfWeight2; 3542 dfAccumulatorImagLocal += padfRowImag[i-iMin] * dfWeight2; 3543 if( padfRowDensity != nullptr ) 3544 dfAccumulatorDensityLocal += padfRowDensity[i-iMin] * dfWeight2; 3545 dfAccumulatorWeightLocal += dfWeight2; 3546 } 3547 3548 dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1; 3549 dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1; 3550 dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1; 3551 dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1; 3552 } 3553 3554 if( dfAccumulatorWeight < 0.000001 || 3555 (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001) ) 3556 { 3557 *pdfDensity = 0.0; 3558 return false; 3559 } 3560 3561 // Calculate the output taking into account weighting. 3562 if( dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001 ) 3563 { 3564 *pdfReal = dfAccumulatorReal / dfAccumulatorWeight; 3565 *pdfImag = dfAccumulatorImag / dfAccumulatorWeight; 3566 if( padfRowDensity != nullptr ) 3567 *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight; 3568 else 3569 *pdfDensity = 1.0; 3570 } 3571 else 3572 { 3573 *pdfReal = dfAccumulatorReal; 3574 *pdfImag = dfAccumulatorImag; 3575 if( padfRowDensity != nullptr ) 3576 *pdfDensity = dfAccumulatorDensity; 3577 else 3578 *pdfDensity = 1.0; 3579 } 3580 3581 return true; 3582 } 3583 3584 /************************************************************************/ 3585 /* GWKResampleOptimizedLanczos() */ 3586 /************************************************************************/ 3587 3588 static bool GWKResampleOptimizedLanczos( const GDALWarpKernel *poWK, int iBand, 3589 double dfSrcX, double dfSrcY, 3590 double *pdfDensity, 3591 double *pdfReal, double *pdfImag, 3592 GWKResampleWrkStruct* psWrkStruct ) 3593 3594 { 3595 // Save as local variables to avoid following pointers in loops. 3596 const int nSrcXSize = poWK->nSrcXSize; 3597 const int nSrcYSize = poWK->nSrcYSize; 3598 3599 double dfAccumulatorReal = 0.0; 3600 double dfAccumulatorImag = 0.0; 3601 double dfAccumulatorDensity = 0.0; 3602 double dfAccumulatorWeight = 0.0; 3603 const int iSrcX = static_cast<int>(floor( dfSrcX - 0.5 )); 3604 const int iSrcY = static_cast<int>(floor( dfSrcY - 0.5 )); 3605 const GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 3606 const double dfDeltaX = dfSrcX - 0.5 - iSrcX; 3607 const double dfDeltaY = dfSrcY - 0.5 - iSrcY; 3608 3609 const double dfXScale = poWK->dfXScale; 3610 const double dfYScale = poWK->dfYScale; 3611 3612 // Space for saved X weights. 3613 double *padfWeightsX = psWrkStruct->padfWeightsX; 3614 double *padfWeightsY = psWrkStruct->padfWeightsY; 3615 3616 // Space for saving a row of pixels. 3617 double *padfRowDensity = psWrkStruct->padfRowDensity; 3618 double *padfRowReal = psWrkStruct->padfRowReal; 3619 double *padfRowImag = psWrkStruct->padfRowImag; 3620 3621 // Skip sampling over edge of image. 3622 int jMin = poWK->nFiltInitY; 3623 int jMax = poWK->nYRadius; 3624 if( iSrcY + jMin < 0 ) 3625 jMin = -iSrcY; 3626 if( iSrcY + jMax >= nSrcYSize ) 3627 jMax = nSrcYSize - iSrcY - 1; 3628 3629 int iMin = poWK->nFiltInitX; 3630 int iMax = poWK->nXRadius; 3631 if( iSrcX + iMin < 0 ) 3632 iMin = -iSrcX; 3633 if( iSrcX + iMax >= nSrcXSize ) 3634 iMax = nSrcXSize - iSrcX - 1; 3635 3636 if( dfXScale < 1.0 ) 3637 { 3638 while( iMin * dfXScale < -3.0 ) 3639 iMin++; 3640 while( iMax * dfXScale > 3.0 ) 3641 iMax--; 3642 // padfWeightsX computed in GWKResampleCreateWrkStruct. 3643 } 3644 else 3645 { 3646 while( iMin - dfDeltaX < -3.0 ) 3647 iMin++; 3648 while( iMax - dfDeltaX > 3.0 ) 3649 iMax--; 3650 3651 if( iSrcX != psWrkStruct->iLastSrcX || 3652 dfDeltaX != psWrkStruct->dfLastDeltaX ) 3653 { 3654 // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the 3655 // following trigonometric formulas. 3656 3657 // TODO(schwehr): Move this somewhere where it can be rendered at LaTeX. 3658 // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) + cos(M_PI * dfBase) * sin(M_PI * k) 3659 // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k) 3660 // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) 3661 // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1) 3662 3663 // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) + cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k) 3664 // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k) 3665 3666 const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX); 3667 const double dfSin2PIDeltaXOver3 = 3668 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3; 3669 // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2. 3670 const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3); 3671 const double dfSinPIDeltaX = 3672 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3; 3673 const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI); 3674 const double dfInvPI2Over3xSinPIDeltaX = 3675 dfInvPI2Over3 * dfSinPIDeltaX; 3676 const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 = 3677 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3; 3678 const double dfSinPIOver3 = 0.8660254037844386; 3679 const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 = 3680 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3; 3681 const double padfCst[] = { 3682 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3, 3683 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 - 3684 dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3, 3685 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 + 3686 dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 }; 3687 3688 for( int i = iMin; i <= iMax; ++i ) 3689 { 3690 const double dfX = i - dfDeltaX; 3691 if( dfX == 0.0 ) 3692 padfWeightsX[i-poWK->nFiltInitX] = 1.0; 3693 else 3694 padfWeightsX[i-poWK->nFiltInitX] = 3695 padfCst[(i + 3) % 3] / (dfX * dfX); 3696 #if DEBUG_VERBOSE 3697 // TODO(schwehr): AlmostEqual. 3698 //CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] - 3699 // GWKLanczosSinc(dfX, 3.0)) < 1e-10); 3700 #endif 3701 } 3702 3703 psWrkStruct->iLastSrcX = iSrcX; 3704 psWrkStruct->dfLastDeltaX = dfDeltaX; 3705 } 3706 } 3707 3708 if( dfYScale < 1.0 ) 3709 { 3710 while( jMin * dfYScale < -3.0 ) 3711 jMin++; 3712 while( jMax * dfYScale > 3.0 ) 3713 jMax--; 3714 // padfWeightsY computed in GWKResampleCreateWrkStruct. 3715 } 3716 else 3717 { 3718 while( jMin - dfDeltaY < -3.0 ) 3719 jMin++; 3720 while( jMax - dfDeltaY > 3.0 ) 3721 jMax--; 3722 3723 if( iSrcY != psWrkStruct->iLastSrcY || 3724 dfDeltaY != psWrkStruct->dfLastDeltaY ) 3725 { 3726 const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY); 3727 const double dfSin2PIDeltaYOver3 = 3728 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3; 3729 // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2. 3730 const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3); 3731 const double dfSinPIDeltaY = 3732 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3; 3733 const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI); 3734 const double dfInvPI2Over3xSinPIDeltaY = 3735 dfInvPI2Over3 * dfSinPIDeltaY; 3736 const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 = 3737 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3; 3738 const double dfSinPIOver3 = 0.8660254037844386; 3739 const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 = 3740 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3; 3741 const double padfCst[] = { 3742 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3, 3743 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 - 3744 dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3, 3745 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 + 3746 dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 }; 3747 3748 for( int j = jMin; j <= jMax; ++j ) 3749 { 3750 const double dfY = j - dfDeltaY; 3751 if( dfY == 0.0 ) 3752 padfWeightsY[j-poWK->nFiltInitY] = 1.0; 3753 else 3754 padfWeightsY[j-poWK->nFiltInitY] = 3755 padfCst[(j + 3) % 3] / (dfY * dfY); 3756 #if DEBUG_VERBOSE 3757 // TODO(schwehr): AlmostEqual. 3758 //CPLAssert(fabs(padfWeightsY[j-poWK->nFiltInitY] - 3759 // GWKLanczosSinc(dfY, 3.0)) < 1e-10); 3760 #endif 3761 } 3762 3763 psWrkStruct->iLastSrcY = iSrcY; 3764 psWrkStruct->dfLastDeltaY = dfDeltaY; 3765 } 3766 } 3767 3768 GPtrDiff_t iRowOffset = iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin; 3769 3770 // If we have no density information, we can simply compute the 3771 // accumulated weight. 3772 if( padfRowDensity == nullptr ) 3773 { 3774 double dfRowAccWeight = 0.0; 3775 for( int i = iMin; i <= iMax; ++i ) 3776 { 3777 dfRowAccWeight += padfWeightsX[i-poWK->nFiltInitX]; 3778 } 3779 double dfColAccWeight = 0.0; 3780 for( int j = jMin; j <= jMax; ++j ) 3781 { 3782 dfColAccWeight += padfWeightsY[j-poWK->nFiltInitY]; 3783 } 3784 dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight; 3785 } 3786 3787 const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType); 3788 3789 // Loop over pixel rows in the kernel. 3790 int nCountValid = 0; 3791 for( int j = jMin; j <= jMax; ++j ) 3792 { 3793 iRowOffset += nSrcXSize; 3794 3795 // Get pixel values. 3796 // We can potentially read extra elements after the "normal" end of the 3797 // source arrays, but the contract of papabySrcImage[iBand], 3798 // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity 3799 // is to have WARP_EXTRA_ELTS reserved at their end. 3800 if( !GWKGetPixelRow( poWK, iBand, iRowOffset, (iMax-iMin+2)/2, 3801 padfRowDensity, padfRowReal, padfRowImag ) ) 3802 continue; 3803 3804 const double dfWeight1 = padfWeightsY[j-poWK->nFiltInitY]; 3805 3806 // Iterate over pixels in row. 3807 if( padfRowDensity != nullptr ) 3808 { 3809 for( int i = iMin; i <= iMax; ++i ) 3810 { 3811 // Skip sampling if pixel has zero density. 3812 if( padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD ) 3813 continue; 3814 3815 nCountValid ++; 3816 3817 // Use a cached set of weights for this row. 3818 const double dfWeight2 = 3819 dfWeight1 * padfWeightsX[i- poWK->nFiltInitX]; 3820 3821 // Accumulate! 3822 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2; 3823 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2; 3824 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2; 3825 dfAccumulatorWeight += dfWeight2; 3826 } 3827 } 3828 else if( bIsNonComplex ) 3829 { 3830 double dfRowAccReal = 0.0; 3831 for( int i = iMin; i <= iMax; ++i ) 3832 { 3833 const double dfWeight2 = padfWeightsX[i- poWK->nFiltInitX]; 3834 3835 // Accumulate! 3836 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2; 3837 } 3838 3839 dfAccumulatorReal += dfRowAccReal * dfWeight1; 3840 } 3841 else 3842 { 3843 double dfRowAccReal = 0.0; 3844 double dfRowAccImag = 0.0; 3845 for( int i = iMin; i <= iMax; ++i ) 3846 { 3847 const double dfWeight2 = padfWeightsX[i- poWK->nFiltInitX]; 3848 3849 // Accumulate! 3850 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2; 3851 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2; 3852 } 3853 3854 dfAccumulatorReal += dfRowAccReal * dfWeight1; 3855 dfAccumulatorImag += dfRowAccImag * dfWeight1; 3856 } 3857 } 3858 3859 if( dfAccumulatorWeight < 0.000001 || 3860 (padfRowDensity != nullptr && ( 3861 dfAccumulatorDensity < 0.000001 || 3862 nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1)/2)) ) 3863 { 3864 *pdfDensity = 0.0; 3865 return false; 3866 } 3867 3868 // Calculate the output taking into account weighting. 3869 if( dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001 ) 3870 { 3871 const double dfInvAcc = 1.0 / dfAccumulatorWeight; 3872 *pdfReal = dfAccumulatorReal * dfInvAcc; 3873 *pdfImag = dfAccumulatorImag * dfInvAcc; 3874 if( padfRowDensity != nullptr ) 3875 *pdfDensity = dfAccumulatorDensity * dfInvAcc; 3876 else 3877 *pdfDensity = 1.0; 3878 } 3879 else 3880 { 3881 *pdfReal = dfAccumulatorReal; 3882 *pdfImag = dfAccumulatorImag; 3883 if( padfRowDensity != nullptr ) 3884 *pdfDensity = dfAccumulatorDensity; 3885 else 3886 *pdfDensity = 1.0; 3887 } 3888 3889 return true; 3890 } 3891 3892 /************************************************************************/ 3893 /* GWKResampleNoMasksT() */ 3894 /************************************************************************/ 3895 3896 template <class T> 3897 static bool GWKResampleNoMasksT( const GDALWarpKernel *poWK, int iBand, 3898 double dfSrcX, double dfSrcY, 3899 T *pValue, double *padfWeight ) 3900 3901 { 3902 // Commonly used; save locally. 3903 const int nSrcXSize = poWK->nSrcXSize; 3904 const int nSrcYSize = poWK->nSrcYSize; 3905 3906 const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5)); 3907 const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5)); 3908 const GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 3909 3910 const int nXRadius = poWK->nXRadius; 3911 const int nYRadius = poWK->nYRadius; 3912 3913 // Politely refuse to process invalid coordinates or obscenely small image. 3914 if( iSrcX >= nSrcXSize || iSrcY >= nSrcYSize 3915 || nXRadius > nSrcXSize || nYRadius > nSrcYSize ) 3916 return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY, 3917 pValue); 3918 3919 T* pSrcBand = reinterpret_cast<T*>(poWK->papabySrcImage[iBand]); 3920 const double dfDeltaX = dfSrcX - 0.5 - iSrcX; 3921 const double dfDeltaY = dfSrcY - 0.5 - iSrcY; 3922 3923 const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample]; 3924 CPLAssert(pfnGetWeight); 3925 const FilterFunc4ValuesType pfnGetWeight4Values = 3926 apfGWKFilter4Values[poWK->eResample]; 3927 CPLAssert(pfnGetWeight4Values); 3928 3929 const double dfXScale = std::min(poWK->dfXScale, 1.0); 3930 const double dfYScale = std::min(poWK->dfYScale, 1.0); 3931 3932 // Loop over all rows in the kernel. 3933 double dfAccumulatorWeightHorizontal = 0.0; 3934 double dfAccumulatorWeightVertical = 0.0; 3935 3936 int iMin = 1 - nXRadius; 3937 if( iSrcX + iMin < 0 ) 3938 iMin = -iSrcX; 3939 int iMax = nXRadius; 3940 if( iSrcX + iMax >= nSrcXSize-1 ) 3941 iMax = nSrcXSize-1 - iSrcX; 3942 int i = iMin; // Used after for. 3943 int iC = 0; // Used after for. 3944 for( ; i+2 < iMax; i += 4, iC += 4 ) 3945 { 3946 padfWeight[iC] = (i - dfDeltaX) * dfXScale; 3947 padfWeight[iC+1] = padfWeight[iC] + dfXScale; 3948 padfWeight[iC+2] = padfWeight[iC+1] + dfXScale; 3949 padfWeight[iC+3] = padfWeight[iC+2] + dfXScale; 3950 dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight+iC); 3951 } 3952 for( ; i <= iMax; ++i, ++iC ) 3953 { 3954 const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale); 3955 padfWeight[iC] = dfWeight; 3956 dfAccumulatorWeightHorizontal += dfWeight; 3957 } 3958 3959 int j = 1 - nYRadius; 3960 if( iSrcY + j < 0 ) 3961 j = -iSrcY; 3962 int jMax = nYRadius; 3963 if( iSrcY + jMax >= nSrcYSize-1 ) 3964 jMax = nSrcYSize-1 - iSrcY; 3965 3966 double dfAccumulator = 0.0; 3967 3968 for( ; j <= jMax; ++j ) 3969 { 3970 const GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize; 3971 3972 // Loop over all pixels in the row. 3973 double dfAccumulatorLocal = 0.0; 3974 double dfAccumulatorLocal2 = 0.0; 3975 iC = 0; 3976 i = iMin; 3977 // Process by chunk of 4 cols. 3978 for( ; i+2 < iMax; i += 4, iC += 4 ) 3979 { 3980 // Retrieve the pixel & accumulate. 3981 dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC]; 3982 dfAccumulatorLocal += pSrcBand[i+1+iSampJ] * padfWeight[iC+1]; 3983 dfAccumulatorLocal2 += pSrcBand[i+2+iSampJ] * padfWeight[iC+2]; 3984 dfAccumulatorLocal2 += pSrcBand[i+3+iSampJ] * padfWeight[iC+3]; 3985 } 3986 dfAccumulatorLocal += dfAccumulatorLocal2; 3987 if( i < iMax ) 3988 { 3989 dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC]; 3990 dfAccumulatorLocal += pSrcBand[i+1+iSampJ] * padfWeight[iC+1]; 3991 i += 2; 3992 iC += 2; 3993 } 3994 if( i == iMax ) 3995 { 3996 dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC]; 3997 } 3998 3999 // Calculate the Y weight. 4000 const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale); 4001 dfAccumulator += dfWeight * dfAccumulatorLocal; 4002 dfAccumulatorWeightVertical += dfWeight; 4003 } 4004 4005 const double dfAccumulatorWeight = 4006 dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical; 4007 4008 *pValue = GWKClampValueT<T>(dfAccumulator / dfAccumulatorWeight); 4009 4010 return true; 4011 } 4012 4013 /* We restrict to 64bit processors because they are guaranteed to have SSE2 */ 4014 /* Could possibly be used too on 32bit, but we would need to check at runtime */ 4015 #if defined(__x86_64) || defined(_M_X64) 4016 4017 /************************************************************************/ 4018 /* GWKResampleNoMasks_SSE2_T() */ 4019 /************************************************************************/ 4020 4021 template<class T> 4022 static bool GWKResampleNoMasks_SSE2_T( const GDALWarpKernel *poWK, int iBand, 4023 double dfSrcX, double dfSrcY, 4024 T *pValue, double *padfWeight ) 4025 { 4026 // Commonly used; save locally. 4027 const int nSrcXSize = poWK->nSrcXSize; 4028 const int nSrcYSize = poWK->nSrcYSize; 4029 4030 const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5)); 4031 const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5)); 4032 const GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 4033 const int nXRadius = poWK->nXRadius; 4034 const int nYRadius = poWK->nYRadius; 4035 4036 // Politely refuse to process invalid coordinates or obscenely small image. 4037 if( iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || 4038 nXRadius > nSrcXSize || nYRadius > nSrcYSize ) 4039 return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY, 4040 pValue); 4041 4042 const T* pSrcBand = reinterpret_cast<const T*>(poWK->papabySrcImage[iBand]); 4043 4044 const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample]; 4045 CPLAssert(pfnGetWeight); 4046 const FilterFunc4ValuesType pfnGetWeight4Values = 4047 apfGWKFilter4Values[poWK->eResample]; 4048 CPLAssert(pfnGetWeight4Values); 4049 4050 const double dfDeltaX = dfSrcX - 0.5 - iSrcX; 4051 const double dfDeltaY = dfSrcY - 0.5 - iSrcY; 4052 const double dfXScale = std::min(poWK->dfXScale, 1.0); 4053 const double dfYScale = std::min(poWK->dfYScale, 1.0); 4054 4055 // Loop over all rows in the kernel. 4056 double dfAccumulatorWeightHorizontal = 0.0; 4057 double dfAccumulatorWeightVertical = 0.0; 4058 double dfAccumulator = 0.0; 4059 4060 int iMin = 1 - nXRadius; 4061 if( iSrcX + iMin < 0 ) 4062 iMin = -iSrcX; 4063 int iMax = nXRadius; 4064 if( iSrcX + iMax >= nSrcXSize-1 ) 4065 iMax = nSrcXSize-1 - iSrcX; 4066 int i, iC; 4067 for( iC = 0, i = iMin; i+2 < iMax; i+=4, iC+=4 ) 4068 { 4069 padfWeight[iC] = (i - dfDeltaX) * dfXScale; 4070 padfWeight[iC+1] = padfWeight[iC] + dfXScale; 4071 padfWeight[iC+2] = padfWeight[iC+1] + dfXScale; 4072 padfWeight[iC+3] = padfWeight[iC+2] + dfXScale; 4073 dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight+iC); 4074 } 4075 for( ; i <= iMax; ++i, ++iC ) 4076 { 4077 double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale); 4078 padfWeight[iC] = dfWeight; 4079 dfAccumulatorWeightHorizontal += dfWeight; 4080 } 4081 4082 int j = 1 - nYRadius; 4083 if( iSrcY + j < 0 ) 4084 j = -iSrcY; 4085 int jMax = nYRadius; 4086 if( iSrcY + jMax >= nSrcYSize-1 ) 4087 jMax = nSrcYSize-1 - iSrcY; 4088 4089 // Process by chunk of 4 rows. 4090 for( ; j+2 < jMax; j+=4 ) 4091 { 4092 const GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize; 4093 4094 // Loop over all pixels in the row. 4095 iC = 0; 4096 i = iMin; 4097 // Process by chunk of 4 cols. 4098 XMMReg4Double v_acc_1 = XMMReg4Double::Zero(); 4099 XMMReg4Double v_acc_2 = XMMReg4Double::Zero(); 4100 XMMReg4Double v_acc_3 = XMMReg4Double::Zero(); 4101 XMMReg4Double v_acc_4 = XMMReg4Double::Zero(); 4102 for( ; i+2 < iMax; i+=4, iC+=4 ) 4103 { 4104 // Retrieve the pixel & accumulate. 4105 XMMReg4Double v_pixels_1 = 4106 XMMReg4Double::Load4Val(pSrcBand+i+iSampJ); 4107 XMMReg4Double v_pixels_2 = 4108 XMMReg4Double::Load4Val(pSrcBand+i+iSampJ+nSrcXSize); 4109 XMMReg4Double v_pixels_3 = 4110 XMMReg4Double::Load4Val(pSrcBand+i+iSampJ+2*nSrcXSize); 4111 XMMReg4Double v_pixels_4 = 4112 XMMReg4Double::Load4Val(pSrcBand+i+iSampJ+3*nSrcXSize); 4113 4114 XMMReg4Double v_padfWeight = 4115 XMMReg4Double::Load4Val(padfWeight + iC); 4116 4117 v_acc_1 += v_pixels_1 * v_padfWeight; 4118 v_acc_2 += v_pixels_2 * v_padfWeight; 4119 v_acc_3 += v_pixels_3 * v_padfWeight; 4120 v_acc_4 += v_pixels_4 * v_padfWeight; 4121 } 4122 4123 if( i < iMax ) 4124 { 4125 XMMReg2Double v_pixels_1 = 4126 XMMReg2Double::Load2Val(pSrcBand+i+iSampJ); 4127 XMMReg2Double v_pixels_2 = 4128 XMMReg2Double::Load2Val(pSrcBand+i+iSampJ+nSrcXSize); 4129 XMMReg2Double v_pixels_3 = 4130 XMMReg2Double::Load2Val(pSrcBand+i+iSampJ+2*nSrcXSize); 4131 XMMReg2Double v_pixels_4 = 4132 XMMReg2Double::Load2Val(pSrcBand+i+iSampJ+3*nSrcXSize); 4133 4134 XMMReg2Double v_padfWeight = 4135 XMMReg2Double::Load2Val(padfWeight + iC); 4136 4137 v_acc_1.AddToLow( v_pixels_1 * v_padfWeight ); 4138 v_acc_2.AddToLow( v_pixels_2 * v_padfWeight ); 4139 v_acc_3.AddToLow( v_pixels_3 * v_padfWeight ); 4140 v_acc_4.AddToLow( v_pixels_4 * v_padfWeight ); 4141 4142 i+=2; 4143 iC+=2; 4144 } 4145 4146 double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum(); 4147 double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum(); 4148 double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum(); 4149 double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum(); 4150 4151 if( i == iMax ) 4152 { 4153 dfAccumulatorLocal_1 += 4154 static_cast<double>(pSrcBand[i+iSampJ]) * padfWeight[iC]; 4155 dfAccumulatorLocal_2 += 4156 static_cast<double>(pSrcBand[i+iSampJ + nSrcXSize]) * 4157 padfWeight[iC]; 4158 dfAccumulatorLocal_3 += 4159 static_cast<double>(pSrcBand[i+iSampJ + 2 * nSrcXSize]) * 4160 padfWeight[iC]; 4161 dfAccumulatorLocal_4 += 4162 static_cast<double>(pSrcBand[i+iSampJ + 3 * nSrcXSize]) * 4163 padfWeight[iC]; 4164 } 4165 4166 // Calculate the Y weight. 4167 const double dfWeight0 = (j - dfDeltaY) * dfYScale; 4168 const double dfWeight1 = dfWeight0 + dfYScale; 4169 const double dfWeight2 = dfWeight1 + dfYScale; 4170 const double dfWeight3 = dfWeight2 + dfYScale; 4171 double adfWeight[4] = { dfWeight0, dfWeight1, dfWeight2, dfWeight3 }; 4172 4173 dfAccumulatorWeightVertical += pfnGetWeight4Values(adfWeight); 4174 dfAccumulator += adfWeight[0] * dfAccumulatorLocal_1; 4175 dfAccumulator += adfWeight[1] * dfAccumulatorLocal_2; 4176 dfAccumulator += adfWeight[2] * dfAccumulatorLocal_3; 4177 dfAccumulator += adfWeight[3] * dfAccumulatorLocal_4; 4178 } 4179 for( ; j <= jMax; ++j ) 4180 { 4181 const GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize; 4182 4183 // Loop over all pixels in the row. 4184 iC = 0; 4185 i = iMin; 4186 // Process by chunk of 4 cols. 4187 XMMReg4Double v_acc = XMMReg4Double::Zero(); 4188 for( ; i+2 < iMax; i+=4, iC+=4 ) 4189 { 4190 // Retrieve the pixel & accumulate. 4191 XMMReg4Double v_pixels= XMMReg4Double::Load4Val(pSrcBand+i+iSampJ); 4192 XMMReg4Double v_padfWeight = 4193 XMMReg4Double::Load4Val(padfWeight + iC); 4194 4195 v_acc += v_pixels * v_padfWeight; 4196 } 4197 4198 double dfAccumulatorLocal = v_acc.GetHorizSum(); 4199 4200 if( i < iMax ) 4201 { 4202 dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC]; 4203 dfAccumulatorLocal += pSrcBand[i+1+iSampJ] * padfWeight[iC+1]; 4204 i+=2; 4205 iC+=2; 4206 } 4207 if( i == iMax ) 4208 { 4209 dfAccumulatorLocal += static_cast<double>(pSrcBand[i+iSampJ]) * padfWeight[iC]; 4210 } 4211 4212 // Calculate the Y weight. 4213 double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale); 4214 dfAccumulator += dfWeight * dfAccumulatorLocal; 4215 dfAccumulatorWeightVertical += dfWeight; 4216 } 4217 4218 const double dfAccumulatorWeight = 4219 dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical; 4220 4221 *pValue = GWKClampValueT<T>(dfAccumulator / dfAccumulatorWeight); 4222 4223 return true; 4224 } 4225 4226 /************************************************************************/ 4227 /* GWKResampleNoMasksT<GByte>() */ 4228 /************************************************************************/ 4229 4230 template<> 4231 bool GWKResampleNoMasksT<GByte>( const GDALWarpKernel *poWK, int iBand, 4232 double dfSrcX, double dfSrcY, 4233 GByte *pValue, double *padfWeight ) 4234 { 4235 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, 4236 pValue, padfWeight); 4237 } 4238 4239 /************************************************************************/ 4240 /* GWKResampleNoMasksT<GInt16>() */ 4241 /************************************************************************/ 4242 4243 template<> 4244 bool GWKResampleNoMasksT<GInt16>( const GDALWarpKernel *poWK, int iBand, 4245 double dfSrcX, double dfSrcY, 4246 GInt16 *pValue, double *padfWeight ) 4247 { 4248 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, 4249 pValue, padfWeight); 4250 } 4251 4252 /************************************************************************/ 4253 /* GWKResampleNoMasksT<GUInt16>() */ 4254 /************************************************************************/ 4255 4256 template<> 4257 bool GWKResampleNoMasksT<GUInt16>( const GDALWarpKernel *poWK, int iBand, 4258 double dfSrcX, double dfSrcY, 4259 GUInt16 *pValue, double *padfWeight ) 4260 { 4261 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, 4262 pValue, padfWeight); 4263 } 4264 4265 /************************************************************************/ 4266 /* GWKResampleNoMasksT<float>() */ 4267 /************************************************************************/ 4268 4269 template<> 4270 bool GWKResampleNoMasksT<float>( const GDALWarpKernel *poWK, int iBand, 4271 double dfSrcX, double dfSrcY, 4272 float *pValue, double *padfWeight ) 4273 { 4274 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, 4275 pValue, padfWeight); 4276 } 4277 4278 #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL 4279 4280 /************************************************************************/ 4281 /* GWKResampleNoMasksT<double>() */ 4282 /************************************************************************/ 4283 4284 template<> 4285 bool GWKResampleNoMasksT<double>( const GDALWarpKernel *poWK, int iBand, 4286 double dfSrcX, double dfSrcY, 4287 double *pValue, double *padfWeight ) 4288 { 4289 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, 4290 pValue, padfWeight); 4291 } 4292 4293 #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */ 4294 4295 #endif /* defined(__x86_64) || defined(_M_X64) */ 4296 4297 /************************************************************************/ 4298 /* GWKRoundSourceCoordinates() */ 4299 /************************************************************************/ 4300 4301 static void GWKRoundSourceCoordinates( int nDstXSize, 4302 double* padfX, 4303 double* padfY, 4304 double* padfZ, 4305 int* pabSuccess, 4306 double dfSrcCoordPrecision, 4307 double dfErrorThreshold, 4308 GDALTransformerFunc pfnTransformer, 4309 void* pTransformerArg, 4310 double dfDstXOff, 4311 double dfDstY ) 4312 { 4313 double dfPct = 0.8; 4314 if( dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0 ) 4315 { 4316 dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold); 4317 } 4318 const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision; 4319 4320 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 4321 { 4322 const double dfXBefore = padfX[iDstX]; 4323 const double dfYBefore = padfY[iDstX]; 4324 padfX[iDstX] = 4325 floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) * 4326 dfSrcCoordPrecision; 4327 padfY[iDstX] = 4328 floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) * 4329 dfSrcCoordPrecision; 4330 4331 // If we are in an uncertainty zone, go to non-approximated 4332 // transformation. 4333 // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must 4334 // be at least 10 times greater than the approximation error. 4335 if( fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold || 4336 fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold ) 4337 { 4338 padfX[iDstX] = iDstX + dfDstXOff; 4339 padfY[iDstX] = dfDstY; 4340 padfZ[iDstX] = 0.0; 4341 pfnTransformer( pTransformerArg, TRUE, 1, 4342 padfX + iDstX, padfY + iDstX, 4343 padfZ + iDstX, pabSuccess + iDstX ); 4344 padfX[iDstX] = 4345 floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) * 4346 dfSrcCoordPrecision; 4347 padfY[iDstX] = 4348 floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) * 4349 dfSrcCoordPrecision; 4350 } 4351 } 4352 } 4353 4354 /************************************************************************/ 4355 /* GWKOpenCLCase() */ 4356 /* */ 4357 /* This is identical to GWKGeneralCase(), but functions via */ 4358 /* OpenCL. This means we have vector optimization (SSE) and/or */ 4359 /* GPU optimization depending on our prefs. The code itself is */ 4360 /* general and not optimized, but by defining constants we can */ 4361 /* make some pretty darn good code on the fly. */ 4362 /************************************************************************/ 4363 4364 #if defined(HAVE_OPENCL) 4365 static CPLErr GWKOpenCLCase( GDALWarpKernel *poWK ) 4366 { 4367 const int nDstXSize = poWK->nDstXSize; 4368 const int nDstYSize = poWK->nDstYSize; 4369 const int nSrcXSize = poWK->nSrcXSize; 4370 const int nSrcYSize = poWK->nSrcYSize; 4371 const int nDstXOff = poWK->nDstXOff; 4372 const int nDstYOff = poWK->nDstYOff; 4373 const int nSrcXOff = poWK->nSrcXOff; 4374 const int nSrcYOff = poWK->nSrcYOff; 4375 bool bUseImag = false; 4376 4377 cl_channel_type imageFormat; 4378 switch( poWK->eWorkingDataType ) 4379 { 4380 case GDT_Byte: 4381 imageFormat = CL_UNORM_INT8; 4382 break; 4383 case GDT_UInt16: 4384 imageFormat = CL_UNORM_INT16; 4385 break; 4386 case GDT_CInt16: 4387 bUseImag = true; 4388 CPL_FALLTHROUGH 4389 case GDT_Int16: 4390 imageFormat = CL_SNORM_INT16; 4391 break; 4392 case GDT_CFloat32: 4393 bUseImag = true; 4394 CPL_FALLTHROUGH 4395 case GDT_Float32: 4396 imageFormat = CL_FLOAT; 4397 break; 4398 default: 4399 // No support for higher precision formats. 4400 CPLDebug("OpenCL", 4401 "Unsupported resampling OpenCL data type %d.", 4402 static_cast<int>(poWK->eWorkingDataType)); 4403 return CE_Warning; 4404 } 4405 4406 OCLResampAlg resampAlg; 4407 switch( poWK->eResample ) 4408 { 4409 case GRA_Bilinear: 4410 resampAlg = OCL_Bilinear; 4411 break; 4412 case GRA_Cubic: 4413 resampAlg = OCL_Cubic; 4414 break; 4415 case GRA_CubicSpline: 4416 resampAlg = OCL_CubicSpline; 4417 break; 4418 case GRA_Lanczos: 4419 resampAlg = OCL_Lanczos; 4420 break; 4421 default: 4422 // No support for higher precision formats. 4423 CPLDebug("OpenCL", 4424 "Unsupported resampling OpenCL resampling alg %d.", 4425 static_cast<int>(poWK->eResample)); 4426 return CE_Warning; 4427 } 4428 4429 struct oclWarper *warper = nullptr; 4430 cl_int err; 4431 CPLErr eErr = CE_None; 4432 4433 // TODO(schwehr): Fix indenting. 4434 try 4435 { 4436 4437 // Using a factor of 2 or 4 seems to have much less rounding error 4438 // than 3 on the GPU. 4439 // Then the rounding error can cause strange artifacts under the 4440 // right conditions. 4441 warper = 4442 GDALWarpKernelOpenCL_createEnv(nSrcXSize, nSrcYSize, 4443 nDstXSize, nDstYSize, 4444 imageFormat, poWK->nBands, 4, 4445 bUseImag, 4446 poWK->papanBandSrcValid != nullptr, 4447 poWK->pafDstDensity, 4448 poWK->padfDstNoDataReal, 4449 resampAlg, &err); 4450 4451 if( err != CL_SUCCESS || warper == nullptr ) 4452 { 4453 eErr = CE_Warning; 4454 if( warper != nullptr ) 4455 throw eErr; 4456 return eErr; 4457 } 4458 4459 CPLDebug("GDAL", "GDALWarpKernel()::GWKOpenCLCase() " 4460 "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d", 4461 nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, 4462 nDstXOff, nDstYOff, nDstXSize, nDstYSize); 4463 4464 if( !poWK->pfnProgress( poWK->dfProgressBase, "", poWK->pProgress ) ) 4465 { 4466 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); 4467 eErr = CE_Failure; 4468 throw eErr; 4469 } 4470 4471 /* ==================================================================== */ 4472 /* Loop over bands. */ 4473 /* ==================================================================== */ 4474 for( int iBand = 0; iBand < poWK->nBands; iBand++ ) 4475 { 4476 if( poWK->papanBandSrcValid != nullptr && 4477 poWK->papanBandSrcValid[iBand] != nullptr) 4478 { 4479 GDALWarpKernelOpenCL_setSrcValid( 4480 warper, reinterpret_cast<int *>(poWK->papanBandSrcValid[iBand]), iBand); 4481 if( err != CL_SUCCESS ) 4482 { 4483 CPLError( CE_Failure, CPLE_AppDefined, 4484 "OpenCL routines reported failure (%d) on line %d.", 4485 static_cast<int>(err), __LINE__ ); 4486 eErr = CE_Failure; 4487 throw eErr; 4488 } 4489 } 4490 4491 err = GDALWarpKernelOpenCL_setSrcImg(warper, 4492 poWK->papabySrcImage[iBand], 4493 iBand); 4494 if( err != CL_SUCCESS ) 4495 { 4496 CPLError(CE_Failure, CPLE_AppDefined, 4497 "OpenCL routines reported failure (%d) on line %d.", 4498 static_cast<int>(err), __LINE__); 4499 eErr = CE_Failure; 4500 throw eErr; 4501 } 4502 4503 err = GDALWarpKernelOpenCL_setDstImg(warper, 4504 poWK->papabyDstImage[iBand], 4505 iBand); 4506 if( err != CL_SUCCESS ) 4507 { 4508 CPLError(CE_Failure, CPLE_AppDefined, 4509 "OpenCL routines reported failure (%d) on line %d.", 4510 static_cast<int>(err), __LINE__); 4511 eErr = CE_Failure; 4512 throw eErr; 4513 } 4514 } 4515 4516 /* -------------------------------------------------------------------- */ 4517 /* Allocate x,y,z coordinate arrays for transformation ... one */ 4518 /* scanlines worth of positions. */ 4519 /* -------------------------------------------------------------------- */ 4520 4521 // For x, 2 *, because we cache the precomputed values at the end. 4522 double *padfX = 4523 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize)); 4524 double * padfY = 4525 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 4526 double *padfZ = 4527 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 4528 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize)); 4529 const double dfSrcCoordPrecision = CPLAtof( 4530 CSLFetchNameValueDef(poWK->papszWarpOptions, 4531 "SRC_COORD_PRECISION", "0")); 4532 const double dfErrorThreshold = CPLAtof( 4533 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); 4534 4535 // Precompute values. 4536 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 4537 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; 4538 4539 /* ==================================================================== */ 4540 /* Loop over output lines. */ 4541 /* ==================================================================== */ 4542 for( int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY ) 4543 { 4544 /* ---------------------------------------------------------------- */ 4545 /* Setup points to transform to source image space. */ 4546 /* ---------------------------------------------------------------- */ 4547 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); 4548 const double dfYConst = iDstY + 0.5 + poWK->nDstYOff; 4549 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 4550 padfY[iDstX] = dfYConst; 4551 memset( padfZ, 0, sizeof(double) * nDstXSize ); 4552 4553 /* ---------------------------------------------------------------- */ 4554 /* Transform the points from destination pixel/line coordinates*/ 4555 /* to source pixel/line coordinates. */ 4556 /* ---------------------------------------------------------------- */ 4557 poWK->pfnTransformer( poWK->pTransformerArg, TRUE, nDstXSize, 4558 padfX, padfY, padfZ, pabSuccess ); 4559 if( dfSrcCoordPrecision > 0.0 ) 4560 { 4561 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, 4562 pabSuccess, 4563 dfSrcCoordPrecision, 4564 dfErrorThreshold, 4565 poWK->pfnTransformer, 4566 poWK->pTransformerArg, 4567 0.5 + nDstXOff, 4568 iDstY + 0.5 + nDstYOff); 4569 } 4570 4571 err = GDALWarpKernelOpenCL_setCoordRow(warper, padfX, padfY, 4572 nSrcXOff, nSrcYOff, 4573 pabSuccess, iDstY); 4574 if( err != CL_SUCCESS ) 4575 { 4576 CPLError(CE_Failure, CPLE_AppDefined, 4577 "OpenCL routines reported failure (%d) on line %d.", 4578 static_cast<int>(err), __LINE__); 4579 eErr = CE_Failure; 4580 break; 4581 } 4582 4583 // Update the valid & density masks because we don't do so in the 4584 // kernel. 4585 for( int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++ ) 4586 { 4587 const double dfX = padfX[iDstX]; 4588 const double dfY = padfY[iDstX]; 4589 const GPtrDiff_t iDstOffset = iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize; 4590 4591 // See GWKGeneralCase() for appropriate commenting. 4592 if( !pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff ) 4593 continue; 4594 4595 int iSrcX = static_cast<int>(dfX) - nSrcXOff; 4596 int iSrcY = static_cast<int>(dfY) - nSrcYOff; 4597 4598 if( iSrcX < 0 || iSrcX >= nSrcXSize || 4599 iSrcY < 0 || iSrcY >= nSrcYSize ) 4600 continue; 4601 4602 GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 4603 double dfDensity = 1.0; 4604 4605 if( poWK->pafUnifiedSrcDensity != nullptr 4606 && iSrcX >= 0 && iSrcY >= 0 4607 && iSrcX < nSrcXSize && iSrcY < nSrcYSize ) 4608 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; 4609 4610 GWKOverlayDensity( poWK, iDstOffset, dfDensity ); 4611 4612 // Because this is on the bit-wise level, it can't be done well in 4613 // OpenCL. 4614 if( poWK->panDstValid != nullptr ) 4615 poWK->panDstValid[iDstOffset>>5] |= 0x01 << (iDstOffset & 0x1f); 4616 } 4617 } 4618 4619 CPLFree( padfX ); 4620 CPLFree( padfY ); 4621 CPLFree( padfZ ); 4622 CPLFree( pabSuccess ); 4623 4624 if( eErr != CE_None ) 4625 throw eErr; 4626 4627 err = GDALWarpKernelOpenCL_runResamp(warper, 4628 poWK->pafUnifiedSrcDensity, 4629 poWK->panUnifiedSrcValid, 4630 poWK->pafDstDensity, 4631 poWK->panDstValid, 4632 poWK->dfXScale, poWK->dfYScale, 4633 poWK->dfXFilter, poWK->dfYFilter, 4634 poWK->nXRadius, poWK->nYRadius, 4635 poWK->nFiltInitX, poWK->nFiltInitY); 4636 4637 if( err != CL_SUCCESS ) 4638 { 4639 CPLError(CE_Failure, CPLE_AppDefined, 4640 "OpenCL routines reported failure (%d) on line %d.", 4641 static_cast<int>(err), __LINE__); 4642 eErr = CE_Failure; 4643 throw eErr; 4644 } 4645 4646 /* ==================================================================== */ 4647 /* Loop over output lines. */ 4648 /* ==================================================================== */ 4649 for( int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++ ) 4650 { 4651 for( int iBand = 0; iBand < poWK->nBands; iBand++ ) 4652 { 4653 void *rowReal = nullptr; 4654 void *rowImag = nullptr; 4655 GByte *pabyDst = poWK->papabyDstImage[iBand]; 4656 4657 err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag, 4658 iDstY, iBand); 4659 if( err != CL_SUCCESS ) 4660 { 4661 CPLError( CE_Failure, CPLE_AppDefined, 4662 "OpenCL routines reported failure (%d) on line %d.", 4663 static_cast<int>(err), __LINE__ ); 4664 eErr = CE_Failure; 4665 throw eErr; 4666 } 4667 4668 // Copy the data from the warper to GDAL's memory. 4669 switch( poWK->eWorkingDataType ) 4670 { 4671 case GDT_Byte: 4672 memcpy(&(pabyDst[iDstY*nDstXSize]), 4673 rowReal, sizeof(GByte)*nDstXSize); 4674 break; 4675 case GDT_Int16: 4676 memcpy(&(reinterpret_cast<GInt16 *>(pabyDst)[iDstY*nDstXSize]), 4677 rowReal, sizeof(GInt16)*nDstXSize); 4678 break; 4679 case GDT_UInt16: 4680 memcpy(&(reinterpret_cast<GUInt16 *>(pabyDst)[iDstY*nDstXSize]), 4681 rowReal, sizeof(GUInt16)*nDstXSize); 4682 break; 4683 case GDT_Float32: 4684 memcpy(&(reinterpret_cast<float *>(pabyDst)[iDstY*nDstXSize]), 4685 rowReal, sizeof(float)*nDstXSize); 4686 break; 4687 case GDT_CInt16: 4688 { 4689 GInt16 *pabyDstI16 = &(reinterpret_cast<GInt16 *>(pabyDst)[iDstY*nDstXSize]); 4690 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 4691 { 4692 pabyDstI16[iDstX*2 ] = static_cast<GInt16 *>(rowReal)[iDstX]; 4693 pabyDstI16[iDstX*2+1] = static_cast<GInt16 *>(rowImag)[iDstX]; 4694 } 4695 } 4696 break; 4697 case GDT_CFloat32: 4698 { 4699 float *pabyDstF32 = &(reinterpret_cast<float *>(pabyDst)[iDstY*nDstXSize]); 4700 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 4701 { 4702 pabyDstF32[iDstX*2 ] = static_cast<float *>(rowReal)[iDstX]; 4703 pabyDstF32[iDstX*2+1] = static_cast<float *>(rowImag)[iDstX]; 4704 } 4705 } 4706 break; 4707 default: 4708 // No support for higher precision formats. 4709 CPLError(CE_Failure, CPLE_AppDefined, 4710 "Unsupported resampling OpenCL data type %d.", 4711 static_cast<int>(poWK->eWorkingDataType) ); 4712 eErr = CE_Failure; 4713 throw eErr; 4714 } 4715 } 4716 } 4717 } 4718 catch( const CPLErr& ) 4719 { 4720 } 4721 4722 if( (err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS ) 4723 { 4724 CPLError(CE_Failure, CPLE_AppDefined, 4725 "OpenCL routines reported failure (%d) on line %d.", 4726 static_cast<int>(err), __LINE__ ); 4727 return CE_Failure; 4728 } 4729 4730 return eErr; 4731 } 4732 #endif /* defined(HAVE_OPENCL) */ 4733 4734 /************************************************************************/ 4735 /* GWKCheckAndComputeSrcOffsets() */ 4736 /************************************************************************/ 4737 static CPL_INLINE bool GWKCheckAndComputeSrcOffsets( 4738 const int* _pabSuccess, 4739 int _iDstX, 4740 const double* _padfX, 4741 const double* _padfY, 4742 const GDALWarpKernel* _poWK, 4743 int _nSrcXSize, 4744 int _nSrcYSize, 4745 GPtrDiff_t& iSrcOffset) 4746 { 4747 if( !_pabSuccess[_iDstX] ) 4748 return false; 4749 4750 // If this happens this is likely the symptom of a bug somewhere. 4751 if( CPLIsNan(_padfX[_iDstX]) || CPLIsNan(_padfY[_iDstX]) ) 4752 { 4753 static bool bNanCoordFound = false; 4754 if( !bNanCoordFound ) 4755 { 4756 CPLDebug("WARP", 4757 "GWKCheckAndComputeSrcOffsets(): " 4758 "NaN coordinate found on point %d.", 4759 _iDstX); 4760 bNanCoordFound = true; 4761 } 4762 return false; 4763 } 4764 4765 /* -------------------------------------------------------------------- */ 4766 /* Figure out what pixel we want in our source raster, and skip */ 4767 /* further processing if it is well off the source image. */ 4768 /* -------------------------------------------------------------------- */ 4769 /* We test against the value before casting to avoid the */ 4770 /* problem of asymmetric truncation effects around zero. That is */ 4771 /* -0.5 will be 0 when cast to an int. */ 4772 if( _padfX[_iDstX] < _poWK->nSrcXOff 4773 || _padfY[_iDstX] < _poWK->nSrcYOff ) 4774 return false; 4775 4776 // Check for potential overflow when casting from float to int, (if 4777 // operating outside natural projection area, padfX/Y can be a very huge 4778 // positive number before doing the actual conversion), as such cast is 4779 // undefined behavior that can trigger exception with some compilers 4780 // (see #6753) 4781 if( _padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff || 4782 _padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff ) 4783 { 4784 return false; 4785 } 4786 4787 int iSrcX = 4788 static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff; 4789 int iSrcY = 4790 static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff; 4791 if( iSrcX == _nSrcXSize ) 4792 iSrcX --; 4793 if( iSrcY == _nSrcYSize ) 4794 iSrcY --; 4795 4796 // Those checks should normally be OK given the previous ones. 4797 CPLAssert( iSrcX >= 0 ); 4798 CPLAssert( iSrcY >= 0 ); 4799 CPLAssert( iSrcX < _nSrcXSize ); 4800 CPLAssert( iSrcY < _nSrcYSize ); 4801 4802 iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize; 4803 4804 return true; 4805 } 4806 4807 /************************************************************************/ 4808 /* GWKGeneralCase() */ 4809 /* */ 4810 /* This is the most general case. It attempts to handle all */ 4811 /* possible features with relatively little concern for */ 4812 /* efficiency. */ 4813 /************************************************************************/ 4814 4815 static void GWKGeneralCaseThread( void* pData) 4816 4817 { 4818 GWKJobStruct* psJob = reinterpret_cast<GWKJobStruct *>(pData); 4819 GDALWarpKernel *poWK = psJob->poWK; 4820 const int iYMin = psJob->iYMin; 4821 const int iYMax = psJob->iYMax; 4822 4823 int nDstXSize = poWK->nDstXSize; 4824 int nSrcXSize = poWK->nSrcXSize; 4825 int nSrcYSize = poWK->nSrcYSize; 4826 4827 /* -------------------------------------------------------------------- */ 4828 /* Allocate x,y,z coordinate arrays for transformation ... one */ 4829 /* scanlines worth of positions. */ 4830 /* -------------------------------------------------------------------- */ 4831 // For x, 2 *, because we cache the precomputed values at the end. 4832 double *padfX = 4833 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize)); 4834 double *padfY = 4835 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 4836 double *padfZ = 4837 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 4838 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize)); 4839 4840 const bool bUse4SamplesFormula = 4841 poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; 4842 4843 GWKResampleWrkStruct* psWrkStruct = nullptr; 4844 if( poWK->eResample != GRA_NearestNeighbour ) 4845 { 4846 psWrkStruct = GWKResampleCreateWrkStruct(poWK); 4847 } 4848 const double dfSrcCoordPrecision = CPLAtof( 4849 CSLFetchNameValueDef(poWK->papszWarpOptions, 4850 "SRC_COORD_PRECISION", "0")); 4851 const double dfErrorThreshold = CPLAtof( 4852 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); 4853 4854 // Precompute values. 4855 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 4856 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; 4857 4858 /* ==================================================================== */ 4859 /* Loop over output lines. */ 4860 /* ==================================================================== */ 4861 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) 4862 { 4863 /* -------------------------------------------------------------------- */ 4864 /* Setup points to transform to source image space. */ 4865 /* -------------------------------------------------------------------- */ 4866 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); 4867 const double dfY = iDstY + 0.5 + poWK->nDstYOff; 4868 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 4869 padfY[iDstX] = dfY; 4870 memset( padfZ, 0, sizeof(double) * nDstXSize ); 4871 4872 /* -------------------------------------------------------------------- */ 4873 /* Transform the points from destination pixel/line coordinates */ 4874 /* to source pixel/line coordinates. */ 4875 /* -------------------------------------------------------------------- */ 4876 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, 4877 padfX, padfY, padfZ, pabSuccess ); 4878 if( dfSrcCoordPrecision > 0.0 ) 4879 { 4880 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, 4881 pabSuccess, 4882 dfSrcCoordPrecision, 4883 dfErrorThreshold, 4884 poWK->pfnTransformer, 4885 psJob->pTransformerArg, 4886 0.5 + poWK->nDstXOff, 4887 iDstY + 0.5 + poWK->nDstYOff); 4888 } 4889 4890 /* ==================================================================== */ 4891 /* Loop over pixels in output scanline. */ 4892 /* ==================================================================== */ 4893 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 4894 { 4895 GPtrDiff_t iSrcOffset = 0; 4896 if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY, 4897 poWK, nSrcXSize, nSrcYSize, iSrcOffset) ) 4898 continue; 4899 4900 /* -------------------------------------------------------------------- */ 4901 /* Do not try to apply transparent/invalid source pixels to the */ 4902 /* destination. This currently ignores the multi-pixel input */ 4903 /* of bilinear and cubic resamples. */ 4904 /* -------------------------------------------------------------------- */ 4905 double dfDensity = 1.0; 4906 4907 if( poWK->pafUnifiedSrcDensity != nullptr ) 4908 { 4909 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; 4910 if( dfDensity < SRC_DENSITY_THRESHOLD ) 4911 continue; 4912 } 4913 4914 if( poWK->panUnifiedSrcValid != nullptr 4915 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 4916 & (0x01 << (iSrcOffset & 0x1f))) ) 4917 continue; 4918 4919 /* ==================================================================== */ 4920 /* Loop processing each band. */ 4921 /* ==================================================================== */ 4922 bool bHasFoundDensity = false; 4923 4924 const GPtrDiff_t iDstOffset = iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize; 4925 for( int iBand = 0; iBand < poWK->nBands; iBand++ ) 4926 { 4927 double dfBandDensity = 0.0; 4928 double dfValueReal = 0.0; 4929 double dfValueImag = 0.0; 4930 4931 /* -------------------------------------------------------------------- */ 4932 /* Collect the source value. */ 4933 /* -------------------------------------------------------------------- */ 4934 if( poWK->eResample == GRA_NearestNeighbour || 4935 nSrcXSize == 1 || nSrcYSize == 1 ) 4936 { 4937 // FALSE is returned if dfBandDensity == 0, which is 4938 // checked below. 4939 CPL_IGNORE_RET_VAL( 4940 GWKGetPixelValue( poWK, iBand, iSrcOffset, 4941 &dfBandDensity, 4942 &dfValueReal, &dfValueImag )); 4943 } 4944 else if( poWK->eResample == GRA_Bilinear && 4945 bUse4SamplesFormula ) 4946 { 4947 GWKBilinearResample4Sample( poWK, iBand, 4948 padfX[iDstX]-poWK->nSrcXOff, 4949 padfY[iDstX]-poWK->nSrcYOff, 4950 &dfBandDensity, 4951 &dfValueReal, &dfValueImag ); 4952 } 4953 else if( poWK->eResample == GRA_Cubic && 4954 bUse4SamplesFormula ) 4955 { 4956 GWKCubicResample4Sample( poWK, iBand, 4957 padfX[iDstX]-poWK->nSrcXOff, 4958 padfY[iDstX]-poWK->nSrcYOff, 4959 &dfBandDensity, 4960 &dfValueReal, &dfValueImag ); 4961 } 4962 else 4963 #ifdef DEBUG 4964 // Only useful for clang static analyzer. 4965 if( psWrkStruct != nullptr ) 4966 #endif 4967 { 4968 psWrkStruct->pfnGWKResample( poWK, iBand, 4969 padfX[iDstX]-poWK->nSrcXOff, 4970 padfY[iDstX]-poWK->nSrcYOff, 4971 &dfBandDensity, 4972 &dfValueReal, &dfValueImag, psWrkStruct ); 4973 } 4974 4975 // If we didn't find any valid inputs skip to next band. 4976 if( dfBandDensity < BAND_DENSITY_THRESHOLD ) 4977 continue; 4978 4979 bHasFoundDensity = true; 4980 4981 /* -------------------------------------------------------------------- */ 4982 /* We have a computed value from the source. Now apply it to */ 4983 /* the destination pixel. */ 4984 /* -------------------------------------------------------------------- */ 4985 GWKSetPixelValue( poWK, iBand, iDstOffset, 4986 dfBandDensity, 4987 dfValueReal, dfValueImag ); 4988 } 4989 4990 if( !bHasFoundDensity ) 4991 continue; 4992 4993 /* -------------------------------------------------------------------- */ 4994 /* Update destination density/validity masks. */ 4995 /* -------------------------------------------------------------------- */ 4996 GWKOverlayDensity( poWK, iDstOffset, dfDensity ); 4997 4998 if( poWK->panDstValid != nullptr ) 4999 { 5000 poWK->panDstValid[iDstOffset>>5] |= 5001 0x01 << (iDstOffset & 0x1f); 5002 } 5003 } /* Next iDstX */ 5004 5005 /* -------------------------------------------------------------------- */ 5006 /* Report progress to the user, and optionally cancel out. */ 5007 /* -------------------------------------------------------------------- */ 5008 if( psJob->pfnProgress && psJob->pfnProgress(psJob) ) 5009 break; 5010 } 5011 5012 /* -------------------------------------------------------------------- */ 5013 /* Cleanup and return. */ 5014 /* -------------------------------------------------------------------- */ 5015 CPLFree( padfX ); 5016 CPLFree( padfY ); 5017 CPLFree( padfZ ); 5018 CPLFree( pabSuccess ); 5019 if( psWrkStruct ) 5020 GWKResampleDeleteWrkStruct(psWrkStruct); 5021 } 5022 5023 static CPLErr GWKGeneralCase( GDALWarpKernel *poWK ) 5024 { 5025 return GWKRun( poWK, "GWKGeneralCase", GWKGeneralCaseThread ); 5026 } 5027 5028 /************************************************************************/ 5029 /* GWKRealCase() */ 5030 /* */ 5031 /* General case for non-complex data types. */ 5032 /************************************************************************/ 5033 5034 static void GWKRealCaseThread( void* pData) 5035 5036 { 5037 GWKJobStruct* psJob = static_cast<GWKJobStruct*>(pData); 5038 GDALWarpKernel *poWK = psJob->poWK; 5039 const int iYMin = psJob->iYMin; 5040 const int iYMax = psJob->iYMax; 5041 5042 const int nDstXSize = poWK->nDstXSize; 5043 const int nSrcXSize = poWK->nSrcXSize; 5044 const int nSrcYSize = poWK->nSrcYSize; 5045 5046 /* -------------------------------------------------------------------- */ 5047 /* Allocate x,y,z coordinate arrays for transformation ... one */ 5048 /* scanlines worth of positions. */ 5049 /* -------------------------------------------------------------------- */ 5050 5051 // For x, 2 *, because we cache the precomputed values at the end. 5052 double *padfX = 5053 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize)); 5054 double *padfY = 5055 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5056 double *padfZ = 5057 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5058 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize)); 5059 5060 const bool bUse4SamplesFormula = 5061 poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; 5062 5063 GWKResampleWrkStruct* psWrkStruct = nullptr; 5064 if( poWK->eResample != GRA_NearestNeighbour ) 5065 { 5066 psWrkStruct = GWKResampleCreateWrkStruct(poWK); 5067 } 5068 const double dfSrcCoordPrecision = CPLAtof( 5069 CSLFetchNameValueDef(poWK->papszWarpOptions, 5070 "SRC_COORD_PRECISION", "0")); 5071 const double dfErrorThreshold = CPLAtof( 5072 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); 5073 5074 const bool bSrcMaskIsDensity = 5075 poWK->panUnifiedSrcValid == nullptr && 5076 poWK->papanBandSrcValid == nullptr && 5077 poWK->pafUnifiedSrcDensity != nullptr; 5078 5079 // Precompute values. 5080 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5081 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; 5082 5083 /* ==================================================================== */ 5084 /* Loop over output lines. */ 5085 /* ==================================================================== */ 5086 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) 5087 { 5088 /* -------------------------------------------------------------------- */ 5089 /* Setup points to transform to source image space. */ 5090 /* -------------------------------------------------------------------- */ 5091 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); 5092 const double dfY = iDstY + 0.5 + poWK->nDstYOff; 5093 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5094 padfY[iDstX] = dfY; 5095 memset( padfZ, 0, sizeof(double) * nDstXSize ); 5096 5097 /* -------------------------------------------------------------------- */ 5098 /* Transform the points from destination pixel/line coordinates */ 5099 /* to source pixel/line coordinates. */ 5100 /* -------------------------------------------------------------------- */ 5101 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, 5102 padfX, padfY, padfZ, pabSuccess ); 5103 if( dfSrcCoordPrecision > 0.0 ) 5104 { 5105 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, 5106 pabSuccess, 5107 dfSrcCoordPrecision, 5108 dfErrorThreshold, 5109 poWK->pfnTransformer, 5110 psJob->pTransformerArg, 5111 0.5 + poWK->nDstXOff, 5112 iDstY + 0.5 + poWK->nDstYOff); 5113 } 5114 5115 /* ==================================================================== */ 5116 /* Loop over pixels in output scanline. */ 5117 /* ==================================================================== */ 5118 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5119 { 5120 GPtrDiff_t iSrcOffset = 0; 5121 if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY, 5122 poWK, nSrcXSize, nSrcYSize, iSrcOffset) ) 5123 continue; 5124 5125 /* -------------------------------------------------------------------- */ 5126 /* Do not try to apply transparent/invalid source pixels to the */ 5127 /* destination. This currently ignores the multi-pixel input */ 5128 /* of bilinear and cubic resamples. */ 5129 /* -------------------------------------------------------------------- */ 5130 double dfDensity = 1.0; 5131 5132 if( poWK->pafUnifiedSrcDensity != nullptr ) 5133 { 5134 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; 5135 if( dfDensity < SRC_DENSITY_THRESHOLD ) 5136 continue; 5137 } 5138 5139 if( poWK->panUnifiedSrcValid != nullptr 5140 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 5141 & (0x01 << (iSrcOffset & 0x1f))) ) 5142 continue; 5143 5144 /* ==================================================================== */ 5145 /* Loop processing each band. */ 5146 /* ==================================================================== */ 5147 bool bHasFoundDensity = false; 5148 5149 const GPtrDiff_t iDstOffset = iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize; 5150 for( int iBand = 0; iBand < poWK->nBands; iBand++ ) 5151 { 5152 double dfBandDensity = 0.0; 5153 double dfValueReal = 0.0; 5154 5155 /* -------------------------------------------------------------------- */ 5156 /* Collect the source value. */ 5157 /* -------------------------------------------------------------------- */ 5158 if( poWK->eResample == GRA_NearestNeighbour || 5159 nSrcXSize == 1 || nSrcYSize == 1 ) 5160 { 5161 // FALSE is returned if dfBandDensity == 0, which is 5162 // checked below. 5163 CPL_IGNORE_RET_VAL( 5164 GWKGetPixelValueReal( poWK, iBand, iSrcOffset, 5165 &dfBandDensity, &dfValueReal )); 5166 } 5167 else if( poWK->eResample == GRA_Bilinear && 5168 bUse4SamplesFormula ) 5169 { 5170 double dfValueImagIgnored = 0.0; 5171 GWKBilinearResample4Sample( poWK, iBand, 5172 padfX[iDstX]-poWK->nSrcXOff, 5173 padfY[iDstX]-poWK->nSrcYOff, 5174 &dfBandDensity, 5175 &dfValueReal, &dfValueImagIgnored ); 5176 } 5177 else if( poWK->eResample == GRA_Cubic && 5178 bUse4SamplesFormula ) 5179 { 5180 if( bSrcMaskIsDensity ) 5181 { 5182 if( poWK->eWorkingDataType == GDT_Byte ) 5183 { 5184 GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>( 5185 poWK, iBand, 5186 padfX[iDstX]-poWK->nSrcXOff, 5187 padfY[iDstX]-poWK->nSrcYOff, 5188 &dfBandDensity, 5189 &dfValueReal ); 5190 } 5191 else if( poWK->eWorkingDataType == GDT_UInt16 ) 5192 { 5193 GWKCubicResampleSrcMaskIsDensity4SampleRealT<GUInt16>( 5194 poWK, iBand, 5195 padfX[iDstX]-poWK->nSrcXOff, 5196 padfY[iDstX]-poWK->nSrcYOff, 5197 &dfBandDensity, 5198 &dfValueReal ); 5199 } 5200 else 5201 { 5202 GWKCubicResampleSrcMaskIsDensity4SampleReal( poWK, iBand, 5203 padfX[iDstX]-poWK->nSrcXOff, 5204 padfY[iDstX]-poWK->nSrcYOff, 5205 &dfBandDensity, 5206 &dfValueReal ); 5207 } 5208 } 5209 else 5210 { 5211 double dfValueImagIgnored = 0.0; 5212 GWKCubicResample4Sample( poWK, iBand, 5213 padfX[iDstX]-poWK->nSrcXOff, 5214 padfY[iDstX]-poWK->nSrcYOff, 5215 &dfBandDensity, 5216 &dfValueReal, 5217 &dfValueImagIgnored); 5218 } 5219 } 5220 else 5221 #ifdef DEBUG 5222 // Only useful for clang static analyzer. 5223 if( psWrkStruct != nullptr ) 5224 #endif 5225 { 5226 double dfValueImagIgnored = 0.0; 5227 psWrkStruct->pfnGWKResample( 5228 poWK, iBand, 5229 padfX[iDstX]-poWK->nSrcXOff, 5230 padfY[iDstX]-poWK->nSrcYOff, 5231 &dfBandDensity, 5232 &dfValueReal, &dfValueImagIgnored, psWrkStruct); 5233 } 5234 5235 // If we didn't find any valid inputs skip to next band. 5236 if( dfBandDensity < BAND_DENSITY_THRESHOLD ) 5237 continue; 5238 5239 bHasFoundDensity = true; 5240 5241 /* -------------------------------------------------------------------- */ 5242 /* We have a computed value from the source. Now apply it to */ 5243 /* the destination pixel. */ 5244 /* -------------------------------------------------------------------- */ 5245 GWKSetPixelValueReal(poWK, iBand, iDstOffset, 5246 dfBandDensity, 5247 dfValueReal); 5248 } 5249 5250 if( !bHasFoundDensity ) 5251 continue; 5252 5253 /* -------------------------------------------------------------------- */ 5254 /* Update destination density/validity masks. */ 5255 /* -------------------------------------------------------------------- */ 5256 GWKOverlayDensity( poWK, iDstOffset, dfDensity ); 5257 5258 if( poWK->panDstValid != nullptr ) 5259 { 5260 poWK->panDstValid[iDstOffset>>5] |= 5261 0x01 << (iDstOffset & 0x1f); 5262 } 5263 } // Next iDstX. 5264 5265 /* -------------------------------------------------------------------- */ 5266 /* Report progress to the user, and optionally cancel out. */ 5267 /* -------------------------------------------------------------------- */ 5268 if( psJob->pfnProgress && psJob->pfnProgress(psJob) ) 5269 break; 5270 } 5271 5272 /* -------------------------------------------------------------------- */ 5273 /* Cleanup and return. */ 5274 /* -------------------------------------------------------------------- */ 5275 CPLFree( padfX ); 5276 CPLFree( padfY ); 5277 CPLFree( padfZ ); 5278 CPLFree( pabSuccess ); 5279 if( psWrkStruct ) 5280 GWKResampleDeleteWrkStruct(psWrkStruct); 5281 } 5282 5283 static CPLErr GWKRealCase( GDALWarpKernel *poWK ) 5284 { 5285 return GWKRun( poWK, "GWKRealCase", GWKRealCaseThread ); 5286 } 5287 5288 /************************************************************************/ 5289 /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */ 5290 /************************************************************************/ 5291 5292 template<class T, GDALResampleAlg eResample, int bUse4SamplesFormula> 5293 static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal( void* pData ) 5294 5295 { 5296 GWKJobStruct* psJob = static_cast<GWKJobStruct *>(pData); 5297 GDALWarpKernel *poWK = psJob->poWK; 5298 const int iYMin = psJob->iYMin; 5299 const int iYMax = psJob->iYMax; 5300 5301 const int nDstXSize = poWK->nDstXSize; 5302 const int nSrcXSize = poWK->nSrcXSize; 5303 const int nSrcYSize = poWK->nSrcYSize; 5304 5305 /* -------------------------------------------------------------------- */ 5306 /* Allocate x,y,z coordinate arrays for transformation ... one */ 5307 /* scanlines worth of positions. */ 5308 /* -------------------------------------------------------------------- */ 5309 5310 // For x, 2 *, because we cache the precomputed values at the end. 5311 double *padfX = 5312 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize)); 5313 double *padfY = 5314 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5315 double *padfZ = 5316 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5317 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize)); 5318 5319 const int nXRadius = poWK->nXRadius; 5320 double *padfWeight = 5321 static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double))); 5322 const double dfSrcCoordPrecision = CPLAtof( 5323 CSLFetchNameValueDef(poWK->papszWarpOptions, 5324 "SRC_COORD_PRECISION", "0")); 5325 const double dfErrorThreshold = CPLAtof( 5326 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); 5327 5328 // Precompute values. 5329 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5330 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; 5331 5332 /* ==================================================================== */ 5333 /* Loop over output lines. */ 5334 /* ==================================================================== */ 5335 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) 5336 { 5337 /* -------------------------------------------------------------------- */ 5338 /* Setup points to transform to source image space. */ 5339 /* -------------------------------------------------------------------- */ 5340 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); 5341 const double dfY = iDstY + 0.5 + poWK->nDstYOff; 5342 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5343 padfY[iDstX] = dfY; 5344 memset( padfZ, 0, sizeof(double) * nDstXSize ); 5345 5346 /* -------------------------------------------------------------------- */ 5347 /* Transform the points from destination pixel/line coordinates */ 5348 /* to source pixel/line coordinates. */ 5349 /* -------------------------------------------------------------------- */ 5350 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, 5351 padfX, padfY, padfZ, pabSuccess ); 5352 if( dfSrcCoordPrecision > 0.0 ) 5353 { 5354 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess, 5355 dfSrcCoordPrecision, 5356 dfErrorThreshold, 5357 poWK->pfnTransformer, 5358 psJob->pTransformerArg, 5359 0.5 + poWK->nDstXOff, 5360 iDstY + 0.5 + poWK->nDstYOff); 5361 } 5362 5363 /* ==================================================================== */ 5364 /* Loop over pixels in output scanline. */ 5365 /* ==================================================================== */ 5366 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5367 { 5368 GPtrDiff_t iSrcOffset = 0; 5369 if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY, 5370 poWK, nSrcXSize, nSrcYSize, 5371 iSrcOffset) ) 5372 continue; 5373 5374 /* ==================================================================== */ 5375 /* Loop processing each band. */ 5376 /* ==================================================================== */ 5377 const GPtrDiff_t iDstOffset = iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize; 5378 5379 for( int iBand = 0; iBand < poWK->nBands; iBand++ ) 5380 { 5381 T value = 0; 5382 if( eResample == GRA_NearestNeighbour ) 5383 { 5384 value = reinterpret_cast<T *>( 5385 poWK->papabySrcImage[iBand])[iSrcOffset]; 5386 } 5387 else if( bUse4SamplesFormula ) 5388 { 5389 if( eResample == GRA_Bilinear ) 5390 GWKBilinearResampleNoMasks4SampleT( poWK, iBand, 5391 padfX[iDstX]-poWK->nSrcXOff, 5392 padfY[iDstX]-poWK->nSrcYOff, 5393 &value ); 5394 else 5395 GWKCubicResampleNoMasks4SampleT( poWK, iBand, 5396 padfX[iDstX]-poWK->nSrcXOff, 5397 padfY[iDstX]-poWK->nSrcYOff, 5398 &value ); 5399 } 5400 else 5401 { 5402 GWKResampleNoMasksT( poWK, iBand, 5403 padfX[iDstX]-poWK->nSrcXOff, 5404 padfY[iDstX]-poWK->nSrcYOff, 5405 &value, 5406 padfWeight); 5407 } 5408 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] = 5409 value; 5410 } 5411 5412 if( poWK->pafDstDensity ) 5413 poWK->pafDstDensity[iDstOffset] = 1.0f; 5414 } 5415 5416 /* -------------------------------------------------------------------- */ 5417 /* Report progress to the user, and optionally cancel out. */ 5418 /* -------------------------------------------------------------------- */ 5419 if( psJob->pfnProgress && psJob->pfnProgress(psJob) ) 5420 break; 5421 } 5422 5423 /* -------------------------------------------------------------------- */ 5424 /* Cleanup and return. */ 5425 /* -------------------------------------------------------------------- */ 5426 CPLFree( padfX ); 5427 CPLFree( padfY ); 5428 CPLFree( padfZ ); 5429 CPLFree( pabSuccess ); 5430 CPLFree( padfWeight ); 5431 } 5432 5433 template<class T, GDALResampleAlg eResample> 5434 static void GWKResampleNoMasksOrDstDensityOnlyThread( void* pData ) 5435 { 5436 GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, 5437 FALSE>(pData); 5438 } 5439 5440 template<class T, GDALResampleAlg eResample> 5441 static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread( void* pData ) 5442 5443 { 5444 GWKJobStruct* psJob = static_cast<GWKJobStruct *>(pData); 5445 GDALWarpKernel *poWK = psJob->poWK; 5446 CPLAssert(eResample == GRA_Bilinear || eResample == GRA_Cubic); 5447 const bool bUse4SamplesFormula = 5448 poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; 5449 if( bUse4SamplesFormula ) 5450 GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, 5451 TRUE>(pData); 5452 else 5453 GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, 5454 FALSE>(pData); 5455 } 5456 5457 static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ) 5458 { 5459 return GWKRun( 5460 poWK, "GWKNearestNoMasksOrDstDensityOnlyByte", 5461 GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>); 5462 } 5463 5464 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ) 5465 { 5466 return GWKRun( 5467 poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte", 5468 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, 5469 GRA_Bilinear>); 5470 } 5471 5472 static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ) 5473 { 5474 return GWKRun( 5475 poWK, "GWKCubicNoMasksOrDstDensityOnlyByte", 5476 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>); 5477 } 5478 5479 static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ) 5480 { 5481 return GWKRun( 5482 poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat", 5483 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>); 5484 } 5485 5486 #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL 5487 5488 static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK ) 5489 { 5490 return GWKRun( 5491 poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble", 5492 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>); 5493 } 5494 #endif 5495 5496 static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ) 5497 { 5498 return GWKRun( 5499 poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte", 5500 GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>); 5501 } 5502 5503 /************************************************************************/ 5504 /* GWKNearestByte() */ 5505 /* */ 5506 /* Case for 8bit input data with nearest neighbour resampling */ 5507 /* using valid flags. Should be as fast as possible for this */ 5508 /* particular transformation type. */ 5509 /************************************************************************/ 5510 5511 template<class T> 5512 static void GWKNearestThread( void* pData ) 5513 5514 { 5515 GWKJobStruct* psJob = static_cast<GWKJobStruct *>(pData); 5516 GDALWarpKernel *poWK = psJob->poWK; 5517 const int iYMin = psJob->iYMin; 5518 const int iYMax = psJob->iYMax; 5519 5520 const int nDstXSize = poWK->nDstXSize; 5521 const int nSrcXSize = poWK->nSrcXSize; 5522 const int nSrcYSize = poWK->nSrcYSize; 5523 5524 /* -------------------------------------------------------------------- */ 5525 /* Allocate x,y,z coordinate arrays for transformation ... one */ 5526 /* scanlines worth of positions. */ 5527 /* -------------------------------------------------------------------- */ 5528 5529 // For x, 2 *, because we cache the precomputed values at the end. 5530 double *padfX = 5531 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize)); 5532 double *padfY = 5533 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5534 double *padfZ = 5535 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5536 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize)); 5537 5538 const double dfSrcCoordPrecision = CPLAtof( 5539 CSLFetchNameValueDef(poWK->papszWarpOptions, 5540 "SRC_COORD_PRECISION", "0")); 5541 const double dfErrorThreshold = CPLAtof( 5542 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); 5543 5544 // Precompute values. 5545 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5546 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; 5547 5548 /* ==================================================================== */ 5549 /* Loop over output lines. */ 5550 /* ==================================================================== */ 5551 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) 5552 { 5553 5554 /* -------------------------------------------------------------------- */ 5555 /* Setup points to transform to source image space. */ 5556 /* -------------------------------------------------------------------- */ 5557 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); 5558 const double dfY = iDstY + 0.5 + poWK->nDstYOff; 5559 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5560 padfY[iDstX] = dfY; 5561 memset( padfZ, 0, sizeof(double) * nDstXSize ); 5562 5563 /* -------------------------------------------------------------------- */ 5564 /* Transform the points from destination pixel/line coordinates */ 5565 /* to source pixel/line coordinates. */ 5566 /* -------------------------------------------------------------------- */ 5567 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, 5568 padfX, padfY, padfZ, pabSuccess ); 5569 if( dfSrcCoordPrecision > 0.0 ) 5570 { 5571 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, 5572 pabSuccess, 5573 dfSrcCoordPrecision, 5574 dfErrorThreshold, 5575 poWK->pfnTransformer, 5576 psJob->pTransformerArg, 5577 0.5 + poWK->nDstXOff, 5578 iDstY + 0.5 + poWK->nDstYOff); 5579 } 5580 /* ==================================================================== */ 5581 /* Loop over pixels in output scanline. */ 5582 /* ==================================================================== */ 5583 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5584 { 5585 GPtrDiff_t iSrcOffset = 0; 5586 if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY, 5587 poWK, nSrcXSize, nSrcYSize, iSrcOffset) ) 5588 continue; 5589 5590 /* -------------------------------------------------------------------- */ 5591 /* Do not try to apply invalid source pixels to the dest. */ 5592 /* -------------------------------------------------------------------- */ 5593 if( poWK->panUnifiedSrcValid != nullptr 5594 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 5595 & (0x01 << (iSrcOffset & 0x1f))) ) 5596 continue; 5597 5598 /* -------------------------------------------------------------------- */ 5599 /* Do not try to apply transparent source pixels to the destination.*/ 5600 /* -------------------------------------------------------------------- */ 5601 double dfDensity = 1.0; 5602 5603 if( poWK->pafUnifiedSrcDensity != nullptr ) 5604 { 5605 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; 5606 if( dfDensity < SRC_DENSITY_THRESHOLD ) 5607 continue; 5608 } 5609 5610 /* ==================================================================== */ 5611 /* Loop processing each band. */ 5612 /* ==================================================================== */ 5613 5614 const GPtrDiff_t iDstOffset = iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize; 5615 5616 for( int iBand = 0; iBand < poWK->nBands; iBand++ ) 5617 { 5618 T value = 0; 5619 double dfBandDensity = 0.0; 5620 5621 /* -------------------------------------------------------------------- */ 5622 /* Collect the source value. */ 5623 /* -------------------------------------------------------------------- */ 5624 if( GWKGetPixelT(poWK, iBand, iSrcOffset, 5625 &dfBandDensity, &value) ) 5626 { 5627 if( dfBandDensity < 1.0 ) 5628 { 5629 if( dfBandDensity == 0.0 ) 5630 { 5631 // Do nothing. 5632 } 5633 else 5634 { 5635 // Let the general code take care of mixing. 5636 GWKSetPixelValueRealT( poWK, iBand, iDstOffset, 5637 dfBandDensity, value ); 5638 } 5639 } 5640 else 5641 { 5642 reinterpret_cast<T *>( 5643 poWK->papabyDstImage[iBand])[iDstOffset] = value; 5644 } 5645 } 5646 } 5647 5648 /* -------------------------------------------------------------------- */ 5649 /* Mark this pixel valid/opaque in the output. */ 5650 /* -------------------------------------------------------------------- */ 5651 GWKOverlayDensity( poWK, iDstOffset, dfDensity ); 5652 5653 if( poWK->panDstValid != nullptr ) 5654 { 5655 poWK->panDstValid[iDstOffset>>5] |= 5656 0x01 << (iDstOffset & 0x1f); 5657 } 5658 } /* Next iDstX */ 5659 5660 /* -------------------------------------------------------------------- */ 5661 /* Report progress to the user, and optionally cancel out. */ 5662 /* -------------------------------------------------------------------- */ 5663 if( psJob->pfnProgress && psJob->pfnProgress(psJob) ) 5664 break; 5665 } 5666 5667 /* -------------------------------------------------------------------- */ 5668 /* Cleanup and return. */ 5669 /* -------------------------------------------------------------------- */ 5670 CPLFree( padfX ); 5671 CPLFree( padfY ); 5672 CPLFree( padfZ ); 5673 CPLFree( pabSuccess ); 5674 } 5675 5676 static CPLErr GWKNearestByte( GDALWarpKernel *poWK ) 5677 { 5678 return GWKRun( poWK, "GWKNearestByte", GWKNearestThread<GByte> ); 5679 } 5680 5681 static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ) 5682 { 5683 return GWKRun( 5684 poWK, "GWKNearestNoMasksOrDstDensityOnlyShort", 5685 GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>); 5686 } 5687 5688 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ) 5689 { 5690 return GWKRun( 5691 poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort", 5692 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, 5693 GRA_Bilinear>); 5694 } 5695 5696 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort( GDALWarpKernel *poWK ) 5697 { 5698 return GWKRun( 5699 poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort", 5700 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, 5701 GRA_Bilinear>); 5702 } 5703 5704 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ) 5705 { 5706 return GWKRun( 5707 poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat", 5708 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, 5709 GRA_Bilinear>); 5710 } 5711 5712 #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL 5713 5714 static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK ) 5715 { 5716 return GWKRun( 5717 poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble", 5718 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, 5719 GRA_Bilinear>); 5720 } 5721 #endif 5722 5723 static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ) 5724 { 5725 return GWKRun( 5726 poWK, "GWKCubicNoMasksOrDstDensityOnlyShort", 5727 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>); 5728 } 5729 5730 static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort( GDALWarpKernel *poWK ) 5731 { 5732 return GWKRun( 5733 poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort", 5734 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>); 5735 } 5736 5737 static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ) 5738 { 5739 return GWKRun( 5740 poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort", 5741 GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>); 5742 } 5743 5744 static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort( 5745 GDALWarpKernel *poWK ) 5746 { 5747 return GWKRun( 5748 poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort", 5749 GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>); 5750 } 5751 5752 static CPLErr GWKNearestShort( GDALWarpKernel *poWK ) 5753 { 5754 return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>); 5755 } 5756 5757 static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ) 5758 { 5759 return GWKRun( 5760 poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat", 5761 GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>); 5762 } 5763 5764 static CPLErr GWKNearestFloat( GDALWarpKernel *poWK ) 5765 { 5766 return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>); 5767 } 5768 5769 /************************************************************************/ 5770 /* GWKAverageOrMode() */ 5771 /* */ 5772 /************************************************************************/ 5773 5774 static void GWKAverageOrModeThread(void* pData); 5775 5776 static CPLErr GWKAverageOrMode( GDALWarpKernel *poWK ) 5777 { 5778 return GWKRun( poWK, "GWKAverageOrMode", GWKAverageOrModeThread ); 5779 } 5780 5781 // Overall logic based on GWKGeneralCaseThread(). 5782 static void GWKAverageOrModeThread( void* pData) 5783 { 5784 GWKJobStruct* psJob = static_cast<GWKJobStruct *>(pData); 5785 GDALWarpKernel *poWK = psJob->poWK; 5786 const int iYMin = psJob->iYMin; 5787 const int iYMax = psJob->iYMax; 5788 5789 const int nDstXSize = poWK->nDstXSize; 5790 const int nSrcXSize = poWK->nSrcXSize; 5791 const int nSrcYSize = poWK->nSrcYSize; 5792 5793 /* -------------------------------------------------------------------- */ 5794 /* Find out which algorithm to use (small optim.) */ 5795 /* -------------------------------------------------------------------- */ 5796 int nAlgo = 0; 5797 5798 // These vars only used with nAlgo == 3. 5799 int *panVals = nullptr; 5800 int nBins = 0; 5801 int nBinsOffset = 0; 5802 5803 // Only used with nAlgo = 2. 5804 float* pafRealVals = nullptr; 5805 float* pafImagVals = nullptr; 5806 int* panRealSums = nullptr; 5807 int* panImagSums = nullptr; 5808 5809 // Only used with nAlgo = 6. 5810 float quant = 0.5; 5811 5812 //To control array allocation only when data type is complex 5813 const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType)!=0; 5814 5815 if( poWK->eResample == GRA_Average ) 5816 { 5817 nAlgo = GWKAOM_Average; 5818 } 5819 else if( poWK->eResample == GRA_RMS ) 5820 { 5821 nAlgo = GWKAOM_RMS; 5822 } 5823 else if( poWK->eResample == GRA_Mode ) 5824 { 5825 // TODO check color table count > 256. 5826 if( poWK->eWorkingDataType == GDT_Byte || 5827 poWK->eWorkingDataType == GDT_UInt16 || 5828 poWK->eWorkingDataType == GDT_Int16 ) 5829 { 5830 nAlgo = GWKAOM_Imode; 5831 5832 // In the case of a paletted or non-paletted byte band, 5833 // Input values are between 0 and 255. 5834 if( poWK->eWorkingDataType == GDT_Byte ) 5835 { 5836 nBins = 256; 5837 } 5838 // In the case of Int16, input values are between -32768 and 32767. 5839 else if( poWK->eWorkingDataType == GDT_Int16 ) 5840 { 5841 nBins = 65536; 5842 nBinsOffset = 32768; 5843 } 5844 // In the case of UInt16, input values are between 0 and 65537. 5845 else if( poWK->eWorkingDataType == GDT_UInt16 ) 5846 { 5847 nBins = 65536; 5848 } 5849 panVals = 5850 static_cast<int *>(VSI_MALLOC_VERBOSE(nBins * sizeof(int))); 5851 if( panVals == nullptr ) 5852 return; 5853 } 5854 else 5855 { 5856 nAlgo = GWKAOM_Fmode; 5857 5858 if( nSrcXSize > 0 && nSrcYSize > 0 ) 5859 { 5860 pafRealVals = static_cast<float *>( 5861 VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float))); 5862 panRealSums = static_cast<int *>( 5863 VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(int))); 5864 if( pafRealVals == nullptr || panRealSums == nullptr ) 5865 { 5866 VSIFree(pafRealVals); 5867 VSIFree(panRealSums); 5868 return; 5869 } 5870 } 5871 } 5872 } 5873 else if( poWK->eResample == GRA_Max ) 5874 { 5875 nAlgo = GWKAOM_Max; 5876 } 5877 else if( poWK->eResample == GRA_Min ) 5878 { 5879 nAlgo = GWKAOM_Min; 5880 } 5881 else if( poWK->eResample == GRA_Med ) 5882 { 5883 nAlgo = GWKAOM_Quant; 5884 quant = 0.5; 5885 } 5886 else if( poWK->eResample == GRA_Q1 ) 5887 { 5888 nAlgo = GWKAOM_Quant; 5889 quant = 0.25; 5890 } 5891 else if( poWK->eResample == GRA_Q3 ) 5892 { 5893 nAlgo = GWKAOM_Quant; 5894 quant = 0.75; 5895 } 5896 else if( poWK->eResample == GRA_Sum ) 5897 { 5898 nAlgo = GWKAOM_Sum; 5899 } 5900 else 5901 { 5902 // Other resample algorithms not permitted here. 5903 CPLDebug("GDAL", 5904 "GDALWarpKernel():GWKAverageOrModeThread() ERROR, " 5905 "illegal resample" ); 5906 return; 5907 } 5908 5909 CPLDebug("GDAL", 5910 "GDALWarpKernel():GWKAverageOrModeThread() using algo %d", nAlgo); 5911 5912 /* -------------------------------------------------------------------- */ 5913 /* Allocate x,y,z coordinate arrays for transformation ... two */ 5914 /* scanlines worth of positions. */ 5915 /* -------------------------------------------------------------------- */ 5916 5917 double *padfX = 5918 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5919 double *padfY = 5920 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5921 double *padfZ = 5922 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5923 double *padfX2 = 5924 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5925 double *padfY2 = 5926 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5927 double *padfZ2 = 5928 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize)); 5929 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize)); 5930 int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize)); 5931 5932 const double dfSrcCoordPrecision = CPLAtof( 5933 CSLFetchNameValueDef(poWK->papszWarpOptions, 5934 "SRC_COORD_PRECISION", "0")); 5935 const double dfErrorThreshold = CPLAtof( 5936 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); 5937 5938 /* ==================================================================== */ 5939 /* Loop over output lines. */ 5940 /* ==================================================================== */ 5941 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) 5942 { 5943 5944 /* -------------------------------------------------------------------- */ 5945 /* Setup points to transform to source image space. */ 5946 /* -------------------------------------------------------------------- */ 5947 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5948 { 5949 padfX[iDstX] = iDstX + poWK->nDstXOff; 5950 padfY[iDstX] = iDstY + poWK->nDstYOff; 5951 padfZ[iDstX] = 0.0; 5952 padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff; 5953 padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff; 5954 padfZ2[iDstX] = 0.0; 5955 } 5956 5957 /* -------------------------------------------------------------------- */ 5958 /* Transform the points from destination pixel/line coordinates */ 5959 /* to source pixel/line coordinates. */ 5960 /* -------------------------------------------------------------------- */ 5961 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, 5962 padfX, padfY, padfZ, pabSuccess ); 5963 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, 5964 padfX2, padfY2, padfZ2, pabSuccess2 ); 5965 5966 if( dfSrcCoordPrecision > 0.0 ) 5967 { 5968 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, 5969 pabSuccess, 5970 dfSrcCoordPrecision, 5971 dfErrorThreshold, 5972 poWK->pfnTransformer, 5973 psJob->pTransformerArg, 5974 poWK->nDstXOff, 5975 iDstY + poWK->nDstYOff); 5976 GWKRoundSourceCoordinates(nDstXSize, padfX2, padfY2, padfZ2, 5977 pabSuccess2, 5978 dfSrcCoordPrecision, 5979 dfErrorThreshold, 5980 poWK->pfnTransformer, 5981 psJob->pTransformerArg, 5982 1.0 + poWK->nDstXOff, 5983 iDstY + 1.0 + poWK->nDstYOff); 5984 } 5985 5986 /* ==================================================================== */ 5987 /* Loop over pixels in output scanline. */ 5988 /* ==================================================================== */ 5989 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) 5990 { 5991 GPtrDiff_t iSrcOffset = 0; 5992 double dfDensity = 1.0; 5993 bool bHasFoundDensity = false; 5994 5995 if( !pabSuccess[iDstX] || !pabSuccess2[iDstX] ) 5996 continue; 5997 5998 if( padfX[iDstX] < poWK->nSrcXOff - 1 || 5999 padfX2[iDstX] < poWK->nSrcXOff - 1 || 6000 padfY[iDstX] < poWK->nSrcYOff - 1 || 6001 padfY2[iDstX] < poWK->nSrcYOff -1 || 6002 padfX[iDstX] > nSrcXSize + poWK->nSrcXOff + 1 || 6003 padfX2[iDstX] > nSrcXSize + poWK->nSrcXOff + 1 || 6004 padfY[iDstX] > nSrcYSize + poWK->nSrcYOff + 1 || 6005 padfY2[iDstX] > nSrcYSize + poWK->nSrcYOff + 1 ) 6006 { 6007 continue; 6008 } 6009 6010 const GPtrDiff_t iDstOffset = iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize; 6011 6012 // Compute corners in source crs. 6013 6014 // The transformation might not have preserved ordering of 6015 // coordinates so do the necessary swapping (#5433). 6016 // NOTE: this is really an approximative fix. To do something 6017 // more precise we would for example need to compute the 6018 // transformation of coordinates in the 6019 // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source 6020 // coordinates, and take the bounding box of the got source 6021 // coordinates. 6022 const double dfXMin = std::min(padfX[iDstX],padfX2[iDstX]) - 6023 poWK->nSrcXOff; 6024 int iSrcXMin = 6025 std::max(static_cast<int>(floor(dfXMin + 1e-10)), 0); 6026 const double dfXMax = std::max(padfX[iDstX],padfX2[iDstX]) - 6027 poWK->nSrcXOff; 6028 int iSrcXMax = 6029 std::min(static_cast<int>(ceil(dfXMax - 1e-10)), nSrcXSize); 6030 const double dfYMin = std::min(padfY[iDstX],padfY2[iDstX]) - 6031 poWK->nSrcYOff; 6032 int iSrcYMin = 6033 std::max(static_cast<int>(floor(dfYMin + 1e-10)), 0); 6034 const double dfYMax = std::max(padfY[iDstX],padfY2[iDstX]) - 6035 poWK->nSrcYOff; 6036 int iSrcYMax = 6037 std::min(static_cast<int>(ceil(dfYMax - 1e-10)), nSrcYSize); 6038 6039 if( iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize ) 6040 iSrcXMax++; 6041 if( iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize ) 6042 iSrcYMax++; 6043 6044 /* ==================================================================== */ 6045 /* Loop processing each band. */ 6046 /* ==================================================================== */ 6047 6048 for( int iBand = 0; iBand < poWK->nBands; iBand++ ) 6049 { 6050 double dfBandDensity = 0.0; 6051 double dfValueReal = 0.0; 6052 double dfValueImag = 0.0; 6053 double dfValueRealTmp = 0.0; 6054 double dfValueImagTmp = 0.0; 6055 6056 /* -------------------------------------------------------------------- */ 6057 /* Collect the source value. */ 6058 /* -------------------------------------------------------------------- */ 6059 6060 // Loop over source lines and pixels - 3 possible algorithms. 6061 6062 #define COMPUTE_WEIGHT_Y(iSrcY) \ 6063 ((iSrcY == iSrcYMin) ? \ 6064 ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin)): \ 6065 (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax): \ 6066 1.0) 6067 6068 #define COMPUTE_WEIGHT(iSrcX, dfWeightY) \ 6069 ((iSrcX == iSrcXMin) ? \ 6070 ((iSrcXMin + 1 == iSrcXMax) ? dfWeightY : dfWeightY * (1 - (dfXMin - iSrcXMin))): \ 6071 (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax)): \ 6072 dfWeightY) 6073 6074 // poWK->eResample == GRA_Average. 6075 if( nAlgo == GWKAOM_Average ) 6076 { 6077 double dfTotalReal = 0.0; 6078 double dfTotalImag = 0.0; 6079 double dfTotalWeight = 0.0; 6080 6081 // This code adapted from GDALDownsampleChunk32R_AverageT() 6082 // in gcore/overview.cpp. 6083 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) 6084 { 6085 const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY); 6086 iSrcOffset = iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 6087 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++, iSrcOffset++ ) 6088 { 6089 if( poWK->panUnifiedSrcValid != nullptr 6090 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 6091 & (0x01 << (iSrcOffset & 0x1f))) ) 6092 { 6093 continue; 6094 } 6095 6096 if( GWKGetPixelValue( 6097 poWK, iBand, iSrcOffset, 6098 &dfBandDensity, &dfValueRealTmp, 6099 &dfValueImagTmp ) && 6100 dfBandDensity > BAND_DENSITY_THRESHOLD ) 6101 { 6102 const double dfWeight = COMPUTE_WEIGHT(iSrcX, dfWeightY); 6103 dfTotalWeight += dfWeight; 6104 dfTotalReal += dfValueRealTmp * dfWeight; 6105 if (bIsComplex) 6106 { 6107 dfTotalImag += dfValueImagTmp * dfWeight; 6108 } 6109 } 6110 } 6111 } 6112 6113 if( dfTotalWeight > 0 ) 6114 { 6115 dfValueReal = dfTotalReal / dfTotalWeight; 6116 if (bIsComplex) 6117 { 6118 dfValueImag = dfTotalImag / dfTotalWeight; 6119 } 6120 dfBandDensity = 1; 6121 bHasFoundDensity = true; 6122 } 6123 } // GRA_Average. 6124 // poWK->eResample == GRA_RMS. 6125 if( nAlgo == GWKAOM_RMS ) 6126 { 6127 double dfTotalReal = 0.0; 6128 double dfTotalImag = 0.0; 6129 double dfTotalWeight = 0.0; 6130 // This code adapted from GDALDownsampleChunk32R_AverageT() 6131 // in gcore/overview.cpp. 6132 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) 6133 { 6134 const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY); 6135 iSrcOffset = iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 6136 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++, iSrcOffset++ ) 6137 { 6138 if( poWK->panUnifiedSrcValid != nullptr 6139 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 6140 & (0x01 << (iSrcOffset & 0x1f))) ) 6141 { 6142 continue; 6143 } 6144 6145 if( GWKGetPixelValue( 6146 poWK, iBand, iSrcOffset, 6147 &dfBandDensity, &dfValueRealTmp, 6148 &dfValueImagTmp ) && 6149 dfBandDensity > BAND_DENSITY_THRESHOLD ) 6150 { 6151 const double dfWeight = COMPUTE_WEIGHT(iSrcX, dfWeightY); 6152 dfTotalWeight += dfWeight; 6153 dfTotalReal += dfValueRealTmp * dfValueRealTmp * dfWeight; 6154 if (bIsComplex) 6155 dfTotalImag += dfValueImagTmp * dfValueImagTmp * dfWeight; 6156 } 6157 } 6158 } 6159 6160 if( dfTotalWeight > 0 ) 6161 { 6162 dfValueReal = sqrt( dfTotalReal / dfTotalWeight ); 6163 if (bIsComplex) 6164 dfValueImag = sqrt( dfTotalImag / dfTotalWeight ); 6165 6166 dfBandDensity = 1; 6167 bHasFoundDensity = true; 6168 } 6169 } // GRA_RMS. 6170 else if( nAlgo == GWKAOM_Sum ) 6171 // poWK->eResample == GRA_Sum 6172 { 6173 double dfTotalReal = 0.0; 6174 double dfTotalImag = 0.0; 6175 bool bFoundValid = false; 6176 6177 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) 6178 { 6179 const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY); 6180 iSrcOffset = iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 6181 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++, iSrcOffset++ ) 6182 { 6183 if( poWK->panUnifiedSrcValid != nullptr 6184 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 6185 & (0x01 << (iSrcOffset & 0x1f))) ) 6186 { 6187 continue; 6188 } 6189 6190 if( GWKGetPixelValue( 6191 poWK, iBand, iSrcOffset, 6192 &dfBandDensity, &dfValueRealTmp, 6193 &dfValueImagTmp ) && 6194 dfBandDensity > BAND_DENSITY_THRESHOLD ) 6195 { 6196 const double dfWeight = COMPUTE_WEIGHT(iSrcX, dfWeightY); 6197 bFoundValid = true; 6198 dfTotalReal += dfValueRealTmp * dfWeight; 6199 if (bIsComplex) 6200 { 6201 dfTotalImag += dfValueImagTmp * dfWeight; 6202 } 6203 } 6204 } 6205 } 6206 6207 if( bFoundValid ) 6208 { 6209 dfValueReal = dfTotalReal; 6210 if (bIsComplex) 6211 { 6212 dfValueImag = dfTotalImag; 6213 } 6214 dfBandDensity = 1; 6215 bHasFoundDensity = true; 6216 } 6217 } // GRA_Sum. 6218 else if( nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode ) 6219 // poWK->eResample == GRA_Mode 6220 { 6221 // This code adapted from GDALDownsampleChunk32R_Mode() in 6222 // gcore/overview.cpp. 6223 if( nAlgo == GWKAOM_Fmode ) // int32 or float. 6224 { 6225 // Does it make sense it makes to run a 6226 // majority filter on floating point data? But, here it 6227 // is for the sake of compatibility. It won't look 6228 // right on RGB images by the nature of the filter. 6229 int iMaxInd = 0; 6230 int iMaxVal = -1; 6231 int i = 0; 6232 6233 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) 6234 { 6235 for( int iSrcX = iSrcXMin; 6236 iSrcX < iSrcXMax; 6237 iSrcX++ ) 6238 { 6239 iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 6240 6241 if( poWK->panUnifiedSrcValid != nullptr 6242 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 6243 & (0x01 << (iSrcOffset & 0x1f))) ) 6244 continue; 6245 6246 if( GWKGetPixelValue( 6247 poWK, iBand, iSrcOffset, 6248 &dfBandDensity, &dfValueRealTmp, 6249 &dfValueImagTmp ) && 6250 dfBandDensity > BAND_DENSITY_THRESHOLD ) 6251 { 6252 const float fVal = 6253 static_cast<float>(dfValueRealTmp); 6254 6255 // Check array for existing entry. 6256 for( i = 0; i < iMaxInd; ++i ) 6257 if( pafRealVals[i] == fVal 6258 && ++panRealSums[i] > panRealSums[iMaxVal] ) 6259 { 6260 iMaxVal = i; 6261 break; 6262 } 6263 6264 // Add to arr if entry not already there. 6265 if( i == iMaxInd ) 6266 { 6267 pafRealVals[iMaxInd] = fVal; 6268 panRealSums[iMaxInd] = 1; 6269 6270 if( iMaxVal < 0 ) 6271 iMaxVal = iMaxInd; 6272 6273 ++iMaxInd; 6274 } 6275 } 6276 } 6277 } 6278 6279 if( iMaxVal != -1 ) 6280 { 6281 dfValueReal = pafRealVals[iMaxVal]; 6282 dfBandDensity = 1; 6283 bHasFoundDensity = true; 6284 } 6285 } 6286 else // byte or int16. 6287 { 6288 int nMaxVal = 0; 6289 int iMaxInd = -1; 6290 6291 memset(panVals, 0, nBins*sizeof(int)); 6292 6293 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) 6294 { 6295 for( int iSrcX = iSrcXMin; 6296 iSrcX < iSrcXMax; 6297 iSrcX++ ) 6298 { 6299 iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 6300 6301 if( poWK->panUnifiedSrcValid != nullptr 6302 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 6303 & (0x01 << (iSrcOffset & 0x1f))) ) 6304 continue; 6305 6306 if( GWKGetPixelValue( 6307 poWK, iBand, iSrcOffset, 6308 &dfBandDensity, &dfValueRealTmp, 6309 &dfValueImagTmp ) && 6310 dfBandDensity > BAND_DENSITY_THRESHOLD ) 6311 { 6312 const int nVal = 6313 static_cast<int>(dfValueRealTmp); 6314 if( ++panVals[nVal+nBinsOffset] > nMaxVal ) 6315 { 6316 // Sum the density. 6317 // Is it the most common value so far? 6318 iMaxInd = nVal; 6319 nMaxVal = panVals[nVal+nBinsOffset]; 6320 } 6321 } 6322 } 6323 } 6324 6325 if( iMaxInd != -1 ) 6326 { 6327 dfValueReal = iMaxInd; 6328 dfBandDensity = 1; 6329 bHasFoundDensity = true; 6330 } 6331 } 6332 } // GRA_Mode. 6333 else if( nAlgo == GWKAOM_Max ) 6334 // poWK->eResample == GRA_Max. 6335 { 6336 bool bFoundValid = false; 6337 double dfTotalReal = std::numeric_limits<double>::lowest(); 6338 // This code adapted from nAlgo 1 method, GRA_Average. 6339 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) 6340 { 6341 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ ) 6342 { 6343 iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 6344 6345 if( poWK->panUnifiedSrcValid != nullptr 6346 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 6347 & (0x01 << (iSrcOffset & 0x1f))) ) 6348 { 6349 continue; 6350 } 6351 6352 // Returns pixel value if it is not no data. 6353 if( GWKGetPixelValue( 6354 poWK, iBand, iSrcOffset, 6355 &dfBandDensity, &dfValueRealTmp, 6356 &dfValueImagTmp ) && 6357 dfBandDensity > BAND_DENSITY_THRESHOLD ) 6358 { 6359 bFoundValid = true; 6360 if( dfTotalReal < dfValueRealTmp ) 6361 { 6362 dfTotalReal = dfValueRealTmp; 6363 } 6364 } 6365 } 6366 } 6367 6368 if( bFoundValid ) 6369 { 6370 dfValueReal = dfTotalReal; 6371 dfBandDensity = 1; 6372 bHasFoundDensity = true; 6373 } 6374 } // GRA_Max. 6375 else if( nAlgo == GWKAOM_Min ) 6376 // poWK->eResample == GRA_Min. 6377 { 6378 bool bFoundValid = false; 6379 double dfTotalReal = std::numeric_limits<double>::max(); 6380 // This code adapted from nAlgo 1 method, GRA_Average. 6381 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) 6382 { 6383 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ ) 6384 { 6385 iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 6386 6387 if( poWK->panUnifiedSrcValid != nullptr 6388 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 6389 & (0x01 << (iSrcOffset & 0x1f))) ) 6390 { 6391 continue; 6392 } 6393 6394 // Returns pixel value if it is not no data. 6395 if( GWKGetPixelValue( 6396 poWK, iBand, iSrcOffset, 6397 &dfBandDensity, &dfValueRealTmp, 6398 &dfValueImagTmp ) && 6399 dfBandDensity > BAND_DENSITY_THRESHOLD ) 6400 { 6401 bFoundValid = true; 6402 if( dfTotalReal > dfValueRealTmp ) 6403 { 6404 dfTotalReal = dfValueRealTmp; 6405 } 6406 } 6407 } 6408 } 6409 6410 if( bFoundValid ) 6411 { 6412 dfValueReal = dfTotalReal; 6413 dfBandDensity = 1; 6414 bHasFoundDensity = true; 6415 } 6416 } // GRA_Min. 6417 else if( nAlgo == GWKAOM_Quant ) 6418 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3. 6419 { 6420 bool bFoundValid = false; 6421 std::vector<double> dfRealValuesTmp; 6422 6423 // This code adapted from nAlgo 1 method, GRA_Average. 6424 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) 6425 { 6426 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ ) 6427 { 6428 iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize; 6429 6430 if( poWK->panUnifiedSrcValid != nullptr 6431 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] 6432 & (0x01 << (iSrcOffset & 0x1f))) ) 6433 { 6434 continue; 6435 } 6436 6437 // Returns pixel value if it is not no data. 6438 if( GWKGetPixelValue( 6439 poWK, iBand, iSrcOffset, 6440 &dfBandDensity, &dfValueRealTmp, 6441 &dfValueImagTmp ) && 6442 dfBandDensity > BAND_DENSITY_THRESHOLD ) 6443 { 6444 bFoundValid = true; 6445 dfRealValuesTmp.push_back(dfValueRealTmp); 6446 } 6447 } 6448 } 6449 6450 if( bFoundValid ) 6451 { 6452 std::sort(dfRealValuesTmp.begin(), dfRealValuesTmp.end()); 6453 int quantIdx = static_cast<int>( 6454 std::ceil(quant * dfRealValuesTmp.size() - 1)); 6455 dfValueReal = dfRealValuesTmp[quantIdx]; 6456 6457 dfBandDensity = 1; 6458 bHasFoundDensity = true; 6459 dfRealValuesTmp.clear(); 6460 } 6461 } // Quantile. 6462 6463 /* -------------------------------------------------------------------- */ 6464 /* We have a computed value from the source. Now apply it to */ 6465 /* the destination pixel. */ 6466 /* -------------------------------------------------------------------- */ 6467 if( bHasFoundDensity ) 6468 { 6469 // TODO: Should we compute dfBandDensity in fct of 6470 // nCount/nCount2, or use as a threshold to set the dest 6471 // value? 6472 // dfBandDensity = (float) nCount / nCount2; 6473 // if( (float) nCount / nCount2 > 0.1 ) 6474 // or fix gdalwarp crop_to_cutline to crop partially 6475 // overlapping pixels. 6476 GWKSetPixelValue( poWK, iBand, iDstOffset, 6477 dfBandDensity, 6478 dfValueReal, dfValueImag ); 6479 } 6480 } 6481 6482 if( !bHasFoundDensity ) 6483 continue; 6484 6485 /* -------------------------------------------------------------------- */ 6486 /* Update destination density/validity masks. */ 6487 /* -------------------------------------------------------------------- */ 6488 GWKOverlayDensity( poWK, iDstOffset, dfDensity ); 6489 6490 if( poWK->panDstValid != nullptr ) 6491 { 6492 poWK->panDstValid[iDstOffset>>5] |= 6493 0x01 << (iDstOffset & 0x1f); 6494 } 6495 } /* Next iDstX */ 6496 6497 /* -------------------------------------------------------------------- */ 6498 /* Report progress to the user, and optionally cancel out. */ 6499 /* -------------------------------------------------------------------- */ 6500 if( psJob->pfnProgress && psJob->pfnProgress(psJob) ) 6501 break; 6502 } 6503 6504 /* -------------------------------------------------------------------- */ 6505 /* Cleanup and return. */ 6506 /* -------------------------------------------------------------------- */ 6507 CPLFree( padfX ); 6508 CPLFree( padfY ); 6509 CPLFree( padfZ ); 6510 CPLFree( padfX2 ); 6511 CPLFree( padfY2 ); 6512 CPLFree( padfZ2 ); 6513 CPLFree( pabSuccess ); 6514 CPLFree( pabSuccess2 ); 6515 VSIFree( panVals ); 6516 VSIFree(pafRealVals); 6517 VSIFree(panRealSums); 6518 if (bIsComplex) 6519 { 6520 VSIFree(pafImagVals); 6521 VSIFree(panImagSums); 6522 } 6523 } 6524