1 /******************************************************************************
2  *
3  * Project:  GDAL Core
4  * Purpose:  Contains default implementation of GDALRasterBand::IRasterIO()
5  *           and supporting functions of broader utility.
6  * Author:   Frank Warmerdam, warmerdam@pobox.com
7  *
8  ******************************************************************************
9  * Copyright (c) 1998, Frank Warmerdam
10  * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a
13  * copy of this software and associated documentation files (the "Software"),
14  * to deal in the Software without restriction, including without limitation
15  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
16  * and/or sell copies of the Software, and to permit persons to whom the
17  * Software is furnished to do so, subject to the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included
20  * in all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
23  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  ****************************************************************************/
30 
31 #include "cpl_port.h"
32 #include "gdal.h"
33 #include "gdal_priv.h"
34 
35 #include <climits>
36 #include <cmath>
37 #include <cstddef>
38 #include <cstdio>
39 #include <cstdlib>
40 #include <cstring>
41 
42 #include <algorithm>
43 #include <limits>
44 #include <stdexcept>
45 
46 #include "cpl_conv.h"
47 #include "cpl_cpu_features.h"
48 #include "cpl_error.h"
49 #include "cpl_progress.h"
50 #include "cpl_string.h"
51 #include "cpl_vsi.h"
52 #include "gdal_priv_templates.hpp"
53 #include "gdal_vrt.h"
54 #include "gdalwarper.h"
55 #include "memdataset.h"
56 #include "vrtdataset.h"
57 
58 CPL_CVSID("$Id: rasterio.cpp 1c4378ccd440f5578f86f5539d61d05a9ad2089f 2021-10-22 09:18:07 +0200 Even Rouault $")
59 
60 static void GDALFastCopyByte( const GByte * CPL_RESTRICT pSrcData,
61                               int nSrcPixelStride,
62                               GByte * CPL_RESTRICT pDstData,
63                               int nDstPixelStride,
64                               GPtrDiff_t nWordCount );
65 
66 /************************************************************************/
67 /*                    DownsamplingIntegerXFactor()                      */
68 /************************************************************************/
69 
70 template<bool bSameDataType, int DATA_TYPE_SIZE> static
DownsamplingIntegerXFactor(GDALRasterBand * poBand,int iSrcX,int nSrcXInc,GPtrDiff_t iSrcOffsetCst,GByte * CPL_RESTRICT pabyDstData,int nPixelSpace,int nBufXSize,GDALDataType eDataType,GDALDataType eBufType,int & nStartBlockX,int nBlockXSize,GDALRasterBlock * & poBlock,int nLBlockY)71 bool DownsamplingIntegerXFactor(GDALRasterBand* poBand,
72                          int iSrcX,
73                          int nSrcXInc,
74                          GPtrDiff_t iSrcOffsetCst,
75                          GByte* CPL_RESTRICT pabyDstData,
76                          int nPixelSpace,
77                          int nBufXSize,
78                          GDALDataType eDataType,
79                          GDALDataType eBufType,
80                          int& nStartBlockX,
81                          int nBlockXSize,
82                          GDALRasterBlock*& poBlock,
83                          int nLBlockY)
84 {
85     const int nBandDataSize = bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes( eDataType );
86     int nOuterLoopIters = nBufXSize - 1;
87     const int nIncSrcOffset = nSrcXInc * nBandDataSize;
88     const GByte* CPL_RESTRICT pabySrcData;
89     int nEndBlockX = nBlockXSize + nStartBlockX;
90 
91     if( iSrcX < nEndBlockX )
92     {
93         goto no_reload_block;
94     }
95     goto reload_block;
96 
97     // Don't do the last iteration in the loop, as iSrcX might go beyond nRasterXSize - 1
98     while( --nOuterLoopIters >= 1 )
99     {
100         iSrcX += nSrcXInc;
101         pabySrcData += nIncSrcOffset;
102         pabyDstData += nPixelSpace;
103 
104 /* -------------------------------------------------------------------- */
105 /*      Ensure we have the appropriate block loaded.                    */
106 /* -------------------------------------------------------------------- */
107         if( iSrcX >= nEndBlockX )
108         {
109 reload_block:
110             {
111                 const int nLBlockX = iSrcX / nBlockXSize;
112                 nStartBlockX = nLBlockX * nBlockXSize;
113                 nEndBlockX = nStartBlockX + nBlockXSize;
114 
115                 if( poBlock != nullptr )
116                     poBlock->DropLock();
117 
118                 poBlock = poBand->GetLockedBlockRef( nLBlockX, nLBlockY, FALSE );
119                 if( poBlock == nullptr )
120                 {
121                     return false;
122                 }
123             }
124 
125 no_reload_block:
126             const GByte* pabySrcBlock = static_cast<const GByte *>(poBlock->GetDataRef());
127             GPtrDiff_t iSrcOffset = (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
128             pabySrcData = pabySrcBlock + iSrcOffset;
129         }
130 
131 /* -------------------------------------------------------------------- */
132 /*      Copy the maximum run of pixels.                                 */
133 /* -------------------------------------------------------------------- */
134 
135         const int nIters = std::min(
136             (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
137         if( bSameDataType )
138         {
139             memcpy( pabyDstData, pabySrcData, nBandDataSize );
140             if( nIters > 1 )
141             {
142                 if( DATA_TYPE_SIZE == 1 )
143                 {
144                     pabySrcData += nIncSrcOffset;
145                     pabyDstData += nPixelSpace;
146                     GDALFastCopyByte( pabySrcData, nIncSrcOffset,
147                                       pabyDstData, nPixelSpace,
148                                       nIters - 1 );
149                     pabySrcData += static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
150                     pabyDstData += static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
151                 }
152                 else
153                 {
154                     for( int i = 0; i < nIters - 1; i ++)
155                     {
156                         pabySrcData += nIncSrcOffset;
157                         pabyDstData += nPixelSpace;
158                         memcpy( pabyDstData, pabySrcData, nBandDataSize );
159                     }
160                 }
161                 iSrcX += nSrcXInc * (nIters - 1);
162                 nOuterLoopIters -= nIters - 1;
163             }
164         }
165         else
166         {
167             // Type to type conversion ...
168             GDALCopyWords(
169                 pabySrcData, eDataType, nIncSrcOffset,
170                 pabyDstData, eBufType, nPixelSpace,
171                 std::max(1, nIters) );
172             if( nIters > 1 )
173             {
174                 pabySrcData += static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
175                 pabyDstData += static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
176                 iSrcX += nSrcXInc * (nIters - 1);
177                 nOuterLoopIters -= nIters - 1;
178             }
179         }
180     }
181 
182     // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1
183     if( nOuterLoopIters == 0 )
184     {
185         const int nRasterXSize = poBand->GetXSize();
186         iSrcX = static_cast<int>(
187             std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
188             static_cast<GInt64>(nRasterXSize - 1)));
189         pabyDstData += nPixelSpace;
190         if( iSrcX < nEndBlockX )
191         {
192             goto no_reload_block;
193         }
194         goto reload_block;
195     }
196     return true;
197 }
198 
199 /************************************************************************/
200 /*                             IRasterIO()                              */
201 /*                                                                      */
202 /*      Default internal implementation of RasterIO() ... utilizes      */
203 /*      the Block access methods to satisfy the request.  This would    */
204 /*      normally only be overridden by formats with overviews.          */
205 /************************************************************************/
206 
IRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,GSpacing nPixelSpace,GSpacing nLineSpace,GDALRasterIOExtraArg * psExtraArg)207 CPLErr GDALRasterBand::IRasterIO( GDALRWFlag eRWFlag,
208                                   int nXOff, int nYOff, int nXSize, int nYSize,
209                                   void * pData, int nBufXSize, int nBufYSize,
210                                   GDALDataType eBufType,
211                                   GSpacing nPixelSpace, GSpacing nLineSpace,
212                                   GDALRasterIOExtraArg* psExtraArg )
213 
214 {
215     if( eRWFlag == GF_Write && eFlushBlockErr != CE_None )
216     {
217         CPLError(eFlushBlockErr, CPLE_AppDefined,
218                  "An error occurred while writing a dirty block "
219                  "from GDALRasterBand::IRasterIO");
220         CPLErr eErr = eFlushBlockErr;
221         eFlushBlockErr = CE_None;
222         return eErr;
223     }
224     if( nBlockXSize <= 0 || nBlockYSize <= 0 )
225     {
226         CPLError( CE_Failure, CPLE_AppDefined, "Invalid block size" );
227         return CE_Failure;
228     }
229 
230     const int nBandDataSize = GDALGetDataTypeSizeBytes( eDataType );
231     const int nBufDataSize = GDALGetDataTypeSizeBytes( eBufType );
232     GByte dummyBlock[2] = {0, 0};
233     GByte *pabySrcBlock = dummyBlock; /* to avoid Coverity warning about nullptr dereference */
234     GDALRasterBlock *poBlock = nullptr;
235     const bool bUseIntegerRequestCoords =
236            (!psExtraArg->bFloatingPointWindowValidity ||
237             (nXOff == psExtraArg->dfXOff &&
238              nYOff == psExtraArg->dfYOff &&
239              nXSize == psExtraArg->dfXSize &&
240              nYSize == psExtraArg->dfYSize));
241 
242 /* ==================================================================== */
243 /*      A common case is the data requested with the destination        */
244 /*      is packed, and the block width is the raster width.             */
245 /* ==================================================================== */
246     if( nPixelSpace == nBufDataSize
247         && nLineSpace == nPixelSpace * nXSize
248         && nBlockXSize == GetXSize()
249         && nBufXSize == nXSize
250         && nBufYSize == nYSize
251         && bUseIntegerRequestCoords )
252     {
253         CPLErr eErr = CE_None;
254         int nLBlockY = -1;
255 
256         for( int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++ )
257         {
258             const int iSrcY = iBufYOff + nYOff;
259 
260             if( iSrcY < nLBlockY * nBlockYSize
261                 || iSrcY - nBlockYSize >= nLBlockY * nBlockYSize )
262             {
263                 nLBlockY = iSrcY / nBlockYSize;
264                 bool bJustInitialize =
265                     eRWFlag == GF_Write
266                     && nXOff == 0 && nXSize == nBlockXSize
267                     && nYOff <= nLBlockY * nBlockYSize
268                     && nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
269 
270                 // Is this a partial tile at right and/or bottom edges of
271                 // the raster, and that is going to be completely written?
272                 // If so, do not load it from storage, but zero it so that
273                 // the content outsize of the validity area is initialized.
274                 bool bMemZeroBuffer = false;
275                 if( eRWFlag == GF_Write && !bJustInitialize &&
276                     nXOff == 0 && nXSize == nBlockXSize &&
277                     nYOff <= nLBlockY * nBlockYSize &&
278                     nYOff + nYSize == GetYSize() &&
279                     nLBlockY * nBlockYSize > GetYSize() - nBlockYSize )
280                 {
281                     bJustInitialize = true;
282                     bMemZeroBuffer = true;
283                 }
284 
285                 if( poBlock )
286                     poBlock->DropLock();
287 
288                 const GUInt32 nErrorCounter = CPLGetErrorCounter();
289                 poBlock = GetLockedBlockRef( 0, nLBlockY, bJustInitialize );
290                 if( poBlock == nullptr )
291                 {
292                     if( strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == nullptr )
293                     {
294                         CPLError( CE_Failure, CPLE_AppDefined,
295                             "GetBlockRef failed at X block offset %d, "
296                             "Y block offset %d%s",
297                             0, nLBlockY,
298                             (nErrorCounter != CPLGetErrorCounter()) ?
299                                     CPLSPrintf(": %s", CPLGetLastErrorMsg()) : "");
300                     }
301                     eErr = CE_Failure;
302                     break;
303                 }
304 
305                 if( eRWFlag == GF_Write )
306                     poBlock->MarkDirty();
307 
308                 pabySrcBlock =  static_cast<GByte *>(poBlock->GetDataRef());
309                 if( bMemZeroBuffer )
310                 {
311                     memset(pabySrcBlock, 0,
312                            static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize * nBlockYSize);
313                 }
314             }
315 
316             const auto nSrcByteOffset =
317                 (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) * nBlockXSize + nXOff)
318                 * nBandDataSize;
319 
320             if( eDataType == eBufType )
321             {
322                 if( eRWFlag == GF_Read )
323                     memcpy(
324                         static_cast<GByte *>(pData)
325                         + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
326                         pabySrcBlock + nSrcByteOffset,
327                         static_cast<size_t>(nLineSpace) );
328                 else
329                     memcpy(
330                         pabySrcBlock + nSrcByteOffset,
331                         static_cast<GByte *>(pData)
332                         + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
333                         static_cast<size_t>(nLineSpace) );
334             }
335             else
336             {
337                 // Type to type conversion.
338 
339                 if( eRWFlag == GF_Read )
340                     GDALCopyWords(
341                         pabySrcBlock + nSrcByteOffset,
342                         eDataType, nBandDataSize,
343                         static_cast<GByte *>(pData)
344                         + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
345                         eBufType,
346                         static_cast<int>(nPixelSpace), nBufXSize );
347                 else
348                     GDALCopyWords(
349                         static_cast<GByte *>(pData)
350                         + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
351                         eBufType, static_cast<int>(nPixelSpace),
352                         pabySrcBlock + nSrcByteOffset,
353                         eDataType, nBandDataSize, nBufXSize );
354             }
355 
356             if( psExtraArg->pfnProgress != nullptr &&
357                 !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
358                                          psExtraArg->pProgressData) )
359             {
360                 eErr = CE_Failure;
361                 break;
362             }
363         }
364 
365         if( poBlock )
366             poBlock->DropLock();
367 
368         return eErr;
369     }
370 
371 /* ==================================================================== */
372 /*      Do we have overviews that would be appropriate to satisfy       */
373 /*      this request?                                                   */
374 /* ==================================================================== */
375     if( (nBufXSize < nXSize || nBufYSize < nYSize)
376         && GetOverviewCount() > 0 && eRWFlag == GF_Read )
377     {
378         GDALRasterIOExtraArg sExtraArg;
379         GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
380 
381         const int nOverview =
382             GDALBandGetBestOverviewLevel2( this, nXOff, nYOff, nXSize, nYSize,
383                                            nBufXSize, nBufYSize, &sExtraArg );
384         if (nOverview >= 0)
385         {
386             GDALRasterBand* poOverviewBand = GetOverview(nOverview);
387             if (poOverviewBand == nullptr)
388                 return CE_Failure;
389 
390             return poOverviewBand->RasterIO(
391                 eRWFlag, nXOff, nYOff, nXSize, nYSize,
392                 pData, nBufXSize, nBufYSize, eBufType,
393                 nPixelSpace, nLineSpace, &sExtraArg );
394         }
395     }
396 
397     if( eRWFlag == GF_Read &&
398         nBufXSize < nXSize / 100 && nBufYSize < nYSize / 100 &&
399         nPixelSpace == nBufDataSize &&
400         nLineSpace == nPixelSpace * nBufXSize &&
401         CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")) )
402     {
403         memset( pData, 0, static_cast<size_t>(nLineSpace * nBufYSize) );
404         return CE_None;
405     }
406 
407 /* ==================================================================== */
408 /*      The second case when we don't need subsample data but likely    */
409 /*      need data type conversion.                                      */
410 /* ==================================================================== */
411     if ( // nPixelSpace == nBufDataSize &&
412          nXSize == nBufXSize
413          && nYSize == nBufYSize
414          && bUseIntegerRequestCoords )
415     {
416 #if DEBUG_VERBOSE
417         printf( "IRasterIO(%d,%d,%d,%d) rw=%d case 2\n",/*ok*/
418                 nXOff, nYOff, nXSize, nYSize,
419                 static_cast<int>(eRWFlag) );
420 #endif
421 
422 /* -------------------------------------------------------------------- */
423 /*      Loop over buffer computing source locations.                    */
424 /* -------------------------------------------------------------------- */
425         // Calculate starting values out of loop
426         const int nLBlockXStart = nXOff / nBlockXSize;
427         const int nXSpanEnd = nBufXSize + nXOff;
428 
429         int nYInc = 0;
430         for( int iBufYOff = 0, iSrcY = nYOff;
431              iBufYOff < nBufYSize;
432              iBufYOff += nYInc, iSrcY += nYInc )
433         {
434             GPtrDiff_t iSrcOffset = 0;
435             int nXSpan = 0;
436 
437             GPtrDiff_t iBufOffset =
438                 static_cast<GPtrDiff_t>(iBufYOff) *
439                 static_cast<GPtrDiff_t>(nLineSpace);
440             int nLBlockY = iSrcY / nBlockYSize;
441             int nLBlockX = nLBlockXStart;
442             int iSrcX = nXOff;
443             while( iSrcX < nXSpanEnd )
444             {
445                 nXSpan = nLBlockX * nBlockXSize;
446                 if( nXSpan < INT_MAX - nBlockXSize )
447                     nXSpan += nBlockXSize;
448                 else
449                     nXSpan = INT_MAX;
450                 const int nXRight = nXSpan;
451                 nXSpan = ( nXSpan < nXSpanEnd ? nXSpan:nXSpanEnd ) - iSrcX;
452                 const size_t nXSpanSize = nXSpan * static_cast<size_t>(nPixelSpace);
453 
454                 bool bJustInitialize =
455                     eRWFlag == GF_Write
456                     && nYOff <= nLBlockY * nBlockYSize
457                     && nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize
458                     && nXOff <= nLBlockX * nBlockXSize
459                     && nXOff + nXSize >= nXRight;
460 
461                 // Is this a partial tile at right and/or bottom edges of
462                 // the raster, and that is going to be completely written?
463                 // If so, do not load it from storage, but zero it so that
464                 // the content outsize of the validity area is initialized.
465                 bool bMemZeroBuffer = false;
466                 if( eRWFlag == GF_Write && !bJustInitialize &&
467                     nXOff <= nLBlockX * nBlockXSize &&
468                     nYOff <= nLBlockY * nBlockYSize &&
469                     (nXOff + nXSize >= nXRight ||
470                      (nXOff + nXSize == GetXSize() &&
471                       nXRight > GetXSize())) &&
472                     (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
473                      (nYOff + nYSize == GetYSize() &&
474                       nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)) )
475                 {
476                     bJustInitialize = true;
477                     bMemZeroBuffer = true;
478                 }
479 
480 /* -------------------------------------------------------------------- */
481 /*      Ensure we have the appropriate block loaded.                    */
482 /* -------------------------------------------------------------------- */
483                 const GUInt32 nErrorCounter = CPLGetErrorCounter();
484                 poBlock = GetLockedBlockRef( nLBlockX, nLBlockY,
485                                              bJustInitialize );
486                 if( !poBlock )
487                 {
488                     if( strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == nullptr )
489                     {
490                         CPLError( CE_Failure, CPLE_AppDefined,
491                                 "GetBlockRef failed at X block offset %d, "
492                                 "Y block offset %d%s",
493                                 nLBlockX, nLBlockY,
494                                 (nErrorCounter != CPLGetErrorCounter()) ?
495                                     CPLSPrintf(": %s", CPLGetLastErrorMsg()) : "");
496                     }
497                     return( CE_Failure );
498                 }
499 
500                 if( eRWFlag == GF_Write )
501                     poBlock->MarkDirty();
502 
503                 pabySrcBlock =  static_cast<GByte *>(poBlock->GetDataRef());
504                 if( bMemZeroBuffer )
505                 {
506                     memset(pabySrcBlock, 0,
507                            static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize * nBlockYSize);
508                 }
509 /* -------------------------------------------------------------------- */
510 /*      Copy over this chunk of data.                                   */
511 /* -------------------------------------------------------------------- */
512                 iSrcOffset =
513                     (static_cast<GPtrDiff_t>(iSrcX)
514                      - static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize)
515                      + (static_cast<GPtrDiff_t>(iSrcY)
516                        - static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize)
517                      * nBlockXSize)
518                     * nBandDataSize;
519                 // Fill up as many rows as possible for the loaded block.
520                 const int kmax = std::min(
521                     nBlockYSize - (iSrcY % nBlockYSize), nBufYSize - iBufYOff );
522                 for(int k=0; k<kmax;k++)
523                 {
524                     if( eDataType == eBufType
525                         && nPixelSpace == nBufDataSize )
526                     {
527                         if( eRWFlag == GF_Read )
528                             memcpy( static_cast<GByte *>(pData) + iBufOffset
529                                     + static_cast<GPtrDiff_t>(k) * nLineSpace,
530                                     pabySrcBlock + iSrcOffset,
531                                     nXSpanSize );
532                         else
533                             memcpy( pabySrcBlock + iSrcOffset,
534                                     static_cast<GByte *>(pData) + iBufOffset
535                                     + static_cast<GPtrDiff_t>(k) * nLineSpace,
536                                     nXSpanSize );
537                     }
538                     else
539                     {
540                         /* type to type conversion */
541                         if( eRWFlag == GF_Read )
542                             GDALCopyWords(
543                                 pabySrcBlock + iSrcOffset,
544                                 eDataType, nBandDataSize,
545                                 static_cast<GByte *>(pData) + iBufOffset
546                                 + static_cast<GPtrDiff_t>(k) * nLineSpace,
547                                 eBufType, static_cast<int>(nPixelSpace),
548                                 nXSpan );
549                         else
550                             GDALCopyWords(
551                                 static_cast<GByte *>(pData) + iBufOffset +
552                                 static_cast<GPtrDiff_t>(k) * nLineSpace,
553                                 eBufType, static_cast<int>(nPixelSpace),
554                                 pabySrcBlock + iSrcOffset,
555                                 eDataType, nBandDataSize, nXSpan );
556                     }
557 
558                     iSrcOffset += nBlockXSize * nBandDataSize;
559                 }
560 
561                 iBufOffset = CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
562                 nLBlockX++;
563                 iSrcX+=nXSpan;
564 
565                 poBlock->DropLock();
566                 poBlock = nullptr;
567             }
568 
569             /* Compute the increment to go on a block boundary */
570             nYInc = nBlockYSize - (iSrcY % nBlockYSize);
571 
572             if( psExtraArg->pfnProgress != nullptr &&
573                 !psExtraArg->pfnProgress(
574                     1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
575                     psExtraArg->pProgressData) )
576             {
577                 return CE_Failure;
578             }
579         }
580 
581         return CE_None;
582     }
583 
584 /* ==================================================================== */
585 /*      Loop reading required source blocks to satisfy output           */
586 /*      request.  This is the most general implementation.              */
587 /* ==================================================================== */
588 
589     double dfXOff = nXOff;
590     double dfYOff = nYOff;
591     double dfXSize = nXSize;
592     double dfYSize = nYSize;
593     if( psExtraArg->bFloatingPointWindowValidity )
594     {
595         dfXOff = psExtraArg->dfXOff;
596         dfYOff = psExtraArg->dfYOff;
597         dfXSize = psExtraArg->dfXSize;
598         dfYSize = psExtraArg->dfYSize;
599     }
600 
601 /* -------------------------------------------------------------------- */
602 /*      Compute stepping increment.                                     */
603 /* -------------------------------------------------------------------- */
604     const double dfSrcXInc = dfXSize / static_cast<double>( nBufXSize );
605     const double dfSrcYInc = dfYSize / static_cast<double>( nBufYSize );
606     CPLErr eErr = CE_None;
607 
608     if (eRWFlag == GF_Write)
609     {
610 /* -------------------------------------------------------------------- */
611 /*    Write case                                                        */
612 /*    Loop over raster window computing source locations in the buffer. */
613 /* -------------------------------------------------------------------- */
614         GByte* pabyDstBlock = nullptr;
615         int nLBlockX = -1;
616         int nLBlockY = -1;
617 
618         for( int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY ++)
619         {
620             GPtrDiff_t iBufOffset = 0;
621             GPtrDiff_t iDstOffset = 0;
622             const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
623 
624             for( int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX ++)
625             {
626                 const int iBufXOff = static_cast<int>((iDstX - nXOff) / dfSrcXInc);
627                 iBufOffset =
628                     static_cast<GPtrDiff_t>(iBufYOff)
629                     * static_cast<GPtrDiff_t>(nLineSpace)
630                     + iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
631 
632                 // FIXME: this code likely doesn't work if the dirty block gets
633                 // flushed to disk before being completely written.
634                 // In the meantime, bJustInitialize should probably be set to
635                 // FALSE even if it is not ideal performance wise, and for
636                 // lossy compression.
637 
638     /* -------------------------------------------------------------------- */
639     /*      Ensure we have the appropriate block loaded.                    */
640     /* -------------------------------------------------------------------- */
641                 if( iDstX < nLBlockX * nBlockXSize
642                     || iDstX - nBlockXSize >= nLBlockX * nBlockXSize
643                     || iDstY < nLBlockY * nBlockYSize
644                     || iDstY - nBlockYSize >= nLBlockY * nBlockYSize )
645                 {
646                     nLBlockX = iDstX / nBlockXSize;
647                     nLBlockY = iDstY / nBlockYSize;
648 
649                     const bool bJustInitialize =
650                         nYOff <= nLBlockY * nBlockYSize
651                         && nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize
652                         && nXOff <= nLBlockX * nBlockXSize
653                         && nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
654                     /*bool bMemZeroBuffer = FALSE;
655                     if( !bJustInitialize &&
656                         nXOff <= nLBlockX * nBlockXSize &&
657                         nYOff <= nLBlockY * nBlockYSize &&
658                         (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
659                          (nXOff + nXSize == GetXSize() &&
660                          (nLBlockX+1) * nBlockXSize > GetXSize())) &&
661                         (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
662                          (nYOff + nYSize == GetYSize() &&
663                          (nLBlockY+1) * nBlockYSize > GetYSize())) )
664                     {
665                         bJustInitialize = TRUE;
666                         bMemZeroBuffer = TRUE;
667                     }*/
668                     if( poBlock != nullptr )
669                         poBlock->DropLock();
670 
671                     poBlock = GetLockedBlockRef( nLBlockX, nLBlockY,
672                                                  bJustInitialize );
673                     if( poBlock == nullptr )
674                     {
675                         return( CE_Failure );
676                     }
677 
678                     poBlock->MarkDirty();
679 
680                     pabyDstBlock =  static_cast<GByte *>(poBlock->GetDataRef());
681                     /*if( bMemZeroBuffer )
682                     {
683                         memset(pabyDstBlock, 0,
684                             static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize * nBlockYSize);
685                     }*/
686                 }
687 
688                 // To make Coverity happy. Should not happen by design.
689                 if( pabyDstBlock == nullptr )
690                 {
691                     CPLAssert(false);
692                     eErr = CE_Failure;
693                     break;
694                 }
695 
696     /* -------------------------------------------------------------------- */
697     /*      Copy over this pixel of data.                                   */
698     /* -------------------------------------------------------------------- */
699                 iDstOffset =
700                     (static_cast<GPtrDiff_t>(iDstX)
701                      - static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize
702                      + (static_cast<GPtrDiff_t>(iDstY)
703                         - static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize)
704                      * nBlockXSize )
705                     * nBandDataSize;
706 
707                 if( eDataType == eBufType )
708                 {
709                     memcpy(
710                         pabyDstBlock + iDstOffset,
711                         static_cast<GByte *>(pData) + iBufOffset,
712                         nBandDataSize );
713                 }
714                 else
715                 {
716                     /* type to type conversion ... ouch, this is expensive way
717                     of handling single words */
718 
719                     GDALCopyWords(
720                         static_cast<GByte *>(pData) + iBufOffset, eBufType, 0,
721                         pabyDstBlock + iDstOffset, eDataType, 0,
722                         1 );
723                 }
724             }
725 
726             if( psExtraArg->pfnProgress != nullptr &&
727                 !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
728                                          psExtraArg->pProgressData) )
729             {
730                 eErr = CE_Failure;
731                 break;
732             }
733         }
734     }
735     else
736     {
737         if( psExtraArg->eResampleAlg != GRIORA_NearestNeighbour )
738         {
739             if( (psExtraArg->eResampleAlg == GRIORA_Cubic ||
740                  psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
741                  psExtraArg->eResampleAlg == GRIORA_Bilinear ||
742                  psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
743                 GetColorTable() != nullptr )
744             {
745                 CPLError(CE_Warning, CPLE_NotSupported,
746                          "Resampling method not supported on paletted band. "
747                          "Falling back to nearest neighbour");
748             }
749             else if( psExtraArg->eResampleAlg == GRIORA_Gauss &&
750                      GDALDataTypeIsComplex( eDataType ) )
751             {
752                 CPLError(
753                     CE_Warning, CPLE_NotSupported,
754                     "Resampling method not supported on complex data type "
755                     "band. Falling back to nearest neighbour");
756             }
757             else
758             {
759                 return RasterIOResampled( eRWFlag,
760                                           nXOff, nYOff, nXSize, nYSize,
761                                           pData, nBufXSize, nBufYSize,
762                                           eBufType,
763                                           nPixelSpace, nLineSpace,
764                                           psExtraArg );
765             }
766         }
767 
768         int nLimitBlockY = 0;
769         const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
770         int nStartBlockX = -nBlockXSize;
771         const double EPS = 1e-10;
772         int nLBlockY = -1;
773         const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
774         const bool bIntegerXFactor = bUseIntegerRequestCoords &&
775                                      static_cast<int>(dfSrcXInc) == dfSrcXInc &&
776                                      static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
777 
778 /* -------------------------------------------------------------------- */
779 /*      Read case                                                       */
780 /*      Loop over buffer computing source locations.                    */
781 /* -------------------------------------------------------------------- */
782         for( int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++ )
783         {
784             // Add small epsilon to avoid some numeric precision issues.
785             const double dfSrcY = (iBufYOff+0.5) * dfSrcYInc + dfYOff + EPS;
786             const int iSrcY = static_cast<int>(std::min(std::max(0.0, dfSrcY),
787                                         static_cast<double>(nRasterYSize - 1)));
788 
789             GPtrDiff_t iBufOffset =
790                 static_cast<GPtrDiff_t>(iBufYOff)
791                 * static_cast<GPtrDiff_t>(nLineSpace);
792 
793             if( iSrcY >= nLimitBlockY )
794             {
795                 nLBlockY = iSrcY / nBlockYSize;
796                 nLimitBlockY = nLBlockY * nBlockYSize;
797                 if( nLimitBlockY < INT_MAX - nBlockYSize )
798                     nLimitBlockY += nBlockYSize;
799                 else
800                     nLimitBlockY = INT_MAX;
801                 // Make sure a new block is loaded.
802                 nStartBlockX = -nBlockXSize;
803             }
804             else if( static_cast<int>(dfSrcXStart) < nStartBlockX )
805             {
806                 // Make sure a new block is loaded.
807                 nStartBlockX = -nBlockXSize;
808             }
809 
810             GPtrDiff_t iSrcOffsetCst =
811                 (iSrcY - nLBlockY*nBlockYSize)
812                 * static_cast<GPtrDiff_t>(nBlockXSize);
813 
814             if( bIntegerXFactor )
815             {
816                 int iSrcX = static_cast<int>(dfSrcXStart);
817                 const int nSrcXInc = static_cast<int>(dfSrcXInc);
818                 GByte* pabyDstData= static_cast<GByte*>(pData) + iBufOffset;
819                 bool bRet = false;
820                 if( bByteCopy )
821                 {
822                     bRet = DownsamplingIntegerXFactor<true, 1>(this, iSrcX, nSrcXInc,
823                                                 iSrcOffsetCst,
824                                                 pabyDstData,
825                                                 static_cast<int>(nPixelSpace),
826                                                 nBufXSize,
827                                                 GDT_Byte, GDT_Byte,
828                                                 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
829                 }
830                 else if( eDataType == eBufType )
831                 {
832                     switch( nBandDataSize )
833                     {
834                         case 2:
835                             bRet = DownsamplingIntegerXFactor<true, 2>(this, iSrcX, nSrcXInc,
836                                                 iSrcOffsetCst,
837                                                 pabyDstData,
838                                                 static_cast<int>(nPixelSpace),
839                                                 nBufXSize,
840                                                 eDataType, eDataType,
841                                                 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
842                             break;
843                         case 4:
844                             bRet = DownsamplingIntegerXFactor<true, 4>(this, iSrcX, nSrcXInc,
845                                                 iSrcOffsetCst,
846                                                 pabyDstData,
847                                                 static_cast<int>(nPixelSpace),
848                                                 nBufXSize,
849                                                 eDataType, eDataType,
850                                                 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
851                             break;
852                         case 8:
853                             bRet = DownsamplingIntegerXFactor<true, 8>(this, iSrcX, nSrcXInc,
854                                                 iSrcOffsetCst,
855                                                 pabyDstData,
856                                                 static_cast<int>(nPixelSpace),
857                                                 nBufXSize,
858                                                 eDataType, eDataType,
859                                                 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
860                             break;
861                         case 16:
862                             bRet = DownsamplingIntegerXFactor<true, 16>(this, iSrcX, nSrcXInc,
863                                                 iSrcOffsetCst,
864                                                 pabyDstData,
865                                                 static_cast<int>(nPixelSpace),
866                                                 nBufXSize,
867                                                 eDataType, eDataType,
868                                                 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
869                             break;
870                         default:
871                             CPLAssert(false);
872                             break;
873                     }
874                 }
875                 else
876                 {
877                     bRet = DownsamplingIntegerXFactor<false, 0>(this, iSrcX, nSrcXInc,
878                                         iSrcOffsetCst,
879                                         pabyDstData,
880                                         static_cast<int>(nPixelSpace),
881                                         nBufXSize,
882                                         eDataType, eBufType,
883                                         nStartBlockX, nBlockXSize, poBlock, nLBlockY);
884                 }
885                 if( !bRet )
886                     eErr = CE_Failure;
887             }
888             else
889             {
890               double dfSrcX = dfSrcXStart;
891               for( int iBufXOff = 0;
892                    iBufXOff < nBufXSize;
893                   iBufXOff++, dfSrcX += dfSrcXInc )
894               {
895                 // TODO?: try to avoid the clamping for most iterations
896                 const int iSrcX = static_cast<int>(std::min(std::max(0.0, dfSrcX),
897                                         static_cast<double>(nRasterXSize - 1)));
898 
899     /* -------------------------------------------------------------------- */
900     /*      Ensure we have the appropriate block loaded.                    */
901     /* -------------------------------------------------------------------- */
902                 if( iSrcX >= nBlockXSize + nStartBlockX )
903                 {
904                     const int nLBlockX = iSrcX / nBlockXSize;
905                     nStartBlockX = nLBlockX * nBlockXSize;
906 
907                     if( poBlock != nullptr )
908                         poBlock->DropLock();
909 
910                     poBlock = GetLockedBlockRef( nLBlockX, nLBlockY, FALSE );
911                     if( poBlock == nullptr )
912                     {
913                         eErr = CE_Failure;
914                         break;
915                     }
916 
917                     pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
918                 }
919                 const GPtrDiff_t nDiffX = static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
920 
921     /* -------------------------------------------------------------------- */
922     /*      Copy over this pixel of data.                                   */
923     /* -------------------------------------------------------------------- */
924 
925                 if( bByteCopy )
926                 {
927                     GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
928                     static_cast<GByte *>(pData)[iBufOffset] =
929                         pabySrcBlock[iSrcOffset];
930                 }
931                 else if( eDataType == eBufType )
932                 {
933                     GPtrDiff_t iSrcOffset =
934                         (nDiffX + iSrcOffsetCst)
935                         * nBandDataSize;
936                     memcpy( static_cast<GByte *>(pData) + iBufOffset,
937                             pabySrcBlock + iSrcOffset, nBandDataSize );
938                 }
939                 else
940                 {
941                     // Type to type conversion ...
942                     GPtrDiff_t iSrcOffset =
943                         (nDiffX + iSrcOffsetCst)
944                         * nBandDataSize;
945                     GDALCopyWords(
946                         pabySrcBlock + iSrcOffset, eDataType, 0,
947                         static_cast<GByte *>(pData) + iBufOffset, eBufType, 0,
948                         1 );
949                 }
950 
951                 iBufOffset += static_cast<int>(nPixelSpace);
952               }
953             }
954             if( eErr == CE_Failure )
955                 break;
956 
957             if( psExtraArg->pfnProgress != nullptr &&
958                 !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
959                                          psExtraArg->pProgressData) )
960             {
961                 eErr = CE_Failure;
962                 break;
963             }
964         }
965     }
966 
967     if( poBlock != nullptr )
968         poBlock->DropLock();
969 
970     return eErr;
971 }
972 
973 /************************************************************************/
974 /*                         GDALRasterIOTransformer()                    */
975 /************************************************************************/
976 
977 struct GDALRasterIOTransformerStruct
978 {
979     double dfXOff;
980     double dfYOff;
981     double dfXRatioDstToSrc;
982     double dfYRatioDstToSrc;
983 };
984 
GDALRasterIOTransformer(void * pTransformerArg,int bDstToSrc,int nPointCount,double * x,double * y,double *,int * panSuccess)985 static int GDALRasterIOTransformer( void *pTransformerArg,
986                                     int bDstToSrc,
987                                     int nPointCount,
988                                     double *x, double *y, double * /* z */,
989                                     int *panSuccess )
990 {
991     GDALRasterIOTransformerStruct* psParams =
992         static_cast<GDALRasterIOTransformerStruct *>( pTransformerArg );
993     if( bDstToSrc )
994     {
995         for(int i = 0; i < nPointCount; i++)
996         {
997             x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
998             y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
999             panSuccess[i] = TRUE;
1000         }
1001     }
1002     else
1003     {
1004         for(int i = 0; i < nPointCount; i++)
1005         {
1006             x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
1007             y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
1008             panSuccess[i] = TRUE;
1009         }
1010     }
1011     return TRUE;
1012 }
1013 
1014 /************************************************************************/
1015 /*                          RasterIOResampled()                         */
1016 /************************************************************************/
1017 
1018 //! @cond Doxygen_Suppress
RasterIOResampled(GDALRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,GSpacing nPixelSpace,GSpacing nLineSpace,GDALRasterIOExtraArg * psExtraArg)1019 CPLErr GDALRasterBand::RasterIOResampled(
1020     GDALRWFlag /* eRWFlag */,
1021     int nXOff, int nYOff, int nXSize, int nYSize,
1022     void * pData, int nBufXSize, int nBufYSize,
1023     GDALDataType eBufType,
1024     GSpacing nPixelSpace, GSpacing nLineSpace,
1025     GDALRasterIOExtraArg* psExtraArg )
1026 {
1027     // Determine if we use warping resampling or overview resampling
1028     bool bUseWarp = false;
1029     if( GDALDataTypeIsComplex( eDataType ) )
1030         bUseWarp = true;
1031 
1032     double dfXOff = nXOff;
1033     double dfYOff = nYOff;
1034     double dfXSize = nXSize;
1035     double dfYSize = nYSize;
1036     if( psExtraArg->bFloatingPointWindowValidity )
1037     {
1038         dfXOff = psExtraArg->dfXOff;
1039         dfYOff = psExtraArg->dfYOff;
1040         dfXSize = psExtraArg->dfXSize;
1041         dfYSize = psExtraArg->dfYSize;
1042     }
1043 
1044     const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1045     const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1046 
1047     // Determine the coordinates in the "virtual" output raster to see
1048     // if there are not integers, in which case we will use them as a shift
1049     // so that subwindow extracts give the exact same results as entire raster
1050     // scaling.
1051     double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1052     bool bHasXOffVirtual = false;
1053     int nDestXOffVirtual = 0;
1054     if( fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8 )
1055     {
1056         bHasXOffVirtual = true;
1057         dfXOff = nXOff;
1058         nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1059     }
1060 
1061     double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1062     bool bHasYOffVirtual = false;
1063     int nDestYOffVirtual = 0;
1064     if( fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8 )
1065     {
1066         bHasYOffVirtual = true;
1067         dfYOff = nYOff;
1068         nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1069     }
1070 
1071     // Create a MEM dataset that wraps the output buffer.
1072     GDALDataset* poMEMDS;
1073     void* pTempBuffer = nullptr;
1074     GSpacing nPSMem = nPixelSpace;
1075     GSpacing nLSMem = nLineSpace;
1076     void* pDataMem = pData;
1077     GDALDataType eDTMem = eBufType;
1078     if( eBufType != eDataType )
1079     {
1080         nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1081         nLSMem = nPSMem * nBufXSize;
1082         pTempBuffer = VSI_MALLOC2_VERBOSE( nBufYSize, static_cast<size_t>(nLSMem) );
1083         if( pTempBuffer == nullptr )
1084             return CE_Failure;
1085         pDataMem = pTempBuffer;
1086         eDTMem = eDataType;
1087     }
1088 
1089     poMEMDS = MEMDataset::Create( "", nDestXOffVirtual + nBufXSize,
1090                                   nDestYOffVirtual + nBufYSize, 0,
1091                                   eDTMem, nullptr );
1092     char szBuffer[32] = { '\0' };
1093     int nRet =
1094         CPLPrintPointer(
1095             szBuffer, static_cast<GByte*>(pDataMem)
1096             - nPSMem * nDestXOffVirtual
1097             - nLSMem * nDestYOffVirtual, sizeof(szBuffer));
1098     szBuffer[nRet] = '\0';
1099 
1100     char szBuffer0[64] = { '\0' };
1101     snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
1102     char szBuffer1[64] = { '\0' };
1103     snprintf( szBuffer1, sizeof(szBuffer1),
1104               "PIXELOFFSET=" CPL_FRMT_GIB, static_cast<GIntBig>(nPSMem) );
1105     char szBuffer2[64] = { '\0' };
1106     snprintf( szBuffer2, sizeof(szBuffer2),
1107               "LINEOFFSET=" CPL_FRMT_GIB, static_cast<GIntBig>(nLSMem) );
1108     char* apszOptions[4] = { szBuffer0, szBuffer1, szBuffer2, nullptr };
1109 
1110     poMEMDS->AddBand(eDTMem, apszOptions);
1111 
1112     GDALRasterBandH hMEMBand = poMEMDS->GetRasterBand(1);
1113 
1114     const char* pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1115     if( pszNBITS )
1116         reinterpret_cast<GDALRasterBand *>(hMEMBand)->
1117             SetMetadataItem("NBITS", pszNBITS, "IMAGE_STRUCTURE");
1118 
1119     CPLErr eErr = CE_None;
1120 
1121     // Do the resampling.
1122     if( bUseWarp )
1123     {
1124         int bHasNoData = FALSE;
1125         double dfNoDataValue = GetNoDataValue(&bHasNoData) ;
1126 
1127         VRTDatasetH hVRTDS = nullptr;
1128         GDALRasterBandH hVRTBand = nullptr;
1129         if( GetDataset() == nullptr )
1130         {
1131             /* Create VRT dataset that wraps the whole dataset */
1132             hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1133             VRTAddBand( hVRTDS, eDataType, nullptr );
1134             hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1135             VRTAddSimpleSource( hVRTBand,
1136                                 this,
1137                                 0, 0,
1138                                 nRasterXSize, nRasterYSize,
1139                                 0, 0,
1140                                 nRasterXSize, nRasterYSize,
1141                                 nullptr, VRT_NODATA_UNSET );
1142 
1143             /* Add a mask band if needed */
1144             if( GetMaskFlags() != GMF_ALL_VALID )
1145             {
1146               reinterpret_cast<GDALDataset *>(hVRTDS)->CreateMaskBand(0);
1147                 VRTSourcedRasterBand* poVRTMaskBand =
1148                     reinterpret_cast<VRTSourcedRasterBand *>(
1149                         reinterpret_cast<GDALRasterBand *>(hVRTBand)->
1150                             GetMaskBand());
1151                 poVRTMaskBand->
1152                     AddMaskBandSource( this,
1153                                        0, 0,
1154                                        nRasterXSize, nRasterYSize,
1155                                        0, 0,
1156                                        nRasterXSize, nRasterYSize );
1157             }
1158         }
1159 
1160         GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1161         switch(psExtraArg->eResampleAlg)
1162         {
1163             case GRIORA_NearestNeighbour:
1164                 psWarpOptions->eResampleAlg = GRA_NearestNeighbour; break;
1165             case GRIORA_Bilinear:
1166                 psWarpOptions->eResampleAlg = GRA_Bilinear; break;
1167             case GRIORA_Cubic:
1168                 psWarpOptions->eResampleAlg = GRA_Cubic; break;
1169             case GRIORA_CubicSpline:
1170                 psWarpOptions->eResampleAlg = GRA_CubicSpline; break;
1171             case GRIORA_Lanczos:
1172                 psWarpOptions->eResampleAlg = GRA_Lanczos; break;
1173             case GRIORA_Average:
1174                 psWarpOptions->eResampleAlg = GRA_Average; break;
1175             case GRIORA_RMS:
1176                 psWarpOptions->eResampleAlg = GRA_RMS; break;
1177             case GRIORA_Mode:
1178                 psWarpOptions->eResampleAlg = GRA_Mode; break;
1179             default:
1180                 CPLAssert(false);
1181                 psWarpOptions->eResampleAlg = GRA_NearestNeighbour; break;
1182         }
1183         psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1184         psWarpOptions->hDstDS = poMEMDS;
1185         psWarpOptions->nBandCount = 1;
1186         int nSrcBandNumber = hVRTDS ? 1 : nBand;
1187         int nDstBandNumber = 1;
1188         psWarpOptions->panSrcBands = &nSrcBandNumber;
1189         psWarpOptions->panDstBands = &nDstBandNumber;
1190         psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1191                     psExtraArg->pfnProgress : GDALDummyProgress;
1192         psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1193         psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1194         if (bHasNoData)
1195         {
1196             psWarpOptions->papszWarpOptions = CSLSetNameValue( psWarpOptions->papszWarpOptions,
1197                                                     "INIT_DEST", "NO_DATA");
1198             if (psWarpOptions->padfSrcNoDataReal == nullptr)
1199             {
1200                 psWarpOptions->padfSrcNoDataReal = static_cast<double*>(CPLMalloc(sizeof(double)));
1201                 psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1202             }
1203 
1204             if (psWarpOptions->padfDstNoDataReal == nullptr)
1205             {
1206                 psWarpOptions->padfDstNoDataReal = static_cast<double*>(CPLMalloc(sizeof(double)));
1207                 psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1208             }
1209         }
1210 
1211         GDALRasterIOTransformerStruct sTransformer;
1212         sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1213         sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1214         sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1215         sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1216         psWarpOptions->pTransformerArg = &sTransformer;
1217 
1218         GDALWarpOperationH hWarpOperation =
1219             GDALCreateWarpOperation(psWarpOptions);
1220         eErr = GDALChunkAndWarpImage( hWarpOperation,
1221                                       nDestXOffVirtual, nDestYOffVirtual,
1222                                       nBufXSize, nBufYSize );
1223         GDALDestroyWarpOperation( hWarpOperation );
1224 
1225         psWarpOptions->panSrcBands = nullptr;
1226         psWarpOptions->panDstBands = nullptr;
1227         GDALDestroyWarpOptions( psWarpOptions );
1228 
1229         if( hVRTDS )
1230             GDALClose(hVRTDS);
1231     }
1232     else
1233     {
1234         const char* pszResampling =
1235             (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" :
1236             (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" :
1237             (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" :
1238             (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" :
1239             (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" :
1240             (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" :
1241             (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" :
1242             (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" : "UNKNOWN";
1243 
1244         int nKernelRadius = 0;
1245         GDALResampleFunction pfnResampleFunc =
1246                         GDALGetResampleFunction(pszResampling, &nKernelRadius);
1247         CPLAssert(pfnResampleFunc);
1248         GDALDataType eWrkDataType =
1249             GDALGetOvrWorkDataType(pszResampling, eDataType);
1250         int bHasNoData = FALSE;
1251         float fNoDataValue = static_cast<float>( GetNoDataValue(&bHasNoData) );
1252         if( !bHasNoData )
1253             fNoDataValue = 0.0f;
1254 
1255         int nDstBlockXSize = nBufXSize;
1256         int nDstBlockYSize = nBufYSize;
1257         int nFullResXChunk = 0;
1258         int nFullResYChunk = 0;
1259         while( true )
1260         {
1261             nFullResXChunk =
1262                 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1263             nFullResYChunk =
1264                 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1265             if( nFullResXChunk > nRasterXSize )
1266                 nFullResXChunk = nRasterXSize;
1267             if( nFullResYChunk > nRasterYSize )
1268                 nFullResYChunk = nRasterYSize;
1269             if( (nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1270                 (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= 1024 * 1024) )
1271                 break;
1272             // When operating on the full width of a raster whose block width is
1273             // the raster width, prefer doing chunks in height.
1274             if( nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1275                 nDstBlockYSize > 1 )
1276                 nDstBlockYSize /= 2;
1277             /* Otherwise cut the maximal dimension */
1278             else if( nDstBlockXSize > 1 &&
1279                      (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1) )
1280                 nDstBlockXSize /= 2;
1281             else
1282                 nDstBlockYSize /= 2;
1283         }
1284 
1285         int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1286         int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1287         if( nOvrXFactor == 0 ) nOvrXFactor = 1;
1288         if( nOvrYFactor == 0 ) nOvrYFactor = 1;
1289         int nFullResXSizeQueried =
1290             nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1291         int nFullResYSizeQueried =
1292             nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1293 
1294         if( nFullResXSizeQueried > nRasterXSize )
1295             nFullResXSizeQueried = nRasterXSize;
1296         if( nFullResYSizeQueried > nRasterYSize )
1297             nFullResYSizeQueried = nRasterYSize;
1298 
1299         void * pChunk =
1300             VSI_MALLOC3_VERBOSE( GDALGetDataTypeSizeBytes(eWrkDataType),
1301                                  nFullResXSizeQueried, nFullResYSizeQueried );
1302         GByte * pabyChunkNoDataMask = nullptr;
1303 
1304         GDALRasterBand* poMaskBand = GetMaskBand();
1305         int l_nMaskFlags = GetMaskFlags();
1306 
1307         bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1308         if (bUseNoDataMask)
1309         {
1310             pabyChunkNoDataMask = static_cast<GByte *>(
1311                 VSI_MALLOC2_VERBOSE( nFullResXSizeQueried,
1312                                      nFullResYSizeQueried ) );
1313         }
1314         if( pChunk == nullptr || (bUseNoDataMask && pabyChunkNoDataMask == nullptr) )
1315         {
1316             GDALClose(poMEMDS);
1317             CPLFree(pChunk);
1318             CPLFree(pabyChunkNoDataMask);
1319             VSIFree(pTempBuffer);
1320             return CE_Failure;
1321         }
1322 
1323         int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1324                            ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1325         int nBlocksDone = 0;
1326 
1327         int nDstYOff;
1328         for( nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1329             nDstYOff += nDstBlockYSize )
1330         {
1331             int nDstYCount;
1332             if  (nDstYOff + nDstBlockYSize <= nBufYSize)
1333                 nDstYCount = nDstBlockYSize;
1334             else
1335                 nDstYCount = nBufYSize - nDstYOff;
1336 
1337             int nChunkYOff =
1338                 nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1339             int nChunkYOff2 =
1340                 nYOff + 1 +
1341                 static_cast<int>(
1342                     ceil((nDstYOff + nDstYCount) * dfYRatioDstToSrc ) );
1343             if( nChunkYOff2 > nRasterYSize )
1344                 nChunkYOff2 = nRasterYSize;
1345             int nYCount = nChunkYOff2 - nChunkYOff;
1346             CPLAssert(nYCount <= nFullResYChunk);
1347 
1348             int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1349             int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1350             if( nChunkYOffQueried < 0 )
1351             {
1352                 nChunkYSizeQueried += nChunkYOffQueried;
1353                 nChunkYOffQueried = 0;
1354             }
1355             if( nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize )
1356                 nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1357             CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1358 
1359             int nDstXOff = 0;
1360             for( nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1361                 nDstXOff += nDstBlockXSize )
1362             {
1363                 int nDstXCount = 0;
1364                 if  (nDstXOff + nDstBlockXSize <= nBufXSize)
1365                     nDstXCount = nDstBlockXSize;
1366                 else
1367                     nDstXCount = nBufXSize - nDstXOff;
1368 
1369                 int nChunkXOff =
1370                     nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1371                 int nChunkXOff2 =
1372                     nXOff + 1 +
1373                     static_cast<int>(
1374                         ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1375                 if( nChunkXOff2 > nRasterXSize )
1376                     nChunkXOff2 = nRasterXSize;
1377                 int nXCount = nChunkXOff2 - nChunkXOff;
1378                 CPLAssert(nXCount <= nFullResXChunk);
1379 
1380                 int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrXFactor;
1381                 int nChunkXSizeQueried =
1382                     nXCount + 2 * nKernelRadius * nOvrXFactor;
1383                 if( nChunkXOffQueried < 0 )
1384                 {
1385                     nChunkXSizeQueried += nChunkXOffQueried;
1386                     nChunkXOffQueried = 0;
1387                 }
1388                 if( nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize )
1389                     nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1390                 CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1391 
1392                 // Read the source buffers.
1393                 eErr = RasterIO( GF_Read,
1394                                 nChunkXOffQueried, nChunkYOffQueried,
1395                                 nChunkXSizeQueried, nChunkYSizeQueried,
1396                                 pChunk,
1397                                 nChunkXSizeQueried, nChunkYSizeQueried,
1398                                 eWrkDataType, 0, 0, nullptr );
1399 
1400                 bool bSkipResample = false;
1401                 bool bNoDataMaskFullyOpaque = false;
1402                 if (eErr == CE_None && bUseNoDataMask)
1403                 {
1404                     eErr = poMaskBand->RasterIO( GF_Read,
1405                                                  nChunkXOffQueried,
1406                                                  nChunkYOffQueried,
1407                                                  nChunkXSizeQueried,
1408                                                  nChunkYSizeQueried,
1409                                                  pabyChunkNoDataMask,
1410                                                  nChunkXSizeQueried,
1411                                                  nChunkYSizeQueried,
1412                                                  GDT_Byte, 0, 0, nullptr );
1413 
1414                     /* Optimizations if mask if fully opaque or transparent */
1415                     int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1416                     GByte bVal = pabyChunkNoDataMask[0];
1417                     int i = 1;
1418                     for( ; i < nPixels; i++ )
1419                     {
1420                         if( pabyChunkNoDataMask[i] != bVal )
1421                             break;
1422                     }
1423                     if( i == nPixels )
1424                     {
1425                         if( bVal == 0 )
1426                         {
1427                             for(int j=0;j<nDstYCount;j++)
1428                             {
1429                                 GDALCopyWords(
1430                                     &fNoDataValue, GDT_Float32, 0,
1431                                     static_cast<GByte*>(pDataMem) +
1432                                     nLSMem * (j + nDstYOff) +
1433                                     nDstXOff * nPSMem,
1434                                     eDTMem, static_cast<int>(nPSMem),
1435                                     nDstXCount);
1436                             }
1437                             bSkipResample = true;
1438                         }
1439                         else
1440                         {
1441                             bNoDataMaskFullyOpaque = true;
1442                         }
1443                     }
1444                 }
1445 
1446                 if( !bSkipResample && eErr == CE_None )
1447                 {
1448                     const bool bPropagateNoData = false;
1449                     void* pDstBuffer = nullptr;
1450                     GDALDataType eDstBufferDataType = GDT_Unknown;
1451                     GDALRasterBand* poMEMBand = GDALRasterBand::FromHandle(hMEMBand);
1452                     eErr = pfnResampleFunc(
1453                         dfXRatioDstToSrc,
1454                         dfYRatioDstToSrc,
1455                         dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1456                         dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1457                         eWrkDataType,
1458                         pChunk,
1459                         bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1460                         nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1461                         nChunkXSizeQueried,
1462                         nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1463                         nChunkYSizeQueried,
1464                         nDstXOff + nDestXOffVirtual,
1465                         nDstXOff + nDestXOffVirtual + nDstXCount,
1466                         nDstYOff + nDestYOffVirtual,
1467                         nDstYOff + nDestYOffVirtual + nDstYCount,
1468                         poMEMBand,
1469                         &pDstBuffer,
1470                         &eDstBufferDataType,
1471                         pszResampling,
1472                         bHasNoData, fNoDataValue,
1473                         GetColorTable(),
1474                         eDataType,
1475                         bPropagateNoData);
1476                     if( eErr == CE_None )
1477                     {
1478                         eErr = poMEMBand->RasterIO(
1479                             GF_Write,
1480                             nDstXOff + nDestXOffVirtual,
1481                             nDstYOff + nDestYOffVirtual,
1482                             nDstXCount,
1483                             nDstYCount,
1484                             pDstBuffer,
1485                             nDstXCount,
1486                             nDstYCount,
1487                             eDstBufferDataType,
1488                             0, 0, nullptr );
1489                     }
1490                     CPLFree(pDstBuffer);
1491                 }
1492 
1493                 nBlocksDone ++;
1494                 if( eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1495                     !psExtraArg->pfnProgress(
1496                         1.0 * nBlocksDone / nTotalBlocks, "",
1497                         psExtraArg->pProgressData) )
1498                 {
1499                     eErr = CE_Failure;
1500                 }
1501             }
1502         }
1503 
1504         CPLFree(pChunk);
1505         CPLFree(pabyChunkNoDataMask);
1506     }
1507 
1508     if( eBufType != eDataType )
1509     {
1510         CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(GF_Read,
1511                           nDestXOffVirtual, nDestYOffVirtual,
1512                           nBufXSize, nBufYSize,
1513                           pData,
1514                           nBufXSize, nBufYSize,
1515                           eBufType,
1516                           nPixelSpace, nLineSpace,
1517                           nullptr));
1518     }
1519     GDALClose(poMEMDS);
1520     VSIFree(pTempBuffer);
1521 
1522     return eErr;
1523 }
1524 
1525 /************************************************************************/
1526 /*                          RasterIOResampled()                         */
1527 /************************************************************************/
1528 
RasterIOResampled(GDALRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,int nBandCount,int * panBandMap,GSpacing nPixelSpace,GSpacing nLineSpace,GSpacing nBandSpace,GDALRasterIOExtraArg * psExtraArg)1529 CPLErr GDALDataset::RasterIOResampled(
1530     GDALRWFlag /* eRWFlag */,
1531     int nXOff, int nYOff, int nXSize, int nYSize,
1532     void *pData, int nBufXSize, int nBufYSize,
1533     GDALDataType eBufType,
1534     int nBandCount, int *panBandMap,
1535     GSpacing nPixelSpace, GSpacing nLineSpace,
1536     GSpacing nBandSpace,
1537     GDALRasterIOExtraArg* psExtraArg )
1538 
1539 {
1540 #if 0
1541     // Determine if we use warping resampling or overview resampling
1542     bool bUseWarp = false;
1543     if( GDALDataTypeIsComplex( eDataType ) )
1544         bUseWarp = true;
1545 #endif
1546 
1547     double dfXOff = nXOff;
1548     double dfYOff = nYOff;
1549     double dfXSize = nXSize;
1550     double dfYSize = nYSize;
1551     if( psExtraArg->bFloatingPointWindowValidity )
1552     {
1553         dfXOff = psExtraArg->dfXOff;
1554         dfYOff = psExtraArg->dfYOff;
1555         dfXSize = psExtraArg->dfXSize;
1556         dfYSize = psExtraArg->dfYSize;
1557     }
1558 
1559     const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1560     const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1561 
1562     // Determine the coordinates in the "virtual" output raster to see
1563     // if there are not integers, in which case we will use them as a shift
1564     // so that subwindow extracts give the exact same results as entire raster
1565     // scaling.
1566     double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1567     bool bHasXOffVirtual = false;
1568     int nDestXOffVirtual = 0;
1569     if( fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8 )
1570     {
1571         bHasXOffVirtual = true;
1572         dfXOff = nXOff;
1573         nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1574     }
1575 
1576     double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1577     bool bHasYOffVirtual = false;
1578     int nDestYOffVirtual = 0;
1579     if( fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8 )
1580     {
1581         bHasYOffVirtual = true;
1582         dfYOff = nYOff;
1583         nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1584     }
1585 
1586     // Create a MEM dataset that wraps the output buffer.
1587     GDALDataset* poMEMDS = MEMDataset::Create( "", nDestXOffVirtual + nBufXSize,
1588                                                nDestYOffVirtual + nBufYSize, 0,
1589                                                eBufType, nullptr );
1590     GDALRasterBand** papoDstBands =
1591         static_cast<GDALRasterBand **>(
1592             CPLMalloc( nBandCount * sizeof(GDALRasterBand*)) );
1593     for(int i=0;i<nBandCount;i++)
1594     {
1595         char szBuffer[32] = { '\0' };
1596         int nRet = CPLPrintPointer(
1597             szBuffer,
1598             static_cast<GByte*>(pData) - nPixelSpace * nDestXOffVirtual
1599             - nLineSpace * nDestYOffVirtual + nBandSpace * i, sizeof(szBuffer));
1600         szBuffer[nRet] = 0;
1601 
1602         char szBuffer0[64] = { '\0' };
1603         snprintf( szBuffer0, sizeof(szBuffer0),
1604                   "DATAPOINTER=%s", szBuffer );
1605 
1606         char szBuffer1[64] = { '\0' };
1607         snprintf( szBuffer1, sizeof(szBuffer1),
1608                   "PIXELOFFSET=" CPL_FRMT_GIB,
1609                   static_cast<GIntBig>(nPixelSpace) );
1610 
1611         char szBuffer2[64] = { '\0' };
1612         snprintf( szBuffer2, sizeof(szBuffer2),
1613                   "LINEOFFSET=" CPL_FRMT_GIB,
1614                   static_cast<GIntBig>(nLineSpace) );
1615 
1616         char* apszOptions[4] = { szBuffer0, szBuffer1, szBuffer2, nullptr };
1617 
1618         poMEMDS->AddBand(eBufType, apszOptions);
1619 
1620         GDALRasterBand* poSrcBand = GetRasterBand(panBandMap[i]);
1621         papoDstBands[i] = poMEMDS->GetRasterBand(i+1);
1622         const char* pszNBITS = poSrcBand->GetMetadataItem( "NBITS",
1623                                                            "IMAGE_STRUCTURE" );
1624         if( pszNBITS )
1625             poMEMDS->GetRasterBand(i+1)->SetMetadataItem( "NBITS", pszNBITS,
1626                                                           "IMAGE_STRUCTURE" );
1627     }
1628 
1629     CPLErr eErr = CE_None;
1630 
1631     // TODO(schwehr): Why disabled?  Why not just delete?
1632     // Looks like this code was initially added as disable by copying
1633     // from RasterIO here:
1634     // https://trac.osgeo.org/gdal/changeset/29572
1635 #if 0
1636     // Do the resampling.
1637     if( bUseWarp )
1638     {
1639         VRTDatasetH hVRTDS = nullptr;
1640         GDALRasterBandH hVRTBand = nullptr;
1641         if( GetDataset() == nullptr )
1642         {
1643             /* Create VRT dataset that wraps the whole dataset */
1644             hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1645             VRTAddBand( hVRTDS, eDataType, nullptr );
1646             hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1647             VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1648                                 (GDALRasterBandH)this,
1649                                 0, 0,
1650                                 nRasterXSize, nRasterYSize,
1651                                 0, 0,
1652                                 nRasterXSize, nRasterYSize,
1653                                 nullptr, VRT_NODATA_UNSET );
1654 
1655             /* Add a mask band if needed */
1656             if( GetMaskFlags() != GMF_ALL_VALID )
1657             {
1658                 ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1659                 VRTSourcedRasterBand* poVRTMaskBand =
1660                     (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1661                 poVRTMaskBand->
1662                     AddMaskBandSource( this,
1663                                     0, 0,
1664                                     nRasterXSize, nRasterYSize,
1665                                     0, 0,
1666                                     nRasterXSize, nRasterYSize);
1667             }
1668         }
1669 
1670         GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1671         psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1672         psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1673         psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1674         psWarpOptions->nBandCount = 1;
1675         int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1676         int nDstBandNumber = 1;
1677         psWarpOptions->panSrcBands = &nSrcBandNumber;
1678         psWarpOptions->panDstBands = &nDstBandNumber;
1679         psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1680                     psExtraArg->pfnProgress : GDALDummyProgress;
1681         psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1682         psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1683         GDALRasterIOTransformerStruct sTransformer;
1684         sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1685         sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1686         sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1687         sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1688         psWarpOptions->pTransformerArg = &sTransformer;
1689 
1690         GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1691         eErr = GDALChunkAndWarpImage( hWarpOperation,
1692                                       nDestXOffVirtual, nDestYOffVirtual,
1693                                       nBufXSize, nBufYSize );
1694         GDALDestroyWarpOperation( hWarpOperation );
1695 
1696         psWarpOptions->panSrcBands = nullptr;
1697         psWarpOptions->panDstBands = nullptr;
1698         GDALDestroyWarpOptions( psWarpOptions );
1699 
1700         if( hVRTDS )
1701             GDALClose(hVRTDS);
1702     }
1703     else
1704 #endif
1705     {
1706         const char* pszResampling =
1707             (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" :
1708             (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" :
1709             (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" :
1710             (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" :
1711             (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" :
1712             (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" :
1713             (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" :
1714             (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" : "UNKNOWN";
1715 
1716         GDALRasterBand* poFirstSrcBand = GetRasterBand(panBandMap[0]);
1717         GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1718         int nBlockXSize, nBlockYSize;
1719         poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1720 
1721         int nKernelRadius;
1722         GDALResampleFunction pfnResampleFunc =
1723                         GDALGetResampleFunction(pszResampling, &nKernelRadius);
1724         CPLAssert(pfnResampleFunc);
1725 #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1726         GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1727                         GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1728 #endif
1729         GDALDataType eWrkDataType =
1730             GDALGetOvrWorkDataType(pszResampling, eDataType);
1731 
1732         int nDstBlockXSize = nBufXSize;
1733         int nDstBlockYSize = nBufYSize;
1734         int nFullResXChunk, nFullResYChunk;
1735         while( true )
1736         {
1737             nFullResXChunk =
1738                 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1739             nFullResYChunk =
1740                 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1741             if( nFullResXChunk > nRasterXSize )
1742                 nFullResXChunk = nRasterXSize;
1743             if( nFullResYChunk > nRasterYSize )
1744                 nFullResYChunk = nRasterYSize;
1745             if( (nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1746                 (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= 1024 * 1024) )
1747                 break;
1748             // When operating on the full width of a raster whose block width is
1749             // the raster width, prefer doing chunks in height.
1750             if( nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1751                 nDstBlockYSize > 1 )
1752                 nDstBlockYSize /= 2;
1753             /* Otherwise cut the maximal dimension */
1754             else if( nDstBlockXSize > 1 &&
1755                      (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1) )
1756                 nDstBlockXSize /= 2;
1757             else
1758                 nDstBlockYSize /= 2;
1759         }
1760 
1761         int nOvrFactor = std::max( static_cast<int>(0.5 + dfXRatioDstToSrc),
1762                                    static_cast<int>(0.5 + dfYRatioDstToSrc) );
1763         if( nOvrFactor == 0 ) nOvrFactor = 1;
1764         int nFullResXSizeQueried = nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1765         int nFullResYSizeQueried = nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1766 
1767         if( nFullResXSizeQueried > nRasterXSize )
1768             nFullResXSizeQueried = nRasterXSize;
1769         if( nFullResYSizeQueried > nRasterYSize )
1770             nFullResYSizeQueried = nRasterYSize;
1771 
1772         void * pChunk =
1773             VSI_MALLOC3_VERBOSE(
1774                 GDALGetDataTypeSizeBytes(eWrkDataType) * nBandCount,
1775                 nFullResXSizeQueried, nFullResYSizeQueried );
1776         GByte * pabyChunkNoDataMask = nullptr;
1777 
1778         GDALRasterBand* poMaskBand = poFirstSrcBand->GetMaskBand();
1779         int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1780 
1781         bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1782         if (bUseNoDataMask)
1783         {
1784             pabyChunkNoDataMask = static_cast<GByte *>(
1785                 VSI_MALLOC2_VERBOSE( nFullResXSizeQueried, nFullResYSizeQueried ));
1786         }
1787         if( pChunk == nullptr || (bUseNoDataMask && pabyChunkNoDataMask == nullptr) )
1788         {
1789             GDALClose(poMEMDS);
1790             CPLFree(pChunk);
1791             CPLFree(pabyChunkNoDataMask);
1792             CPLFree(papoDstBands);
1793             return CE_Failure;
1794         }
1795 
1796         int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1797                            ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1798         int nBlocksDone = 0;
1799 
1800         int nDstYOff;
1801         for( nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1802             nDstYOff += nDstBlockYSize )
1803         {
1804             int nDstYCount;
1805             if  (nDstYOff + nDstBlockYSize <= nBufYSize)
1806                 nDstYCount = nDstBlockYSize;
1807             else
1808                 nDstYCount = nBufYSize - nDstYOff;
1809 
1810             int nChunkYOff =
1811                 nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1812             int nChunkYOff2 =
1813                 nYOff + 1 +
1814                 static_cast<int>(
1815                     ceil((nDstYOff + nDstYCount) * dfYRatioDstToSrc) );
1816             if( nChunkYOff2 > nRasterYSize )
1817                 nChunkYOff2 = nRasterYSize;
1818             int nYCount = nChunkYOff2 - nChunkYOff;
1819             CPLAssert(nYCount <= nFullResYChunk);
1820 
1821             int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1822             int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1823             if( nChunkYOffQueried < 0 )
1824             {
1825                 nChunkYSizeQueried += nChunkYOffQueried;
1826                 nChunkYOffQueried = 0;
1827             }
1828             if( nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize )
1829                 nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1830             CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1831 
1832             int nDstXOff;
1833             for( nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1834                 nDstXOff += nDstBlockXSize )
1835             {
1836                 int nDstXCount;
1837                 if  (nDstXOff + nDstBlockXSize <= nBufXSize)
1838                     nDstXCount = nDstBlockXSize;
1839                 else
1840                     nDstXCount = nBufXSize - nDstXOff;
1841 
1842                 int nChunkXOff =
1843                     nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1844                 int nChunkXOff2 =
1845                     nXOff + 1 + static_cast<int>(
1846                         ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc) );
1847                 if( nChunkXOff2 > nRasterXSize )
1848                     nChunkXOff2 = nRasterXSize;
1849                 int nXCount = nChunkXOff2 - nChunkXOff;
1850                 CPLAssert(nXCount <= nFullResXChunk);
1851 
1852                 int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1853                 int nChunkXSizeQueried = nXCount + 2 * nKernelRadius * nOvrFactor;
1854                 if( nChunkXOffQueried < 0 )
1855                 {
1856                     nChunkXSizeQueried += nChunkXOffQueried;
1857                     nChunkXOffQueried = 0;
1858                 }
1859                 if( nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize )
1860                     nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1861                 CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1862 
1863                 bool bSkipResample = false;
1864                 bool bNoDataMaskFullyOpaque = false;
1865                 if (eErr == CE_None && bUseNoDataMask)
1866                 {
1867                     eErr = poMaskBand->RasterIO( GF_Read,
1868                                                  nChunkXOffQueried,
1869                                                  nChunkYOffQueried,
1870                                                  nChunkXSizeQueried,
1871                                                  nChunkYSizeQueried,
1872                                                  pabyChunkNoDataMask,
1873                                                  nChunkXSizeQueried,
1874                                                  nChunkYSizeQueried,
1875                                                  GDT_Byte, 0, 0, nullptr );
1876 
1877                     /* Optimizations if mask if fully opaque or transparent */
1878                     const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1879                     const GByte bVal = pabyChunkNoDataMask[0];
1880                     int i = 1;  // Used after for.
1881                     for( ; i < nPixels; i++ )
1882                     {
1883                         if( pabyChunkNoDataMask[i] != bVal )
1884                             break;
1885                     }
1886                     if( i == nPixels )
1887                     {
1888                         if( bVal == 0 )
1889                         {
1890                             float fNoDataValue = 0.0f;
1891                             for( int iBand = 0; iBand < nBandCount; iBand++ )
1892                             {
1893                                 for( int j = 0; j < nDstYCount; j++ )
1894                                 {
1895                                     GDALCopyWords(
1896                                         &fNoDataValue, GDT_Float32, 0,
1897                                         static_cast<GByte *>(pData) +
1898                                         iBand * nBandSpace +
1899                                         nLineSpace * (j + nDstYOff) +
1900                                         nDstXOff * nPixelSpace,
1901                                         eBufType,
1902                                         static_cast<int>(nPixelSpace),
1903                                         nDstXCount);
1904                                 }
1905                             }
1906                             bSkipResample = true;
1907                         }
1908                         else
1909                         {
1910                             bNoDataMaskFullyOpaque = true;
1911                         }
1912                     }
1913                 }
1914 
1915                 if( !bSkipResample && eErr == CE_None )
1916                 {
1917                     /* Read the source buffers */
1918                     eErr = RasterIO( GF_Read,
1919                                      nChunkXOffQueried, nChunkYOffQueried,
1920                                      nChunkXSizeQueried, nChunkYSizeQueried,
1921                                      pChunk,
1922                                      nChunkXSizeQueried, nChunkYSizeQueried,
1923                                      eWrkDataType,
1924                                      nBandCount, panBandMap,
1925                                      0, 0, 0, nullptr );
1926                 }
1927 
1928 #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1929                 if( pfnResampleFuncMultiBands &&
1930                     !bSkipResample &&
1931                     eErr == CE_None )
1932                 {
1933                     eErr = pfnResampleFuncMultiBands(
1934                         dfXRatioDstToSrc,
1935                         dfYRatioDstToSrc,
1936                         dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1937                         dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1938                         eWrkDataType,
1939                         (GByte*)pChunk,
1940                         nBandCount,
1941                         bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1942                         nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1943                         nChunkXSizeQueried,
1944                         nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1945                         nChunkYSizeQueried,
1946                         nDstXOff + nDestXOffVirtual,
1947                         nDstXOff + nDestXOffVirtual + nDstXCount,
1948                         nDstYOff + nDestYOffVirtual,
1949                         nDstYOff + nDestYOffVirtual + nDstYCount,
1950                         papoDstBands,
1951                         pszResampling,
1952                         FALSE /*bHasNoData*/,
1953                         0.f /* fNoDataValue */,
1954                         nullptr /* color table*/,
1955                         eDataType );
1956                 }
1957                 else
1958 #endif
1959                 {
1960                     size_t nChunkBandOffset =
1961                         static_cast<size_t>(nChunkXSizeQueried) *
1962                         nChunkYSizeQueried *
1963                         GDALGetDataTypeSizeBytes(eWrkDataType);
1964                     for( int i = 0;
1965                          i < nBandCount && !bSkipResample && eErr == CE_None;
1966                          i++ )
1967                     {
1968                         const bool bPropagateNoData = false;
1969                         void* pDstBuffer = nullptr;
1970                         GDALDataType eDstBufferDataType = GDT_Unknown;
1971                         GDALRasterBand* poMEMBand = poMEMDS->GetRasterBand(i+1);
1972                         eErr = pfnResampleFunc(
1973                             dfXRatioDstToSrc,
1974                             dfYRatioDstToSrc,
1975                             dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1976                             dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1977                             eWrkDataType,
1978                             reinterpret_cast<GByte*>(pChunk) + i * nChunkBandOffset,
1979                             bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1980                             nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1981                             nChunkXSizeQueried,
1982                             nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1983                             nChunkYSizeQueried,
1984                             nDstXOff + nDestXOffVirtual,
1985                             nDstXOff + nDestXOffVirtual + nDstXCount,
1986                             nDstYOff + nDestYOffVirtual,
1987                             nDstYOff + nDestYOffVirtual + nDstYCount,
1988                             poMEMBand,
1989                             &pDstBuffer,
1990                             &eDstBufferDataType,
1991                             pszResampling,
1992                             FALSE /*bHasNoData*/,
1993                             0.f /* fNoDataValue */,
1994                             nullptr /* color table*/,
1995                             eDataType,
1996                             bPropagateNoData );
1997                         if( eErr == CE_None )
1998                         {
1999                             eErr = poMEMBand->RasterIO(
2000                                 GF_Write,
2001                                 nDstXOff + nDestXOffVirtual,
2002                                 nDstYOff + nDestYOffVirtual,
2003                                 nDstXCount,
2004                                 nDstYCount,
2005                                 pDstBuffer,
2006                                 nDstXCount,
2007                                 nDstYCount,
2008                                 eDstBufferDataType,
2009                                 0, 0, nullptr );
2010                         }
2011                         CPLFree(pDstBuffer);
2012                     }
2013                 }
2014 
2015                 nBlocksDone ++;
2016                 if( eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
2017                     !psExtraArg->pfnProgress(
2018                         1.0 * nBlocksDone / nTotalBlocks, "",
2019                         psExtraArg->pProgressData) )
2020                 {
2021                     eErr = CE_Failure;
2022                 }
2023             }
2024         }
2025 
2026         CPLFree(pChunk);
2027         CPLFree(pabyChunkNoDataMask);
2028     }
2029 
2030     CPLFree(papoDstBands);
2031     GDALClose(poMEMDS);
2032 
2033     return eErr;
2034 }
2035 //! @endcond
2036 
2037 /************************************************************************/
2038 /*                           GDALSwapWords()                            */
2039 /************************************************************************/
2040 
2041 /**
2042  * Byte swap words in-place.
2043  *
2044  * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2045  * a memory array.  No assumption is made that the words being swapped are
2046  * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2047  * to determine if the current platform is big endian or little endian.  Use
2048  * The macros like CPL_SWAP32() to byte swap single values without the overhead
2049  * of a function call.
2050  *
2051  * @param pData pointer to start of data buffer.
2052  * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2053  * @param nWordCount the number of words to be swapped in this call.
2054  * @param nWordSkip the byte offset from the start of one word to the start of
2055  * the next. For packed buffers this is the same as nWordSize.
2056  */
2057 
GDALSwapWords(void * pData,int nWordSize,int nWordCount,int nWordSkip)2058 void CPL_STDCALL GDALSwapWords( void *pData, int nWordSize, int nWordCount,
2059                                 int nWordSkip )
2060 
2061 {
2062     if (nWordCount > 0)
2063         VALIDATE_POINTER0( pData , "GDALSwapWords" );
2064 
2065     GByte *pabyData = static_cast<GByte *>( pData );
2066 
2067     switch( nWordSize )
2068     {
2069       case 1:
2070         break;
2071 
2072       case 2:
2073         CPLAssert( nWordSkip >= 2 || nWordCount == 1 );
2074         for( int i = 0; i < nWordCount; i++ )
2075         {
2076             CPL_SWAP16PTR(pabyData);
2077             pabyData += nWordSkip;
2078         }
2079         break;
2080 
2081       case 4:
2082         CPLAssert( nWordSkip >= 4 || nWordCount == 1 );
2083         if( CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0 )
2084         {
2085             for( int i = 0; i < nWordCount; i++ )
2086             {
2087                 *reinterpret_cast<GUInt32*>(pabyData) = CPL_SWAP32(*reinterpret_cast<const GUInt32*>(pabyData));
2088                 pabyData += nWordSkip;
2089             }
2090         }
2091         else
2092         {
2093             for( int i = 0; i < nWordCount; i++ )
2094             {
2095                 CPL_SWAP32PTR(pabyData);
2096                 pabyData += nWordSkip;
2097             }
2098         }
2099         break;
2100 
2101       case 8:
2102         CPLAssert( nWordSkip >= 8 || nWordCount == 1 );
2103 #ifdef CPL_HAS_GINT64
2104         if( CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0 )
2105         {
2106             for( int i = 0; i < nWordCount; i++ )
2107             {
2108                 *reinterpret_cast<GUInt64*>(pabyData) = CPL_SWAP64(*reinterpret_cast<const GUInt64*>(pabyData));
2109                 pabyData += nWordSkip;
2110             }
2111         }
2112         else
2113 #endif
2114         {
2115             for( int i = 0; i < nWordCount; i++ )
2116             {
2117                 CPL_SWAP64PTR(pabyData);
2118                 pabyData += nWordSkip;
2119             }
2120         }
2121         break;
2122 
2123       default:
2124         CPLAssert( false );
2125     }
2126 }
2127 
2128 /************************************************************************/
2129 /*                           GDALSwapWordsEx()                          */
2130 /************************************************************************/
2131 
2132 /**
2133  * Byte swap words in-place.
2134  *
2135  * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2136  * a memory array.  No assumption is made that the words being swapped are
2137  * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2138  * to determine if the current platform is big endian or little endian.  Use
2139  * The macros like CPL_SWAP32() to byte swap single values without the overhead
2140  * of a function call.
2141  *
2142  * @param pData pointer to start of data buffer.
2143  * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2144  * @param nWordCount the number of words to be swapped in this call.
2145  * @param nWordSkip the byte offset from the start of one word to the start of
2146  * the next. For packed buffers this is the same as nWordSize.
2147  * @since GDAL 2.1
2148  */
GDALSwapWordsEx(void * pData,int nWordSize,size_t nWordCount,int nWordSkip)2149 void CPL_STDCALL GDALSwapWordsEx( void *pData, int nWordSize, size_t nWordCount,
2150                                   int nWordSkip )
2151 {
2152     GByte* pabyData = static_cast<GByte*>(pData);
2153     while( nWordCount )
2154     {
2155         // Pick-up a multiple of 8 as max chunk size.
2156         const int nWordCountSmall =
2157             (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
2158         GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
2159         pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
2160         nWordCount -= nWordCountSmall;
2161     }
2162 }
2163 
2164 // Place the new GDALCopyWords helpers in an anonymous namespace
2165 namespace {
2166 
2167 /************************************************************************/
2168 /*                           GDALCopyWordsT()                           */
2169 /************************************************************************/
2170 /**
2171  * Template function, used to copy data from pSrcData into buffer
2172  * pDstData, with stride nSrcPixelStride in the source data and
2173  * stride nDstPixelStride in the destination data. This template can
2174  * deal with the case where the input data type is real or complex and
2175  * the output is real.
2176  *
2177  * @param pSrcData the source data buffer
2178  * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2179  *                      of interest.
2180  * @param pDstData the destination buffer.
2181  * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2182  *                      interest.
2183  * @param nWordCount the total number of pixel words to copy
2184  *
2185  * @code
2186  * // Assume an input buffer of type GUInt16 named pBufferIn
2187  * GByte *pBufferOut = new GByte[numBytesOut];
2188  * GDALCopyWordsT<GUInt16, GByte>(pSrcData, 2, pDstData, 1, numBytesOut);
2189  * @endcode
2190  * @note
2191  * This is a private function, and should not be exposed outside of rasterio.cpp.
2192  * External users should call the GDALCopyWords driver function.
2193  */
2194 
2195 template <class Tin, class Tout>
GDALCopyWordsGenericT(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2196 static void inline GDALCopyWordsGenericT( const Tin* const CPL_RESTRICT pSrcData,
2197                             int nSrcPixelStride,
2198                             Tout* const CPL_RESTRICT pDstData,
2199                             int nDstPixelStride,
2200                             GPtrDiff_t nWordCount)
2201 {
2202     decltype(nWordCount) nDstOffset = 0;
2203 
2204     const char* const pSrcDataPtr = reinterpret_cast<const char*>(pSrcData);
2205     char* const pDstDataPtr = reinterpret_cast<char*>(pDstData);
2206     for (decltype(nWordCount) n = 0; n < nWordCount; n++  )
2207     {
2208         const Tin tValue =
2209             *reinterpret_cast<const Tin*>(pSrcDataPtr + (n * nSrcPixelStride));
2210         Tout* const pOutPixel =
2211             reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2212 
2213         GDALCopyWord(tValue, *pOutPixel);
2214 
2215         nDstOffset += nDstPixelStride;
2216     }
2217 }
2218 
2219 template <class Tin, class Tout>
GDALCopyWordsT(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2220 static void inline GDALCopyWordsT( const Tin* const CPL_RESTRICT pSrcData,
2221                             int nSrcPixelStride,
2222                             Tout* const CPL_RESTRICT pDstData,
2223                             int nDstPixelStride,
2224                             GPtrDiff_t nWordCount)
2225 {
2226     GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2227                           pDstData, nDstPixelStride,
2228                           nWordCount);
2229 }
2230 
2231 template <class Tin, class Tout>
GDALCopyWordsT_8atatime(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2232 static void inline GDALCopyWordsT_8atatime( const Tin* const CPL_RESTRICT pSrcData,
2233                                      int nSrcPixelStride,
2234                                      Tout* const CPL_RESTRICT pDstData,
2235                                      int nDstPixelStride,
2236                                      GPtrDiff_t nWordCount )
2237 {
2238     decltype(nWordCount) nDstOffset = 0;
2239 
2240     const char* const pSrcDataPtr = reinterpret_cast<const char*>(pSrcData);
2241     char* const pDstDataPtr = reinterpret_cast<char*>(pDstData);
2242     decltype(nWordCount) n = 0;
2243     if( nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2244         nDstPixelStride == static_cast<int>(sizeof(Tout)) )
2245     {
2246         for (; n < nWordCount-7; n+=8)
2247         {
2248             const Tin* pInValues =
2249                 reinterpret_cast<const Tin*>(pSrcDataPtr + (n * nSrcPixelStride));
2250             Tout* const pOutPixels =
2251                 reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2252 
2253             GDALCopy8Words(pInValues, pOutPixels);
2254 
2255             nDstOffset += 8 * nDstPixelStride;
2256         }
2257     }
2258     for( ; n < nWordCount; n++  )
2259     {
2260         const Tin tValue =
2261             *reinterpret_cast<const Tin*>(pSrcDataPtr + (n * nSrcPixelStride));
2262         Tout* const pOutPixel =
2263             reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2264 
2265         GDALCopyWord(tValue, *pOutPixel);
2266 
2267         nDstOffset += nDstPixelStride;
2268     }
2269 }
2270 
2271 #if defined(__x86_64) || defined(_M_X64)
2272 
2273 #include <emmintrin.h>
2274 
GDALCopyWordsByteTo16Bit(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2275 template<class Tout> void GDALCopyWordsByteTo16Bit(
2276                                 const GByte* const CPL_RESTRICT pSrcData,
2277                                 int nSrcPixelStride,
2278                                 Tout* const CPL_RESTRICT pDstData,
2279                                 int nDstPixelStride,
2280                                 GPtrDiff_t nWordCount )
2281 {
2282     if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2283         nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2284     {
2285         decltype(nWordCount) n = 0;
2286         const __m128i xmm_zero = _mm_setzero_si128 ();
2287         GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2288         for (; n < nWordCount-15; n+=16)
2289         {
2290             __m128i xmm = _mm_loadu_si128(
2291                 reinterpret_cast<const __m128i*> (pSrcData + n) );
2292             __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2293             __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2294             _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 2),
2295                               xmm0 );
2296             _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 2 + 16),
2297                               xmm1 );
2298         }
2299         for( ; n < nWordCount; n++  )
2300         {
2301             pDstData[n] = pSrcData[n];
2302         }
2303     }
2304     else
2305     {
2306         GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2307                               pDstData, nDstPixelStride,
2308                               nWordCount);
2309     }
2310 }
2311 
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GUInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2312 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2313                                 int nSrcPixelStride,
2314                                 GUInt16* const CPL_RESTRICT pDstData,
2315                                 int nDstPixelStride,
2316                                 GPtrDiff_t nWordCount )
2317 {
2318     GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2319                              nDstPixelStride, nWordCount);
2320 }
2321 
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2322 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2323                                 int nSrcPixelStride,
2324                                 GInt16* const CPL_RESTRICT pDstData,
2325                                 int nDstPixelStride,
2326                                 GPtrDiff_t nWordCount )
2327 {
2328     GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2329                              nDstPixelStride, nWordCount);
2330 }
2331 
GDALCopyWordsByteTo32Bit(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2332 template<class Tout> void GDALCopyWordsByteTo32Bit(
2333                                 const GByte* const CPL_RESTRICT pSrcData,
2334                                 int nSrcPixelStride,
2335                                 Tout* const CPL_RESTRICT pDstData,
2336                                 int nDstPixelStride,
2337                                 GPtrDiff_t nWordCount )
2338 {
2339     if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2340         nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2341     {
2342         decltype(nWordCount) n = 0;
2343         const __m128i xmm_zero = _mm_setzero_si128 ();
2344         GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2345         for (; n < nWordCount-15; n+=16)
2346         {
2347             __m128i xmm = _mm_loadu_si128(
2348                 reinterpret_cast<const __m128i*> (pSrcData + n) );
2349             __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2350             __m128i xmm_high= _mm_unpackhi_epi8(xmm, xmm_zero);
2351             __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2352             __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2353             __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2354             __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2355             _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 4),
2356                               xmm0 );
2357             _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 4 + 16),
2358                               xmm1 );
2359             _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 4 + 32),
2360                               xmm2 );
2361             _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 4 + 48),
2362                               xmm3 );
2363         }
2364         for( ; n < nWordCount; n++  )
2365         {
2366             pDstData[n] = pSrcData[n];
2367         }
2368     }
2369     else
2370     {
2371         GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2372                               pDstData, nDstPixelStride,
2373                               nWordCount);
2374     }
2375 }
2376 
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GUInt32 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2377 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2378                                 int nSrcPixelStride,
2379                                 GUInt32* const CPL_RESTRICT pDstData,
2380                                 int nDstPixelStride,
2381                                 GPtrDiff_t nWordCount )
2382 {
2383     GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2384                              nDstPixelStride, nWordCount);
2385 }
2386 
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GInt32 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2387 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2388                                 int nSrcPixelStride,
2389                                 GInt32* const CPL_RESTRICT pDstData,
2390                                 int nDstPixelStride,
2391                                 GPtrDiff_t nWordCount )
2392 {
2393     GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2394                              nDstPixelStride, nWordCount);
2395 }
2396 
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,float * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2397 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2398                                 int nSrcPixelStride,
2399                                 float* const CPL_RESTRICT pDstData,
2400                                 int nDstPixelStride,
2401                                 GPtrDiff_t nWordCount )
2402 {
2403     if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2404         nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2405     {
2406         decltype(nWordCount) n = 0;
2407         const __m128i xmm_zero = _mm_setzero_si128 ();
2408         GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2409         for (; n < nWordCount-15; n+=16)
2410         {
2411             __m128i xmm = _mm_loadu_si128(
2412                 reinterpret_cast<const __m128i*> (pSrcData + n) );
2413             __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2414             __m128i xmm_high= _mm_unpackhi_epi8(xmm, xmm_zero);
2415             __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2416             __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2417             __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2418             __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2419             __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2420             __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2421             __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2422             __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2423             _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4),
2424                            xmm0_f );
2425             _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4 + 16),
2426                            xmm1_f );
2427             _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4 + 32),
2428                            xmm2_f );
2429             _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4 + 48),
2430                            xmm3_f );
2431         }
2432         for( ; n < nWordCount; n++  )
2433         {
2434             pDstData[n] = pSrcData[n];
2435         }
2436     }
2437     else
2438     {
2439         GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2440                               pDstData, nDstPixelStride,
2441                               nWordCount);
2442     }
2443 }
2444 
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,double * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2445 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2446                                 int nSrcPixelStride,
2447                                 double* const CPL_RESTRICT pDstData,
2448                                 int nDstPixelStride,
2449                                 GPtrDiff_t nWordCount )
2450 {
2451     if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2452         nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2453     {
2454         decltype(nWordCount) n = 0;
2455         const __m128i xmm_zero = _mm_setzero_si128 ();
2456         GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2457         for (; n < nWordCount-15; n+=16)
2458         {
2459             __m128i xmm = _mm_loadu_si128(
2460                 reinterpret_cast<const __m128i*> (pSrcData + n) );
2461             __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2462             __m128i xmm_high= _mm_unpackhi_epi8(xmm, xmm_zero);
2463             __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2464             __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2465             __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2466             __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2467 
2468             __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2469             __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2470             __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2471             __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2472             xmm0 = _mm_srli_si128(xmm0, 8);
2473             xmm1 = _mm_srli_si128(xmm1, 8);
2474             xmm2 = _mm_srli_si128(xmm2, 8);
2475             xmm3 = _mm_srli_si128(xmm3, 8);
2476             __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2477             __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2478             __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2479             __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2480 
2481             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8),
2482                            xmm0_low_d );
2483             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 16),
2484                            xmm0_high_d );
2485             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 32),
2486                            xmm1_low_d );
2487             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 48),
2488                            xmm1_high_d );
2489             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 64),
2490                            xmm2_low_d );
2491             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 80),
2492                            xmm2_high_d );
2493             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 96),
2494                            xmm3_low_d );
2495             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 112),
2496                            xmm3_high_d );
2497         }
2498         for( ; n < nWordCount; n++  )
2499         {
2500             pDstData[n] = pSrcData[n];
2501         }
2502     }
2503     else
2504     {
2505         GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2506                               pDstData, nDstPixelStride,
2507                               nWordCount);
2508     }
2509 }
2510 
GDALCopyWordsT(const GUInt16 * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GByte * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2511 template<> void GDALCopyWordsT( const GUInt16* const CPL_RESTRICT pSrcData,
2512                                 int nSrcPixelStride,
2513                                 GByte* const CPL_RESTRICT pDstData,
2514                                 int nDstPixelStride,
2515                                 GPtrDiff_t nWordCount )
2516 {
2517     if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2518         nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2519     {
2520          decltype(nWordCount) n = 0;
2521         // In SSE2, min_epu16 does not exist, so shift from
2522         // UInt16 to SInt16 to be able to use min_epi16
2523         const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16 (-32768);
2524         const __m128i xmm_m255_shifted = _mm_set1_epi16 (255-32768);
2525         for (; n < nWordCount-7; n+=8)
2526         {
2527             __m128i xmm = _mm_loadu_si128(
2528                 reinterpret_cast<const __m128i*> (pSrcData + n) );
2529             xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2530             xmm = _mm_min_epi16(xmm, xmm_m255_shifted);
2531             xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2532             xmm = _mm_packus_epi16(xmm, xmm);
2533             GDALCopyXMMToInt64(xmm, reinterpret_cast<GPtrDiff_t*>(pDstData + n));
2534         }
2535         for( ; n < nWordCount; n++  )
2536         {
2537             pDstData[n] =
2538                 pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]);
2539         }
2540     }
2541     else
2542     {
2543         GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2544                               pDstData, nDstPixelStride,
2545                               nWordCount);
2546     }
2547 }
2548 
GDALCopyWordsT(const GUInt16 * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2549 template<> void GDALCopyWordsT( const GUInt16* const CPL_RESTRICT pSrcData,
2550                                 int nSrcPixelStride,
2551                                 GInt16* const CPL_RESTRICT pDstData,
2552                                 int nDstPixelStride,
2553                                 GPtrDiff_t nWordCount )
2554 {
2555     if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2556         nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2557     {
2558         decltype(nWordCount) n = 0;
2559         // In SSE2, min_epu16 does not exist, so shift from
2560         // UInt16 to SInt16 to be able to use min_epi16
2561         const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16 (-32768);
2562         const __m128i xmm_32767_shifted = _mm_set1_epi16 (32767-32768);
2563         for (; n < nWordCount-7; n+=8)
2564         {
2565             __m128i xmm = _mm_loadu_si128(
2566                 reinterpret_cast<const __m128i*> (pSrcData + n) );
2567             xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2568             xmm = _mm_min_epi16(xmm, xmm_32767_shifted);
2569             xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2570             _mm_storeu_si128( reinterpret_cast<__m128i*>(pDstData + n),
2571                               xmm );
2572         }
2573         for( ; n < nWordCount; n++  )
2574         {
2575             pDstData[n] =
2576                 pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]);
2577         }
2578     }
2579     else
2580     {
2581         GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2582                               pDstData, nDstPixelStride,
2583                               nWordCount);
2584     }
2585 }
2586 
GDALCopyWordsT(const GUInt16 * const CPL_RESTRICT pSrcData,int nSrcPixelStride,float * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2587 template<> void GDALCopyWordsT( const GUInt16* const CPL_RESTRICT pSrcData,
2588                                 int nSrcPixelStride,
2589                                 float* const CPL_RESTRICT pDstData,
2590                                 int nDstPixelStride,
2591                                 GPtrDiff_t nWordCount )
2592 {
2593     if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2594         nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2595     {
2596         decltype(nWordCount) n = 0;
2597         const __m128i xmm_zero = _mm_setzero_si128 ();
2598         GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2599         for (; n < nWordCount-7; n+=8)
2600         {
2601             __m128i xmm = _mm_loadu_si128(
2602                 reinterpret_cast<const __m128i*> (pSrcData + n) );
2603             __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2604             __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2605             __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2606             __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2607             _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4),
2608                            xmm0_f );
2609             _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4 + 16),
2610                            xmm1_f );
2611         }
2612         for( ; n < nWordCount; n++  )
2613         {
2614             pDstData[n] = pSrcData[n];
2615         }
2616     }
2617     else
2618     {
2619         GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2620                               pDstData, nDstPixelStride,
2621                               nWordCount);
2622     }
2623 }
2624 
GDALCopyWordsT(const GUInt16 * const CPL_RESTRICT pSrcData,int nSrcPixelStride,double * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2625 template<> void GDALCopyWordsT( const GUInt16* const CPL_RESTRICT pSrcData,
2626                                 int nSrcPixelStride,
2627                                 double* const CPL_RESTRICT pDstData,
2628                                 int nDstPixelStride,
2629                                 GPtrDiff_t nWordCount )
2630 {
2631     if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2632         nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2633     {
2634         decltype(nWordCount) n = 0;
2635         const __m128i xmm_zero = _mm_setzero_si128 ();
2636         GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2637         for (; n < nWordCount-7; n+=8)
2638         {
2639             __m128i xmm = _mm_loadu_si128(
2640                 reinterpret_cast<const __m128i*> (pSrcData + n) );
2641             __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2642             __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2643 
2644             __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2645             __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2646             xmm0 = _mm_srli_si128(xmm0, 8);
2647             xmm1 = _mm_srli_si128(xmm1, 8);
2648             __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2649             __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2650 
2651             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8),
2652                            xmm0_low_d );
2653             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 16),
2654                            xmm0_high_d );
2655             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 32),
2656                            xmm1_low_d );
2657             _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 48),
2658                            xmm1_high_d );
2659         }
2660         for( ; n < nWordCount; n++  )
2661         {
2662             pDstData[n] = pSrcData[n];
2663         }
2664     }
2665     else
2666     {
2667         GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2668                               pDstData, nDstPixelStride,
2669                               nWordCount);
2670     }
2671 }
2672 
GDALCopyWordsT(const double * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GUInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2673 template<> void GDALCopyWordsT( const double* const CPL_RESTRICT pSrcData,
2674                                 int nSrcPixelStride,
2675                                 GUInt16* const CPL_RESTRICT pDstData,
2676                                 int nDstPixelStride,
2677                                 GPtrDiff_t nWordCount )
2678 {
2679     GDALCopyWordsT_8atatime( pSrcData, nSrcPixelStride,
2680                              pDstData, nDstPixelStride, nWordCount );
2681 }
2682 
2683 #endif // defined(__x86_64) || defined(_M_X64)
2684 
GDALCopyWordsT(const float * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GByte * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2685 template<> void GDALCopyWordsT( const float* const CPL_RESTRICT pSrcData,
2686                                 int nSrcPixelStride,
2687                                 GByte* const CPL_RESTRICT pDstData,
2688                                 int nDstPixelStride,
2689                                 GPtrDiff_t nWordCount )
2690 {
2691     GDALCopyWordsT_8atatime( pSrcData, nSrcPixelStride,
2692                              pDstData, nDstPixelStride, nWordCount );
2693 }
2694 
GDALCopyWordsT(const float * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2695 template<> void GDALCopyWordsT( const float* const CPL_RESTRICT pSrcData,
2696                                 int nSrcPixelStride,
2697                                 GInt16* const CPL_RESTRICT pDstData,
2698                                 int nDstPixelStride,
2699                                 GPtrDiff_t nWordCount )
2700 {
2701     GDALCopyWordsT_8atatime( pSrcData, nSrcPixelStride,
2702                              pDstData, nDstPixelStride, nWordCount );
2703 }
2704 
GDALCopyWordsT(const float * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GUInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2705 template<> void GDALCopyWordsT( const float* const CPL_RESTRICT pSrcData,
2706                                 int nSrcPixelStride,
2707                                 GUInt16* const CPL_RESTRICT pDstData,
2708                                 int nDstPixelStride,
2709                                 GPtrDiff_t nWordCount )
2710 {
2711     GDALCopyWordsT_8atatime( pSrcData, nSrcPixelStride,
2712                              pDstData, nDstPixelStride, nWordCount );
2713 }
2714 
2715 /************************************************************************/
2716 /*                   GDALCopyWordsComplexT()                            */
2717 /************************************************************************/
2718 /**
2719  * Template function, used to copy data from pSrcData into buffer
2720  * pDstData, with stride nSrcPixelStride in the source data and
2721  * stride nDstPixelStride in the destination data. Deals with the
2722  * complex case, where input is complex and output is complex.
2723  *
2724  * @param pSrcData the source data buffer
2725  * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2726  *                      of interest.
2727  * @param pDstData the destination buffer.
2728  * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2729  *                      interest.
2730  * @param nWordCount the total number of pixel words to copy
2731  *
2732  */
2733 template <class Tin, class Tout>
GDALCopyWordsComplexT(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2734 inline void GDALCopyWordsComplexT( const Tin* const CPL_RESTRICT pSrcData,
2735                                    int nSrcPixelStride,
2736                                    Tout* const CPL_RESTRICT pDstData,
2737                                    int nDstPixelStride,
2738                                    GPtrDiff_t nWordCount )
2739 {
2740     decltype(nWordCount) nDstOffset = 0;
2741     const char* const pSrcDataPtr = reinterpret_cast<const char*>(pSrcData);
2742     char* const pDstDataPtr = reinterpret_cast<char*>(pDstData);
2743 
2744     for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2745     {
2746         const Tin* const pPixelIn =
2747             reinterpret_cast<const Tin*>(pSrcDataPtr + n * nSrcPixelStride);
2748         Tout* const pPixelOut =
2749             reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2750 
2751         GDALCopyWord(pPixelIn[0], pPixelOut[0]);
2752         GDALCopyWord(pPixelIn[1], pPixelOut[1]);
2753 
2754         nDstOffset += nDstPixelStride;
2755     }
2756 }
2757 
2758 /************************************************************************/
2759 /*                   GDALCopyWordsComplexOutT()                         */
2760 /************************************************************************/
2761 /**
2762  * Template function, used to copy data from pSrcData into buffer
2763  * pDstData, with stride nSrcPixelStride in the source data and
2764  * stride nDstPixelStride in the destination data. Deals with the
2765  * case where the value is real coming in, but complex going out.
2766  *
2767  * @param pSrcData the source data buffer
2768  * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2769  *                      of interest, in bytes.
2770  * @param pDstData the destination buffer.
2771  * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2772  *                      interest, in bytes.
2773  * @param nWordCount the total number of pixel words to copy
2774  *
2775  */
2776 template <class Tin, class Tout>
GDALCopyWordsComplexOutT(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2777 inline void GDALCopyWordsComplexOutT( const Tin* const CPL_RESTRICT pSrcData,
2778                                       int nSrcPixelStride,
2779                                       Tout* const CPL_RESTRICT pDstData,
2780                                       int nDstPixelStride,
2781                                       GPtrDiff_t nWordCount )
2782 {
2783     decltype(nWordCount) nDstOffset = 0;
2784 
2785     const Tout tOutZero = static_cast<Tout>(0);
2786 
2787     const char* const pSrcDataPtr = reinterpret_cast<const char*>(pSrcData);
2788     char* const pDstDataPtr = reinterpret_cast<char*>(pDstData);
2789 
2790     for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2791     {
2792         const Tin tValue =
2793             *reinterpret_cast<const Tin*>(pSrcDataPtr + n * nSrcPixelStride);
2794         Tout* const pPixelOut =
2795             reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2796         GDALCopyWord(tValue, *pPixelOut);
2797 
2798         pPixelOut[1] = tOutZero;
2799 
2800         nDstOffset += nDstPixelStride;
2801     }
2802 }
2803 
2804 /************************************************************************/
2805 /*                           GDALCopyWordsFromT()                       */
2806 /************************************************************************/
2807 /**
2808  * Template driver function. Given the input type T, call the appropriate
2809  * GDALCopyWordsT function template for the desired output type. You should
2810  * never call this function directly (call GDALCopyWords instead).
2811  *
2812  * @param pSrcData source data buffer
2813  * @param nSrcPixelStride pixel stride in input buffer, in pixel words
2814  * @param bInComplex input is complex
2815  * @param pDstData destination data buffer
2816  * @param eDstType destination data type
2817  * @param nDstPixelStride pixel stride in output buffer, in pixel words
2818  * @param nWordCount number of pixel words to be copied
2819  */
2820 template <class T>
GDALCopyWordsFromT(const T * const CPL_RESTRICT pSrcData,int nSrcPixelStride,bool bInComplex,void * CPL_RESTRICT pDstData,GDALDataType eDstType,int nDstPixelStride,GPtrDiff_t nWordCount)2821 inline void GDALCopyWordsFromT( const T* const CPL_RESTRICT pSrcData,
2822                                 int nSrcPixelStride, bool bInComplex,
2823                                 void * CPL_RESTRICT pDstData,
2824                                 GDALDataType eDstType, int nDstPixelStride,
2825                                 GPtrDiff_t nWordCount )
2826 {
2827     switch (eDstType)
2828     {
2829     case GDT_Byte:
2830         GDALCopyWordsT( pSrcData, nSrcPixelStride,
2831                         static_cast<unsigned char*>(pDstData), nDstPixelStride,
2832                         nWordCount );
2833         break;
2834     case GDT_UInt16:
2835         GDALCopyWordsT( pSrcData, nSrcPixelStride,
2836                         static_cast<unsigned short*>(pDstData), nDstPixelStride,
2837                         nWordCount );
2838         break;
2839     case GDT_Int16:
2840         GDALCopyWordsT( pSrcData, nSrcPixelStride,
2841                         static_cast<short*>(pDstData), nDstPixelStride,
2842                         nWordCount );
2843         break;
2844     case GDT_UInt32:
2845         GDALCopyWordsT( pSrcData, nSrcPixelStride,
2846                         static_cast<unsigned int*>(pDstData), nDstPixelStride,
2847                         nWordCount );
2848         break;
2849     case GDT_Int32:
2850         GDALCopyWordsT( pSrcData, nSrcPixelStride,
2851                         static_cast<int*>(pDstData), nDstPixelStride,
2852                         nWordCount );
2853         break;
2854     case GDT_Float32:
2855         GDALCopyWordsT( pSrcData, nSrcPixelStride,
2856                         static_cast<float*>(pDstData), nDstPixelStride,
2857                         nWordCount );
2858         break;
2859     case GDT_Float64:
2860         GDALCopyWordsT( pSrcData, nSrcPixelStride,
2861                         static_cast<double*>(pDstData), nDstPixelStride,
2862                         nWordCount );
2863         break;
2864     case GDT_CInt16:
2865         if (bInComplex)
2866         {
2867             GDALCopyWordsComplexT(
2868                 pSrcData, nSrcPixelStride,
2869                 static_cast<short *>(pDstData), nDstPixelStride,
2870                 nWordCount );
2871         }
2872         else // input is not complex, so we need to promote to a complex buffer
2873         {
2874             GDALCopyWordsComplexOutT(
2875                 pSrcData, nSrcPixelStride,
2876                 static_cast<short *>(pDstData), nDstPixelStride,
2877                 nWordCount );
2878         }
2879         break;
2880     case GDT_CInt32:
2881         if (bInComplex)
2882         {
2883             GDALCopyWordsComplexT(
2884                 pSrcData, nSrcPixelStride,
2885                 static_cast<int *>(pDstData), nDstPixelStride,
2886                 nWordCount );
2887         }
2888         else // input is not complex, so we need to promote to a complex buffer
2889         {
2890             GDALCopyWordsComplexOutT(
2891                 pSrcData, nSrcPixelStride,
2892                 static_cast<int *>(pDstData), nDstPixelStride,
2893                 nWordCount );
2894         }
2895         break;
2896     case GDT_CFloat32:
2897         if (bInComplex)
2898         {
2899             GDALCopyWordsComplexT(
2900                 pSrcData, nSrcPixelStride,
2901                 static_cast<float *>(pDstData), nDstPixelStride,
2902                 nWordCount );
2903         }
2904         else // input is not complex, so we need to promote to a complex buffer
2905         {
2906             GDALCopyWordsComplexOutT(
2907                 pSrcData, nSrcPixelStride,
2908                 static_cast<float *>(pDstData), nDstPixelStride,
2909                 nWordCount );
2910         }
2911         break;
2912     case GDT_CFloat64:
2913         if (bInComplex)
2914         {
2915             GDALCopyWordsComplexT(
2916                 pSrcData, nSrcPixelStride,
2917                 static_cast<double *>(pDstData), nDstPixelStride,
2918                 nWordCount );
2919         }
2920         else // input is not complex, so we need to promote to a complex buffer
2921         {
2922             GDALCopyWordsComplexOutT(
2923                 pSrcData, nSrcPixelStride,
2924                 static_cast<double *>(pDstData), nDstPixelStride,
2925                 nWordCount );
2926         }
2927         break;
2928     case GDT_Unknown:
2929     default:
2930         CPLAssert(false);
2931     }
2932 }
2933 
2934 } // end anonymous namespace
2935 
2936 /************************************************************************/
2937 /*                          GDALReplicateWord()                         */
2938 /************************************************************************/
2939 
2940 template <class T>
GDALReplicateWordT(void * pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2941 inline void GDALReplicateWordT( void * pDstData,
2942                                 int nDstPixelStride,
2943                                 GPtrDiff_t nWordCount )
2944 {
2945     const T valSet = *static_cast<const T*>(pDstData);
2946     if( nDstPixelStride == static_cast<int>(sizeof(T)) )
2947     {
2948         T* pDstPtr = static_cast<T*>(pDstData) + 1;
2949         while( nWordCount >= 4 )
2950         {
2951             nWordCount -= 4;
2952             pDstPtr[0] = valSet;
2953             pDstPtr[1] = valSet;
2954             pDstPtr[2] = valSet;
2955             pDstPtr[3] = valSet;
2956             pDstPtr += 4;
2957         }
2958         while( nWordCount > 0 )
2959         {
2960             --nWordCount;
2961             *pDstPtr = valSet;
2962             pDstPtr++;
2963         }
2964     }
2965     else
2966     {
2967         GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
2968         while( nWordCount > 0 )
2969         {
2970             --nWordCount;
2971             *reinterpret_cast<T*>(pabyDstPtr) = valSet;
2972             pabyDstPtr += nDstPixelStride;
2973         }
2974     }
2975 }
2976 
2977 static
GDALReplicateWord(const void * CPL_RESTRICT pSrcData,GDALDataType eSrcType,void * CPL_RESTRICT pDstData,GDALDataType eDstType,int nDstPixelStride,GPtrDiff_t nWordCount)2978 void GDALReplicateWord( const void * CPL_RESTRICT pSrcData,
2979                         GDALDataType eSrcType,
2980                         void * CPL_RESTRICT pDstData,
2981                         GDALDataType eDstType,
2982                         int nDstPixelStride,
2983                         GPtrDiff_t nWordCount)
2984 {
2985 /* ----------------------------------------------------------------------- */
2986 /* Special case when the source data is always the same value              */
2987 /* (for VRTSourcedRasterBand::IRasterIO and VRTDerivedRasterBand::IRasterIO*/
2988 /*  for example)                                                           */
2989 /* ----------------------------------------------------------------------- */
2990     // Let the general translation case do the necessary conversions
2991     // on the first destination element.
2992     GDALCopyWords(pSrcData, eSrcType, 0,
2993                   pDstData, eDstType, 0,
2994                   1 );
2995 
2996     // Now copy the first element to the nWordCount - 1 following destination
2997     // elements.
2998     nWordCount--;
2999     GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
3000 
3001     switch (eDstType)
3002     {
3003         case GDT_Byte:
3004         {
3005             if (nDstPixelStride == 1)
3006             {
3007                 if (nWordCount > 0)
3008                     memset(pabyDstWord, *reinterpret_cast<const GByte*>(pDstData), nWordCount);
3009             }
3010             else
3011             {
3012                 GByte valSet = *reinterpret_cast<const GByte*>(pDstData);
3013                 while( nWordCount > 0 )
3014                 {
3015                     --nWordCount;
3016                     *pabyDstWord = valSet;
3017                     pabyDstWord += nDstPixelStride;
3018                 }
3019             }
3020             break;
3021         }
3022 
3023 #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \
3024         case enum_type:\
3025         { \
3026             GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \
3027             break; \
3028         }
3029 
3030         CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
3031         CASE_DUPLICATE_SIMPLE(GDT_Int16,  GInt16)
3032         CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3033         CASE_DUPLICATE_SIMPLE(GDT_Int32,  GInt32)
3034         CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3035         CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3036 
3037 #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \
3038         case enum_type:\
3039         { \
3040             c_type valSet1 = reinterpret_cast<const c_type*>(pDstData)[0]; \
3041             c_type valSet2 = reinterpret_cast<const c_type*>(pDstData)[1]; \
3042             while( nWordCount > 0 ) \
3043             { \
3044                 --nWordCount; \
3045                 reinterpret_cast<c_type*>(pabyDstWord)[0] = valSet1; \
3046                 reinterpret_cast<c_type*>(pabyDstWord)[1] = valSet2; \
3047                 pabyDstWord += nDstPixelStride; \
3048             } \
3049             break; \
3050         }
3051 
3052         CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
3053         CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3054         CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3055         CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3056 
3057         default:
3058             CPLAssert( false );
3059     }
3060 }
3061 
3062 /************************************************************************/
3063 /*                        GDALUnrolledCopy()                            */
3064 /************************************************************************/
3065 
3066 template<class T, int srcStride, int dstStride>
GDALUnrolledCopyGeneric(T * CPL_RESTRICT pDest,const T * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3067 static inline void GDALUnrolledCopyGeneric( T* CPL_RESTRICT pDest,
3068                                      const T* CPL_RESTRICT pSrc,
3069                                      GPtrDiff_t nIters )
3070 {
3071     if (nIters >= 16)
3072     {
3073         for ( GPtrDiff_t i = nIters / 16; i != 0; i -- )
3074         {
3075             pDest[0*dstStride] = pSrc[0*srcStride];
3076             pDest[1*dstStride] = pSrc[1*srcStride];
3077             pDest[2*dstStride] = pSrc[2*srcStride];
3078             pDest[3*dstStride] = pSrc[3*srcStride];
3079             pDest[4*dstStride] = pSrc[4*srcStride];
3080             pDest[5*dstStride] = pSrc[5*srcStride];
3081             pDest[6*dstStride] = pSrc[6*srcStride];
3082             pDest[7*dstStride] = pSrc[7*srcStride];
3083             pDest[8*dstStride] = pSrc[8*srcStride];
3084             pDest[9*dstStride] = pSrc[9*srcStride];
3085             pDest[10*dstStride] = pSrc[10*srcStride];
3086             pDest[11*dstStride] = pSrc[11*srcStride];
3087             pDest[12*dstStride] = pSrc[12*srcStride];
3088             pDest[13*dstStride] = pSrc[13*srcStride];
3089             pDest[14*dstStride] = pSrc[14*srcStride];
3090             pDest[15*dstStride] = pSrc[15*srcStride];
3091             pDest += 16*dstStride;
3092             pSrc += 16*srcStride;
3093         }
3094         nIters = nIters % 16;
3095     }
3096     for( GPtrDiff_t i = 0; i < nIters; i++ )
3097     {
3098         pDest[i*dstStride] = *pSrc;
3099         pSrc += srcStride;
3100     }
3101 }
3102 
3103 template<class T, int srcStride, int dstStride>
GDALUnrolledCopy(T * CPL_RESTRICT pDest,const T * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3104 static inline void GDALUnrolledCopy( T* CPL_RESTRICT pDest,
3105                                      const T* CPL_RESTRICT pSrc,
3106                                      GPtrDiff_t nIters )
3107 {
3108     GDALUnrolledCopyGeneric<T,srcStride,dstStride>(pDest, pSrc, nIters);
3109 }
3110 
3111 #if (defined(__x86_64) || defined(_M_X64)) &&  !(defined(__GNUC__) && __GNUC__ < 4)
3112 
3113 #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3114 
3115 void GDALUnrolledCopy_GByte_3_1_SSSE3( GByte* CPL_RESTRICT pDest,
3116                                              const GByte* CPL_RESTRICT pSrc,
3117                                              GPtrDiff_t nIters );
3118 #endif
3119 
3120 
GDALUnrolledCopy(GByte * CPL_RESTRICT pDest,const GByte * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3121 template<> void GDALUnrolledCopy<GByte,2,1>( GByte* CPL_RESTRICT pDest,
3122                                              const GByte* CPL_RESTRICT pSrc,
3123                                              GPtrDiff_t nIters )
3124 {
3125     decltype(nIters) i = 0;
3126     if( nIters > 16 )
3127     {
3128         const __m128i xmm_mask = _mm_set1_epi16(0xff);
3129         // If we were sure that there would always be 1 trailing byte, we could
3130         // check against nIters - 15
3131         for ( ; i < nIters - 16; i += 16 )
3132         {
3133             __m128i xmm0 = _mm_loadu_si128( reinterpret_cast<__m128i const*>(pSrc + 0) );
3134             __m128i xmm1 = _mm_loadu_si128( reinterpret_cast<__m128i const*>(pSrc + 16) );
3135             // Set higher 8bit of each int16 packed word to 0
3136             xmm0 = _mm_and_si128(xmm0, xmm_mask);
3137             xmm1 = _mm_and_si128(xmm1, xmm_mask);
3138             // Pack int16 to uint8 and merge back both vector
3139             xmm0 = _mm_packus_epi16(xmm0, xmm1);
3140 
3141             // Store result
3142             _mm_storeu_si128( reinterpret_cast<__m128i*>(pDest + i), xmm0);
3143 
3144             pSrc += 2 * 16;
3145         }
3146     }
3147     for( ; i < nIters; i++ )
3148     {
3149         pDest[i] = *pSrc;
3150         pSrc += 2;
3151     }
3152 }
3153 
3154 
3155 #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3156 
GDALUnrolledCopy(GByte * CPL_RESTRICT pDest,const GByte * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3157 template<> void GDALUnrolledCopy<GByte,3,1>( GByte* CPL_RESTRICT pDest,
3158                                              const GByte* CPL_RESTRICT pSrc,
3159                                              GPtrDiff_t nIters )
3160 {
3161     if( nIters > 16 && CPLHaveRuntimeSSSE3() )
3162     {
3163         GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3164     }
3165     else
3166     {
3167         GDALUnrolledCopyGeneric<GByte,3,1>(pDest, pSrc, nIters);
3168     }
3169 }
3170 
3171 #endif
3172 
GDALUnrolledCopy(GByte * CPL_RESTRICT pDest,const GByte * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3173 template<> void GDALUnrolledCopy<GByte,4,1>( GByte* CPL_RESTRICT pDest,
3174                                              const GByte* CPL_RESTRICT pSrc,
3175                                              GPtrDiff_t nIters )
3176 {
3177     decltype(nIters) i = 0;
3178     if( nIters > 16 )
3179     {
3180         const __m128i xmm_mask = _mm_set1_epi32(0xff);
3181         // If we were sure that there would always be 3 trailing bytes, we could
3182         // check against nIters - 15
3183         for ( ; i < nIters - 16; i += 16 )
3184         {
3185             __m128i xmm0 = _mm_loadu_si128( reinterpret_cast<__m128i const*> (pSrc + 0) );
3186             __m128i xmm1 = _mm_loadu_si128( reinterpret_cast<__m128i const*> (pSrc + 16) );
3187             __m128i xmm2 = _mm_loadu_si128( reinterpret_cast<__m128i const*> (pSrc + 32) );
3188             __m128i xmm3 = _mm_loadu_si128( reinterpret_cast<__m128i const*> (pSrc + 48) );
3189             // Set higher 24bit of each int32 packed word to 0
3190             xmm0 = _mm_and_si128(xmm0, xmm_mask);
3191             xmm1 = _mm_and_si128(xmm1, xmm_mask);
3192             xmm2 = _mm_and_si128(xmm2, xmm_mask);
3193             xmm3 = _mm_and_si128(xmm3, xmm_mask);
3194             // Pack int32 to int16
3195             xmm0 = _mm_packs_epi32(xmm0, xmm1);
3196             xmm2 = _mm_packs_epi32(xmm2, xmm3);
3197             // Pack int16 to uint8
3198             xmm0 = _mm_packus_epi16(xmm0, xmm2);
3199 
3200             // Store result
3201             _mm_storeu_si128( reinterpret_cast<__m128i*>(pDest + i), xmm0);
3202 
3203             pSrc += 4 * 16;
3204         }
3205     }
3206     for( ; i < nIters; i++ )
3207     {
3208         pDest[i] = *pSrc;
3209         pSrc += 4;
3210     }
3211 }
3212 #endif // defined(__x86_64) || defined(_M_X64)
3213 
3214 /************************************************************************/
3215 /*                         GDALFastCopy()                               */
3216 /************************************************************************/
3217 
3218 template<class T>
GDALFastCopy(T * CPL_RESTRICT pDest,int nDestStride,const T * CPL_RESTRICT pSrc,int nSrcStride,GPtrDiff_t nIters)3219 static inline void GDALFastCopy( T* CPL_RESTRICT pDest,
3220                                  int nDestStride,
3221                                  const T* CPL_RESTRICT pSrc,
3222                                  int nSrcStride,
3223                                  GPtrDiff_t nIters )
3224 {
3225     if( nIters == 1 )
3226     {
3227         *pDest = *pSrc;
3228     }
3229     else if( nDestStride == static_cast<int>(sizeof(T)) )
3230     {
3231         if( nSrcStride == static_cast<int>(sizeof(T)) )
3232         {
3233             memcpy(pDest, pSrc, nIters * sizeof(T));
3234         }
3235         else if( nSrcStride == 2 * static_cast<int>(sizeof(T)) )
3236         {
3237             GDALUnrolledCopy<T, 2,1>(pDest, pSrc, nIters);
3238         }
3239         else if( nSrcStride == 3 * static_cast<int>(sizeof(T)) )
3240         {
3241             GDALUnrolledCopy<T, 3,1>(pDest, pSrc, nIters);
3242         }
3243         else if( nSrcStride == 4 * static_cast<int>(sizeof(T)) )
3244         {
3245             GDALUnrolledCopy<T, 4,1>(pDest, pSrc, nIters);
3246         }
3247         else
3248         {
3249             while( nIters-- > 0 )
3250             {
3251                 *pDest = *pSrc;
3252                 pSrc += nSrcStride / static_cast<int>(sizeof(T));
3253                 pDest ++;
3254             }
3255         }
3256     }
3257     else if( nSrcStride == static_cast<int>(sizeof(T))  )
3258     {
3259         if( nDestStride == 2 * static_cast<int>(sizeof(T)) )
3260         {
3261             GDALUnrolledCopy<T, 1,2>(pDest, pSrc, nIters);
3262         }
3263         else if( nDestStride == 3 * static_cast<int>(sizeof(T)) )
3264         {
3265             GDALUnrolledCopy<T, 1,3>(pDest, pSrc, nIters);
3266         }
3267         else if( nDestStride == 4 * static_cast<int>(sizeof(T))  )
3268         {
3269             GDALUnrolledCopy<T, 1,4>(pDest, pSrc, nIters);
3270         }
3271         else
3272         {
3273             while( nIters-- > 0 )
3274             {
3275                 *pDest = *pSrc;
3276                 pSrc ++;
3277                 pDest += nDestStride / static_cast<int>(sizeof(T));
3278             }
3279         }
3280     }
3281     else
3282     {
3283         while( nIters-- > 0 )
3284         {
3285             *pDest = *pSrc;
3286             pSrc += nSrcStride / static_cast<int>(sizeof(T));
3287             pDest += nDestStride / static_cast<int>(sizeof(T));
3288         }
3289     }
3290 }
3291 
3292 /************************************************************************/
3293 /*                         GDALFastCopyByte()                           */
3294 /************************************************************************/
3295 
GDALFastCopyByte(const GByte * CPL_RESTRICT pSrcData,int nSrcPixelStride,GByte * CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)3296 static void GDALFastCopyByte( const GByte * CPL_RESTRICT pSrcData,
3297                               int nSrcPixelStride,
3298                               GByte * CPL_RESTRICT pDstData,
3299                               int nDstPixelStride,
3300                               GPtrDiff_t nWordCount )
3301 {
3302     GDALFastCopy( pDstData, nDstPixelStride,
3303                   pSrcData, nSrcPixelStride, nWordCount );
3304 }
3305 
3306 /************************************************************************/
3307 /*                           GDALCopyWords()                            */
3308 /************************************************************************/
3309 
3310 /**
3311  * Copy pixel words from buffer to buffer.
3312  *
3313  * @see GDALCopyWords64()
3314  */
3315 void CPL_STDCALL
GDALCopyWords(const void * CPL_RESTRICT pSrcData,GDALDataType eSrcType,int nSrcPixelStride,void * CPL_RESTRICT pDstData,GDALDataType eDstType,int nDstPixelStride,int nWordCount)3316 GDALCopyWords( const void * CPL_RESTRICT pSrcData,
3317                GDALDataType eSrcType,
3318                int nSrcPixelStride,
3319                void * CPL_RESTRICT pDstData,
3320                GDALDataType eDstType,
3321                int nDstPixelStride,
3322                int nWordCount )
3323 {
3324     GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride,
3325                     pDstData, eDstType, nDstPixelStride,
3326                     nWordCount);
3327 }
3328 
3329 /************************************************************************/
3330 /*                          GDALCopyWords64()                           */
3331 /************************************************************************/
3332 
3333 /**
3334  * Copy pixel words from buffer to buffer.
3335  *
3336  * This function is used to copy pixel word values from one memory buffer
3337  * to another, with support for conversion between data types, and differing
3338  * step factors.  The data type conversion is done using the normal GDAL
3339  * rules.  Values assigned to a lower range integer type are clipped.  For
3340  * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3341  * less the 0 to be set to 0, and values larger than 255 to be set to 255.
3342  * Assignment from floating point to integer uses default C type casting
3343  * semantics.   Assignment from non-complex to complex will result in the
3344  * imaginary part being set to zero on output.  Assignment from complex to
3345  * non-complex will result in the complex portion being lost and the real
3346  * component being preserved (<i>not magnitude!</i>).
3347  *
3348  * No assumptions are made about the source or destination words occurring
3349  * on word boundaries.  It is assumed that all values are in native machine
3350  * byte order.
3351  *
3352  * @param pSrcData Pointer to source data to be converted.
3353  * @param eSrcType the source data type (see GDALDataType enum)
3354  * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3355  * in bytes
3356  * @param pDstData Pointer to buffer where destination data should go
3357  * @param eDstType the destination data type (see GDALDataType enum)
3358  * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3359  * words), in bytes
3360  * @param nWordCount number of words to be copied
3361  *
3362  * @note
3363  * When adding a new data type to GDAL, you must do the following to
3364  * support it properly within the GDALCopyWords function:
3365  * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3366  *    This should invoke the appropriate GDALCopyWordsFromT wrapper.
3367  * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3368  *    This should call the appropriate GDALCopyWordsT template.
3369  * 3. If appropriate, overload the appropriate CopyWord template in the
3370  *    above namespace. This will ensure that any conversion issues are
3371  *    handled (cases like the float -> int32 case, where the min/max)
3372  *    values are subject to roundoff error.
3373  */
3374 
3375 void CPL_STDCALL
GDALCopyWords64(const void * CPL_RESTRICT pSrcData,GDALDataType eSrcType,int nSrcPixelStride,void * CPL_RESTRICT pDstData,GDALDataType eDstType,int nDstPixelStride,GPtrDiff_t nWordCount)3376 GDALCopyWords64( const void * CPL_RESTRICT pSrcData,
3377                GDALDataType eSrcType,
3378                int nSrcPixelStride,
3379                void * CPL_RESTRICT pDstData,
3380                GDALDataType eDstType,
3381                int nDstPixelStride,
3382                GPtrDiff_t nWordCount )
3383 
3384 {
3385     // On platforms where alignment matters, be careful
3386     const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
3387 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
3388     const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
3389     if( !(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) &&
3390         ( (reinterpret_cast<GPtrDiff_t>(pSrcData) % nSrcDataTypeSize) != 0 ||
3391           (reinterpret_cast<GPtrDiff_t>(pDstData) % nDstDataTypeSize) != 0 ||
3392           ( nSrcPixelStride % nSrcDataTypeSize) != 0 ||
3393           ( nDstPixelStride % nDstDataTypeSize) != 0 ) )
3394     {
3395         if( eSrcType == eDstType )
3396         {
3397             for( decltype(nWordCount) i = 0; i < nWordCount; i++ )
3398             {
3399                 memcpy( static_cast<GByte*>(pDstData) + nDstPixelStride * i,
3400                         static_cast<const GByte*>(pSrcData) + nSrcPixelStride * i,
3401                         nDstDataTypeSize );
3402             }
3403         }
3404         else
3405         {
3406 #define ALIGN_PTR(ptr, align) ((ptr) + ((align) - (reinterpret_cast<size_t>(ptr) % (align))) % (align))
3407             // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
3408             // be sure to get correctly aligned pointer.
3409             GByte abySrcBuffer[32];
3410             GByte abyDstBuffer[32];
3411             GByte* pabySrcBuffer = ALIGN_PTR(abySrcBuffer, nSrcDataTypeSize);
3412             GByte* pabyDstBuffer = ALIGN_PTR(abyDstBuffer, nDstDataTypeSize);
3413             for( decltype(nWordCount) i = 0; i < nWordCount; i++ )
3414             {
3415                 memcpy( pabySrcBuffer,
3416                         static_cast<const GByte*>(pSrcData) + nSrcPixelStride * i,
3417                         nSrcDataTypeSize );
3418                 GDALCopyWords64( pabySrcBuffer,
3419                                eSrcType,
3420                                0,
3421                                pabyDstBuffer,
3422                                eDstType,
3423                                0,
3424                                1 );
3425                 memcpy( static_cast<GByte*>(pDstData) + nDstPixelStride * i,
3426                         pabyDstBuffer,
3427                         nDstDataTypeSize );
3428             }
3429         }
3430         return;
3431     }
3432 #endif
3433 
3434     // Deal with the case where we're replicating a single word into the
3435     // provided buffer
3436     if (nSrcPixelStride == 0 && nWordCount > 1)
3437     {
3438         GDALReplicateWord( pSrcData, eSrcType, pDstData,
3439                            eDstType, nDstPixelStride, nWordCount );
3440         return;
3441     }
3442 
3443     if (eSrcType == eDstType)
3444     {
3445         if( eSrcType == GDT_Byte )
3446         {
3447             GDALFastCopy(
3448                 static_cast<GByte*>(pDstData), nDstPixelStride,
3449                 static_cast<const GByte*>(pSrcData), nSrcPixelStride, nWordCount );
3450             return;
3451         }
3452 
3453         if( nSrcDataTypeSize == 2 && (nSrcPixelStride%2) == 0 &&
3454             (nDstPixelStride%2) == 0 )
3455         {
3456             GDALFastCopy(
3457                 static_cast<short*>(pDstData), nDstPixelStride,
3458                 static_cast<const short*>(pSrcData), nSrcPixelStride, nWordCount );
3459             return;
3460         }
3461 
3462         if( nWordCount == 1 )
3463         {
3464 #ifdef CSA_BUILD
3465             // Avoid false positives...
3466             memcpy(pDstData, pSrcData, nSrcDataTypeSize);
3467 #else
3468             if( nSrcDataTypeSize == 2 )
3469                 memcpy(pDstData, pSrcData, 2);
3470             else if( nSrcDataTypeSize == 4 )
3471                 memcpy(pDstData, pSrcData, 4);
3472             else if( nSrcDataTypeSize == 8 )
3473                 memcpy(pDstData, pSrcData, 8 );
3474             else /* if( eSrcType == GDT_CFloat64 ) */
3475                 memcpy(pDstData, pSrcData, 16);
3476 #endif
3477             return;
3478         }
3479 
3480         // Let memcpy() handle the case where we're copying a packed buffer
3481         // of pixels.
3482         if( nSrcPixelStride == nDstPixelStride )
3483         {
3484             if( nSrcPixelStride == nSrcDataTypeSize)
3485             {
3486                 memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize);
3487                 return;
3488             }
3489         }
3490     }
3491 
3492     // Handle the more general case -- deals with conversion of data types
3493     // directly.
3494     switch (eSrcType)
3495     {
3496     case GDT_Byte:
3497         GDALCopyWordsFromT<unsigned char>(
3498             static_cast<const unsigned char *>(pSrcData),
3499             nSrcPixelStride, false,
3500             pDstData, eDstType, nDstPixelStride,
3501             nWordCount );
3502         break;
3503     case GDT_UInt16:
3504         GDALCopyWordsFromT<unsigned short>(
3505             static_cast<const unsigned short *>(pSrcData),
3506             nSrcPixelStride, false,
3507             pDstData, eDstType, nDstPixelStride,
3508             nWordCount );
3509         break;
3510     case GDT_Int16:
3511         GDALCopyWordsFromT<short>(
3512             static_cast<const short *>(pSrcData), nSrcPixelStride, false,
3513             pDstData, eDstType, nDstPixelStride,
3514             nWordCount );
3515         break;
3516     case GDT_UInt32:
3517         GDALCopyWordsFromT<unsigned int>(
3518             static_cast<const unsigned int *>(pSrcData), nSrcPixelStride, false,
3519             pDstData, eDstType, nDstPixelStride,
3520             nWordCount );
3521         break;
3522     case GDT_Int32:
3523         GDALCopyWordsFromT<int>(
3524             static_cast<const int *>(pSrcData), nSrcPixelStride, false,
3525             pDstData, eDstType, nDstPixelStride,
3526             nWordCount );
3527         break;
3528     case GDT_Float32:
3529         GDALCopyWordsFromT<float>(
3530             static_cast<const float *>(pSrcData), nSrcPixelStride, false,
3531             pDstData, eDstType, nDstPixelStride,
3532             nWordCount );
3533         break;
3534     case GDT_Float64:
3535         GDALCopyWordsFromT<double>(
3536             static_cast<const double *>(pSrcData), nSrcPixelStride, false,
3537             pDstData, eDstType, nDstPixelStride,
3538             nWordCount );
3539         break;
3540     case GDT_CInt16:
3541         GDALCopyWordsFromT<short>(
3542             static_cast<const short *>(pSrcData), nSrcPixelStride, true,
3543             pDstData, eDstType, nDstPixelStride,
3544             nWordCount );
3545         break;
3546     case GDT_CInt32:
3547         GDALCopyWordsFromT<int>(
3548             static_cast<const int *>(pSrcData), nSrcPixelStride, true,
3549             pDstData, eDstType, nDstPixelStride,
3550             nWordCount );
3551         break;
3552     case GDT_CFloat32:
3553         GDALCopyWordsFromT<float>(
3554             static_cast<const float *>(pSrcData), nSrcPixelStride, true,
3555             pDstData, eDstType, nDstPixelStride,
3556             nWordCount );
3557         break;
3558     case GDT_CFloat64:
3559         GDALCopyWordsFromT<double>(
3560             static_cast<const double *>(pSrcData), nSrcPixelStride, true,
3561             pDstData, eDstType, nDstPixelStride,
3562             nWordCount );
3563         break;
3564     case GDT_Unknown:
3565     default:
3566         CPLAssert(false);
3567     }
3568 }
3569 
3570 /************************************************************************/
3571 /*                            GDALCopyBits()                            */
3572 /************************************************************************/
3573 
3574 /**
3575  * Bitwise word copying.
3576  *
3577  * A function for moving sets of partial bytes around.  Loosely
3578  * speaking this is a bitwise analog to GDALCopyWords().
3579  *
3580  * It copies nStepCount "words" where each word is nBitCount bits long.
3581  * The nSrcStep and nDstStep are the number of bits from the start of one
3582  * word to the next (same as nBitCount if they are packed).  The nSrcOffset
3583  * and nDstOffset are the offset into the source and destination buffers
3584  * to start at, also measured in bits.
3585  *
3586  * All bit offsets are assumed to start from the high order bit in a byte
3587  * (i.e. most significant bit first).  Currently this function is not very
3588  * optimized, but it may be improved for some common cases in the future
3589  * as needed.
3590  *
3591  * @param pabySrcData the source data buffer.
3592  * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
3593  * first word to copy.
3594  * @param nSrcStep the offset in bits from the start one source word to the
3595  * start of the next.
3596  * @param pabyDstData the destination data buffer.
3597  * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
3598  * first word to copy over.
3599  * @param nDstStep the offset in bits from the start one word to the
3600  * start of the next.
3601  * @param nBitCount the number of bits in a word to be copied.
3602  * @param nStepCount the number of words to copy.
3603  */
3604 
GDALCopyBits(const GByte * pabySrcData,int nSrcOffset,int nSrcStep,GByte * pabyDstData,int nDstOffset,int nDstStep,int nBitCount,int nStepCount)3605 void GDALCopyBits( const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
3606                    GByte *pabyDstData, int nDstOffset, int nDstStep,
3607                    int nBitCount, int nStepCount )
3608 
3609 {
3610     VALIDATE_POINTER0( pabySrcData, "GDALCopyBits" );
3611 
3612     for( int iStep = 0; iStep < nStepCount; iStep++ )
3613     {
3614         for( int iBit = 0; iBit < nBitCount; iBit++ )
3615         {
3616             if( pabySrcData[nSrcOffset>>3]
3617                 & (0x80 >>(nSrcOffset & 7)) )
3618                 pabyDstData[nDstOffset>>3] |= (0x80 >> (nDstOffset & 7));
3619             else
3620                 pabyDstData[nDstOffset>>3] &= ~(0x80 >> (nDstOffset & 7));
3621 
3622             nSrcOffset++;
3623             nDstOffset++;
3624         }
3625 
3626         nSrcOffset += (nSrcStep - nBitCount);
3627         nDstOffset += (nDstStep - nBitCount);
3628     }
3629 }
3630 
3631 /************************************************************************/
3632 /*                    GDALGetBestOverviewLevel()                        */
3633 /*                                                                      */
3634 /* Returns the best overview level to satisfy the query or -1 if none   */
3635 /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when        */
3636 /* returning a valid overview level                                     */
3637 /************************************************************************/
3638 
GDALBandGetBestOverviewLevel(GDALRasterBand * poBand,int & nXOff,int & nYOff,int & nXSize,int & nYSize,int nBufXSize,int nBufYSize)3639 int GDALBandGetBestOverviewLevel( GDALRasterBand* poBand,
3640                                   int &nXOff, int &nYOff,
3641                                   int &nXSize, int &nYSize,
3642                                   int nBufXSize, int nBufYSize )
3643 {
3644     return GDALBandGetBestOverviewLevel2( poBand, nXOff, nYOff, nXSize, nYSize,
3645                                           nBufXSize, nBufYSize, nullptr );
3646 }
3647 
GDALBandGetBestOverviewLevel2(GDALRasterBand * poBand,int & nXOff,int & nYOff,int & nXSize,int & nYSize,int nBufXSize,int nBufYSize,GDALRasterIOExtraArg * psExtraArg)3648 int GDALBandGetBestOverviewLevel2( GDALRasterBand* poBand,
3649                                    int &nXOff, int &nYOff,
3650                                    int &nXSize, int &nYSize,
3651                                    int nBufXSize, int nBufYSize,
3652                                    GDALRasterIOExtraArg* psExtraArg )
3653 {
3654     double dfDesiredResolution = 0.0;
3655 /* -------------------------------------------------------------------- */
3656 /*      Compute the desired resolution.  The resolution is              */
3657 /*      based on the least reduced axis, and represents the number      */
3658 /*      of source pixels to one destination pixel.                      */
3659 /* -------------------------------------------------------------------- */
3660     if( (nXSize / static_cast<double>(nBufXSize)) <
3661         (nYSize / static_cast<double>(nBufYSize))
3662         || nBufYSize == 1 )
3663         dfDesiredResolution = nXSize / static_cast<double>( nBufXSize );
3664     else
3665         dfDesiredResolution = nYSize / static_cast<double>( nBufYSize );
3666 
3667 /* -------------------------------------------------------------------- */
3668 /*      Find the overview level that largest resolution value (most     */
3669 /*      downsampled) that is still less than (or only a little more)    */
3670 /*      downsampled than the request.                                   */
3671 /* -------------------------------------------------------------------- */
3672     int nOverviewCount = poBand->GetOverviewCount();
3673     GDALRasterBand* poBestOverview = nullptr;
3674     double dfBestResolution = 0;
3675     int nBestOverviewLevel = -1;
3676 
3677     for( int iOverview = 0; iOverview < nOverviewCount; iOverview++ )
3678     {
3679         GDALRasterBand *poOverview = poBand->GetOverview( iOverview );
3680         if (poOverview == nullptr ||
3681             poOverview->GetXSize() > poBand->GetXSize() ||
3682             poOverview->GetYSize() > poBand->GetYSize() )
3683         {
3684             continue;
3685         }
3686 
3687         double dfResolution = 0.0;
3688 
3689         // What resolution is this?
3690         if( (poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()))
3691             < (poBand->GetYSize() /
3692                static_cast<double>(poOverview->GetYSize())) )
3693             dfResolution =
3694                 poBand->GetXSize() /
3695                 static_cast<double>( poOverview->GetXSize() );
3696         else
3697             dfResolution =
3698                 poBand->GetYSize() /
3699                 static_cast<double>( poOverview->GetYSize() );
3700 
3701         // Is it nearly the requested resolution and better (lower) than
3702         // the current best resolution?
3703         if( dfResolution >= dfDesiredResolution * 1.2
3704             || dfResolution <= dfBestResolution )
3705             continue;
3706 
3707         // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
3708         const char *pszResampling =
3709             poOverview->GetMetadataItem( "RESAMPLING" );
3710 
3711         if( pszResampling != nullptr &&
3712             STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2") )
3713             continue;
3714 
3715         // OK, this is our new best overview.
3716         poBestOverview = poOverview;
3717         nBestOverviewLevel = iOverview;
3718         dfBestResolution = dfResolution;
3719     }
3720 
3721 /* -------------------------------------------------------------------- */
3722 /*      If we didn't find an overview that helps us, just return        */
3723 /*      indicating failure and the full resolution image will be used.  */
3724 /* -------------------------------------------------------------------- */
3725     if( nBestOverviewLevel < 0 )
3726         return -1;
3727 
3728 /* -------------------------------------------------------------------- */
3729 /*      Recompute the source window in terms of the selected            */
3730 /*      overview.                                                       */
3731 /* -------------------------------------------------------------------- */
3732     const double dfXRes =
3733         poBand->GetXSize() / static_cast<double>( poBestOverview->GetXSize() );
3734     const double dfYRes =
3735         poBand->GetYSize() / static_cast<double>( poBestOverview->GetYSize() );
3736 
3737     const int nOXOff = std::min( poBestOverview->GetXSize()-1,
3738                                  static_cast<int>(nXOff / dfXRes + 0.5));
3739     const int nOYOff = std::min( poBestOverview->GetYSize()-1,
3740                                  static_cast<int>(nYOff / dfYRes + 0.5));
3741     int nOXSize = std::max(1, static_cast<int>(nXSize / dfXRes + 0.5));
3742     int nOYSize = std::max(1, static_cast<int>(nYSize / dfYRes + 0.5));
3743     if( nOXOff + nOXSize > poBestOverview->GetXSize() )
3744         nOXSize = poBestOverview->GetXSize() - nOXOff;
3745     if( nOYOff + nOYSize > poBestOverview->GetYSize() )
3746         nOYSize = poBestOverview->GetYSize() - nOYOff;
3747 
3748     nXOff = nOXOff;
3749     nYOff = nOYOff;
3750     nXSize = nOXSize;
3751     nYSize = nOYSize;
3752 
3753     if( psExtraArg && psExtraArg->bFloatingPointWindowValidity )
3754     {
3755         psExtraArg->dfXOff /= dfXRes;
3756         psExtraArg->dfXSize /= dfXRes;
3757         psExtraArg->dfYOff /= dfYRes;
3758         psExtraArg->dfYSize /= dfYRes;
3759     }
3760 
3761     return nBestOverviewLevel;
3762 }
3763 
3764 /************************************************************************/
3765 /*                          OverviewRasterIO()                          */
3766 /*                                                                      */
3767 /*      Special work function to utilize available overviews to         */
3768 /*      more efficiently satisfy downsampled requests.  It will         */
3769 /*      return CE_Failure if there are no appropriate overviews         */
3770 /*      available but it doesn't emit any error messages.               */
3771 /************************************************************************/
3772 
3773 //! @cond Doxygen_Suppress
OverviewRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,GSpacing nPixelSpace,GSpacing nLineSpace,GDALRasterIOExtraArg * psExtraArg)3774 CPLErr GDALRasterBand::OverviewRasterIO( GDALRWFlag eRWFlag,
3775                                 int nXOff, int nYOff, int nXSize, int nYSize,
3776                                 void * pData, int nBufXSize, int nBufYSize,
3777                                 GDALDataType eBufType,
3778                                 GSpacing nPixelSpace, GSpacing nLineSpace,
3779                                 GDALRasterIOExtraArg* psExtraArg )
3780 
3781 {
3782     GDALRasterIOExtraArg sExtraArg;
3783     GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3784 
3785     const int nOverview =
3786         GDALBandGetBestOverviewLevel2( this, nXOff, nYOff, nXSize, nYSize,
3787                                        nBufXSize, nBufYSize, &sExtraArg );
3788     if (nOverview < 0)
3789         return CE_Failure;
3790 
3791 /* -------------------------------------------------------------------- */
3792 /*      Recast the call in terms of the new raster layer.               */
3793 /* -------------------------------------------------------------------- */
3794     GDALRasterBand* poOverviewBand = GetOverview(nOverview);
3795     if (poOverviewBand == nullptr)
3796         return CE_Failure;
3797 
3798     return poOverviewBand->RasterIO( eRWFlag, nXOff, nYOff, nXSize, nYSize,
3799                                      pData, nBufXSize, nBufYSize, eBufType,
3800                                      nPixelSpace, nLineSpace, &sExtraArg );
3801 }
3802 
3803 /************************************************************************/
3804 /*                      TryOverviewRasterIO()                           */
3805 /************************************************************************/
3806 
TryOverviewRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,GSpacing nPixelSpace,GSpacing nLineSpace,GDALRasterIOExtraArg * psExtraArg,int * pbTried)3807 CPLErr GDALRasterBand::TryOverviewRasterIO( GDALRWFlag eRWFlag,
3808                                             int nXOff, int nYOff, int nXSize, int nYSize,
3809                                             void * pData, int nBufXSize, int nBufYSize,
3810                                             GDALDataType eBufType,
3811                                             GSpacing nPixelSpace, GSpacing nLineSpace,
3812                                             GDALRasterIOExtraArg* psExtraArg,
3813                                             int* pbTried )
3814 {
3815     int nXOffMod = nXOff;
3816     int nYOffMod = nYOff;
3817     int nXSizeMod = nXSize;
3818     int nYSizeMod = nYSize;
3819     GDALRasterIOExtraArg sExtraArg;
3820 
3821     GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3822 
3823     int iOvrLevel = GDALBandGetBestOverviewLevel2( this,
3824                                                    nXOffMod, nYOffMod,
3825                                                    nXSizeMod, nYSizeMod,
3826                                                    nBufXSize, nBufYSize,
3827                                                    &sExtraArg );
3828 
3829     if( iOvrLevel >= 0 )
3830     {
3831         GDALRasterBand* poOverviewBand = GetOverview(iOvrLevel);
3832         if( poOverviewBand  )
3833         {
3834             *pbTried = TRUE;
3835             return poOverviewBand->RasterIO(
3836                 eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod,
3837                 pData, nBufXSize, nBufYSize, eBufType,
3838                 nPixelSpace, nLineSpace, &sExtraArg);
3839         }
3840     }
3841 
3842     *pbTried = FALSE;
3843     return CE_None;
3844 }
3845 
3846 /************************************************************************/
3847 /*                      TryOverviewRasterIO()                           */
3848 /************************************************************************/
3849 
TryOverviewRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,int nBandCount,int * panBandMap,GSpacing nPixelSpace,GSpacing nLineSpace,GSpacing nBandSpace,GDALRasterIOExtraArg * psExtraArg,int * pbTried)3850 CPLErr GDALDataset::TryOverviewRasterIO( GDALRWFlag eRWFlag,
3851                                          int nXOff, int nYOff, int nXSize, int nYSize,
3852                                          void * pData, int nBufXSize, int nBufYSize,
3853                                          GDALDataType eBufType,
3854                                          int nBandCount, int *panBandMap,
3855                                          GSpacing nPixelSpace, GSpacing nLineSpace,
3856                                          GSpacing nBandSpace,
3857                                          GDALRasterIOExtraArg* psExtraArg,
3858                                          int* pbTried )
3859 {
3860     int nXOffMod = nXOff;
3861     int nYOffMod = nYOff;
3862     int nXSizeMod = nXSize;
3863     int nYSizeMod = nYSize;
3864     GDALRasterIOExtraArg sExtraArg;
3865     GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3866 
3867     int iOvrLevel = GDALBandGetBestOverviewLevel2( papoBands[0],
3868                                                    nXOffMod, nYOffMod,
3869                                                    nXSizeMod, nYSizeMod,
3870                                                    nBufXSize, nBufYSize,
3871                                                    &sExtraArg );
3872 
3873     if( iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr &&
3874         papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr )
3875     {
3876         *pbTried = TRUE;
3877         return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO(
3878             eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod,
3879             pData, nBufXSize, nBufYSize, eBufType,
3880             nBandCount, panBandMap, nPixelSpace, nLineSpace, nBandSpace,
3881             &sExtraArg );
3882     }
3883     else
3884     {
3885         *pbTried = FALSE;
3886         return CE_None;
3887     }
3888 }
3889 
3890 /************************************************************************/
3891 /*                        GetBestOverviewLevel()                        */
3892 /*                                                                      */
3893 /* Returns the best overview level to satisfy the query or -1 if none   */
3894 /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid     */
3895 /* overview level                                                       */
3896 /************************************************************************/
3897 
3898 static
GDALDatasetGetBestOverviewLevel(GDALDataset * poDS,int & nXOff,int & nYOff,int & nXSize,int & nYSize,int nBufXSize,int nBufYSize,int nBandCount,int * panBandMap,GDALRasterIOExtraArg * psExtraArg)3899 int GDALDatasetGetBestOverviewLevel( GDALDataset* poDS,
3900                                      int &nXOff, int &nYOff,
3901                                      int &nXSize, int &nYSize,
3902                                      int nBufXSize, int nBufYSize,
3903                                      int nBandCount, int *panBandMap,
3904                                      GDALRasterIOExtraArg* psExtraArg )
3905 {
3906     int nOverviewCount = 0;
3907     GDALRasterBand *poFirstBand = nullptr;
3908 
3909 /* -------------------------------------------------------------------- */
3910 /* Check that all bands have the same number of overviews and           */
3911 /* that they have all the same size and block dimensions                */
3912 /* -------------------------------------------------------------------- */
3913     for( int iBand = 0; iBand < nBandCount; iBand++ )
3914     {
3915         GDALRasterBand *poBand = poDS->GetRasterBand( panBandMap[iBand] );
3916         if ( poBand == nullptr )
3917             return -1;
3918         if (iBand == 0)
3919         {
3920             poFirstBand = poBand;
3921             nOverviewCount = poBand->GetOverviewCount();
3922         }
3923         else if (nOverviewCount != poBand->GetOverviewCount())
3924         {
3925             CPLDebug( "GDAL",
3926                       "GDALDataset::GetBestOverviewLevel() ... "
3927                       "mismatched overview count, use std method." );
3928             return -1;
3929         }
3930         else
3931         {
3932             for( int iOverview = 0; iOverview < nOverviewCount; iOverview++ )
3933             {
3934                 GDALRasterBand* poOvrBand =
3935                     poBand->GetOverview(iOverview);
3936                 GDALRasterBand* poOvrFirstBand =
3937                     poFirstBand->GetOverview(iOverview);
3938                 if ( poOvrBand == nullptr || poOvrFirstBand == nullptr)
3939                     continue;
3940 
3941                 if ( poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
3942                      poOvrFirstBand->GetYSize() != poOvrBand->GetYSize() )
3943                 {
3944                     CPLDebug( "GDAL",
3945                               "GDALDataset::GetBestOverviewLevel() ... "
3946                               "mismatched overview sizes, use std method." );
3947                     return -1;
3948                 }
3949                 int nBlockXSizeFirst = 0;
3950                 int nBlockYSizeFirst = 0;
3951                 poOvrFirstBand->GetBlockSize( &nBlockXSizeFirst,
3952                                               &nBlockYSizeFirst );
3953 
3954                 int nBlockXSizeCurrent = 0;
3955                 int nBlockYSizeCurrent = 0;
3956                 poOvrBand->GetBlockSize( &nBlockXSizeCurrent,
3957                                          &nBlockYSizeCurrent );
3958 
3959                 if (nBlockXSizeFirst != nBlockXSizeCurrent ||
3960                     nBlockYSizeFirst != nBlockYSizeCurrent)
3961                 {
3962                     CPLDebug(
3963                         "GDAL",
3964                         "GDALDataset::GetBestOverviewLevel() ... "
3965                         "mismatched block sizes, use std method." );
3966                     return -1;
3967                 }
3968             }
3969         }
3970     }
3971     if( poFirstBand == nullptr )
3972         return -1;
3973 
3974     return GDALBandGetBestOverviewLevel2( poFirstBand,
3975                                           nXOff, nYOff, nXSize, nYSize,
3976                                           nBufXSize, nBufYSize, psExtraArg );
3977 }
3978 
3979 /************************************************************************/
3980 /*                         BlockBasedRasterIO()                         */
3981 /*                                                                      */
3982 /*      This convenience function implements a dataset level            */
3983 /*      RasterIO() interface based on calling down to fetch blocks,     */
3984 /*      much like the GDALRasterBand::IRasterIO(), but it handles       */
3985 /*      all bands at once, so that a format driver that handles a       */
3986 /*      request for different bands of the same block efficiently       */
3987 /*      (i.e. without re-reading interleaved data) will efficiently.    */
3988 /*                                                                      */
3989 /*      This method is intended to be called by an overridden           */
3990 /*      IRasterIO() method in the driver specific GDALDataset           */
3991 /*      derived class.                                                  */
3992 /*                                                                      */
3993 /*      Default internal implementation of RasterIO() ... utilizes      */
3994 /*      the Block access methods to satisfy the request.  This would    */
3995 /*      normally only be overridden by formats with overviews.          */
3996 /*                                                                      */
3997 /*      To keep things relatively simple, this method does not          */
3998 /*      currently take advantage of some special cases addressed in     */
3999 /*      GDALRasterBand::IRasterIO(), so it is likely best to only       */
4000 /*      call it when you know it will help.  That is in cases where     */
4001 /*      data is at 1:1 to the buffer, and you know the driver is        */
4002 /*      implementing interleaved IO efficiently on a block by block     */
4003 /*      basis. Overviews will be used when possible.                    */
4004 /************************************************************************/
4005 
4006 CPLErr
BlockBasedRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,int nBandCount,int * panBandMap,GSpacing nPixelSpace,GSpacing nLineSpace,GSpacing nBandSpace,GDALRasterIOExtraArg * psExtraArg)4007 GDALDataset::BlockBasedRasterIO( GDALRWFlag eRWFlag,
4008                                  int nXOff, int nYOff, int nXSize, int nYSize,
4009                                  void * pData, int nBufXSize, int nBufYSize,
4010                                  GDALDataType eBufType,
4011                                  int nBandCount, int *panBandMap,
4012                                  GSpacing nPixelSpace, GSpacing nLineSpace,
4013                                  GSpacing nBandSpace,
4014                                  GDALRasterIOExtraArg* psExtraArg )
4015 
4016 {
4017     CPLAssert( nullptr != pData );
4018 
4019     GByte **papabySrcBlock = nullptr;
4020     GDALRasterBlock *poBlock = nullptr;
4021     GDALRasterBlock **papoBlocks = nullptr;
4022     int nLBlockX = -1;
4023     int nLBlockY = -1;
4024     int iBufYOff;
4025     int iBufXOff;
4026     int nBlockXSize = 1;
4027     int nBlockYSize = 1;
4028     CPLErr eErr = CE_None;
4029     GDALDataType eDataType = GDT_Byte;
4030 
4031     const bool bUseIntegerRequestCoords =
4032            (!psExtraArg->bFloatingPointWindowValidity ||
4033             (nXOff == psExtraArg->dfXOff &&
4034              nYOff == psExtraArg->dfYOff &&
4035              nXSize == psExtraArg->dfXSize &&
4036              nYSize == psExtraArg->dfYSize));
4037 
4038 /* -------------------------------------------------------------------- */
4039 /*      Ensure that all bands share a common block size and data type.  */
4040 /* -------------------------------------------------------------------- */
4041     for( int iBand = 0; iBand < nBandCount; iBand++ )
4042     {
4043         GDALRasterBand *poBand = GetRasterBand( panBandMap[iBand] );
4044 
4045         if( iBand == 0 )
4046         {
4047             poBand->GetBlockSize( &nBlockXSize, &nBlockYSize );
4048             eDataType = poBand->GetRasterDataType();
4049         }
4050         else
4051         {
4052           int nThisBlockXSize = 0;
4053           int nThisBlockYSize = 0;
4054             poBand->GetBlockSize( &nThisBlockXSize, &nThisBlockYSize );
4055             if( nThisBlockXSize != nBlockXSize
4056                 || nThisBlockYSize != nBlockYSize )
4057             {
4058                 CPLDebug( "GDAL",
4059                           "GDALDataset::BlockBasedRasterIO() ... "
4060                           "mismatched block sizes, use std method." );
4061                 return BandBasedRasterIO( eRWFlag,
4062                                           nXOff, nYOff, nXSize, nYSize,
4063                                           pData, nBufXSize, nBufYSize,
4064                                           eBufType,
4065                                           nBandCount, panBandMap,
4066                                           nPixelSpace, nLineSpace,
4067                                           nBandSpace, psExtraArg );
4068             }
4069 
4070             if( eDataType != poBand->GetRasterDataType()
4071                 && (nXSize != nBufXSize || nYSize != nBufYSize) )
4072             {
4073                 CPLDebug( "GDAL",
4074                           "GDALDataset::BlockBasedRasterIO() ... "
4075                           "mismatched band data types, use std method." );
4076                 return BandBasedRasterIO( eRWFlag,
4077                                           nXOff, nYOff, nXSize, nYSize,
4078                                           pData, nBufXSize, nBufYSize,
4079                                           eBufType,
4080                                           nBandCount, panBandMap,
4081                                           nPixelSpace, nLineSpace,
4082                                           nBandSpace, psExtraArg );
4083             }
4084         }
4085     }
4086 
4087 /* ==================================================================== */
4088 /*      In this special case at full resolution we step through in      */
4089 /*      blocks, turning the request over to the per-band                */
4090 /*      IRasterIO(), but ensuring that all bands of one block are       */
4091 /*      called before proceeding to the next.                           */
4092 /* ==================================================================== */
4093 
4094     if( nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords )
4095     {
4096         GDALRasterIOExtraArg sDummyExtraArg;
4097         INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);
4098 
4099         int nChunkYSize = 0;
4100         int nChunkXSize = 0;
4101 
4102         for( iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize )
4103         {
4104             const int nChunkYOff = iBufYOff + nYOff;
4105             nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
4106             if( nChunkYOff + nChunkYSize > nYOff + nYSize )
4107                 nChunkYSize = (nYOff + nYSize) - nChunkYOff;
4108 
4109             for( iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize )
4110             {
4111                 const int nChunkXOff = iBufXOff + nXOff;
4112                 nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
4113                 if( nChunkXOff + nChunkXSize > nXOff + nXSize )
4114                     nChunkXSize = (nXOff + nXSize) - nChunkXOff;
4115 
4116                 GByte *pabyChunkData =
4117                     static_cast<GByte *>(pData)
4118                     + iBufXOff * nPixelSpace
4119                     + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;
4120 
4121                 for( int iBand = 0; iBand < nBandCount; iBand++ )
4122                 {
4123                     GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4124 
4125                     eErr =
4126                         poBand->GDALRasterBand::IRasterIO(
4127                             eRWFlag, nChunkXOff, nChunkYOff,
4128                             nChunkXSize, nChunkYSize,
4129                             pabyChunkData +
4130                             static_cast<GPtrDiff_t>(iBand) * nBandSpace,
4131                             nChunkXSize, nChunkYSize, eBufType,
4132                             nPixelSpace, nLineSpace, &sDummyExtraArg );
4133                     if( eErr != CE_None )
4134                         return eErr;
4135                 }
4136             }
4137 
4138             if( psExtraArg->pfnProgress != nullptr &&
4139                 !psExtraArg->pfnProgress(
4140                     1.0 * std::min(nBufYSize,
4141                                    iBufYOff + nChunkYSize) /
4142                     nBufYSize, "", psExtraArg->pProgressData) )
4143             {
4144                 return CE_Failure;
4145             }
4146         }
4147 
4148         return CE_None;
4149     }
4150 
4151     /* Below code is not compatible with that case. It would need a complete */
4152     /* separate code like done in GDALRasterBand::IRasterIO. */
4153     if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
4154     {
4155         return BandBasedRasterIO( eRWFlag,
4156                                        nXOff, nYOff, nXSize, nYSize,
4157                                        pData, nBufXSize, nBufYSize,
4158                                        eBufType,
4159                                        nBandCount, panBandMap,
4160                                        nPixelSpace, nLineSpace,
4161                                        nBandSpace, psExtraArg );
4162     }
4163 
4164     /* We could have a smarter implementation, but that will do for now */
4165     if( psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
4166         (nBufXSize != nXSize || nBufYSize != nYSize) )
4167     {
4168         return BandBasedRasterIO( eRWFlag,
4169                                        nXOff, nYOff, nXSize, nYSize,
4170                                        pData, nBufXSize, nBufYSize,
4171                                        eBufType,
4172                                        nBandCount, panBandMap,
4173                                        nPixelSpace, nLineSpace,
4174                                        nBandSpace, psExtraArg );
4175     }
4176 
4177 /* ==================================================================== */
4178 /*      Loop reading required source blocks to satisfy output           */
4179 /*      request.  This is the most general implementation.              */
4180 /* ==================================================================== */
4181 
4182     const int nBandDataSize = GDALGetDataTypeSizeBytes( eDataType );
4183 
4184     papabySrcBlock = static_cast<GByte **>(CPLCalloc(sizeof(GByte*),nBandCount));
4185     papoBlocks = static_cast<GDALRasterBlock **>(
4186         CPLCalloc(sizeof(void*), nBandCount) );
4187 
4188 /* -------------------------------------------------------------------- */
4189 /*      Select an overview level if appropriate.                        */
4190 /* -------------------------------------------------------------------- */
4191 
4192     GDALRasterIOExtraArg sExtraArg;
4193     GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
4194     const int nOverviewLevel =
4195         GDALDatasetGetBestOverviewLevel( this,
4196                                          nXOff, nYOff, nXSize, nYSize,
4197                                          nBufXSize, nBufYSize,
4198                                          nBandCount, panBandMap,
4199                                          &sExtraArg );
4200     if (nOverviewLevel >= 0)
4201     {
4202         GetRasterBand(panBandMap[0])->GetOverview(nOverviewLevel)->
4203                                 GetBlockSize( &nBlockXSize, &nBlockYSize );
4204     }
4205 
4206     double dfXOff = nXOff;
4207     double dfYOff = nYOff;
4208     double dfXSize = nXSize;
4209     double dfYSize = nYSize;
4210     if( sExtraArg.bFloatingPointWindowValidity )
4211     {
4212         dfXOff = sExtraArg.dfXOff;
4213         dfYOff = sExtraArg.dfYOff;
4214         dfXSize = sExtraArg.dfXSize;
4215         dfYSize = sExtraArg.dfYSize;
4216     }
4217 
4218 /* -------------------------------------------------------------------- */
4219 /*      Compute stepping increment.                                     */
4220 /* -------------------------------------------------------------------- */
4221     const double dfSrcXInc = dfXSize / static_cast<double>( nBufXSize );
4222     const double dfSrcYInc = dfYSize / static_cast<double>( nBufYSize );
4223 
4224     constexpr double EPS = 1e-10;
4225 /* -------------------------------------------------------------------- */
4226 /*      Loop over buffer computing source locations.                    */
4227 /* -------------------------------------------------------------------- */
4228     for( iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++ )
4229     {
4230         GPtrDiff_t  iSrcOffset;
4231 
4232         // Add small epsilon to avoid some numeric precision issues.
4233         const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
4234         const int iSrcY = static_cast<int>(std::min(std::max(0.0, dfSrcY),
4235                                     static_cast<double>(nRasterYSize - 1)));
4236 
4237         GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * static_cast<GPtrDiff_t>(nLineSpace);
4238 
4239         for( iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++ )
4240         {
4241             const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
4242             const int iSrcX = static_cast<int>(std::min(std::max(0.0, dfSrcX),
4243                                         static_cast<double>(nRasterXSize - 1)));
4244 
4245             // FIXME: this code likely doesn't work if the dirty block gets flushed
4246             // to disk before being completely written.
4247             // In the meantime, bJustInitialize should probably be set to FALSE
4248             // even if it is not ideal performance wise, and for lossy compression
4249 
4250 /* -------------------------------------------------------------------- */
4251 /*      Ensure we have the appropriate block loaded.                    */
4252 /* -------------------------------------------------------------------- */
4253             if( iSrcX < nLBlockX * nBlockXSize
4254                 || iSrcX - nBlockXSize >= nLBlockX * nBlockXSize
4255                 || iSrcY < nLBlockY * nBlockYSize
4256                 || iSrcY - nBlockYSize >= nLBlockY * nBlockYSize )
4257             {
4258                 nLBlockX = iSrcX / nBlockXSize;
4259                 nLBlockY = iSrcY / nBlockYSize;
4260 
4261                 const bool bJustInitialize =
4262                     eRWFlag == GF_Write
4263                     && nYOff <= nLBlockY * nBlockYSize
4264                     && nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize
4265                     && nXOff <= nLBlockX * nBlockXSize
4266                     && nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
4267                 /*bool bMemZeroBuffer = FALSE;
4268                 if( eRWFlag == GF_Write && !bJustInitialize &&
4269                     nXOff <= nLBlockX * nBlockXSize &&
4270                     nYOff <= nLBlockY * nBlockYSize &&
4271                     (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
4272                      (nXOff + nXSize == GetRasterXSize() &&
4273                      (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
4274                     (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
4275                      (nYOff + nYSize == GetRasterYSize() &&
4276                      (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
4277                 {
4278                     bJustInitialize = TRUE;
4279                     bMemZeroBuffer = TRUE;
4280                 }*/
4281                 for( int iBand = 0; iBand < nBandCount; iBand++ )
4282                 {
4283                     GDALRasterBand *poBand = GetRasterBand( panBandMap[iBand]);
4284                     if (nOverviewLevel >= 0)
4285                         poBand = poBand->GetOverview(nOverviewLevel);
4286                     poBlock = poBand->GetLockedBlockRef( nLBlockX, nLBlockY,
4287                                                          bJustInitialize );
4288                     if( poBlock == nullptr )
4289                     {
4290                         eErr = CE_Failure;
4291                         goto CleanupAndReturn;
4292                     }
4293 
4294                     if( eRWFlag == GF_Write )
4295                         poBlock->MarkDirty();
4296 
4297                     if( papoBlocks[iBand] != nullptr )
4298                         papoBlocks[iBand]->DropLock();
4299 
4300                     papoBlocks[iBand] = poBlock;
4301 
4302                     papabySrcBlock[iBand] = static_cast<GByte *>(poBlock->GetDataRef());
4303                     /*if( bMemZeroBuffer )
4304                     {
4305                         memset(papabySrcBlock[iBand], 0,
4306                             static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize * nBlockYSize);
4307                     }*/
4308                 }
4309             }
4310 
4311 /* -------------------------------------------------------------------- */
4312 /*      Copy over this pixel of data.                                   */
4313 /* -------------------------------------------------------------------- */
4314             iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) - static_cast<GPtrDiff_t>(nLBlockX)*nBlockXSize
4315                 + (static_cast<GPtrDiff_t>(iSrcY) - static_cast<GPtrDiff_t>(nLBlockY)*nBlockYSize) * nBlockXSize)*nBandDataSize;
4316 
4317             for( int iBand = 0; iBand < nBandCount; iBand++ )
4318             {
4319                 GByte *pabySrcBlock = papabySrcBlock[iBand];
4320                 GPtrDiff_t iBandBufOffset = iBufOffset + static_cast<GPtrDiff_t>(iBand) * static_cast<GPtrDiff_t>(nBandSpace);
4321 
4322                 if( eDataType == eBufType )
4323                 {
4324                     if( eRWFlag == GF_Read )
4325                         memcpy( static_cast<GByte *>(pData) + iBandBufOffset,
4326                                 pabySrcBlock + iSrcOffset, nBandDataSize );
4327                 else
4328                     memcpy( pabySrcBlock + iSrcOffset,
4329                             static_cast<const GByte *>(pData) + iBandBufOffset, nBandDataSize );
4330                 }
4331                 else
4332                 {
4333                     /* type to type conversion ... ouch, this is expensive way
4334                        of handling single words */
4335 
4336                     if( eRWFlag == GF_Read )
4337                         GDALCopyWords( pabySrcBlock + iSrcOffset, eDataType, 0,
4338                                        static_cast<GByte *>(pData) + iBandBufOffset,
4339                                        eBufType, 0, 1 );
4340                     else
4341                         GDALCopyWords( static_cast<const GByte *>(pData) + iBandBufOffset,
4342                                        eBufType, 0,
4343                                        pabySrcBlock + iSrcOffset, eDataType, 0,
4344                                        1 );
4345                 }
4346             }
4347 
4348             iBufOffset += static_cast<int>(nPixelSpace);
4349         }
4350     }
4351 
4352 /* -------------------------------------------------------------------- */
4353 /*      CleanupAndReturn.                                               */
4354 /* -------------------------------------------------------------------- */
4355   CleanupAndReturn:
4356     CPLFree( papabySrcBlock );
4357     if( papoBlocks != nullptr )
4358     {
4359         for( int iBand = 0; iBand < nBandCount; iBand++ )
4360         {
4361             if( papoBlocks[iBand] != nullptr )
4362                 papoBlocks[iBand]->DropLock();
4363         }
4364         CPLFree( papoBlocks );
4365     }
4366 
4367     return eErr;
4368 }
4369 //! @endcond
4370 
4371 /************************************************************************/
4372 /*                  GDALCopyWholeRasterGetSwathSize()                   */
4373 /************************************************************************/
4374 
GDALCopyWholeRasterGetSwathSize(GDALRasterBand * poSrcPrototypeBand,GDALRasterBand * poDstPrototypeBand,int nBandCount,int bDstIsCompressed,int bInterleave,int * pnSwathCols,int * pnSwathLines)4375 static void GDALCopyWholeRasterGetSwathSize(
4376     GDALRasterBand *poSrcPrototypeBand,
4377     GDALRasterBand *poDstPrototypeBand,
4378     int nBandCount,
4379     int bDstIsCompressed, int bInterleave,
4380     int* pnSwathCols, int *pnSwathLines )
4381 {
4382     GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4383     int nSrcBlockXSize = 0;
4384     int nSrcBlockYSize = 0;
4385     int nBlockXSize = 0;
4386     int nBlockYSize = 0;
4387 
4388     int nXSize = poSrcPrototypeBand->GetXSize();
4389     int nYSize = poSrcPrototypeBand->GetYSize();
4390 
4391     poSrcPrototypeBand->GetBlockSize( &nSrcBlockXSize, &nSrcBlockYSize );
4392     poDstPrototypeBand->GetBlockSize( &nBlockXSize, &nBlockYSize );
4393 
4394     const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
4395     const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
4396 
4397     int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4398     if( bInterleave)
4399         nPixelSize *= nBandCount;
4400 
4401     // aim for one row of blocks.  Do not settle for less.
4402     int nSwathCols  = nXSize;
4403     int nSwathLines = nBlockYSize;
4404 
4405     const char* pszSrcCompression =
4406         poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4407     if( pszSrcCompression == nullptr )
4408     {
4409         auto poSrcDS = poSrcPrototypeBand->GetDataset();
4410         if( poSrcDS )
4411             pszSrcCompression = poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4412     }
4413 
4414 /* -------------------------------------------------------------------- */
4415 /*      What will our swath size be?                                    */
4416 /* -------------------------------------------------------------------- */
4417     // When writing interleaved data in a compressed format, we want to be sure
4418     // that each block will only be written once, so the swath size must not be
4419     // greater than the block cache.
4420     const char* pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
4421     int nTargetSwathSize;
4422     if( pszSwathSize != nullptr )
4423         nTargetSwathSize = static_cast<int>(
4424             std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
4425     else
4426     {
4427       // As a default, take one 1/4 of the cache size.
4428         nTargetSwathSize = static_cast<int>(
4429             std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
4430 
4431         // but if the minimum idal swath buf size is less, then go for it to
4432         // avoid unnecessarily abusing RAM usage.
4433         // but try to use 10 MB at least.
4434         GIntBig nIdealSwathBufSize =
4435             static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
4436         if( nIdealSwathBufSize < nTargetSwathSize &&
4437             nIdealSwathBufSize < 10 * 1000 * 1000 )
4438         {
4439             nIdealSwathBufSize = 10 * 1000 * 1000;
4440         }
4441         if( pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4442             (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
4443                                    (nSrcBlockYSize % nBlockYSize) == 0)) )
4444         {
4445             nIdealSwathBufSize =
4446                 std::max( nIdealSwathBufSize,
4447                           static_cast<GIntBig>(nSwathCols) *
4448                           nSrcBlockYSize * nPixelSize) ;
4449         }
4450         if( nTargetSwathSize > nIdealSwathBufSize )
4451             nTargetSwathSize = static_cast<int>(
4452                 std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
4453     }
4454 
4455     if (nTargetSwathSize < 1000000)
4456         nTargetSwathSize = 1000000;
4457 
4458     /* But let's check that  */
4459     if( bDstIsCompressed && bInterleave &&
4460         nTargetSwathSize > GDALGetCacheMax64() )
4461     {
4462         CPLError(
4463             CE_Warning, CPLE_AppDefined,
4464             "When translating into a compressed interleave format, "
4465             "the block cache size (" CPL_FRMT_GIB ") "
4466             "should be at least the size of the swath (%d) "
4467             "(GDAL_SWATH_SIZE config. option)",
4468             GDALGetCacheMax64(), nTargetSwathSize);
4469     }
4470 
4471 #define IS_DIVIDER_OF(x,y) ((y)%(x) == 0)
4472 #define ROUND_TO(x,y) (((x)/(y))*(y))
4473 
4474     // if both input and output datasets are tiled, that the tile dimensions
4475     // are "compatible", try to stick  to a swath dimension that is a multiple
4476     // of input and output block dimensions.
4477     if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
4478         IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
4479         IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
4480         IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4481         IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4482     {
4483         if( static_cast<GIntBig>(nMaxBlockXSize) *
4484             nMaxBlockYSize * nPixelSize <=
4485             static_cast<GIntBig>(nTargetSwathSize) )
4486         {
4487             nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
4488             nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
4489             if (nSwathCols == 0)
4490                 nSwathCols = nMaxBlockXSize;
4491             if (nSwathCols > nXSize)
4492                 nSwathCols = nXSize;
4493             nSwathLines = nMaxBlockYSize;
4494 
4495             if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
4496                 static_cast<GIntBig>(nTargetSwathSize))
4497             {
4498                 nSwathCols  = nXSize;
4499                 nSwathLines = nBlockYSize;
4500             }
4501         }
4502     }
4503 
4504     const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
4505     const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
4506     if( nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize) )
4507     {
4508         nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
4509         if (nSwathLines == 0)
4510             nSwathLines = 1;
4511 
4512         CPLDebug(
4513             "GDAL",
4514             "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4515             "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4516             "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4517             nSwathLines,
4518             nBlockYSize * nMemoryPerCol,
4519             nTargetSwathSize);
4520     }
4521     // If we are processing single scans, try to handle several at once.
4522     // If we are handling swaths already, only grow the swath if a row
4523     // of blocks is substantially less than our target buffer size.
4524     else if( nSwathLines == 1
4525         || nMemoryPerCol * nSwathLines <
4526                         static_cast<GIntBig>(nTargetSwathSize) / 10 )
4527     {
4528         nSwathLines =
4529             std::min(nYSize, std::max(1,
4530                      static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
4531 
4532         /* If possible try to align to source and target block height */
4533         if ((nSwathLines % nMaxBlockYSize) != 0 &&
4534             nSwathLines > nMaxBlockYSize &&
4535             IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4536             IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4537             nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
4538     }
4539 
4540     if( pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4541         (!bDstIsCompressed ||
4542             (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
4543              IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))) )
4544     {
4545         // Typical use case: converting from Pleaiades that is 2048x2048 tiled.
4546         if( nSwathLines < nSrcBlockYSize )
4547         {
4548             nSwathLines = nSrcBlockYSize;
4549 
4550             // Number of pixels that can be read/write simultaneously.
4551             nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
4552             nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
4553             if (nSwathCols == 0)
4554                 nSwathCols = nSrcBlockXSize;
4555             if (nSwathCols > nXSize)
4556                 nSwathCols = nXSize;
4557 
4558             CPLDebug(
4559                 "GDAL",
4560                 "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4561                 "too high block, "
4562                 "use partial width at one time" );
4563         }
4564         else if ((nSwathLines % nSrcBlockYSize) != 0)
4565         {
4566             /* Round on a multiple of nSrcBlockYSize */
4567             nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
4568             CPLDebug(
4569                 "GDAL",
4570                 "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4571                 "round nSwathLines to block height : %d", nSwathLines);
4572         }
4573     }
4574     else if (bDstIsCompressed)
4575     {
4576         if (nSwathLines < nBlockYSize)
4577         {
4578             nSwathLines = nBlockYSize;
4579 
4580             // Number of pixels that can be read/write simultaneously.
4581             nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
4582             nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
4583             if (nSwathCols == 0)
4584                 nSwathCols = nBlockXSize;
4585             if (nSwathCols > nXSize)
4586                 nSwathCols = nXSize;
4587 
4588             CPLDebug(
4589                 "GDAL",
4590                 "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4591                 "too high block, "
4592                 "use partial width at one time" );
4593         }
4594         else if ((nSwathLines % nBlockYSize) != 0)
4595         {
4596             // Round on a multiple of nBlockYSize.
4597             nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
4598             CPLDebug(
4599                 "GDAL",
4600                 "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4601                 "round nSwathLines to block height : %d", nSwathLines);
4602         }
4603     }
4604 
4605     *pnSwathCols = nSwathCols;
4606     *pnSwathLines = nSwathLines;
4607 }
4608 
4609 /************************************************************************/
4610 /*                     GDALDatasetCopyWholeRaster()                     */
4611 /************************************************************************/
4612 
4613 /**
4614  * \brief Copy all dataset raster data.
4615  *
4616  * This function copies the complete raster contents of one dataset to
4617  * another similarly configured dataset.  The source and destination
4618  * dataset must have the same number of bands, and the same width
4619  * and height.  The bands do not have to have the same data type.
4620  *
4621  * This function is primarily intended to support implementation of
4622  * driver specific CreateCopy() functions.  It implements efficient copying,
4623  * in particular "chunking" the copy in substantial blocks and, if appropriate,
4624  * performing the transfer in a pixel interleaved fashion.
4625  *
4626  * Currently the only papszOptions value supported are :
4627  * <ul>
4628  * <li>"INTERLEAVE=PIXEL" to force pixel interleaved operation</li>
4629  * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4630  * achieve best compression.</li>
4631  * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4632  * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL &gt;= 2.2)</li>
4633  * </ul>
4634  * More options may be supported in the future.
4635  *
4636  * @param hSrcDS the source dataset
4637  * @param hDstDS the destination dataset
4638  * @param papszOptions transfer hints in "StringList" Name=Value format.
4639  * @param pfnProgress progress reporting function.
4640  * @param pProgressData callback data for progress function.
4641  *
4642  * @return CE_None on success, or CE_Failure on failure.
4643  */
4644 
GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,GDALDatasetH hDstDS,CSLConstList papszOptions,GDALProgressFunc pfnProgress,void * pProgressData)4645 CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(
4646     GDALDatasetH hSrcDS, GDALDatasetH hDstDS, CSLConstList papszOptions,
4647     GDALProgressFunc pfnProgress, void *pProgressData )
4648 
4649 {
4650     VALIDATE_POINTER1( hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure );
4651     VALIDATE_POINTER1( hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure );
4652 
4653     GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
4654     GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
4655 
4656     if( pfnProgress == nullptr )
4657         pfnProgress = GDALDummyProgress;
4658 
4659 /* -------------------------------------------------------------------- */
4660 /*      Confirm the datasets match in size and band counts.             */
4661 /* -------------------------------------------------------------------- */
4662     const int nXSize = poDstDS->GetRasterXSize();
4663     const int nYSize = poDstDS->GetRasterYSize();
4664     const int nBandCount = poDstDS->GetRasterCount();
4665 
4666     if( poSrcDS->GetRasterXSize() != nXSize
4667         || poSrcDS->GetRasterYSize() != nYSize
4668         || poSrcDS->GetRasterCount() != nBandCount )
4669     {
4670         CPLError( CE_Failure, CPLE_AppDefined,
4671                   "Input and output dataset sizes or band counts do not\n"
4672                   "match in GDALDatasetCopyWholeRaster()" );
4673         return CE_Failure;
4674     }
4675 
4676 /* -------------------------------------------------------------------- */
4677 /*      Report preliminary (0) progress.                                */
4678 /* -------------------------------------------------------------------- */
4679     if( !pfnProgress( 0.0, nullptr, pProgressData ) )
4680     {
4681         CPLError( CE_Failure, CPLE_UserInterrupt,
4682                   "User terminated CreateCopy()" );
4683         return CE_Failure;
4684     }
4685 
4686 /* -------------------------------------------------------------------- */
4687 /*      Get our prototype band, and assume the others are similarly     */
4688 /*      configured.                                                     */
4689 /* -------------------------------------------------------------------- */
4690     if( nBandCount == 0 )
4691         return CE_None;
4692 
4693     GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
4694     GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
4695     GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4696 
4697 /* -------------------------------------------------------------------- */
4698 /*      Do we want to try and do the operation in a pixel               */
4699 /*      interleaved fashion?                                            */
4700 /* -------------------------------------------------------------------- */
4701     bool bInterleave = false;
4702     const char *pszInterleave = poSrcDS->GetMetadataItem(
4703                                             "INTERLEAVE", "IMAGE_STRUCTURE");
4704     if( pszInterleave != nullptr
4705         && (EQUAL(pszInterleave,"PIXEL") || EQUAL(pszInterleave,"LINE")) )
4706         bInterleave = true;
4707 
4708     pszInterleave = poDstDS->GetMetadataItem( "INTERLEAVE", "IMAGE_STRUCTURE");
4709     if( pszInterleave != nullptr
4710         && (EQUAL(pszInterleave,"PIXEL") || EQUAL(pszInterleave,"LINE")) )
4711         bInterleave = true;
4712 
4713     pszInterleave = CSLFetchNameValue( papszOptions, "INTERLEAVE" );
4714     if( pszInterleave != nullptr
4715         && (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")) )
4716         bInterleave = true;
4717     else if( pszInterleave != nullptr && EQUAL(pszInterleave, "BAND") )
4718         bInterleave = false;
4719 
4720     // If the destination is compressed, we must try to write blocks just once,
4721     // to save disk space (GTiff case for example), and to avoid data loss
4722     // (JPEG compression for example).
4723     bool bDstIsCompressed = false;
4724     const char* pszDstCompressed =
4725         CSLFetchNameValue( papszOptions, "COMPRESSED" );
4726     if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
4727         bDstIsCompressed = true;
4728 
4729 /* -------------------------------------------------------------------- */
4730 /*      What will our swath size be?                                    */
4731 /* -------------------------------------------------------------------- */
4732 
4733     int nSwathCols = 0;
4734     int nSwathLines = 0;
4735     GDALCopyWholeRasterGetSwathSize( poSrcPrototypeBand,
4736                                      poDstPrototypeBand,
4737                                      nBandCount,
4738                                      bDstIsCompressed, bInterleave,
4739                                      &nSwathCols, &nSwathLines );
4740 
4741     int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4742     if( bInterleave)
4743         nPixelSize *= nBandCount;
4744 
4745     void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize );
4746     if( pSwathBuf == nullptr )
4747     {
4748         return CE_Failure;
4749     }
4750 
4751     CPLDebug( "GDAL",
4752               "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
4753               nSwathCols, nSwathLines, static_cast<int>(bInterleave) );
4754 
4755     // Advise the source raster that we are going to read it completely
4756     // Note: this might already have been done by GDALCreateCopy() in the
4757     // likely case this function is indirectly called by it
4758     poSrcDS->AdviseRead( 0, 0, nXSize, nYSize, nXSize, nYSize, eDT,
4759                          nBandCount, nullptr, nullptr );
4760 
4761 /* ==================================================================== */
4762 /*      Band oriented (uninterleaved) case.                             */
4763 /* ==================================================================== */
4764     CPLErr eErr = CE_None;
4765     const bool bCheckHoles = CPLTestBool( CSLFetchNameValueDef(
4766                                         papszOptions, "SKIP_HOLES", "NO" ) );
4767 
4768     if( !bInterleave )
4769     {
4770         GDALRasterIOExtraArg sExtraArg;
4771         INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4772 
4773         const GIntBig nTotalBlocks =
4774             static_cast<GIntBig>(nBandCount) *
4775             DIV_ROUND_UP(nYSize, nSwathLines) *
4776             DIV_ROUND_UP(nXSize, nSwathCols);
4777         GIntBig nBlocksDone = 0;
4778 
4779         for( int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++ )
4780         {
4781             int nBand = iBand + 1;
4782 
4783             for( int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines )
4784             {
4785                 int nThisLines = nSwathLines;
4786 
4787                 if( iY + nThisLines > nYSize )
4788                     nThisLines = nYSize - iY;
4789 
4790                 for( int iX = 0;
4791                      iX < nXSize && eErr == CE_None;
4792                      iX += nSwathCols )
4793                 {
4794                     int nThisCols = nSwathCols;
4795 
4796                     if( iX + nThisCols > nXSize )
4797                         nThisCols = nXSize - iX;
4798 
4799                     int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4800                     if( bCheckHoles )
4801                     {
4802                         nStatus = poSrcDS->GetRasterBand(nBand)->GetDataCoverageStatus(
4803                               iX, iY, nThisCols, nThisLines,
4804                               GDAL_DATA_COVERAGE_STATUS_DATA);
4805                     }
4806                     if( nStatus & GDAL_DATA_COVERAGE_STATUS_DATA )
4807                     {
4808                         sExtraArg.pfnProgress = GDALScaledProgress;
4809                         sExtraArg.pProgressData =
4810                             GDALCreateScaledProgress(
4811                                 nBlocksDone / static_cast<double>(nTotalBlocks),
4812                                 (nBlocksDone + 0.5) /
4813                                 static_cast<double>(nTotalBlocks),
4814                                 pfnProgress,
4815                                 pProgressData );
4816                         if( sExtraArg.pProgressData == nullptr )
4817                             sExtraArg.pfnProgress = nullptr;
4818 
4819                         eErr = poSrcDS->RasterIO( GF_Read,
4820                                                   iX, iY, nThisCols, nThisLines,
4821                                                   pSwathBuf, nThisCols, nThisLines,
4822                                                   eDT, 1, &nBand,
4823                                                   0, 0, 0, &sExtraArg );
4824 
4825                         GDALDestroyScaledProgress( sExtraArg.pProgressData );
4826 
4827                         if( eErr == CE_None )
4828                             eErr = poDstDS->RasterIO( GF_Write,
4829                                                       iX, iY, nThisCols, nThisLines,
4830                                                       pSwathBuf, nThisCols,
4831                                                       nThisLines,
4832                                                       eDT, 1, &nBand,
4833                                                       0, 0, 0, nullptr );
4834                     }
4835 
4836                     nBlocksDone++;
4837                     if( eErr == CE_None
4838                         && !pfnProgress(
4839                             nBlocksDone / static_cast<double>(nTotalBlocks),
4840                             nullptr, pProgressData ) )
4841                     {
4842                         eErr = CE_Failure;
4843                         CPLError( CE_Failure, CPLE_UserInterrupt,
4844                                   "User terminated CreateCopy()" );
4845                     }
4846                 }
4847             }
4848         }
4849     }
4850 
4851 /* ==================================================================== */
4852 /*      Pixel interleaved case.                                         */
4853 /* ==================================================================== */
4854     else /* if( bInterleave ) */
4855     {
4856         GDALRasterIOExtraArg sExtraArg;
4857         INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4858 
4859         const GIntBig nTotalBlocks =
4860             static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
4861             DIV_ROUND_UP(nXSize, nSwathCols);
4862         GIntBig nBlocksDone = 0;
4863 
4864         for( int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines )
4865         {
4866             int nThisLines = nSwathLines;
4867 
4868             if( iY + nThisLines > nYSize )
4869                 nThisLines = nYSize - iY;
4870 
4871             for( int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols )
4872             {
4873                 int nThisCols = nSwathCols;
4874 
4875                 if( iX + nThisCols > nXSize )
4876                     nThisCols = nXSize - iX;
4877 
4878                 int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4879                 if( bCheckHoles )
4880                 {
4881                     for( int iBand = 0; iBand < nBandCount; iBand++ )
4882                     {
4883                         nStatus |= poSrcDS->GetRasterBand(iBand+1)->GetDataCoverageStatus(
4884                           iX, iY, nThisCols, nThisLines,
4885                           GDAL_DATA_COVERAGE_STATUS_DATA);
4886                         if( nStatus & GDAL_DATA_COVERAGE_STATUS_DATA )
4887                             break;
4888                     }
4889                 }
4890                 if( nStatus & GDAL_DATA_COVERAGE_STATUS_DATA )
4891                 {
4892                     sExtraArg.pfnProgress = GDALScaledProgress;
4893                     sExtraArg.pProgressData =
4894                         GDALCreateScaledProgress(
4895                             nBlocksDone / static_cast<double>(nTotalBlocks),
4896                             (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
4897                             pfnProgress,
4898                             pProgressData );
4899                     if( sExtraArg.pProgressData == nullptr )
4900                         sExtraArg.pfnProgress = nullptr;
4901 
4902                     eErr = poSrcDS->RasterIO( GF_Read,
4903                                               iX, iY, nThisCols, nThisLines,
4904                                               pSwathBuf, nThisCols, nThisLines,
4905                                               eDT, nBandCount, nullptr,
4906                                               0, 0, 0, &sExtraArg );
4907 
4908                     GDALDestroyScaledProgress( sExtraArg.pProgressData );
4909 
4910                     if( eErr == CE_None )
4911                         eErr = poDstDS->RasterIO( GF_Write,
4912                                                   iX, iY, nThisCols, nThisLines,
4913                                                   pSwathBuf, nThisCols, nThisLines,
4914                                                   eDT, nBandCount, nullptr,
4915                                                   0, 0, 0, nullptr );
4916                 }
4917 
4918                 nBlocksDone++;
4919                 if( eErr == CE_None &&
4920                     !pfnProgress(
4921                         nBlocksDone / static_cast<double>( nTotalBlocks ),
4922                         nullptr, pProgressData ) )
4923                 {
4924                     eErr = CE_Failure;
4925                     CPLError( CE_Failure, CPLE_UserInterrupt,
4926                             "User terminated CreateCopy()" );
4927                 }
4928             }
4929         }
4930     }
4931 
4932 /* -------------------------------------------------------------------- */
4933 /*      Cleanup                                                         */
4934 /* -------------------------------------------------------------------- */
4935     CPLFree( pSwathBuf );
4936 
4937     return eErr;
4938 }
4939 
4940 /************************************************************************/
4941 /*                     GDALRasterBandCopyWholeRaster()                  */
4942 /************************************************************************/
4943 
4944 /**
4945  * \brief Copy a whole raster band
4946  *
4947  * This function copies the complete raster contents of one band to
4948  * another similarly configured band.  The source and destination
4949  * bands must have the same width and height.  The bands do not have
4950  * to have the same data type.
4951  *
4952  * It implements efficient copying, in particular "chunking" the copy in
4953  * substantial blocks.
4954  *
4955  * Currently the only papszOptions value supported are :
4956  * <ul>
4957  * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4958  * achieve best compression.</li>
4959  * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4960  * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL &gt;= 2.2)</li>
4961  * </ul>
4962  *
4963  * @param hSrcBand the source band
4964  * @param hDstBand the destination band
4965  * @param papszOptions transfer hints in "StringList" Name=Value format.
4966  * @param pfnProgress progress reporting function.
4967  * @param pProgressData callback data for progress function.
4968  *
4969  * @return CE_None on success, or CE_Failure on failure.
4970  */
4971 
GDALRasterBandCopyWholeRaster(GDALRasterBandH hSrcBand,GDALRasterBandH hDstBand,const char * const * const papszOptions,GDALProgressFunc pfnProgress,void * pProgressData)4972 CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
4973     GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
4974     const char * const * const papszOptions,
4975     GDALProgressFunc pfnProgress, void *pProgressData )
4976 
4977 {
4978     VALIDATE_POINTER1( hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure );
4979     VALIDATE_POINTER1( hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure );
4980 
4981     GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle( hSrcBand );
4982     GDALRasterBand *poDstBand = GDALRasterBand::FromHandle( hDstBand );
4983     CPLErr eErr = CE_None;
4984 
4985     if( pfnProgress == nullptr )
4986         pfnProgress = GDALDummyProgress;
4987 
4988 /* -------------------------------------------------------------------- */
4989 /*      Confirm the datasets match in size and band counts.             */
4990 /* -------------------------------------------------------------------- */
4991     int nXSize = poSrcBand->GetXSize();
4992     int nYSize = poSrcBand->GetYSize();
4993 
4994     if( poDstBand->GetXSize() != nXSize
4995         || poDstBand->GetYSize() != nYSize )
4996     {
4997         CPLError( CE_Failure, CPLE_AppDefined,
4998                   "Input and output band sizes do not\n"
4999                   "match in GDALRasterBandCopyWholeRaster()" );
5000         return CE_Failure;
5001     }
5002 
5003 /* -------------------------------------------------------------------- */
5004 /*      Report preliminary (0) progress.                                */
5005 /* -------------------------------------------------------------------- */
5006     if( !pfnProgress( 0.0, nullptr, pProgressData ) )
5007     {
5008         CPLError( CE_Failure, CPLE_UserInterrupt,
5009                   "User terminated CreateCopy()" );
5010         return CE_Failure;
5011     }
5012 
5013     GDALDataType eDT = poDstBand->GetRasterDataType();
5014 
5015     // If the destination is compressed, we must try to write blocks just once,
5016     // to save disk space (GTiff case for example), and to avoid data loss
5017     // (JPEG compression for example).
5018     bool bDstIsCompressed = false;
5019     const char* pszDstCompressed =
5020         CSLFetchNameValue( const_cast<char **>(papszOptions), "COMPRESSED" );
5021     if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5022         bDstIsCompressed = true;
5023 
5024 /* -------------------------------------------------------------------- */
5025 /*      What will our swath size be?                                    */
5026 /* -------------------------------------------------------------------- */
5027 
5028     int nSwathCols = 0;
5029     int nSwathLines = 0;
5030     GDALCopyWholeRasterGetSwathSize( poSrcBand,
5031                                      poDstBand,
5032                                      1,
5033                                      bDstIsCompressed, FALSE,
5034                                      &nSwathCols, &nSwathLines);
5035 
5036     const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5037 
5038     void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize );
5039     if( pSwathBuf == nullptr )
5040     {
5041         return CE_Failure;
5042     }
5043 
5044     CPLDebug( "GDAL",
5045               "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
5046               nSwathCols, nSwathLines );
5047 
5048     const bool bCheckHoles = CPLTestBool( CSLFetchNameValueDef(
5049                     papszOptions, "SKIP_HOLES", "NO" ) );
5050 
5051     // Advise the source raster that we are going to read it completely
5052     poSrcBand->AdviseRead( 0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr );
5053 
5054 /* ==================================================================== */
5055 /*      Band oriented (uninterleaved) case.                             */
5056 /* ==================================================================== */
5057 
5058     for( int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines )
5059     {
5060         int nThisLines = nSwathLines;
5061 
5062         if( iY + nThisLines > nYSize )
5063             nThisLines = nYSize - iY;
5064 
5065         for( int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols )
5066         {
5067             int nThisCols = nSwathCols;
5068 
5069             if( iX + nThisCols > nXSize )
5070                 nThisCols = nXSize - iX;
5071 
5072             int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5073             if( bCheckHoles )
5074             {
5075                 nStatus = poSrcBand->GetDataCoverageStatus(
5076                         iX, iY, nThisCols, nThisLines,
5077                         GDAL_DATA_COVERAGE_STATUS_DATA);
5078             }
5079             if( nStatus & GDAL_DATA_COVERAGE_STATUS_DATA )
5080             {
5081                 eErr = poSrcBand->RasterIO( GF_Read,
5082                                             iX, iY, nThisCols, nThisLines,
5083                                             pSwathBuf, nThisCols, nThisLines,
5084                                             eDT, 0, 0, nullptr );
5085 
5086                 if( eErr == CE_None )
5087                     eErr = poDstBand->RasterIO( GF_Write,
5088                                                 iX, iY, nThisCols, nThisLines,
5089                                                 pSwathBuf, nThisCols, nThisLines,
5090                                                 eDT, 0, 0, nullptr );
5091             }
5092 
5093             if( eErr == CE_None
5094                 && !pfnProgress(
5095                     (iY + nThisLines) / static_cast<float>(nYSize),
5096                     nullptr, pProgressData ) )
5097             {
5098                 eErr = CE_Failure;
5099                 CPLError( CE_Failure, CPLE_UserInterrupt,
5100                           "User terminated CreateCopy()" );
5101             }
5102         }
5103     }
5104 
5105 /* -------------------------------------------------------------------- */
5106 /*      Cleanup                                                         */
5107 /* -------------------------------------------------------------------- */
5108     CPLFree( pSwathBuf );
5109 
5110     return eErr;
5111 }
5112 
5113 /************************************************************************/
5114 /*                      GDALCopyRasterIOExtraArg ()                     */
5115 /************************************************************************/
5116 
GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg * psDestArg,GDALRasterIOExtraArg * psSrcArg)5117 void GDALCopyRasterIOExtraArg( GDALRasterIOExtraArg* psDestArg,
5118                                GDALRasterIOExtraArg* psSrcArg )
5119 {
5120     INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5121     if( psSrcArg )
5122     {
5123         psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5124         psDestArg->pfnProgress = psSrcArg->pfnProgress;
5125         psDestArg->pProgressData = psSrcArg->pProgressData;
5126         psDestArg->bFloatingPointWindowValidity =
5127             psSrcArg->bFloatingPointWindowValidity;
5128         if( psSrcArg->bFloatingPointWindowValidity )
5129         {
5130             psDestArg->dfXOff = psSrcArg->dfXOff;
5131             psDestArg->dfYOff = psSrcArg->dfYOff;
5132             psDestArg->dfXSize = psSrcArg->dfXSize;
5133             psDestArg->dfYSize = psSrcArg->dfYSize;
5134         }
5135     }
5136 }
5137