1 /******************************************************************************
2 *
3 * Project: GDAL Core
4 * Purpose: Contains default implementation of GDALRasterBand::IRasterIO()
5 * and supporting functions of broader utility.
6 * Author: Frank Warmerdam, warmerdam@pobox.com
7 *
8 ******************************************************************************
9 * Copyright (c) 1998, Frank Warmerdam
10 * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11 *
12 * Permission is hereby granted, free of charge, to any person obtaining a
13 * copy of this software and associated documentation files (the "Software"),
14 * to deal in the Software without restriction, including without limitation
15 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 * and/or sell copies of the Software, and to permit persons to whom the
17 * Software is furnished to do so, subject to the following conditions:
18 *
19 * The above copyright notice and this permission notice shall be included
20 * in all copies or substantial portions of the Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
23 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28 * DEALINGS IN THE SOFTWARE.
29 ****************************************************************************/
30
31 #include "cpl_port.h"
32 #include "gdal.h"
33 #include "gdal_priv.h"
34
35 #include <climits>
36 #include <cmath>
37 #include <cstddef>
38 #include <cstdio>
39 #include <cstdlib>
40 #include <cstring>
41
42 #include <algorithm>
43 #include <limits>
44 #include <stdexcept>
45
46 #include "cpl_conv.h"
47 #include "cpl_cpu_features.h"
48 #include "cpl_error.h"
49 #include "cpl_progress.h"
50 #include "cpl_string.h"
51 #include "cpl_vsi.h"
52 #include "gdal_priv_templates.hpp"
53 #include "gdal_vrt.h"
54 #include "gdalwarper.h"
55 #include "memdataset.h"
56 #include "vrtdataset.h"
57
58 CPL_CVSID("$Id: rasterio.cpp 1c4378ccd440f5578f86f5539d61d05a9ad2089f 2021-10-22 09:18:07 +0200 Even Rouault $")
59
60 static void GDALFastCopyByte( const GByte * CPL_RESTRICT pSrcData,
61 int nSrcPixelStride,
62 GByte * CPL_RESTRICT pDstData,
63 int nDstPixelStride,
64 GPtrDiff_t nWordCount );
65
66 /************************************************************************/
67 /* DownsamplingIntegerXFactor() */
68 /************************************************************************/
69
70 template<bool bSameDataType, int DATA_TYPE_SIZE> static
DownsamplingIntegerXFactor(GDALRasterBand * poBand,int iSrcX,int nSrcXInc,GPtrDiff_t iSrcOffsetCst,GByte * CPL_RESTRICT pabyDstData,int nPixelSpace,int nBufXSize,GDALDataType eDataType,GDALDataType eBufType,int & nStartBlockX,int nBlockXSize,GDALRasterBlock * & poBlock,int nLBlockY)71 bool DownsamplingIntegerXFactor(GDALRasterBand* poBand,
72 int iSrcX,
73 int nSrcXInc,
74 GPtrDiff_t iSrcOffsetCst,
75 GByte* CPL_RESTRICT pabyDstData,
76 int nPixelSpace,
77 int nBufXSize,
78 GDALDataType eDataType,
79 GDALDataType eBufType,
80 int& nStartBlockX,
81 int nBlockXSize,
82 GDALRasterBlock*& poBlock,
83 int nLBlockY)
84 {
85 const int nBandDataSize = bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes( eDataType );
86 int nOuterLoopIters = nBufXSize - 1;
87 const int nIncSrcOffset = nSrcXInc * nBandDataSize;
88 const GByte* CPL_RESTRICT pabySrcData;
89 int nEndBlockX = nBlockXSize + nStartBlockX;
90
91 if( iSrcX < nEndBlockX )
92 {
93 goto no_reload_block;
94 }
95 goto reload_block;
96
97 // Don't do the last iteration in the loop, as iSrcX might go beyond nRasterXSize - 1
98 while( --nOuterLoopIters >= 1 )
99 {
100 iSrcX += nSrcXInc;
101 pabySrcData += nIncSrcOffset;
102 pabyDstData += nPixelSpace;
103
104 /* -------------------------------------------------------------------- */
105 /* Ensure we have the appropriate block loaded. */
106 /* -------------------------------------------------------------------- */
107 if( iSrcX >= nEndBlockX )
108 {
109 reload_block:
110 {
111 const int nLBlockX = iSrcX / nBlockXSize;
112 nStartBlockX = nLBlockX * nBlockXSize;
113 nEndBlockX = nStartBlockX + nBlockXSize;
114
115 if( poBlock != nullptr )
116 poBlock->DropLock();
117
118 poBlock = poBand->GetLockedBlockRef( nLBlockX, nLBlockY, FALSE );
119 if( poBlock == nullptr )
120 {
121 return false;
122 }
123 }
124
125 no_reload_block:
126 const GByte* pabySrcBlock = static_cast<const GByte *>(poBlock->GetDataRef());
127 GPtrDiff_t iSrcOffset = (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
128 pabySrcData = pabySrcBlock + iSrcOffset;
129 }
130
131 /* -------------------------------------------------------------------- */
132 /* Copy the maximum run of pixels. */
133 /* -------------------------------------------------------------------- */
134
135 const int nIters = std::min(
136 (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
137 if( bSameDataType )
138 {
139 memcpy( pabyDstData, pabySrcData, nBandDataSize );
140 if( nIters > 1 )
141 {
142 if( DATA_TYPE_SIZE == 1 )
143 {
144 pabySrcData += nIncSrcOffset;
145 pabyDstData += nPixelSpace;
146 GDALFastCopyByte( pabySrcData, nIncSrcOffset,
147 pabyDstData, nPixelSpace,
148 nIters - 1 );
149 pabySrcData += static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
150 pabyDstData += static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
151 }
152 else
153 {
154 for( int i = 0; i < nIters - 1; i ++)
155 {
156 pabySrcData += nIncSrcOffset;
157 pabyDstData += nPixelSpace;
158 memcpy( pabyDstData, pabySrcData, nBandDataSize );
159 }
160 }
161 iSrcX += nSrcXInc * (nIters - 1);
162 nOuterLoopIters -= nIters - 1;
163 }
164 }
165 else
166 {
167 // Type to type conversion ...
168 GDALCopyWords(
169 pabySrcData, eDataType, nIncSrcOffset,
170 pabyDstData, eBufType, nPixelSpace,
171 std::max(1, nIters) );
172 if( nIters > 1 )
173 {
174 pabySrcData += static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
175 pabyDstData += static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
176 iSrcX += nSrcXInc * (nIters - 1);
177 nOuterLoopIters -= nIters - 1;
178 }
179 }
180 }
181
182 // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1
183 if( nOuterLoopIters == 0 )
184 {
185 const int nRasterXSize = poBand->GetXSize();
186 iSrcX = static_cast<int>(
187 std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
188 static_cast<GInt64>(nRasterXSize - 1)));
189 pabyDstData += nPixelSpace;
190 if( iSrcX < nEndBlockX )
191 {
192 goto no_reload_block;
193 }
194 goto reload_block;
195 }
196 return true;
197 }
198
199 /************************************************************************/
200 /* IRasterIO() */
201 /* */
202 /* Default internal implementation of RasterIO() ... utilizes */
203 /* the Block access methods to satisfy the request. This would */
204 /* normally only be overridden by formats with overviews. */
205 /************************************************************************/
206
IRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,GSpacing nPixelSpace,GSpacing nLineSpace,GDALRasterIOExtraArg * psExtraArg)207 CPLErr GDALRasterBand::IRasterIO( GDALRWFlag eRWFlag,
208 int nXOff, int nYOff, int nXSize, int nYSize,
209 void * pData, int nBufXSize, int nBufYSize,
210 GDALDataType eBufType,
211 GSpacing nPixelSpace, GSpacing nLineSpace,
212 GDALRasterIOExtraArg* psExtraArg )
213
214 {
215 if( eRWFlag == GF_Write && eFlushBlockErr != CE_None )
216 {
217 CPLError(eFlushBlockErr, CPLE_AppDefined,
218 "An error occurred while writing a dirty block "
219 "from GDALRasterBand::IRasterIO");
220 CPLErr eErr = eFlushBlockErr;
221 eFlushBlockErr = CE_None;
222 return eErr;
223 }
224 if( nBlockXSize <= 0 || nBlockYSize <= 0 )
225 {
226 CPLError( CE_Failure, CPLE_AppDefined, "Invalid block size" );
227 return CE_Failure;
228 }
229
230 const int nBandDataSize = GDALGetDataTypeSizeBytes( eDataType );
231 const int nBufDataSize = GDALGetDataTypeSizeBytes( eBufType );
232 GByte dummyBlock[2] = {0, 0};
233 GByte *pabySrcBlock = dummyBlock; /* to avoid Coverity warning about nullptr dereference */
234 GDALRasterBlock *poBlock = nullptr;
235 const bool bUseIntegerRequestCoords =
236 (!psExtraArg->bFloatingPointWindowValidity ||
237 (nXOff == psExtraArg->dfXOff &&
238 nYOff == psExtraArg->dfYOff &&
239 nXSize == psExtraArg->dfXSize &&
240 nYSize == psExtraArg->dfYSize));
241
242 /* ==================================================================== */
243 /* A common case is the data requested with the destination */
244 /* is packed, and the block width is the raster width. */
245 /* ==================================================================== */
246 if( nPixelSpace == nBufDataSize
247 && nLineSpace == nPixelSpace * nXSize
248 && nBlockXSize == GetXSize()
249 && nBufXSize == nXSize
250 && nBufYSize == nYSize
251 && bUseIntegerRequestCoords )
252 {
253 CPLErr eErr = CE_None;
254 int nLBlockY = -1;
255
256 for( int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++ )
257 {
258 const int iSrcY = iBufYOff + nYOff;
259
260 if( iSrcY < nLBlockY * nBlockYSize
261 || iSrcY - nBlockYSize >= nLBlockY * nBlockYSize )
262 {
263 nLBlockY = iSrcY / nBlockYSize;
264 bool bJustInitialize =
265 eRWFlag == GF_Write
266 && nXOff == 0 && nXSize == nBlockXSize
267 && nYOff <= nLBlockY * nBlockYSize
268 && nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
269
270 // Is this a partial tile at right and/or bottom edges of
271 // the raster, and that is going to be completely written?
272 // If so, do not load it from storage, but zero it so that
273 // the content outsize of the validity area is initialized.
274 bool bMemZeroBuffer = false;
275 if( eRWFlag == GF_Write && !bJustInitialize &&
276 nXOff == 0 && nXSize == nBlockXSize &&
277 nYOff <= nLBlockY * nBlockYSize &&
278 nYOff + nYSize == GetYSize() &&
279 nLBlockY * nBlockYSize > GetYSize() - nBlockYSize )
280 {
281 bJustInitialize = true;
282 bMemZeroBuffer = true;
283 }
284
285 if( poBlock )
286 poBlock->DropLock();
287
288 const GUInt32 nErrorCounter = CPLGetErrorCounter();
289 poBlock = GetLockedBlockRef( 0, nLBlockY, bJustInitialize );
290 if( poBlock == nullptr )
291 {
292 if( strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == nullptr )
293 {
294 CPLError( CE_Failure, CPLE_AppDefined,
295 "GetBlockRef failed at X block offset %d, "
296 "Y block offset %d%s",
297 0, nLBlockY,
298 (nErrorCounter != CPLGetErrorCounter()) ?
299 CPLSPrintf(": %s", CPLGetLastErrorMsg()) : "");
300 }
301 eErr = CE_Failure;
302 break;
303 }
304
305 if( eRWFlag == GF_Write )
306 poBlock->MarkDirty();
307
308 pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
309 if( bMemZeroBuffer )
310 {
311 memset(pabySrcBlock, 0,
312 static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize * nBlockYSize);
313 }
314 }
315
316 const auto nSrcByteOffset =
317 (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) * nBlockXSize + nXOff)
318 * nBandDataSize;
319
320 if( eDataType == eBufType )
321 {
322 if( eRWFlag == GF_Read )
323 memcpy(
324 static_cast<GByte *>(pData)
325 + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
326 pabySrcBlock + nSrcByteOffset,
327 static_cast<size_t>(nLineSpace) );
328 else
329 memcpy(
330 pabySrcBlock + nSrcByteOffset,
331 static_cast<GByte *>(pData)
332 + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
333 static_cast<size_t>(nLineSpace) );
334 }
335 else
336 {
337 // Type to type conversion.
338
339 if( eRWFlag == GF_Read )
340 GDALCopyWords(
341 pabySrcBlock + nSrcByteOffset,
342 eDataType, nBandDataSize,
343 static_cast<GByte *>(pData)
344 + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
345 eBufType,
346 static_cast<int>(nPixelSpace), nBufXSize );
347 else
348 GDALCopyWords(
349 static_cast<GByte *>(pData)
350 + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
351 eBufType, static_cast<int>(nPixelSpace),
352 pabySrcBlock + nSrcByteOffset,
353 eDataType, nBandDataSize, nBufXSize );
354 }
355
356 if( psExtraArg->pfnProgress != nullptr &&
357 !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
358 psExtraArg->pProgressData) )
359 {
360 eErr = CE_Failure;
361 break;
362 }
363 }
364
365 if( poBlock )
366 poBlock->DropLock();
367
368 return eErr;
369 }
370
371 /* ==================================================================== */
372 /* Do we have overviews that would be appropriate to satisfy */
373 /* this request? */
374 /* ==================================================================== */
375 if( (nBufXSize < nXSize || nBufYSize < nYSize)
376 && GetOverviewCount() > 0 && eRWFlag == GF_Read )
377 {
378 GDALRasterIOExtraArg sExtraArg;
379 GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
380
381 const int nOverview =
382 GDALBandGetBestOverviewLevel2( this, nXOff, nYOff, nXSize, nYSize,
383 nBufXSize, nBufYSize, &sExtraArg );
384 if (nOverview >= 0)
385 {
386 GDALRasterBand* poOverviewBand = GetOverview(nOverview);
387 if (poOverviewBand == nullptr)
388 return CE_Failure;
389
390 return poOverviewBand->RasterIO(
391 eRWFlag, nXOff, nYOff, nXSize, nYSize,
392 pData, nBufXSize, nBufYSize, eBufType,
393 nPixelSpace, nLineSpace, &sExtraArg );
394 }
395 }
396
397 if( eRWFlag == GF_Read &&
398 nBufXSize < nXSize / 100 && nBufYSize < nYSize / 100 &&
399 nPixelSpace == nBufDataSize &&
400 nLineSpace == nPixelSpace * nBufXSize &&
401 CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")) )
402 {
403 memset( pData, 0, static_cast<size_t>(nLineSpace * nBufYSize) );
404 return CE_None;
405 }
406
407 /* ==================================================================== */
408 /* The second case when we don't need subsample data but likely */
409 /* need data type conversion. */
410 /* ==================================================================== */
411 if ( // nPixelSpace == nBufDataSize &&
412 nXSize == nBufXSize
413 && nYSize == nBufYSize
414 && bUseIntegerRequestCoords )
415 {
416 #if DEBUG_VERBOSE
417 printf( "IRasterIO(%d,%d,%d,%d) rw=%d case 2\n",/*ok*/
418 nXOff, nYOff, nXSize, nYSize,
419 static_cast<int>(eRWFlag) );
420 #endif
421
422 /* -------------------------------------------------------------------- */
423 /* Loop over buffer computing source locations. */
424 /* -------------------------------------------------------------------- */
425 // Calculate starting values out of loop
426 const int nLBlockXStart = nXOff / nBlockXSize;
427 const int nXSpanEnd = nBufXSize + nXOff;
428
429 int nYInc = 0;
430 for( int iBufYOff = 0, iSrcY = nYOff;
431 iBufYOff < nBufYSize;
432 iBufYOff += nYInc, iSrcY += nYInc )
433 {
434 GPtrDiff_t iSrcOffset = 0;
435 int nXSpan = 0;
436
437 GPtrDiff_t iBufOffset =
438 static_cast<GPtrDiff_t>(iBufYOff) *
439 static_cast<GPtrDiff_t>(nLineSpace);
440 int nLBlockY = iSrcY / nBlockYSize;
441 int nLBlockX = nLBlockXStart;
442 int iSrcX = nXOff;
443 while( iSrcX < nXSpanEnd )
444 {
445 nXSpan = nLBlockX * nBlockXSize;
446 if( nXSpan < INT_MAX - nBlockXSize )
447 nXSpan += nBlockXSize;
448 else
449 nXSpan = INT_MAX;
450 const int nXRight = nXSpan;
451 nXSpan = ( nXSpan < nXSpanEnd ? nXSpan:nXSpanEnd ) - iSrcX;
452 const size_t nXSpanSize = nXSpan * static_cast<size_t>(nPixelSpace);
453
454 bool bJustInitialize =
455 eRWFlag == GF_Write
456 && nYOff <= nLBlockY * nBlockYSize
457 && nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize
458 && nXOff <= nLBlockX * nBlockXSize
459 && nXOff + nXSize >= nXRight;
460
461 // Is this a partial tile at right and/or bottom edges of
462 // the raster, and that is going to be completely written?
463 // If so, do not load it from storage, but zero it so that
464 // the content outsize of the validity area is initialized.
465 bool bMemZeroBuffer = false;
466 if( eRWFlag == GF_Write && !bJustInitialize &&
467 nXOff <= nLBlockX * nBlockXSize &&
468 nYOff <= nLBlockY * nBlockYSize &&
469 (nXOff + nXSize >= nXRight ||
470 (nXOff + nXSize == GetXSize() &&
471 nXRight > GetXSize())) &&
472 (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
473 (nYOff + nYSize == GetYSize() &&
474 nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)) )
475 {
476 bJustInitialize = true;
477 bMemZeroBuffer = true;
478 }
479
480 /* -------------------------------------------------------------------- */
481 /* Ensure we have the appropriate block loaded. */
482 /* -------------------------------------------------------------------- */
483 const GUInt32 nErrorCounter = CPLGetErrorCounter();
484 poBlock = GetLockedBlockRef( nLBlockX, nLBlockY,
485 bJustInitialize );
486 if( !poBlock )
487 {
488 if( strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == nullptr )
489 {
490 CPLError( CE_Failure, CPLE_AppDefined,
491 "GetBlockRef failed at X block offset %d, "
492 "Y block offset %d%s",
493 nLBlockX, nLBlockY,
494 (nErrorCounter != CPLGetErrorCounter()) ?
495 CPLSPrintf(": %s", CPLGetLastErrorMsg()) : "");
496 }
497 return( CE_Failure );
498 }
499
500 if( eRWFlag == GF_Write )
501 poBlock->MarkDirty();
502
503 pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
504 if( bMemZeroBuffer )
505 {
506 memset(pabySrcBlock, 0,
507 static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize * nBlockYSize);
508 }
509 /* -------------------------------------------------------------------- */
510 /* Copy over this chunk of data. */
511 /* -------------------------------------------------------------------- */
512 iSrcOffset =
513 (static_cast<GPtrDiff_t>(iSrcX)
514 - static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize)
515 + (static_cast<GPtrDiff_t>(iSrcY)
516 - static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize)
517 * nBlockXSize)
518 * nBandDataSize;
519 // Fill up as many rows as possible for the loaded block.
520 const int kmax = std::min(
521 nBlockYSize - (iSrcY % nBlockYSize), nBufYSize - iBufYOff );
522 for(int k=0; k<kmax;k++)
523 {
524 if( eDataType == eBufType
525 && nPixelSpace == nBufDataSize )
526 {
527 if( eRWFlag == GF_Read )
528 memcpy( static_cast<GByte *>(pData) + iBufOffset
529 + static_cast<GPtrDiff_t>(k) * nLineSpace,
530 pabySrcBlock + iSrcOffset,
531 nXSpanSize );
532 else
533 memcpy( pabySrcBlock + iSrcOffset,
534 static_cast<GByte *>(pData) + iBufOffset
535 + static_cast<GPtrDiff_t>(k) * nLineSpace,
536 nXSpanSize );
537 }
538 else
539 {
540 /* type to type conversion */
541 if( eRWFlag == GF_Read )
542 GDALCopyWords(
543 pabySrcBlock + iSrcOffset,
544 eDataType, nBandDataSize,
545 static_cast<GByte *>(pData) + iBufOffset
546 + static_cast<GPtrDiff_t>(k) * nLineSpace,
547 eBufType, static_cast<int>(nPixelSpace),
548 nXSpan );
549 else
550 GDALCopyWords(
551 static_cast<GByte *>(pData) + iBufOffset +
552 static_cast<GPtrDiff_t>(k) * nLineSpace,
553 eBufType, static_cast<int>(nPixelSpace),
554 pabySrcBlock + iSrcOffset,
555 eDataType, nBandDataSize, nXSpan );
556 }
557
558 iSrcOffset += nBlockXSize * nBandDataSize;
559 }
560
561 iBufOffset = CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
562 nLBlockX++;
563 iSrcX+=nXSpan;
564
565 poBlock->DropLock();
566 poBlock = nullptr;
567 }
568
569 /* Compute the increment to go on a block boundary */
570 nYInc = nBlockYSize - (iSrcY % nBlockYSize);
571
572 if( psExtraArg->pfnProgress != nullptr &&
573 !psExtraArg->pfnProgress(
574 1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
575 psExtraArg->pProgressData) )
576 {
577 return CE_Failure;
578 }
579 }
580
581 return CE_None;
582 }
583
584 /* ==================================================================== */
585 /* Loop reading required source blocks to satisfy output */
586 /* request. This is the most general implementation. */
587 /* ==================================================================== */
588
589 double dfXOff = nXOff;
590 double dfYOff = nYOff;
591 double dfXSize = nXSize;
592 double dfYSize = nYSize;
593 if( psExtraArg->bFloatingPointWindowValidity )
594 {
595 dfXOff = psExtraArg->dfXOff;
596 dfYOff = psExtraArg->dfYOff;
597 dfXSize = psExtraArg->dfXSize;
598 dfYSize = psExtraArg->dfYSize;
599 }
600
601 /* -------------------------------------------------------------------- */
602 /* Compute stepping increment. */
603 /* -------------------------------------------------------------------- */
604 const double dfSrcXInc = dfXSize / static_cast<double>( nBufXSize );
605 const double dfSrcYInc = dfYSize / static_cast<double>( nBufYSize );
606 CPLErr eErr = CE_None;
607
608 if (eRWFlag == GF_Write)
609 {
610 /* -------------------------------------------------------------------- */
611 /* Write case */
612 /* Loop over raster window computing source locations in the buffer. */
613 /* -------------------------------------------------------------------- */
614 GByte* pabyDstBlock = nullptr;
615 int nLBlockX = -1;
616 int nLBlockY = -1;
617
618 for( int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY ++)
619 {
620 GPtrDiff_t iBufOffset = 0;
621 GPtrDiff_t iDstOffset = 0;
622 const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
623
624 for( int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX ++)
625 {
626 const int iBufXOff = static_cast<int>((iDstX - nXOff) / dfSrcXInc);
627 iBufOffset =
628 static_cast<GPtrDiff_t>(iBufYOff)
629 * static_cast<GPtrDiff_t>(nLineSpace)
630 + iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
631
632 // FIXME: this code likely doesn't work if the dirty block gets
633 // flushed to disk before being completely written.
634 // In the meantime, bJustInitialize should probably be set to
635 // FALSE even if it is not ideal performance wise, and for
636 // lossy compression.
637
638 /* -------------------------------------------------------------------- */
639 /* Ensure we have the appropriate block loaded. */
640 /* -------------------------------------------------------------------- */
641 if( iDstX < nLBlockX * nBlockXSize
642 || iDstX - nBlockXSize >= nLBlockX * nBlockXSize
643 || iDstY < nLBlockY * nBlockYSize
644 || iDstY - nBlockYSize >= nLBlockY * nBlockYSize )
645 {
646 nLBlockX = iDstX / nBlockXSize;
647 nLBlockY = iDstY / nBlockYSize;
648
649 const bool bJustInitialize =
650 nYOff <= nLBlockY * nBlockYSize
651 && nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize
652 && nXOff <= nLBlockX * nBlockXSize
653 && nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
654 /*bool bMemZeroBuffer = FALSE;
655 if( !bJustInitialize &&
656 nXOff <= nLBlockX * nBlockXSize &&
657 nYOff <= nLBlockY * nBlockYSize &&
658 (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
659 (nXOff + nXSize == GetXSize() &&
660 (nLBlockX+1) * nBlockXSize > GetXSize())) &&
661 (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
662 (nYOff + nYSize == GetYSize() &&
663 (nLBlockY+1) * nBlockYSize > GetYSize())) )
664 {
665 bJustInitialize = TRUE;
666 bMemZeroBuffer = TRUE;
667 }*/
668 if( poBlock != nullptr )
669 poBlock->DropLock();
670
671 poBlock = GetLockedBlockRef( nLBlockX, nLBlockY,
672 bJustInitialize );
673 if( poBlock == nullptr )
674 {
675 return( CE_Failure );
676 }
677
678 poBlock->MarkDirty();
679
680 pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
681 /*if( bMemZeroBuffer )
682 {
683 memset(pabyDstBlock, 0,
684 static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize * nBlockYSize);
685 }*/
686 }
687
688 // To make Coverity happy. Should not happen by design.
689 if( pabyDstBlock == nullptr )
690 {
691 CPLAssert(false);
692 eErr = CE_Failure;
693 break;
694 }
695
696 /* -------------------------------------------------------------------- */
697 /* Copy over this pixel of data. */
698 /* -------------------------------------------------------------------- */
699 iDstOffset =
700 (static_cast<GPtrDiff_t>(iDstX)
701 - static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize
702 + (static_cast<GPtrDiff_t>(iDstY)
703 - static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize)
704 * nBlockXSize )
705 * nBandDataSize;
706
707 if( eDataType == eBufType )
708 {
709 memcpy(
710 pabyDstBlock + iDstOffset,
711 static_cast<GByte *>(pData) + iBufOffset,
712 nBandDataSize );
713 }
714 else
715 {
716 /* type to type conversion ... ouch, this is expensive way
717 of handling single words */
718
719 GDALCopyWords(
720 static_cast<GByte *>(pData) + iBufOffset, eBufType, 0,
721 pabyDstBlock + iDstOffset, eDataType, 0,
722 1 );
723 }
724 }
725
726 if( psExtraArg->pfnProgress != nullptr &&
727 !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
728 psExtraArg->pProgressData) )
729 {
730 eErr = CE_Failure;
731 break;
732 }
733 }
734 }
735 else
736 {
737 if( psExtraArg->eResampleAlg != GRIORA_NearestNeighbour )
738 {
739 if( (psExtraArg->eResampleAlg == GRIORA_Cubic ||
740 psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
741 psExtraArg->eResampleAlg == GRIORA_Bilinear ||
742 psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
743 GetColorTable() != nullptr )
744 {
745 CPLError(CE_Warning, CPLE_NotSupported,
746 "Resampling method not supported on paletted band. "
747 "Falling back to nearest neighbour");
748 }
749 else if( psExtraArg->eResampleAlg == GRIORA_Gauss &&
750 GDALDataTypeIsComplex( eDataType ) )
751 {
752 CPLError(
753 CE_Warning, CPLE_NotSupported,
754 "Resampling method not supported on complex data type "
755 "band. Falling back to nearest neighbour");
756 }
757 else
758 {
759 return RasterIOResampled( eRWFlag,
760 nXOff, nYOff, nXSize, nYSize,
761 pData, nBufXSize, nBufYSize,
762 eBufType,
763 nPixelSpace, nLineSpace,
764 psExtraArg );
765 }
766 }
767
768 int nLimitBlockY = 0;
769 const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
770 int nStartBlockX = -nBlockXSize;
771 const double EPS = 1e-10;
772 int nLBlockY = -1;
773 const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
774 const bool bIntegerXFactor = bUseIntegerRequestCoords &&
775 static_cast<int>(dfSrcXInc) == dfSrcXInc &&
776 static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
777
778 /* -------------------------------------------------------------------- */
779 /* Read case */
780 /* Loop over buffer computing source locations. */
781 /* -------------------------------------------------------------------- */
782 for( int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++ )
783 {
784 // Add small epsilon to avoid some numeric precision issues.
785 const double dfSrcY = (iBufYOff+0.5) * dfSrcYInc + dfYOff + EPS;
786 const int iSrcY = static_cast<int>(std::min(std::max(0.0, dfSrcY),
787 static_cast<double>(nRasterYSize - 1)));
788
789 GPtrDiff_t iBufOffset =
790 static_cast<GPtrDiff_t>(iBufYOff)
791 * static_cast<GPtrDiff_t>(nLineSpace);
792
793 if( iSrcY >= nLimitBlockY )
794 {
795 nLBlockY = iSrcY / nBlockYSize;
796 nLimitBlockY = nLBlockY * nBlockYSize;
797 if( nLimitBlockY < INT_MAX - nBlockYSize )
798 nLimitBlockY += nBlockYSize;
799 else
800 nLimitBlockY = INT_MAX;
801 // Make sure a new block is loaded.
802 nStartBlockX = -nBlockXSize;
803 }
804 else if( static_cast<int>(dfSrcXStart) < nStartBlockX )
805 {
806 // Make sure a new block is loaded.
807 nStartBlockX = -nBlockXSize;
808 }
809
810 GPtrDiff_t iSrcOffsetCst =
811 (iSrcY - nLBlockY*nBlockYSize)
812 * static_cast<GPtrDiff_t>(nBlockXSize);
813
814 if( bIntegerXFactor )
815 {
816 int iSrcX = static_cast<int>(dfSrcXStart);
817 const int nSrcXInc = static_cast<int>(dfSrcXInc);
818 GByte* pabyDstData= static_cast<GByte*>(pData) + iBufOffset;
819 bool bRet = false;
820 if( bByteCopy )
821 {
822 bRet = DownsamplingIntegerXFactor<true, 1>(this, iSrcX, nSrcXInc,
823 iSrcOffsetCst,
824 pabyDstData,
825 static_cast<int>(nPixelSpace),
826 nBufXSize,
827 GDT_Byte, GDT_Byte,
828 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
829 }
830 else if( eDataType == eBufType )
831 {
832 switch( nBandDataSize )
833 {
834 case 2:
835 bRet = DownsamplingIntegerXFactor<true, 2>(this, iSrcX, nSrcXInc,
836 iSrcOffsetCst,
837 pabyDstData,
838 static_cast<int>(nPixelSpace),
839 nBufXSize,
840 eDataType, eDataType,
841 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
842 break;
843 case 4:
844 bRet = DownsamplingIntegerXFactor<true, 4>(this, iSrcX, nSrcXInc,
845 iSrcOffsetCst,
846 pabyDstData,
847 static_cast<int>(nPixelSpace),
848 nBufXSize,
849 eDataType, eDataType,
850 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
851 break;
852 case 8:
853 bRet = DownsamplingIntegerXFactor<true, 8>(this, iSrcX, nSrcXInc,
854 iSrcOffsetCst,
855 pabyDstData,
856 static_cast<int>(nPixelSpace),
857 nBufXSize,
858 eDataType, eDataType,
859 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
860 break;
861 case 16:
862 bRet = DownsamplingIntegerXFactor<true, 16>(this, iSrcX, nSrcXInc,
863 iSrcOffsetCst,
864 pabyDstData,
865 static_cast<int>(nPixelSpace),
866 nBufXSize,
867 eDataType, eDataType,
868 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
869 break;
870 default:
871 CPLAssert(false);
872 break;
873 }
874 }
875 else
876 {
877 bRet = DownsamplingIntegerXFactor<false, 0>(this, iSrcX, nSrcXInc,
878 iSrcOffsetCst,
879 pabyDstData,
880 static_cast<int>(nPixelSpace),
881 nBufXSize,
882 eDataType, eBufType,
883 nStartBlockX, nBlockXSize, poBlock, nLBlockY);
884 }
885 if( !bRet )
886 eErr = CE_Failure;
887 }
888 else
889 {
890 double dfSrcX = dfSrcXStart;
891 for( int iBufXOff = 0;
892 iBufXOff < nBufXSize;
893 iBufXOff++, dfSrcX += dfSrcXInc )
894 {
895 // TODO?: try to avoid the clamping for most iterations
896 const int iSrcX = static_cast<int>(std::min(std::max(0.0, dfSrcX),
897 static_cast<double>(nRasterXSize - 1)));
898
899 /* -------------------------------------------------------------------- */
900 /* Ensure we have the appropriate block loaded. */
901 /* -------------------------------------------------------------------- */
902 if( iSrcX >= nBlockXSize + nStartBlockX )
903 {
904 const int nLBlockX = iSrcX / nBlockXSize;
905 nStartBlockX = nLBlockX * nBlockXSize;
906
907 if( poBlock != nullptr )
908 poBlock->DropLock();
909
910 poBlock = GetLockedBlockRef( nLBlockX, nLBlockY, FALSE );
911 if( poBlock == nullptr )
912 {
913 eErr = CE_Failure;
914 break;
915 }
916
917 pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
918 }
919 const GPtrDiff_t nDiffX = static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
920
921 /* -------------------------------------------------------------------- */
922 /* Copy over this pixel of data. */
923 /* -------------------------------------------------------------------- */
924
925 if( bByteCopy )
926 {
927 GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
928 static_cast<GByte *>(pData)[iBufOffset] =
929 pabySrcBlock[iSrcOffset];
930 }
931 else if( eDataType == eBufType )
932 {
933 GPtrDiff_t iSrcOffset =
934 (nDiffX + iSrcOffsetCst)
935 * nBandDataSize;
936 memcpy( static_cast<GByte *>(pData) + iBufOffset,
937 pabySrcBlock + iSrcOffset, nBandDataSize );
938 }
939 else
940 {
941 // Type to type conversion ...
942 GPtrDiff_t iSrcOffset =
943 (nDiffX + iSrcOffsetCst)
944 * nBandDataSize;
945 GDALCopyWords(
946 pabySrcBlock + iSrcOffset, eDataType, 0,
947 static_cast<GByte *>(pData) + iBufOffset, eBufType, 0,
948 1 );
949 }
950
951 iBufOffset += static_cast<int>(nPixelSpace);
952 }
953 }
954 if( eErr == CE_Failure )
955 break;
956
957 if( psExtraArg->pfnProgress != nullptr &&
958 !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
959 psExtraArg->pProgressData) )
960 {
961 eErr = CE_Failure;
962 break;
963 }
964 }
965 }
966
967 if( poBlock != nullptr )
968 poBlock->DropLock();
969
970 return eErr;
971 }
972
973 /************************************************************************/
974 /* GDALRasterIOTransformer() */
975 /************************************************************************/
976
977 struct GDALRasterIOTransformerStruct
978 {
979 double dfXOff;
980 double dfYOff;
981 double dfXRatioDstToSrc;
982 double dfYRatioDstToSrc;
983 };
984
GDALRasterIOTransformer(void * pTransformerArg,int bDstToSrc,int nPointCount,double * x,double * y,double *,int * panSuccess)985 static int GDALRasterIOTransformer( void *pTransformerArg,
986 int bDstToSrc,
987 int nPointCount,
988 double *x, double *y, double * /* z */,
989 int *panSuccess )
990 {
991 GDALRasterIOTransformerStruct* psParams =
992 static_cast<GDALRasterIOTransformerStruct *>( pTransformerArg );
993 if( bDstToSrc )
994 {
995 for(int i = 0; i < nPointCount; i++)
996 {
997 x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
998 y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
999 panSuccess[i] = TRUE;
1000 }
1001 }
1002 else
1003 {
1004 for(int i = 0; i < nPointCount; i++)
1005 {
1006 x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
1007 y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
1008 panSuccess[i] = TRUE;
1009 }
1010 }
1011 return TRUE;
1012 }
1013
1014 /************************************************************************/
1015 /* RasterIOResampled() */
1016 /************************************************************************/
1017
1018 //! @cond Doxygen_Suppress
RasterIOResampled(GDALRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,GSpacing nPixelSpace,GSpacing nLineSpace,GDALRasterIOExtraArg * psExtraArg)1019 CPLErr GDALRasterBand::RasterIOResampled(
1020 GDALRWFlag /* eRWFlag */,
1021 int nXOff, int nYOff, int nXSize, int nYSize,
1022 void * pData, int nBufXSize, int nBufYSize,
1023 GDALDataType eBufType,
1024 GSpacing nPixelSpace, GSpacing nLineSpace,
1025 GDALRasterIOExtraArg* psExtraArg )
1026 {
1027 // Determine if we use warping resampling or overview resampling
1028 bool bUseWarp = false;
1029 if( GDALDataTypeIsComplex( eDataType ) )
1030 bUseWarp = true;
1031
1032 double dfXOff = nXOff;
1033 double dfYOff = nYOff;
1034 double dfXSize = nXSize;
1035 double dfYSize = nYSize;
1036 if( psExtraArg->bFloatingPointWindowValidity )
1037 {
1038 dfXOff = psExtraArg->dfXOff;
1039 dfYOff = psExtraArg->dfYOff;
1040 dfXSize = psExtraArg->dfXSize;
1041 dfYSize = psExtraArg->dfYSize;
1042 }
1043
1044 const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1045 const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1046
1047 // Determine the coordinates in the "virtual" output raster to see
1048 // if there are not integers, in which case we will use them as a shift
1049 // so that subwindow extracts give the exact same results as entire raster
1050 // scaling.
1051 double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1052 bool bHasXOffVirtual = false;
1053 int nDestXOffVirtual = 0;
1054 if( fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8 )
1055 {
1056 bHasXOffVirtual = true;
1057 dfXOff = nXOff;
1058 nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1059 }
1060
1061 double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1062 bool bHasYOffVirtual = false;
1063 int nDestYOffVirtual = 0;
1064 if( fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8 )
1065 {
1066 bHasYOffVirtual = true;
1067 dfYOff = nYOff;
1068 nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1069 }
1070
1071 // Create a MEM dataset that wraps the output buffer.
1072 GDALDataset* poMEMDS;
1073 void* pTempBuffer = nullptr;
1074 GSpacing nPSMem = nPixelSpace;
1075 GSpacing nLSMem = nLineSpace;
1076 void* pDataMem = pData;
1077 GDALDataType eDTMem = eBufType;
1078 if( eBufType != eDataType )
1079 {
1080 nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1081 nLSMem = nPSMem * nBufXSize;
1082 pTempBuffer = VSI_MALLOC2_VERBOSE( nBufYSize, static_cast<size_t>(nLSMem) );
1083 if( pTempBuffer == nullptr )
1084 return CE_Failure;
1085 pDataMem = pTempBuffer;
1086 eDTMem = eDataType;
1087 }
1088
1089 poMEMDS = MEMDataset::Create( "", nDestXOffVirtual + nBufXSize,
1090 nDestYOffVirtual + nBufYSize, 0,
1091 eDTMem, nullptr );
1092 char szBuffer[32] = { '\0' };
1093 int nRet =
1094 CPLPrintPointer(
1095 szBuffer, static_cast<GByte*>(pDataMem)
1096 - nPSMem * nDestXOffVirtual
1097 - nLSMem * nDestYOffVirtual, sizeof(szBuffer));
1098 szBuffer[nRet] = '\0';
1099
1100 char szBuffer0[64] = { '\0' };
1101 snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
1102 char szBuffer1[64] = { '\0' };
1103 snprintf( szBuffer1, sizeof(szBuffer1),
1104 "PIXELOFFSET=" CPL_FRMT_GIB, static_cast<GIntBig>(nPSMem) );
1105 char szBuffer2[64] = { '\0' };
1106 snprintf( szBuffer2, sizeof(szBuffer2),
1107 "LINEOFFSET=" CPL_FRMT_GIB, static_cast<GIntBig>(nLSMem) );
1108 char* apszOptions[4] = { szBuffer0, szBuffer1, szBuffer2, nullptr };
1109
1110 poMEMDS->AddBand(eDTMem, apszOptions);
1111
1112 GDALRasterBandH hMEMBand = poMEMDS->GetRasterBand(1);
1113
1114 const char* pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1115 if( pszNBITS )
1116 reinterpret_cast<GDALRasterBand *>(hMEMBand)->
1117 SetMetadataItem("NBITS", pszNBITS, "IMAGE_STRUCTURE");
1118
1119 CPLErr eErr = CE_None;
1120
1121 // Do the resampling.
1122 if( bUseWarp )
1123 {
1124 int bHasNoData = FALSE;
1125 double dfNoDataValue = GetNoDataValue(&bHasNoData) ;
1126
1127 VRTDatasetH hVRTDS = nullptr;
1128 GDALRasterBandH hVRTBand = nullptr;
1129 if( GetDataset() == nullptr )
1130 {
1131 /* Create VRT dataset that wraps the whole dataset */
1132 hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1133 VRTAddBand( hVRTDS, eDataType, nullptr );
1134 hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1135 VRTAddSimpleSource( hVRTBand,
1136 this,
1137 0, 0,
1138 nRasterXSize, nRasterYSize,
1139 0, 0,
1140 nRasterXSize, nRasterYSize,
1141 nullptr, VRT_NODATA_UNSET );
1142
1143 /* Add a mask band if needed */
1144 if( GetMaskFlags() != GMF_ALL_VALID )
1145 {
1146 reinterpret_cast<GDALDataset *>(hVRTDS)->CreateMaskBand(0);
1147 VRTSourcedRasterBand* poVRTMaskBand =
1148 reinterpret_cast<VRTSourcedRasterBand *>(
1149 reinterpret_cast<GDALRasterBand *>(hVRTBand)->
1150 GetMaskBand());
1151 poVRTMaskBand->
1152 AddMaskBandSource( this,
1153 0, 0,
1154 nRasterXSize, nRasterYSize,
1155 0, 0,
1156 nRasterXSize, nRasterYSize );
1157 }
1158 }
1159
1160 GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1161 switch(psExtraArg->eResampleAlg)
1162 {
1163 case GRIORA_NearestNeighbour:
1164 psWarpOptions->eResampleAlg = GRA_NearestNeighbour; break;
1165 case GRIORA_Bilinear:
1166 psWarpOptions->eResampleAlg = GRA_Bilinear; break;
1167 case GRIORA_Cubic:
1168 psWarpOptions->eResampleAlg = GRA_Cubic; break;
1169 case GRIORA_CubicSpline:
1170 psWarpOptions->eResampleAlg = GRA_CubicSpline; break;
1171 case GRIORA_Lanczos:
1172 psWarpOptions->eResampleAlg = GRA_Lanczos; break;
1173 case GRIORA_Average:
1174 psWarpOptions->eResampleAlg = GRA_Average; break;
1175 case GRIORA_RMS:
1176 psWarpOptions->eResampleAlg = GRA_RMS; break;
1177 case GRIORA_Mode:
1178 psWarpOptions->eResampleAlg = GRA_Mode; break;
1179 default:
1180 CPLAssert(false);
1181 psWarpOptions->eResampleAlg = GRA_NearestNeighbour; break;
1182 }
1183 psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1184 psWarpOptions->hDstDS = poMEMDS;
1185 psWarpOptions->nBandCount = 1;
1186 int nSrcBandNumber = hVRTDS ? 1 : nBand;
1187 int nDstBandNumber = 1;
1188 psWarpOptions->panSrcBands = &nSrcBandNumber;
1189 psWarpOptions->panDstBands = &nDstBandNumber;
1190 psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1191 psExtraArg->pfnProgress : GDALDummyProgress;
1192 psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1193 psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1194 if (bHasNoData)
1195 {
1196 psWarpOptions->papszWarpOptions = CSLSetNameValue( psWarpOptions->papszWarpOptions,
1197 "INIT_DEST", "NO_DATA");
1198 if (psWarpOptions->padfSrcNoDataReal == nullptr)
1199 {
1200 psWarpOptions->padfSrcNoDataReal = static_cast<double*>(CPLMalloc(sizeof(double)));
1201 psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1202 }
1203
1204 if (psWarpOptions->padfDstNoDataReal == nullptr)
1205 {
1206 psWarpOptions->padfDstNoDataReal = static_cast<double*>(CPLMalloc(sizeof(double)));
1207 psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1208 }
1209 }
1210
1211 GDALRasterIOTransformerStruct sTransformer;
1212 sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1213 sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1214 sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1215 sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1216 psWarpOptions->pTransformerArg = &sTransformer;
1217
1218 GDALWarpOperationH hWarpOperation =
1219 GDALCreateWarpOperation(psWarpOptions);
1220 eErr = GDALChunkAndWarpImage( hWarpOperation,
1221 nDestXOffVirtual, nDestYOffVirtual,
1222 nBufXSize, nBufYSize );
1223 GDALDestroyWarpOperation( hWarpOperation );
1224
1225 psWarpOptions->panSrcBands = nullptr;
1226 psWarpOptions->panDstBands = nullptr;
1227 GDALDestroyWarpOptions( psWarpOptions );
1228
1229 if( hVRTDS )
1230 GDALClose(hVRTDS);
1231 }
1232 else
1233 {
1234 const char* pszResampling =
1235 (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" :
1236 (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" :
1237 (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" :
1238 (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" :
1239 (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" :
1240 (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" :
1241 (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" :
1242 (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" : "UNKNOWN";
1243
1244 int nKernelRadius = 0;
1245 GDALResampleFunction pfnResampleFunc =
1246 GDALGetResampleFunction(pszResampling, &nKernelRadius);
1247 CPLAssert(pfnResampleFunc);
1248 GDALDataType eWrkDataType =
1249 GDALGetOvrWorkDataType(pszResampling, eDataType);
1250 int bHasNoData = FALSE;
1251 float fNoDataValue = static_cast<float>( GetNoDataValue(&bHasNoData) );
1252 if( !bHasNoData )
1253 fNoDataValue = 0.0f;
1254
1255 int nDstBlockXSize = nBufXSize;
1256 int nDstBlockYSize = nBufYSize;
1257 int nFullResXChunk = 0;
1258 int nFullResYChunk = 0;
1259 while( true )
1260 {
1261 nFullResXChunk =
1262 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1263 nFullResYChunk =
1264 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1265 if( nFullResXChunk > nRasterXSize )
1266 nFullResXChunk = nRasterXSize;
1267 if( nFullResYChunk > nRasterYSize )
1268 nFullResYChunk = nRasterYSize;
1269 if( (nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1270 (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= 1024 * 1024) )
1271 break;
1272 // When operating on the full width of a raster whose block width is
1273 // the raster width, prefer doing chunks in height.
1274 if( nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1275 nDstBlockYSize > 1 )
1276 nDstBlockYSize /= 2;
1277 /* Otherwise cut the maximal dimension */
1278 else if( nDstBlockXSize > 1 &&
1279 (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1) )
1280 nDstBlockXSize /= 2;
1281 else
1282 nDstBlockYSize /= 2;
1283 }
1284
1285 int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1286 int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1287 if( nOvrXFactor == 0 ) nOvrXFactor = 1;
1288 if( nOvrYFactor == 0 ) nOvrYFactor = 1;
1289 int nFullResXSizeQueried =
1290 nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1291 int nFullResYSizeQueried =
1292 nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1293
1294 if( nFullResXSizeQueried > nRasterXSize )
1295 nFullResXSizeQueried = nRasterXSize;
1296 if( nFullResYSizeQueried > nRasterYSize )
1297 nFullResYSizeQueried = nRasterYSize;
1298
1299 void * pChunk =
1300 VSI_MALLOC3_VERBOSE( GDALGetDataTypeSizeBytes(eWrkDataType),
1301 nFullResXSizeQueried, nFullResYSizeQueried );
1302 GByte * pabyChunkNoDataMask = nullptr;
1303
1304 GDALRasterBand* poMaskBand = GetMaskBand();
1305 int l_nMaskFlags = GetMaskFlags();
1306
1307 bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1308 if (bUseNoDataMask)
1309 {
1310 pabyChunkNoDataMask = static_cast<GByte *>(
1311 VSI_MALLOC2_VERBOSE( nFullResXSizeQueried,
1312 nFullResYSizeQueried ) );
1313 }
1314 if( pChunk == nullptr || (bUseNoDataMask && pabyChunkNoDataMask == nullptr) )
1315 {
1316 GDALClose(poMEMDS);
1317 CPLFree(pChunk);
1318 CPLFree(pabyChunkNoDataMask);
1319 VSIFree(pTempBuffer);
1320 return CE_Failure;
1321 }
1322
1323 int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1324 ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1325 int nBlocksDone = 0;
1326
1327 int nDstYOff;
1328 for( nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1329 nDstYOff += nDstBlockYSize )
1330 {
1331 int nDstYCount;
1332 if (nDstYOff + nDstBlockYSize <= nBufYSize)
1333 nDstYCount = nDstBlockYSize;
1334 else
1335 nDstYCount = nBufYSize - nDstYOff;
1336
1337 int nChunkYOff =
1338 nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1339 int nChunkYOff2 =
1340 nYOff + 1 +
1341 static_cast<int>(
1342 ceil((nDstYOff + nDstYCount) * dfYRatioDstToSrc ) );
1343 if( nChunkYOff2 > nRasterYSize )
1344 nChunkYOff2 = nRasterYSize;
1345 int nYCount = nChunkYOff2 - nChunkYOff;
1346 CPLAssert(nYCount <= nFullResYChunk);
1347
1348 int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1349 int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1350 if( nChunkYOffQueried < 0 )
1351 {
1352 nChunkYSizeQueried += nChunkYOffQueried;
1353 nChunkYOffQueried = 0;
1354 }
1355 if( nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize )
1356 nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1357 CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1358
1359 int nDstXOff = 0;
1360 for( nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1361 nDstXOff += nDstBlockXSize )
1362 {
1363 int nDstXCount = 0;
1364 if (nDstXOff + nDstBlockXSize <= nBufXSize)
1365 nDstXCount = nDstBlockXSize;
1366 else
1367 nDstXCount = nBufXSize - nDstXOff;
1368
1369 int nChunkXOff =
1370 nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1371 int nChunkXOff2 =
1372 nXOff + 1 +
1373 static_cast<int>(
1374 ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1375 if( nChunkXOff2 > nRasterXSize )
1376 nChunkXOff2 = nRasterXSize;
1377 int nXCount = nChunkXOff2 - nChunkXOff;
1378 CPLAssert(nXCount <= nFullResXChunk);
1379
1380 int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrXFactor;
1381 int nChunkXSizeQueried =
1382 nXCount + 2 * nKernelRadius * nOvrXFactor;
1383 if( nChunkXOffQueried < 0 )
1384 {
1385 nChunkXSizeQueried += nChunkXOffQueried;
1386 nChunkXOffQueried = 0;
1387 }
1388 if( nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize )
1389 nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1390 CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1391
1392 // Read the source buffers.
1393 eErr = RasterIO( GF_Read,
1394 nChunkXOffQueried, nChunkYOffQueried,
1395 nChunkXSizeQueried, nChunkYSizeQueried,
1396 pChunk,
1397 nChunkXSizeQueried, nChunkYSizeQueried,
1398 eWrkDataType, 0, 0, nullptr );
1399
1400 bool bSkipResample = false;
1401 bool bNoDataMaskFullyOpaque = false;
1402 if (eErr == CE_None && bUseNoDataMask)
1403 {
1404 eErr = poMaskBand->RasterIO( GF_Read,
1405 nChunkXOffQueried,
1406 nChunkYOffQueried,
1407 nChunkXSizeQueried,
1408 nChunkYSizeQueried,
1409 pabyChunkNoDataMask,
1410 nChunkXSizeQueried,
1411 nChunkYSizeQueried,
1412 GDT_Byte, 0, 0, nullptr );
1413
1414 /* Optimizations if mask if fully opaque or transparent */
1415 int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1416 GByte bVal = pabyChunkNoDataMask[0];
1417 int i = 1;
1418 for( ; i < nPixels; i++ )
1419 {
1420 if( pabyChunkNoDataMask[i] != bVal )
1421 break;
1422 }
1423 if( i == nPixels )
1424 {
1425 if( bVal == 0 )
1426 {
1427 for(int j=0;j<nDstYCount;j++)
1428 {
1429 GDALCopyWords(
1430 &fNoDataValue, GDT_Float32, 0,
1431 static_cast<GByte*>(pDataMem) +
1432 nLSMem * (j + nDstYOff) +
1433 nDstXOff * nPSMem,
1434 eDTMem, static_cast<int>(nPSMem),
1435 nDstXCount);
1436 }
1437 bSkipResample = true;
1438 }
1439 else
1440 {
1441 bNoDataMaskFullyOpaque = true;
1442 }
1443 }
1444 }
1445
1446 if( !bSkipResample && eErr == CE_None )
1447 {
1448 const bool bPropagateNoData = false;
1449 void* pDstBuffer = nullptr;
1450 GDALDataType eDstBufferDataType = GDT_Unknown;
1451 GDALRasterBand* poMEMBand = GDALRasterBand::FromHandle(hMEMBand);
1452 eErr = pfnResampleFunc(
1453 dfXRatioDstToSrc,
1454 dfYRatioDstToSrc,
1455 dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1456 dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1457 eWrkDataType,
1458 pChunk,
1459 bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1460 nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1461 nChunkXSizeQueried,
1462 nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1463 nChunkYSizeQueried,
1464 nDstXOff + nDestXOffVirtual,
1465 nDstXOff + nDestXOffVirtual + nDstXCount,
1466 nDstYOff + nDestYOffVirtual,
1467 nDstYOff + nDestYOffVirtual + nDstYCount,
1468 poMEMBand,
1469 &pDstBuffer,
1470 &eDstBufferDataType,
1471 pszResampling,
1472 bHasNoData, fNoDataValue,
1473 GetColorTable(),
1474 eDataType,
1475 bPropagateNoData);
1476 if( eErr == CE_None )
1477 {
1478 eErr = poMEMBand->RasterIO(
1479 GF_Write,
1480 nDstXOff + nDestXOffVirtual,
1481 nDstYOff + nDestYOffVirtual,
1482 nDstXCount,
1483 nDstYCount,
1484 pDstBuffer,
1485 nDstXCount,
1486 nDstYCount,
1487 eDstBufferDataType,
1488 0, 0, nullptr );
1489 }
1490 CPLFree(pDstBuffer);
1491 }
1492
1493 nBlocksDone ++;
1494 if( eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1495 !psExtraArg->pfnProgress(
1496 1.0 * nBlocksDone / nTotalBlocks, "",
1497 psExtraArg->pProgressData) )
1498 {
1499 eErr = CE_Failure;
1500 }
1501 }
1502 }
1503
1504 CPLFree(pChunk);
1505 CPLFree(pabyChunkNoDataMask);
1506 }
1507
1508 if( eBufType != eDataType )
1509 {
1510 CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(GF_Read,
1511 nDestXOffVirtual, nDestYOffVirtual,
1512 nBufXSize, nBufYSize,
1513 pData,
1514 nBufXSize, nBufYSize,
1515 eBufType,
1516 nPixelSpace, nLineSpace,
1517 nullptr));
1518 }
1519 GDALClose(poMEMDS);
1520 VSIFree(pTempBuffer);
1521
1522 return eErr;
1523 }
1524
1525 /************************************************************************/
1526 /* RasterIOResampled() */
1527 /************************************************************************/
1528
RasterIOResampled(GDALRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,int nBandCount,int * panBandMap,GSpacing nPixelSpace,GSpacing nLineSpace,GSpacing nBandSpace,GDALRasterIOExtraArg * psExtraArg)1529 CPLErr GDALDataset::RasterIOResampled(
1530 GDALRWFlag /* eRWFlag */,
1531 int nXOff, int nYOff, int nXSize, int nYSize,
1532 void *pData, int nBufXSize, int nBufYSize,
1533 GDALDataType eBufType,
1534 int nBandCount, int *panBandMap,
1535 GSpacing nPixelSpace, GSpacing nLineSpace,
1536 GSpacing nBandSpace,
1537 GDALRasterIOExtraArg* psExtraArg )
1538
1539 {
1540 #if 0
1541 // Determine if we use warping resampling or overview resampling
1542 bool bUseWarp = false;
1543 if( GDALDataTypeIsComplex( eDataType ) )
1544 bUseWarp = true;
1545 #endif
1546
1547 double dfXOff = nXOff;
1548 double dfYOff = nYOff;
1549 double dfXSize = nXSize;
1550 double dfYSize = nYSize;
1551 if( psExtraArg->bFloatingPointWindowValidity )
1552 {
1553 dfXOff = psExtraArg->dfXOff;
1554 dfYOff = psExtraArg->dfYOff;
1555 dfXSize = psExtraArg->dfXSize;
1556 dfYSize = psExtraArg->dfYSize;
1557 }
1558
1559 const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1560 const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1561
1562 // Determine the coordinates in the "virtual" output raster to see
1563 // if there are not integers, in which case we will use them as a shift
1564 // so that subwindow extracts give the exact same results as entire raster
1565 // scaling.
1566 double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1567 bool bHasXOffVirtual = false;
1568 int nDestXOffVirtual = 0;
1569 if( fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8 )
1570 {
1571 bHasXOffVirtual = true;
1572 dfXOff = nXOff;
1573 nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1574 }
1575
1576 double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1577 bool bHasYOffVirtual = false;
1578 int nDestYOffVirtual = 0;
1579 if( fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8 )
1580 {
1581 bHasYOffVirtual = true;
1582 dfYOff = nYOff;
1583 nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1584 }
1585
1586 // Create a MEM dataset that wraps the output buffer.
1587 GDALDataset* poMEMDS = MEMDataset::Create( "", nDestXOffVirtual + nBufXSize,
1588 nDestYOffVirtual + nBufYSize, 0,
1589 eBufType, nullptr );
1590 GDALRasterBand** papoDstBands =
1591 static_cast<GDALRasterBand **>(
1592 CPLMalloc( nBandCount * sizeof(GDALRasterBand*)) );
1593 for(int i=0;i<nBandCount;i++)
1594 {
1595 char szBuffer[32] = { '\0' };
1596 int nRet = CPLPrintPointer(
1597 szBuffer,
1598 static_cast<GByte*>(pData) - nPixelSpace * nDestXOffVirtual
1599 - nLineSpace * nDestYOffVirtual + nBandSpace * i, sizeof(szBuffer));
1600 szBuffer[nRet] = 0;
1601
1602 char szBuffer0[64] = { '\0' };
1603 snprintf( szBuffer0, sizeof(szBuffer0),
1604 "DATAPOINTER=%s", szBuffer );
1605
1606 char szBuffer1[64] = { '\0' };
1607 snprintf( szBuffer1, sizeof(szBuffer1),
1608 "PIXELOFFSET=" CPL_FRMT_GIB,
1609 static_cast<GIntBig>(nPixelSpace) );
1610
1611 char szBuffer2[64] = { '\0' };
1612 snprintf( szBuffer2, sizeof(szBuffer2),
1613 "LINEOFFSET=" CPL_FRMT_GIB,
1614 static_cast<GIntBig>(nLineSpace) );
1615
1616 char* apszOptions[4] = { szBuffer0, szBuffer1, szBuffer2, nullptr };
1617
1618 poMEMDS->AddBand(eBufType, apszOptions);
1619
1620 GDALRasterBand* poSrcBand = GetRasterBand(panBandMap[i]);
1621 papoDstBands[i] = poMEMDS->GetRasterBand(i+1);
1622 const char* pszNBITS = poSrcBand->GetMetadataItem( "NBITS",
1623 "IMAGE_STRUCTURE" );
1624 if( pszNBITS )
1625 poMEMDS->GetRasterBand(i+1)->SetMetadataItem( "NBITS", pszNBITS,
1626 "IMAGE_STRUCTURE" );
1627 }
1628
1629 CPLErr eErr = CE_None;
1630
1631 // TODO(schwehr): Why disabled? Why not just delete?
1632 // Looks like this code was initially added as disable by copying
1633 // from RasterIO here:
1634 // https://trac.osgeo.org/gdal/changeset/29572
1635 #if 0
1636 // Do the resampling.
1637 if( bUseWarp )
1638 {
1639 VRTDatasetH hVRTDS = nullptr;
1640 GDALRasterBandH hVRTBand = nullptr;
1641 if( GetDataset() == nullptr )
1642 {
1643 /* Create VRT dataset that wraps the whole dataset */
1644 hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1645 VRTAddBand( hVRTDS, eDataType, nullptr );
1646 hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1647 VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1648 (GDALRasterBandH)this,
1649 0, 0,
1650 nRasterXSize, nRasterYSize,
1651 0, 0,
1652 nRasterXSize, nRasterYSize,
1653 nullptr, VRT_NODATA_UNSET );
1654
1655 /* Add a mask band if needed */
1656 if( GetMaskFlags() != GMF_ALL_VALID )
1657 {
1658 ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1659 VRTSourcedRasterBand* poVRTMaskBand =
1660 (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1661 poVRTMaskBand->
1662 AddMaskBandSource( this,
1663 0, 0,
1664 nRasterXSize, nRasterYSize,
1665 0, 0,
1666 nRasterXSize, nRasterYSize);
1667 }
1668 }
1669
1670 GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1671 psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1672 psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1673 psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1674 psWarpOptions->nBandCount = 1;
1675 int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1676 int nDstBandNumber = 1;
1677 psWarpOptions->panSrcBands = &nSrcBandNumber;
1678 psWarpOptions->panDstBands = &nDstBandNumber;
1679 psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1680 psExtraArg->pfnProgress : GDALDummyProgress;
1681 psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1682 psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1683 GDALRasterIOTransformerStruct sTransformer;
1684 sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1685 sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1686 sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1687 sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1688 psWarpOptions->pTransformerArg = &sTransformer;
1689
1690 GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1691 eErr = GDALChunkAndWarpImage( hWarpOperation,
1692 nDestXOffVirtual, nDestYOffVirtual,
1693 nBufXSize, nBufYSize );
1694 GDALDestroyWarpOperation( hWarpOperation );
1695
1696 psWarpOptions->panSrcBands = nullptr;
1697 psWarpOptions->panDstBands = nullptr;
1698 GDALDestroyWarpOptions( psWarpOptions );
1699
1700 if( hVRTDS )
1701 GDALClose(hVRTDS);
1702 }
1703 else
1704 #endif
1705 {
1706 const char* pszResampling =
1707 (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" :
1708 (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" :
1709 (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" :
1710 (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" :
1711 (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" :
1712 (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" :
1713 (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" :
1714 (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" : "UNKNOWN";
1715
1716 GDALRasterBand* poFirstSrcBand = GetRasterBand(panBandMap[0]);
1717 GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1718 int nBlockXSize, nBlockYSize;
1719 poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1720
1721 int nKernelRadius;
1722 GDALResampleFunction pfnResampleFunc =
1723 GDALGetResampleFunction(pszResampling, &nKernelRadius);
1724 CPLAssert(pfnResampleFunc);
1725 #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1726 GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1727 GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1728 #endif
1729 GDALDataType eWrkDataType =
1730 GDALGetOvrWorkDataType(pszResampling, eDataType);
1731
1732 int nDstBlockXSize = nBufXSize;
1733 int nDstBlockYSize = nBufYSize;
1734 int nFullResXChunk, nFullResYChunk;
1735 while( true )
1736 {
1737 nFullResXChunk =
1738 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1739 nFullResYChunk =
1740 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1741 if( nFullResXChunk > nRasterXSize )
1742 nFullResXChunk = nRasterXSize;
1743 if( nFullResYChunk > nRasterYSize )
1744 nFullResYChunk = nRasterYSize;
1745 if( (nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1746 (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= 1024 * 1024) )
1747 break;
1748 // When operating on the full width of a raster whose block width is
1749 // the raster width, prefer doing chunks in height.
1750 if( nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1751 nDstBlockYSize > 1 )
1752 nDstBlockYSize /= 2;
1753 /* Otherwise cut the maximal dimension */
1754 else if( nDstBlockXSize > 1 &&
1755 (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1) )
1756 nDstBlockXSize /= 2;
1757 else
1758 nDstBlockYSize /= 2;
1759 }
1760
1761 int nOvrFactor = std::max( static_cast<int>(0.5 + dfXRatioDstToSrc),
1762 static_cast<int>(0.5 + dfYRatioDstToSrc) );
1763 if( nOvrFactor == 0 ) nOvrFactor = 1;
1764 int nFullResXSizeQueried = nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1765 int nFullResYSizeQueried = nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1766
1767 if( nFullResXSizeQueried > nRasterXSize )
1768 nFullResXSizeQueried = nRasterXSize;
1769 if( nFullResYSizeQueried > nRasterYSize )
1770 nFullResYSizeQueried = nRasterYSize;
1771
1772 void * pChunk =
1773 VSI_MALLOC3_VERBOSE(
1774 GDALGetDataTypeSizeBytes(eWrkDataType) * nBandCount,
1775 nFullResXSizeQueried, nFullResYSizeQueried );
1776 GByte * pabyChunkNoDataMask = nullptr;
1777
1778 GDALRasterBand* poMaskBand = poFirstSrcBand->GetMaskBand();
1779 int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1780
1781 bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1782 if (bUseNoDataMask)
1783 {
1784 pabyChunkNoDataMask = static_cast<GByte *>(
1785 VSI_MALLOC2_VERBOSE( nFullResXSizeQueried, nFullResYSizeQueried ));
1786 }
1787 if( pChunk == nullptr || (bUseNoDataMask && pabyChunkNoDataMask == nullptr) )
1788 {
1789 GDALClose(poMEMDS);
1790 CPLFree(pChunk);
1791 CPLFree(pabyChunkNoDataMask);
1792 CPLFree(papoDstBands);
1793 return CE_Failure;
1794 }
1795
1796 int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1797 ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1798 int nBlocksDone = 0;
1799
1800 int nDstYOff;
1801 for( nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1802 nDstYOff += nDstBlockYSize )
1803 {
1804 int nDstYCount;
1805 if (nDstYOff + nDstBlockYSize <= nBufYSize)
1806 nDstYCount = nDstBlockYSize;
1807 else
1808 nDstYCount = nBufYSize - nDstYOff;
1809
1810 int nChunkYOff =
1811 nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1812 int nChunkYOff2 =
1813 nYOff + 1 +
1814 static_cast<int>(
1815 ceil((nDstYOff + nDstYCount) * dfYRatioDstToSrc) );
1816 if( nChunkYOff2 > nRasterYSize )
1817 nChunkYOff2 = nRasterYSize;
1818 int nYCount = nChunkYOff2 - nChunkYOff;
1819 CPLAssert(nYCount <= nFullResYChunk);
1820
1821 int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1822 int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1823 if( nChunkYOffQueried < 0 )
1824 {
1825 nChunkYSizeQueried += nChunkYOffQueried;
1826 nChunkYOffQueried = 0;
1827 }
1828 if( nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize )
1829 nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1830 CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1831
1832 int nDstXOff;
1833 for( nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1834 nDstXOff += nDstBlockXSize )
1835 {
1836 int nDstXCount;
1837 if (nDstXOff + nDstBlockXSize <= nBufXSize)
1838 nDstXCount = nDstBlockXSize;
1839 else
1840 nDstXCount = nBufXSize - nDstXOff;
1841
1842 int nChunkXOff =
1843 nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1844 int nChunkXOff2 =
1845 nXOff + 1 + static_cast<int>(
1846 ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc) );
1847 if( nChunkXOff2 > nRasterXSize )
1848 nChunkXOff2 = nRasterXSize;
1849 int nXCount = nChunkXOff2 - nChunkXOff;
1850 CPLAssert(nXCount <= nFullResXChunk);
1851
1852 int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1853 int nChunkXSizeQueried = nXCount + 2 * nKernelRadius * nOvrFactor;
1854 if( nChunkXOffQueried < 0 )
1855 {
1856 nChunkXSizeQueried += nChunkXOffQueried;
1857 nChunkXOffQueried = 0;
1858 }
1859 if( nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize )
1860 nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1861 CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1862
1863 bool bSkipResample = false;
1864 bool bNoDataMaskFullyOpaque = false;
1865 if (eErr == CE_None && bUseNoDataMask)
1866 {
1867 eErr = poMaskBand->RasterIO( GF_Read,
1868 nChunkXOffQueried,
1869 nChunkYOffQueried,
1870 nChunkXSizeQueried,
1871 nChunkYSizeQueried,
1872 pabyChunkNoDataMask,
1873 nChunkXSizeQueried,
1874 nChunkYSizeQueried,
1875 GDT_Byte, 0, 0, nullptr );
1876
1877 /* Optimizations if mask if fully opaque or transparent */
1878 const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1879 const GByte bVal = pabyChunkNoDataMask[0];
1880 int i = 1; // Used after for.
1881 for( ; i < nPixels; i++ )
1882 {
1883 if( pabyChunkNoDataMask[i] != bVal )
1884 break;
1885 }
1886 if( i == nPixels )
1887 {
1888 if( bVal == 0 )
1889 {
1890 float fNoDataValue = 0.0f;
1891 for( int iBand = 0; iBand < nBandCount; iBand++ )
1892 {
1893 for( int j = 0; j < nDstYCount; j++ )
1894 {
1895 GDALCopyWords(
1896 &fNoDataValue, GDT_Float32, 0,
1897 static_cast<GByte *>(pData) +
1898 iBand * nBandSpace +
1899 nLineSpace * (j + nDstYOff) +
1900 nDstXOff * nPixelSpace,
1901 eBufType,
1902 static_cast<int>(nPixelSpace),
1903 nDstXCount);
1904 }
1905 }
1906 bSkipResample = true;
1907 }
1908 else
1909 {
1910 bNoDataMaskFullyOpaque = true;
1911 }
1912 }
1913 }
1914
1915 if( !bSkipResample && eErr == CE_None )
1916 {
1917 /* Read the source buffers */
1918 eErr = RasterIO( GF_Read,
1919 nChunkXOffQueried, nChunkYOffQueried,
1920 nChunkXSizeQueried, nChunkYSizeQueried,
1921 pChunk,
1922 nChunkXSizeQueried, nChunkYSizeQueried,
1923 eWrkDataType,
1924 nBandCount, panBandMap,
1925 0, 0, 0, nullptr );
1926 }
1927
1928 #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1929 if( pfnResampleFuncMultiBands &&
1930 !bSkipResample &&
1931 eErr == CE_None )
1932 {
1933 eErr = pfnResampleFuncMultiBands(
1934 dfXRatioDstToSrc,
1935 dfYRatioDstToSrc,
1936 dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1937 dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1938 eWrkDataType,
1939 (GByte*)pChunk,
1940 nBandCount,
1941 bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1942 nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1943 nChunkXSizeQueried,
1944 nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1945 nChunkYSizeQueried,
1946 nDstXOff + nDestXOffVirtual,
1947 nDstXOff + nDestXOffVirtual + nDstXCount,
1948 nDstYOff + nDestYOffVirtual,
1949 nDstYOff + nDestYOffVirtual + nDstYCount,
1950 papoDstBands,
1951 pszResampling,
1952 FALSE /*bHasNoData*/,
1953 0.f /* fNoDataValue */,
1954 nullptr /* color table*/,
1955 eDataType );
1956 }
1957 else
1958 #endif
1959 {
1960 size_t nChunkBandOffset =
1961 static_cast<size_t>(nChunkXSizeQueried) *
1962 nChunkYSizeQueried *
1963 GDALGetDataTypeSizeBytes(eWrkDataType);
1964 for( int i = 0;
1965 i < nBandCount && !bSkipResample && eErr == CE_None;
1966 i++ )
1967 {
1968 const bool bPropagateNoData = false;
1969 void* pDstBuffer = nullptr;
1970 GDALDataType eDstBufferDataType = GDT_Unknown;
1971 GDALRasterBand* poMEMBand = poMEMDS->GetRasterBand(i+1);
1972 eErr = pfnResampleFunc(
1973 dfXRatioDstToSrc,
1974 dfYRatioDstToSrc,
1975 dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1976 dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1977 eWrkDataType,
1978 reinterpret_cast<GByte*>(pChunk) + i * nChunkBandOffset,
1979 bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1980 nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1981 nChunkXSizeQueried,
1982 nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1983 nChunkYSizeQueried,
1984 nDstXOff + nDestXOffVirtual,
1985 nDstXOff + nDestXOffVirtual + nDstXCount,
1986 nDstYOff + nDestYOffVirtual,
1987 nDstYOff + nDestYOffVirtual + nDstYCount,
1988 poMEMBand,
1989 &pDstBuffer,
1990 &eDstBufferDataType,
1991 pszResampling,
1992 FALSE /*bHasNoData*/,
1993 0.f /* fNoDataValue */,
1994 nullptr /* color table*/,
1995 eDataType,
1996 bPropagateNoData );
1997 if( eErr == CE_None )
1998 {
1999 eErr = poMEMBand->RasterIO(
2000 GF_Write,
2001 nDstXOff + nDestXOffVirtual,
2002 nDstYOff + nDestYOffVirtual,
2003 nDstXCount,
2004 nDstYCount,
2005 pDstBuffer,
2006 nDstXCount,
2007 nDstYCount,
2008 eDstBufferDataType,
2009 0, 0, nullptr );
2010 }
2011 CPLFree(pDstBuffer);
2012 }
2013 }
2014
2015 nBlocksDone ++;
2016 if( eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
2017 !psExtraArg->pfnProgress(
2018 1.0 * nBlocksDone / nTotalBlocks, "",
2019 psExtraArg->pProgressData) )
2020 {
2021 eErr = CE_Failure;
2022 }
2023 }
2024 }
2025
2026 CPLFree(pChunk);
2027 CPLFree(pabyChunkNoDataMask);
2028 }
2029
2030 CPLFree(papoDstBands);
2031 GDALClose(poMEMDS);
2032
2033 return eErr;
2034 }
2035 //! @endcond
2036
2037 /************************************************************************/
2038 /* GDALSwapWords() */
2039 /************************************************************************/
2040
2041 /**
2042 * Byte swap words in-place.
2043 *
2044 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2045 * a memory array. No assumption is made that the words being swapped are
2046 * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2047 * to determine if the current platform is big endian or little endian. Use
2048 * The macros like CPL_SWAP32() to byte swap single values without the overhead
2049 * of a function call.
2050 *
2051 * @param pData pointer to start of data buffer.
2052 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2053 * @param nWordCount the number of words to be swapped in this call.
2054 * @param nWordSkip the byte offset from the start of one word to the start of
2055 * the next. For packed buffers this is the same as nWordSize.
2056 */
2057
GDALSwapWords(void * pData,int nWordSize,int nWordCount,int nWordSkip)2058 void CPL_STDCALL GDALSwapWords( void *pData, int nWordSize, int nWordCount,
2059 int nWordSkip )
2060
2061 {
2062 if (nWordCount > 0)
2063 VALIDATE_POINTER0( pData , "GDALSwapWords" );
2064
2065 GByte *pabyData = static_cast<GByte *>( pData );
2066
2067 switch( nWordSize )
2068 {
2069 case 1:
2070 break;
2071
2072 case 2:
2073 CPLAssert( nWordSkip >= 2 || nWordCount == 1 );
2074 for( int i = 0; i < nWordCount; i++ )
2075 {
2076 CPL_SWAP16PTR(pabyData);
2077 pabyData += nWordSkip;
2078 }
2079 break;
2080
2081 case 4:
2082 CPLAssert( nWordSkip >= 4 || nWordCount == 1 );
2083 if( CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0 )
2084 {
2085 for( int i = 0; i < nWordCount; i++ )
2086 {
2087 *reinterpret_cast<GUInt32*>(pabyData) = CPL_SWAP32(*reinterpret_cast<const GUInt32*>(pabyData));
2088 pabyData += nWordSkip;
2089 }
2090 }
2091 else
2092 {
2093 for( int i = 0; i < nWordCount; i++ )
2094 {
2095 CPL_SWAP32PTR(pabyData);
2096 pabyData += nWordSkip;
2097 }
2098 }
2099 break;
2100
2101 case 8:
2102 CPLAssert( nWordSkip >= 8 || nWordCount == 1 );
2103 #ifdef CPL_HAS_GINT64
2104 if( CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0 )
2105 {
2106 for( int i = 0; i < nWordCount; i++ )
2107 {
2108 *reinterpret_cast<GUInt64*>(pabyData) = CPL_SWAP64(*reinterpret_cast<const GUInt64*>(pabyData));
2109 pabyData += nWordSkip;
2110 }
2111 }
2112 else
2113 #endif
2114 {
2115 for( int i = 0; i < nWordCount; i++ )
2116 {
2117 CPL_SWAP64PTR(pabyData);
2118 pabyData += nWordSkip;
2119 }
2120 }
2121 break;
2122
2123 default:
2124 CPLAssert( false );
2125 }
2126 }
2127
2128 /************************************************************************/
2129 /* GDALSwapWordsEx() */
2130 /************************************************************************/
2131
2132 /**
2133 * Byte swap words in-place.
2134 *
2135 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2136 * a memory array. No assumption is made that the words being swapped are
2137 * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2138 * to determine if the current platform is big endian or little endian. Use
2139 * The macros like CPL_SWAP32() to byte swap single values without the overhead
2140 * of a function call.
2141 *
2142 * @param pData pointer to start of data buffer.
2143 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2144 * @param nWordCount the number of words to be swapped in this call.
2145 * @param nWordSkip the byte offset from the start of one word to the start of
2146 * the next. For packed buffers this is the same as nWordSize.
2147 * @since GDAL 2.1
2148 */
GDALSwapWordsEx(void * pData,int nWordSize,size_t nWordCount,int nWordSkip)2149 void CPL_STDCALL GDALSwapWordsEx( void *pData, int nWordSize, size_t nWordCount,
2150 int nWordSkip )
2151 {
2152 GByte* pabyData = static_cast<GByte*>(pData);
2153 while( nWordCount )
2154 {
2155 // Pick-up a multiple of 8 as max chunk size.
2156 const int nWordCountSmall =
2157 (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
2158 GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
2159 pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
2160 nWordCount -= nWordCountSmall;
2161 }
2162 }
2163
2164 // Place the new GDALCopyWords helpers in an anonymous namespace
2165 namespace {
2166
2167 /************************************************************************/
2168 /* GDALCopyWordsT() */
2169 /************************************************************************/
2170 /**
2171 * Template function, used to copy data from pSrcData into buffer
2172 * pDstData, with stride nSrcPixelStride in the source data and
2173 * stride nDstPixelStride in the destination data. This template can
2174 * deal with the case where the input data type is real or complex and
2175 * the output is real.
2176 *
2177 * @param pSrcData the source data buffer
2178 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2179 * of interest.
2180 * @param pDstData the destination buffer.
2181 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2182 * interest.
2183 * @param nWordCount the total number of pixel words to copy
2184 *
2185 * @code
2186 * // Assume an input buffer of type GUInt16 named pBufferIn
2187 * GByte *pBufferOut = new GByte[numBytesOut];
2188 * GDALCopyWordsT<GUInt16, GByte>(pSrcData, 2, pDstData, 1, numBytesOut);
2189 * @endcode
2190 * @note
2191 * This is a private function, and should not be exposed outside of rasterio.cpp.
2192 * External users should call the GDALCopyWords driver function.
2193 */
2194
2195 template <class Tin, class Tout>
GDALCopyWordsGenericT(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2196 static void inline GDALCopyWordsGenericT( const Tin* const CPL_RESTRICT pSrcData,
2197 int nSrcPixelStride,
2198 Tout* const CPL_RESTRICT pDstData,
2199 int nDstPixelStride,
2200 GPtrDiff_t nWordCount)
2201 {
2202 decltype(nWordCount) nDstOffset = 0;
2203
2204 const char* const pSrcDataPtr = reinterpret_cast<const char*>(pSrcData);
2205 char* const pDstDataPtr = reinterpret_cast<char*>(pDstData);
2206 for (decltype(nWordCount) n = 0; n < nWordCount; n++ )
2207 {
2208 const Tin tValue =
2209 *reinterpret_cast<const Tin*>(pSrcDataPtr + (n * nSrcPixelStride));
2210 Tout* const pOutPixel =
2211 reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2212
2213 GDALCopyWord(tValue, *pOutPixel);
2214
2215 nDstOffset += nDstPixelStride;
2216 }
2217 }
2218
2219 template <class Tin, class Tout>
GDALCopyWordsT(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2220 static void inline GDALCopyWordsT( const Tin* const CPL_RESTRICT pSrcData,
2221 int nSrcPixelStride,
2222 Tout* const CPL_RESTRICT pDstData,
2223 int nDstPixelStride,
2224 GPtrDiff_t nWordCount)
2225 {
2226 GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2227 pDstData, nDstPixelStride,
2228 nWordCount);
2229 }
2230
2231 template <class Tin, class Tout>
GDALCopyWordsT_8atatime(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2232 static void inline GDALCopyWordsT_8atatime( const Tin* const CPL_RESTRICT pSrcData,
2233 int nSrcPixelStride,
2234 Tout* const CPL_RESTRICT pDstData,
2235 int nDstPixelStride,
2236 GPtrDiff_t nWordCount )
2237 {
2238 decltype(nWordCount) nDstOffset = 0;
2239
2240 const char* const pSrcDataPtr = reinterpret_cast<const char*>(pSrcData);
2241 char* const pDstDataPtr = reinterpret_cast<char*>(pDstData);
2242 decltype(nWordCount) n = 0;
2243 if( nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2244 nDstPixelStride == static_cast<int>(sizeof(Tout)) )
2245 {
2246 for (; n < nWordCount-7; n+=8)
2247 {
2248 const Tin* pInValues =
2249 reinterpret_cast<const Tin*>(pSrcDataPtr + (n * nSrcPixelStride));
2250 Tout* const pOutPixels =
2251 reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2252
2253 GDALCopy8Words(pInValues, pOutPixels);
2254
2255 nDstOffset += 8 * nDstPixelStride;
2256 }
2257 }
2258 for( ; n < nWordCount; n++ )
2259 {
2260 const Tin tValue =
2261 *reinterpret_cast<const Tin*>(pSrcDataPtr + (n * nSrcPixelStride));
2262 Tout* const pOutPixel =
2263 reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2264
2265 GDALCopyWord(tValue, *pOutPixel);
2266
2267 nDstOffset += nDstPixelStride;
2268 }
2269 }
2270
2271 #if defined(__x86_64) || defined(_M_X64)
2272
2273 #include <emmintrin.h>
2274
GDALCopyWordsByteTo16Bit(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2275 template<class Tout> void GDALCopyWordsByteTo16Bit(
2276 const GByte* const CPL_RESTRICT pSrcData,
2277 int nSrcPixelStride,
2278 Tout* const CPL_RESTRICT pDstData,
2279 int nDstPixelStride,
2280 GPtrDiff_t nWordCount )
2281 {
2282 if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2283 nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2284 {
2285 decltype(nWordCount) n = 0;
2286 const __m128i xmm_zero = _mm_setzero_si128 ();
2287 GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2288 for (; n < nWordCount-15; n+=16)
2289 {
2290 __m128i xmm = _mm_loadu_si128(
2291 reinterpret_cast<const __m128i*> (pSrcData + n) );
2292 __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2293 __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2294 _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 2),
2295 xmm0 );
2296 _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 2 + 16),
2297 xmm1 );
2298 }
2299 for( ; n < nWordCount; n++ )
2300 {
2301 pDstData[n] = pSrcData[n];
2302 }
2303 }
2304 else
2305 {
2306 GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2307 pDstData, nDstPixelStride,
2308 nWordCount);
2309 }
2310 }
2311
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GUInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2312 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2313 int nSrcPixelStride,
2314 GUInt16* const CPL_RESTRICT pDstData,
2315 int nDstPixelStride,
2316 GPtrDiff_t nWordCount )
2317 {
2318 GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2319 nDstPixelStride, nWordCount);
2320 }
2321
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2322 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2323 int nSrcPixelStride,
2324 GInt16* const CPL_RESTRICT pDstData,
2325 int nDstPixelStride,
2326 GPtrDiff_t nWordCount )
2327 {
2328 GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2329 nDstPixelStride, nWordCount);
2330 }
2331
GDALCopyWordsByteTo32Bit(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2332 template<class Tout> void GDALCopyWordsByteTo32Bit(
2333 const GByte* const CPL_RESTRICT pSrcData,
2334 int nSrcPixelStride,
2335 Tout* const CPL_RESTRICT pDstData,
2336 int nDstPixelStride,
2337 GPtrDiff_t nWordCount )
2338 {
2339 if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2340 nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2341 {
2342 decltype(nWordCount) n = 0;
2343 const __m128i xmm_zero = _mm_setzero_si128 ();
2344 GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2345 for (; n < nWordCount-15; n+=16)
2346 {
2347 __m128i xmm = _mm_loadu_si128(
2348 reinterpret_cast<const __m128i*> (pSrcData + n) );
2349 __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2350 __m128i xmm_high= _mm_unpackhi_epi8(xmm, xmm_zero);
2351 __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2352 __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2353 __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2354 __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2355 _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 4),
2356 xmm0 );
2357 _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 4 + 16),
2358 xmm1 );
2359 _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 4 + 32),
2360 xmm2 );
2361 _mm_storeu_si128( reinterpret_cast<__m128i*>(pabyDstDataPtr + n * 4 + 48),
2362 xmm3 );
2363 }
2364 for( ; n < nWordCount; n++ )
2365 {
2366 pDstData[n] = pSrcData[n];
2367 }
2368 }
2369 else
2370 {
2371 GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2372 pDstData, nDstPixelStride,
2373 nWordCount);
2374 }
2375 }
2376
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GUInt32 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2377 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2378 int nSrcPixelStride,
2379 GUInt32* const CPL_RESTRICT pDstData,
2380 int nDstPixelStride,
2381 GPtrDiff_t nWordCount )
2382 {
2383 GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2384 nDstPixelStride, nWordCount);
2385 }
2386
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GInt32 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2387 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2388 int nSrcPixelStride,
2389 GInt32* const CPL_RESTRICT pDstData,
2390 int nDstPixelStride,
2391 GPtrDiff_t nWordCount )
2392 {
2393 GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2394 nDstPixelStride, nWordCount);
2395 }
2396
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,float * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2397 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2398 int nSrcPixelStride,
2399 float* const CPL_RESTRICT pDstData,
2400 int nDstPixelStride,
2401 GPtrDiff_t nWordCount )
2402 {
2403 if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2404 nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2405 {
2406 decltype(nWordCount) n = 0;
2407 const __m128i xmm_zero = _mm_setzero_si128 ();
2408 GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2409 for (; n < nWordCount-15; n+=16)
2410 {
2411 __m128i xmm = _mm_loadu_si128(
2412 reinterpret_cast<const __m128i*> (pSrcData + n) );
2413 __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2414 __m128i xmm_high= _mm_unpackhi_epi8(xmm, xmm_zero);
2415 __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2416 __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2417 __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2418 __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2419 __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2420 __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2421 __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2422 __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2423 _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4),
2424 xmm0_f );
2425 _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4 + 16),
2426 xmm1_f );
2427 _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4 + 32),
2428 xmm2_f );
2429 _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4 + 48),
2430 xmm3_f );
2431 }
2432 for( ; n < nWordCount; n++ )
2433 {
2434 pDstData[n] = pSrcData[n];
2435 }
2436 }
2437 else
2438 {
2439 GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2440 pDstData, nDstPixelStride,
2441 nWordCount);
2442 }
2443 }
2444
GDALCopyWordsT(const GByte * const CPL_RESTRICT pSrcData,int nSrcPixelStride,double * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2445 template<> void GDALCopyWordsT( const GByte* const CPL_RESTRICT pSrcData,
2446 int nSrcPixelStride,
2447 double* const CPL_RESTRICT pDstData,
2448 int nDstPixelStride,
2449 GPtrDiff_t nWordCount )
2450 {
2451 if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2452 nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2453 {
2454 decltype(nWordCount) n = 0;
2455 const __m128i xmm_zero = _mm_setzero_si128 ();
2456 GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2457 for (; n < nWordCount-15; n+=16)
2458 {
2459 __m128i xmm = _mm_loadu_si128(
2460 reinterpret_cast<const __m128i*> (pSrcData + n) );
2461 __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2462 __m128i xmm_high= _mm_unpackhi_epi8(xmm, xmm_zero);
2463 __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2464 __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2465 __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2466 __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2467
2468 __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2469 __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2470 __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2471 __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2472 xmm0 = _mm_srli_si128(xmm0, 8);
2473 xmm1 = _mm_srli_si128(xmm1, 8);
2474 xmm2 = _mm_srli_si128(xmm2, 8);
2475 xmm3 = _mm_srli_si128(xmm3, 8);
2476 __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2477 __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2478 __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2479 __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2480
2481 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8),
2482 xmm0_low_d );
2483 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 16),
2484 xmm0_high_d );
2485 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 32),
2486 xmm1_low_d );
2487 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 48),
2488 xmm1_high_d );
2489 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 64),
2490 xmm2_low_d );
2491 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 80),
2492 xmm2_high_d );
2493 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 96),
2494 xmm3_low_d );
2495 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 112),
2496 xmm3_high_d );
2497 }
2498 for( ; n < nWordCount; n++ )
2499 {
2500 pDstData[n] = pSrcData[n];
2501 }
2502 }
2503 else
2504 {
2505 GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2506 pDstData, nDstPixelStride,
2507 nWordCount);
2508 }
2509 }
2510
GDALCopyWordsT(const GUInt16 * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GByte * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2511 template<> void GDALCopyWordsT( const GUInt16* const CPL_RESTRICT pSrcData,
2512 int nSrcPixelStride,
2513 GByte* const CPL_RESTRICT pDstData,
2514 int nDstPixelStride,
2515 GPtrDiff_t nWordCount )
2516 {
2517 if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2518 nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2519 {
2520 decltype(nWordCount) n = 0;
2521 // In SSE2, min_epu16 does not exist, so shift from
2522 // UInt16 to SInt16 to be able to use min_epi16
2523 const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16 (-32768);
2524 const __m128i xmm_m255_shifted = _mm_set1_epi16 (255-32768);
2525 for (; n < nWordCount-7; n+=8)
2526 {
2527 __m128i xmm = _mm_loadu_si128(
2528 reinterpret_cast<const __m128i*> (pSrcData + n) );
2529 xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2530 xmm = _mm_min_epi16(xmm, xmm_m255_shifted);
2531 xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2532 xmm = _mm_packus_epi16(xmm, xmm);
2533 GDALCopyXMMToInt64(xmm, reinterpret_cast<GPtrDiff_t*>(pDstData + n));
2534 }
2535 for( ; n < nWordCount; n++ )
2536 {
2537 pDstData[n] =
2538 pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]);
2539 }
2540 }
2541 else
2542 {
2543 GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2544 pDstData, nDstPixelStride,
2545 nWordCount);
2546 }
2547 }
2548
GDALCopyWordsT(const GUInt16 * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2549 template<> void GDALCopyWordsT( const GUInt16* const CPL_RESTRICT pSrcData,
2550 int nSrcPixelStride,
2551 GInt16* const CPL_RESTRICT pDstData,
2552 int nDstPixelStride,
2553 GPtrDiff_t nWordCount )
2554 {
2555 if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2556 nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2557 {
2558 decltype(nWordCount) n = 0;
2559 // In SSE2, min_epu16 does not exist, so shift from
2560 // UInt16 to SInt16 to be able to use min_epi16
2561 const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16 (-32768);
2562 const __m128i xmm_32767_shifted = _mm_set1_epi16 (32767-32768);
2563 for (; n < nWordCount-7; n+=8)
2564 {
2565 __m128i xmm = _mm_loadu_si128(
2566 reinterpret_cast<const __m128i*> (pSrcData + n) );
2567 xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2568 xmm = _mm_min_epi16(xmm, xmm_32767_shifted);
2569 xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2570 _mm_storeu_si128( reinterpret_cast<__m128i*>(pDstData + n),
2571 xmm );
2572 }
2573 for( ; n < nWordCount; n++ )
2574 {
2575 pDstData[n] =
2576 pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]);
2577 }
2578 }
2579 else
2580 {
2581 GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2582 pDstData, nDstPixelStride,
2583 nWordCount);
2584 }
2585 }
2586
GDALCopyWordsT(const GUInt16 * const CPL_RESTRICT pSrcData,int nSrcPixelStride,float * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2587 template<> void GDALCopyWordsT( const GUInt16* const CPL_RESTRICT pSrcData,
2588 int nSrcPixelStride,
2589 float* const CPL_RESTRICT pDstData,
2590 int nDstPixelStride,
2591 GPtrDiff_t nWordCount )
2592 {
2593 if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2594 nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2595 {
2596 decltype(nWordCount) n = 0;
2597 const __m128i xmm_zero = _mm_setzero_si128 ();
2598 GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2599 for (; n < nWordCount-7; n+=8)
2600 {
2601 __m128i xmm = _mm_loadu_si128(
2602 reinterpret_cast<const __m128i*> (pSrcData + n) );
2603 __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2604 __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2605 __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2606 __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2607 _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4),
2608 xmm0_f );
2609 _mm_storeu_ps( reinterpret_cast<float*>(pabyDstDataPtr + n * 4 + 16),
2610 xmm1_f );
2611 }
2612 for( ; n < nWordCount; n++ )
2613 {
2614 pDstData[n] = pSrcData[n];
2615 }
2616 }
2617 else
2618 {
2619 GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2620 pDstData, nDstPixelStride,
2621 nWordCount);
2622 }
2623 }
2624
GDALCopyWordsT(const GUInt16 * const CPL_RESTRICT pSrcData,int nSrcPixelStride,double * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2625 template<> void GDALCopyWordsT( const GUInt16* const CPL_RESTRICT pSrcData,
2626 int nSrcPixelStride,
2627 double* const CPL_RESTRICT pDstData,
2628 int nDstPixelStride,
2629 GPtrDiff_t nWordCount )
2630 {
2631 if( nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2632 nDstPixelStride == static_cast<int>(sizeof(*pDstData)) )
2633 {
2634 decltype(nWordCount) n = 0;
2635 const __m128i xmm_zero = _mm_setzero_si128 ();
2636 GByte* CPL_RESTRICT pabyDstDataPtr = reinterpret_cast<GByte*>(pDstData);
2637 for (; n < nWordCount-7; n+=8)
2638 {
2639 __m128i xmm = _mm_loadu_si128(
2640 reinterpret_cast<const __m128i*> (pSrcData + n) );
2641 __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2642 __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2643
2644 __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2645 __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2646 xmm0 = _mm_srli_si128(xmm0, 8);
2647 xmm1 = _mm_srli_si128(xmm1, 8);
2648 __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2649 __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2650
2651 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8),
2652 xmm0_low_d );
2653 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 16),
2654 xmm0_high_d );
2655 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 32),
2656 xmm1_low_d );
2657 _mm_storeu_pd( reinterpret_cast<double*>(pabyDstDataPtr + n * 8 + 48),
2658 xmm1_high_d );
2659 }
2660 for( ; n < nWordCount; n++ )
2661 {
2662 pDstData[n] = pSrcData[n];
2663 }
2664 }
2665 else
2666 {
2667 GDALCopyWordsGenericT(pSrcData, nSrcPixelStride,
2668 pDstData, nDstPixelStride,
2669 nWordCount);
2670 }
2671 }
2672
GDALCopyWordsT(const double * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GUInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2673 template<> void GDALCopyWordsT( const double* const CPL_RESTRICT pSrcData,
2674 int nSrcPixelStride,
2675 GUInt16* const CPL_RESTRICT pDstData,
2676 int nDstPixelStride,
2677 GPtrDiff_t nWordCount )
2678 {
2679 GDALCopyWordsT_8atatime( pSrcData, nSrcPixelStride,
2680 pDstData, nDstPixelStride, nWordCount );
2681 }
2682
2683 #endif // defined(__x86_64) || defined(_M_X64)
2684
GDALCopyWordsT(const float * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GByte * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2685 template<> void GDALCopyWordsT( const float* const CPL_RESTRICT pSrcData,
2686 int nSrcPixelStride,
2687 GByte* const CPL_RESTRICT pDstData,
2688 int nDstPixelStride,
2689 GPtrDiff_t nWordCount )
2690 {
2691 GDALCopyWordsT_8atatime( pSrcData, nSrcPixelStride,
2692 pDstData, nDstPixelStride, nWordCount );
2693 }
2694
GDALCopyWordsT(const float * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2695 template<> void GDALCopyWordsT( const float* const CPL_RESTRICT pSrcData,
2696 int nSrcPixelStride,
2697 GInt16* const CPL_RESTRICT pDstData,
2698 int nDstPixelStride,
2699 GPtrDiff_t nWordCount )
2700 {
2701 GDALCopyWordsT_8atatime( pSrcData, nSrcPixelStride,
2702 pDstData, nDstPixelStride, nWordCount );
2703 }
2704
GDALCopyWordsT(const float * const CPL_RESTRICT pSrcData,int nSrcPixelStride,GUInt16 * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2705 template<> void GDALCopyWordsT( const float* const CPL_RESTRICT pSrcData,
2706 int nSrcPixelStride,
2707 GUInt16* const CPL_RESTRICT pDstData,
2708 int nDstPixelStride,
2709 GPtrDiff_t nWordCount )
2710 {
2711 GDALCopyWordsT_8atatime( pSrcData, nSrcPixelStride,
2712 pDstData, nDstPixelStride, nWordCount );
2713 }
2714
2715 /************************************************************************/
2716 /* GDALCopyWordsComplexT() */
2717 /************************************************************************/
2718 /**
2719 * Template function, used to copy data from pSrcData into buffer
2720 * pDstData, with stride nSrcPixelStride in the source data and
2721 * stride nDstPixelStride in the destination data. Deals with the
2722 * complex case, where input is complex and output is complex.
2723 *
2724 * @param pSrcData the source data buffer
2725 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2726 * of interest.
2727 * @param pDstData the destination buffer.
2728 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2729 * interest.
2730 * @param nWordCount the total number of pixel words to copy
2731 *
2732 */
2733 template <class Tin, class Tout>
GDALCopyWordsComplexT(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2734 inline void GDALCopyWordsComplexT( const Tin* const CPL_RESTRICT pSrcData,
2735 int nSrcPixelStride,
2736 Tout* const CPL_RESTRICT pDstData,
2737 int nDstPixelStride,
2738 GPtrDiff_t nWordCount )
2739 {
2740 decltype(nWordCount) nDstOffset = 0;
2741 const char* const pSrcDataPtr = reinterpret_cast<const char*>(pSrcData);
2742 char* const pDstDataPtr = reinterpret_cast<char*>(pDstData);
2743
2744 for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2745 {
2746 const Tin* const pPixelIn =
2747 reinterpret_cast<const Tin*>(pSrcDataPtr + n * nSrcPixelStride);
2748 Tout* const pPixelOut =
2749 reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2750
2751 GDALCopyWord(pPixelIn[0], pPixelOut[0]);
2752 GDALCopyWord(pPixelIn[1], pPixelOut[1]);
2753
2754 nDstOffset += nDstPixelStride;
2755 }
2756 }
2757
2758 /************************************************************************/
2759 /* GDALCopyWordsComplexOutT() */
2760 /************************************************************************/
2761 /**
2762 * Template function, used to copy data from pSrcData into buffer
2763 * pDstData, with stride nSrcPixelStride in the source data and
2764 * stride nDstPixelStride in the destination data. Deals with the
2765 * case where the value is real coming in, but complex going out.
2766 *
2767 * @param pSrcData the source data buffer
2768 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2769 * of interest, in bytes.
2770 * @param pDstData the destination buffer.
2771 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2772 * interest, in bytes.
2773 * @param nWordCount the total number of pixel words to copy
2774 *
2775 */
2776 template <class Tin, class Tout>
GDALCopyWordsComplexOutT(const Tin * const CPL_RESTRICT pSrcData,int nSrcPixelStride,Tout * const CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2777 inline void GDALCopyWordsComplexOutT( const Tin* const CPL_RESTRICT pSrcData,
2778 int nSrcPixelStride,
2779 Tout* const CPL_RESTRICT pDstData,
2780 int nDstPixelStride,
2781 GPtrDiff_t nWordCount )
2782 {
2783 decltype(nWordCount) nDstOffset = 0;
2784
2785 const Tout tOutZero = static_cast<Tout>(0);
2786
2787 const char* const pSrcDataPtr = reinterpret_cast<const char*>(pSrcData);
2788 char* const pDstDataPtr = reinterpret_cast<char*>(pDstData);
2789
2790 for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2791 {
2792 const Tin tValue =
2793 *reinterpret_cast<const Tin*>(pSrcDataPtr + n * nSrcPixelStride);
2794 Tout* const pPixelOut =
2795 reinterpret_cast<Tout*>(pDstDataPtr + nDstOffset);
2796 GDALCopyWord(tValue, *pPixelOut);
2797
2798 pPixelOut[1] = tOutZero;
2799
2800 nDstOffset += nDstPixelStride;
2801 }
2802 }
2803
2804 /************************************************************************/
2805 /* GDALCopyWordsFromT() */
2806 /************************************************************************/
2807 /**
2808 * Template driver function. Given the input type T, call the appropriate
2809 * GDALCopyWordsT function template for the desired output type. You should
2810 * never call this function directly (call GDALCopyWords instead).
2811 *
2812 * @param pSrcData source data buffer
2813 * @param nSrcPixelStride pixel stride in input buffer, in pixel words
2814 * @param bInComplex input is complex
2815 * @param pDstData destination data buffer
2816 * @param eDstType destination data type
2817 * @param nDstPixelStride pixel stride in output buffer, in pixel words
2818 * @param nWordCount number of pixel words to be copied
2819 */
2820 template <class T>
GDALCopyWordsFromT(const T * const CPL_RESTRICT pSrcData,int nSrcPixelStride,bool bInComplex,void * CPL_RESTRICT pDstData,GDALDataType eDstType,int nDstPixelStride,GPtrDiff_t nWordCount)2821 inline void GDALCopyWordsFromT( const T* const CPL_RESTRICT pSrcData,
2822 int nSrcPixelStride, bool bInComplex,
2823 void * CPL_RESTRICT pDstData,
2824 GDALDataType eDstType, int nDstPixelStride,
2825 GPtrDiff_t nWordCount )
2826 {
2827 switch (eDstType)
2828 {
2829 case GDT_Byte:
2830 GDALCopyWordsT( pSrcData, nSrcPixelStride,
2831 static_cast<unsigned char*>(pDstData), nDstPixelStride,
2832 nWordCount );
2833 break;
2834 case GDT_UInt16:
2835 GDALCopyWordsT( pSrcData, nSrcPixelStride,
2836 static_cast<unsigned short*>(pDstData), nDstPixelStride,
2837 nWordCount );
2838 break;
2839 case GDT_Int16:
2840 GDALCopyWordsT( pSrcData, nSrcPixelStride,
2841 static_cast<short*>(pDstData), nDstPixelStride,
2842 nWordCount );
2843 break;
2844 case GDT_UInt32:
2845 GDALCopyWordsT( pSrcData, nSrcPixelStride,
2846 static_cast<unsigned int*>(pDstData), nDstPixelStride,
2847 nWordCount );
2848 break;
2849 case GDT_Int32:
2850 GDALCopyWordsT( pSrcData, nSrcPixelStride,
2851 static_cast<int*>(pDstData), nDstPixelStride,
2852 nWordCount );
2853 break;
2854 case GDT_Float32:
2855 GDALCopyWordsT( pSrcData, nSrcPixelStride,
2856 static_cast<float*>(pDstData), nDstPixelStride,
2857 nWordCount );
2858 break;
2859 case GDT_Float64:
2860 GDALCopyWordsT( pSrcData, nSrcPixelStride,
2861 static_cast<double*>(pDstData), nDstPixelStride,
2862 nWordCount );
2863 break;
2864 case GDT_CInt16:
2865 if (bInComplex)
2866 {
2867 GDALCopyWordsComplexT(
2868 pSrcData, nSrcPixelStride,
2869 static_cast<short *>(pDstData), nDstPixelStride,
2870 nWordCount );
2871 }
2872 else // input is not complex, so we need to promote to a complex buffer
2873 {
2874 GDALCopyWordsComplexOutT(
2875 pSrcData, nSrcPixelStride,
2876 static_cast<short *>(pDstData), nDstPixelStride,
2877 nWordCount );
2878 }
2879 break;
2880 case GDT_CInt32:
2881 if (bInComplex)
2882 {
2883 GDALCopyWordsComplexT(
2884 pSrcData, nSrcPixelStride,
2885 static_cast<int *>(pDstData), nDstPixelStride,
2886 nWordCount );
2887 }
2888 else // input is not complex, so we need to promote to a complex buffer
2889 {
2890 GDALCopyWordsComplexOutT(
2891 pSrcData, nSrcPixelStride,
2892 static_cast<int *>(pDstData), nDstPixelStride,
2893 nWordCount );
2894 }
2895 break;
2896 case GDT_CFloat32:
2897 if (bInComplex)
2898 {
2899 GDALCopyWordsComplexT(
2900 pSrcData, nSrcPixelStride,
2901 static_cast<float *>(pDstData), nDstPixelStride,
2902 nWordCount );
2903 }
2904 else // input is not complex, so we need to promote to a complex buffer
2905 {
2906 GDALCopyWordsComplexOutT(
2907 pSrcData, nSrcPixelStride,
2908 static_cast<float *>(pDstData), nDstPixelStride,
2909 nWordCount );
2910 }
2911 break;
2912 case GDT_CFloat64:
2913 if (bInComplex)
2914 {
2915 GDALCopyWordsComplexT(
2916 pSrcData, nSrcPixelStride,
2917 static_cast<double *>(pDstData), nDstPixelStride,
2918 nWordCount );
2919 }
2920 else // input is not complex, so we need to promote to a complex buffer
2921 {
2922 GDALCopyWordsComplexOutT(
2923 pSrcData, nSrcPixelStride,
2924 static_cast<double *>(pDstData), nDstPixelStride,
2925 nWordCount );
2926 }
2927 break;
2928 case GDT_Unknown:
2929 default:
2930 CPLAssert(false);
2931 }
2932 }
2933
2934 } // end anonymous namespace
2935
2936 /************************************************************************/
2937 /* GDALReplicateWord() */
2938 /************************************************************************/
2939
2940 template <class T>
GDALReplicateWordT(void * pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)2941 inline void GDALReplicateWordT( void * pDstData,
2942 int nDstPixelStride,
2943 GPtrDiff_t nWordCount )
2944 {
2945 const T valSet = *static_cast<const T*>(pDstData);
2946 if( nDstPixelStride == static_cast<int>(sizeof(T)) )
2947 {
2948 T* pDstPtr = static_cast<T*>(pDstData) + 1;
2949 while( nWordCount >= 4 )
2950 {
2951 nWordCount -= 4;
2952 pDstPtr[0] = valSet;
2953 pDstPtr[1] = valSet;
2954 pDstPtr[2] = valSet;
2955 pDstPtr[3] = valSet;
2956 pDstPtr += 4;
2957 }
2958 while( nWordCount > 0 )
2959 {
2960 --nWordCount;
2961 *pDstPtr = valSet;
2962 pDstPtr++;
2963 }
2964 }
2965 else
2966 {
2967 GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
2968 while( nWordCount > 0 )
2969 {
2970 --nWordCount;
2971 *reinterpret_cast<T*>(pabyDstPtr) = valSet;
2972 pabyDstPtr += nDstPixelStride;
2973 }
2974 }
2975 }
2976
2977 static
GDALReplicateWord(const void * CPL_RESTRICT pSrcData,GDALDataType eSrcType,void * CPL_RESTRICT pDstData,GDALDataType eDstType,int nDstPixelStride,GPtrDiff_t nWordCount)2978 void GDALReplicateWord( const void * CPL_RESTRICT pSrcData,
2979 GDALDataType eSrcType,
2980 void * CPL_RESTRICT pDstData,
2981 GDALDataType eDstType,
2982 int nDstPixelStride,
2983 GPtrDiff_t nWordCount)
2984 {
2985 /* ----------------------------------------------------------------------- */
2986 /* Special case when the source data is always the same value */
2987 /* (for VRTSourcedRasterBand::IRasterIO and VRTDerivedRasterBand::IRasterIO*/
2988 /* for example) */
2989 /* ----------------------------------------------------------------------- */
2990 // Let the general translation case do the necessary conversions
2991 // on the first destination element.
2992 GDALCopyWords(pSrcData, eSrcType, 0,
2993 pDstData, eDstType, 0,
2994 1 );
2995
2996 // Now copy the first element to the nWordCount - 1 following destination
2997 // elements.
2998 nWordCount--;
2999 GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
3000
3001 switch (eDstType)
3002 {
3003 case GDT_Byte:
3004 {
3005 if (nDstPixelStride == 1)
3006 {
3007 if (nWordCount > 0)
3008 memset(pabyDstWord, *reinterpret_cast<const GByte*>(pDstData), nWordCount);
3009 }
3010 else
3011 {
3012 GByte valSet = *reinterpret_cast<const GByte*>(pDstData);
3013 while( nWordCount > 0 )
3014 {
3015 --nWordCount;
3016 *pabyDstWord = valSet;
3017 pabyDstWord += nDstPixelStride;
3018 }
3019 }
3020 break;
3021 }
3022
3023 #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \
3024 case enum_type:\
3025 { \
3026 GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \
3027 break; \
3028 }
3029
3030 CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
3031 CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
3032 CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3033 CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
3034 CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3035 CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3036
3037 #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \
3038 case enum_type:\
3039 { \
3040 c_type valSet1 = reinterpret_cast<const c_type*>(pDstData)[0]; \
3041 c_type valSet2 = reinterpret_cast<const c_type*>(pDstData)[1]; \
3042 while( nWordCount > 0 ) \
3043 { \
3044 --nWordCount; \
3045 reinterpret_cast<c_type*>(pabyDstWord)[0] = valSet1; \
3046 reinterpret_cast<c_type*>(pabyDstWord)[1] = valSet2; \
3047 pabyDstWord += nDstPixelStride; \
3048 } \
3049 break; \
3050 }
3051
3052 CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
3053 CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3054 CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3055 CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3056
3057 default:
3058 CPLAssert( false );
3059 }
3060 }
3061
3062 /************************************************************************/
3063 /* GDALUnrolledCopy() */
3064 /************************************************************************/
3065
3066 template<class T, int srcStride, int dstStride>
GDALUnrolledCopyGeneric(T * CPL_RESTRICT pDest,const T * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3067 static inline void GDALUnrolledCopyGeneric( T* CPL_RESTRICT pDest,
3068 const T* CPL_RESTRICT pSrc,
3069 GPtrDiff_t nIters )
3070 {
3071 if (nIters >= 16)
3072 {
3073 for ( GPtrDiff_t i = nIters / 16; i != 0; i -- )
3074 {
3075 pDest[0*dstStride] = pSrc[0*srcStride];
3076 pDest[1*dstStride] = pSrc[1*srcStride];
3077 pDest[2*dstStride] = pSrc[2*srcStride];
3078 pDest[3*dstStride] = pSrc[3*srcStride];
3079 pDest[4*dstStride] = pSrc[4*srcStride];
3080 pDest[5*dstStride] = pSrc[5*srcStride];
3081 pDest[6*dstStride] = pSrc[6*srcStride];
3082 pDest[7*dstStride] = pSrc[7*srcStride];
3083 pDest[8*dstStride] = pSrc[8*srcStride];
3084 pDest[9*dstStride] = pSrc[9*srcStride];
3085 pDest[10*dstStride] = pSrc[10*srcStride];
3086 pDest[11*dstStride] = pSrc[11*srcStride];
3087 pDest[12*dstStride] = pSrc[12*srcStride];
3088 pDest[13*dstStride] = pSrc[13*srcStride];
3089 pDest[14*dstStride] = pSrc[14*srcStride];
3090 pDest[15*dstStride] = pSrc[15*srcStride];
3091 pDest += 16*dstStride;
3092 pSrc += 16*srcStride;
3093 }
3094 nIters = nIters % 16;
3095 }
3096 for( GPtrDiff_t i = 0; i < nIters; i++ )
3097 {
3098 pDest[i*dstStride] = *pSrc;
3099 pSrc += srcStride;
3100 }
3101 }
3102
3103 template<class T, int srcStride, int dstStride>
GDALUnrolledCopy(T * CPL_RESTRICT pDest,const T * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3104 static inline void GDALUnrolledCopy( T* CPL_RESTRICT pDest,
3105 const T* CPL_RESTRICT pSrc,
3106 GPtrDiff_t nIters )
3107 {
3108 GDALUnrolledCopyGeneric<T,srcStride,dstStride>(pDest, pSrc, nIters);
3109 }
3110
3111 #if (defined(__x86_64) || defined(_M_X64)) && !(defined(__GNUC__) && __GNUC__ < 4)
3112
3113 #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3114
3115 void GDALUnrolledCopy_GByte_3_1_SSSE3( GByte* CPL_RESTRICT pDest,
3116 const GByte* CPL_RESTRICT pSrc,
3117 GPtrDiff_t nIters );
3118 #endif
3119
3120
GDALUnrolledCopy(GByte * CPL_RESTRICT pDest,const GByte * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3121 template<> void GDALUnrolledCopy<GByte,2,1>( GByte* CPL_RESTRICT pDest,
3122 const GByte* CPL_RESTRICT pSrc,
3123 GPtrDiff_t nIters )
3124 {
3125 decltype(nIters) i = 0;
3126 if( nIters > 16 )
3127 {
3128 const __m128i xmm_mask = _mm_set1_epi16(0xff);
3129 // If we were sure that there would always be 1 trailing byte, we could
3130 // check against nIters - 15
3131 for ( ; i < nIters - 16; i += 16 )
3132 {
3133 __m128i xmm0 = _mm_loadu_si128( reinterpret_cast<__m128i const*>(pSrc + 0) );
3134 __m128i xmm1 = _mm_loadu_si128( reinterpret_cast<__m128i const*>(pSrc + 16) );
3135 // Set higher 8bit of each int16 packed word to 0
3136 xmm0 = _mm_and_si128(xmm0, xmm_mask);
3137 xmm1 = _mm_and_si128(xmm1, xmm_mask);
3138 // Pack int16 to uint8 and merge back both vector
3139 xmm0 = _mm_packus_epi16(xmm0, xmm1);
3140
3141 // Store result
3142 _mm_storeu_si128( reinterpret_cast<__m128i*>(pDest + i), xmm0);
3143
3144 pSrc += 2 * 16;
3145 }
3146 }
3147 for( ; i < nIters; i++ )
3148 {
3149 pDest[i] = *pSrc;
3150 pSrc += 2;
3151 }
3152 }
3153
3154
3155 #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3156
GDALUnrolledCopy(GByte * CPL_RESTRICT pDest,const GByte * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3157 template<> void GDALUnrolledCopy<GByte,3,1>( GByte* CPL_RESTRICT pDest,
3158 const GByte* CPL_RESTRICT pSrc,
3159 GPtrDiff_t nIters )
3160 {
3161 if( nIters > 16 && CPLHaveRuntimeSSSE3() )
3162 {
3163 GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3164 }
3165 else
3166 {
3167 GDALUnrolledCopyGeneric<GByte,3,1>(pDest, pSrc, nIters);
3168 }
3169 }
3170
3171 #endif
3172
GDALUnrolledCopy(GByte * CPL_RESTRICT pDest,const GByte * CPL_RESTRICT pSrc,GPtrDiff_t nIters)3173 template<> void GDALUnrolledCopy<GByte,4,1>( GByte* CPL_RESTRICT pDest,
3174 const GByte* CPL_RESTRICT pSrc,
3175 GPtrDiff_t nIters )
3176 {
3177 decltype(nIters) i = 0;
3178 if( nIters > 16 )
3179 {
3180 const __m128i xmm_mask = _mm_set1_epi32(0xff);
3181 // If we were sure that there would always be 3 trailing bytes, we could
3182 // check against nIters - 15
3183 for ( ; i < nIters - 16; i += 16 )
3184 {
3185 __m128i xmm0 = _mm_loadu_si128( reinterpret_cast<__m128i const*> (pSrc + 0) );
3186 __m128i xmm1 = _mm_loadu_si128( reinterpret_cast<__m128i const*> (pSrc + 16) );
3187 __m128i xmm2 = _mm_loadu_si128( reinterpret_cast<__m128i const*> (pSrc + 32) );
3188 __m128i xmm3 = _mm_loadu_si128( reinterpret_cast<__m128i const*> (pSrc + 48) );
3189 // Set higher 24bit of each int32 packed word to 0
3190 xmm0 = _mm_and_si128(xmm0, xmm_mask);
3191 xmm1 = _mm_and_si128(xmm1, xmm_mask);
3192 xmm2 = _mm_and_si128(xmm2, xmm_mask);
3193 xmm3 = _mm_and_si128(xmm3, xmm_mask);
3194 // Pack int32 to int16
3195 xmm0 = _mm_packs_epi32(xmm0, xmm1);
3196 xmm2 = _mm_packs_epi32(xmm2, xmm3);
3197 // Pack int16 to uint8
3198 xmm0 = _mm_packus_epi16(xmm0, xmm2);
3199
3200 // Store result
3201 _mm_storeu_si128( reinterpret_cast<__m128i*>(pDest + i), xmm0);
3202
3203 pSrc += 4 * 16;
3204 }
3205 }
3206 for( ; i < nIters; i++ )
3207 {
3208 pDest[i] = *pSrc;
3209 pSrc += 4;
3210 }
3211 }
3212 #endif // defined(__x86_64) || defined(_M_X64)
3213
3214 /************************************************************************/
3215 /* GDALFastCopy() */
3216 /************************************************************************/
3217
3218 template<class T>
GDALFastCopy(T * CPL_RESTRICT pDest,int nDestStride,const T * CPL_RESTRICT pSrc,int nSrcStride,GPtrDiff_t nIters)3219 static inline void GDALFastCopy( T* CPL_RESTRICT pDest,
3220 int nDestStride,
3221 const T* CPL_RESTRICT pSrc,
3222 int nSrcStride,
3223 GPtrDiff_t nIters )
3224 {
3225 if( nIters == 1 )
3226 {
3227 *pDest = *pSrc;
3228 }
3229 else if( nDestStride == static_cast<int>(sizeof(T)) )
3230 {
3231 if( nSrcStride == static_cast<int>(sizeof(T)) )
3232 {
3233 memcpy(pDest, pSrc, nIters * sizeof(T));
3234 }
3235 else if( nSrcStride == 2 * static_cast<int>(sizeof(T)) )
3236 {
3237 GDALUnrolledCopy<T, 2,1>(pDest, pSrc, nIters);
3238 }
3239 else if( nSrcStride == 3 * static_cast<int>(sizeof(T)) )
3240 {
3241 GDALUnrolledCopy<T, 3,1>(pDest, pSrc, nIters);
3242 }
3243 else if( nSrcStride == 4 * static_cast<int>(sizeof(T)) )
3244 {
3245 GDALUnrolledCopy<T, 4,1>(pDest, pSrc, nIters);
3246 }
3247 else
3248 {
3249 while( nIters-- > 0 )
3250 {
3251 *pDest = *pSrc;
3252 pSrc += nSrcStride / static_cast<int>(sizeof(T));
3253 pDest ++;
3254 }
3255 }
3256 }
3257 else if( nSrcStride == static_cast<int>(sizeof(T)) )
3258 {
3259 if( nDestStride == 2 * static_cast<int>(sizeof(T)) )
3260 {
3261 GDALUnrolledCopy<T, 1,2>(pDest, pSrc, nIters);
3262 }
3263 else if( nDestStride == 3 * static_cast<int>(sizeof(T)) )
3264 {
3265 GDALUnrolledCopy<T, 1,3>(pDest, pSrc, nIters);
3266 }
3267 else if( nDestStride == 4 * static_cast<int>(sizeof(T)) )
3268 {
3269 GDALUnrolledCopy<T, 1,4>(pDest, pSrc, nIters);
3270 }
3271 else
3272 {
3273 while( nIters-- > 0 )
3274 {
3275 *pDest = *pSrc;
3276 pSrc ++;
3277 pDest += nDestStride / static_cast<int>(sizeof(T));
3278 }
3279 }
3280 }
3281 else
3282 {
3283 while( nIters-- > 0 )
3284 {
3285 *pDest = *pSrc;
3286 pSrc += nSrcStride / static_cast<int>(sizeof(T));
3287 pDest += nDestStride / static_cast<int>(sizeof(T));
3288 }
3289 }
3290 }
3291
3292 /************************************************************************/
3293 /* GDALFastCopyByte() */
3294 /************************************************************************/
3295
GDALFastCopyByte(const GByte * CPL_RESTRICT pSrcData,int nSrcPixelStride,GByte * CPL_RESTRICT pDstData,int nDstPixelStride,GPtrDiff_t nWordCount)3296 static void GDALFastCopyByte( const GByte * CPL_RESTRICT pSrcData,
3297 int nSrcPixelStride,
3298 GByte * CPL_RESTRICT pDstData,
3299 int nDstPixelStride,
3300 GPtrDiff_t nWordCount )
3301 {
3302 GDALFastCopy( pDstData, nDstPixelStride,
3303 pSrcData, nSrcPixelStride, nWordCount );
3304 }
3305
3306 /************************************************************************/
3307 /* GDALCopyWords() */
3308 /************************************************************************/
3309
3310 /**
3311 * Copy pixel words from buffer to buffer.
3312 *
3313 * @see GDALCopyWords64()
3314 */
3315 void CPL_STDCALL
GDALCopyWords(const void * CPL_RESTRICT pSrcData,GDALDataType eSrcType,int nSrcPixelStride,void * CPL_RESTRICT pDstData,GDALDataType eDstType,int nDstPixelStride,int nWordCount)3316 GDALCopyWords( const void * CPL_RESTRICT pSrcData,
3317 GDALDataType eSrcType,
3318 int nSrcPixelStride,
3319 void * CPL_RESTRICT pDstData,
3320 GDALDataType eDstType,
3321 int nDstPixelStride,
3322 int nWordCount )
3323 {
3324 GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride,
3325 pDstData, eDstType, nDstPixelStride,
3326 nWordCount);
3327 }
3328
3329 /************************************************************************/
3330 /* GDALCopyWords64() */
3331 /************************************************************************/
3332
3333 /**
3334 * Copy pixel words from buffer to buffer.
3335 *
3336 * This function is used to copy pixel word values from one memory buffer
3337 * to another, with support for conversion between data types, and differing
3338 * step factors. The data type conversion is done using the normal GDAL
3339 * rules. Values assigned to a lower range integer type are clipped. For
3340 * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3341 * less the 0 to be set to 0, and values larger than 255 to be set to 255.
3342 * Assignment from floating point to integer uses default C type casting
3343 * semantics. Assignment from non-complex to complex will result in the
3344 * imaginary part being set to zero on output. Assignment from complex to
3345 * non-complex will result in the complex portion being lost and the real
3346 * component being preserved (<i>not magnitude!</i>).
3347 *
3348 * No assumptions are made about the source or destination words occurring
3349 * on word boundaries. It is assumed that all values are in native machine
3350 * byte order.
3351 *
3352 * @param pSrcData Pointer to source data to be converted.
3353 * @param eSrcType the source data type (see GDALDataType enum)
3354 * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3355 * in bytes
3356 * @param pDstData Pointer to buffer where destination data should go
3357 * @param eDstType the destination data type (see GDALDataType enum)
3358 * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3359 * words), in bytes
3360 * @param nWordCount number of words to be copied
3361 *
3362 * @note
3363 * When adding a new data type to GDAL, you must do the following to
3364 * support it properly within the GDALCopyWords function:
3365 * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3366 * This should invoke the appropriate GDALCopyWordsFromT wrapper.
3367 * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3368 * This should call the appropriate GDALCopyWordsT template.
3369 * 3. If appropriate, overload the appropriate CopyWord template in the
3370 * above namespace. This will ensure that any conversion issues are
3371 * handled (cases like the float -> int32 case, where the min/max)
3372 * values are subject to roundoff error.
3373 */
3374
3375 void CPL_STDCALL
GDALCopyWords64(const void * CPL_RESTRICT pSrcData,GDALDataType eSrcType,int nSrcPixelStride,void * CPL_RESTRICT pDstData,GDALDataType eDstType,int nDstPixelStride,GPtrDiff_t nWordCount)3376 GDALCopyWords64( const void * CPL_RESTRICT pSrcData,
3377 GDALDataType eSrcType,
3378 int nSrcPixelStride,
3379 void * CPL_RESTRICT pDstData,
3380 GDALDataType eDstType,
3381 int nDstPixelStride,
3382 GPtrDiff_t nWordCount )
3383
3384 {
3385 // On platforms where alignment matters, be careful
3386 const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
3387 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
3388 const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
3389 if( !(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) &&
3390 ( (reinterpret_cast<GPtrDiff_t>(pSrcData) % nSrcDataTypeSize) != 0 ||
3391 (reinterpret_cast<GPtrDiff_t>(pDstData) % nDstDataTypeSize) != 0 ||
3392 ( nSrcPixelStride % nSrcDataTypeSize) != 0 ||
3393 ( nDstPixelStride % nDstDataTypeSize) != 0 ) )
3394 {
3395 if( eSrcType == eDstType )
3396 {
3397 for( decltype(nWordCount) i = 0; i < nWordCount; i++ )
3398 {
3399 memcpy( static_cast<GByte*>(pDstData) + nDstPixelStride * i,
3400 static_cast<const GByte*>(pSrcData) + nSrcPixelStride * i,
3401 nDstDataTypeSize );
3402 }
3403 }
3404 else
3405 {
3406 #define ALIGN_PTR(ptr, align) ((ptr) + ((align) - (reinterpret_cast<size_t>(ptr) % (align))) % (align))
3407 // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
3408 // be sure to get correctly aligned pointer.
3409 GByte abySrcBuffer[32];
3410 GByte abyDstBuffer[32];
3411 GByte* pabySrcBuffer = ALIGN_PTR(abySrcBuffer, nSrcDataTypeSize);
3412 GByte* pabyDstBuffer = ALIGN_PTR(abyDstBuffer, nDstDataTypeSize);
3413 for( decltype(nWordCount) i = 0; i < nWordCount; i++ )
3414 {
3415 memcpy( pabySrcBuffer,
3416 static_cast<const GByte*>(pSrcData) + nSrcPixelStride * i,
3417 nSrcDataTypeSize );
3418 GDALCopyWords64( pabySrcBuffer,
3419 eSrcType,
3420 0,
3421 pabyDstBuffer,
3422 eDstType,
3423 0,
3424 1 );
3425 memcpy( static_cast<GByte*>(pDstData) + nDstPixelStride * i,
3426 pabyDstBuffer,
3427 nDstDataTypeSize );
3428 }
3429 }
3430 return;
3431 }
3432 #endif
3433
3434 // Deal with the case where we're replicating a single word into the
3435 // provided buffer
3436 if (nSrcPixelStride == 0 && nWordCount > 1)
3437 {
3438 GDALReplicateWord( pSrcData, eSrcType, pDstData,
3439 eDstType, nDstPixelStride, nWordCount );
3440 return;
3441 }
3442
3443 if (eSrcType == eDstType)
3444 {
3445 if( eSrcType == GDT_Byte )
3446 {
3447 GDALFastCopy(
3448 static_cast<GByte*>(pDstData), nDstPixelStride,
3449 static_cast<const GByte*>(pSrcData), nSrcPixelStride, nWordCount );
3450 return;
3451 }
3452
3453 if( nSrcDataTypeSize == 2 && (nSrcPixelStride%2) == 0 &&
3454 (nDstPixelStride%2) == 0 )
3455 {
3456 GDALFastCopy(
3457 static_cast<short*>(pDstData), nDstPixelStride,
3458 static_cast<const short*>(pSrcData), nSrcPixelStride, nWordCount );
3459 return;
3460 }
3461
3462 if( nWordCount == 1 )
3463 {
3464 #ifdef CSA_BUILD
3465 // Avoid false positives...
3466 memcpy(pDstData, pSrcData, nSrcDataTypeSize);
3467 #else
3468 if( nSrcDataTypeSize == 2 )
3469 memcpy(pDstData, pSrcData, 2);
3470 else if( nSrcDataTypeSize == 4 )
3471 memcpy(pDstData, pSrcData, 4);
3472 else if( nSrcDataTypeSize == 8 )
3473 memcpy(pDstData, pSrcData, 8 );
3474 else /* if( eSrcType == GDT_CFloat64 ) */
3475 memcpy(pDstData, pSrcData, 16);
3476 #endif
3477 return;
3478 }
3479
3480 // Let memcpy() handle the case where we're copying a packed buffer
3481 // of pixels.
3482 if( nSrcPixelStride == nDstPixelStride )
3483 {
3484 if( nSrcPixelStride == nSrcDataTypeSize)
3485 {
3486 memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize);
3487 return;
3488 }
3489 }
3490 }
3491
3492 // Handle the more general case -- deals with conversion of data types
3493 // directly.
3494 switch (eSrcType)
3495 {
3496 case GDT_Byte:
3497 GDALCopyWordsFromT<unsigned char>(
3498 static_cast<const unsigned char *>(pSrcData),
3499 nSrcPixelStride, false,
3500 pDstData, eDstType, nDstPixelStride,
3501 nWordCount );
3502 break;
3503 case GDT_UInt16:
3504 GDALCopyWordsFromT<unsigned short>(
3505 static_cast<const unsigned short *>(pSrcData),
3506 nSrcPixelStride, false,
3507 pDstData, eDstType, nDstPixelStride,
3508 nWordCount );
3509 break;
3510 case GDT_Int16:
3511 GDALCopyWordsFromT<short>(
3512 static_cast<const short *>(pSrcData), nSrcPixelStride, false,
3513 pDstData, eDstType, nDstPixelStride,
3514 nWordCount );
3515 break;
3516 case GDT_UInt32:
3517 GDALCopyWordsFromT<unsigned int>(
3518 static_cast<const unsigned int *>(pSrcData), nSrcPixelStride, false,
3519 pDstData, eDstType, nDstPixelStride,
3520 nWordCount );
3521 break;
3522 case GDT_Int32:
3523 GDALCopyWordsFromT<int>(
3524 static_cast<const int *>(pSrcData), nSrcPixelStride, false,
3525 pDstData, eDstType, nDstPixelStride,
3526 nWordCount );
3527 break;
3528 case GDT_Float32:
3529 GDALCopyWordsFromT<float>(
3530 static_cast<const float *>(pSrcData), nSrcPixelStride, false,
3531 pDstData, eDstType, nDstPixelStride,
3532 nWordCount );
3533 break;
3534 case GDT_Float64:
3535 GDALCopyWordsFromT<double>(
3536 static_cast<const double *>(pSrcData), nSrcPixelStride, false,
3537 pDstData, eDstType, nDstPixelStride,
3538 nWordCount );
3539 break;
3540 case GDT_CInt16:
3541 GDALCopyWordsFromT<short>(
3542 static_cast<const short *>(pSrcData), nSrcPixelStride, true,
3543 pDstData, eDstType, nDstPixelStride,
3544 nWordCount );
3545 break;
3546 case GDT_CInt32:
3547 GDALCopyWordsFromT<int>(
3548 static_cast<const int *>(pSrcData), nSrcPixelStride, true,
3549 pDstData, eDstType, nDstPixelStride,
3550 nWordCount );
3551 break;
3552 case GDT_CFloat32:
3553 GDALCopyWordsFromT<float>(
3554 static_cast<const float *>(pSrcData), nSrcPixelStride, true,
3555 pDstData, eDstType, nDstPixelStride,
3556 nWordCount );
3557 break;
3558 case GDT_CFloat64:
3559 GDALCopyWordsFromT<double>(
3560 static_cast<const double *>(pSrcData), nSrcPixelStride, true,
3561 pDstData, eDstType, nDstPixelStride,
3562 nWordCount );
3563 break;
3564 case GDT_Unknown:
3565 default:
3566 CPLAssert(false);
3567 }
3568 }
3569
3570 /************************************************************************/
3571 /* GDALCopyBits() */
3572 /************************************************************************/
3573
3574 /**
3575 * Bitwise word copying.
3576 *
3577 * A function for moving sets of partial bytes around. Loosely
3578 * speaking this is a bitwise analog to GDALCopyWords().
3579 *
3580 * It copies nStepCount "words" where each word is nBitCount bits long.
3581 * The nSrcStep and nDstStep are the number of bits from the start of one
3582 * word to the next (same as nBitCount if they are packed). The nSrcOffset
3583 * and nDstOffset are the offset into the source and destination buffers
3584 * to start at, also measured in bits.
3585 *
3586 * All bit offsets are assumed to start from the high order bit in a byte
3587 * (i.e. most significant bit first). Currently this function is not very
3588 * optimized, but it may be improved for some common cases in the future
3589 * as needed.
3590 *
3591 * @param pabySrcData the source data buffer.
3592 * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
3593 * first word to copy.
3594 * @param nSrcStep the offset in bits from the start one source word to the
3595 * start of the next.
3596 * @param pabyDstData the destination data buffer.
3597 * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
3598 * first word to copy over.
3599 * @param nDstStep the offset in bits from the start one word to the
3600 * start of the next.
3601 * @param nBitCount the number of bits in a word to be copied.
3602 * @param nStepCount the number of words to copy.
3603 */
3604
GDALCopyBits(const GByte * pabySrcData,int nSrcOffset,int nSrcStep,GByte * pabyDstData,int nDstOffset,int nDstStep,int nBitCount,int nStepCount)3605 void GDALCopyBits( const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
3606 GByte *pabyDstData, int nDstOffset, int nDstStep,
3607 int nBitCount, int nStepCount )
3608
3609 {
3610 VALIDATE_POINTER0( pabySrcData, "GDALCopyBits" );
3611
3612 for( int iStep = 0; iStep < nStepCount; iStep++ )
3613 {
3614 for( int iBit = 0; iBit < nBitCount; iBit++ )
3615 {
3616 if( pabySrcData[nSrcOffset>>3]
3617 & (0x80 >>(nSrcOffset & 7)) )
3618 pabyDstData[nDstOffset>>3] |= (0x80 >> (nDstOffset & 7));
3619 else
3620 pabyDstData[nDstOffset>>3] &= ~(0x80 >> (nDstOffset & 7));
3621
3622 nSrcOffset++;
3623 nDstOffset++;
3624 }
3625
3626 nSrcOffset += (nSrcStep - nBitCount);
3627 nDstOffset += (nDstStep - nBitCount);
3628 }
3629 }
3630
3631 /************************************************************************/
3632 /* GDALGetBestOverviewLevel() */
3633 /* */
3634 /* Returns the best overview level to satisfy the query or -1 if none */
3635 /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when */
3636 /* returning a valid overview level */
3637 /************************************************************************/
3638
GDALBandGetBestOverviewLevel(GDALRasterBand * poBand,int & nXOff,int & nYOff,int & nXSize,int & nYSize,int nBufXSize,int nBufYSize)3639 int GDALBandGetBestOverviewLevel( GDALRasterBand* poBand,
3640 int &nXOff, int &nYOff,
3641 int &nXSize, int &nYSize,
3642 int nBufXSize, int nBufYSize )
3643 {
3644 return GDALBandGetBestOverviewLevel2( poBand, nXOff, nYOff, nXSize, nYSize,
3645 nBufXSize, nBufYSize, nullptr );
3646 }
3647
GDALBandGetBestOverviewLevel2(GDALRasterBand * poBand,int & nXOff,int & nYOff,int & nXSize,int & nYSize,int nBufXSize,int nBufYSize,GDALRasterIOExtraArg * psExtraArg)3648 int GDALBandGetBestOverviewLevel2( GDALRasterBand* poBand,
3649 int &nXOff, int &nYOff,
3650 int &nXSize, int &nYSize,
3651 int nBufXSize, int nBufYSize,
3652 GDALRasterIOExtraArg* psExtraArg )
3653 {
3654 double dfDesiredResolution = 0.0;
3655 /* -------------------------------------------------------------------- */
3656 /* Compute the desired resolution. The resolution is */
3657 /* based on the least reduced axis, and represents the number */
3658 /* of source pixels to one destination pixel. */
3659 /* -------------------------------------------------------------------- */
3660 if( (nXSize / static_cast<double>(nBufXSize)) <
3661 (nYSize / static_cast<double>(nBufYSize))
3662 || nBufYSize == 1 )
3663 dfDesiredResolution = nXSize / static_cast<double>( nBufXSize );
3664 else
3665 dfDesiredResolution = nYSize / static_cast<double>( nBufYSize );
3666
3667 /* -------------------------------------------------------------------- */
3668 /* Find the overview level that largest resolution value (most */
3669 /* downsampled) that is still less than (or only a little more) */
3670 /* downsampled than the request. */
3671 /* -------------------------------------------------------------------- */
3672 int nOverviewCount = poBand->GetOverviewCount();
3673 GDALRasterBand* poBestOverview = nullptr;
3674 double dfBestResolution = 0;
3675 int nBestOverviewLevel = -1;
3676
3677 for( int iOverview = 0; iOverview < nOverviewCount; iOverview++ )
3678 {
3679 GDALRasterBand *poOverview = poBand->GetOverview( iOverview );
3680 if (poOverview == nullptr ||
3681 poOverview->GetXSize() > poBand->GetXSize() ||
3682 poOverview->GetYSize() > poBand->GetYSize() )
3683 {
3684 continue;
3685 }
3686
3687 double dfResolution = 0.0;
3688
3689 // What resolution is this?
3690 if( (poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()))
3691 < (poBand->GetYSize() /
3692 static_cast<double>(poOverview->GetYSize())) )
3693 dfResolution =
3694 poBand->GetXSize() /
3695 static_cast<double>( poOverview->GetXSize() );
3696 else
3697 dfResolution =
3698 poBand->GetYSize() /
3699 static_cast<double>( poOverview->GetYSize() );
3700
3701 // Is it nearly the requested resolution and better (lower) than
3702 // the current best resolution?
3703 if( dfResolution >= dfDesiredResolution * 1.2
3704 || dfResolution <= dfBestResolution )
3705 continue;
3706
3707 // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
3708 const char *pszResampling =
3709 poOverview->GetMetadataItem( "RESAMPLING" );
3710
3711 if( pszResampling != nullptr &&
3712 STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2") )
3713 continue;
3714
3715 // OK, this is our new best overview.
3716 poBestOverview = poOverview;
3717 nBestOverviewLevel = iOverview;
3718 dfBestResolution = dfResolution;
3719 }
3720
3721 /* -------------------------------------------------------------------- */
3722 /* If we didn't find an overview that helps us, just return */
3723 /* indicating failure and the full resolution image will be used. */
3724 /* -------------------------------------------------------------------- */
3725 if( nBestOverviewLevel < 0 )
3726 return -1;
3727
3728 /* -------------------------------------------------------------------- */
3729 /* Recompute the source window in terms of the selected */
3730 /* overview. */
3731 /* -------------------------------------------------------------------- */
3732 const double dfXRes =
3733 poBand->GetXSize() / static_cast<double>( poBestOverview->GetXSize() );
3734 const double dfYRes =
3735 poBand->GetYSize() / static_cast<double>( poBestOverview->GetYSize() );
3736
3737 const int nOXOff = std::min( poBestOverview->GetXSize()-1,
3738 static_cast<int>(nXOff / dfXRes + 0.5));
3739 const int nOYOff = std::min( poBestOverview->GetYSize()-1,
3740 static_cast<int>(nYOff / dfYRes + 0.5));
3741 int nOXSize = std::max(1, static_cast<int>(nXSize / dfXRes + 0.5));
3742 int nOYSize = std::max(1, static_cast<int>(nYSize / dfYRes + 0.5));
3743 if( nOXOff + nOXSize > poBestOverview->GetXSize() )
3744 nOXSize = poBestOverview->GetXSize() - nOXOff;
3745 if( nOYOff + nOYSize > poBestOverview->GetYSize() )
3746 nOYSize = poBestOverview->GetYSize() - nOYOff;
3747
3748 nXOff = nOXOff;
3749 nYOff = nOYOff;
3750 nXSize = nOXSize;
3751 nYSize = nOYSize;
3752
3753 if( psExtraArg && psExtraArg->bFloatingPointWindowValidity )
3754 {
3755 psExtraArg->dfXOff /= dfXRes;
3756 psExtraArg->dfXSize /= dfXRes;
3757 psExtraArg->dfYOff /= dfYRes;
3758 psExtraArg->dfYSize /= dfYRes;
3759 }
3760
3761 return nBestOverviewLevel;
3762 }
3763
3764 /************************************************************************/
3765 /* OverviewRasterIO() */
3766 /* */
3767 /* Special work function to utilize available overviews to */
3768 /* more efficiently satisfy downsampled requests. It will */
3769 /* return CE_Failure if there are no appropriate overviews */
3770 /* available but it doesn't emit any error messages. */
3771 /************************************************************************/
3772
3773 //! @cond Doxygen_Suppress
OverviewRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,GSpacing nPixelSpace,GSpacing nLineSpace,GDALRasterIOExtraArg * psExtraArg)3774 CPLErr GDALRasterBand::OverviewRasterIO( GDALRWFlag eRWFlag,
3775 int nXOff, int nYOff, int nXSize, int nYSize,
3776 void * pData, int nBufXSize, int nBufYSize,
3777 GDALDataType eBufType,
3778 GSpacing nPixelSpace, GSpacing nLineSpace,
3779 GDALRasterIOExtraArg* psExtraArg )
3780
3781 {
3782 GDALRasterIOExtraArg sExtraArg;
3783 GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3784
3785 const int nOverview =
3786 GDALBandGetBestOverviewLevel2( this, nXOff, nYOff, nXSize, nYSize,
3787 nBufXSize, nBufYSize, &sExtraArg );
3788 if (nOverview < 0)
3789 return CE_Failure;
3790
3791 /* -------------------------------------------------------------------- */
3792 /* Recast the call in terms of the new raster layer. */
3793 /* -------------------------------------------------------------------- */
3794 GDALRasterBand* poOverviewBand = GetOverview(nOverview);
3795 if (poOverviewBand == nullptr)
3796 return CE_Failure;
3797
3798 return poOverviewBand->RasterIO( eRWFlag, nXOff, nYOff, nXSize, nYSize,
3799 pData, nBufXSize, nBufYSize, eBufType,
3800 nPixelSpace, nLineSpace, &sExtraArg );
3801 }
3802
3803 /************************************************************************/
3804 /* TryOverviewRasterIO() */
3805 /************************************************************************/
3806
TryOverviewRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,GSpacing nPixelSpace,GSpacing nLineSpace,GDALRasterIOExtraArg * psExtraArg,int * pbTried)3807 CPLErr GDALRasterBand::TryOverviewRasterIO( GDALRWFlag eRWFlag,
3808 int nXOff, int nYOff, int nXSize, int nYSize,
3809 void * pData, int nBufXSize, int nBufYSize,
3810 GDALDataType eBufType,
3811 GSpacing nPixelSpace, GSpacing nLineSpace,
3812 GDALRasterIOExtraArg* psExtraArg,
3813 int* pbTried )
3814 {
3815 int nXOffMod = nXOff;
3816 int nYOffMod = nYOff;
3817 int nXSizeMod = nXSize;
3818 int nYSizeMod = nYSize;
3819 GDALRasterIOExtraArg sExtraArg;
3820
3821 GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3822
3823 int iOvrLevel = GDALBandGetBestOverviewLevel2( this,
3824 nXOffMod, nYOffMod,
3825 nXSizeMod, nYSizeMod,
3826 nBufXSize, nBufYSize,
3827 &sExtraArg );
3828
3829 if( iOvrLevel >= 0 )
3830 {
3831 GDALRasterBand* poOverviewBand = GetOverview(iOvrLevel);
3832 if( poOverviewBand )
3833 {
3834 *pbTried = TRUE;
3835 return poOverviewBand->RasterIO(
3836 eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod,
3837 pData, nBufXSize, nBufYSize, eBufType,
3838 nPixelSpace, nLineSpace, &sExtraArg);
3839 }
3840 }
3841
3842 *pbTried = FALSE;
3843 return CE_None;
3844 }
3845
3846 /************************************************************************/
3847 /* TryOverviewRasterIO() */
3848 /************************************************************************/
3849
TryOverviewRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,int nBandCount,int * panBandMap,GSpacing nPixelSpace,GSpacing nLineSpace,GSpacing nBandSpace,GDALRasterIOExtraArg * psExtraArg,int * pbTried)3850 CPLErr GDALDataset::TryOverviewRasterIO( GDALRWFlag eRWFlag,
3851 int nXOff, int nYOff, int nXSize, int nYSize,
3852 void * pData, int nBufXSize, int nBufYSize,
3853 GDALDataType eBufType,
3854 int nBandCount, int *panBandMap,
3855 GSpacing nPixelSpace, GSpacing nLineSpace,
3856 GSpacing nBandSpace,
3857 GDALRasterIOExtraArg* psExtraArg,
3858 int* pbTried )
3859 {
3860 int nXOffMod = nXOff;
3861 int nYOffMod = nYOff;
3862 int nXSizeMod = nXSize;
3863 int nYSizeMod = nYSize;
3864 GDALRasterIOExtraArg sExtraArg;
3865 GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3866
3867 int iOvrLevel = GDALBandGetBestOverviewLevel2( papoBands[0],
3868 nXOffMod, nYOffMod,
3869 nXSizeMod, nYSizeMod,
3870 nBufXSize, nBufYSize,
3871 &sExtraArg );
3872
3873 if( iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr &&
3874 papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr )
3875 {
3876 *pbTried = TRUE;
3877 return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO(
3878 eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod,
3879 pData, nBufXSize, nBufYSize, eBufType,
3880 nBandCount, panBandMap, nPixelSpace, nLineSpace, nBandSpace,
3881 &sExtraArg );
3882 }
3883 else
3884 {
3885 *pbTried = FALSE;
3886 return CE_None;
3887 }
3888 }
3889
3890 /************************************************************************/
3891 /* GetBestOverviewLevel() */
3892 /* */
3893 /* Returns the best overview level to satisfy the query or -1 if none */
3894 /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid */
3895 /* overview level */
3896 /************************************************************************/
3897
3898 static
GDALDatasetGetBestOverviewLevel(GDALDataset * poDS,int & nXOff,int & nYOff,int & nXSize,int & nYSize,int nBufXSize,int nBufYSize,int nBandCount,int * panBandMap,GDALRasterIOExtraArg * psExtraArg)3899 int GDALDatasetGetBestOverviewLevel( GDALDataset* poDS,
3900 int &nXOff, int &nYOff,
3901 int &nXSize, int &nYSize,
3902 int nBufXSize, int nBufYSize,
3903 int nBandCount, int *panBandMap,
3904 GDALRasterIOExtraArg* psExtraArg )
3905 {
3906 int nOverviewCount = 0;
3907 GDALRasterBand *poFirstBand = nullptr;
3908
3909 /* -------------------------------------------------------------------- */
3910 /* Check that all bands have the same number of overviews and */
3911 /* that they have all the same size and block dimensions */
3912 /* -------------------------------------------------------------------- */
3913 for( int iBand = 0; iBand < nBandCount; iBand++ )
3914 {
3915 GDALRasterBand *poBand = poDS->GetRasterBand( panBandMap[iBand] );
3916 if ( poBand == nullptr )
3917 return -1;
3918 if (iBand == 0)
3919 {
3920 poFirstBand = poBand;
3921 nOverviewCount = poBand->GetOverviewCount();
3922 }
3923 else if (nOverviewCount != poBand->GetOverviewCount())
3924 {
3925 CPLDebug( "GDAL",
3926 "GDALDataset::GetBestOverviewLevel() ... "
3927 "mismatched overview count, use std method." );
3928 return -1;
3929 }
3930 else
3931 {
3932 for( int iOverview = 0; iOverview < nOverviewCount; iOverview++ )
3933 {
3934 GDALRasterBand* poOvrBand =
3935 poBand->GetOverview(iOverview);
3936 GDALRasterBand* poOvrFirstBand =
3937 poFirstBand->GetOverview(iOverview);
3938 if ( poOvrBand == nullptr || poOvrFirstBand == nullptr)
3939 continue;
3940
3941 if ( poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
3942 poOvrFirstBand->GetYSize() != poOvrBand->GetYSize() )
3943 {
3944 CPLDebug( "GDAL",
3945 "GDALDataset::GetBestOverviewLevel() ... "
3946 "mismatched overview sizes, use std method." );
3947 return -1;
3948 }
3949 int nBlockXSizeFirst = 0;
3950 int nBlockYSizeFirst = 0;
3951 poOvrFirstBand->GetBlockSize( &nBlockXSizeFirst,
3952 &nBlockYSizeFirst );
3953
3954 int nBlockXSizeCurrent = 0;
3955 int nBlockYSizeCurrent = 0;
3956 poOvrBand->GetBlockSize( &nBlockXSizeCurrent,
3957 &nBlockYSizeCurrent );
3958
3959 if (nBlockXSizeFirst != nBlockXSizeCurrent ||
3960 nBlockYSizeFirst != nBlockYSizeCurrent)
3961 {
3962 CPLDebug(
3963 "GDAL",
3964 "GDALDataset::GetBestOverviewLevel() ... "
3965 "mismatched block sizes, use std method." );
3966 return -1;
3967 }
3968 }
3969 }
3970 }
3971 if( poFirstBand == nullptr )
3972 return -1;
3973
3974 return GDALBandGetBestOverviewLevel2( poFirstBand,
3975 nXOff, nYOff, nXSize, nYSize,
3976 nBufXSize, nBufYSize, psExtraArg );
3977 }
3978
3979 /************************************************************************/
3980 /* BlockBasedRasterIO() */
3981 /* */
3982 /* This convenience function implements a dataset level */
3983 /* RasterIO() interface based on calling down to fetch blocks, */
3984 /* much like the GDALRasterBand::IRasterIO(), but it handles */
3985 /* all bands at once, so that a format driver that handles a */
3986 /* request for different bands of the same block efficiently */
3987 /* (i.e. without re-reading interleaved data) will efficiently. */
3988 /* */
3989 /* This method is intended to be called by an overridden */
3990 /* IRasterIO() method in the driver specific GDALDataset */
3991 /* derived class. */
3992 /* */
3993 /* Default internal implementation of RasterIO() ... utilizes */
3994 /* the Block access methods to satisfy the request. This would */
3995 /* normally only be overridden by formats with overviews. */
3996 /* */
3997 /* To keep things relatively simple, this method does not */
3998 /* currently take advantage of some special cases addressed in */
3999 /* GDALRasterBand::IRasterIO(), so it is likely best to only */
4000 /* call it when you know it will help. That is in cases where */
4001 /* data is at 1:1 to the buffer, and you know the driver is */
4002 /* implementing interleaved IO efficiently on a block by block */
4003 /* basis. Overviews will be used when possible. */
4004 /************************************************************************/
4005
4006 CPLErr
BlockBasedRasterIO(GDALRWFlag eRWFlag,int nXOff,int nYOff,int nXSize,int nYSize,void * pData,int nBufXSize,int nBufYSize,GDALDataType eBufType,int nBandCount,int * panBandMap,GSpacing nPixelSpace,GSpacing nLineSpace,GSpacing nBandSpace,GDALRasterIOExtraArg * psExtraArg)4007 GDALDataset::BlockBasedRasterIO( GDALRWFlag eRWFlag,
4008 int nXOff, int nYOff, int nXSize, int nYSize,
4009 void * pData, int nBufXSize, int nBufYSize,
4010 GDALDataType eBufType,
4011 int nBandCount, int *panBandMap,
4012 GSpacing nPixelSpace, GSpacing nLineSpace,
4013 GSpacing nBandSpace,
4014 GDALRasterIOExtraArg* psExtraArg )
4015
4016 {
4017 CPLAssert( nullptr != pData );
4018
4019 GByte **papabySrcBlock = nullptr;
4020 GDALRasterBlock *poBlock = nullptr;
4021 GDALRasterBlock **papoBlocks = nullptr;
4022 int nLBlockX = -1;
4023 int nLBlockY = -1;
4024 int iBufYOff;
4025 int iBufXOff;
4026 int nBlockXSize = 1;
4027 int nBlockYSize = 1;
4028 CPLErr eErr = CE_None;
4029 GDALDataType eDataType = GDT_Byte;
4030
4031 const bool bUseIntegerRequestCoords =
4032 (!psExtraArg->bFloatingPointWindowValidity ||
4033 (nXOff == psExtraArg->dfXOff &&
4034 nYOff == psExtraArg->dfYOff &&
4035 nXSize == psExtraArg->dfXSize &&
4036 nYSize == psExtraArg->dfYSize));
4037
4038 /* -------------------------------------------------------------------- */
4039 /* Ensure that all bands share a common block size and data type. */
4040 /* -------------------------------------------------------------------- */
4041 for( int iBand = 0; iBand < nBandCount; iBand++ )
4042 {
4043 GDALRasterBand *poBand = GetRasterBand( panBandMap[iBand] );
4044
4045 if( iBand == 0 )
4046 {
4047 poBand->GetBlockSize( &nBlockXSize, &nBlockYSize );
4048 eDataType = poBand->GetRasterDataType();
4049 }
4050 else
4051 {
4052 int nThisBlockXSize = 0;
4053 int nThisBlockYSize = 0;
4054 poBand->GetBlockSize( &nThisBlockXSize, &nThisBlockYSize );
4055 if( nThisBlockXSize != nBlockXSize
4056 || nThisBlockYSize != nBlockYSize )
4057 {
4058 CPLDebug( "GDAL",
4059 "GDALDataset::BlockBasedRasterIO() ... "
4060 "mismatched block sizes, use std method." );
4061 return BandBasedRasterIO( eRWFlag,
4062 nXOff, nYOff, nXSize, nYSize,
4063 pData, nBufXSize, nBufYSize,
4064 eBufType,
4065 nBandCount, panBandMap,
4066 nPixelSpace, nLineSpace,
4067 nBandSpace, psExtraArg );
4068 }
4069
4070 if( eDataType != poBand->GetRasterDataType()
4071 && (nXSize != nBufXSize || nYSize != nBufYSize) )
4072 {
4073 CPLDebug( "GDAL",
4074 "GDALDataset::BlockBasedRasterIO() ... "
4075 "mismatched band data types, use std method." );
4076 return BandBasedRasterIO( eRWFlag,
4077 nXOff, nYOff, nXSize, nYSize,
4078 pData, nBufXSize, nBufYSize,
4079 eBufType,
4080 nBandCount, panBandMap,
4081 nPixelSpace, nLineSpace,
4082 nBandSpace, psExtraArg );
4083 }
4084 }
4085 }
4086
4087 /* ==================================================================== */
4088 /* In this special case at full resolution we step through in */
4089 /* blocks, turning the request over to the per-band */
4090 /* IRasterIO(), but ensuring that all bands of one block are */
4091 /* called before proceeding to the next. */
4092 /* ==================================================================== */
4093
4094 if( nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords )
4095 {
4096 GDALRasterIOExtraArg sDummyExtraArg;
4097 INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);
4098
4099 int nChunkYSize = 0;
4100 int nChunkXSize = 0;
4101
4102 for( iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize )
4103 {
4104 const int nChunkYOff = iBufYOff + nYOff;
4105 nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
4106 if( nChunkYOff + nChunkYSize > nYOff + nYSize )
4107 nChunkYSize = (nYOff + nYSize) - nChunkYOff;
4108
4109 for( iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize )
4110 {
4111 const int nChunkXOff = iBufXOff + nXOff;
4112 nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
4113 if( nChunkXOff + nChunkXSize > nXOff + nXSize )
4114 nChunkXSize = (nXOff + nXSize) - nChunkXOff;
4115
4116 GByte *pabyChunkData =
4117 static_cast<GByte *>(pData)
4118 + iBufXOff * nPixelSpace
4119 + static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;
4120
4121 for( int iBand = 0; iBand < nBandCount; iBand++ )
4122 {
4123 GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4124
4125 eErr =
4126 poBand->GDALRasterBand::IRasterIO(
4127 eRWFlag, nChunkXOff, nChunkYOff,
4128 nChunkXSize, nChunkYSize,
4129 pabyChunkData +
4130 static_cast<GPtrDiff_t>(iBand) * nBandSpace,
4131 nChunkXSize, nChunkYSize, eBufType,
4132 nPixelSpace, nLineSpace, &sDummyExtraArg );
4133 if( eErr != CE_None )
4134 return eErr;
4135 }
4136 }
4137
4138 if( psExtraArg->pfnProgress != nullptr &&
4139 !psExtraArg->pfnProgress(
4140 1.0 * std::min(nBufYSize,
4141 iBufYOff + nChunkYSize) /
4142 nBufYSize, "", psExtraArg->pProgressData) )
4143 {
4144 return CE_Failure;
4145 }
4146 }
4147
4148 return CE_None;
4149 }
4150
4151 /* Below code is not compatible with that case. It would need a complete */
4152 /* separate code like done in GDALRasterBand::IRasterIO. */
4153 if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
4154 {
4155 return BandBasedRasterIO( eRWFlag,
4156 nXOff, nYOff, nXSize, nYSize,
4157 pData, nBufXSize, nBufYSize,
4158 eBufType,
4159 nBandCount, panBandMap,
4160 nPixelSpace, nLineSpace,
4161 nBandSpace, psExtraArg );
4162 }
4163
4164 /* We could have a smarter implementation, but that will do for now */
4165 if( psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
4166 (nBufXSize != nXSize || nBufYSize != nYSize) )
4167 {
4168 return BandBasedRasterIO( eRWFlag,
4169 nXOff, nYOff, nXSize, nYSize,
4170 pData, nBufXSize, nBufYSize,
4171 eBufType,
4172 nBandCount, panBandMap,
4173 nPixelSpace, nLineSpace,
4174 nBandSpace, psExtraArg );
4175 }
4176
4177 /* ==================================================================== */
4178 /* Loop reading required source blocks to satisfy output */
4179 /* request. This is the most general implementation. */
4180 /* ==================================================================== */
4181
4182 const int nBandDataSize = GDALGetDataTypeSizeBytes( eDataType );
4183
4184 papabySrcBlock = static_cast<GByte **>(CPLCalloc(sizeof(GByte*),nBandCount));
4185 papoBlocks = static_cast<GDALRasterBlock **>(
4186 CPLCalloc(sizeof(void*), nBandCount) );
4187
4188 /* -------------------------------------------------------------------- */
4189 /* Select an overview level if appropriate. */
4190 /* -------------------------------------------------------------------- */
4191
4192 GDALRasterIOExtraArg sExtraArg;
4193 GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
4194 const int nOverviewLevel =
4195 GDALDatasetGetBestOverviewLevel( this,
4196 nXOff, nYOff, nXSize, nYSize,
4197 nBufXSize, nBufYSize,
4198 nBandCount, panBandMap,
4199 &sExtraArg );
4200 if (nOverviewLevel >= 0)
4201 {
4202 GetRasterBand(panBandMap[0])->GetOverview(nOverviewLevel)->
4203 GetBlockSize( &nBlockXSize, &nBlockYSize );
4204 }
4205
4206 double dfXOff = nXOff;
4207 double dfYOff = nYOff;
4208 double dfXSize = nXSize;
4209 double dfYSize = nYSize;
4210 if( sExtraArg.bFloatingPointWindowValidity )
4211 {
4212 dfXOff = sExtraArg.dfXOff;
4213 dfYOff = sExtraArg.dfYOff;
4214 dfXSize = sExtraArg.dfXSize;
4215 dfYSize = sExtraArg.dfYSize;
4216 }
4217
4218 /* -------------------------------------------------------------------- */
4219 /* Compute stepping increment. */
4220 /* -------------------------------------------------------------------- */
4221 const double dfSrcXInc = dfXSize / static_cast<double>( nBufXSize );
4222 const double dfSrcYInc = dfYSize / static_cast<double>( nBufYSize );
4223
4224 constexpr double EPS = 1e-10;
4225 /* -------------------------------------------------------------------- */
4226 /* Loop over buffer computing source locations. */
4227 /* -------------------------------------------------------------------- */
4228 for( iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++ )
4229 {
4230 GPtrDiff_t iSrcOffset;
4231
4232 // Add small epsilon to avoid some numeric precision issues.
4233 const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
4234 const int iSrcY = static_cast<int>(std::min(std::max(0.0, dfSrcY),
4235 static_cast<double>(nRasterYSize - 1)));
4236
4237 GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * static_cast<GPtrDiff_t>(nLineSpace);
4238
4239 for( iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++ )
4240 {
4241 const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
4242 const int iSrcX = static_cast<int>(std::min(std::max(0.0, dfSrcX),
4243 static_cast<double>(nRasterXSize - 1)));
4244
4245 // FIXME: this code likely doesn't work if the dirty block gets flushed
4246 // to disk before being completely written.
4247 // In the meantime, bJustInitialize should probably be set to FALSE
4248 // even if it is not ideal performance wise, and for lossy compression
4249
4250 /* -------------------------------------------------------------------- */
4251 /* Ensure we have the appropriate block loaded. */
4252 /* -------------------------------------------------------------------- */
4253 if( iSrcX < nLBlockX * nBlockXSize
4254 || iSrcX - nBlockXSize >= nLBlockX * nBlockXSize
4255 || iSrcY < nLBlockY * nBlockYSize
4256 || iSrcY - nBlockYSize >= nLBlockY * nBlockYSize )
4257 {
4258 nLBlockX = iSrcX / nBlockXSize;
4259 nLBlockY = iSrcY / nBlockYSize;
4260
4261 const bool bJustInitialize =
4262 eRWFlag == GF_Write
4263 && nYOff <= nLBlockY * nBlockYSize
4264 && nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize
4265 && nXOff <= nLBlockX * nBlockXSize
4266 && nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
4267 /*bool bMemZeroBuffer = FALSE;
4268 if( eRWFlag == GF_Write && !bJustInitialize &&
4269 nXOff <= nLBlockX * nBlockXSize &&
4270 nYOff <= nLBlockY * nBlockYSize &&
4271 (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
4272 (nXOff + nXSize == GetRasterXSize() &&
4273 (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
4274 (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
4275 (nYOff + nYSize == GetRasterYSize() &&
4276 (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
4277 {
4278 bJustInitialize = TRUE;
4279 bMemZeroBuffer = TRUE;
4280 }*/
4281 for( int iBand = 0; iBand < nBandCount; iBand++ )
4282 {
4283 GDALRasterBand *poBand = GetRasterBand( panBandMap[iBand]);
4284 if (nOverviewLevel >= 0)
4285 poBand = poBand->GetOverview(nOverviewLevel);
4286 poBlock = poBand->GetLockedBlockRef( nLBlockX, nLBlockY,
4287 bJustInitialize );
4288 if( poBlock == nullptr )
4289 {
4290 eErr = CE_Failure;
4291 goto CleanupAndReturn;
4292 }
4293
4294 if( eRWFlag == GF_Write )
4295 poBlock->MarkDirty();
4296
4297 if( papoBlocks[iBand] != nullptr )
4298 papoBlocks[iBand]->DropLock();
4299
4300 papoBlocks[iBand] = poBlock;
4301
4302 papabySrcBlock[iBand] = static_cast<GByte *>(poBlock->GetDataRef());
4303 /*if( bMemZeroBuffer )
4304 {
4305 memset(papabySrcBlock[iBand], 0,
4306 static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize * nBlockYSize);
4307 }*/
4308 }
4309 }
4310
4311 /* -------------------------------------------------------------------- */
4312 /* Copy over this pixel of data. */
4313 /* -------------------------------------------------------------------- */
4314 iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) - static_cast<GPtrDiff_t>(nLBlockX)*nBlockXSize
4315 + (static_cast<GPtrDiff_t>(iSrcY) - static_cast<GPtrDiff_t>(nLBlockY)*nBlockYSize) * nBlockXSize)*nBandDataSize;
4316
4317 for( int iBand = 0; iBand < nBandCount; iBand++ )
4318 {
4319 GByte *pabySrcBlock = papabySrcBlock[iBand];
4320 GPtrDiff_t iBandBufOffset = iBufOffset + static_cast<GPtrDiff_t>(iBand) * static_cast<GPtrDiff_t>(nBandSpace);
4321
4322 if( eDataType == eBufType )
4323 {
4324 if( eRWFlag == GF_Read )
4325 memcpy( static_cast<GByte *>(pData) + iBandBufOffset,
4326 pabySrcBlock + iSrcOffset, nBandDataSize );
4327 else
4328 memcpy( pabySrcBlock + iSrcOffset,
4329 static_cast<const GByte *>(pData) + iBandBufOffset, nBandDataSize );
4330 }
4331 else
4332 {
4333 /* type to type conversion ... ouch, this is expensive way
4334 of handling single words */
4335
4336 if( eRWFlag == GF_Read )
4337 GDALCopyWords( pabySrcBlock + iSrcOffset, eDataType, 0,
4338 static_cast<GByte *>(pData) + iBandBufOffset,
4339 eBufType, 0, 1 );
4340 else
4341 GDALCopyWords( static_cast<const GByte *>(pData) + iBandBufOffset,
4342 eBufType, 0,
4343 pabySrcBlock + iSrcOffset, eDataType, 0,
4344 1 );
4345 }
4346 }
4347
4348 iBufOffset += static_cast<int>(nPixelSpace);
4349 }
4350 }
4351
4352 /* -------------------------------------------------------------------- */
4353 /* CleanupAndReturn. */
4354 /* -------------------------------------------------------------------- */
4355 CleanupAndReturn:
4356 CPLFree( papabySrcBlock );
4357 if( papoBlocks != nullptr )
4358 {
4359 for( int iBand = 0; iBand < nBandCount; iBand++ )
4360 {
4361 if( papoBlocks[iBand] != nullptr )
4362 papoBlocks[iBand]->DropLock();
4363 }
4364 CPLFree( papoBlocks );
4365 }
4366
4367 return eErr;
4368 }
4369 //! @endcond
4370
4371 /************************************************************************/
4372 /* GDALCopyWholeRasterGetSwathSize() */
4373 /************************************************************************/
4374
GDALCopyWholeRasterGetSwathSize(GDALRasterBand * poSrcPrototypeBand,GDALRasterBand * poDstPrototypeBand,int nBandCount,int bDstIsCompressed,int bInterleave,int * pnSwathCols,int * pnSwathLines)4375 static void GDALCopyWholeRasterGetSwathSize(
4376 GDALRasterBand *poSrcPrototypeBand,
4377 GDALRasterBand *poDstPrototypeBand,
4378 int nBandCount,
4379 int bDstIsCompressed, int bInterleave,
4380 int* pnSwathCols, int *pnSwathLines )
4381 {
4382 GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4383 int nSrcBlockXSize = 0;
4384 int nSrcBlockYSize = 0;
4385 int nBlockXSize = 0;
4386 int nBlockYSize = 0;
4387
4388 int nXSize = poSrcPrototypeBand->GetXSize();
4389 int nYSize = poSrcPrototypeBand->GetYSize();
4390
4391 poSrcPrototypeBand->GetBlockSize( &nSrcBlockXSize, &nSrcBlockYSize );
4392 poDstPrototypeBand->GetBlockSize( &nBlockXSize, &nBlockYSize );
4393
4394 const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
4395 const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
4396
4397 int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4398 if( bInterleave)
4399 nPixelSize *= nBandCount;
4400
4401 // aim for one row of blocks. Do not settle for less.
4402 int nSwathCols = nXSize;
4403 int nSwathLines = nBlockYSize;
4404
4405 const char* pszSrcCompression =
4406 poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4407 if( pszSrcCompression == nullptr )
4408 {
4409 auto poSrcDS = poSrcPrototypeBand->GetDataset();
4410 if( poSrcDS )
4411 pszSrcCompression = poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4412 }
4413
4414 /* -------------------------------------------------------------------- */
4415 /* What will our swath size be? */
4416 /* -------------------------------------------------------------------- */
4417 // When writing interleaved data in a compressed format, we want to be sure
4418 // that each block will only be written once, so the swath size must not be
4419 // greater than the block cache.
4420 const char* pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
4421 int nTargetSwathSize;
4422 if( pszSwathSize != nullptr )
4423 nTargetSwathSize = static_cast<int>(
4424 std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
4425 else
4426 {
4427 // As a default, take one 1/4 of the cache size.
4428 nTargetSwathSize = static_cast<int>(
4429 std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
4430
4431 // but if the minimum idal swath buf size is less, then go for it to
4432 // avoid unnecessarily abusing RAM usage.
4433 // but try to use 10 MB at least.
4434 GIntBig nIdealSwathBufSize =
4435 static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
4436 if( nIdealSwathBufSize < nTargetSwathSize &&
4437 nIdealSwathBufSize < 10 * 1000 * 1000 )
4438 {
4439 nIdealSwathBufSize = 10 * 1000 * 1000;
4440 }
4441 if( pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4442 (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
4443 (nSrcBlockYSize % nBlockYSize) == 0)) )
4444 {
4445 nIdealSwathBufSize =
4446 std::max( nIdealSwathBufSize,
4447 static_cast<GIntBig>(nSwathCols) *
4448 nSrcBlockYSize * nPixelSize) ;
4449 }
4450 if( nTargetSwathSize > nIdealSwathBufSize )
4451 nTargetSwathSize = static_cast<int>(
4452 std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
4453 }
4454
4455 if (nTargetSwathSize < 1000000)
4456 nTargetSwathSize = 1000000;
4457
4458 /* But let's check that */
4459 if( bDstIsCompressed && bInterleave &&
4460 nTargetSwathSize > GDALGetCacheMax64() )
4461 {
4462 CPLError(
4463 CE_Warning, CPLE_AppDefined,
4464 "When translating into a compressed interleave format, "
4465 "the block cache size (" CPL_FRMT_GIB ") "
4466 "should be at least the size of the swath (%d) "
4467 "(GDAL_SWATH_SIZE config. option)",
4468 GDALGetCacheMax64(), nTargetSwathSize);
4469 }
4470
4471 #define IS_DIVIDER_OF(x,y) ((y)%(x) == 0)
4472 #define ROUND_TO(x,y) (((x)/(y))*(y))
4473
4474 // if both input and output datasets are tiled, that the tile dimensions
4475 // are "compatible", try to stick to a swath dimension that is a multiple
4476 // of input and output block dimensions.
4477 if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
4478 IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
4479 IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
4480 IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4481 IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4482 {
4483 if( static_cast<GIntBig>(nMaxBlockXSize) *
4484 nMaxBlockYSize * nPixelSize <=
4485 static_cast<GIntBig>(nTargetSwathSize) )
4486 {
4487 nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
4488 nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
4489 if (nSwathCols == 0)
4490 nSwathCols = nMaxBlockXSize;
4491 if (nSwathCols > nXSize)
4492 nSwathCols = nXSize;
4493 nSwathLines = nMaxBlockYSize;
4494
4495 if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
4496 static_cast<GIntBig>(nTargetSwathSize))
4497 {
4498 nSwathCols = nXSize;
4499 nSwathLines = nBlockYSize;
4500 }
4501 }
4502 }
4503
4504 const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
4505 const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
4506 if( nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize) )
4507 {
4508 nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
4509 if (nSwathLines == 0)
4510 nSwathLines = 1;
4511
4512 CPLDebug(
4513 "GDAL",
4514 "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4515 "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4516 "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4517 nSwathLines,
4518 nBlockYSize * nMemoryPerCol,
4519 nTargetSwathSize);
4520 }
4521 // If we are processing single scans, try to handle several at once.
4522 // If we are handling swaths already, only grow the swath if a row
4523 // of blocks is substantially less than our target buffer size.
4524 else if( nSwathLines == 1
4525 || nMemoryPerCol * nSwathLines <
4526 static_cast<GIntBig>(nTargetSwathSize) / 10 )
4527 {
4528 nSwathLines =
4529 std::min(nYSize, std::max(1,
4530 static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
4531
4532 /* If possible try to align to source and target block height */
4533 if ((nSwathLines % nMaxBlockYSize) != 0 &&
4534 nSwathLines > nMaxBlockYSize &&
4535 IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4536 IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4537 nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
4538 }
4539
4540 if( pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4541 (!bDstIsCompressed ||
4542 (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
4543 IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))) )
4544 {
4545 // Typical use case: converting from Pleaiades that is 2048x2048 tiled.
4546 if( nSwathLines < nSrcBlockYSize )
4547 {
4548 nSwathLines = nSrcBlockYSize;
4549
4550 // Number of pixels that can be read/write simultaneously.
4551 nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
4552 nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
4553 if (nSwathCols == 0)
4554 nSwathCols = nSrcBlockXSize;
4555 if (nSwathCols > nXSize)
4556 nSwathCols = nXSize;
4557
4558 CPLDebug(
4559 "GDAL",
4560 "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4561 "too high block, "
4562 "use partial width at one time" );
4563 }
4564 else if ((nSwathLines % nSrcBlockYSize) != 0)
4565 {
4566 /* Round on a multiple of nSrcBlockYSize */
4567 nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
4568 CPLDebug(
4569 "GDAL",
4570 "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4571 "round nSwathLines to block height : %d", nSwathLines);
4572 }
4573 }
4574 else if (bDstIsCompressed)
4575 {
4576 if (nSwathLines < nBlockYSize)
4577 {
4578 nSwathLines = nBlockYSize;
4579
4580 // Number of pixels that can be read/write simultaneously.
4581 nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
4582 nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
4583 if (nSwathCols == 0)
4584 nSwathCols = nBlockXSize;
4585 if (nSwathCols > nXSize)
4586 nSwathCols = nXSize;
4587
4588 CPLDebug(
4589 "GDAL",
4590 "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4591 "too high block, "
4592 "use partial width at one time" );
4593 }
4594 else if ((nSwathLines % nBlockYSize) != 0)
4595 {
4596 // Round on a multiple of nBlockYSize.
4597 nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
4598 CPLDebug(
4599 "GDAL",
4600 "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4601 "round nSwathLines to block height : %d", nSwathLines);
4602 }
4603 }
4604
4605 *pnSwathCols = nSwathCols;
4606 *pnSwathLines = nSwathLines;
4607 }
4608
4609 /************************************************************************/
4610 /* GDALDatasetCopyWholeRaster() */
4611 /************************************************************************/
4612
4613 /**
4614 * \brief Copy all dataset raster data.
4615 *
4616 * This function copies the complete raster contents of one dataset to
4617 * another similarly configured dataset. The source and destination
4618 * dataset must have the same number of bands, and the same width
4619 * and height. The bands do not have to have the same data type.
4620 *
4621 * This function is primarily intended to support implementation of
4622 * driver specific CreateCopy() functions. It implements efficient copying,
4623 * in particular "chunking" the copy in substantial blocks and, if appropriate,
4624 * performing the transfer in a pixel interleaved fashion.
4625 *
4626 * Currently the only papszOptions value supported are :
4627 * <ul>
4628 * <li>"INTERLEAVE=PIXEL" to force pixel interleaved operation</li>
4629 * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4630 * achieve best compression.</li>
4631 * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4632 * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li>
4633 * </ul>
4634 * More options may be supported in the future.
4635 *
4636 * @param hSrcDS the source dataset
4637 * @param hDstDS the destination dataset
4638 * @param papszOptions transfer hints in "StringList" Name=Value format.
4639 * @param pfnProgress progress reporting function.
4640 * @param pProgressData callback data for progress function.
4641 *
4642 * @return CE_None on success, or CE_Failure on failure.
4643 */
4644
GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,GDALDatasetH hDstDS,CSLConstList papszOptions,GDALProgressFunc pfnProgress,void * pProgressData)4645 CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(
4646 GDALDatasetH hSrcDS, GDALDatasetH hDstDS, CSLConstList papszOptions,
4647 GDALProgressFunc pfnProgress, void *pProgressData )
4648
4649 {
4650 VALIDATE_POINTER1( hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure );
4651 VALIDATE_POINTER1( hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure );
4652
4653 GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
4654 GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
4655
4656 if( pfnProgress == nullptr )
4657 pfnProgress = GDALDummyProgress;
4658
4659 /* -------------------------------------------------------------------- */
4660 /* Confirm the datasets match in size and band counts. */
4661 /* -------------------------------------------------------------------- */
4662 const int nXSize = poDstDS->GetRasterXSize();
4663 const int nYSize = poDstDS->GetRasterYSize();
4664 const int nBandCount = poDstDS->GetRasterCount();
4665
4666 if( poSrcDS->GetRasterXSize() != nXSize
4667 || poSrcDS->GetRasterYSize() != nYSize
4668 || poSrcDS->GetRasterCount() != nBandCount )
4669 {
4670 CPLError( CE_Failure, CPLE_AppDefined,
4671 "Input and output dataset sizes or band counts do not\n"
4672 "match in GDALDatasetCopyWholeRaster()" );
4673 return CE_Failure;
4674 }
4675
4676 /* -------------------------------------------------------------------- */
4677 /* Report preliminary (0) progress. */
4678 /* -------------------------------------------------------------------- */
4679 if( !pfnProgress( 0.0, nullptr, pProgressData ) )
4680 {
4681 CPLError( CE_Failure, CPLE_UserInterrupt,
4682 "User terminated CreateCopy()" );
4683 return CE_Failure;
4684 }
4685
4686 /* -------------------------------------------------------------------- */
4687 /* Get our prototype band, and assume the others are similarly */
4688 /* configured. */
4689 /* -------------------------------------------------------------------- */
4690 if( nBandCount == 0 )
4691 return CE_None;
4692
4693 GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
4694 GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
4695 GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4696
4697 /* -------------------------------------------------------------------- */
4698 /* Do we want to try and do the operation in a pixel */
4699 /* interleaved fashion? */
4700 /* -------------------------------------------------------------------- */
4701 bool bInterleave = false;
4702 const char *pszInterleave = poSrcDS->GetMetadataItem(
4703 "INTERLEAVE", "IMAGE_STRUCTURE");
4704 if( pszInterleave != nullptr
4705 && (EQUAL(pszInterleave,"PIXEL") || EQUAL(pszInterleave,"LINE")) )
4706 bInterleave = true;
4707
4708 pszInterleave = poDstDS->GetMetadataItem( "INTERLEAVE", "IMAGE_STRUCTURE");
4709 if( pszInterleave != nullptr
4710 && (EQUAL(pszInterleave,"PIXEL") || EQUAL(pszInterleave,"LINE")) )
4711 bInterleave = true;
4712
4713 pszInterleave = CSLFetchNameValue( papszOptions, "INTERLEAVE" );
4714 if( pszInterleave != nullptr
4715 && (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")) )
4716 bInterleave = true;
4717 else if( pszInterleave != nullptr && EQUAL(pszInterleave, "BAND") )
4718 bInterleave = false;
4719
4720 // If the destination is compressed, we must try to write blocks just once,
4721 // to save disk space (GTiff case for example), and to avoid data loss
4722 // (JPEG compression for example).
4723 bool bDstIsCompressed = false;
4724 const char* pszDstCompressed =
4725 CSLFetchNameValue( papszOptions, "COMPRESSED" );
4726 if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
4727 bDstIsCompressed = true;
4728
4729 /* -------------------------------------------------------------------- */
4730 /* What will our swath size be? */
4731 /* -------------------------------------------------------------------- */
4732
4733 int nSwathCols = 0;
4734 int nSwathLines = 0;
4735 GDALCopyWholeRasterGetSwathSize( poSrcPrototypeBand,
4736 poDstPrototypeBand,
4737 nBandCount,
4738 bDstIsCompressed, bInterleave,
4739 &nSwathCols, &nSwathLines );
4740
4741 int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4742 if( bInterleave)
4743 nPixelSize *= nBandCount;
4744
4745 void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize );
4746 if( pSwathBuf == nullptr )
4747 {
4748 return CE_Failure;
4749 }
4750
4751 CPLDebug( "GDAL",
4752 "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
4753 nSwathCols, nSwathLines, static_cast<int>(bInterleave) );
4754
4755 // Advise the source raster that we are going to read it completely
4756 // Note: this might already have been done by GDALCreateCopy() in the
4757 // likely case this function is indirectly called by it
4758 poSrcDS->AdviseRead( 0, 0, nXSize, nYSize, nXSize, nYSize, eDT,
4759 nBandCount, nullptr, nullptr );
4760
4761 /* ==================================================================== */
4762 /* Band oriented (uninterleaved) case. */
4763 /* ==================================================================== */
4764 CPLErr eErr = CE_None;
4765 const bool bCheckHoles = CPLTestBool( CSLFetchNameValueDef(
4766 papszOptions, "SKIP_HOLES", "NO" ) );
4767
4768 if( !bInterleave )
4769 {
4770 GDALRasterIOExtraArg sExtraArg;
4771 INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4772
4773 const GIntBig nTotalBlocks =
4774 static_cast<GIntBig>(nBandCount) *
4775 DIV_ROUND_UP(nYSize, nSwathLines) *
4776 DIV_ROUND_UP(nXSize, nSwathCols);
4777 GIntBig nBlocksDone = 0;
4778
4779 for( int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++ )
4780 {
4781 int nBand = iBand + 1;
4782
4783 for( int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines )
4784 {
4785 int nThisLines = nSwathLines;
4786
4787 if( iY + nThisLines > nYSize )
4788 nThisLines = nYSize - iY;
4789
4790 for( int iX = 0;
4791 iX < nXSize && eErr == CE_None;
4792 iX += nSwathCols )
4793 {
4794 int nThisCols = nSwathCols;
4795
4796 if( iX + nThisCols > nXSize )
4797 nThisCols = nXSize - iX;
4798
4799 int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4800 if( bCheckHoles )
4801 {
4802 nStatus = poSrcDS->GetRasterBand(nBand)->GetDataCoverageStatus(
4803 iX, iY, nThisCols, nThisLines,
4804 GDAL_DATA_COVERAGE_STATUS_DATA);
4805 }
4806 if( nStatus & GDAL_DATA_COVERAGE_STATUS_DATA )
4807 {
4808 sExtraArg.pfnProgress = GDALScaledProgress;
4809 sExtraArg.pProgressData =
4810 GDALCreateScaledProgress(
4811 nBlocksDone / static_cast<double>(nTotalBlocks),
4812 (nBlocksDone + 0.5) /
4813 static_cast<double>(nTotalBlocks),
4814 pfnProgress,
4815 pProgressData );
4816 if( sExtraArg.pProgressData == nullptr )
4817 sExtraArg.pfnProgress = nullptr;
4818
4819 eErr = poSrcDS->RasterIO( GF_Read,
4820 iX, iY, nThisCols, nThisLines,
4821 pSwathBuf, nThisCols, nThisLines,
4822 eDT, 1, &nBand,
4823 0, 0, 0, &sExtraArg );
4824
4825 GDALDestroyScaledProgress( sExtraArg.pProgressData );
4826
4827 if( eErr == CE_None )
4828 eErr = poDstDS->RasterIO( GF_Write,
4829 iX, iY, nThisCols, nThisLines,
4830 pSwathBuf, nThisCols,
4831 nThisLines,
4832 eDT, 1, &nBand,
4833 0, 0, 0, nullptr );
4834 }
4835
4836 nBlocksDone++;
4837 if( eErr == CE_None
4838 && !pfnProgress(
4839 nBlocksDone / static_cast<double>(nTotalBlocks),
4840 nullptr, pProgressData ) )
4841 {
4842 eErr = CE_Failure;
4843 CPLError( CE_Failure, CPLE_UserInterrupt,
4844 "User terminated CreateCopy()" );
4845 }
4846 }
4847 }
4848 }
4849 }
4850
4851 /* ==================================================================== */
4852 /* Pixel interleaved case. */
4853 /* ==================================================================== */
4854 else /* if( bInterleave ) */
4855 {
4856 GDALRasterIOExtraArg sExtraArg;
4857 INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4858
4859 const GIntBig nTotalBlocks =
4860 static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
4861 DIV_ROUND_UP(nXSize, nSwathCols);
4862 GIntBig nBlocksDone = 0;
4863
4864 for( int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines )
4865 {
4866 int nThisLines = nSwathLines;
4867
4868 if( iY + nThisLines > nYSize )
4869 nThisLines = nYSize - iY;
4870
4871 for( int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols )
4872 {
4873 int nThisCols = nSwathCols;
4874
4875 if( iX + nThisCols > nXSize )
4876 nThisCols = nXSize - iX;
4877
4878 int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4879 if( bCheckHoles )
4880 {
4881 for( int iBand = 0; iBand < nBandCount; iBand++ )
4882 {
4883 nStatus |= poSrcDS->GetRasterBand(iBand+1)->GetDataCoverageStatus(
4884 iX, iY, nThisCols, nThisLines,
4885 GDAL_DATA_COVERAGE_STATUS_DATA);
4886 if( nStatus & GDAL_DATA_COVERAGE_STATUS_DATA )
4887 break;
4888 }
4889 }
4890 if( nStatus & GDAL_DATA_COVERAGE_STATUS_DATA )
4891 {
4892 sExtraArg.pfnProgress = GDALScaledProgress;
4893 sExtraArg.pProgressData =
4894 GDALCreateScaledProgress(
4895 nBlocksDone / static_cast<double>(nTotalBlocks),
4896 (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
4897 pfnProgress,
4898 pProgressData );
4899 if( sExtraArg.pProgressData == nullptr )
4900 sExtraArg.pfnProgress = nullptr;
4901
4902 eErr = poSrcDS->RasterIO( GF_Read,
4903 iX, iY, nThisCols, nThisLines,
4904 pSwathBuf, nThisCols, nThisLines,
4905 eDT, nBandCount, nullptr,
4906 0, 0, 0, &sExtraArg );
4907
4908 GDALDestroyScaledProgress( sExtraArg.pProgressData );
4909
4910 if( eErr == CE_None )
4911 eErr = poDstDS->RasterIO( GF_Write,
4912 iX, iY, nThisCols, nThisLines,
4913 pSwathBuf, nThisCols, nThisLines,
4914 eDT, nBandCount, nullptr,
4915 0, 0, 0, nullptr );
4916 }
4917
4918 nBlocksDone++;
4919 if( eErr == CE_None &&
4920 !pfnProgress(
4921 nBlocksDone / static_cast<double>( nTotalBlocks ),
4922 nullptr, pProgressData ) )
4923 {
4924 eErr = CE_Failure;
4925 CPLError( CE_Failure, CPLE_UserInterrupt,
4926 "User terminated CreateCopy()" );
4927 }
4928 }
4929 }
4930 }
4931
4932 /* -------------------------------------------------------------------- */
4933 /* Cleanup */
4934 /* -------------------------------------------------------------------- */
4935 CPLFree( pSwathBuf );
4936
4937 return eErr;
4938 }
4939
4940 /************************************************************************/
4941 /* GDALRasterBandCopyWholeRaster() */
4942 /************************************************************************/
4943
4944 /**
4945 * \brief Copy a whole raster band
4946 *
4947 * This function copies the complete raster contents of one band to
4948 * another similarly configured band. The source and destination
4949 * bands must have the same width and height. The bands do not have
4950 * to have the same data type.
4951 *
4952 * It implements efficient copying, in particular "chunking" the copy in
4953 * substantial blocks.
4954 *
4955 * Currently the only papszOptions value supported are :
4956 * <ul>
4957 * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4958 * achieve best compression.</li>
4959 * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4960 * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li>
4961 * </ul>
4962 *
4963 * @param hSrcBand the source band
4964 * @param hDstBand the destination band
4965 * @param papszOptions transfer hints in "StringList" Name=Value format.
4966 * @param pfnProgress progress reporting function.
4967 * @param pProgressData callback data for progress function.
4968 *
4969 * @return CE_None on success, or CE_Failure on failure.
4970 */
4971
GDALRasterBandCopyWholeRaster(GDALRasterBandH hSrcBand,GDALRasterBandH hDstBand,const char * const * const papszOptions,GDALProgressFunc pfnProgress,void * pProgressData)4972 CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
4973 GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
4974 const char * const * const papszOptions,
4975 GDALProgressFunc pfnProgress, void *pProgressData )
4976
4977 {
4978 VALIDATE_POINTER1( hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure );
4979 VALIDATE_POINTER1( hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure );
4980
4981 GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle( hSrcBand );
4982 GDALRasterBand *poDstBand = GDALRasterBand::FromHandle( hDstBand );
4983 CPLErr eErr = CE_None;
4984
4985 if( pfnProgress == nullptr )
4986 pfnProgress = GDALDummyProgress;
4987
4988 /* -------------------------------------------------------------------- */
4989 /* Confirm the datasets match in size and band counts. */
4990 /* -------------------------------------------------------------------- */
4991 int nXSize = poSrcBand->GetXSize();
4992 int nYSize = poSrcBand->GetYSize();
4993
4994 if( poDstBand->GetXSize() != nXSize
4995 || poDstBand->GetYSize() != nYSize )
4996 {
4997 CPLError( CE_Failure, CPLE_AppDefined,
4998 "Input and output band sizes do not\n"
4999 "match in GDALRasterBandCopyWholeRaster()" );
5000 return CE_Failure;
5001 }
5002
5003 /* -------------------------------------------------------------------- */
5004 /* Report preliminary (0) progress. */
5005 /* -------------------------------------------------------------------- */
5006 if( !pfnProgress( 0.0, nullptr, pProgressData ) )
5007 {
5008 CPLError( CE_Failure, CPLE_UserInterrupt,
5009 "User terminated CreateCopy()" );
5010 return CE_Failure;
5011 }
5012
5013 GDALDataType eDT = poDstBand->GetRasterDataType();
5014
5015 // If the destination is compressed, we must try to write blocks just once,
5016 // to save disk space (GTiff case for example), and to avoid data loss
5017 // (JPEG compression for example).
5018 bool bDstIsCompressed = false;
5019 const char* pszDstCompressed =
5020 CSLFetchNameValue( const_cast<char **>(papszOptions), "COMPRESSED" );
5021 if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5022 bDstIsCompressed = true;
5023
5024 /* -------------------------------------------------------------------- */
5025 /* What will our swath size be? */
5026 /* -------------------------------------------------------------------- */
5027
5028 int nSwathCols = 0;
5029 int nSwathLines = 0;
5030 GDALCopyWholeRasterGetSwathSize( poSrcBand,
5031 poDstBand,
5032 1,
5033 bDstIsCompressed, FALSE,
5034 &nSwathCols, &nSwathLines);
5035
5036 const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5037
5038 void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize );
5039 if( pSwathBuf == nullptr )
5040 {
5041 return CE_Failure;
5042 }
5043
5044 CPLDebug( "GDAL",
5045 "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
5046 nSwathCols, nSwathLines );
5047
5048 const bool bCheckHoles = CPLTestBool( CSLFetchNameValueDef(
5049 papszOptions, "SKIP_HOLES", "NO" ) );
5050
5051 // Advise the source raster that we are going to read it completely
5052 poSrcBand->AdviseRead( 0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr );
5053
5054 /* ==================================================================== */
5055 /* Band oriented (uninterleaved) case. */
5056 /* ==================================================================== */
5057
5058 for( int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines )
5059 {
5060 int nThisLines = nSwathLines;
5061
5062 if( iY + nThisLines > nYSize )
5063 nThisLines = nYSize - iY;
5064
5065 for( int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols )
5066 {
5067 int nThisCols = nSwathCols;
5068
5069 if( iX + nThisCols > nXSize )
5070 nThisCols = nXSize - iX;
5071
5072 int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5073 if( bCheckHoles )
5074 {
5075 nStatus = poSrcBand->GetDataCoverageStatus(
5076 iX, iY, nThisCols, nThisLines,
5077 GDAL_DATA_COVERAGE_STATUS_DATA);
5078 }
5079 if( nStatus & GDAL_DATA_COVERAGE_STATUS_DATA )
5080 {
5081 eErr = poSrcBand->RasterIO( GF_Read,
5082 iX, iY, nThisCols, nThisLines,
5083 pSwathBuf, nThisCols, nThisLines,
5084 eDT, 0, 0, nullptr );
5085
5086 if( eErr == CE_None )
5087 eErr = poDstBand->RasterIO( GF_Write,
5088 iX, iY, nThisCols, nThisLines,
5089 pSwathBuf, nThisCols, nThisLines,
5090 eDT, 0, 0, nullptr );
5091 }
5092
5093 if( eErr == CE_None
5094 && !pfnProgress(
5095 (iY + nThisLines) / static_cast<float>(nYSize),
5096 nullptr, pProgressData ) )
5097 {
5098 eErr = CE_Failure;
5099 CPLError( CE_Failure, CPLE_UserInterrupt,
5100 "User terminated CreateCopy()" );
5101 }
5102 }
5103 }
5104
5105 /* -------------------------------------------------------------------- */
5106 /* Cleanup */
5107 /* -------------------------------------------------------------------- */
5108 CPLFree( pSwathBuf );
5109
5110 return eErr;
5111 }
5112
5113 /************************************************************************/
5114 /* GDALCopyRasterIOExtraArg () */
5115 /************************************************************************/
5116
GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg * psDestArg,GDALRasterIOExtraArg * psSrcArg)5117 void GDALCopyRasterIOExtraArg( GDALRasterIOExtraArg* psDestArg,
5118 GDALRasterIOExtraArg* psSrcArg )
5119 {
5120 INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5121 if( psSrcArg )
5122 {
5123 psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5124 psDestArg->pfnProgress = psSrcArg->pfnProgress;
5125 psDestArg->pProgressData = psSrcArg->pProgressData;
5126 psDestArg->bFloatingPointWindowValidity =
5127 psSrcArg->bFloatingPointWindowValidity;
5128 if( psSrcArg->bFloatingPointWindowValidity )
5129 {
5130 psDestArg->dfXOff = psSrcArg->dfXOff;
5131 psDestArg->dfYOff = psSrcArg->dfYOff;
5132 psDestArg->dfXSize = psSrcArg->dfXSize;
5133 psDestArg->dfYSize = psSrcArg->dfYSize;
5134 }
5135 }
5136 }
5137