/**********************************************************************
 *
 * Name:     cpl_virtualmem.cpp
 * Project:  CPL - Common Portability Library
 * Purpose:  Virtual memory
 * Author:   Even Rouault, <even dot rouault at spatialys.com>
 *
 **********************************************************************
 * Copyright (c) 2014, Even Rouault <even dot rouault at spatialys.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 ****************************************************************************/

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

// To get a 64-bit off_t where possible.
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif

#include "cpl_virtualmem.h"

#include <cassert>
// TODO(schwehr): Should ucontext.h be included?
// #include <ucontext.h>

#include "cpl_atomic_ops.h"
#include "cpl_config.h"
#include "cpl_conv.h"
#include "cpl_error.h"
#include "cpl_multiproc.h"

CPL_CVSID("$Id: cpl_virtualmem.cpp fa752ad6eabafaf630a704e1892a9d837d683cb3 2021-03-06 17:04:38 +0100 Even Rouault $")

#ifdef NDEBUG
// Release build (NDEBUG defined): ignore the result.
#define IGNORE_OR_ASSERT_IN_DEBUG(expr) CPL_IGNORE_RET_VAL((expr))
#else
// Debug build: assert.
#define IGNORE_OR_ASSERT_IN_DEBUG(expr) assert((expr))
#endif

#if defined(__linux) && defined(CPL_MULTIPROC_PTHREAD)
#define HAVE_VIRTUAL_MEM_VMA
#endif

#if defined(HAVE_MMAP) || defined(HAVE_VIRTUAL_MEM_VMA)
#include <unistd.h>     // read, write, close, pipe, sysconf
#include <sys/mman.h>   // mmap, munmap, mremap
#endif

typedef enum
{
    VIRTUAL_MEM_TYPE_FILE_MEMORY_MAPPED,
    VIRTUAL_MEM_TYPE_VMA
} CPLVirtualMemType;

struct CPLVirtualMem
{
    CPLVirtualMemType eType;

    struct CPLVirtualMem *pVMemBase;
    int                   nRefCount;

    CPLVirtualMemAccessMode eAccessMode;

    size_t       nPageSize;
    // Aligned on nPageSize.
    void        *pData;
    // Returned by mmap(), potentially lower than pData.
    void        *pDataToFree;
    // Requested size (unrounded).
    size_t       nSize;

    bool         bSingleThreadUsage;

    void                         *pCbkUserData;
    CPLVirtualMemFreeUserData     pfnFreeUserData;
};

#ifdef HAVE_VIRTUAL_MEM_VMA

#include <sys/select.h> // select
#include <sys/stat.h>   // open()
#include <sys/types.h>  // open()
#include <errno.h>
#include <fcntl.h>      // open()
#include <signal.h>     // sigaction
#include <stdarg.h>     // va_list, used by fprintfstderr()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>

// FIXME? gcore/virtualmem.py tests fail/crash when HAVE_5ARGS_MREMAP
// is not defined.

#ifndef HAVE_5ARGS_MREMAP
#include "cpl_atomic_ops.h"
#endif

/* Linux-specific (i.e. non-POSIX-compliant) features used:
   - Returning from a SIGSEGV handler is clearly a POSIX violation, but in
     practice most POSIX systems should be happy.
   - mremap() with 5 arguments is Linux-specific. It is used when the user
     callback is invited to fill a page: we currently mmap() a writable
     page, let the callback fill it, and afterwards mremap() that temporary
     page onto the location where the fault occurred.
     If we have no 5-argument mremap(), the workaround is to pause the other
     threads that consume the current view while we are updating the faulted
     page; otherwise a non-paused thread could access a page that is in the
     middle of being filled. The way we pause those threads is rather
     unusual: we send them a SIGUSR1 and wait until they are blocked in the
     temporary SIGUSR1 handler.
   - MAP_ANONYMOUS isn't documented in POSIX, but is very commonly found
     (sometimes called MAP_ANON).
   - Dealing with the limit on the number of memory mapping regions,
     i.e. the 65536 limit.
   - Other things I've not identified.
*/

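// Illustrative sketch (not part of the build) of the 5-argument mremap()
// path described above: the temporary page is filled while detached, then
// atomically moved onto the faulting address, so other threads never see a
// partially filled page. "fault_page" and "pagesize" are hypothetical names.
//
//   void* tmp = mmap(nullptr, pagesize, PROT_READ | PROT_WRITE,
//                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//   /* ... user callback fills tmp ... */
//   mremap(tmp, pagesize, pagesize,
//          MREMAP_MAYMOVE | MREMAP_FIXED, fault_page);
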
#define ALIGN_DOWN(p,pagesize) reinterpret_cast<void*>((reinterpret_cast<GUIntptr_t>(p)) / (pagesize) * (pagesize))
#define ALIGN_UP(p,pagesize) reinterpret_cast<void*>((reinterpret_cast<GUIntptr_t>(p) + (pagesize) - 1) / (pagesize) * (pagesize))
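// For example, with a 4096-byte page size, ALIGN_DOWN(0x1234, 4096) yields
// 0x1000 and ALIGN_UP(0x1234, 4096) yields 0x2000; already-aligned
// addresses are left unchanged by both macros.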

#define DEFAULT_PAGE_SIZE       (256*256)
#define MAXIMUM_PAGE_SIZE       (32*1024*1024)

// Linux kernel limit.
#define MAXIMUM_COUNT_OF_MAPPINGS   65536

#define BYEBYE_ADDR             (reinterpret_cast<void*>(~static_cast<size_t>(0)))

#define MAPPING_FOUND           "yeah"
#define MAPPING_NOT_FOUND       "doh!"

#define SET_BIT(ar,bitnumber)   ar[(bitnumber)/8] |= 1 << ((bitnumber) % 8)
#define UNSET_BIT(ar,bitnumber) ar[(bitnumber)/8] &= ~(1 << ((bitnumber) % 8))
#define TEST_BIT(ar,bitnumber)  (ar[(bitnumber)/8] & (1 << ((bitnumber) % 8)))
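// These macros treat the GByte array as a packed bitset: for example,
// TEST_BIT(ar, 10) inspects bit 2 of ar[1], since 10 / 8 == 1 and
// 10 % 8 == 2.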

typedef enum
{
    OP_LOAD,
    OP_STORE,
    OP_MOVS_RSI_RDI,
    OP_UNKNOWN
} OpType;

typedef struct
{
    CPLVirtualMem sBase;

    GByte       *pabitMappedPages;
    GByte       *pabitRWMappedPages;

    int          nCacheMaxSizeInPages;   // Maximum size of page array.
    int         *panLRUPageIndices;      // Array with indices of cached pages.
    int          iLRUStart;              // Index in array where to
                                         // write next page index.
    int          nLRUSize;               // Current size of the array.

    int          iLastPage;              // Last page accessed.
    int          nRetry;                 // Number of consecutive
                                         // retries to that last page.

    CPLVirtualMemCachePageCbk     pfnCachePage;    // Called when a page is
                                                   // mapped.
    CPLVirtualMemUnCachePageCbk   pfnUnCachePage;  // Called when a (writable)
                                                   // page is unmapped.

#ifndef HAVE_5ARGS_MREMAP
    CPLMutex               *hMutexThreadArray;
    int                     nThreads;
    pthread_t              *pahThreads;
#endif
} CPLVirtualMemVMA;

typedef struct
{
    // hVirtualMemManagerMutex protects the following two variables.
    CPLVirtualMemVMA **pasVirtualMem;
    int              nVirtualMemCount;

    int              pipefd_to_thread[2];
    int              pipefd_from_thread[2];
    int              pipefd_wait_thread[2];
    CPLJoinableThread *hHelperThread;

    struct sigaction oldact;
} CPLVirtualMemManager;

typedef struct
{
    void            *pFaultAddr;
    OpType           opType;
    pthread_t        hRequesterThread;
} CPLVirtualMemMsgToWorkerThread;

// TODO: Singletons.
static CPLVirtualMemManager* pVirtualMemManager = nullptr;
static CPLMutex* hVirtualMemManagerMutex = nullptr;

static bool CPLVirtualMemManagerInit();

#ifdef DEBUG_VIRTUALMEM

/************************************************************************/
/*                           fprintfstderr()                            */
/************************************************************************/

// This function may be called from signal handlers, where most functions
// from the C library are unsafe to call. fprintf() is clearly one
// of those functions (see
// http://stackoverflow.com/questions/4554129/linux-glibc-can-i-use-fprintf-in-signal-handler)
// vsnprintf() is *probably* safer in that respect (but there is no
// guarantee). write() is async-signal-safe.
static void fprintfstderr(const char* fmt, ...)
{
    char buffer[80] = {};
    va_list ap;
    va_start(ap, fmt);
    vsnprintf(buffer, sizeof(buffer), fmt, ap);
    va_end(ap);
    int offset = 0;
    while( true )
    {
        const size_t nSizeToWrite = strlen(buffer + offset);
        int ret = static_cast<int>(write(2, buffer + offset, nSizeToWrite));
        if( ret < 0 )
        {
            // Retry on EINTR; give up on any other error.
            if( errno != EINTR )
                break;
        }
        else
        {
            if( ret == static_cast<int>(nSizeToWrite) )
                break;
            offset += ret;
        }
    }
}

#endif

/************************************************************************/
/*              CPLVirtualMemManagerRegisterVirtualMem()                */
/************************************************************************/

static bool CPLVirtualMemManagerRegisterVirtualMem( CPLVirtualMemVMA* ctxt )
{
    if( !CPLVirtualMemManagerInit() )
        return false;

    bool bSuccess = true;
    IGNORE_OR_ASSERT_IN_DEBUG(ctxt);
    CPLAcquireMutex(hVirtualMemManagerMutex, 1000.0);
    CPLVirtualMemVMA** pasVirtualMemNew = static_cast<CPLVirtualMemVMA **>(
        VSI_REALLOC_VERBOSE(
            pVirtualMemManager->pasVirtualMem,
            sizeof(CPLVirtualMemVMA *) *
            (pVirtualMemManager->nVirtualMemCount + 1)));
    if( pasVirtualMemNew == nullptr )
    {
        bSuccess = false;
    }
    else
    {
        pVirtualMemManager->pasVirtualMem = pasVirtualMemNew;
        pVirtualMemManager->
            pasVirtualMem[pVirtualMemManager->nVirtualMemCount] = ctxt;
        pVirtualMemManager->nVirtualMemCount++;
    }
    CPLReleaseMutex(hVirtualMemManagerMutex);
    return bSuccess;
}

/************************************************************************/
/*               CPLVirtualMemManagerUnregisterVirtualMem()             */
/************************************************************************/

static void CPLVirtualMemManagerUnregisterVirtualMem( CPLVirtualMemVMA* ctxt )
{
    CPLAcquireMutex(hVirtualMemManagerMutex, 1000.0);
    for( int i=0; i < pVirtualMemManager->nVirtualMemCount; i++ )
    {
        if( pVirtualMemManager->pasVirtualMem[i] == ctxt )
        {
            if( i < pVirtualMemManager->nVirtualMemCount - 1 )
            {
                memmove(
                    pVirtualMemManager->pasVirtualMem + i,
                    pVirtualMemManager->pasVirtualMem + i + 1,
                    sizeof(CPLVirtualMem*) *
                    (pVirtualMemManager->nVirtualMemCount - i - 1) );
            }
            pVirtualMemManager->nVirtualMemCount--;
            break;
        }
    }
    CPLReleaseMutex(hVirtualMemManagerMutex);
}

/************************************************************************/
/*                           CPLVirtualMemNew()                         */
/************************************************************************/

static void CPLVirtualMemFreeFileMemoryMapped( CPLVirtualMemVMA* ctxt );

CPLVirtualMem* CPLVirtualMemNew( size_t nSize,
                                 size_t nCacheSize,
                                 size_t nPageSizeHint,
                                 int bSingleThreadUsage,
                                 CPLVirtualMemAccessMode eAccessMode,
                                 CPLVirtualMemCachePageCbk pfnCachePage,
                                 CPLVirtualMemUnCachePageCbk pfnUnCachePage,
                                 CPLVirtualMemFreeUserData pfnFreeUserData,
                                 void *pCbkUserData )
{
    size_t nMinPageSize = CPLGetPageSize();
    size_t nPageSize = DEFAULT_PAGE_SIZE;

    IGNORE_OR_ASSERT_IN_DEBUG(nSize > 0);
    IGNORE_OR_ASSERT_IN_DEBUG(pfnCachePage != nullptr);

    if( nPageSizeHint >= nMinPageSize && nPageSizeHint <= MAXIMUM_PAGE_SIZE )
    {
        if( (nPageSizeHint % nMinPageSize) == 0 )
            nPageSize = nPageSizeHint;
        else
        {
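            // Round the page size hint up to the next power of two.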
            int nbits = 0;
            nPageSize = static_cast<size_t>(nPageSizeHint);
            do
            {
                nPageSize >>= 1;
                nbits++;
            } while( nPageSize > 0 );
            nPageSize = static_cast<size_t>(1) << (nbits - 1);
            if( nPageSize < static_cast<size_t>(nPageSizeHint) )
                nPageSize <<= 1;
        }
    }

    if( (nPageSize % nMinPageSize) != 0 )
        nPageSize = nMinPageSize;

    if( nCacheSize > nSize )
        nCacheSize = nSize;
    else if( nCacheSize == 0 )
        nCacheSize = 1;

    int nMappings = 0;

    // Linux specific:
    // Count the number of existing memory mappings.
    FILE* f = fopen("/proc/self/maps", "rb");
    if( f != nullptr )
    {
        char buffer[80] = {};
        while( fgets(buffer, sizeof(buffer), f) != nullptr )
            nMappings++;
        fclose(f);
    }

    size_t nCacheMaxSizeInPages = 0;
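    // Grow the page size until the page cache fits within roughly 90% of
    // the kernel's mapping budget, keeping headroom for the mappings the
    // process already uses.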
    while( true )
    {
        // /proc/self/maps must not have more than 65K lines.
        nCacheMaxSizeInPages = (nCacheSize + 2 * nPageSize - 1) / nPageSize;
        if( nCacheMaxSizeInPages >
            static_cast<size_t>((MAXIMUM_COUNT_OF_MAPPINGS * 9 / 10) -
                                nMappings) )
            nPageSize <<= 1;
        else
            break;
    }
    size_t nRoundedMappingSize =
        ((nSize + 2 * nPageSize - 1) / nPageSize) * nPageSize;
    void* pData = mmap(nullptr, nRoundedMappingSize, PROT_NONE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if( pData == MAP_FAILED )
    {
        perror("mmap");
        return nullptr;
    }
    CPLVirtualMemVMA* ctxt = static_cast<CPLVirtualMemVMA *>(
        VSI_CALLOC_VERBOSE(1, sizeof(CPLVirtualMemVMA)));
    if( ctxt == nullptr )
    {
        munmap(pData, nRoundedMappingSize);
        return nullptr;
    }
    ctxt->sBase.nRefCount = 1;
    ctxt->sBase.eType = VIRTUAL_MEM_TYPE_VMA;
    ctxt->sBase.eAccessMode = eAccessMode;
    ctxt->sBase.pDataToFree = pData;
    ctxt->sBase.pData = ALIGN_UP(pData, nPageSize);
    ctxt->sBase.nPageSize = nPageSize;
    ctxt->sBase.nSize = nSize;
    ctxt->sBase.bSingleThreadUsage = CPL_TO_BOOL(bSingleThreadUsage);
    ctxt->sBase.pfnFreeUserData = pfnFreeUserData;
    ctxt->sBase.pCbkUserData = pCbkUserData;

    ctxt->pabitMappedPages = static_cast<GByte *>(
        VSI_CALLOC_VERBOSE(1, (nRoundedMappingSize / nPageSize + 7) / 8));
    if( ctxt->pabitMappedPages == nullptr )
    {
        CPLVirtualMemFreeFileMemoryMapped(ctxt);
        CPLFree(ctxt);
        return nullptr;
    }
    ctxt->pabitRWMappedPages = static_cast<GByte*>(
        VSI_CALLOC_VERBOSE(1, (nRoundedMappingSize / nPageSize + 7) / 8));
    if( ctxt->pabitRWMappedPages == nullptr )
    {
        CPLVirtualMemFreeFileMemoryMapped(ctxt);
        CPLFree(ctxt);
        return nullptr;
    }
    // Need at least 2 pages in case a rep movs instruction
    // operates in the view.
    ctxt->nCacheMaxSizeInPages = static_cast<int>(nCacheMaxSizeInPages);
    ctxt->panLRUPageIndices = static_cast<int*>(
        VSI_MALLOC_VERBOSE(ctxt->nCacheMaxSizeInPages * sizeof(int)));
    if( ctxt->panLRUPageIndices == nullptr )
    {
        CPLVirtualMemFreeFileMemoryMapped(ctxt);
        CPLFree(ctxt);
        return nullptr;
    }
    ctxt->iLRUStart = 0;
    ctxt->nLRUSize = 0;
    ctxt->iLastPage = -1;
    ctxt->nRetry = 0;
    ctxt->pfnCachePage = pfnCachePage;
    ctxt->pfnUnCachePage = pfnUnCachePage;

#ifndef HAVE_5ARGS_MREMAP
    if( !ctxt->sBase.bSingleThreadUsage )
    {
        ctxt->hMutexThreadArray = CPLCreateMutex();
        IGNORE_OR_ASSERT_IN_DEBUG(ctxt->hMutexThreadArray != nullptr);
        CPLReleaseMutex(ctxt->hMutexThreadArray);
        ctxt->nThreads = 0;
        ctxt->pahThreads = nullptr;
    }
#endif

    if( !CPLVirtualMemManagerRegisterVirtualMem(ctxt) )
    {
        CPLVirtualMemFreeFileMemoryMapped(ctxt);
        CPLFree(ctxt);
        return nullptr;
    }

    return reinterpret_cast<CPLVirtualMem*>(ctxt);
}
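
// Minimal usage sketch (hypothetical caller, not part of the build). The
// callback below and its body are assumptions made for illustration; only
// CPLVirtualMemNew() and its parameter order come from this file.
//
//   static void MyCachePage( CPLVirtualMem* ctxt, size_t nOffset,
//                            void* pPageToFill, size_t nToFill,
//                            void* pUserData )
//   {
//       memset(pPageToFill, 0, nToFill);  // e.g. fetch from storage instead
//   }
//
//   CPLVirtualMem* psMem = CPLVirtualMemNew(
//       nSize, nCacheSize, /* nPageSizeHint = */ 0,
//       /* bSingleThreadUsage = */ FALSE, VIRTUALMEM_READONLY,
//       MyCachePage, /* pfnUnCachePage = */ nullptr,
//       /* pfnFreeUserData = */ nullptr, /* pCbkUserData = */ nullptr);
//   // The returned region is then dereferenced like ordinary memory, and
//   // pages are faulted in on demand through MyCachePage().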

/************************************************************************/
/*                  CPLVirtualMemFreeFileMemoryMapped()                 */
/************************************************************************/

static void CPLVirtualMemFreeFileMemoryMapped(CPLVirtualMemVMA* ctxt)
{
    CPLVirtualMemManagerUnregisterVirtualMem(ctxt);

    size_t nRoundedMappingSize =
        ((ctxt->sBase.nSize + 2 * ctxt->sBase.nPageSize - 1) /
         ctxt->sBase.nPageSize) * ctxt->sBase.nPageSize;
    if( ctxt->sBase.eAccessMode == VIRTUALMEM_READWRITE &&
        ctxt->pabitRWMappedPages != nullptr &&
        ctxt->pfnUnCachePage != nullptr )
    {
        for( size_t i = 0;
             i < nRoundedMappingSize / ctxt->sBase.nPageSize;
             i++ )
        {
            if( TEST_BIT(ctxt->pabitRWMappedPages, i) )
            {
                void* addr = static_cast<char*>(ctxt->sBase.pData) + i * ctxt->sBase.nPageSize;
                ctxt->pfnUnCachePage(reinterpret_cast<CPLVirtualMem*>(ctxt),
                                 i * ctxt->sBase.nPageSize,
                                 addr,
                                 ctxt->sBase.nPageSize,
                                 ctxt->sBase.pCbkUserData);
            }
        }
    }
    int nRet = munmap(ctxt->sBase.pDataToFree, nRoundedMappingSize);
    IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
    CPLFree(ctxt->pabitMappedPages);
    CPLFree(ctxt->pabitRWMappedPages);
    CPLFree(ctxt->panLRUPageIndices);
#ifndef HAVE_5ARGS_MREMAP
    if( !ctxt->sBase.bSingleThreadUsage )
    {
        CPLFree(ctxt->pahThreads);
        CPLDestroyMutex(ctxt->hMutexThreadArray);
    }
#endif
}

#ifndef HAVE_5ARGS_MREMAP

static volatile int nCountThreadsInSigUSR1 = 0;
static volatile int nWaitHelperThread = 0;

/************************************************************************/
/*                   CPLVirtualMemSIGUSR1Handler()                      */
/************************************************************************/

static void CPLVirtualMemSIGUSR1Handler( int /* signum_unused */,
                                         siginfo_t* /* the_info_unused */,
                                         void* /* the_ctxt_unused */)
{
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
    fprintfstderr("entering CPLVirtualMemSIGUSR1Handler %X\n", pthread_self());
#endif
    // Rouault guesses this is only POSIX correct if it is implemented by an
    // intrinsic.
    CPLAtomicInc(&nCountThreadsInSigUSR1);
    while( nWaitHelperThread )
        // Not explicitly indicated as signal-async-safe, but hopefully ok.
        usleep(1);
    CPLAtomicDec(&nCountThreadsInSigUSR1);
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
    fprintfstderr("leaving CPLVirtualMemSIGUSR1Handler %X\n", pthread_self());
#endif
}
#endif

/************************************************************************/
/*                      CPLVirtualMemDeclareThread()                    */
/************************************************************************/

void CPLVirtualMemDeclareThread( CPLVirtualMem* ctxt )
{
    if( ctxt->eType == VIRTUAL_MEM_TYPE_FILE_MEMORY_MAPPED )
        return;
#ifndef HAVE_5ARGS_MREMAP
    CPLVirtualMemVMA* ctxtVMA = reinterpret_cast<CPLVirtualMemVMA *>(ctxt);
    IGNORE_OR_ASSERT_IN_DEBUG( !ctxt->bSingleThreadUsage );
    CPLAcquireMutex(ctxtVMA->hMutexThreadArray, 1000.0);
    ctxtVMA->pahThreads = static_cast<pthread_t *>(
        CPLRealloc(ctxtVMA->pahThreads,
                   (ctxtVMA->nThreads + 1) * sizeof(pthread_t)));
    ctxtVMA->pahThreads[ctxtVMA->nThreads] = pthread_self();
    ctxtVMA->nThreads++;

    CPLReleaseMutex(ctxtVMA->hMutexThreadArray);
#endif
}

/************************************************************************/
/*                     CPLVirtualMemUnDeclareThread()                   */
/************************************************************************/

void CPLVirtualMemUnDeclareThread( CPLVirtualMem* ctxt )
{
    if( ctxt->eType == VIRTUAL_MEM_TYPE_FILE_MEMORY_MAPPED )
        return;
#ifndef HAVE_5ARGS_MREMAP
    CPLVirtualMemVMA* ctxtVMA = reinterpret_cast<CPLVirtualMemVMA *>(ctxt);
    pthread_t self = pthread_self();
    IGNORE_OR_ASSERT_IN_DEBUG( !ctxt->bSingleThreadUsage );
    CPLAcquireMutex(ctxtVMA->hMutexThreadArray, 1000.0);
    for( int i = 0; i < ctxtVMA->nThreads; i++ )
    {
        if( ctxtVMA->pahThreads[i] == self )
        {
            if( i < ctxtVMA->nThreads - 1 )
                // Shift the tail left to drop entry i.
                memmove(ctxtVMA->pahThreads + i,
                        ctxtVMA->pahThreads + i + 1,
                        (ctxtVMA->nThreads - 1 - i) * sizeof(pthread_t));
            ctxtVMA->nThreads--;
            break;
        }
    }

    CPLReleaseMutex(ctxtVMA->hMutexThreadArray);
#endif
}

/************************************************************************/
/*                     CPLVirtualMemGetPageToFill()                     */
/************************************************************************/

// Must be paired with CPLVirtualMemAddPage.
static
void* CPLVirtualMemGetPageToFill( CPLVirtualMemVMA* ctxt,
                                  void* start_page_addr )
{
    void* pPageToFill = nullptr;

    if( ctxt->sBase.bSingleThreadUsage )
    {
        pPageToFill = start_page_addr;
        const int nRet =
            mprotect( pPageToFill, ctxt->sBase.nPageSize,
                      PROT_READ | PROT_WRITE );
        IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
    }
    else
    {
#ifndef HAVE_5ARGS_MREMAP
        CPLAcquireMutex(ctxt->hMutexThreadArray, 1000.0);
        if( ctxt->nThreads == 1 )
        {
            pPageToFill = start_page_addr;
            const int nRet =
                mprotect( pPageToFill, ctxt->sBase.nPageSize,
                          PROT_READ | PROT_WRITE );
            IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
        }
        else
#endif
        {
            // Allocate a temporary writable page that the user
            // callback can fill.
            pPageToFill = mmap(nullptr, ctxt->sBase.nPageSize,
                                PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            IGNORE_OR_ASSERT_IN_DEBUG(pPageToFill != MAP_FAILED);
        }
    }
    return pPageToFill;
}
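
// Sketch of the intended pairing (hypothetical call sequence; the fault
// handler elsewhere in this file is the real user):
//
//   void* pPage = CPLVirtualMemGetPageToFill(ctxt, start_page_addr);
//   ctxt->pfnCachePage(reinterpret_cast<CPLVirtualMem*>(ctxt), nOffset,
//                      pPage, nToFill, ctxt->sBase.pCbkUserData);
//   CPLVirtualMemAddPage(ctxt, start_page_addr, pPage, opType, hThread);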

/************************************************************************/
/*                        CPLVirtualMemAddPage()                        */
/************************************************************************/

static
void CPLVirtualMemAddPage( CPLVirtualMemVMA* ctxt, void* target_addr,
                           void* pPageToFill,
                           OpType opType, pthread_t hRequesterThread )
{
    const int iPage = static_cast<int>(
       (static_cast<char*>(target_addr) - static_cast<char*>(ctxt->sBase.pData)) / ctxt->sBase.nPageSize);
    if( ctxt->nLRUSize == ctxt->nCacheMaxSizeInPages )
    {
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
        fprintfstderr("uncaching page %d\n", iPage);
#endif
        int nOldPage = ctxt->panLRUPageIndices[ctxt->iLRUStart];
        void* addr = static_cast<char*>(ctxt->sBase.pData) + nOldPage * ctxt->sBase.nPageSize;
        if( ctxt->sBase.eAccessMode == VIRTUALMEM_READWRITE &&
            ctxt->pfnUnCachePage != nullptr &&
            TEST_BIT(ctxt->pabitRWMappedPages, nOldPage) )
        {
            size_t nToBeEvicted = ctxt->sBase.nPageSize;
            if( static_cast<char*>(addr) + nToBeEvicted >=
                static_cast<char*>(ctxt->sBase.pData) + ctxt->sBase.nSize )
                nToBeEvicted =
                    static_cast<char*>(ctxt->sBase.pData) + ctxt->sBase.nSize - static_cast<char*>(addr);

            ctxt->pfnUnCachePage(reinterpret_cast<CPLVirtualMem*>(ctxt),
                                 nOldPage * ctxt->sBase.nPageSize,
                                 addr,
                                 nToBeEvicted,
                                 ctxt->sBase.pCbkUserData);
        }
        // "Free" the least recently used page.
        UNSET_BIT(ctxt->pabitMappedPages, nOldPage);
        UNSET_BIT(ctxt->pabitRWMappedPages, nOldPage);
        // Free the old page.
        // Not sure how portable it is to do it that way.
        const void * const pRet = mmap(addr, ctxt->sBase.nPageSize, PROT_NONE,
                    MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        IGNORE_OR_ASSERT_IN_DEBUG(pRet == addr);
        // cppcheck-suppress memleak
    }
    ctxt->panLRUPageIndices[ctxt->iLRUStart] = iPage;
    ctxt->iLRUStart = (ctxt->iLRUStart + 1) % ctxt->nCacheMaxSizeInPages;
    if( ctxt->nLRUSize < ctxt->nCacheMaxSizeInPages )
    {
        ctxt->nLRUSize++;
    }
    SET_BIT(ctxt->pabitMappedPages, iPage);

    if( ctxt->sBase.bSingleThreadUsage )
    {
        if( opType == OP_STORE &&
            ctxt->sBase.eAccessMode == VIRTUALMEM_READWRITE )
        {
            // Let (and mark) the page writable since the instruction that
            // triggered the fault is a store.
            SET_BIT(ctxt->pabitRWMappedPages, iPage);
        }
        else if( ctxt->sBase.eAccessMode != VIRTUALMEM_READONLY )
        {
            const int nRet =
                mprotect(target_addr, ctxt->sBase.nPageSize, PROT_READ);
            IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
        }
    }
    else
    {
#ifdef HAVE_5ARGS_MREMAP
        (void)hRequesterThread;

        if( opType == OP_STORE &&
            ctxt->sBase.eAccessMode == VIRTUALMEM_READWRITE )
        {
            // Let (and mark) the page writable since the instruction that
            // triggered the fault is a store.
            SET_BIT(ctxt->pabitRWMappedPages, iPage);
        }
        else if( ctxt->sBase.eAccessMode != VIRTUALMEM_READONLY )
        {
            // Turn the temporary page read-only before remapping it.
            // Only make it writable again when a new fault occurs (and the
            // mapping is writable).
            const int nRet =
                mprotect(pPageToFill, ctxt->sBase.nPageSize, PROT_READ);
            IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
        }
        /* Can now remap the pPageToFill onto the target page */
        const void * const pRet =
            mremap( pPageToFill, ctxt->sBase.nPageSize, ctxt->sBase.nPageSize,
                    MREMAP_MAYMOVE | MREMAP_FIXED, target_addr );
        IGNORE_OR_ASSERT_IN_DEBUG(pRet == target_addr);

#else
        if( ctxt->nThreads > 1 )
        {
            /* Pause threads that share this mem view */
            CPLAtomicInc(&nWaitHelperThread);

            /* Install temporary SIGUSR1 signal handler */
            struct sigaction act, oldact;
            act.sa_sigaction = CPLVirtualMemSIGUSR1Handler;
            sigemptyset (&act.sa_mask);
            /* We don't want the sigsegv handler to be called when we are */
            /* running the sigusr1 handler */
            IGNORE_OR_ASSERT_IN_DEBUG(sigaddset(&act.sa_mask, SIGSEGV) == 0);
            act.sa_flags = 0;
            IGNORE_OR_ASSERT_IN_DEBUG(sigaction(SIGUSR1, &act, &oldact) == 0);

            for( int i = 0; i < ctxt->nThreads; i++)
            {
                if( ctxt->pahThreads[i] != hRequesterThread )
                {
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
                    fprintfstderr("stopping thread %X\n", ctxt->pahThreads[i]);
#endif
                    IGNORE_OR_ASSERT_IN_DEBUG(
                        pthread_kill( ctxt->pahThreads[i], SIGUSR1 ) == 0);
                }
            }

            /* Wait until they are all paused */
            while( nCountThreadsInSigUSR1 != ctxt->nThreads-1 )
                usleep(1);

            /* Restore old SIGUSR1 signal handler */
            IGNORE_OR_ASSERT_IN_DEBUG(sigaction(SIGUSR1, &oldact, nullptr) == 0);

            int nRet = mprotect( target_addr, ctxt->sBase.nPageSize,
                                 PROT_READ | PROT_WRITE );
            IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
            fprintfstderr("memcpying page %d\n", iPage);
#endif
            memcpy(target_addr, pPageToFill, ctxt->sBase.nPageSize);

            if( opType == OP_STORE &&
                ctxt->sBase.eAccessMode == VIRTUALMEM_READWRITE )
            {
                // Let (and mark) the page writable since the instruction that
                // triggered the fault is a store.
                SET_BIT(ctxt->pabitRWMappedPages, iPage);
            }
            else
            {
                nRet = mprotect(target_addr, ctxt->sBase.nPageSize, PROT_READ);
                IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
            }

            /* Wake up sleeping threads */
            CPLAtomicDec(&nWaitHelperThread);
            while( nCountThreadsInSigUSR1 != 0 )
                usleep(1);

            IGNORE_OR_ASSERT_IN_DEBUG(
                munmap(pPageToFill, ctxt->sBase.nPageSize) == 0);
        }
        else
        {
            if( opType == OP_STORE &&
                ctxt->sBase.eAccessMode == VIRTUALMEM_READWRITE )
            {
                // Let (and mark) the page writable since the instruction that
                // triggered the fault is a store.
                SET_BIT(ctxt->pabitRWMappedPages, iPage);
            }
            else if( ctxt->sBase.eAccessMode != VIRTUALMEM_READONLY )
            {
                const int nRet2 =
                    mprotect(target_addr, ctxt->sBase.nPageSize, PROT_READ);
                IGNORE_OR_ASSERT_IN_DEBUG(nRet2 == 0);
            }
        }

        CPLReleaseMutex(ctxt->hMutexThreadArray);
#endif
    }
    // cppcheck-suppress memleak
}

/************************************************************************/
/*                    CPLVirtualMemGetOpTypeImm()                       */
/************************************************************************/

#if defined(__x86_64__) || defined(__i386__)
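// Classify an x86 "immediate group" instruction (opcodes 0x80/0x81/0x83)
// from the ModRM byte that follows it: bits 3..5 of ModRM select the ALU
// operation (/0 add, /1 or, /4 and, /5 sub, /6 xor, /7 cmp). All of these
// write to their memory operand except cmp, which only reads it. Bytes not
// matched below (e.g. adc/sbb forms) are reported as OP_UNKNOWN.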
static OpType CPLVirtualMemGetOpTypeImm(GByte val_rip)
{
    OpType opType = OP_UNKNOWN;
    if( (/*val_rip >= 0x00 &&*/ val_rip <= 0x07) ||
        (val_rip >= 0x40 && val_rip <= 0x47) )  // add $, (X)
        opType = OP_STORE;
    if( (val_rip >= 0x08 && val_rip <= 0x0f) ||
        (val_rip >= 0x48 && val_rip <= 0x4f) )  // or $, (X)
        opType = OP_STORE;
    if( (val_rip >= 0x20 && val_rip <= 0x27) ||
        (val_rip >= 0x60 && val_rip <= 0x67) )  // and $, (X)
        opType = OP_STORE;
    if( (val_rip >= 0x28 && val_rip <= 0x2f) ||
        (val_rip >= 0x68 && val_rip <= 0x6f) )  // sub $, (X)
        opType = OP_STORE;
    if( (val_rip >= 0x30 && val_rip <= 0x37) ||
        (val_rip >= 0x70 && val_rip <= 0x77) )  // xor $, (X)
        opType = OP_STORE;
    if( (val_rip >= 0x38 && val_rip <= 0x3f) ||
        (val_rip >= 0x78 && val_rip <= 0x7f) )  // cmp $, (X)
        opType = OP_LOAD;
    return opType;
}
#endif

/************************************************************************/
/*                      CPLVirtualMemGetOpType()                        */
/************************************************************************/

// Exhaustive coverage is not needed. This is just a hint for an
// optimization: if the fault occurs on a store operation, we can directly
// put the page in writable mode if the mapping allows it.

#if defined(__x86_64__) || defined(__i386__)
static OpType CPLVirtualMemGetOpType( const GByte* rip )
{
    OpType opType = OP_UNKNOWN;

#if defined(__x86_64__) || defined(__i386__)
    switch( rip[0] )
    {
        case 0x00: /* add %al,(%rax) */
        case 0x01: /* add %eax,(%rax) */
            opType = OP_STORE;
            break;
        case 0x02: /* add (%rax),%al */
        case 0x03: /* add (%rax),%eax */
            opType = OP_LOAD;
            break;

        case 0x08: /* or %al,(%rax) */
        case 0x09: /* or %eax,(%rax) */
            opType = OP_STORE;
            break;
        case 0x0a: /* or (%rax),%al */
        case 0x0b: /* or (%rax),%eax */
            opType = OP_LOAD;
            break;

        case 0x0f:
        {
            switch( rip[1] )
            {
                case 0xb6: /* movzbl (%rax),%eax */
                case 0xb7: /* movzwl (%rax),%eax */
                case 0xbe: /* movsbl (%rax),%eax */
                case 0xbf: /* movswl (%rax),%eax */
                    opType = OP_LOAD;
                    break;
                default:
                    break;
            }
            break;
        }
        case 0xc6: /* movb $,(%rax) */
        case 0xc7: /* movl $,(%rax) */
            opType = OP_STORE;
            break;

        case 0x20: /* and %al,(%rax) */
        case 0x21: /* and %eax,(%rax) */
            opType = OP_STORE;
            break;
        case 0x22: /* and (%rax),%al */
        case 0x23: /* and (%rax),%eax */
            opType = OP_LOAD;
            break;

        case 0x28: /* sub %al,(%rax) */
        case 0x29: /* sub %eax,(%rax) */
            opType = OP_STORE;
            break;
        case 0x2a: /* sub (%rax),%al */
        case 0x2b: /* sub (%rax),%eax */
            opType = OP_LOAD;
            break;

        case 0x30: /* xor %al,(%rax) */
        case 0x31: /* xor %eax,(%rax) */
            opType = OP_STORE;
            break;
        case 0x32: /* xor (%rax),%al */
        case 0x33: /* xor (%rax),%eax */
            opType = OP_LOAD;
            break;

        case 0x38: /* cmp %al,(%rax) */
        case 0x39: /* cmp %eax,(%rax) */
            opType = OP_LOAD;
            break;
        case 0x40:
        {
            switch( rip[1] )
            {
                case 0x00: /* add %spl,(%rax) */
                    opType = OP_STORE;
                    break;
                case 0x02: /* add (%rax),%spl */
                    opType = OP_LOAD;
                    break;
                case 0x28: /* sub %spl,(%rax) */
                    opType = OP_STORE;
                    break;
                case 0x2a: /* sub (%rax),%spl */
                    opType = OP_LOAD;
                    break;
                case 0x3a: /* cmp (%rax),%spl */
                    opType = OP_LOAD;
                    break;
                case 0x8a: /* mov (%rax),%spl */
                    opType = OP_LOAD;
                    break;
                default:
                    break;
            }
            break;
        }
#if defined(__x86_64__)
        case 0x41: /* reg=%al/%eax, X=%r8 */
        case 0x42: /* reg=%al/%eax, X=%rax,%r8,1 */
        case 0x43: /* reg=%al/%eax, X=%r8,%r8,1 */
        case 0x44: /* reg=%r8b/%r8w, X = %rax */
        case 0x45: /* reg=%r8b/%r8w, X = %r8 */
        case 0x46: /* reg=%r8b/%r8w, X = %rax,%r8,1 */
        case 0x47: /* reg=%r8b/%r8w, X = %r8,%r8,1 */
        {
            switch( rip[1] )
            {
                case 0x00: /* add regb,(X) */
                case 0x01: /* add regl,(X) */
                    opType = OP_STORE;
                    break;
                case 0x02: /* add (X),regb */
                case 0x03: /* add (X),regl */
                    opType = OP_LOAD;
                    break;
                case 0x0f:
                {
                    switch( rip[2] )
                    {
                        case 0xb6: /* movzbl (X),regl */
                        case 0xb7: /* movzwl (X),regl */
                        case 0xbe: /* movsbl (X),regl */
                        case 0xbf: /* movswl (X),regl */
                            opType = OP_LOAD;
                            break;
                        default:
                            break;
                    }
                    break;
                }
                case 0x28: /* sub regb,(X) */
                case 0x29: /* sub regl,(X) */
                    opType = OP_STORE;
                    break;
                case 0x2a: /* sub (X),regb */
                case 0x2b: /* sub (X),regl */
                    opType = OP_LOAD;
                    break;
                case 0x38: /* cmp regb,(X) */
                case 0x39: /* cmp regl,(X) */
                    opType = OP_LOAD;
                    break;
                case 0x80: /* cmpb,... $,(X) */
                case 0x81: /* cmpl,... $,(X) */
                case 0x83: /* cmpl,... $,(X) */
                    opType = CPLVirtualMemGetOpTypeImm(rip[2]);
                    break;
                case 0x88: /* mov regb,(X) */
                case 0x89: /* mov regl,(X) */
                    opType = OP_STORE;
                    break;
                case 0x8a: /* mov (X),regb */
                case 0x8b: /* mov (X),regl */
                    opType = OP_LOAD;
                    break;
                case 0xc6: /* movb $,(X) */
                case 0xc7: /* movl $,(X) */
                    opType = OP_STORE;
                    break;
                case 0x84: /* test %al,(X) */
                    opType = OP_LOAD;
                    break;
                case 0xf6: /* testb $,(X) or notb (X) */
                case 0xf7: /* testl $,(X) or notl (X)*/
                {
                    if( rip[2] < 0x10 ) /* test (X) */
                        opType = OP_LOAD;
                    else /* not (X) */
                        opType = OP_STORE;
                    break;
                }
                default:
                    break;
            }
            break;
        }
        case 0x48: /* reg=%rax, X=%rax or %rax,%rax,1 */
        case 0x49: /* reg=%rax, X=%r8 or %r8,%rax,1 */
        case 0x4a: /* reg=%rax, X=%rax,%r8,1 */
        case 0x4b: /* reg=%rax, X=%r8,%r8,1 */
        case 0x4c: /* reg=%r8, X=%rax or %rax,%rax,1 */
        case 0x4d: /* reg=%r8, X=%r8 or %r8,%rax,1 */
        case 0x4e: /* reg=%r8, X=%rax,%r8,1 */
        case 0x4f: /* reg=%r8, X=%r8,%r8,1 */
        {
            switch( rip[1] )
            {
                case 0x01: /* add reg,(X) */
                    opType = OP_STORE;
                    break;
                case 0x03: /* add (X),reg */
                    opType = OP_LOAD;
                    break;

                case 0x09: /* or reg,(%rax) */
                    opType = OP_STORE;
                    break;
                case 0x0b: /* or (%rax),reg */
                    opType = OP_LOAD;
                    break;
                case 0x0f:
                {
                    switch( rip[2] )
                    {
                        case 0xc3: /* movnti reg,(X) */
                            opType = OP_STORE;
                            break;
                        default:
                            break;
                    }
                    break;
                }
                case 0x21: /* and reg,(X) */
                    opType = OP_STORE;
                    break;
                case 0x23: /* and (X),reg */
                    opType = OP_LOAD;
                    break;

                case 0x29: /* sub reg,(X) */
                    opType = OP_STORE;
                    break;
                case 0x2b: /* sub (X),reg */
                    opType = OP_LOAD;
                    break;

                case 0x31: /* xor reg,(X) */
                    opType = OP_STORE;
                    break;
                case 0x33: /* xor (X),reg */
                    opType = OP_LOAD;
                    break;

                case 0x39: /* cmp reg,(X) */
                    opType = OP_LOAD;
                    break;

                case 0x81:
                case 0x83:
                    opType = CPLVirtualMemGetOpTypeImm(rip[2]);
                    break;

                case 0x85: /* test reg,(X) */
                    opType = OP_LOAD;
                    break;

                case 0x89: /* mov reg,(X) */
                    opType = OP_STORE;
                    break;
                case 0x8b: /* mov (X),reg */
                    opType = OP_LOAD;
                    break;

                case 0xc7: /* movq $,(X) */
                    opType = OP_STORE;
                    break;

                case 0xf7:
                {
                    if( rip[2] < 0x10 ) /* testq $,(X) */
                        opType = OP_LOAD;
                    else /* notq (X) */
                        opType = OP_STORE;
                    break;
                }
                default:
                    break;
            }
            break;
        }
#endif
        case 0x66:
        {
            switch( rip[1] )
            {
                case 0x01: /* add %ax,(%rax) */
                    opType = OP_STORE;
                    break;
                case 0x03: /* add (%rax),%ax */
                    opType = OP_LOAD;
                    break;
                case 0x0f:
                {
                    switch( rip[2] )
                    {
                        case 0x2e: /* ucomisd (%rax),%xmm0 */
                            opType = OP_LOAD;
                            break;
                        case 0x6f: /* movdqa (%rax),%xmm0 */
                            opType = OP_LOAD;
                            break;
                        case 0x7f: /* movdqa %xmm0,(%rax) */
                            opType = OP_STORE;
                            break;
                        case 0xb6: /* movzbw (%rax),%ax */
                            opType = OP_LOAD;
                            break;
                        case 0xe7: /* movntdq %xmm0,(%rax) */
                            opType = OP_STORE;
                            break;
                        default:
                            break;
                    }
                    break;
                }
                case 0x29: /* sub %ax,(%rax) */
                    opType = OP_STORE;
                    break;
                case 0x2b: /* sub (%rax),%ax */
                    opType = OP_LOAD;
                    break;
                case 0x39: /* cmp %ax,(%rax) */
                    opType = OP_LOAD;
                    break;
#if defined(__x86_64__)
                case 0x41: /* reg = %ax (or %xmm0), X = %r8 */
                case 0x42: /* reg = %ax (or %xmm0), X = %rax,%r8,1 */
                case 0x43: /* reg = %ax (or %xmm0), X = %r8,%r8,1 */
                case 0x44: /* reg = %r8w (or %xmm8), X = %rax */
                case 0x45: /* reg = %r8w (or %xmm8), X = %r8 */
                case 0x46: /* reg = %r8w (or %xmm8), X = %rax,%r8,1 */
                case 0x47: /* reg = %r8w (or %xmm8), X = %r8,%r8,1 */
                {
                    switch( rip[2] )
                    {
                        case 0x01: /* add reg,(X) */
                            opType = OP_STORE;
                            break;
                        case 0x03: /* add (X),reg */
                            opType = OP_LOAD;
                            break;
                        case 0x0f:
                        {
                            switch( rip[3] )
                            {
                                case 0x2e: /* ucomisd (X),reg */
                                    opType = OP_LOAD;
                                    break;
                                case 0x6f: /* movdqa (X),reg */
                                    opType = OP_LOAD;
                                    break;
                                case 0x7f: /* movdqa reg,(X) */
                                    opType = OP_STORE;
                                    break;
                                case 0xb6: /* movzbw (X),reg */
                                    opType = OP_LOAD;
                                    break;
                                case 0xe7: /* movntdq reg,(X) */
                                    opType = OP_STORE;
                                    break;
                                default:
                                    break;
                            }
                            break;
                        }
                        case 0x29: /* sub reg,(X) */
                            opType = OP_STORE;
                            break;
                        case 0x2b: /* sub (X),reg */
                            opType = OP_LOAD;
                            break;
                        case 0x39: /* cmp reg,(X) */
                            opType = OP_LOAD;
                            break;
                        case 0x81: /* cmpw,... $,(X) */
                        case 0x83: /* cmpw,... $,(X) */
                            opType = CPLVirtualMemGetOpTypeImm(rip[3]);
                            break;
                        case 0x85: /* test reg,(X) */
                            opType = OP_LOAD;
                            break;
                        case 0x89: /* mov reg,(X) */
                            opType = OP_STORE;
                            break;
                        case 0x8b: /* mov (X),reg */
                            opType = OP_LOAD;
                            break;
                        case 0xc7: /* movw $,(X) */
                            opType = OP_STORE;
                            break;
                        case 0xf7:
                        {
                            if( rip[3] < 0x10 ) /* testw $,(X) */
                                opType = OP_LOAD;
                            else /* notw (X) */
                                opType = OP_STORE;
                            break;
                        }
                        default:
                            break;
                    }
                    break;
                }
#endif
                case 0x81: /* cmpw,... $,(%rax) */
                case 0x83: /* cmpw,... $,(%rax) */
                    opType = CPLVirtualMemGetOpTypeImm(rip[2]);
                    break;

                case 0x85: /* test %ax,(%rax) */
                    opType = OP_LOAD;
                    break;
                case 0x89: /* mov %ax,(%rax) */
                    opType = OP_STORE;
                    break;
                case 0x8b: /* mov (%rax),%ax */
                    opType = OP_LOAD;
                    break;
                case 0xc7: /* movw $,(%rax) */
                    opType = OP_STORE;
                    break;
                case 0xf3:
                {
                    switch( rip[2] )
                    {
                        case 0xa5: /* rep movsw %ds:(%rsi),%es:(%rdi) */
                            opType = OP_MOVS_RSI_RDI;
                            break;
                        default:
                            break;
                    }
                    break;
                }
                case 0xf7: /* testw $,(%rax) or notw (%rax) */
                {
                    if( rip[2] < 0x10 ) /* test */
                        opType = OP_LOAD;
                    else /* not */
                        opType = OP_STORE;
                    break;
                }
                default:
                    break;
            }
            break;
        }
        case 0x80: /* cmpb,... $,(%rax) */
        case 0x81: /* cmpl,... $,(%rax) */
        case 0x83: /* cmpl,... $,(%rax) */
            opType = CPLVirtualMemGetOpTypeImm(rip[1]);
            break;
        case 0x84: /* test %al,(%rax) */
        case 0x85: /* test %eax,(%rax) */
            opType = OP_LOAD;
            break;
        case 0x88: /* mov %al,(%rax) */
            opType = OP_STORE;
            break;
        case 0x89: /* mov %eax,(%rax) */
            opType = OP_STORE;
            break;
        case 0x8a: /* mov (%rax),%al */
            opType = OP_LOAD;
            break;
        case 0x8b: /* mov (%rax),%eax */
            opType = OP_LOAD;
            break;
        case 0xd9: /* 387 float */
        {
            if( rip[1] < 0x08 ) /* flds (%eax) */
                opType = OP_LOAD;
            else if( rip[1] >= 0x18 && rip[1] <= 0x20 ) /* fstps (%eax) */
                opType = OP_STORE;
            break;
        }
        case 0xf2: /* SSE 2 */
        {
            switch( rip[1] )
            {
                case 0x0f:
                {
                    switch( rip[2] )
                    {
                        case 0x10: /* movsd (%rax),%xmm0 */
                            opType = OP_LOAD;
                            break;
                        case 0x11: /* movsd %xmm0,(%rax) */
                            opType = OP_STORE;
                            break;
                        case 0x58: /* addsd (%rax),%xmm0 */
                            opType = OP_LOAD;
                            break;
                        case 0x59: /* mulsd (%rax),%xmm0 */
                            opType = OP_LOAD;
                            break;
                        case 0x5c: /* subsd (%rax),%xmm0 */
                            opType = OP_LOAD;
                            break;
                        case 0x5e: /* divsd (%rax),%xmm0 */
                            opType = OP_LOAD;
                            break;
                        default:
                            break;
                    }
                    break;
                }
#if defined(__x86_64__)
                case 0x41: /* reg=%xmm0, X=%r8 or %r8,%rax,1 */
                case 0x42: /* reg=%xmm0, X=%rax,%r8,1 */
                case 0x43: /* reg=%xmm0, X=%r8,%r8,1 */
                case 0x44: /* reg=%xmm8, X=%rax or %rax,%rax,1*/
                case 0x45: /* reg=%xmm8, X=%r8 or %r8,%rax,1 */
                case 0x46: /* reg=%xmm8, X=%rax,%r8,1 */
                case 0x47: /* reg=%xmm8, X=%r8,%r8,1 */
                {
                    switch( rip[2] )
                    {
                        case 0x0f:
                        {
                            switch( rip[3] )
                            {
                                case 0x10: /* movsd (X),reg */
                                    opType = OP_LOAD;
                                    break;
                                case 0x11: /* movsd reg,(X) */
                                    opType = OP_STORE;
                                    break;
                                case 0x58: /* addsd (X),reg */
                                    opType = OP_LOAD;
                                    break;
                                case 0x59: /* mulsd (X),reg */
                                    opType = OP_LOAD;
                                    break;
                                case 0x5c: /* subsd (X),reg */
                                    opType = OP_LOAD;
                                    break;
                                case 0x5e: /* divsd (X),reg */
                                    opType = OP_LOAD;
                                    break;
                                default:
                                    break;
                            }
                            break;
                        }
                        default:
                            break;
                    }
                    break;
                }
#endif
                default:
                    break;
            }
            break;
        }
1416         case 0xf3:
1417         {
1418             switch( rip[1] )
1419             {
1420                 case 0x0f: /* SSE 2 */
1421                 {
1422                     switch( rip[2] )
1423                     {
1424                         case 0x10: /* movss (%rax),%xmm0 */
1425                             opType = OP_LOAD;
1426                             break;
1427                         case 0x11: /* movss %xmm0,(%rax) */
1428                             opType = OP_STORE;
1429                             break;
1430                         case 0x6f: /* movdqu (%rax),%xmm0 */
1431                             opType = OP_LOAD;
1432                             break;
1433                         case 0x7f: /* movdqu %xmm0,(%rax) */
1434                             opType = OP_STORE;
1435                             break;
1436                         default:
1437                             break;
1438                     }
1439                     break;
1440                 }
1441 #if defined(__x86_64__)
1442                 case 0x41: /* reg=%xmm0, X=%r8 */
1443                 case 0x42: /* reg=%xmm0, X=%rax,%r8,1 */
1444                 case 0x43: /* reg=%xmm0, X=%r8,%r8,1 */
1445                 case 0x44: /* reg=%xmm8, X = %rax */
1446                 case 0x45: /* reg=%xmm8, X = %r8 */
1447                 case 0x46: /* reg=%xmm8, X = %rax,%r8,1 */
1448                 case 0x47: /* reg=%xmm8, X = %r8,%r8,1 */
1449                 {
1450                     switch( rip[2] )
1451                     {
1452                         case 0x0f: /* SSE 2 */
1453                         {
1454                             switch( rip[3] )
1455                             {
1456                                 case 0x10: /* movss (X),reg */
1457                                     opType = OP_LOAD;
1458                                     break;
1459                                 case 0x11: /* movss reg,(X) */
1460                                     opType = OP_STORE;
1461                                     break;
1462                                 case 0x6f: /* movdqu (X),reg */
1463                                     opType = OP_LOAD;
1464                                     break;
1465                                 case 0x7f: /* movdqu reg,(X) */
1466                                     opType = OP_STORE;
1467                                     break;
1468                                 default:
1469                                     break;
1470                             }
1471                             break;
1472                         }
1473                         default:
1474                             break;
1475                     }
1476                     break;
1477                 }
1478                 case 0x48:
1479                 {
1480                     switch( rip[2] )
1481                     {
1482                         case 0xa5: /* rep movsq %ds:(%rsi),%es:(%rdi) */
1483                             opType = OP_MOVS_RSI_RDI;
1484                             break;
1485                         default:
1486                             break;
1487                     }
1488                     break;
1489                 }
1490 #endif
1491                 case 0xa4: /* rep movsb %ds:(%rsi),%es:(%rdi) */
1492                 case 0xa5: /* rep movsl %ds:(%rsi),%es:(%rdi) */
1493                     opType = OP_MOVS_RSI_RDI;
1494                     break;
1495                 case 0xa6: /* repz cmpsb %es:(%rdi),%ds:(%rsi) */
1496                     opType = OP_LOAD;
1497                     break;
1498                 default:
1499                     break;
1500             }
1501             break;
1502         }
1503         case 0xf6: /* testb $,(%rax) or notb (%rax) */
1504         case 0xf7: /* testl $,(%rax) or notl (%rax) */
1505         {
1506             if( rip[1] < 0x10 ) /* test */
1507                 opType = OP_LOAD;
1508             else /* not */
1509                 opType = OP_STORE;
1510             break;
1511         }
1512         default:
1513             break;
1514     }
1515 #endif
1516     return opType;
1517 }
1518 #endif
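
// Illustrative note (not exhaustive): the two-byte sequence 0x8b 0x00
// encodes "mov (%rax),%eax", which the decoder above classifies as OP_LOAD,
// while 0x89 0x00 ("mov %eax,(%rax)") is OP_STORE. Instructions absent from
// the table leave the result at OP_UNKNOWN, which the manager thread below
// conservatively treats like a possible store on read-write mappings
// (see the "msg.opType != OP_LOAD" tests).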

/************************************************************************/
/*                    CPLVirtualMemManagerPinAddrInternal()             */
/************************************************************************/

static int
CPLVirtualMemManagerPinAddrInternal( CPLVirtualMemMsgToWorkerThread* msg )
{
    char wait_ready = '\0';
    char response_buf[4] = {};

    // Wait for the helper thread to be ready to process another request.
    while( true )
    {
        const int ret =
            static_cast<int>(read( pVirtualMemManager->pipefd_wait_thread[0],
                                   &wait_ready, 1 ));
        if( ret < 0 && errno == EINTR )
        {
            // NOP
        }
        else
        {
            IGNORE_OR_ASSERT_IN_DEBUG(ret == 1);
            break;
        }
    }

    // Pass the address that caused the fault to the helper thread.
    const ssize_t nRetWrite =
        write(pVirtualMemManager->pipefd_to_thread[1], msg, sizeof(*msg));
    IGNORE_OR_ASSERT_IN_DEBUG(nRetWrite == sizeof(*msg));

    // Wait until the helper thread has fixed the fault.
    while( true )
    {
        const int ret =
            static_cast<int>(read(pVirtualMemManager->pipefd_from_thread[0],
                                  response_buf, 4));
        if( ret < 0 && errno == EINTR )
        {
            // NOP
        }
        else
        {
            IGNORE_OR_ASSERT_IN_DEBUG(ret == 4);
            break;
        }
    }

    // In case the helper thread did not recognize the address as being
    // one that it should take care of, just rely on the previous SIGSEGV
    // handler (which might abort the process).
    return( memcmp(response_buf, MAPPING_FOUND, 4) == 0 );
}
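
// The handshake with the helper thread, sketched (three pipes, accessed
// with async-signal-safe read()/write() only):
//
//   faulting thread                       helper thread
//   ---------------                       -------------
//   read(pipefd_wait_thread)   <--------  write(pipefd_wait_thread, ready)
//   write(pipefd_to_thread, msg) ------>  read(pipefd_to_thread, &msg)
//                                         ... remap/mprotect the page ...
//   read(pipefd_from_thread)   <--------  write(pipefd_from_thread, code)
//
// where "code" is the 4-byte MAPPING_FOUND or MAPPING_NOT_FOUND marker.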

/************************************************************************/
/*                      CPLVirtualMemPin()                              */
/************************************************************************/

void CPLVirtualMemPin( CPLVirtualMem* ctxt,
                       void* pAddr, size_t nSize, int bWriteOp )
{
    if( ctxt->eType == VIRTUAL_MEM_TYPE_FILE_MEMORY_MAPPED )
        return;

    CPLVirtualMemMsgToWorkerThread msg;

    memset(&msg, 0, sizeof(msg));
    msg.hRequesterThread = pthread_self();
    msg.opType = (bWriteOp) ? OP_STORE : OP_LOAD;

    char* pBase = reinterpret_cast<char*>(ALIGN_DOWN(pAddr, ctxt->nPageSize));
    const size_t n =
        (reinterpret_cast<char*>(pAddr) - pBase + nSize +
         ctxt->nPageSize - 1) / ctxt->nPageSize;
    for( size_t i = 0; i < n; i++ )
    {
        msg.pFaultAddr = pBase + i * ctxt->nPageSize;
        CPLVirtualMemManagerPinAddrInternal(&msg);
    }
}
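
// Illustrative use (a sketch, not part of the library; ctxt, pBuffer and
// nBufferSize are hypothetical): pinning a range before handing it to code
// that must not fault, such as a write() system call.
//
//   CPLVirtualMemPin(ctxt, pBuffer, nBufferSize, /* bWriteOp = */ FALSE);
//   // All pages of [pBuffer, pBuffer + nBufferSize) are now mapped, so the
//   // kernel will not hit an unresolved fault when reading from them.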

/************************************************************************/
/*                   CPLVirtualMemManagerSIGSEGVHandler()               */
/************************************************************************/

#if defined(__x86_64__)
#define REG_IP      REG_RIP
#define REG_SI      REG_RSI
#define REG_DI      REG_RDI
#elif defined(__i386__)
#define REG_IP      REG_EIP
#define REG_SI      REG_ESI
#define REG_DI      REG_EDI
#endif

// Must take care of only using "asynchronous-signal-safe" functions in a
// signal handler; pthread_self(), read() and write() are such.  See:
// https://www.securecoding.cert.org/confluence/display/seccode/SIG30-C.+Call+only+asynchronous-safe+functions+within+signal+handlers
static void CPLVirtualMemManagerSIGSEGVHandler( int the_signal,
                                                siginfo_t* the_info,
                                                void* the_ctxt )
{
    CPLVirtualMemMsgToWorkerThread msg;

    memset(&msg, 0, sizeof(msg));
    msg.pFaultAddr = the_info->si_addr;
    msg.hRequesterThread = pthread_self();
    msg.opType = OP_UNKNOWN;

#if defined(__x86_64__) || defined(__i386__)
    ucontext_t* the_ucontext = static_cast<ucontext_t *>(the_ctxt);
    const GByte* rip = reinterpret_cast<const GByte*>(
        the_ucontext->uc_mcontext.gregs[REG_IP]);
    msg.opType = CPLVirtualMemGetOpType(rip);
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
    fprintfstderr("at rip %p, bytes: %02x %02x %02x %02x\n",
                  rip, rip[0], rip[1], rip[2], rip[3]);
#endif
    if( msg.opType == OP_MOVS_RSI_RDI )
    {
        void* rsi = reinterpret_cast<void*>(
            the_ucontext->uc_mcontext.gregs[REG_SI]);
        void* rdi = reinterpret_cast<void*>(
            the_ucontext->uc_mcontext.gregs[REG_DI]);

#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
        fprintfstderr("fault=%p rsi=%p rdi=%p\n", msg.pFaultAddr, rsi, rdi);
#endif
        if( msg.pFaultAddr == rsi )
        {
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
            fprintfstderr("load\n");
#endif
            msg.opType = OP_LOAD;
        }
        else if( msg.pFaultAddr == rdi )
        {
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
            fprintfstderr("store\n");
#endif
            msg.opType = OP_STORE;
        }
    }
#ifdef DEBUG_VIRTUALMEM
    else if( msg.opType == OP_UNKNOWN )
    {
        static bool bHasWarned = false;
        if( !bHasWarned )
        {
            bHasWarned = true;
            fprintfstderr("at rip %p, unknown bytes: %02x %02x %02x %02x\n",
                          rip, rip[0], rip[1], rip[2], rip[3]);
        }
    }
#endif
#endif

#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
    fprintfstderr("entering handler for %X (addr=%p)\n",
                  pthread_self(), the_info->si_addr);
#endif

    if( the_info->si_code != SEGV_ACCERR )
    {
        pVirtualMemManager->oldact.sa_sigaction(the_signal, the_info, the_ctxt);
        return;
    }

    if( !CPLVirtualMemManagerPinAddrInternal(&msg) )
    {
        // In case the helper thread did not recognize the address as being
        // one that it should take care of, just rely on the previous SIGSEGV
        // handler (which might abort the process).
        pVirtualMemManager->oldact.sa_sigaction(the_signal, the_info, the_ctxt);
    }

#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
    fprintfstderr("leaving handler for %X (addr=%p)\n",
                  pthread_self(), the_info->si_addr);
#endif
}
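
// Note on the OP_MOVS_RSI_RDI disambiguation above: a faulting "rep movs"
// may touch two of our mappings at once (source and destination), so the
// opcode alone does not say which side faulted. Comparing the fault address
// with the saved %rsi/%rdi registers settles it: equal to %rsi means the
// load side, equal to %rdi the store side.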

/************************************************************************/
/*                      CPLVirtualMemManagerThread()                    */
/************************************************************************/

static void CPLVirtualMemManagerThread( void* /* unused_param */ )
{
    while( true )
    {
        char i_m_ready = 1;
        CPLVirtualMemVMA* ctxt = nullptr;
        bool bMappingFound = false;
        CPLVirtualMemMsgToWorkerThread msg;

        // Signal that we are ready to process a new request.
        ssize_t nRetWrite =
            write(pVirtualMemManager->pipefd_wait_thread[1], &i_m_ready, 1);
        IGNORE_OR_ASSERT_IN_DEBUG(nRetWrite == 1);

        // Fetch the address to process.
        const ssize_t nRetRead =
            read(pVirtualMemManager->pipefd_to_thread[0], &msg,
                 sizeof(msg));
        IGNORE_OR_ASSERT_IN_DEBUG(nRetRead == sizeof(msg));

        // If CPLVirtualMemManagerTerminate() is called, it will use BYEBYE_ADDR
        // as a means to ask for our termination.
        if( msg.pFaultAddr == BYEBYE_ADDR )
            break;

        /* Look for a mapping that contains addr */
        CPLAcquireMutex(hVirtualMemManagerMutex, 1000.0);
        for( int i = 0; i < pVirtualMemManager->nVirtualMemCount; i++ )
        {
            ctxt = pVirtualMemManager->pasVirtualMem[i];
            if( static_cast<char*>(msg.pFaultAddr) >=
                    static_cast<char*>(ctxt->sBase.pData) &&
                static_cast<char*>(msg.pFaultAddr) <
                    static_cast<char*>(ctxt->sBase.pData) + ctxt->sBase.nSize )
            {
                bMappingFound = true;
                break;
            }
        }
        CPLReleaseMutex(hVirtualMemManagerMutex);

        if( bMappingFound )
        {
            char * const start_page_addr =
                static_cast<char*>(
                    ALIGN_DOWN(msg.pFaultAddr, ctxt->sBase.nPageSize));
            const int iPage = static_cast<int>(
                (start_page_addr -
                 static_cast<char*>(ctxt->sBase.pData)) / ctxt->sBase.nPageSize);

            if( iPage == ctxt->iLastPage )
            {
                // In case 2 threads try to access the same page concurrently,
                // it is possible that we are asked to map the page again
                // whereas it is already mapped. However, if the number of
                // successive retries is too high, it is certainly a sign that
                // something else happened, such as trying to write-access a
                // read-only page. 100 is a bit of a magic number: Rouault
                // believes it must be at least the number of concurrent
                // threads, and 100 seems to be really safe!
                ctxt->nRetry++;
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
                fprintfstderr("retry on page %d : %d\n",
                              iPage, ctxt->nRetry);
#endif
                if( ctxt->nRetry >= 100 )
                {
                    CPLError(CE_Failure, CPLE_AppDefined,
                             "CPLVirtualMemManagerThread: trying to "
                             "write into read-only mapping");
                    nRetWrite = write(pVirtualMemManager->pipefd_from_thread[1],
                                      MAPPING_NOT_FOUND, 4);
                    IGNORE_OR_ASSERT_IN_DEBUG(nRetWrite == 4);
                    break;
                }
                else if( msg.opType != OP_LOAD &&
                         ctxt->sBase.eAccessMode == VIRTUALMEM_READWRITE &&
                         !TEST_BIT(ctxt->pabitRWMappedPages, iPage) )
                {
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
                    fprintfstderr("switching page %d to write mode\n",
                                  iPage);
#endif
                    SET_BIT(ctxt->pabitRWMappedPages, iPage);
                    const int nRet =
                        mprotect(start_page_addr, ctxt->sBase.nPageSize,
                                 PROT_READ | PROT_WRITE);
                    IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
                }
            }
            else
            {
                ctxt->iLastPage = iPage;
                ctxt->nRetry = 0;

                if( TEST_BIT(ctxt->pabitMappedPages, iPage) )
                {
                    if( msg.opType != OP_LOAD &&
                        ctxt->sBase.eAccessMode == VIRTUALMEM_READWRITE &&
                        !TEST_BIT(ctxt->pabitRWMappedPages, iPage) )
                    {
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
                        fprintfstderr("switching page %d to write mode\n",
                                      iPage);
#endif
                        SET_BIT(ctxt->pabitRWMappedPages, iPage);
                        const int nRet =
                            mprotect(start_page_addr, ctxt->sBase.nPageSize,
                                     PROT_READ | PROT_WRITE);
                        IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
                    }
                    else
                    {
#if defined DEBUG_VIRTUALMEM && defined DEBUG_VERBOSE
                        fprintfstderr("unexpected case for page %d\n",
                                      iPage);
#endif
                    }
                }
                else
                {
                    void * const pPageToFill =
                        CPLVirtualMemGetPageToFill(ctxt, start_page_addr);

                    size_t nToFill = ctxt->sBase.nPageSize;
                    if( start_page_addr + nToFill >=
                        static_cast<char*>(ctxt->sBase.pData) +
                        ctxt->sBase.nSize )
                    {
                        nToFill =
                            static_cast<char*>(ctxt->sBase.pData) +
                            ctxt->sBase.nSize - start_page_addr;
                    }

                    ctxt->pfnCachePage(
                        reinterpret_cast<CPLVirtualMem*>(ctxt),
                        start_page_addr - static_cast<char*>(ctxt->sBase.pData),
                        pPageToFill,
                        nToFill,
                        ctxt->sBase.pCbkUserData);

                    // Now remap this page to its target address and
                    // register it in the LRU.
                    CPLVirtualMemAddPage(ctxt, start_page_addr, pPageToFill,
                                         msg.opType, msg.hRequesterThread);
                }
            }

            // Warn the segfault handler that we have finished our job.
            nRetWrite = write(pVirtualMemManager->pipefd_from_thread[1],
                              MAPPING_FOUND, 4);
            IGNORE_OR_ASSERT_IN_DEBUG(nRetWrite == 4);
        }
        else
        {
            // Warn the segfault handler that we have finished our job
            // but that the fault didn't occur in a memory range that
            // is under our responsibility.
            CPLError(CE_Failure, CPLE_AppDefined,
                     "CPLVirtualMemManagerThread: no mapping found");
            nRetWrite = write(pVirtualMemManager->pipefd_from_thread[1],
                              MAPPING_NOT_FOUND, 4);
            IGNORE_OR_ASSERT_IN_DEBUG(nRetWrite == 4);
        }
    }
}

/************************************************************************/
/*                       CPLVirtualMemManagerInit()                     */
/************************************************************************/

static bool CPLVirtualMemManagerInit()
{
    CPLMutexHolderD(&hVirtualMemManagerMutex);
    if( pVirtualMemManager != nullptr )
        return true;

    struct sigaction act;
    pVirtualMemManager = static_cast<CPLVirtualMemManager *>(
        VSI_MALLOC_VERBOSE(sizeof(CPLVirtualMemManager)) );
    if( pVirtualMemManager == nullptr )
        return false;
    pVirtualMemManager->pasVirtualMem = nullptr;
    pVirtualMemManager->nVirtualMemCount = 0;
    int nRet = pipe(pVirtualMemManager->pipefd_to_thread);
    IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
    nRet = pipe(pVirtualMemManager->pipefd_from_thread);
    IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
    nRet = pipe(pVirtualMemManager->pipefd_wait_thread);
    IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
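
    // Roles of the three pipes, summarizing how they are used elsewhere in
    // this file:
    // - pipefd_wait_thread: helper -> faulting thread, "ready for a request".
    // - pipefd_to_thread: faulting thread -> helper, the fault descriptor.
    // - pipefd_from_thread: helper -> faulting thread, 4-byte status code.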

    // Install our custom SIGSEGV handler.
    act.sa_sigaction = CPLVirtualMemManagerSIGSEGVHandler;
    sigemptyset(&act.sa_mask);
    act.sa_flags = SA_SIGINFO;
    nRet = sigaction(SIGSEGV, &act, &pVirtualMemManager->oldact);
    IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);

    // Start the helper thread.
    pVirtualMemManager->hHelperThread =
            CPLCreateJoinableThread(CPLVirtualMemManagerThread, nullptr);
    if( pVirtualMemManager->hHelperThread == nullptr )
    {
        VSIFree(pVirtualMemManager);
        pVirtualMemManager = nullptr;
        return false;
    }
    return true;
}

/************************************************************************/
/*                      CPLVirtualMemManagerTerminate()                 */
/************************************************************************/

void CPLVirtualMemManagerTerminate(void)
{
    if( pVirtualMemManager == nullptr )
        return;

    CPLVirtualMemMsgToWorkerThread msg;
    msg.pFaultAddr = BYEBYE_ADDR;
    msg.opType = OP_UNKNOWN;
    memset(&msg.hRequesterThread, 0, sizeof(msg.hRequesterThread));

    // Wait for the helper thread to be ready.
    char wait_ready;
    const ssize_t nRetRead =
        read(pVirtualMemManager->pipefd_wait_thread[0], &wait_ready, 1);
    IGNORE_OR_ASSERT_IN_DEBUG(nRetRead == 1);

    // Ask it to terminate.
    const ssize_t nRetWrite =
        write(pVirtualMemManager->pipefd_to_thread[1], &msg, sizeof(msg));
    IGNORE_OR_ASSERT_IN_DEBUG(nRetWrite == sizeof(msg));

    // Wait for its termination.
    CPLJoinThread(pVirtualMemManager->hHelperThread);

    // Clean up everything.
    while( pVirtualMemManager->nVirtualMemCount > 0 )
        CPLVirtualMemFree(
            reinterpret_cast<CPLVirtualMem*>(pVirtualMemManager->
                pasVirtualMem[pVirtualMemManager->nVirtualMemCount - 1]));
    CPLFree(pVirtualMemManager->pasVirtualMem);

    close(pVirtualMemManager->pipefd_to_thread[0]);
    close(pVirtualMemManager->pipefd_to_thread[1]);
    close(pVirtualMemManager->pipefd_from_thread[0]);
    close(pVirtualMemManager->pipefd_from_thread[1]);
    close(pVirtualMemManager->pipefd_wait_thread[0]);
    close(pVirtualMemManager->pipefd_wait_thread[1]);

    // Restore previous handler.
    sigaction(SIGSEGV, &pVirtualMemManager->oldact, nullptr);

    CPLFree(pVirtualMemManager);
    pVirtualMemManager = nullptr;

    CPLDestroyMutex(hVirtualMemManagerMutex);
    hVirtualMemManagerMutex = nullptr;
}

#else  // HAVE_VIRTUAL_MEM_VMA

CPLVirtualMem *CPLVirtualMemNew(
    size_t /* nSize */,
    size_t /* nCacheSize */,
    size_t /* nPageSizeHint */,
    int /* bSingleThreadUsage */,
    CPLVirtualMemAccessMode /* eAccessMode */,
    CPLVirtualMemCachePageCbk /* pfnCachePage */,
    CPLVirtualMemUnCachePageCbk /* pfnUnCachePage */,
    CPLVirtualMemFreeUserData /* pfnFreeUserData */,
    void * /* pCbkUserData */ )
{
    CPLError(CE_Failure, CPLE_NotSupported,
             "CPLVirtualMemNew() unsupported on "
             "this operating system / configuration");
    return nullptr;
}

void CPLVirtualMemDeclareThread( CPLVirtualMem* /* ctxt */ ) {}

void CPLVirtualMemUnDeclareThread( CPLVirtualMem* /* ctxt */ ) {}

void CPLVirtualMemPin( CPLVirtualMem* /* ctxt */,
                       void* /* pAddr */,
                       size_t /* nSize */,
                       int /* bWriteOp */)
{}

void CPLVirtualMemManagerTerminate( void ) {}

#endif  // HAVE_VIRTUAL_MEM_VMA

#ifdef HAVE_MMAP

/************************************************************************/
/*                     CPLVirtualMemFreeFileMemoryMapped()              */
/************************************************************************/

static void CPLVirtualMemFreeFileMemoryMapped( CPLVirtualMem* ctxt )
{
    const size_t nMappingSize =
        ctxt->nSize + static_cast<GByte*>(ctxt->pData) -
        static_cast<GByte*>(ctxt->pDataToFree);
    const int nRet = munmap(ctxt->pDataToFree, nMappingSize);
    IGNORE_OR_ASSERT_IN_DEBUG(nRet == 0);
}

/************************************************************************/
/*                       CPLVirtualMemFileMapNew()                      */
/************************************************************************/

CPLVirtualMem *
CPLVirtualMemFileMapNew( VSILFILE* fp,
                         vsi_l_offset nOffset,
                         vsi_l_offset nLength,
                         CPLVirtualMemAccessMode eAccessMode,
                         CPLVirtualMemFreeUserData pfnFreeUserData,
                         void *pCbkUserData )
{
#if SIZEOF_VOIDP == 4
    if( nLength != static_cast<size_t>(nLength) )
    {
        CPLError(
            CE_Failure, CPLE_AppDefined,
            "nLength = " CPL_FRMT_GUIB " incompatible with 32 bit architecture",
            nLength);
        return nullptr;
    }
    if( nOffset + CPLGetPageSize() !=
        static_cast<vsi_l_offset>(
            static_cast<off_t>(nOffset + CPLGetPageSize())) )
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "nOffset = " CPL_FRMT_GUIB
                 " incompatible with 32 bit architecture",
                 nOffset);
        return nullptr;
    }
#endif

    int fd = static_cast<int>(reinterpret_cast<GUIntptr_t>(
        VSIFGetNativeFileDescriptorL(fp)));
    if( fd == 0 )
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Cannot operate on a virtual file");
        return nullptr;
    }

    const off_t nAlignedOffset =
        static_cast<off_t>((nOffset / CPLGetPageSize()) * CPLGetPageSize());
    size_t nAlignment = static_cast<size_t>(nOffset - nAlignedOffset);
    size_t nMappingSize = static_cast<size_t>(nLength + nAlignment);
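
    // Worked example (assuming a 4096-byte page size): for nOffset = 10000
    // and nLength = 500, nAlignedOffset = 8192, nAlignment = 1808 and
    // nMappingSize = 2308, so the mmap() below covers the page-aligned
    // range [8192, 10500) and pData points 1808 bytes into it.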

    // Need to ensure that the requested extent fits into the file size,
    // otherwise SIGBUS errors will occur when using the mapping.
    vsi_l_offset nCurPos = VSIFTellL(fp);
    if( VSIFSeekL(fp, 0, SEEK_END) != 0 )
        return nullptr;
    vsi_l_offset nFileSize = VSIFTellL(fp);
    if( nFileSize < nOffset + nLength )
    {
        if( eAccessMode != VIRTUALMEM_READWRITE )
        {
            CPLError( CE_Failure, CPLE_AppDefined,
                      "Trying to map an extent outside of the file");
            CPL_IGNORE_RET_VAL(VSIFSeekL(fp, nCurPos, SEEK_SET));
            return nullptr;
        }
        else
        {
            char ch = 0;
            if( VSIFSeekL(fp, nOffset + nLength - 1, SEEK_SET) != 0 ||
                VSIFWriteL(&ch, 1, 1, fp) != 1 )
            {
                CPLError(CE_Failure, CPLE_AppDefined,
                         "Cannot extend file to mapping size");
                CPL_IGNORE_RET_VAL(VSIFSeekL(fp, nCurPos, SEEK_SET));
                return nullptr;
            }
        }
    }
    if( VSIFSeekL(fp, nCurPos, SEEK_SET) != 0 )
        return nullptr;

    CPLVirtualMem* ctxt = static_cast<CPLVirtualMem *>(
        VSI_CALLOC_VERBOSE(1, sizeof(CPLVirtualMem)));
    if( ctxt == nullptr )
        return nullptr;

    void* addr = mmap(nullptr, nMappingSize,
                      eAccessMode == VIRTUALMEM_READWRITE
                      ? PROT_READ | PROT_WRITE : PROT_READ,
                      MAP_SHARED, fd, nAlignedOffset);
    if( addr == MAP_FAILED )
    {
        int myerrno = errno;
        CPLError(CE_Failure, CPLE_AppDefined,
                 "mmap() failed : %s", strerror(myerrno));
        VSIFree(ctxt);
        // cppcheck thinks we are leaking addr.
        // cppcheck-suppress memleak
        return nullptr;
    }

    ctxt->eType = VIRTUAL_MEM_TYPE_FILE_MEMORY_MAPPED;
    ctxt->nRefCount = 1;
    ctxt->eAccessMode = eAccessMode;
    ctxt->pData = static_cast<GByte *>(addr) + nAlignment;
    ctxt->pDataToFree = addr;
    ctxt->nSize = static_cast<size_t>(nLength);
    ctxt->nPageSize = CPLGetPageSize();
    ctxt->bSingleThreadUsage = false;
    ctxt->pfnFreeUserData = pfnFreeUserData;
    ctxt->pCbkUserData = pCbkUserData;

    return ctxt;
}
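
// Illustrative use (a sketch, not part of the library; the file name and
// sizes are made up, and the file must have a native file descriptor, i.e.
// not be a purely virtual VSI file):
//
//   VSILFILE* fp = VSIFOpenL("/path/to/file.bin", "rb");
//   CPLVirtualMem* psMem = CPLVirtualMemFileMapNew(
//       fp, 0, 1024 * 1024, VIRTUALMEM_READONLY, nullptr, nullptr);
//   if( psMem )
//   {
//       const GByte* pabyData =
//           static_cast<GByte*>(CPLVirtualMemGetAddr(psMem));
//       // ... read pabyData[0 .. CPLVirtualMemGetSize(psMem) - 1] ...
//       CPLVirtualMemFree(psMem);
//   }
//   VSIFCloseL(fp);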

#else  // HAVE_MMAP

CPLVirtualMem *CPLVirtualMemFileMapNew(
    VSILFILE* /* fp */,
    vsi_l_offset /* nOffset */,
    vsi_l_offset /* nLength */,
    CPLVirtualMemAccessMode /* eAccessMode */,
    CPLVirtualMemFreeUserData /* pfnFreeUserData */,
    void * /* pCbkUserData */ )
{
    CPLError(CE_Failure, CPLE_NotSupported,
             "CPLVirtualMemFileMapNew() unsupported on this "
             "operating system / configuration");
    return nullptr;
}

#endif  // HAVE_MMAP

/************************************************************************/
/*                         CPLGetPageSize()                             */
/************************************************************************/

size_t CPLGetPageSize( void )
{
#if defined(HAVE_MMAP) || defined(HAVE_VIRTUAL_MEM_VMA)
    return static_cast<size_t>( sysconf(_SC_PAGESIZE) );
#else
    return 0;
#endif
}

/************************************************************************/
/*                   CPLIsVirtualMemFileMapAvailable()                  */
/************************************************************************/

int CPLIsVirtualMemFileMapAvailable( void )
{
#ifdef HAVE_MMAP
    return TRUE;
#else
    return FALSE;
#endif
}

/************************************************************************/
/*                        CPLVirtualMemFree()                           */
/************************************************************************/

void CPLVirtualMemFree( CPLVirtualMem* ctxt )
{
    if( ctxt == nullptr || --(ctxt->nRefCount) > 0 )
        return;

    if( ctxt->pVMemBase != nullptr )
    {
        CPLVirtualMemFree(ctxt->pVMemBase);
        if( ctxt->pfnFreeUserData != nullptr )
            ctxt->pfnFreeUserData(ctxt->pCbkUserData);
        CPLFree(ctxt);
        return;
    }

#ifdef HAVE_MMAP
    if( ctxt->eType == VIRTUAL_MEM_TYPE_FILE_MEMORY_MAPPED )
        CPLVirtualMemFreeFileMemoryMapped(ctxt);
#endif
#ifdef HAVE_VIRTUAL_MEM_VMA
    if( ctxt->eType == VIRTUAL_MEM_TYPE_VMA )
        CPLVirtualMemFreeFileMemoryMapped(
            reinterpret_cast<CPLVirtualMemVMA*>(ctxt));
#endif

    if( ctxt->pfnFreeUserData != nullptr )
        ctxt->pfnFreeUserData(ctxt->pCbkUserData);
    CPLFree(ctxt);
}

/************************************************************************/
/*                      CPLVirtualMemGetAddr()                          */
/************************************************************************/

void* CPLVirtualMemGetAddr( CPLVirtualMem* ctxt )
{
    return ctxt->pData;
}

/************************************************************************/
/*                     CPLVirtualMemIsFileMapping()                     */
/************************************************************************/

int CPLVirtualMemIsFileMapping( CPLVirtualMem* ctxt )
{
    return ctxt->eType == VIRTUAL_MEM_TYPE_FILE_MEMORY_MAPPED;
}

/************************************************************************/
/*                     CPLVirtualMemGetAccessMode()                     */
/************************************************************************/

CPLVirtualMemAccessMode CPLVirtualMemGetAccessMode( CPLVirtualMem* ctxt )
{
    return ctxt->eAccessMode;
}

/************************************************************************/
/*                      CPLVirtualMemGetPageSize()                      */
/************************************************************************/

size_t CPLVirtualMemGetPageSize( CPLVirtualMem* ctxt )
{
    return ctxt->nPageSize;
}

/************************************************************************/
/*                        CPLVirtualMemGetSize()                        */
/************************************************************************/

size_t CPLVirtualMemGetSize( CPLVirtualMem* ctxt )
{
    return ctxt->nSize;
}

/************************************************************************/
/*                   CPLVirtualMemIsAccessThreadSafe()                  */
/************************************************************************/

int CPLVirtualMemIsAccessThreadSafe( CPLVirtualMem* ctxt )
{
    return !ctxt->bSingleThreadUsage;
}

/************************************************************************/
/*                       CPLVirtualMemDerivedNew()                      */
/************************************************************************/

CPLVirtualMem *CPLVirtualMemDerivedNew(
    CPLVirtualMem* pVMemBase,
    vsi_l_offset nOffset,
    vsi_l_offset nSize,
    CPLVirtualMemFreeUserData pfnFreeUserData,
    void *pCbkUserData )
{
    if( nOffset + nSize > pVMemBase->nSize )
        return nullptr;

    CPLVirtualMem* ctxt = static_cast<CPLVirtualMem *>(
        VSI_CALLOC_VERBOSE(1, sizeof(CPLVirtualMem)));
    if( ctxt == nullptr )
        return nullptr;

    ctxt->eType = pVMemBase->eType;
    ctxt->nRefCount = 1;
    ctxt->pVMemBase = pVMemBase;
    pVMemBase->nRefCount++;
    ctxt->eAccessMode = pVMemBase->eAccessMode;
    ctxt->pData = static_cast<GByte *>(pVMemBase->pData) + nOffset;
    ctxt->pDataToFree = nullptr;
    ctxt->nSize = static_cast<size_t>(nSize);
    ctxt->nPageSize = pVMemBase->nPageSize;
    ctxt->bSingleThreadUsage = CPL_TO_BOOL(pVMemBase->bSingleThreadUsage);
    ctxt->pfnFreeUserData = pfnFreeUserData;
    ctxt->pCbkUserData = pCbkUserData;

    return ctxt;
}
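
// Illustrative use (a sketch; psBase, nOffsetIntoBase and nViewSize are
// hypothetical): creating a window on an existing mapping. The derived
// object increments the base's nRefCount, so freeing the base before the
// derived view is safe; the base is only released once both are freed.
//
//   CPLVirtualMem* psView = CPLVirtualMemDerivedNew(
//       psBase, nOffsetIntoBase, nViewSize, nullptr, nullptr);
//   // ... use CPLVirtualMemGetAddr(psView) ...
//   CPLVirtualMemFree(psView);  // also drops the reference on psBase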