1 /* Interface to mmap style I/O
2 
3    (c) 2006-2008 (W3C) MIT, ERCIM, Keio University
4    See tidy.h for the copyright notice.
5 
6    Originally contributed by Cory Nelson and Nuno Lopes
7 
8 */
9 
10 /* keep these here to keep file non-empty */
11 #include "forward.h"
12 #include "mappedio.h"
13 
14 #if SUPPORT_POSIX_MAPPED_FILES
15 
16 #include "fileio.h"
17 
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 #include <stdio.h>
22 
23 #include <sys/mman.h>
24 
25 
26 typedef struct
27 {
28     TidyAllocator *allocator;
29     const byte *base;
30     size_t pos, size;
31 } MappedFileSource;
32 
mapped_getByte(void * sourceData)33 static int TIDY_CALL mapped_getByte( void* sourceData )
34 {
35     MappedFileSource* fin = (MappedFileSource*) sourceData;
36     return fin->base[fin->pos++];
37 }
38 
mapped_eof(void * sourceData)39 static Bool TIDY_CALL mapped_eof( void* sourceData )
40 {
41     MappedFileSource* fin = (MappedFileSource*) sourceData;
42     return (fin->pos >= fin->size);
43 }
44 
mapped_ungetByte(void * sourceData,byte ARG_UNUSED (bv))45 static void TIDY_CALL mapped_ungetByte( void* sourceData, byte ARG_UNUSED(bv) )
46 {
47     MappedFileSource* fin = (MappedFileSource*) sourceData;
48     fin->pos--;
49 }
50 
TY_(initFileSource)51 int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
52 {
53     MappedFileSource* fin;
54     struct stat sbuf;
55     int fd;
56 
57     fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
58     if ( !fin )
59         return -1;
60 
61     fd = fileno(fp);
62     if ( fstat(fd, &sbuf) == -1
63          || sbuf.st_size == 0
64          || (fin->base = mmap(0, fin->size = sbuf.st_size, PROT_READ,
65                               MAP_SHARED, fd, 0)) == MAP_FAILED)
66     {
67         TidyFree( allocator, fin );
68         /* Fallback on standard I/O */
69         return TY_(initStdIOFileSource)( allocator, inp, fp );
70     }
71 
72     fin->pos = 0;
73     fin->allocator = allocator;
74     fclose(fp);
75 
76     inp->getByte    = mapped_getByte;
77     inp->eof        = mapped_eof;
78     inp->ungetByte  = mapped_ungetByte;
79     inp->sourceData = fin;
80 
81     return 0;
82 }
83 
TY_(freeFileSource)84 void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
85 {
86     if ( inp->getByte == mapped_getByte )
87     {
88         MappedFileSource* fin = (MappedFileSource*) inp->sourceData;
89         munmap( (void*)fin->base, fin->size );
90         TidyFree( fin->allocator, fin );
91     }
92     else
93         TY_(freeStdIOFileSource)( inp, closeIt );
94 }
95 
96 #endif /* SUPPORT_POSIX_MAPPED_FILES */
97 
98 
99 #if defined(_WIN32)
100 #  if defined(_MSC_VER) && (_MSC_VER < 1300)  /* less than msvc++ 7.0 */
101 #    pragma warning(disable:4115) /* named type definition in parentheses in windows headers */
102 #  endif
103 #  include <windows.h>
104 #  include <errno.h>
105 #  include "streamio.h"
106 #  include "tidy-int.h"
107 #  include "message.h"
108 
109 typedef struct _fp_input_mapped_source
110 {
111     TidyAllocator *allocator;
112     LONGLONG size, pos;
113     HANDLE file, map;
114     byte *view, *iter, *end;
115     unsigned int gran;
116 } MappedFileSource;
117 
/* Map the window of the file that starts at data->pos.

   The window covers data->gran bytes, or whatever is left of the file when
   that is less.  A previously mapped view is released first.  On success
   view/iter/end describe the new window and 0 is returned; if the mapping
   fails, view is NULL and -1 is returned. */
static int mapped_openView( MappedFileSource *data )
{
    LONGLONG remaining = data->size - data->pos;
    DWORD numb;

    if ( remaining > data->gran )
        numb = data->gran;
    else
        numb = (DWORD) remaining;

    if ( data->view )
        UnmapViewOfFile( data->view );

    /* The 64-bit offset is handed over as separate high and low halves. */
    data->view = MapViewOfFile( data->map, FILE_MAP_READ,
                                (DWORD)( data->pos >> 32 ),
                                (DWORD)data->pos, numb );
    if ( data->view == NULL )
        return -1;

    data->iter = data->view;
    data->end  = data->view + numb;
    return 0;
}
140 
mapped_getByte(void * sourceData)141 static int TIDY_CALL mapped_getByte( void *sourceData )
142 {
143     MappedFileSource *data = sourceData;
144 
145     if ( !data->view || data->iter >= data->end )
146     {
147         data->pos += data->gran;
148 
149         if ( data->pos >= data->size || mapped_openView(data) != 0 )
150             return EndOfStream;
151     }
152 
153     return *( data->iter++ );
154 }
155 
mapped_eof(void * sourceData)156 static Bool TIDY_CALL mapped_eof( void *sourceData )
157 {
158     MappedFileSource *data = sourceData;
159     return ( data->pos >= data->size );
160 }
161 
mapped_ungetByte(void * sourceData,byte ARG_UNUSED (bt))162 static void TIDY_CALL mapped_ungetByte( void *sourceData, byte ARG_UNUSED(bt) )
163 {
164     MappedFileSource *data = sourceData;
165 
166     if ( data->iter >= data->view )
167     {
168         --data->iter;
169         return;
170     }
171 
172     if ( data->pos < data->gran )
173     {
174         assert(0);
175         return;
176     }
177 
178     data->pos -= data->gran;
179     mapped_openView( data );
180 }
181 
/* Set up inp to read the file behind handle fp through a file mapping.

   allocator  allocator used for the source's bookkeeping structure
   inp        input source whose callbacks and sourceData are filled in
   fp         open file handle; it is stored in the source, not closed here

   Returns 0 on success.  Returns -1 if the bookkeeping structure cannot be
   allocated, the file size cannot be determined or is not positive, the
   file mapping cannot be created, or the first view cannot be mapped.  On
   failure everything acquired here is released and inp is left unmodified,
   so a failed call never leaves the mapped callbacks installed without
   source data behind them. */
static int initMappedFileSource( TidyAllocator *allocator, TidyInputSource* inp, HANDLE fp )
{
    MappedFileSource* fin = NULL;
    int haveSize = 0;

    fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( !fin )
        return -1;

#  if defined(__MINGW32__)
    {
        DWORD lowVal, highVal;
        lowVal = GetFileSize(fp, &highVal);
        /* INVALID_FILE_SIZE is also a legitimate low half of a size, so it
           only signals failure together with a non-zero last error. */
        if ((lowVal != INVALID_FILE_SIZE) || (GetLastError() == NO_ERROR))
        {
            fin->size = highVal;
            fin->size = (fin->size << 32);
            fin->size += lowVal;
            haveSize = 1;
        }
    }
#  else /* NOT a MinGW build */
#    if defined(_MSC_VER) && (_MSC_VER < 1300)  /* less than msvc++ 7.0 */
    {
        LARGE_INTEGER* pli = (LARGE_INTEGER *)&fin->size;
        (DWORD)pli->LowPart = GetFileSize( fp, (DWORD *)&pli->HighPart );
        haveSize = ( GetLastError() == NO_ERROR );
    }
#    else
    haveSize = ( GetFileSizeEx( fp, (LARGE_INTEGER*)&fin->size ) != 0 );
#    endif
#  endif /* MinGW y/n */

    /* Every build variant rejects an unknown, empty or negative size. */
    if ( !haveSize || fin->size <= 0 )
    {
        TidyFree(allocator, fin);
        return -1;
    }

    fin->map = CreateFileMapping( fp, NULL, PAGE_READONLY, 0, 0, NULL );

    if ( !fin->map )
    {
        TidyFree(allocator, fin);
        return -1;
    }

    {
        /* Views are opened one allocation granule at a time. */
        SYSTEM_INFO info;
        GetSystemInfo( &info );
        fin->gran = info.dwAllocationGranularity;
    }

    fin->allocator = allocator;
    fin->pos    = 0;
    fin->view   = NULL;
    fin->iter   = NULL;
    fin->end    = NULL;

    if ( mapped_openView( fin ) != 0 )
    {
        CloseHandle( fin->map );
        TidyFree( allocator, fin );
        return -1;
    }

    fin->file = fp;

    /* Publish the source only once nothing can fail any more. */
    inp->getByte    = mapped_getByte;
    inp->eof        = mapped_eof;
    inp->ungetByte  = mapped_ungetByte;
    inp->sourceData = fin;

    return 0;
}
260 
/* Release a source created by initMappedFileSource.

   inp      input source whose sourceData is the MappedFileSource to free
   closeIt  when true, the underlying file handle is closed as well

   The view and the mapping handle are created by this module, so they are
   always released; only the file handle depends on closeIt.  A source with
   no data attached is left alone.  sourceData is cleared afterwards so a
   stale pointer cannot be freed a second time. */
static void freeMappedFileSource( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* fin = (MappedFileSource*) inp->sourceData;

    if ( !fin )
        return;

    if ( fin->view )
        UnmapViewOfFile( fin->view );

    if ( fin->map )
        CloseHandle( fin->map );

    if ( closeIt && fin->file != INVALID_HANDLE_VALUE )
        CloseHandle( fin->file );

    TidyFree( fin->allocator, fin );
    inp->sourceData = NULL;
}
274 
MappedFileInput(TidyDocImpl * doc,HANDLE fp,int encoding)275 StreamIn* MappedFileInput ( TidyDocImpl* doc, HANDLE fp, int encoding )
276 {
277     StreamIn *in = TY_(initStreamIn)( doc, encoding );
278     if ( initMappedFileSource( doc->allocator, &in->source, fp ) != 0 )
279     {
280         TY_(freeStreamIn)( in );
281         return NULL;
282     }
283     in->iotype = FileIO;
284     return in;
285 }
286 
287 
TY_(DocParseFileWithMappedFile)288 int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam ) {
289     int status = -ENOENT;
290     HANDLE fin = CreateFileA( filnam, GENERIC_READ, FILE_SHARE_READ, NULL,
291                               OPEN_EXISTING, 0, NULL );
292 
293 #  if PRESERVE_FILE_TIMES
294     LONGLONG actime, modtime;
295     TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
296 
297     if ( fin != INVALID_HANDLE_VALUE && cfgBool(doc,TidyKeepFileTimes) &&
298          GetFileTime(fin, NULL, (FILETIME*)&actime, (FILETIME*)&modtime) )
299     {
300 #    define TY_I64(str) TYDYAPPEND(str,LL)
301 #    if _MSC_VER < 1300  && !defined(__GNUC__) /* less than msvc++ 7.0 */
302 #      undef TY_I64
303 #      define TY_I64(str) TYDYAPPEND(str,i64)
304 #    endif
305         doc->filetimes.actime =
306             (time_t)( ( actime  - TY_I64(116444736000000000)) / 10000000 );
307 
308         doc->filetimes.modtime =
309             (time_t)( ( modtime - TY_I64(116444736000000000)) / 10000000 );
310     }
311 #  endif /* PRESERVE_FILE_TIMES */
312 
313     if ( fin != INVALID_HANDLE_VALUE )
314     {
315         StreamIn* in = MappedFileInput( doc, fin,
316                                         cfg( doc, TidyInCharEncoding ) );
317         if ( !in )
318         {
319             CloseHandle( fin );
320             return -ENOMEM;
321         }
322 
323         status = TY_(DocParseStream)( doc, in );
324         freeMappedFileSource( &in->source, yes );
325         TY_(freeStreamIn)( in );
326     }
327     else /* Error message! */
328         TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN );
329     return status;
330 }
331 
332 #endif /* defined(_WIN32) */
333 
334