1 /******************************************************************************
2  *
3  * Project:  OGR
4  * Purpose:  Convenience function for parsing with Expat library
5  * Author:   Even Rouault, even dot rouault at spatialys.com
6  *
7  ******************************************************************************
8  * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  ****************************************************************************/
28 
29 #ifdef HAVE_EXPAT
30 
31 #include "cpl_port.h"
32 #include "cpl_conv.h"
33 #include "cpl_string.h"
34 #include "ogr_expat.h"
35 
36 #include <cstddef>
37 #include <cstdlib>
38 
39 #include "cpl_error.h"
40 
41 
42 CPL_CVSID("$Id: ogr_expat.cpp 148115fcc40f1651a5d15fa34c9a8c528e7147bb 2019-08-04 21:00:33 +0200 Even Rouault $")
43 
44 constexpr size_t OGR_EXPAT_MAX_ALLOWED_ALLOC = 10000000;
45 
46 static void* OGRExpatMalloc( size_t size ) CPL_WARN_UNUSED_RESULT;
47 static void* OGRExpatRealloc( void *ptr, size_t size ) CPL_WARN_UNUSED_RESULT;
48 
49 /************************************************************************/
50 /*                              CanAlloc()                              */
51 /************************************************************************/
52 
CanAlloc(size_t size)53 static bool CanAlloc( size_t size )
54 {
55     if( size < OGR_EXPAT_MAX_ALLOWED_ALLOC )
56         return true;
57 
58     if( CPLTestBool(CPLGetConfigOption("OGR_EXPAT_UNLIMITED_MEM_ALLOC", "NO")) )
59         return true;
60 
61     CPLError(CE_Failure, CPLE_OutOfMemory,
62              "Expat tried to malloc %d bytes. File probably corrupted. "
63              "This may also happen in case of a very big XML comment, in which case "
64              "you may define the OGR_EXPAT_UNLIMITED_MEM_ALLOC configuration "
65              "option to YES to remove that protection.",
66              static_cast<int>(size));
67     return false;
68 }
69 
70 /************************************************************************/
71 /*                          OGRExpatMalloc()                            */
72 /************************************************************************/
73 
OGRExpatMalloc(size_t size)74 static void* OGRExpatMalloc( size_t size )
75 {
76     if( CanAlloc(size) )
77         return malloc(size);
78 
79     return nullptr;
80 }
81 
82 /************************************************************************/
83 /*                         OGRExpatRealloc()                            */
84 /************************************************************************/
85 
86 // Caller must replace the pointer with the returned pointer.
OGRExpatRealloc(void * ptr,size_t size)87 static void* OGRExpatRealloc( void *ptr, size_t size )
88 {
89     if( CanAlloc(size) )
90         return realloc(ptr, size);
91 
92     return nullptr;
93 }
94 
95 /************************************************************************/
96 /*                            FillWINDOWS1252()                         */
97 /************************************************************************/
98 
FillWINDOWS1252(XML_Encoding * info)99 static void FillWINDOWS1252( XML_Encoding *info )
100 {
101     // Map CP1252 bytes to Unicode values.
102     for( int i = 0; i < 0x80; ++i )
103         info->map[i] = i;
104 
105     info->map[0x80] = 0x20AC;
106     info->map[0x81] = -1;
107     info->map[0x82] = 0x201A;
108     info->map[0x83] = 0x0192;
109     info->map[0x84] = 0x201E;
110     info->map[0x85] = 0x2026;
111     info->map[0x86] = 0x2020;
112     info->map[0x87] = 0x2021;
113     info->map[0x88] = 0x02C6;
114     info->map[0x89] = 0x2030;
115     info->map[0x8A] = 0x0160;
116     info->map[0x8B] = 0x2039;
117     info->map[0x8C] = 0x0152;
118     info->map[0x8D] = -1;
119     info->map[0x8E] = 0x017D;
120     info->map[0x8F] = -1;
121     info->map[0x90] = -1;
122     info->map[0x91] = 0x2018;
123     info->map[0x92] = 0x2019;
124     info->map[0x93] = 0x201C;
125     info->map[0x94] = 0x201D;
126     info->map[0x95] = 0x2022;
127     info->map[0x96] = 0x2013;
128     info->map[0x97] = 0x2014;
129     info->map[0x98] = 0x02DC;
130     info->map[0x99] = 0x2122;
131     info->map[0x9A] = 0x0161;
132     info->map[0x9B] = 0x203A;
133     info->map[0x9C] = 0x0153;
134     info->map[0x9D] = -1;
135     info->map[0x9E] = 0x017E;
136     info->map[0x9F] = 0x0178;
137 
138     for( int i = 0xA0; i <= 0xFF; ++i )
139         info->map[i] = i;
140 }
141 
142 /************************************************************************/
143 /*                             FillISO885915()                          */
144 /************************************************************************/
145 
FillISO885915(XML_Encoding * info)146 static void FillISO885915( XML_Encoding *info )
147 {
148     // Map ISO-8859-15 bytes to Unicode values.
149     // Generated by generate_encoding_table.c.
150     for( int i = 0x00; i < 0xA4; ++i)
151         info->map[i] = i;
152     info->map[0xA4] = 0x20AC;
153     info->map[0xA5] = 0xA5;
154     info->map[0xA6] = 0x0160;
155     info->map[0xA7] = 0xA7;
156     info->map[0xA8] = 0x0161;
157     for( int i = 0xA9; i < 0xB4; ++i )
158         info->map[i] = i;
159     info->map[0xB4] = 0x017D;
160     for( int i = 0xB5; i < 0xB8; ++i )
161         info->map[i] = i;
162     info->map[0xB8] = 0x017E;
163     for( int i = 0xB9; i < 0xBC; ++i )
164         info->map[i] = i;
165     info->map[0xBC] = 0x0152;
166     info->map[0xBD] = 0x0153;
167     info->map[0xBE] = 0x0178;
168     for( int i = 0xBF; i < 0x100; ++i )
169         info->map[i] = i;
170 }
171 
172 /************************************************************************/
173 /*                  OGRExpatUnknownEncodingHandler()                    */
174 /************************************************************************/
175 
OGRExpatUnknownEncodingHandler(void *,const XML_Char * name,XML_Encoding * info)176 static int OGRExpatUnknownEncodingHandler(
177     void * /* unused_encodingHandlerData */,
178     const XML_Char *name,
179     XML_Encoding *info )
180 {
181     if( EQUAL(name, "WINDOWS-1252") )
182         FillWINDOWS1252(info);
183     else if( EQUAL(name, "ISO-8859-15") )
184         FillISO885915(info);
185     else
186     {
187         CPLDebug("OGR", "Unhandled encoding %s", name);
188         return XML_STATUS_ERROR;
189     }
190 
191     info->data    = nullptr;
192     info->convert = nullptr;
193     info->release = nullptr;
194 
195     return XML_STATUS_OK;
196 }
197 
198 /************************************************************************/
199 /*                       OGRCreateExpatXMLParser()                      */
200 /************************************************************************/
201 
OGRCreateExpatXMLParser()202 XML_Parser OGRCreateExpatXMLParser()
203 {
204     XML_Memory_Handling_Suite memsuite;
205     memsuite.malloc_fcn = OGRExpatMalloc;
206     memsuite.realloc_fcn = OGRExpatRealloc;
207     memsuite.free_fcn = free;
208     XML_Parser hParser = XML_ParserCreate_MM(nullptr, &memsuite, nullptr);
209 
210     XML_SetUnknownEncodingHandler(hParser,
211                                   OGRExpatUnknownEncodingHandler,
212                                   nullptr);
213 
214     return hParser;
215 }
216 
217 #endif  // HAVE_EXPAT
218