1 /******************************************************************************
2  * Project:  OGR
3  * Purpose:  OGRGMLASDriver implementation
4  * Author:   Even Rouault, <even dot rouault at spatialys dot com>
5  *
6  * Initial development funded by the European Earth observation programme
7  * Copernicus
8  *
9  ******************************************************************************
10  * Copyright (c) 2016, Even Rouault, <even dot rouault at spatialys dot com>
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a
13  * copy of this software and associated documentation files (the "Software"),
14  * to deal in the Software without restriction, including without limitation
15  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
16  * and/or sell copies of the Software, and to permit persons to whom the
17  * Software is furnished to do so, subject to the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included
20  * in all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
23  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  ****************************************************************************/
30 
31 // Must be first for DEBUG_BOOL case
32 #include "ogr_gmlas.h"
33 
34 #include "cpl_http.h"
35 
36 #include <time.h>
37 
38 CPL_CVSID("$Id: ogrgmlasxlinkresolver.cpp 0b629564aaf603602a76d48b0d0e1e501727ad59 2017-12-17 20:55:57Z Even Rouault $")
39 
40 /************************************************************************/
41 /*                         GMLASXLinkResolver()                         */
42 /************************************************************************/
43 
GMLASXLinkResolver()44 GMLASXLinkResolver::GMLASXLinkResolver() :
45     m_nGlobalResolutionTime(0),
46     m_nMaxRAMCacheSize(atoi(CPLGetConfigOption("GMLAS_XLINK_RAM_CACHE_SIZE",
47                                                "10000000"))),
48     m_nCurrentRAMCacheSize(0)
49 {
50 }
51 
52 /************************************************************************/
53 /*                             SetConf()                                */
54 /************************************************************************/
55 
SetConf(const GMLASXLinkResolutionConf & oConf)56 void GMLASXLinkResolver::SetConf( const GMLASXLinkResolutionConf& oConf )
57 {
58     m_oConf = oConf;
59     SetCacheDirectory(m_oConf.m_osCacheDirectory);
60 }
61 
62 /************************************************************************/
63 /*                          FetchRawContent()                           */
64 /************************************************************************/
65 
FetchRawContent(const CPLString & osURL,const char * pszHeaders)66 CPLString GMLASXLinkResolver::FetchRawContent(const CPLString& osURL,
67                                               const char* pszHeaders)
68 {
69     char** papszOptions = nullptr;
70     if( m_oConf.m_nMaxGlobalResolutionTime > 0 &&
71         m_nGlobalResolutionTime > m_oConf.m_nMaxGlobalResolutionTime )
72     {
73         CPLError(CE_Failure, CPLE_AppDefined,
74                  "Maximum global resolution time has been reached. "
75                  "No remote resource will be fetched");
76         return CPLString();
77     }
78     if( m_oConf.m_nTimeOut > 0 || m_oConf.m_nMaxGlobalResolutionTime > 0 )
79     {
80         int nTimeout = m_oConf.m_nTimeOut;
81         if( m_oConf.m_nTimeOut > 0 && m_oConf.m_nMaxGlobalResolutionTime > 0 )
82         {
83             // Select the minimum between the individual timeout and the
84             // remaining time granted by the max global resolution time.
85             int nRemaining = m_oConf.m_nMaxGlobalResolutionTime -
86                              m_nGlobalResolutionTime;
87             if( nRemaining < nTimeout )
88                 nTimeout = nRemaining;
89         }
90         else if( m_oConf.m_nMaxGlobalResolutionTime > 0 )
91         {
92             nTimeout = m_oConf.m_nMaxGlobalResolutionTime -
93                        m_nGlobalResolutionTime;
94         }
95         papszOptions = CSLSetNameValue(papszOptions, "TIMEOUT",
96                                        CPLSPrintf("%d", nTimeout));
97     }
98     if( m_oConf.m_nMaxFileSize > 0 )
99     {
100         papszOptions = CSLSetNameValue(papszOptions, "MAX_FILE_SIZE",
101                                        CPLSPrintf("%d", m_oConf.m_nMaxFileSize));
102     }
103     if( !m_oConf.m_osProxyServerPort.empty() )
104     {
105         papszOptions = CSLSetNameValue(papszOptions, "PROXY",
106                                        m_oConf.m_osProxyServerPort);
107     }
108     if( !m_oConf.m_osProxyUserPassword.empty() )
109     {
110         papszOptions = CSLSetNameValue(papszOptions, "PROXYUSERPWD",
111                                        m_oConf.m_osProxyUserPassword);
112     }
113     if( !m_oConf.m_osProxyAuth.empty() )
114     {
115         papszOptions = CSLSetNameValue(papszOptions, "PROXYAUTH",
116                                        m_oConf.m_osProxyAuth);
117     }
118     if( pszHeaders != nullptr )
119     {
120         papszOptions = CSLSetNameValue(papszOptions, "HEADERS",
121                                        pszHeaders);
122     }
123     time_t nTimeStart = time(nullptr);
124     CPLHTTPResult* psResult = CPLHTTPFetch(osURL, papszOptions);
125     time_t nTimeStop = time(nullptr);
126     m_nGlobalResolutionTime += static_cast<int>(nTimeStop - nTimeStart);
127     CSLDestroy(papszOptions);
128     if( psResult == nullptr )
129         return CPLString();
130 
131     if( psResult->nStatus != 0 ||
132         psResult->pabyData == nullptr )
133     {
134         CPLHTTPDestroyResult(psResult);
135         return CPLString();
136     }
137 
138     CPLString osResult;
139     osResult.assign( reinterpret_cast<char*>(psResult->pabyData),
140                      psResult->nDataLen );
141     CPLHTTPDestroyResult(psResult);
142     return osResult;
143 }
144 
145 /************************************************************************/
146 /*                           GetRawContent()                            */
147 /************************************************************************/
148 
GetRawContent(const CPLString & osURL,const char * pszHeaders,bool bAllowRemoteDownload,bool bCacheResults)149 CPLString GMLASXLinkResolver::GetRawContent(const CPLString& osURL,
150                                             const char* pszHeaders,
151                                             bool bAllowRemoteDownload,
152                                             bool bCacheResults)
153 {
154     bool bDiskCacheAvailable = false;
155     if( !m_osCacheDirectory.empty() &&
156         RecursivelyCreateDirectoryIfNeeded() )
157     {
158         bDiskCacheAvailable = true;
159 
160         CPLString osCachedFileName(GetCachedFilename(osURL));
161         VSILFILE* fp = nullptr;
162         if( !m_bRefresh ||
163             m_aoSetRefreshedFiles.find(osCachedFileName) !=
164                                             m_aoSetRefreshedFiles.end() )
165         {
166             fp = VSIFOpenL( osCachedFileName, "rb");
167         }
168         if( fp != nullptr )
169         {
170             CPLDebug("GMLAS", "Use cached %s", osCachedFileName.c_str());
171             GByte* pabyRet = nullptr;
172             vsi_l_offset nSize = 0;
173             CPLString osContent;
174             if( VSIIngestFile( fp, nullptr, &pabyRet, &nSize, -1 ) )
175             {
176                 osContent.assign( reinterpret_cast<const char*>(pabyRet),
177                                   static_cast<size_t>(nSize) );
178             }
179             VSIFree(pabyRet);
180             VSIFCloseL(fp);
181             return osContent;
182         }
183         else if( bAllowRemoteDownload )
184         {
185             if( m_bRefresh )
186                 m_aoSetRefreshedFiles.insert(osCachedFileName);
187         }
188         else
189         {
190             CPLDebug("GMLAS",
191                      "Could not find locally cached %s, and not allowed to"
192                      "download it",
193                      osURL.c_str());
194             return CPLString();
195         }
196     }
197 
198     // Check memory cache first
199     {
200         const auto oIter = m_oMapURLToContent.find(osURL);
201         if( oIter != m_oMapURLToContent.end() )
202             return oIter->second;
203     }
204 
205     const CPLString osContent(FetchRawContent(osURL, pszHeaders));
206     // Cache to disk if possible
207     if( bDiskCacheAvailable && bCacheResults && !osContent.empty() )
208     {
209         CPLString osCachedFileName(GetCachedFilename(osURL));
210         CPLString osTmpfilename( osCachedFileName + ".tmp" );
211         VSILFILE* fpTemp = VSIFOpenL( osTmpfilename, "wb" );
212         if( fpTemp != nullptr )
213         {
214             const bool bSuccess = VSIFWriteL( osContent.data(),
215                                               osContent.size(), 1,
216                                               fpTemp ) == 1;
217             VSIFCloseL(fpTemp);
218             if( bSuccess )
219                 VSIRename( osTmpfilename, osCachedFileName );
220         }
221     }
222     // Otherwise to RAM
223     else if( !osContent.empty() && osContent.size() < m_nMaxRAMCacheSize )
224     {
225         // If cache is going to be saturated, evict larger objects first
226         while( osContent.size() + m_nCurrentRAMCacheSize > m_nMaxRAMCacheSize )
227         {
228             std::map<size_t, std::vector<CPLString> >::reverse_iterator oIter =
229                 m_oMapFileSizeToURLs.rbegin();
230             const size_t nSizeToEvict = oIter->first;
231             m_nCurrentRAMCacheSize -= nSizeToEvict;
232             const CPLString osURLToEvict(oIter->second.front());
233             m_oMapURLToContent.erase(osURLToEvict);
234             oIter->second.erase(oIter->second.begin());
235             if( oIter->second.empty() )
236                 m_oMapFileSizeToURLs.erase( nSizeToEvict );
237         }
238         m_oMapURLToContent[osURL] = osContent;
239         m_oMapFileSizeToURLs[osContent.size()].push_back(osURL);
240         m_nCurrentRAMCacheSize += osContent.size();
241     }
242     return osContent;
243 }
244 
245 /************************************************************************/
246 /*                     IsRawContentResolutionEnabled()                  */
247 /************************************************************************/
248 
IsRawContentResolutionEnabled() const249 bool GMLASXLinkResolver::IsRawContentResolutionEnabled() const
250 {
251     return m_oConf.m_bDefaultResolutionEnabled &&
252            m_oConf.m_eDefaultResolutionMode ==
253                                         GMLASXLinkResolutionConf::RawContent;
254 }
255 
256 /************************************************************************/
257 /*                      GetMatchingResolutionRule()                      */
258 /************************************************************************/
259 
GetMatchingResolutionRule(const CPLString & osURL) const260 int GMLASXLinkResolver::GetMatchingResolutionRule(const CPLString& osURL) const
261 {
262     for(size_t i = 0; i < m_oConf.m_aoURLSpecificRules.size(); ++i )
263     {
264         if( osURL.compare(0,
265                           m_oConf.m_aoURLSpecificRules[i].m_osURLPrefix.size(),
266                           m_oConf.m_aoURLSpecificRules[i].m_osURLPrefix) == 0 )
267         {
268             return static_cast<int>(i);
269         }
270     }
271 
272     // No match
273     return -1;
274 }
275 
276 /************************************************************************/
277 /*                           GetRawContent()                            */
278 /************************************************************************/
279 
/**
 * Resolve osURL with the default settings of the configuration: no extra
 * HTTP headers, and the default remote-download and result-caching flags.
 *
 * @param osURL URL to resolve.
 * @return whatever the four-argument GetRawContent() overload returns.
 */
CPLString GMLASXLinkResolver::GetRawContent(const CPLString& osURL)
{
    const char* const pszNoExtraHeaders = nullptr;
    const bool bMayDownload = m_oConf.m_bDefaultAllowRemoteDownload;
    const bool bStoreResults = m_oConf.m_bDefaultCacheResults;

    return GetRawContent(osURL, pszNoExtraHeaders,
                         bMayDownload, bStoreResults);
}
287 
288 /************************************************************************/
289 /*                         GetRawContentForRule()                       */
290 /************************************************************************/
291 
/**
 * Resolve osURL with the settings of one URL-specific rule.
 *
 * The rule's HTTP headers are serialized as "name: value" entries separated
 * by "\r\n" and handed to the four-argument GetRawContent() overload together
 * with the rule's remote-download and result-caching flags.
 *
 * @param osURL    URL to resolve.
 * @param nIdxRule index into m_oConf.m_aoURLSpecificRules. It is used
 *                 directly, without any bounds check.
 * @return whatever the four-argument GetRawContent() overload returns.
 */
CPLString GMLASXLinkResolver::GetRawContentForRule(const CPLString& osURL,
                                                   int nIdxRule)
{
    const auto& oSelectedRule = m_oConf.m_aoURLSpecificRules[nIdxRule];

    CPLString osHeaderBlock;
    for( const auto& oNameValue : oSelectedRule.m_aosNameValueHTTPHeaders )
    {
        // Separate from the previously appended header, if any.
        if( !osHeaderBlock.empty() )
            osHeaderBlock += "\r\n";
        osHeaderBlock += oNameValue.first;
        osHeaderBlock += ": ";
        osHeaderBlock += oNameValue.second;
    }

    // An empty header block is signalled to the callee with nullptr.
    const char* pszHeaderArg = nullptr;
    if( !osHeaderBlock.empty() )
        pszHeaderArg = osHeaderBlock.c_str();

    return GetRawContent(osURL,
                         pszHeaderArg,
                         oSelectedRule.m_bAllowRemoteDownload,
                         oSelectedRule.m_bCacheResults);
}
312