/**
 * Licensed to the University Corporation for Advanced Internet
 * Development, Inc. (UCAID) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 *
 * UCAID licenses this file to you under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the
 * License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific
 * language governing permissions and limitations under the License.
 */
20 
/**
 * @file xmltooling/util/CurlURLInputStream.h
 *
 * Asynchronous use of curl to fetch data from a URL.
 */
26 
27 #if !defined(__xmltooling_curlinstr_h__) && !defined(XMLTOOLING_LITE)
28 #define __xmltooling_curlinstr_h__
29 
30 #include <xmltooling/logging.h>
31 
32 #include <string>
33 #include <vector>
34 #include <curl/curl.h>
35 #include <xercesc/util/BinInputStream.hpp>
36 
37 namespace xmltooling {
38 
39     /**
40      * Adapted from Xerces-C as a more advanced input stream implementation
41      * for subsequent use in parsing remote documents.
42      */
43     class XMLTOOL_API CurlURLInputStream : public xercesc::BinInputStream
44     {
45     public :
46         /**
47          * Constructor.
48          *
49          * @param url       the URL of the resource to fetch
50          * @param cacheTag  optional pointer to string used for cache management
51          */
52         CurlURLInputStream(const char* url, std::string* cacheTag=nullptr);
53 
54         /**
55          * Constructor.
56          *
57          * @param url       the URL of the resource to fetch
58          * @param cacheTag  optional pointer to string used for cache management
59          */
60         CurlURLInputStream(const XMLCh* url, std::string* cacheTag=nullptr);
61 
62         /**
63          * Constructor taking a DOM element supporting the following content:
64          *
65          * <dl>
66          *  <dt>uri | url</dt>
67          *  <dd>identifies the remote resource</dd>
68          *  <dt>verifyHost</dt>
69          *  <dd>true iff name of host should be matched against TLS/SSL certificate</dd>
70          *  <dt>TransportOption elements, like so:</dt>
71          *  <dd>&lt;TransportOption provider="CURL" option="150"&gt;0&lt;/TransportOption&gt;</dd>
72          * </dl>
73          *
74          * @param e         DOM to supply configuration
75          * @param cacheTag  optional pointer to string used for cache management
76          */
77         CurlURLInputStream(const xercesc::DOMElement* e, std::string* cacheTag=nullptr);
78 
79         ~CurlURLInputStream();
80 
curPos()81         XMLFilePos curPos() const {
82             return fTotalBytesRead;
83         }
84 
getContentType()85         const XMLCh* getContentType() const {
86             return fContentType;
87         }
88 
89         XMLSize_t readBytes(XMLByte* const toFill, const XMLSize_t maxToRead);
90 
91         /**
92          * Access the OpenSSL context options in place for this object.
93          *
94          * @return bitmask suitable for use with SSL_CTX_set_options
95          */
getOpenSSLOps()96         int getOpenSSLOps() const {
97             return fOpenSSLOps;
98         }
99 
100     private :
101         CurlURLInputStream(const CurlURLInputStream&);
102         CurlURLInputStream& operator=(const CurlURLInputStream&);
103 
104         // libcurl callbacks for data read/write
105         static size_t staticWriteCallback(char *buffer, size_t size, size_t nitems, void *outstream);
106         size_t writeCallback(char *buffer, size_t size, size_t nitems);
107 
108         void init(const xercesc::DOMElement* e=nullptr);
109         bool readMore(int *runningHandles);
110 
111         logging::Category&  fLog;
112         std::string*        fCacheTag;
113         std::string         fURL;
114         std::vector<std::string>    fSavedOptions;
115         int                 fOpenSSLOps;
116 
117         CURLM*              fMulti;
118         CURL*               fEasy;
119         struct curl_slist*  fHeaders;
120 
121         unsigned long       fTotalBytesRead;
122         XMLByte*            fWritePtr;
123         XMLSize_t           fBytesRead;
124         XMLSize_t           fBytesToRead;
125         bool                fDataAvailable;
126 
127         // Overflow buffer for when curl writes more data to us
128         // than we've asked for.
129         XMLByte*            fBuffer;
130         XMLByte*            fBufferHeadPtr;
131         XMLByte*            fBufferTailPtr;
132         size_t              fBufferSize;
133 
134         XMLCh*              fContentType;
135         long                fStatusCode;
136 
137         char                fError[CURL_ERROR_SIZE];
138     };
139 };
140 
141 #endif // __xmltooling_curlinstr_h__
142