1 /**
2  * Orthanc - A Lightweight, RESTful DICOM Store
3  * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
4  * Department, University Hospital of Liege, Belgium
5  * Copyright (C) 2017-2021 Osimis S.A., Belgium
6  *
7  * This program is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public License
9  * as published by the Free Software Foundation, either version 3 of
10  * the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this program. If not, see
19  * <http://www.gnu.org/licenses/>.
20  **/
21 
22 
23 #include "../PrecompiledHeaders.h"
24 #include "MultipartStreamReader.h"
25 
26 #include "../Logging.h"
27 #include "../OrthancException.h"
28 #include "../Toolbox.h"
29 
30 #include <boost/algorithm/string/predicate.hpp>
31 #include <boost/lexical_cast.hpp>
32 
33 #if defined(_MSC_VER)
34 #  include <BaseTsd.h>   // Definition of ssize_t
35 #endif
36 
37 namespace Orthanc
38 {
ParseHeaders(MultipartStreamReader::HttpHeaders & headers,const char * start,const char * end)39   static void ParseHeaders(MultipartStreamReader::HttpHeaders& headers,
40                            const char* start,
41                            const char* end /* exclusive */)
42   {
43     assert(start <= end);
44     std::string tmp(start, end - start);
45 
46     std::vector<std::string> lines;
47     Toolbox::TokenizeString(lines, tmp, '\n');
48 
49     headers.clear();
50 
51     for (size_t i = 0; i < lines.size(); i++)
52     {
53       size_t separator = lines[i].find(':');
54       if (separator != std::string::npos)
55       {
56         std::string key = Toolbox::StripSpaces(lines[i].substr(0, separator));
57         std::string value = Toolbox::StripSpaces(lines[i].substr(separator + 1));
58 
59         Toolbox::ToLowerCase(key);
60         headers[key] = value;
61       }
62     }
63   }
64 
65 
LookupHeaderSizeValue(size_t & target,const MultipartStreamReader::HttpHeaders & headers,const std::string & key)66   static bool LookupHeaderSizeValue(size_t& target,
67                                     const MultipartStreamReader::HttpHeaders& headers,
68                                     const std::string& key)
69   {
70     MultipartStreamReader::HttpHeaders::const_iterator it = headers.find(key);
71     if (it == headers.end())
72     {
73       return false;
74     }
75     else
76     {
77       int64_t value;
78 
79       try
80       {
81         value = boost::lexical_cast<int64_t>(it->second);
82       }
83       catch (boost::bad_lexical_cast&)
84       {
85         throw OrthancException(ErrorCode_ParameterOutOfRange);
86       }
87 
88       if (value < 0)
89       {
90         throw OrthancException(ErrorCode_ParameterOutOfRange);
91       }
92       else
93       {
94         target = static_cast<size_t>(value);
95         return true;
96       }
97     }
98   }
99 
100 
ParseBlock(const void * data,size_t size)101   void MultipartStreamReader::ParseBlock(const void* data,
102                                          size_t size)
103   {
104     if (handler_ == NULL ||
105         state_ == State_Done ||
106         size == 0)
107     {
108       return;
109     }
110     else
111     {
112       const char* current = reinterpret_cast<const char*>(data);
113       const char* corpusEnd = current + size;
114 
115       if (state_ == State_UnusedArea)
116       {
117         /**
118          * "Before the first boundary is an area that is ignored by
119          * MIME-compliant clients. This area is generally used to put
120          * a message to users of old non-MIME clients."
121          * https://en.wikipedia.org/wiki/MIME#Multipart_messages
122          **/
123 
124         if (boundaryMatcher_.Apply(current, corpusEnd))
125         {
126           current = boundaryMatcher_.GetMatchBegin();
127           state_ = State_Content;
128         }
129         else
130         {
131           // We have not seen the end of the unused area yet
132           assert(current <= corpusEnd);
133           buffer_.AddChunk(current, corpusEnd - current);
134           return;
135         }
136       }
137 
138       for (;;)
139       {
140         assert(current <= corpusEnd);
141 
142         size_t patternSize = boundaryMatcher_.GetPattern().size();
143         size_t remainingSize = corpusEnd - current;
144         if (remainingSize < patternSize + 2)
145         {
146           break;  // Not enough data available
147         }
148 
149         std::string boundary(current, current + patternSize + 2);
150         if (boundary == boundaryMatcher_.GetPattern() + "--")
151         {
152           state_ = State_Done;
153           return;
154         }
155 
156         if (boundary != boundaryMatcher_.GetPattern() + "\r\n")
157         {
158           throw OrthancException(ErrorCode_NetworkProtocol,
159                                  "Garbage between two items in a multipart stream");
160         }
161 
162         const char* start = current + patternSize + 2;
163 
164         if (!headersMatcher_.Apply(start, corpusEnd))
165         {
166           break;  // Not enough data available
167         }
168 
169         HttpHeaders headers;
170         ParseHeaders(headers, start, headersMatcher_.GetMatchBegin());
171 
172         size_t contentLength = 0;
173         if (!LookupHeaderSizeValue(contentLength, headers, "content-length"))
174         {
175           if (boundaryMatcher_.Apply(headersMatcher_.GetMatchEnd(), corpusEnd))
176           {
177             assert(headersMatcher_.GetMatchEnd() <= boundaryMatcher_.GetMatchBegin());
178             size_t d = boundaryMatcher_.GetMatchBegin() - headersMatcher_.GetMatchEnd();
179             if (d <= 1)
180             {
181               throw OrthancException(ErrorCode_NetworkProtocol);
182             }
183             else
184             {
185               contentLength = d - 2;
186             }
187           }
188           else
189           {
190             break;  // Not enough data available to have a full part
191           }
192         }
193 
194         // "static_cast<>" to avoid warning about signed vs. unsigned comparison
195         assert(headersMatcher_.GetMatchEnd() <= corpusEnd);
196         if (contentLength + 2 > static_cast<size_t>(corpusEnd - headersMatcher_.GetMatchEnd()))
197         {
198           break;  // Not enough data available to have a full part
199         }
200 
201         const char* p = headersMatcher_.GetMatchEnd() + contentLength;
202         if (p[0] != '\r' ||
203             p[1] != '\n')
204         {
205           throw OrthancException(ErrorCode_NetworkProtocol,
206                                  "No endline at the end of a part");
207         }
208 
209         handler_->HandlePart(headers, headersMatcher_.GetMatchEnd(), contentLength);
210         current = headersMatcher_.GetMatchEnd() + contentLength + 2;
211       }
212 
213       if (current != corpusEnd)
214       {
215         assert(current < corpusEnd);
216         buffer_.AddChunk(current, corpusEnd - current);
217       }
218     }
219   }
220 
221 
ParseStream()222   void MultipartStreamReader::ParseStream()
223   {
224     if (handler_ == NULL ||
225         state_ == State_Done)
226     {
227       return;
228     }
229     else
230     {
231       std::string corpus;
232       buffer_.Flatten(corpus);
233 
234       if (!corpus.empty())
235       {
236         ParseBlock(corpus.c_str(), corpus.size());
237       }
238     }
239   }
240 
241 
MultipartStreamReader(const std::string & boundary)242   MultipartStreamReader::MultipartStreamReader(const std::string& boundary) :
243     state_(State_UnusedArea),
244     handler_(NULL),
245     headersMatcher_("\r\n\r\n"),
246     boundaryMatcher_("--" + boundary),
247     blockSize_(10 * 1024 * 1024)
248   {
249   }
250 
251 
SetBlockSize(size_t size)252   void MultipartStreamReader::SetBlockSize(size_t size)
253   {
254     if (size == 0)
255     {
256       throw OrthancException(ErrorCode_ParameterOutOfRange);
257     }
258     else
259     {
260       blockSize_ = size;
261     }
262   }
263 
GetBlockSize() const264   size_t MultipartStreamReader::GetBlockSize() const
265   {
266     return blockSize_;
267   }
268 
SetHandler(MultipartStreamReader::IHandler & handler)269   void MultipartStreamReader::SetHandler(MultipartStreamReader::IHandler &handler)
270   {
271     handler_ = &handler;
272   }
273 
274 
AddChunk(const void * chunk,size_t size)275   void MultipartStreamReader::AddChunk(const void* chunk,
276                                        size_t size)
277   {
278     if (state_ != State_Done &&
279         size != 0)
280     {
281       size_t oldSize = buffer_.GetNumBytes();
282       if (oldSize == 0)
283       {
284         /**
285          * Optimization in Orthanc 1.9.3: Directly parse the input
286          * buffer instead of going through the ChunkedBuffer if the
287          * latter is still empty. This notably avoids one memcpy() in
288          * STOW-RS server if chunked transfers is disabled.
289          **/
290         ParseBlock(chunk, size);
291       }
292       else
293       {
294         buffer_.AddChunk(chunk, size);
295 
296         if (oldSize / blockSize_ != buffer_.GetNumBytes() / blockSize_)
297         {
298           ParseStream();
299         }
300       }
301     }
302   }
303 
304 
AddChunk(const std::string & chunk)305   void MultipartStreamReader::AddChunk(const std::string& chunk)
306   {
307     if (!chunk.empty())
308     {
309       AddChunk(chunk.c_str(), chunk.size());
310     }
311   }
312 
313 
CloseStream()314   void MultipartStreamReader::CloseStream()
315   {
316     if (buffer_.GetNumBytes() != 0)
317     {
318       ParseStream();
319     }
320   }
321 
322 
GetMainContentType(std::string & contentType,const HttpHeaders & headers)323   bool MultipartStreamReader::GetMainContentType(std::string& contentType,
324                                                  const HttpHeaders& headers)
325   {
326     HttpHeaders::const_iterator it = headers.find("content-type");
327 
328     if (it == headers.end())
329     {
330       return false;
331     }
332     else
333     {
334       contentType = it->second;
335       return true;
336     }
337   }
338 
339 
/**
 * If "value" both starts and ends with a double quote (which
 * requires at least 2 characters), remove this pair of quotes in
 * place. Otherwise, "value" is left unchanged.
 **/
static void RemoveSurroundingQuotes(std::string& value)
{
  const size_t length = value.size();
  if (length < 2)
  {
    return;  // Too short to hold an opening and a closing quote
  }

  const bool quoted = (value[0] == '"' &&
                       value[length - 1] == '"');
  if (quoted)
  {
    value = value.substr(1, length - 2);
  }
}
349 
350 
ParseMultipartContentType(std::string & contentType,std::string & subType,std::string & boundary,const std::string & contentTypeHeader)351   bool MultipartStreamReader::ParseMultipartContentType(std::string& contentType,
352                                                         std::string& subType,
353                                                         std::string& boundary,
354                                                         const std::string& contentTypeHeader)
355   {
356     std::vector<std::string> tokens;
357     Toolbox::TokenizeString(tokens, contentTypeHeader, ';');
358 
359     if (tokens.empty())
360     {
361       return false;
362     }
363 
364     contentType = Toolbox::StripSpaces(tokens[0]);
365     Toolbox::ToLowerCase(contentType);
366 
367     if (contentType.empty())
368     {
369       return false;
370     }
371 
372     bool valid = false;
373     subType.clear();
374 
375     for (size_t i = 1; i < tokens.size(); i++)
376     {
377       std::vector<std::string> items;
378       Toolbox::TokenizeString(items, tokens[i], '=');
379 
380       if (items.size() == 2)
381       {
382         if (boost::iequals("boundary", Toolbox::StripSpaces(items[0])))
383         {
384           boundary = Toolbox::StripSpaces(items[1]);
385 
386           // https://bugs.orthanc-server.com/show_bug.cgi?id=190
387           RemoveSurroundingQuotes(boundary);
388 
389           valid = !boundary.empty();
390         }
391         else if (boost::iequals("type", Toolbox::StripSpaces(items[0])))
392         {
393           subType = Toolbox::StripSpaces(items[1]);
394           Toolbox::ToLowerCase(subType);
395 
396           // https://bugs.orthanc-server.com/show_bug.cgi?id=54
397           // https://tools.ietf.org/html/rfc7231#section-3.1.1.1
398           RemoveSurroundingQuotes(subType);
399         }
400       }
401     }
402 
403     return valid;
404   }
405 
406 
ParseHeaderArguments(std::string & main,std::map<std::string,std::string> & arguments,const std::string & header)407   bool MultipartStreamReader::ParseHeaderArguments(std::string& main,
408                                                    std::map<std::string, std::string>& arguments,
409                                                    const std::string& header)
410   {
411     std::vector<std::string> tokens;
412     Toolbox::TokenizeString(tokens, header, ';');
413 
414     if (tokens.empty())
415     {
416       return false;
417     }
418 
419     main = Toolbox::StripSpaces(tokens[0]);
420     Toolbox::ToLowerCase(main);
421     if (main.empty())
422     {
423       return false;
424     }
425 
426     arguments.clear();
427 
428     for (size_t i = 1; i < tokens.size(); i++)
429     {
430       std::vector<std::string> items;
431       Toolbox::TokenizeString(items, tokens[i], '=');
432 
433       if (items.size() > 2)
434       {
435         return false;
436       }
437       else if (!items.empty())
438       {
439         std::string key = Toolbox::StripSpaces(items[0]);
440         Toolbox::ToLowerCase(key);
441 
442         if (arguments.find(key) != arguments.end())
443         {
444           LOG(ERROR) << "The same argument was provided twice in an HTTP header: \""
445                      << key << "\" in \"" << header << "\"";
446           return false;
447         }
448         else if (!key.empty())
449         {
450           if (items.size() == 1)
451           {
452             arguments[key] = "";
453           }
454           else
455           {
456             assert(items.size() == 2);
457             std::string value = Toolbox::StripSpaces(items[1]);
458             RemoveSurroundingQuotes(value);
459             arguments[key] = value;
460           }
461         }
462       }
463     }
464 
465     return true;
466   }
467 }
468