1 /** 2 * Orthanc - A Lightweight, RESTful DICOM Store 3 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics 4 * Department, University Hospital of Liege, Belgium 5 * Copyright (C) 2017-2021 Osimis S.A., Belgium 6 * 7 * This program is free software: you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public License 9 * as published by the Free Software Foundation, either version 3 of 10 * the License, or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with this program. If not, see 19 * <http://www.gnu.org/licenses/>. 20 **/ 21 22 23 #include "../PrecompiledHeaders.h" 24 #include "MultipartStreamReader.h" 25 26 #include "../Logging.h" 27 #include "../OrthancException.h" 28 #include "../Toolbox.h" 29 30 #include <boost/algorithm/string/predicate.hpp> 31 #include <boost/lexical_cast.hpp> 32 33 #if defined(_MSC_VER) 34 # include <BaseTsd.h> // Definition of ssize_t 35 #endif 36 37 namespace Orthanc 38 { ParseHeaders(MultipartStreamReader::HttpHeaders & headers,const char * start,const char * end)39 static void ParseHeaders(MultipartStreamReader::HttpHeaders& headers, 40 const char* start, 41 const char* end /* exclusive */) 42 { 43 assert(start <= end); 44 std::string tmp(start, end - start); 45 46 std::vector<std::string> lines; 47 Toolbox::TokenizeString(lines, tmp, '\n'); 48 49 headers.clear(); 50 51 for (size_t i = 0; i < lines.size(); i++) 52 { 53 size_t separator = lines[i].find(':'); 54 if (separator != std::string::npos) 55 { 56 std::string key = Toolbox::StripSpaces(lines[i].substr(0, separator)); 57 std::string value = Toolbox::StripSpaces(lines[i].substr(separator + 1)); 58 59 Toolbox::ToLowerCase(key); 60 headers[key] = value; 61 } 62 } 63 } 64 65 LookupHeaderSizeValue(size_t & target,const MultipartStreamReader::HttpHeaders & headers,const std::string & key)66 static bool LookupHeaderSizeValue(size_t& target, 67 const MultipartStreamReader::HttpHeaders& headers, 68 const std::string& key) 69 { 70 MultipartStreamReader::HttpHeaders::const_iterator it = headers.find(key); 71 if (it == headers.end()) 72 { 73 return false; 74 } 75 else 76 { 77 int64_t value; 78 79 try 80 { 81 value = boost::lexical_cast<int64_t>(it->second); 82 } 83 catch (boost::bad_lexical_cast&) 84 { 85 throw OrthancException(ErrorCode_ParameterOutOfRange); 86 } 87 88 if (value < 0) 89 { 90 throw OrthancException(ErrorCode_ParameterOutOfRange); 91 } 92 else 93 { 94 target = static_cast<size_t>(value); 95 return true; 96 } 97 } 98 } 99 100 ParseBlock(const void * data,size_t size)101 void MultipartStreamReader::ParseBlock(const void* data, 102 size_t size) 103 { 104 if (handler_ == NULL || 105 state_ == State_Done || 106 size == 0) 107 { 108 return; 109 } 110 else 111 { 112 const char* current = reinterpret_cast<const char*>(data); 113 const char* corpusEnd = current + size; 114 115 if (state_ == State_UnusedArea) 116 { 117 /** 118 * "Before the first boundary is an area that is ignored by 119 * MIME-compliant clients. This area is generally used to put 120 * a message to users of old non-MIME clients." 121 * https://en.wikipedia.org/wiki/MIME#Multipart_messages 122 **/ 123 124 if (boundaryMatcher_.Apply(current, corpusEnd)) 125 { 126 current = boundaryMatcher_.GetMatchBegin(); 127 state_ = State_Content; 128 } 129 else 130 { 131 // We have not seen the end of the unused area yet 132 assert(current <= corpusEnd); 133 buffer_.AddChunk(current, corpusEnd - current); 134 return; 135 } 136 } 137 138 for (;;) 139 { 140 assert(current <= corpusEnd); 141 142 size_t patternSize = boundaryMatcher_.GetPattern().size(); 143 size_t remainingSize = corpusEnd - current; 144 if (remainingSize < patternSize + 2) 145 { 146 break; // Not enough data available 147 } 148 149 std::string boundary(current, current + patternSize + 2); 150 if (boundary == boundaryMatcher_.GetPattern() + "--") 151 { 152 state_ = State_Done; 153 return; 154 } 155 156 if (boundary != boundaryMatcher_.GetPattern() + "\r\n") 157 { 158 throw OrthancException(ErrorCode_NetworkProtocol, 159 "Garbage between two items in a multipart stream"); 160 } 161 162 const char* start = current + patternSize + 2; 163 164 if (!headersMatcher_.Apply(start, corpusEnd)) 165 { 166 break; // Not enough data available 167 } 168 169 HttpHeaders headers; 170 ParseHeaders(headers, start, headersMatcher_.GetMatchBegin()); 171 172 size_t contentLength = 0; 173 if (!LookupHeaderSizeValue(contentLength, headers, "content-length")) 174 { 175 if (boundaryMatcher_.Apply(headersMatcher_.GetMatchEnd(), corpusEnd)) 176 { 177 assert(headersMatcher_.GetMatchEnd() <= boundaryMatcher_.GetMatchBegin()); 178 size_t d = boundaryMatcher_.GetMatchBegin() - headersMatcher_.GetMatchEnd(); 179 if (d <= 1) 180 { 181 throw OrthancException(ErrorCode_NetworkProtocol); 182 } 183 else 184 { 185 contentLength = d - 2; 186 } 187 } 188 else 189 { 190 break; // Not enough data available to have a full part 191 } 192 } 193 194 // "static_cast<>" to avoid warning about signed vs. unsigned comparison 195 assert(headersMatcher_.GetMatchEnd() <= corpusEnd); 196 if (contentLength + 2 > static_cast<size_t>(corpusEnd - headersMatcher_.GetMatchEnd())) 197 { 198 break; // Not enough data available to have a full part 199 } 200 201 const char* p = headersMatcher_.GetMatchEnd() + contentLength; 202 if (p[0] != '\r' || 203 p[1] != '\n') 204 { 205 throw OrthancException(ErrorCode_NetworkProtocol, 206 "No endline at the end of a part"); 207 } 208 209 handler_->HandlePart(headers, headersMatcher_.GetMatchEnd(), contentLength); 210 current = headersMatcher_.GetMatchEnd() + contentLength + 2; 211 } 212 213 if (current != corpusEnd) 214 { 215 assert(current < corpusEnd); 216 buffer_.AddChunk(current, corpusEnd - current); 217 } 218 } 219 } 220 221 ParseStream()222 void MultipartStreamReader::ParseStream() 223 { 224 if (handler_ == NULL || 225 state_ == State_Done) 226 { 227 return; 228 } 229 else 230 { 231 std::string corpus; 232 buffer_.Flatten(corpus); 233 234 if (!corpus.empty()) 235 { 236 ParseBlock(corpus.c_str(), corpus.size()); 237 } 238 } 239 } 240 241 MultipartStreamReader(const std::string & boundary)242 MultipartStreamReader::MultipartStreamReader(const std::string& boundary) : 243 state_(State_UnusedArea), 244 handler_(NULL), 245 headersMatcher_("\r\n\r\n"), 246 boundaryMatcher_("--" + boundary), 247 blockSize_(10 * 1024 * 1024) 248 { 249 } 250 251 SetBlockSize(size_t size)252 void MultipartStreamReader::SetBlockSize(size_t size) 253 { 254 if (size == 0) 255 { 256 throw OrthancException(ErrorCode_ParameterOutOfRange); 257 } 258 else 259 { 260 blockSize_ = size; 261 } 262 } 263 GetBlockSize() const264 size_t MultipartStreamReader::GetBlockSize() const 265 { 266 return blockSize_; 267 } 268 SetHandler(MultipartStreamReader::IHandler & handler)269 void MultipartStreamReader::SetHandler(MultipartStreamReader::IHandler &handler) 270 { 271 handler_ = &handler; 272 } 273 274 AddChunk(const void * chunk,size_t size)275 void MultipartStreamReader::AddChunk(const void* chunk, 276 size_t size) 277 { 278 if (state_ != State_Done && 279 size != 0) 280 { 281 size_t oldSize = buffer_.GetNumBytes(); 282 if (oldSize == 0) 283 { 284 /** 285 * Optimization in Orthanc 1.9.3: Directly parse the input 286 * buffer instead of going through the ChunkedBuffer if the 287 * latter is still empty. This notably avoids one memcpy() in 288 * STOW-RS server if chunked transfers is disabled. 289 **/ 290 ParseBlock(chunk, size); 291 } 292 else 293 { 294 buffer_.AddChunk(chunk, size); 295 296 if (oldSize / blockSize_ != buffer_.GetNumBytes() / blockSize_) 297 { 298 ParseStream(); 299 } 300 } 301 } 302 } 303 304 AddChunk(const std::string & chunk)305 void MultipartStreamReader::AddChunk(const std::string& chunk) 306 { 307 if (!chunk.empty()) 308 { 309 AddChunk(chunk.c_str(), chunk.size()); 310 } 311 } 312 313 CloseStream()314 void MultipartStreamReader::CloseStream() 315 { 316 if (buffer_.GetNumBytes() != 0) 317 { 318 ParseStream(); 319 } 320 } 321 322 GetMainContentType(std::string & contentType,const HttpHeaders & headers)323 bool MultipartStreamReader::GetMainContentType(std::string& contentType, 324 const HttpHeaders& headers) 325 { 326 HttpHeaders::const_iterator it = headers.find("content-type"); 327 328 if (it == headers.end()) 329 { 330 return false; 331 } 332 else 333 { 334 contentType = it->second; 335 return true; 336 } 337 } 338 339 RemoveSurroundingQuotes(std::string & value)340 static void RemoveSurroundingQuotes(std::string& value) 341 { 342 if (value.size() >= 2 && 343 value[0] == '"' && 344 value[value.size() - 1] == '"') 345 { 346 value = value.substr(1, value.size() - 2); 347 } 348 } 349 350 ParseMultipartContentType(std::string & contentType,std::string & subType,std::string & boundary,const std::string & contentTypeHeader)351 bool MultipartStreamReader::ParseMultipartContentType(std::string& contentType, 352 std::string& subType, 353 std::string& boundary, 354 const std::string& contentTypeHeader) 355 { 356 std::vector<std::string> tokens; 357 Toolbox::TokenizeString(tokens, contentTypeHeader, ';'); 358 359 if (tokens.empty()) 360 { 361 return false; 362 } 363 364 contentType = Toolbox::StripSpaces(tokens[0]); 365 Toolbox::ToLowerCase(contentType); 366 367 if (contentType.empty()) 368 { 369 return false; 370 } 371 372 bool valid = false; 373 subType.clear(); 374 375 for (size_t i = 1; i < tokens.size(); i++) 376 { 377 std::vector<std::string> items; 378 Toolbox::TokenizeString(items, tokens[i], '='); 379 380 if (items.size() == 2) 381 { 382 if (boost::iequals("boundary", Toolbox::StripSpaces(items[0]))) 383 { 384 boundary = Toolbox::StripSpaces(items[1]); 385 386 // https://bugs.orthanc-server.com/show_bug.cgi?id=190 387 RemoveSurroundingQuotes(boundary); 388 389 valid = !boundary.empty(); 390 } 391 else if (boost::iequals("type", Toolbox::StripSpaces(items[0]))) 392 { 393 subType = Toolbox::StripSpaces(items[1]); 394 Toolbox::ToLowerCase(subType); 395 396 // https://bugs.orthanc-server.com/show_bug.cgi?id=54 397 // https://tools.ietf.org/html/rfc7231#section-3.1.1.1 398 RemoveSurroundingQuotes(subType); 399 } 400 } 401 } 402 403 return valid; 404 } 405 406 ParseHeaderArguments(std::string & main,std::map<std::string,std::string> & arguments,const std::string & header)407 bool MultipartStreamReader::ParseHeaderArguments(std::string& main, 408 std::map<std::string, std::string>& arguments, 409 const std::string& header) 410 { 411 std::vector<std::string> tokens; 412 Toolbox::TokenizeString(tokens, header, ';'); 413 414 if (tokens.empty()) 415 { 416 return false; 417 } 418 419 main = Toolbox::StripSpaces(tokens[0]); 420 Toolbox::ToLowerCase(main); 421 if (main.empty()) 422 { 423 return false; 424 } 425 426 arguments.clear(); 427 428 for (size_t i = 1; i < tokens.size(); i++) 429 { 430 std::vector<std::string> items; 431 Toolbox::TokenizeString(items, tokens[i], '='); 432 433 if (items.size() > 2) 434 { 435 return false; 436 } 437 else if (!items.empty()) 438 { 439 std::string key = Toolbox::StripSpaces(items[0]); 440 Toolbox::ToLowerCase(key); 441 442 if (arguments.find(key) != arguments.end()) 443 { 444 LOG(ERROR) << "The same argument was provided twice in an HTTP header: \"" 445 << key << "\" in \"" << header << "\""; 446 return false; 447 } 448 else if (!key.empty()) 449 { 450 if (items.size() == 1) 451 { 452 arguments[key] = ""; 453 } 454 else 455 { 456 assert(items.size() == 2); 457 std::string value = Toolbox::StripSpaces(items[1]); 458 RemoveSurroundingQuotes(value); 459 arguments[key] = value; 460 } 461 } 462 } 463 } 464 465 return true; 466 } 467 } 468