1 /* 2 * HTTP.actor.cpp 3 * 4 * This source file is part of the FoundationDB open source project 5 * 6 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 #include "fdbclient/HTTP.h" 22 #include "fdbclient/md5/md5.h" 23 #include "fdbclient/libb64/encode.h" 24 #include <cctype> 25 #include "flow/actorcompiler.h" // has to be last include 26 27 namespace HTTP { 28 urlEncode(const std::string & s)29 std::string urlEncode(const std::string &s) { 30 std::string o; 31 o.reserve(s.size() * 3); 32 char buf[4]; 33 for(auto c : s) 34 if(std::isalnum(c) || c == '?' || c == '/' || c == '-' || c == '_' || c == '.' || c == ',' || c == ':') 35 o.append(&c, 1); 36 else { 37 sprintf(buf, "%%%.02X", c); 38 o.append(buf); 39 } 40 return o; 41 } 42 verifyMD5(bool fail_if_header_missing,Optional<std::string> content_sum)43 bool Response::verifyMD5(bool fail_if_header_missing, Optional<std::string> content_sum) { 44 auto i = headers.find("Content-MD5"); 45 if(i != headers.end()) { 46 // If a content sum is not provided, calculate one from the response content 47 if(!content_sum.present()) { 48 MD5_CTX sum; 49 ::MD5_Init(&sum); 50 ::MD5_Update(&sum, content.data(), content.size()); 51 std::string sumBytes; 52 sumBytes.resize(16); 53 ::MD5_Final((unsigned char *)sumBytes.data(), &sum); 54 std::string sumStr = base64::encoder::from_string(sumBytes); 55 sumStr.resize(sumStr.size() - 1); 56 content_sum = sumStr; 57 } 58 return i->second == content_sum.get(); 59 } 60 return !fail_if_header_missing; 61 } 62 toString()63 std::string Response::toString() { 64 std::string r = format("Response Code: %d\n", code); 65 r += format("Response ContentLen: %lld\n", contentLen); 66 for(auto h : headers) 67 r += format("Reponse Header: %s: %s\n", h.first.c_str(), h.second.c_str()); 68 r.append("-- RESPONSE CONTENT--\n"); 69 r.append(content); 70 r.append("\n--------\n"); 71 return r; 72 } 73 writeRequestHeader(std::string const & verb,std::string const & resource,HTTP::Headers const & headers,PacketBuffer * dest)74 PacketBuffer * writeRequestHeader(std::string const &verb, std::string const &resource, HTTP::Headers const &headers, PacketBuffer *dest) { 75 PacketWriter writer(dest, NULL, Unversioned()); 76 writer.serializeBytes(verb); 77 writer.serializeBytes(" ", 1); 78 writer.serializeBytes(resource); 79 writer.serializeBytes(LiteralStringRef(" HTTP/1.1\r\n")); 80 for(auto h : headers) { 81 writer.serializeBytes(h.first); 82 writer.serializeBytes(LiteralStringRef(": ")); 83 writer.serializeBytes(h.second); 84 writer.serializeBytes(LiteralStringRef("\r\n")); 85 } 86 writer.serializeBytes(LiteralStringRef("\r\n")); 87 return writer.finish(); 88 } 89 90 // Read at least 1 bytes from conn and up to maxlen in a single read, append read data into *buf 91 // Returns the number of bytes read. read_into_string(Reference<IConnection> conn,std::string * buf,int maxlen)92 ACTOR Future<int> read_into_string(Reference<IConnection> conn, std::string *buf, int maxlen) { 93 loop { 94 // Wait for connection to have something to read 95 wait(conn->onReadable()); 96 wait( delay( 0, TaskReadSocket ) ); 97 98 // Read into buffer 99 int originalSize = buf->size(); 100 // TODO: resize is zero-initializing the space we're about to overwrite, so do something else, which probably means 101 // not using a string for this buffer 102 buf->resize(originalSize + maxlen); 103 uint8_t *wptr = (uint8_t *)buf->data() + originalSize; 104 int len = conn->read(wptr, wptr + maxlen); 105 buf->resize(originalSize + len); 106 107 // Make sure data was actually read, it's possible for there to be none. 108 if(len > 0) 109 return len; 110 } 111 } 112 113 // Returns the position of delim within buf, relative to pos. If delim is not found, continues to read from conn until 114 // either it is found or the connection ends, at which point connection_failed is thrown and buf contains 115 // everything that was read up to that point. read_delimited_into_string(Reference<IConnection> conn,const char * delim,std::string * buf,size_t pos)116 ACTOR Future<size_t> read_delimited_into_string(Reference<IConnection> conn, const char *delim, std::string *buf, size_t pos) { 117 state size_t sPos = pos; 118 state int lookBack = strlen(delim) - 1; 119 ASSERT(lookBack >= 0); 120 121 loop { 122 size_t endPos = buf->find(delim, sPos); 123 if(endPos != std::string::npos) 124 return endPos - pos; 125 // Next search will start at the current end of the buffer - delim size + 1 126 if(sPos >= lookBack) 127 sPos -= lookBack; 128 wait(success(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE))); 129 } 130 } 131 132 // Reads from conn (as needed) until there are at least len bytes starting at pos in buf read_fixed_into_string(Reference<IConnection> conn,int len,std::string * buf,size_t pos)133 ACTOR Future<Void> read_fixed_into_string(Reference<IConnection> conn, int len, std::string *buf, size_t pos) { 134 state int stop_size = pos + len; 135 while(buf->size() < stop_size) 136 wait(success(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE))); 137 return Void(); 138 } 139 read_http_response_headers(Reference<IConnection> conn,Headers * headers,std::string * buf,size_t * pos)140 ACTOR Future<Void> read_http_response_headers(Reference<IConnection> conn, Headers *headers, std::string *buf, size_t *pos) { 141 loop { 142 // Get a line, reading more data from conn if necessary 143 size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", buf, *pos)); 144 145 // If line is empty we have reached the end of the headers. 146 if(lineLen == 0) { 147 // Increment pos to move past the empty line. 148 *pos += 2; 149 return Void(); 150 } 151 152 int nameEnd=-1, valueStart=-1, valueEnd=-1; 153 int len = -1; 154 155 // Read header of the form "Name: Value\n" 156 // Note that multi line header values are not supported here. 157 // Format string breaks down as follows: 158 // %*[^:]%n Some characters other than ':' which are discarded, save the end position 159 // :%*[ \t]%n A colon followed by 0 or more spaces or tabs only, save the end position 160 // %*[^\r]%n Some characters other than \r which are discarded, save the end position 161 // %*1[\r] Exactly one \r 162 // %*1[\n] Exactly one \n 163 // %n Save final end position 164 if(sscanf(buf->c_str() + *pos, "%*[^:]%n:%*[ \t]%n%*[^\r]%n%*1[\r]%*1[\n]%n", &nameEnd, &valueStart, &valueEnd, &len) >= 0 && len > 0) { 165 const std::string name(buf->substr(*pos, nameEnd)); 166 const std::string value(buf->substr(*pos + valueStart, valueEnd - valueStart)); 167 (*headers)[name] = value; 168 *pos += len; 169 len = -1; 170 } 171 else // Malformed header line (at least according to this simple parsing) 172 throw http_bad_response(); 173 } 174 } 175 176 // Reads an HTTP response from a network connection 177 // If the connection fails while being read the exception will emitted 178 // If the response is not parseable or complete in some way, http_bad_response will be thrown read_http_response(Reference<HTTP::Response> r,Reference<IConnection> conn,bool header_only)179 ACTOR Future<Void> read_http_response(Reference<HTTP::Response> r, Reference<IConnection> conn, bool header_only) { 180 state std::string buf; 181 state size_t pos = 0; 182 183 // Read HTTP reponse code and version line 184 size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", &buf, pos)); 185 186 int reachedEnd = -1; 187 sscanf(buf.c_str() + pos, "HTTP/%f %d%n", &r->version, &r->code, &reachedEnd); 188 if(reachedEnd < 0) 189 throw http_bad_response(); 190 191 // Move position past the line found and the delimiter length 192 pos += lineLen + 2; 193 194 // Read headers 195 r->headers.clear(); 196 197 wait(read_http_response_headers(conn, &r->headers, &buf, &pos)); 198 199 auto i = r->headers.find("Content-Length"); 200 if(i != r->headers.end()) 201 r->contentLen = atoi(i->second.c_str()); 202 else 203 r->contentLen = -1; // Content length unknown 204 205 state std::string transferEncoding; 206 i = r->headers.find("Transfer-Encoding"); 207 if(i != r->headers.end()) 208 transferEncoding = i->second; 209 210 r->content.clear(); 211 212 // If this is supposed to be a header-only response and the buffer has been fully processed then stop. Otherwise, there must be response content. 213 if(header_only && pos == buf.size()) 214 return Void(); 215 216 // There should be content (or at least metadata describing that there is no content. 217 // Chunked transfer and 'normal' mode (content length given, data in one segment after headers) are supported. 218 if(r->contentLen >= 0) { 219 // Use response content as the buffer so there's no need to copy it later. 220 r->content = buf.substr(pos); 221 pos = 0; 222 223 // Read until there are at least contentLen bytes available at pos 224 wait(read_fixed_into_string(conn, r->contentLen, &r->content, pos)); 225 226 // There shouldn't be any bytes after content. 227 if(r->content.size() != r->contentLen) 228 throw http_bad_response(); 229 } 230 else if(transferEncoding == "chunked") { 231 // Copy remaining buffer data to content which will now be the read buffer for the chunk encoded data. 232 // Overall this will be fairly efficient since most bytes will only be written once but some bytes will 233 // have to be copied forward in the buffer when removing chunk overhead bytes. 234 r->content = buf.substr(pos); 235 pos = 0; 236 237 loop { 238 { 239 // Read the line that contains the chunk length as text in hex 240 size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", &r->content, pos)); 241 state int chunkLen = strtol(r->content.substr(pos, lineLen).c_str(), NULL, 16); 242 243 // Instead of advancing pos, erase the chunk length header line (line length + delimiter size) from the content buffer 244 r->content.erase(pos, lineLen + 2); 245 246 // If chunkLen is 0 then this marks the end of the content chunks. 247 if(chunkLen == 0) 248 break; 249 250 // Read (if needed) until chunkLen bytes are available at pos, then advance pos by chunkLen 251 wait(read_fixed_into_string(conn, chunkLen, &r->content, pos)); 252 pos += chunkLen; 253 } 254 255 { 256 // Read the final empty line at the end of the chunk (the required "\r\n" after the chunk bytes) 257 size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", &r->content, pos)); 258 if(lineLen != 0) 259 throw http_bad_response(); 260 261 // Instead of advancing pos, erase the empty line from the content buffer 262 r->content.erase(pos, 2); 263 } 264 } 265 266 // The content buffer now contains the de-chunked, contiguous content at position 0 to pos. Save this length. 267 r->contentLen = pos; 268 269 // Next is the post-chunk header block, so read that. 270 wait(read_http_response_headers(conn, &r->headers, &r->content, &pos)); 271 272 // If the header parsing did not consume all of the buffer then something is wrong 273 if(pos != r->content.size()) 274 throw http_bad_response(); 275 276 // Now truncate the buffer to just the dechunked contiguous content. 277 r->content.erase(r->contentLen); 278 } 279 else { 280 // Some unrecogize response content scheme is being used. 281 throw http_bad_response(); 282 } 283 284 // If there is actual response content, check the MD5 sum against the Content-MD5 response header 285 if(r->content.size() > 0) 286 if(!r->verifyMD5(false)) // false arg means do not fail if the Content-MD5 header is missing. 287 throw http_bad_response(); 288 289 return Void(); 290 } 291 read(Reference<IConnection> conn,bool header_only)292 Future<Void> HTTP::Response::read(Reference<IConnection> conn, bool header_only) { 293 return read_http_response(Reference<HTTP::Response>::addRef(this), conn, header_only); 294 } 295 296 // Do a request, get a Response. 297 // Request content is provided as UnsentPacketQueue *pContent which will be depleted as bytes are sent but the queue itself must live for the life of this actor 298 // and be destroyed by the caller 299 // TODO: pSent is very hackish, do something better. doRequest(Reference<IConnection> conn,std::string verb,std::string resource,HTTP::Headers headers,UnsentPacketQueue * pContent,int contentLen,Reference<IRateControl> sendRate,int64_t * pSent,Reference<IRateControl> recvRate,std::string requestIDHeader)300 ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn, std::string verb, std::string resource, HTTP::Headers headers, UnsentPacketQueue *pContent, int contentLen, Reference<IRateControl> sendRate, int64_t *pSent, Reference<IRateControl> recvRate, std::string requestIDHeader) { 301 state TraceEvent event(SevDebug, "HTTPRequest"); 302 303 state UnsentPacketQueue empty; 304 if(pContent == NULL) 305 pContent = ∅ 306 307 // There is no standard http request id header field, so either a global default can be set via a knob 308 // or it can be set per-request with the requestIDHeader argument (which overrides the default) 309 if(requestIDHeader.empty()) { 310 requestIDHeader = CLIENT_KNOBS->HTTP_REQUEST_ID_HEADER; 311 } 312 313 state bool earlyResponse = false; 314 state int total_sent = 0; 315 state double send_start; 316 317 event.detail("DebugID", conn->getDebugID()); 318 event.detail("RemoteAddress", conn->getPeerAddress()); 319 event.detail("Verb", verb); 320 event.detail("Resource", resource); 321 event.detail("RequestContentLen", contentLen); 322 323 try { 324 state std::string requestID; 325 if(!requestIDHeader.empty()) { 326 requestID = g_random->randomUniqueID().toString(); 327 requestID = requestID.insert(20, "-"); 328 requestID = requestID.insert(16, "-"); 329 requestID = requestID.insert(12, "-"); 330 requestID = requestID.insert(8, "-"); 331 332 headers[requestIDHeader] = requestID; 333 event.detail("RequestIDSent", requestID); 334 } 335 336 // Write headers to a packet buffer chain 337 PacketBuffer *pFirst = new PacketBuffer(); 338 PacketBuffer *pLast = writeRequestHeader(verb, resource, headers, pFirst); 339 // Prepend headers to content packer buffer chain 340 pContent->prependWriteBuffer(pFirst, pLast); 341 342 if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 1) 343 printf("[%s] HTTP starting %s %s ContentLen:%d\n", conn->getDebugID().toString().c_str(), verb.c_str(), resource.c_str(), contentLen); 344 if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 2) { 345 for(auto h : headers) 346 printf("Request Header: %s: %s\n", h.first.c_str(), h.second.c_str()); 347 } 348 349 state Reference<HTTP::Response> r(new HTTP::Response()); 350 state Future<Void> responseReading = r->read(conn, verb == "HEAD" || verb == "DELETE"); 351 352 send_start = timer(); 353 354 loop { 355 wait(conn->onWritable()); 356 wait( delay( 0, TaskWriteSocket ) ); 357 358 // If we already got a response, before finishing sending the request, then close the connection, 359 // set the Connection header to "close" as a hint to the caller that this connection can't be used 360 // again, and break out of the send loop. 361 if(responseReading.isReady()) { 362 conn->close(); 363 r->headers["Connection"] = "close"; 364 earlyResponse = true; 365 break; 366 } 367 368 state int trySend = CLIENT_KNOBS->HTTP_SEND_SIZE; 369 wait(sendRate->getAllowance(trySend)); 370 int len = conn->write(pContent->getUnsent(), trySend); 371 if(pSent != nullptr) 372 *pSent += len; 373 sendRate->returnUnused(trySend - len); 374 total_sent += len; 375 pContent->sent(len); 376 if(pContent->empty()) 377 break; 378 } 379 380 wait(responseReading); 381 double elapsed = timer() - send_start; 382 383 event.detail("ResponseCode", r->code); 384 event.detail("ResponseContentLen", r->contentLen); 385 event.detail("Elapsed", elapsed); 386 387 Optional<Error> err; 388 if(!requestIDHeader.empty()) { 389 std::string responseID; 390 auto iid = r->headers.find(requestIDHeader); 391 if(iid != r->headers.end()) { 392 responseID = iid->second; 393 } 394 event.detail("RequestIDReceived", responseID); 395 if(requestID != responseID) { 396 err = http_bad_request_id(); 397 398 // Log a non-debug a error 399 Severity sev = SevError; 400 // If the response code is 5xx (server error) and the responseID is empty then just warn 401 if(responseID.empty() && r->code >= 500 && r->code < 600) { 402 sev = SevWarnAlways; 403 } 404 405 TraceEvent(sev, "HTTPRequestFailedIDMismatch") 406 .detail("DebugID", conn->getDebugID()) 407 .detail("RemoteAddress", conn->getPeerAddress()) 408 .detail("Verb", verb) 409 .detail("Resource", resource) 410 .detail("RequestContentLen", contentLen) 411 .detail("ResponseCode", r->code) 412 .detail("ResponseContentLen", r->contentLen) 413 .detail("RequestIDSent", requestID) 414 .detail("RequestIDReceived", responseID) 415 .error(err.get()); 416 } 417 } 418 419 if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0) { 420 printf("[%s] HTTP %scode=%d early=%d, time=%fs %s %s contentLen=%d [%d out, response content len %d]\n", 421 conn->getDebugID().toString().c_str(), 422 (err.present() ? format("*ERROR*=%s ", err.get().name()).c_str() : ""), 423 r->code, earlyResponse, elapsed, verb.c_str(), resource.c_str(), contentLen, total_sent, (int)r->contentLen); 424 } 425 if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 2) { 426 printf("[%s] HTTP RESPONSE: %s %s\n%s\n", conn->getDebugID().toString().c_str(), verb.c_str(), resource.c_str(), r->toString().c_str()); 427 } 428 429 if(err.present()) { 430 throw err.get(); 431 } 432 433 return r; 434 } catch(Error &e) { 435 double elapsed = timer() - send_start; 436 if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0 && e.code() != error_code_http_bad_request_id) { 437 printf("[%s] HTTP *ERROR*=%s early=%d, time=%fs %s %s contentLen=%d [%d out]\n", 438 conn->getDebugID().toString().c_str(), e.name(), earlyResponse, elapsed, verb.c_str(), resource.c_str(), contentLen, total_sent); 439 } 440 event.error(e); 441 throw; 442 } 443 } 444 445 } 446