1 /*
2  * HTTP.actor.cpp
3  *
4  * This source file is part of the FoundationDB open source project
5  *
6  * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *     http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 #include "fdbclient/HTTP.h"
22 #include "fdbclient/md5/md5.h"
23 #include "fdbclient/libb64/encode.h"
24 #include <cctype>
25 #include "flow/actorcompiler.h" // has to be last include
26 
27 namespace HTTP {
28 
// Percent-encodes s for use in a URL.
// ASCII letters and digits, plus the characters ? / - _ . , : are copied through unchanged.
// Every other byte is emitted as '%' followed by exactly two uppercase hex digits.
// Returns the encoded string; an empty input yields an empty output.
std::string urlEncode(const std::string &s) {
	static const char hexDigits[] = "0123456789ABCDEF";

	std::string o;
	// Worst case every input byte expands to three output characters.
	o.reserve(s.size() * 3);

	for(const char c : s) {
		// Classify and format the byte as unsigned.  Passing a negative char to std::isalnum is
		// undefined, and a sign-extended byte would need more than two hex digits.
		const unsigned char uc = static_cast<unsigned char>(c);
		const bool passThrough = std::isalnum(uc) || c == '?' || c == '/' || c == '-' || c == '_' || c == '.' || c == ',' || c == ':';

		if(passThrough)
			o.push_back(c);
		else {
			o.push_back('%');
			o.push_back(hexDigits[uc >> 4]);
			o.push_back(hexDigits[uc & 0x0F]);
		}
	}
	return o;
}
42 
verifyMD5(bool fail_if_header_missing,Optional<std::string> content_sum)43 	bool Response::verifyMD5(bool fail_if_header_missing, Optional<std::string> content_sum) {
44 		auto i = headers.find("Content-MD5");
45 		if(i != headers.end()) {
46 			// If a content sum is not provided, calculate one from the response content
47 			if(!content_sum.present()) {
48 				MD5_CTX sum;
49 				::MD5_Init(&sum);
50 				::MD5_Update(&sum, content.data(), content.size());
51 				std::string sumBytes;
52 				sumBytes.resize(16);
53 				::MD5_Final((unsigned char *)sumBytes.data(), &sum);
54 				std::string sumStr = base64::encoder::from_string(sumBytes);
55 				sumStr.resize(sumStr.size() - 1);
56 				content_sum = sumStr;
57 			}
58 			return i->second == content_sum.get();
59 		}
60 		return !fail_if_header_missing;
61 	}
62 
toString()63 	std::string Response::toString() {
64 		std::string r = format("Response Code: %d\n", code);
65 		r += format("Response ContentLen: %lld\n", contentLen);
66 		for(auto h : headers)
67 			r += format("Reponse Header: %s: %s\n", h.first.c_str(), h.second.c_str());
68 		r.append("-- RESPONSE CONTENT--\n");
69 		r.append(content);
70 		r.append("\n--------\n");
71 		return r;
72 	}
73 
writeRequestHeader(std::string const & verb,std::string const & resource,HTTP::Headers const & headers,PacketBuffer * dest)74 	PacketBuffer * writeRequestHeader(std::string const &verb, std::string const &resource, HTTP::Headers const &headers, PacketBuffer *dest) {
75 		PacketWriter writer(dest, NULL, Unversioned());
76 		writer.serializeBytes(verb);
77 		writer.serializeBytes(" ", 1);
78 		writer.serializeBytes(resource);
79 		writer.serializeBytes(LiteralStringRef(" HTTP/1.1\r\n"));
80 		for(auto h : headers) {
81 			writer.serializeBytes(h.first);
82 			writer.serializeBytes(LiteralStringRef(": "));
83 			writer.serializeBytes(h.second);
84 			writer.serializeBytes(LiteralStringRef("\r\n"));
85 		}
86 		writer.serializeBytes(LiteralStringRef("\r\n"));
87 		return writer.finish();
88 	}
89 
90 	// Read at least 1 bytes from conn and up to maxlen in a single read, append read data into *buf
91 	// Returns the number of bytes read.
read_into_string(Reference<IConnection> conn,std::string * buf,int maxlen)92 	ACTOR Future<int> read_into_string(Reference<IConnection> conn, std::string *buf, int maxlen) {
93 		loop {
94 			// Wait for connection to have something to read
95 			wait(conn->onReadable());
96 			wait( delay( 0, TaskReadSocket ) );
97 
98 			// Read into buffer
99 			int originalSize = buf->size();
100 			// TODO:  resize is zero-initializing the space we're about to overwrite, so do something else, which probably means
101 			// not using a string for this buffer
102 			buf->resize(originalSize + maxlen);
103 			uint8_t *wptr = (uint8_t *)buf->data() + originalSize;
104 			int len = conn->read(wptr, wptr + maxlen);
105 			buf->resize(originalSize + len);
106 
107 			// Make sure data was actually read, it's possible for there to be none.
108 			if(len > 0)
109 				return len;
110 		}
111 	}
112 
113 	// Returns the position of delim within buf, relative to pos.  If delim is not found, continues to read from conn until
114 	// either it is found or the connection ends, at which point connection_failed is thrown and buf contains
115 	// everything that was read up to that point.
read_delimited_into_string(Reference<IConnection> conn,const char * delim,std::string * buf,size_t pos)116 	ACTOR Future<size_t> read_delimited_into_string(Reference<IConnection> conn, const char *delim, std::string *buf, size_t pos) {
117 		state size_t sPos = pos;
118 		state int lookBack = strlen(delim) - 1;
119 		ASSERT(lookBack >= 0);
120 
121 		loop {
122 			size_t endPos = buf->find(delim, sPos);
123 			if(endPos != std::string::npos)
124 				return endPos - pos;
125 			// Next search will start at the current end of the buffer - delim size + 1
126 			if(sPos >= lookBack)
127 				sPos -= lookBack;
128 			wait(success(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE)));
129 		}
130 	}
131 
132 	// Reads from conn (as needed) until there are at least len bytes starting at pos in buf
read_fixed_into_string(Reference<IConnection> conn,int len,std::string * buf,size_t pos)133 	ACTOR Future<Void> read_fixed_into_string(Reference<IConnection> conn, int len, std::string *buf, size_t pos) {
134 		state int stop_size = pos + len;
135 		while(buf->size() < stop_size)
136 			wait(success(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE)));
137 		return Void();
138 	}
139 
read_http_response_headers(Reference<IConnection> conn,Headers * headers,std::string * buf,size_t * pos)140 	ACTOR Future<Void> read_http_response_headers(Reference<IConnection> conn, Headers *headers, std::string *buf, size_t *pos) {
141 		loop {
142 			// Get a line, reading more data from conn if necessary
143 			size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", buf, *pos));
144 
145 			// If line is empty we have reached the end of the headers.
146 			if(lineLen == 0) {
147 				// Increment pos to move past the empty line.
148 				*pos += 2;
149 				return Void();
150 			}
151 
152 			int nameEnd=-1, valueStart=-1, valueEnd=-1;
153 			int len = -1;
154 
155 			// Read header of the form "Name: Value\n"
156 			// Note that multi line header values are not supported here.
157 			// Format string breaks down as follows:
158 			//   %*[^:]%n      Some characters other than ':' which are discarded, save the end position
159 			//   :%*[ \t]%n    A colon followed by 0 or more spaces or tabs only, save the end position
160 			//   %*[^\r]%n     Some characters other than \r which are discarded, save the end position
161 			//   %*1[\r]       Exactly one \r
162 			//   %*1[\n]       Exactly one \n
163 			//   %n            Save final end position
164 			if(sscanf(buf->c_str() + *pos, "%*[^:]%n:%*[ \t]%n%*[^\r]%n%*1[\r]%*1[\n]%n", &nameEnd, &valueStart, &valueEnd, &len) >= 0 && len > 0) {
165 				const std::string name(buf->substr(*pos, nameEnd));
166 				const std::string value(buf->substr(*pos + valueStart, valueEnd - valueStart));
167 				(*headers)[name] = value;
168 				*pos += len;
169 				len = -1;
170 			}
171 			else // Malformed header line (at least according to this simple parsing)
172 				throw http_bad_response();
173 		}
174 	}
175 
176 	// Reads an HTTP response from a network connection
177 	// If the connection fails while being read the exception will emitted
178 	// If the response is not parseable or complete in some way, http_bad_response will be thrown
read_http_response(Reference<HTTP::Response> r,Reference<IConnection> conn,bool header_only)179 	ACTOR Future<Void> read_http_response(Reference<HTTP::Response> r, Reference<IConnection> conn, bool header_only) {
180 		state std::string buf;
181 		state size_t pos = 0;
182 
183 		// Read HTTP reponse code and version line
184 		size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", &buf, pos));
185 
186 		int reachedEnd = -1;
187 		sscanf(buf.c_str() + pos, "HTTP/%f %d%n", &r->version, &r->code, &reachedEnd);
188 		if(reachedEnd < 0)
189 			throw http_bad_response();
190 
191 		// Move position past the line found and the delimiter length
192 		pos += lineLen + 2;
193 
194 		// Read headers
195 		r->headers.clear();
196 
197 		wait(read_http_response_headers(conn, &r->headers, &buf, &pos));
198 
199 		auto i = r->headers.find("Content-Length");
200 		if(i != r->headers.end())
201 			r->contentLen = atoi(i->second.c_str());
202 		else
203 			r->contentLen = -1;  // Content length unknown
204 
205 		state std::string transferEncoding;
206 		i = r->headers.find("Transfer-Encoding");
207 		if(i != r->headers.end())
208 			transferEncoding = i->second;
209 
210 		r->content.clear();
211 
212 		// If this is supposed to be a header-only response and the buffer has been fully processed then stop.  Otherwise, there must be response content.
213 		if(header_only && pos == buf.size())
214 			return Void();
215 
216 		// There should be content (or at least metadata describing that there is no content.
217 		// Chunked transfer and 'normal' mode (content length given, data in one segment after headers) are supported.
218 		if(r->contentLen >= 0) {
219 			// Use response content as the buffer so there's no need to copy it later.
220 			r->content = buf.substr(pos);
221 			pos = 0;
222 
223 			// Read until there are at least contentLen bytes available at pos
224 			wait(read_fixed_into_string(conn, r->contentLen, &r->content, pos));
225 
226 			// There shouldn't be any bytes after content.
227 			if(r->content.size() != r->contentLen)
228 				throw http_bad_response();
229 		}
230 		else if(transferEncoding == "chunked") {
231 			// Copy remaining buffer data to content which will now be the read buffer for the chunk encoded data.
232 			// Overall this will be fairly efficient since most bytes will only be written once but some bytes will
233 			// have to be copied forward in the buffer when removing chunk overhead bytes.
234 			r->content = buf.substr(pos);
235 			pos = 0;
236 
237 			loop {
238 				{
239 					// Read the line that contains the chunk length as text in hex
240 					size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", &r->content, pos));
241 					state int chunkLen = strtol(r->content.substr(pos, lineLen).c_str(), NULL, 16);
242 
243 					// Instead of advancing pos, erase the chunk length header line (line length + delimiter size) from the content buffer
244 					r->content.erase(pos, lineLen + 2);
245 
246 					// If chunkLen is 0 then this marks the end of the content chunks.
247 					if(chunkLen == 0)
248 						break;
249 
250 					// Read (if needed) until chunkLen bytes are available at pos, then advance pos by chunkLen
251 					wait(read_fixed_into_string(conn, chunkLen, &r->content, pos));
252 					pos += chunkLen;
253 				}
254 
255 				{
256 					// Read the final empty line at the end of the chunk (the required "\r\n" after the chunk bytes)
257 					size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", &r->content, pos));
258 					if(lineLen != 0)
259 						throw http_bad_response();
260 
261 					// Instead of advancing pos, erase the empty line from the content buffer
262 					r->content.erase(pos, 2);
263 				}
264 			}
265 
266 			// The content buffer now contains the de-chunked, contiguous content at position 0 to pos.  Save this length.
267 			r->contentLen = pos;
268 
269 			// Next is the post-chunk header block, so read that.
270 			wait(read_http_response_headers(conn, &r->headers, &r->content, &pos));
271 
272 			// If the header parsing did not consume all of the buffer then something is wrong
273 			if(pos != r->content.size())
274 				throw http_bad_response();
275 
276 			// Now truncate the buffer to just the dechunked contiguous content.
277 			r->content.erase(r->contentLen);
278 		}
279 		else {
280 			// Some unrecogize response content scheme is being used.
281 			throw http_bad_response();
282 		}
283 
284 		// If there is actual response content, check the MD5 sum against the Content-MD5 response header
285 		if(r->content.size() > 0)
286 			if(!r->verifyMD5(false))  // false arg means do not fail if the Content-MD5 header is missing.
287 				throw http_bad_response();
288 
289 		return Void();
290 	}
291 
read(Reference<IConnection> conn,bool header_only)292 	Future<Void> HTTP::Response::read(Reference<IConnection> conn, bool header_only) {
293 		return read_http_response(Reference<HTTP::Response>::addRef(this), conn, header_only);
294 	}
295 
296 	// Do a request, get a Response.
297 	// Request content is provided as UnsentPacketQueue *pContent which will be depleted as bytes are sent but the queue itself must live for the life of this actor
298 	// and be destroyed by the caller
299 	// TODO:  pSent is very hackish, do something better.
doRequest(Reference<IConnection> conn,std::string verb,std::string resource,HTTP::Headers headers,UnsentPacketQueue * pContent,int contentLen,Reference<IRateControl> sendRate,int64_t * pSent,Reference<IRateControl> recvRate,std::string requestIDHeader)300 	ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn, std::string verb, std::string resource, HTTP::Headers headers, UnsentPacketQueue *pContent, int contentLen, Reference<IRateControl> sendRate, int64_t *pSent, Reference<IRateControl> recvRate, std::string requestIDHeader) {
301 		state TraceEvent event(SevDebug, "HTTPRequest");
302 
303 		state UnsentPacketQueue empty;
304 		if(pContent == NULL)
305 			pContent = &empty;
306 
307 		// There is no standard http request id header field, so either a global default can be set via a knob
308 		// or it can be set per-request with the requestIDHeader argument (which overrides the default)
309 		if(requestIDHeader.empty()) {
310 			requestIDHeader = CLIENT_KNOBS->HTTP_REQUEST_ID_HEADER;
311 		}
312 
313 		state bool earlyResponse = false;
314 		state int total_sent = 0;
315 		state double send_start;
316 
317 		event.detail("DebugID", conn->getDebugID());
318 		event.detail("RemoteAddress", conn->getPeerAddress());
319 		event.detail("Verb", verb);
320 		event.detail("Resource", resource);
321 		event.detail("RequestContentLen", contentLen);
322 
323 		try {
324 			state std::string requestID;
325 			if(!requestIDHeader.empty()) {
326 				requestID = g_random->randomUniqueID().toString();
327 				requestID = requestID.insert(20, "-");
328 				requestID = requestID.insert(16, "-");
329 				requestID = requestID.insert(12, "-");
330 				requestID = requestID.insert(8, "-");
331 
332 				headers[requestIDHeader] = requestID;
333 				event.detail("RequestIDSent", requestID);
334 			}
335 
336 			// Write headers to a packet buffer chain
337 			PacketBuffer *pFirst = new PacketBuffer();
338 			PacketBuffer *pLast = writeRequestHeader(verb, resource, headers, pFirst);
339 			// Prepend headers to content packer buffer chain
340 			pContent->prependWriteBuffer(pFirst, pLast);
341 
342 			if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 1)
343 				printf("[%s] HTTP starting %s %s ContentLen:%d\n", conn->getDebugID().toString().c_str(), verb.c_str(), resource.c_str(), contentLen);
344 			if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 2) {
345 				for(auto h : headers)
346 					printf("Request Header: %s: %s\n", h.first.c_str(), h.second.c_str());
347 			}
348 
349 			state Reference<HTTP::Response> r(new HTTP::Response());
350 			state Future<Void> responseReading = r->read(conn, verb == "HEAD" || verb == "DELETE");
351 
352 			send_start = timer();
353 
354 			loop {
355 				wait(conn->onWritable());
356 				wait( delay( 0, TaskWriteSocket ) );
357 
358 				// If we already got a response, before finishing sending the request, then close the connection,
359 				// set the Connection header to "close" as a hint to the caller that this connection can't be used
360 				// again, and break out of the send loop.
361 				if(responseReading.isReady()) {
362 					conn->close();
363 					r->headers["Connection"] = "close";
364 					earlyResponse = true;
365 					break;
366 				}
367 
368 				state int trySend = CLIENT_KNOBS->HTTP_SEND_SIZE;
369 				wait(sendRate->getAllowance(trySend));
370 				int len = conn->write(pContent->getUnsent(), trySend);
371 				if(pSent != nullptr)
372 					*pSent += len;
373 				sendRate->returnUnused(trySend - len);
374 				total_sent += len;
375 				pContent->sent(len);
376 				if(pContent->empty())
377 					break;
378 			}
379 
380 			wait(responseReading);
381 			double elapsed = timer() - send_start;
382 
383 			event.detail("ResponseCode", r->code);
384 			event.detail("ResponseContentLen", r->contentLen);
385 			event.detail("Elapsed", elapsed);
386 
387 			Optional<Error> err;
388 			if(!requestIDHeader.empty()) {
389 				std::string responseID;
390 				auto iid = r->headers.find(requestIDHeader);
391 				if(iid != r->headers.end()) {
392 					responseID = iid->second;
393 				}
394 				event.detail("RequestIDReceived", responseID);
395 				if(requestID != responseID) {
396 					err = http_bad_request_id();
397 
398 					// Log a non-debug a error
399 					Severity sev = SevError;
400 					// If the response code is 5xx (server error) and the responseID is empty then just warn
401 					if(responseID.empty() && r->code >= 500 && r->code < 600) {
402 						sev = SevWarnAlways;
403 					}
404 
405 					TraceEvent(sev, "HTTPRequestFailedIDMismatch")
406 						.detail("DebugID", conn->getDebugID())
407 						.detail("RemoteAddress", conn->getPeerAddress())
408 						.detail("Verb", verb)
409 						.detail("Resource", resource)
410 						.detail("RequestContentLen", contentLen)
411 						.detail("ResponseCode", r->code)
412 						.detail("ResponseContentLen", r->contentLen)
413 						.detail("RequestIDSent", requestID)
414 						.detail("RequestIDReceived", responseID)
415 						.error(err.get());
416 				}
417 			}
418 
419 			if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0) {
420 				printf("[%s] HTTP %scode=%d early=%d, time=%fs %s %s contentLen=%d [%d out, response content len %d]\n",
421 					conn->getDebugID().toString().c_str(),
422 					(err.present() ? format("*ERROR*=%s ", err.get().name()).c_str() : ""),
423 					r->code, earlyResponse, elapsed, verb.c_str(), resource.c_str(), contentLen, total_sent, (int)r->contentLen);
424 			}
425 			if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 2) {
426 				printf("[%s] HTTP RESPONSE:  %s %s\n%s\n", conn->getDebugID().toString().c_str(), verb.c_str(), resource.c_str(), r->toString().c_str());
427 			}
428 
429 			if(err.present()) {
430 				throw err.get();
431 			}
432 
433 			return r;
434 		} catch(Error &e) {
435 			double elapsed = timer() - send_start;
436 			if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0 && e.code() != error_code_http_bad_request_id) {
437 				printf("[%s] HTTP *ERROR*=%s early=%d, time=%fs %s %s contentLen=%d [%d out]\n",
438 					conn->getDebugID().toString().c_str(), e.name(), earlyResponse, elapsed, verb.c_str(), resource.c_str(), contentLen, total_sent);
439 			}
440 			event.error(e);
441 			throw;
442 		}
443 	}
444 
445 }
446