1 /*
2  * nghttp2 - HTTP/2 C Library
3  *
4  * Copyright (c) 2012 Tatsuhiro Tsujikawa
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 #ifndef SHRPX_DOWNSTREAM_H
26 #define SHRPX_DOWNSTREAM_H
27 
28 #include "shrpx.h"
29 
30 #include <cinttypes>
31 #include <vector>
32 #include <string>
33 #include <memory>
34 #include <chrono>
35 #include <algorithm>
36 
37 #include <ev.h>
38 
39 #include <nghttp2/nghttp2.h>
40 
41 #include "llhttp.h"
42 
43 #include "shrpx_io_control.h"
44 #include "shrpx_log_config.h"
45 #include "http2.h"
46 #include "memchunk.h"
47 #include "allocator.h"
48 
49 using namespace nghttp2;
50 
51 namespace shrpx {
52 
53 class Upstream;
54 class DownstreamConnection;
55 struct BlockedLink;
56 struct DownstreamAddrGroup;
57 struct DownstreamAddr;
58 
59 class FieldStore {
60 public:
FieldStore(BlockAllocator & balloc,size_t headers_initial_capacity)61   FieldStore(BlockAllocator &balloc, size_t headers_initial_capacity)
62       : content_length(-1),
63         balloc_(balloc),
64         buffer_size_(0),
65         header_key_prev_(false),
66         trailer_key_prev_(false) {
67     headers_.reserve(headers_initial_capacity);
68   }
69 
headers()70   const HeaderRefs &headers() const { return headers_; }
trailers()71   const HeaderRefs &trailers() const { return trailers_; }
72 
headers()73   HeaderRefs &headers() { return headers_; }
74 
add_extra_buffer_size(size_t n)75   const void add_extra_buffer_size(size_t n) { buffer_size_ += n; }
buffer_size()76   size_t buffer_size() const { return buffer_size_; }
77 
num_fields()78   size_t num_fields() const { return headers_.size() + trailers_.size(); }
79 
80   // Returns pointer to the header field with the name |name|.  If
81   // multiple header have |name| as name, return last occurrence from
82   // the beginning.  If no such header is found, returns nullptr.
83   const HeaderRefs::value_type *header(int32_t token) const;
84   HeaderRefs::value_type *header(int32_t token);
85   // Returns pointer to the header field with the name |name|.  If no
86   // such header is found, returns nullptr.
87   const HeaderRefs::value_type *header(const StringRef &name) const;
88 
89   void add_header_token(const StringRef &name, const StringRef &value,
90                         bool no_index, int32_t token);
91 
92   // Adds header field name |name|.  First, the copy of header field
93   // name pointed by name.c_str() of length name.size() is made, and
94   // stored.
95   void alloc_add_header_name(const StringRef &name);
96 
97   void append_last_header_key(const char *data, size_t len);
98   void append_last_header_value(const char *data, size_t len);
99 
header_key_prev()100   bool header_key_prev() const { return header_key_prev_; }
101 
102   // Parses content-length, and records it in the field.  If there are
103   // multiple Content-Length, returns -1.
104   int parse_content_length();
105 
106   // Empties headers.
107   void clear_headers();
108 
109   void add_trailer_token(const StringRef &name, const StringRef &value,
110                          bool no_index, int32_t token);
111 
112   // Adds trailer field name |name|.  First, the copy of trailer field
113   // name pointed by name.c_str() of length name.size() is made, and
114   // stored.
115   void alloc_add_trailer_name(const StringRef &name);
116 
117   void append_last_trailer_key(const char *data, size_t len);
118   void append_last_trailer_value(const char *data, size_t len);
119 
trailer_key_prev()120   bool trailer_key_prev() const { return trailer_key_prev_; }
121 
122   // erase_content_length_and_transfer_encoding erases content-length
123   // and transfer-encoding header fields.
124   void erase_content_length_and_transfer_encoding();
125 
126   // content-length, -1 if it is unknown.
127   int64_t content_length;
128 
129 private:
130   BlockAllocator &balloc_;
131   HeaderRefs headers_;
132   // trailer fields.  For HTTP/1.1, trailer fields are only included
133   // with chunked encoding.  For HTTP/2, there is no such limit.
134   HeaderRefs trailers_;
135   // Sum of the length of name and value in headers_ and trailers_.
136   // This could also be increased by add_extra_buffer_size() to take
137   // into account for request URI in case of HTTP/1.x request.
138   size_t buffer_size_;
139   bool header_key_prev_;
140   bool trailer_key_prev_;
141 };
142 
// Protocols allowed in HTTP/2 :protocol header field.  The value is
// stored in Request::connect_proto; any value other than NONE makes
// Request::extended_connect_method() return true.
enum class ConnectProto {
  // No protocol has been recorded.
  NONE,
  // WebSocket.
  WEBSOCKET,
};
148 
149 struct Request {
RequestRequest150   Request(BlockAllocator &balloc)
151       : fs(balloc, 16),
152         recv_body_length(0),
153         unconsumed_body_length(0),
154         method(-1),
155         http_major(1),
156         http_minor(1),
157         connect_proto(ConnectProto::NONE),
158         upgrade_request(false),
159         http2_upgrade_seen(false),
160         connection_close(false),
161         http2_expect_body(false),
162         no_authority(false),
163         forwarded_once(false) {}
164 
consumeRequest165   void consume(size_t len) {
166     assert(unconsumed_body_length >= len);
167     unconsumed_body_length -= len;
168   }
169 
regular_connect_methodRequest170   bool regular_connect_method() const {
171     return method == HTTP_CONNECT && connect_proto == ConnectProto::NONE;
172   }
173 
extended_connect_methodRequest174   bool extended_connect_method() const {
175     return connect_proto != ConnectProto::NONE;
176   }
177 
178   FieldStore fs;
179   // Timestamp when all request header fields are received.
180   std::shared_ptr<Timestamp> tstamp;
181   // Request scheme.  For HTTP/2, this is :scheme header field value.
182   // For HTTP/1.1, this is deduced from URI or connection.
183   StringRef scheme;
184   // Request authority.  This is HTTP/2 :authority header field value
185   // or host header field value.  We may deduce it from absolute-form
186   // HTTP/1 request.  We also store authority-form HTTP/1 request.
187   // This could be empty if request comes from HTTP/1.0 without Host
188   // header field and origin-form.
189   StringRef authority;
190   // Request path, including query component.  For HTTP/1.1, this is
191   // request-target.  For HTTP/2, this is :path header field value.
192   // For CONNECT request, this is empty.
193   StringRef path;
194   // This is original authority which cannot be changed by per-pattern
195   // mruby script.
196   StringRef orig_authority;
197   // This is original path which cannot be changed by per-pattern
198   // mruby script.
199   StringRef orig_path;
200   // the length of request body received so far
201   int64_t recv_body_length;
202   // The number of bytes not consumed by the application yet.
203   size_t unconsumed_body_length;
204   int method;
205   // HTTP major and minor version
206   int http_major, http_minor;
207   // connect_proto specified in HTTP/2 :protocol pseudo header field
208   // which enables extended CONNECT method.  This field is also set if
209   // WebSocket upgrade is requested in h1 frontend for convenience.
210   ConnectProto connect_proto;
211   // Returns true if the request is HTTP upgrade (HTTP Upgrade or
212   // CONNECT method).  Upgrade to HTTP/2 is excluded.  For HTTP/2
213   // Upgrade, check get_http2_upgrade_request().
214   bool upgrade_request;
215   // true if h2c is seen in Upgrade header field.
216   bool http2_upgrade_seen;
217   bool connection_close;
218   // true if this is HTTP/2, and request body is expected.  Note that
219   // we don't take into account HTTP method here.
220   bool http2_expect_body;
221   // true if request does not have any information about authority.
222   // This happens when: For HTTP/2 request, :authority is missing.
223   // For HTTP/1 request, origin or asterisk form is used.
224   bool no_authority;
225   // true if backend selection is done for request once.
226   // orig_authority and orig_path have the authority and path which
227   // are used for the first backend selection.
228   bool forwarded_once;
229 };
230 
231 struct Response {
ResponseResponse232   Response(BlockAllocator &balloc)
233       : fs(balloc, 32),
234         recv_body_length(0),
235         unconsumed_body_length(0),
236         http_status(0),
237         http_major(1),
238         http_minor(1),
239         connection_close(false),
240         headers_only(false) {}
241 
consumeResponse242   void consume(size_t len) {
243     assert(unconsumed_body_length >= len);
244     unconsumed_body_length -= len;
245   }
246 
247   // returns true if a resource denoted by scheme, authority, and path
248   // has already been pushed.
is_resource_pushedResponse249   bool is_resource_pushed(const StringRef &scheme, const StringRef &authority,
250                           const StringRef &path) const {
251     if (!pushed_resources) {
252       return false;
253     }
254     return std::find(std::begin(*pushed_resources), std::end(*pushed_resources),
255                      std::make_tuple(scheme, authority, path)) !=
256            std::end(*pushed_resources);
257   }
258 
259   // remember that a resource denoted by scheme, authority, and path
260   // is pushed.
resource_pushedResponse261   void resource_pushed(const StringRef &scheme, const StringRef &authority,
262                        const StringRef &path) {
263     if (!pushed_resources) {
264       pushed_resources = std::make_unique<
265           std::vector<std::tuple<StringRef, StringRef, StringRef>>>();
266     }
267     pushed_resources->emplace_back(scheme, authority, path);
268   }
269 
270   FieldStore fs;
271   // array of the tuple of scheme, authority, and path of pushed
272   // resource.  This is required because RFC 8297 says that server
273   // typically includes header fields appeared in non-final response
274   // header fields in final response header fields.  Without checking
275   // that a particular resource has already been pushed, or not, we
276   // end up pushing the same resource at least twice.  It is unknown
277   // that we should use more complex data structure (e.g., std::set)
278   // to find the resources faster.
279   std::unique_ptr<std::vector<std::tuple<StringRef, StringRef, StringRef>>>
280       pushed_resources;
281   // the length of response body received so far
282   int64_t recv_body_length;
283   // The number of bytes not consumed by the application yet.  This is
284   // mainly for HTTP/2 backend.
285   size_t unconsumed_body_length;
286   // HTTP status code
287   unsigned int http_status;
288   int http_major, http_minor;
289   bool connection_close;
290   // true if response only consists of HEADERS, and it bears
291   // END_STREAM.  This is used to tell Http2Upstream that it can send
292   // response with single HEADERS with END_STREAM flag only.
293   bool headers_only;
294 };
295 
// State of one half (request or response) of a Downstream.  It is
// stored and read through Downstream::set_request_state() /
// get_request_state() and Downstream::set_response_state() /
// get_response_state().
enum class DownstreamState {
  INITIAL,
  HEADER_COMPLETE,
  MSG_COMPLETE,
  STREAM_CLOSED,
  CONNECT_FAIL,
  MSG_RESET,
  // header contains invalid header field.  We can safely send error
  // response (502) to a client.
  MSG_BAD_HEADER,
  // header fields in HTTP/1 request exceed the configuration limit.
  // This state is only transitioned from INITIAL state, and solely
  // used to signal 431 status code to the client.
  HTTP1_REQUEST_HEADER_TOO_LARGE,
};
311 
// Dispatch state of a Downstream, stored and read through
// Downstream::set_dispatch_state() / get_dispatch_state().  The
// member holding it is documented as only used by HTTP/2 upstream.
enum class DispatchState {
  NONE,
  PENDING,
  BLOCKED,
  ACTIVE,
  FAILURE,
};
319 
320 class Downstream {
321 public:
322   Downstream(Upstream *upstream, MemchunkPool *mcpool, int32_t stream_id);
323   ~Downstream();
324   void reset_upstream(Upstream *upstream);
325   Upstream *get_upstream() const;
326   void set_stream_id(int32_t stream_id);
327   int32_t get_stream_id() const;
328   void set_assoc_stream_id(int32_t stream_id);
329   int32_t get_assoc_stream_id() const;
330   void pause_read(IOCtrlReason reason);
331   int resume_read(IOCtrlReason reason, size_t consumed);
332   void force_resume_read();
333   // Set stream ID for downstream HTTP2 connection.
334   void set_downstream_stream_id(int32_t stream_id);
335   int32_t get_downstream_stream_id() const;
336 
337   int attach_downstream_connection(std::unique_ptr<DownstreamConnection> dconn);
338   void detach_downstream_connection();
339   DownstreamConnection *get_downstream_connection();
340   // Returns dconn_ and nullifies dconn_.
341   std::unique_ptr<DownstreamConnection> pop_downstream_connection();
342 
343   // Returns true if output buffer is full. If underlying dconn_ is
344   // NULL, this function always returns false.
345   bool request_buf_full();
346   // Returns true if upgrade (HTTP Upgrade or CONNECT) is succeeded in
347   // h1 backend.  This should not depend on inspect_http1_response().
348   void check_upgrade_fulfilled_http1();
349   // Returns true if upgrade (HTTP Upgrade or CONNECT) is succeeded in
350   // h2 backend.
351   void check_upgrade_fulfilled_http2();
352   // Returns true if the upgrade is succeeded as a result of the call
353   // check_upgrade_fulfilled_http*().  HTTP/2 Upgrade is excluded.
354   bool get_upgraded() const;
355   // Inspects HTTP/2 request.
356   void inspect_http2_request();
357   // Inspects HTTP/1 request.  This checks whether the request is
358   // upgrade request and tranfer-encoding etc.
359   void inspect_http1_request();
360   // Returns true if the request is HTTP Upgrade for HTTP/2
361   bool get_http2_upgrade_request() const;
362   // Returns the value of HTTP2-Settings request header field.
363   StringRef get_http2_settings() const;
364 
365   // downstream request API
request()366   const Request &request() const { return req_; }
request()367   Request &request() { return req_; }
368 
369   // Count number of crumbled cookies
370   size_t count_crumble_request_cookie();
371   // Crumbles (split cookie by ";") in request_headers_ and adds them
372   // in |nva|.  Headers::no_index is inherited.
373   void crumble_request_cookie(std::vector<nghttp2_nv> &nva);
374   // Assembles request cookies.  The opposite operation against
375   // crumble_request_cookie().
376   StringRef assemble_request_cookie();
377 
378   void
379   set_request_start_time(std::chrono::high_resolution_clock::time_point time);
380   const std::chrono::high_resolution_clock::time_point &
381   get_request_start_time() const;
382   int push_request_headers();
383   bool get_chunked_request() const;
384   void set_chunked_request(bool f);
385   int push_upload_data_chunk(const uint8_t *data, size_t datalen);
386   int end_upload_data();
387   // Validates that received request body length and content-length
388   // matches.
389   bool validate_request_recv_body_length() const;
390   void set_request_downstream_host(const StringRef &host);
391   bool expect_response_body() const;
392   bool expect_response_trailer() const;
393   void set_request_state(DownstreamState state);
394   DownstreamState get_request_state() const;
395   DefaultMemchunks *get_request_buf();
396   void set_request_pending(bool f);
397   bool get_request_pending() const;
398   void set_request_header_sent(bool f);
399   bool get_request_header_sent() const;
400   // Returns true if request is ready to be submitted to downstream.
401   // When sending pending request, get_request_pending() should be
402   // checked too because this function may return true when
403   // get_request_pending() returns false.
404   bool request_submission_ready() const;
405 
406   DefaultMemchunks *get_blocked_request_buf();
407   bool get_blocked_request_data_eof() const;
408   void set_blocked_request_data_eof(bool f);
409 
410   // downstream response API
response()411   const Response &response() const { return resp_; }
response()412   Response &response() { return resp_; }
413 
414   // Rewrites the location response header field.
415   void rewrite_location_response_header(const StringRef &upstream_scheme);
416 
417   bool get_chunked_response() const;
418   void set_chunked_response(bool f);
419 
420   void set_response_state(DownstreamState state);
421   DownstreamState get_response_state() const;
422   DefaultMemchunks *get_response_buf();
423   bool response_buf_full();
424   // Validates that received response body length and content-length
425   // matches.
426   bool validate_response_recv_body_length() const;
427   uint32_t get_response_rst_stream_error_code() const;
428   void set_response_rst_stream_error_code(uint32_t error_code);
429   // Inspects HTTP/1 response.  This checks tranfer-encoding etc.
430   void inspect_http1_response();
431   // Clears some of member variables for response.
432   void reset_response();
433   // True if the response is non-final (1xx status code).  Note that
434   // if connection was upgraded, 101 status code is treated as final.
435   bool get_non_final_response() const;
436   // True if protocol version used by client supports non final
437   // response.  Only HTTP/1.1 and HTTP/2 clients support it.
438   bool supports_non_final_response() const;
439   void set_expect_final_response(bool f);
440   bool get_expect_final_response() const;
441 
442   // Call this method when there is incoming data in downstream
443   // connection.
444   int on_read();
445 
446   // Resets upstream read timer.  If it is active, timeout value is
447   // reset.  If it is not active, timer will be started.
448   void reset_upstream_rtimer();
449   // Resets upstream write timer. If it is active, timeout value is
450   // reset.  If it is not active, timer will be started.  This
451   // function also resets read timer if it has been started.
452   void reset_upstream_wtimer();
453   // Makes sure that upstream write timer is started.  If it has been
454   // started, do nothing.  Otherwise, write timer will be started.
455   void ensure_upstream_wtimer();
456   // Disables upstream read timer.
457   void disable_upstream_rtimer();
458   // Disables upstream write timer.
459   void disable_upstream_wtimer();
460 
461   // Downstream timer functions.  They works in a similar way just
462   // like the upstream timer function.
463   void reset_downstream_rtimer();
464   void reset_downstream_wtimer();
465   void ensure_downstream_wtimer();
466   void disable_downstream_rtimer();
467   void disable_downstream_wtimer();
468 
469   // Returns true if accesslog can be written for this downstream.
470   bool accesslog_ready() const;
471 
472   // Increment retry count
473   void add_retry();
474   // true if retry attempt should not be done.
475   bool no_more_retry() const;
476 
477   DispatchState get_dispatch_state() const;
478   void set_dispatch_state(DispatchState s);
479 
480   void attach_blocked_link(BlockedLink *l);
481   BlockedLink *detach_blocked_link();
482 
483   // Returns true if downstream_connection can be detached and reused.
484   bool can_detach_downstream_connection() const;
485 
486   DefaultMemchunks pop_response_buf();
487 
488   BlockAllocator &get_block_allocator();
489 
490   void add_rcbuf(nghttp2_rcbuf *rcbuf);
491 
492   void
493   set_downstream_addr_group(const std::shared_ptr<DownstreamAddrGroup> &group);
494   void set_addr(const DownstreamAddr *addr);
495 
496   const DownstreamAddr *get_addr() const;
497 
498   void set_accesslog_written(bool f);
499 
500   // Finds affinity cookie from request header fields.  The name of
501   // cookie is given in |name|.  If an affinity cookie is found, it is
502   // assigned to a member function, and is returned.  If it is not
503   // found, or is malformed, returns 0.
504   uint32_t find_affinity_cookie(const StringRef &name);
505   // Set |h| as affinity cookie.
506   void renew_affinity_cookie(uint32_t h);
507   // Returns affinity cookie to send.  If it does not need to be sent,
508   // for example, because the value is retrieved from a request header
509   // field, returns 0.
510   uint32_t get_affinity_cookie_to_send() const;
511 
512   void set_ws_key(const StringRef &key);
513 
514   bool get_expect_100_continue() const;
515 
516   enum {
517     EVENT_ERROR = 0x1,
518     EVENT_TIMEOUT = 0x2,
519   };
520 
521   Downstream *dlnext, *dlprev;
522 
523   // the length of response body sent to upstream client
524   int64_t response_sent_body_length;
525 
526 private:
527   BlockAllocator balloc_;
528 
529   std::vector<nghttp2_rcbuf *> rcbufs_;
530 
531   Request req_;
532   Response resp_;
533 
534   std::chrono::high_resolution_clock::time_point request_start_time_;
535 
536   // host we requested to downstream.  This is used to rewrite
537   // location header field to decide the location should be rewritten
538   // or not.
539   StringRef request_downstream_host_;
540 
541   // Data arrived in frontend before sending header fields to backend
542   // are stored in this buffer.
543   DefaultMemchunks blocked_request_buf_;
544   DefaultMemchunks request_buf_;
545   DefaultMemchunks response_buf_;
546 
547   // The Sec-WebSocket-Key field sent to the peer.  This field is used
548   // if frontend uses RFC 8441 WebSocket bootstrapping via HTTP/2.
549   StringRef ws_key_;
550 
551   ev_timer upstream_rtimer_;
552   ev_timer upstream_wtimer_;
553 
554   ev_timer downstream_rtimer_;
555   ev_timer downstream_wtimer_;
556 
557   Upstream *upstream_;
558   std::unique_ptr<DownstreamConnection> dconn_;
559 
560   // only used by HTTP/2 upstream
561   BlockedLink *blocked_link_;
562   // The backend address used to fulfill this request.  These are for
563   // logging purpose.
564   std::shared_ptr<DownstreamAddrGroup> group_;
565   const DownstreamAddr *addr_;
566   // How many times we tried in backend connection
567   size_t num_retry_;
568   // The stream ID in frontend connection
569   int32_t stream_id_;
570   // The associated stream ID in frontend connection if this is pushed
571   // stream.
572   int32_t assoc_stream_id_;
573   // stream ID in backend connection
574   int32_t downstream_stream_id_;
575   // RST_STREAM error_code from downstream HTTP2 connection
576   uint32_t response_rst_stream_error_code_;
577   // An affinity cookie value.
578   uint32_t affinity_cookie_;
579   // request state
580   DownstreamState request_state_;
581   // response state
582   DownstreamState response_state_;
583   // only used by HTTP/2 upstream
584   DispatchState dispatch_state_;
585   // true if the connection is upgraded (HTTP Upgrade or CONNECT),
586   // excluding upgrade to HTTP/2.
587   bool upgraded_;
588   // true if backend request uses chunked transfer-encoding
589   bool chunked_request_;
590   // true if response to client uses chunked transfer-encoding
591   bool chunked_response_;
592   // true if we have not got final response code
593   bool expect_final_response_;
594   // true if downstream request is pending because backend connection
595   // has not been established or should be checked before use;
596   // currently used only with HTTP/2 connection.
597   bool request_pending_;
598   // true if downstream request header is considered to be sent.
599   bool request_header_sent_;
600   // true if access.log has been written.
601   bool accesslog_written_;
602   // true if affinity cookie is generated for this request.
603   bool new_affinity_cookie_;
604   // true if eof is received from client before sending header fields
605   // to backend.
606   bool blocked_request_data_eof_;
607   // true if request contains "expect: 100-continue" header field.
608   bool expect_100_continue_;
609 };
610 
611 } // namespace shrpx
612 
613 #endif // SHRPX_DOWNSTREAM_H
614