1 #include "common.h"
2 #include "filewrapper.h"
3 #include "wsgi.h"
4 #include "py2py3.h"
5 
/* Forward declaration (defined further down in this file): serializes the
 * status line and response headers of the request into a bytes object stored
 * in `*buf` and reports the number of bytes written in `*length`. */
static void wsgi_getheaders(Request*, PyObject** buf, Py_ssize_t* length);

/* Instance layout of the callable that is passed to the WSGI application as
 * its `start_response` argument. */
typedef struct {
  PyObject_HEAD
  Request* request;  /* the request this callable reports status/headers for */
} StartResponse;
12 
13 bool
wsgi_call_application(Request * request)14 wsgi_call_application(Request* request)
15 {
16   StartResponse* start_response = PyObject_NEW(StartResponse, &StartResponse_Type);
17   start_response->request = request;
18 
19   /* From now on, `headers` stores the _response_ headers
20    * (passed by the WSGI app) rather than the _request_ headers */
21   PyObject* request_headers = request->headers;
22   request->headers = NULL;
23 
24   /* application(environ, start_response) call */
25   PyObject* retval = PyObject_CallFunctionObjArgs(
26     request->server_info->wsgi_app,
27     request_headers,
28     start_response,
29     NULL /* sentinel */
30   );
31 
32   Py_DECREF(request_headers);
33   Py_DECREF(start_response);
34 
35   if(retval == NULL)
36     return false;
37 
38   /* The following code is somewhat magic, so worth an explanation.
39    *
40    * If the application we called was a generator, we have to call .next() on
41    * it before we do anything else because that may execute code that
42    * invokes `start_response` (which might not have been invoked yet).
43    * Think of the following scenario:
44    *
45    *   def app(environ, start_response):
46    *     start_response('200 Ok', ...)
47    *     yield 'Hello World'
48    *
49    * That would make `app` return an iterator (more precisely, a generator).
50    * Unfortunately, `start_response` wouldn't be called until the first item
51    * of that iterator is requested; `start_response` however has to be called
52    * _before_ the wsgi body is sent, because it passes the HTTP headers.
53    *
54    * If the application returned a list this would not be required of course,
55    * but special-handling is painful - especially in C - so here's one generic
56    * way to solve the problem:
57    *
58    * Look into the returned iterator in any case. This allows us to do other
59    * optimizations, for example if the returned value is a list with exactly
60    * one bytestring in it, we can pick the bytestring and throw away the list
61    * so bjoern does not have to come back again and look into the iterator a
62    * second time.
63    */
64   PyObject* first_chunk;
65 
66   if(PyList_Check(retval) && PyList_GET_SIZE(retval) == 1 &&
67      _PEP3333_Bytes_Check(PyList_GET_ITEM(retval, 0)))
68   {
69     /* Optimize the most common case, a single bytestring in a list: */
70     DBG_REQ(request, "WSGI iterable is list of size 1");
71     PyObject* tmp = PyList_GET_ITEM(retval, 0);
72     Py_INCREF(tmp);
73     Py_DECREF(retval);
74     retval = tmp;
75     goto bytestring; /* eeevil */
76   } else if(_PEP3333_Bytes_Check(retval)) {
77     /* According to PEP 333 strings should be handled like any other iterable,
78      * i.e. sending the response item for item. "item for item" means
79      * "char for char" if you have a bytestring. -- I'm not that stupid. */
80     bytestring:
81     DBG_REQ(request, "WSGI iterable is byte string");
82     request->iterable = NULL;
83     request->iterator = NULL;
84     if(_PEP3333_Bytes_GET_SIZE(retval)) {
85       first_chunk = retval;
86     } else {
87       // empty response
88       Py_DECREF(retval);
89       first_chunk = NULL;
90     }
91   } else if(!request->state.response_length_unknown && FileWrapper_CheckExact(retval) && FileWrapper_GetFd(retval) != -1) {
92     DBG_REQ(request, "WSGI iterable is wsgi.file_wrapper instance and Content-Length is known");
93     request->iterable = retval;
94     request->iterator = NULL;
95     first_chunk = NULL;
96   } else {
97     /* Generic iterable (list of length != 1, generator, ...) */
98     DBG_REQ(request, "WSGI iterable is some other iterable");
99     request->iterable = retval;
100     request->iterator = PyObject_GetIter(retval);
101     if(request->iterator == NULL)
102       return false;
103     first_chunk = wsgi_iterable_get_next_chunk(request);
104     if(first_chunk == NULL && PyErr_Occurred())
105       return false;
106   }
107 
108   if(request->headers == NULL) {
109     /* It is important that this check comes *after* the call to
110      * wsgi_iterable_get_next_chunk(), because in case the WSGI application
111      * was an iterator, there's no chance start_response could be called
112      * before. See above if you don't understand what I say. */
113     PyErr_SetString(
114       PyExc_RuntimeError,
115       "wsgi application returned before start_response was called"
116     );
117     Py_XDECREF(first_chunk);
118     return false;
119   }
120 
121   /* Special-case HTTP 204 and 304 */
122   if (!strncmp(_PEP3333_Bytes_AS_DATA(request->status), "204", 3) ||
123       !strncmp(_PEP3333_Bytes_AS_DATA(request->status), "304", 3)) {
124     request->state.response_length_unknown = false;
125   }
126 
127   /* keep-alive cruft */
128   if(http_should_keep_alive(&request->parser.parser)) {
129     if(request->state.response_length_unknown) {
130       if(request->parser.parser.http_major > 0 && request->parser.parser.http_minor > 0) {
131         /* On HTTP 1.1, we can use Transfer-Encoding: chunked. */
132         DBG_REQ(request, "Content-Length unknown, HTTP/1.1 -> Connection: will keep-alive with chunked response");
133         request->state.chunked_response = true;
134         request->state.keep_alive = true;
135       } else {
136         /* On HTTP 1.0, we can only resort to closing the connection.  */
137         DBG_REQ(request, "Content-Length unknown, HTTP/1.10 -> will close");
138         request->state.keep_alive = false;
139       }
140     } else {
141       /* We know the content-length. Can always keep-alive. */
142         DBG_REQ(request, "Content-Length known -> will keep alive");
143       request->state.keep_alive = true;
144     }
145   } else {
146     /* Explicit "Connection: close" (HTTP 1.1) or missing "Connection: keep-alive" (HTTP 1.0) */
147     DBG_REQ(request, "Connection: close request by client");
148     request->state.keep_alive = false;
149   }
150 
151   /* Get the headers and concatenate the first body chunk.
152    * In the first place this makes the code more simple because afterwards
153    * we can throw away the first chunk PyObject; but it also is an optimization:
154    * At least for small responses, the complete response could be sent with
155    * one send() call (in server.c:ev_io_on_write) which is a (tiny) performance
156    * booster because less kernel calls means less kernel call overhead. */
157   Py_ssize_t length;
158   PyObject* buf;
159   wsgi_getheaders(request, &buf, &length);
160 
161   if(first_chunk == NULL) {
162     _PEP3333_Bytes_Resize(&buf, length);
163     goto out;
164   }
165 
166   if(request->state.chunked_response) {
167     PyObject* new_chunk = wrap_http_chunk_cruft_around(first_chunk);
168     Py_DECREF(first_chunk);
169     assert(_PEP3333_Bytes_GET_SIZE(new_chunk) >= _PEP3333_Bytes_GET_SIZE(first_chunk) + 5);
170     first_chunk = new_chunk;
171   }
172 
173   _PEP3333_Bytes_Resize(&buf, length + _PEP3333_Bytes_GET_SIZE(first_chunk));
174   memcpy((void *)(_PEP3333_Bytes_AS_DATA(buf)+length), _PEP3333_Bytes_AS_DATA(first_chunk),
175          _PEP3333_Bytes_GET_SIZE(first_chunk));
176   Py_DECREF(first_chunk);
177 
178 out:
179   request->state.wsgi_call_done = true;
180   request->current_chunk = buf;
181   request->current_chunk_p = 0;
182   return true;
183 }
184 
185 static inline bool
inspect_headers(Request * request)186 inspect_headers(Request* request)
187 {
188   Py_ssize_t i;
189   PyObject* tuple;
190 
191   if(!PyList_Check(request->headers)) {
192     TYPE_ERROR("start response argument 2", "a list of 2-tuples", request->headers);
193     return NULL;
194   }
195 
196   for(i=0; i<PyList_GET_SIZE(request->headers); ++i) {
197     tuple = PyList_GET_ITEM(request->headers, i);
198 
199     if(!PyTuple_Check(tuple) || PyTuple_GET_SIZE(tuple) != 2)
200       goto err;
201 
202     PyObject* unicode_field = PyTuple_GET_ITEM(tuple, 0);
203     PyObject* unicode_value = PyTuple_GET_ITEM(tuple, 1);
204 
205     PyObject* bytes_field = _PEP3333_BytesLatin1_FromUnicode(unicode_field);
206     PyObject* bytes_value = _PEP3333_BytesLatin1_FromUnicode(unicode_value);
207 
208     if (bytes_field == NULL || bytes_value == NULL) {
209       Py_XDECREF(bytes_field);
210       Py_XDECREF(bytes_value);
211       goto err;
212     }
213 
214     PyList_SET_ITEM(request->headers, i, PyTuple_Pack(2, bytes_field, bytes_value));
215     Py_DECREF(tuple);
216 
217     if(!strncasecmp(_PEP3333_Bytes_AS_DATA(bytes_field), "Content-Length", _PEP3333_Bytes_GET_SIZE(bytes_field)))
218       request->state.response_length_unknown = false;
219 
220     Py_DECREF(bytes_field);
221     Py_DECREF(bytes_value);
222   }
223   return true;
224 
225 err:
226   TYPE_ERROR_INNER("start_response argument 2", "a list of 2-tuples (field: str, value: str)",
227     "(found invalid '%.200s' object at position %zd)", Py_TYPE(tuple)->tp_name, i);
228   return false;
229 }
230 
231 
232 static void
wsgi_getheaders(Request * request,PyObject ** buf,Py_ssize_t * length)233 wsgi_getheaders(Request* request, PyObject** buf, Py_ssize_t *length)
234 {
235   Py_ssize_t length_upperbound = strlen("HTTP/1.1 ") + _PEP3333_Bytes_GET_SIZE(request->status) + strlen("\r\nConnection: Keep-Alive") + strlen("\r\nTransfer-Encoding: chunked") + strlen("\r\n\r\n");
236   for(Py_ssize_t i=0; i<PyList_GET_SIZE(request->headers); ++i) {
237     PyObject* tuple = PyList_GET_ITEM(request->headers, i);
238     PyObject* field = PyTuple_GET_ITEM(tuple, 0);
239     PyObject* value = PyTuple_GET_ITEM(tuple, 1);
240     length_upperbound += strlen("\r\n") + _PEP3333_Bytes_GET_SIZE(field) + strlen(": ") + _PEP3333_Bytes_GET_SIZE(value);
241   }
242 
243   PyObject* bufobj = _PEP3333_Bytes_FromStringAndSize(NULL, length_upperbound);
244   char* bufp = (char *)_PEP3333_Bytes_AS_DATA(bufobj);
245 
246   #define buf_write(src, len) \
247     do { \
248       size_t n = len; \
249       const char* s = src;  \
250       while(n--) *bufp++ = *s++; \
251     } while(0)
252   #define buf_write2(src) buf_write(src, strlen(src))
253 
254   /* First line, e.g. "HTTP/1.1 200 Ok" */
255   buf_write2("HTTP/1.1 ");
256   buf_write(_PEP3333_Bytes_AS_DATA(request->status),
257             _PEP3333_Bytes_GET_SIZE(request->status));
258 
259   /* Headers, from the `request->headers` mapping.
260    * [("Header1", "value1"), ("Header2", "value2")]
261    * --> "Header1: value1\r\nHeader2: value2"
262    */
263   for(Py_ssize_t i=0; i<PyList_GET_SIZE(request->headers); ++i) {
264     PyObject* tuple = PyList_GET_ITEM(request->headers, i);
265     PyObject* field = PyTuple_GET_ITEM(tuple, 0);
266     PyObject* value = PyTuple_GET_ITEM(tuple, 1);
267     buf_write2("\r\n");
268     buf_write(_PEP3333_Bytes_AS_DATA(field), _PEP3333_Bytes_GET_SIZE(field));
269     buf_write2(": ");
270     buf_write(_PEP3333_Bytes_AS_DATA(value), _PEP3333_Bytes_GET_SIZE(value));
271   }
272 
273   /* See `wsgi_call_application` */
274   if(request->state.keep_alive) {
275     buf_write2("\r\nConnection: Keep-Alive");
276     if(request->state.chunked_response) {
277       buf_write2("\r\nTransfer-Encoding: chunked");
278     }
279   } else {
280     buf_write2("\r\nConnection: close");
281   }
282 
283   buf_write2("\r\n\r\n");
284 
285   *buf = bufobj;
286   *length = bufp - _PEP3333_Bytes_AS_DATA(bufobj);
287 }
288 
289 inline PyObject*
wsgi_iterable_get_next_chunk(Request * request)290 wsgi_iterable_get_next_chunk(Request* request)
291 {
292   /* Get the next item out of ``request->iterable``, skipping empty ones. */
293   PyObject* next;
294   while(true) {
295     next = PyIter_Next(request->iterator);
296     if(next == NULL)
297       return NULL;
298     if(!_PEP3333_Bytes_Check(next)) {
299       TYPE_ERROR("wsgi iterable items", "bytes", next);
300       Py_DECREF(next);
301       return NULL;
302     }
303     if(_PEP3333_Bytes_GET_SIZE(next))
304       return next;
305     Py_DECREF(next);
306   }
307 }
308 
309 static inline void
restore_exception_tuple(PyObject * exc_info,bool incref_items)310 restore_exception_tuple(PyObject* exc_info, bool incref_items)
311 {
312   if(incref_items) {
313     Py_INCREF(PyTuple_GET_ITEM(exc_info, 0));
314     Py_INCREF(PyTuple_GET_ITEM(exc_info, 1));
315     Py_INCREF(PyTuple_GET_ITEM(exc_info, 2));
316   }
317   PyErr_Restore(
318     PyTuple_GET_ITEM(exc_info, 0),
319     PyTuple_GET_ITEM(exc_info, 1),
320     PyTuple_GET_ITEM(exc_info, 2)
321   );
322 }
323 
324 static PyObject*
start_response(PyObject * self,PyObject * args,PyObject * kwargs)325 start_response(PyObject* self, PyObject* args, PyObject* kwargs)
326 {
327   Request* request = ((StartResponse*)self)->request;
328 
329   if(request->state.start_response_called) {
330     /* not the first call of start_response --
331      * throw away any previous status and headers. */
332     Py_CLEAR(request->status);
333     Py_CLEAR(request->headers);
334     request->state.response_length_unknown = true;
335   }
336 
337   PyObject* exc_info = NULL;
338   PyObject* status_unicode = NULL;
339   if(!PyArg_UnpackTuple(args, "start_response", 2, 3, &status_unicode, &request->headers, &exc_info))
340     return NULL;
341 
342   if(exc_info && exc_info != Py_None) {
343     if(!PyTuple_Check(exc_info) || PyTuple_GET_SIZE(exc_info) != 3) {
344       TYPE_ERROR("start_response argument 3", "a 3-tuple", exc_info);
345       return NULL;
346     }
347 
348     restore_exception_tuple(exc_info, /* incref items? */ true);
349 
350     if(request->state.wsgi_call_done) {
351       /* Too late to change headers. According to PEP 333, we should let
352        * the exception propagate in this case. */
353       return NULL;
354     }
355 
356     /* Headers not yet sent; handle this start_response call as if 'exc_info'
357      * would not have been passed, but print and clear the exception. */
358     PyErr_Print();
359   }
360   else if(request->state.start_response_called) {
361     PyErr_SetString(PyExc_TypeError, "'start_response' called twice without "
362                      "passing 'exc_info' the second time");
363     return NULL;
364   }
365 
366   request->status = _PEP3333_BytesLatin1_FromUnicode(status_unicode);
367   if (request->status == NULL) {
368     return NULL;
369   } else if (_PEP3333_Bytes_GET_SIZE(request->status) < 3) {
370     PyErr_SetString(PyExc_ValueError, "'status' must be 3-digit");
371     Py_CLEAR(request->status);
372     return NULL;
373   }
374 
375   if(!inspect_headers(request)) {
376     request->headers = NULL;
377     return NULL;
378   }
379 
380   Py_INCREF(request->headers);
381 
382   request->state.start_response_called = true;
383 
384   Py_RETURN_NONE;
385 }
386 
387 PyTypeObject StartResponse_Type = {
388   PyVarObject_HEAD_INIT(NULL, 0)
389   "start_response",           /* tp_name (__name__)                         */
390   sizeof(StartResponse),      /* tp_basicsize                               */
391   0,                          /* tp_itemsize                                */
392   (destructor)PyObject_FREE,  /* tp_dealloc                                 */
393   0, 0, 0, 0, 0, 0, 0, 0, 0,  /* tp_{print,getattr,setattr,compare,...}     */
394   start_response              /* tp_call (__call__)                         */
395 };
396 
397 
398 PyObject*
wrap_http_chunk_cruft_around(PyObject * chunk)399 wrap_http_chunk_cruft_around(PyObject* chunk)
400 {
401   /* Who the hell decided to use decimal representation for Content-Length
402    * but hexadecimal representation for chunk lengths btw!?! Fuck W3C */
403   size_t chunklen = _PEP3333_Bytes_GET_SIZE(chunk);
404   assert(chunklen);
405   char buf[strlen("ffffffff") + 2];
406   size_t n = sprintf(buf, "%x\r\n", (unsigned int)chunklen);
407   PyObject* new_chunk = _PEP3333_Bytes_FromStringAndSize(NULL, n + chunklen + 2);
408   char * new_chunk_p = (char *)_PEP3333_Bytes_AS_DATA(new_chunk);
409   memcpy(new_chunk_p, buf, n);
410   new_chunk_p += n;
411   memcpy(new_chunk_p, _PEP3333_Bytes_AS_DATA(chunk), chunklen);
412   new_chunk_p += chunklen;
413   *new_chunk_p++ = '\r'; *new_chunk_p = '\n';
414   assert(new_chunk_p == _PEP3333_Bytes_AS_DATA(new_chunk) + n + chunklen + 1);
415   return new_chunk;
416 }
417