1 #include "common.h"
2 #include "filewrapper.h"
3 #include "wsgi.h"
4 #include "py2py3.h"
5
/* Forward declaration: wsgi_getheaders() is defined further down in this file
 * but is already called from wsgi_call_application(). */
static void wsgi_getheaders(Request*, PyObject** buf, Py_ssize_t* length);
7
/* Instance layout of the `start_response` callable that is handed to the WSGI
 * application. Besides the Python object header it only carries a pointer to
 * the request it was created for, so that start_response() can find that
 * request again when the application calls it. */
typedef struct {
  PyObject_HEAD
  Request* request;  /* assigned right after allocation in wsgi_call_application() */
} StartResponse;
12
13 bool
wsgi_call_application(Request * request)14 wsgi_call_application(Request* request)
15 {
16 StartResponse* start_response = PyObject_NEW(StartResponse, &StartResponse_Type);
17 start_response->request = request;
18
19 /* From now on, `headers` stores the _response_ headers
20 * (passed by the WSGI app) rather than the _request_ headers */
21 PyObject* request_headers = request->headers;
22 request->headers = NULL;
23
24 /* application(environ, start_response) call */
25 PyObject* retval = PyObject_CallFunctionObjArgs(
26 request->server_info->wsgi_app,
27 request_headers,
28 start_response,
29 NULL /* sentinel */
30 );
31
32 Py_DECREF(request_headers);
33 Py_DECREF(start_response);
34
35 if(retval == NULL)
36 return false;
37
38 /* The following code is somewhat magic, so worth an explanation.
39 *
40 * If the application we called was a generator, we have to call .next() on
41 * it before we do anything else because that may execute code that
42 * invokes `start_response` (which might not have been invoked yet).
43 * Think of the following scenario:
44 *
45 * def app(environ, start_response):
46 * start_response('200 Ok', ...)
47 * yield 'Hello World'
48 *
49 * That would make `app` return an iterator (more precisely, a generator).
50 * Unfortunately, `start_response` wouldn't be called until the first item
51 * of that iterator is requested; `start_response` however has to be called
52 * _before_ the wsgi body is sent, because it passes the HTTP headers.
53 *
54 * If the application returned a list this would not be required of course,
55 * but special-handling is painful - especially in C - so here's one generic
56 * way to solve the problem:
57 *
58 * Look into the returned iterator in any case. This allows us to do other
59 * optimizations, for example if the returned value is a list with exactly
60 * one bytestring in it, we can pick the bytestring and throw away the list
61 * so bjoern does not have to come back again and look into the iterator a
62 * second time.
63 */
64 PyObject* first_chunk;
65
66 if(PyList_Check(retval) && PyList_GET_SIZE(retval) == 1 &&
67 _PEP3333_Bytes_Check(PyList_GET_ITEM(retval, 0)))
68 {
69 /* Optimize the most common case, a single bytestring in a list: */
70 DBG_REQ(request, "WSGI iterable is list of size 1");
71 PyObject* tmp = PyList_GET_ITEM(retval, 0);
72 Py_INCREF(tmp);
73 Py_DECREF(retval);
74 retval = tmp;
75 goto bytestring; /* eeevil */
76 } else if(_PEP3333_Bytes_Check(retval)) {
77 /* According to PEP 333 strings should be handled like any other iterable,
78 * i.e. sending the response item for item. "item for item" means
79 * "char for char" if you have a bytestring. -- I'm not that stupid. */
80 bytestring:
81 DBG_REQ(request, "WSGI iterable is byte string");
82 request->iterable = NULL;
83 request->iterator = NULL;
84 if(_PEP3333_Bytes_GET_SIZE(retval)) {
85 first_chunk = retval;
86 } else {
87 // empty response
88 Py_DECREF(retval);
89 first_chunk = NULL;
90 }
91 } else if(!request->state.response_length_unknown && FileWrapper_CheckExact(retval) && FileWrapper_GetFd(retval) != -1) {
92 DBG_REQ(request, "WSGI iterable is wsgi.file_wrapper instance and Content-Length is known");
93 request->iterable = retval;
94 request->iterator = NULL;
95 first_chunk = NULL;
96 } else {
97 /* Generic iterable (list of length != 1, generator, ...) */
98 DBG_REQ(request, "WSGI iterable is some other iterable");
99 request->iterable = retval;
100 request->iterator = PyObject_GetIter(retval);
101 if(request->iterator == NULL)
102 return false;
103 first_chunk = wsgi_iterable_get_next_chunk(request);
104 if(first_chunk == NULL && PyErr_Occurred())
105 return false;
106 }
107
108 if(request->headers == NULL) {
109 /* It is important that this check comes *after* the call to
110 * wsgi_iterable_get_next_chunk(), because in case the WSGI application
111 * was an iterator, there's no chance start_response could be called
112 * before. See above if you don't understand what I say. */
113 PyErr_SetString(
114 PyExc_RuntimeError,
115 "wsgi application returned before start_response was called"
116 );
117 Py_XDECREF(first_chunk);
118 return false;
119 }
120
121 /* Special-case HTTP 204 and 304 */
122 if (!strncmp(_PEP3333_Bytes_AS_DATA(request->status), "204", 3) ||
123 !strncmp(_PEP3333_Bytes_AS_DATA(request->status), "304", 3)) {
124 request->state.response_length_unknown = false;
125 }
126
127 /* keep-alive cruft */
128 if(http_should_keep_alive(&request->parser.parser)) {
129 if(request->state.response_length_unknown) {
130 if(request->parser.parser.http_major > 0 && request->parser.parser.http_minor > 0) {
131 /* On HTTP 1.1, we can use Transfer-Encoding: chunked. */
132 DBG_REQ(request, "Content-Length unknown, HTTP/1.1 -> Connection: will keep-alive with chunked response");
133 request->state.chunked_response = true;
134 request->state.keep_alive = true;
135 } else {
136 /* On HTTP 1.0, we can only resort to closing the connection. */
137 DBG_REQ(request, "Content-Length unknown, HTTP/1.10 -> will close");
138 request->state.keep_alive = false;
139 }
140 } else {
141 /* We know the content-length. Can always keep-alive. */
142 DBG_REQ(request, "Content-Length known -> will keep alive");
143 request->state.keep_alive = true;
144 }
145 } else {
146 /* Explicit "Connection: close" (HTTP 1.1) or missing "Connection: keep-alive" (HTTP 1.0) */
147 DBG_REQ(request, "Connection: close request by client");
148 request->state.keep_alive = false;
149 }
150
151 /* Get the headers and concatenate the first body chunk.
152 * In the first place this makes the code more simple because afterwards
153 * we can throw away the first chunk PyObject; but it also is an optimization:
154 * At least for small responses, the complete response could be sent with
155 * one send() call (in server.c:ev_io_on_write) which is a (tiny) performance
156 * booster because less kernel calls means less kernel call overhead. */
157 Py_ssize_t length;
158 PyObject* buf;
159 wsgi_getheaders(request, &buf, &length);
160
161 if(first_chunk == NULL) {
162 _PEP3333_Bytes_Resize(&buf, length);
163 goto out;
164 }
165
166 if(request->state.chunked_response) {
167 PyObject* new_chunk = wrap_http_chunk_cruft_around(first_chunk);
168 Py_DECREF(first_chunk);
169 assert(_PEP3333_Bytes_GET_SIZE(new_chunk) >= _PEP3333_Bytes_GET_SIZE(first_chunk) + 5);
170 first_chunk = new_chunk;
171 }
172
173 _PEP3333_Bytes_Resize(&buf, length + _PEP3333_Bytes_GET_SIZE(first_chunk));
174 memcpy((void *)(_PEP3333_Bytes_AS_DATA(buf)+length), _PEP3333_Bytes_AS_DATA(first_chunk),
175 _PEP3333_Bytes_GET_SIZE(first_chunk));
176 Py_DECREF(first_chunk);
177
178 out:
179 request->state.wsgi_call_done = true;
180 request->current_chunk = buf;
181 request->current_chunk_p = 0;
182 return true;
183 }
184
185 static inline bool
inspect_headers(Request * request)186 inspect_headers(Request* request)
187 {
188 Py_ssize_t i;
189 PyObject* tuple;
190
191 if(!PyList_Check(request->headers)) {
192 TYPE_ERROR("start response argument 2", "a list of 2-tuples", request->headers);
193 return NULL;
194 }
195
196 for(i=0; i<PyList_GET_SIZE(request->headers); ++i) {
197 tuple = PyList_GET_ITEM(request->headers, i);
198
199 if(!PyTuple_Check(tuple) || PyTuple_GET_SIZE(tuple) != 2)
200 goto err;
201
202 PyObject* unicode_field = PyTuple_GET_ITEM(tuple, 0);
203 PyObject* unicode_value = PyTuple_GET_ITEM(tuple, 1);
204
205 PyObject* bytes_field = _PEP3333_BytesLatin1_FromUnicode(unicode_field);
206 PyObject* bytes_value = _PEP3333_BytesLatin1_FromUnicode(unicode_value);
207
208 if (bytes_field == NULL || bytes_value == NULL) {
209 Py_XDECREF(bytes_field);
210 Py_XDECREF(bytes_value);
211 goto err;
212 }
213
214 PyList_SET_ITEM(request->headers, i, PyTuple_Pack(2, bytes_field, bytes_value));
215 Py_DECREF(tuple);
216
217 if(!strncasecmp(_PEP3333_Bytes_AS_DATA(bytes_field), "Content-Length", _PEP3333_Bytes_GET_SIZE(bytes_field)))
218 request->state.response_length_unknown = false;
219
220 Py_DECREF(bytes_field);
221 Py_DECREF(bytes_value);
222 }
223 return true;
224
225 err:
226 TYPE_ERROR_INNER("start_response argument 2", "a list of 2-tuples (field: str, value: str)",
227 "(found invalid '%.200s' object at position %zd)", Py_TYPE(tuple)->tp_name, i);
228 return false;
229 }
230
231
232 static void
wsgi_getheaders(Request * request,PyObject ** buf,Py_ssize_t * length)233 wsgi_getheaders(Request* request, PyObject** buf, Py_ssize_t *length)
234 {
235 Py_ssize_t length_upperbound = strlen("HTTP/1.1 ") + _PEP3333_Bytes_GET_SIZE(request->status) + strlen("\r\nConnection: Keep-Alive") + strlen("\r\nTransfer-Encoding: chunked") + strlen("\r\n\r\n");
236 for(Py_ssize_t i=0; i<PyList_GET_SIZE(request->headers); ++i) {
237 PyObject* tuple = PyList_GET_ITEM(request->headers, i);
238 PyObject* field = PyTuple_GET_ITEM(tuple, 0);
239 PyObject* value = PyTuple_GET_ITEM(tuple, 1);
240 length_upperbound += strlen("\r\n") + _PEP3333_Bytes_GET_SIZE(field) + strlen(": ") + _PEP3333_Bytes_GET_SIZE(value);
241 }
242
243 PyObject* bufobj = _PEP3333_Bytes_FromStringAndSize(NULL, length_upperbound);
244 char* bufp = (char *)_PEP3333_Bytes_AS_DATA(bufobj);
245
246 #define buf_write(src, len) \
247 do { \
248 size_t n = len; \
249 const char* s = src; \
250 while(n--) *bufp++ = *s++; \
251 } while(0)
252 #define buf_write2(src) buf_write(src, strlen(src))
253
254 /* First line, e.g. "HTTP/1.1 200 Ok" */
255 buf_write2("HTTP/1.1 ");
256 buf_write(_PEP3333_Bytes_AS_DATA(request->status),
257 _PEP3333_Bytes_GET_SIZE(request->status));
258
259 /* Headers, from the `request->headers` mapping.
260 * [("Header1", "value1"), ("Header2", "value2")]
261 * --> "Header1: value1\r\nHeader2: value2"
262 */
263 for(Py_ssize_t i=0; i<PyList_GET_SIZE(request->headers); ++i) {
264 PyObject* tuple = PyList_GET_ITEM(request->headers, i);
265 PyObject* field = PyTuple_GET_ITEM(tuple, 0);
266 PyObject* value = PyTuple_GET_ITEM(tuple, 1);
267 buf_write2("\r\n");
268 buf_write(_PEP3333_Bytes_AS_DATA(field), _PEP3333_Bytes_GET_SIZE(field));
269 buf_write2(": ");
270 buf_write(_PEP3333_Bytes_AS_DATA(value), _PEP3333_Bytes_GET_SIZE(value));
271 }
272
273 /* See `wsgi_call_application` */
274 if(request->state.keep_alive) {
275 buf_write2("\r\nConnection: Keep-Alive");
276 if(request->state.chunked_response) {
277 buf_write2("\r\nTransfer-Encoding: chunked");
278 }
279 } else {
280 buf_write2("\r\nConnection: close");
281 }
282
283 buf_write2("\r\n\r\n");
284
285 *buf = bufobj;
286 *length = bufp - _PEP3333_Bytes_AS_DATA(bufobj);
287 }
288
289 inline PyObject*
wsgi_iterable_get_next_chunk(Request * request)290 wsgi_iterable_get_next_chunk(Request* request)
291 {
292 /* Get the next item out of ``request->iterable``, skipping empty ones. */
293 PyObject* next;
294 while(true) {
295 next = PyIter_Next(request->iterator);
296 if(next == NULL)
297 return NULL;
298 if(!_PEP3333_Bytes_Check(next)) {
299 TYPE_ERROR("wsgi iterable items", "bytes", next);
300 Py_DECREF(next);
301 return NULL;
302 }
303 if(_PEP3333_Bytes_GET_SIZE(next))
304 return next;
305 Py_DECREF(next);
306 }
307 }
308
309 static inline void
restore_exception_tuple(PyObject * exc_info,bool incref_items)310 restore_exception_tuple(PyObject* exc_info, bool incref_items)
311 {
312 if(incref_items) {
313 Py_INCREF(PyTuple_GET_ITEM(exc_info, 0));
314 Py_INCREF(PyTuple_GET_ITEM(exc_info, 1));
315 Py_INCREF(PyTuple_GET_ITEM(exc_info, 2));
316 }
317 PyErr_Restore(
318 PyTuple_GET_ITEM(exc_info, 0),
319 PyTuple_GET_ITEM(exc_info, 1),
320 PyTuple_GET_ITEM(exc_info, 2)
321 );
322 }
323
324 static PyObject*
start_response(PyObject * self,PyObject * args,PyObject * kwargs)325 start_response(PyObject* self, PyObject* args, PyObject* kwargs)
326 {
327 Request* request = ((StartResponse*)self)->request;
328
329 if(request->state.start_response_called) {
330 /* not the first call of start_response --
331 * throw away any previous status and headers. */
332 Py_CLEAR(request->status);
333 Py_CLEAR(request->headers);
334 request->state.response_length_unknown = true;
335 }
336
337 PyObject* exc_info = NULL;
338 PyObject* status_unicode = NULL;
339 if(!PyArg_UnpackTuple(args, "start_response", 2, 3, &status_unicode, &request->headers, &exc_info))
340 return NULL;
341
342 if(exc_info && exc_info != Py_None) {
343 if(!PyTuple_Check(exc_info) || PyTuple_GET_SIZE(exc_info) != 3) {
344 TYPE_ERROR("start_response argument 3", "a 3-tuple", exc_info);
345 return NULL;
346 }
347
348 restore_exception_tuple(exc_info, /* incref items? */ true);
349
350 if(request->state.wsgi_call_done) {
351 /* Too late to change headers. According to PEP 333, we should let
352 * the exception propagate in this case. */
353 return NULL;
354 }
355
356 /* Headers not yet sent; handle this start_response call as if 'exc_info'
357 * would not have been passed, but print and clear the exception. */
358 PyErr_Print();
359 }
360 else if(request->state.start_response_called) {
361 PyErr_SetString(PyExc_TypeError, "'start_response' called twice without "
362 "passing 'exc_info' the second time");
363 return NULL;
364 }
365
366 request->status = _PEP3333_BytesLatin1_FromUnicode(status_unicode);
367 if (request->status == NULL) {
368 return NULL;
369 } else if (_PEP3333_Bytes_GET_SIZE(request->status) < 3) {
370 PyErr_SetString(PyExc_ValueError, "'status' must be 3-digit");
371 Py_CLEAR(request->status);
372 return NULL;
373 }
374
375 if(!inspect_headers(request)) {
376 request->headers = NULL;
377 return NULL;
378 }
379
380 Py_INCREF(request->headers);
381
382 request->state.start_response_called = true;
383
384 Py_RETURN_NONE;
385 }
386
387 PyTypeObject StartResponse_Type = {
388 PyVarObject_HEAD_INIT(NULL, 0)
389 "start_response", /* tp_name (__name__) */
390 sizeof(StartResponse), /* tp_basicsize */
391 0, /* tp_itemsize */
392 (destructor)PyObject_FREE, /* tp_dealloc */
393 0, 0, 0, 0, 0, 0, 0, 0, 0, /* tp_{print,getattr,setattr,compare,...} */
394 start_response /* tp_call (__call__) */
395 };
396
397
398 PyObject*
wrap_http_chunk_cruft_around(PyObject * chunk)399 wrap_http_chunk_cruft_around(PyObject* chunk)
400 {
401 /* Who the hell decided to use decimal representation for Content-Length
402 * but hexadecimal representation for chunk lengths btw!?! Fuck W3C */
403 size_t chunklen = _PEP3333_Bytes_GET_SIZE(chunk);
404 assert(chunklen);
405 char buf[strlen("ffffffff") + 2];
406 size_t n = sprintf(buf, "%x\r\n", (unsigned int)chunklen);
407 PyObject* new_chunk = _PEP3333_Bytes_FromStringAndSize(NULL, n + chunklen + 2);
408 char * new_chunk_p = (char *)_PEP3333_Bytes_AS_DATA(new_chunk);
409 memcpy(new_chunk_p, buf, n);
410 new_chunk_p += n;
411 memcpy(new_chunk_p, _PEP3333_Bytes_AS_DATA(chunk), chunklen);
412 new_chunk_p += chunklen;
413 *new_chunk_p++ = '\r'; *new_chunk_p = '\n';
414 assert(new_chunk_p == _PEP3333_Bytes_AS_DATA(new_chunk) + n + chunklen + 1);
415 return new_chunk;
416 }
417