1
2 /* Traceback implementation */
3
4 #include "Python.h"
5
6 #include "code.h" // PyCode_Addr2Line etc
7 #include "frameobject.h" // PyFrame_GetBack()
8 #include "pycore_ast.h" // asdl_seq_*
9 #include "pycore_compile.h" // _PyAST_Optimize
10 #include "pycore_fileutils.h" // _Py_BEGIN_SUPPRESS_IPH
11 #include "pycore_frame.h" // _PyFrame_GetCode()
12 #include "pycore_interp.h" // PyInterpreterState.gc
13 #include "pycore_parser.h" // _PyParser_ASTFromString
14 #include "pycore_pyarena.h" // _PyArena_Free()
15 #include "pycore_pyerrors.h" // _PyErr_Fetch()
16 #include "pycore_pystate.h" // _PyThreadState_GET()
17 #include "pycore_traceback.h" // EXCEPTION_TB_HEADER
18 #include "../Parser/pegen.h" // _PyPegen_byte_offset_to_character_offset()
19 #include "structmember.h" // PyMemberDef
20 #include "osdefs.h" // SEP
21 #ifdef HAVE_FCNTL_H
22 # include <fcntl.h>
23 #endif
24
/* Byte offset of member `x` inside PyTracebackObject. */
#define OFF(x) offsetof(PyTracebackObject, x)

/* Write the NUL-terminated string `str` to the file descriptor `fd` with
   _Py_write_noraise(). */
#define PUTS(fd, str) _Py_write_noraise(fd, str, (int)strlen(str))
/* _Py_DumpASCII() writes at most this many characters of a string and then
   appends "...". */
#define MAX_STRING_LENGTH 500
/* NOTE(review): the users of the next two limits are not visible in this
   part of the file; presumably they bound the frame/thread dump functions. */
#define MAX_FRAME_DEPTH 100
#define MAX_NTHREADS 100
31
32 /* Function from Parser/tokenizer.c */
33 extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);
34
35 _Py_IDENTIFIER(TextIOWrapper);
36 _Py_IDENTIFIER(close);
37 _Py_IDENTIFIER(open);
38 _Py_IDENTIFIER(path);
39
40 /*[clinic input]
41 class TracebackType "PyTracebackObject *" "&PyTraceback_Type"
42 [clinic start generated code]*/
43 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=928fa06c10151120]*/
44
45 #include "clinic/traceback.c.h"
46
47 static PyObject *
tb_create_raw(PyTracebackObject * next,PyFrameObject * frame,int lasti,int lineno)48 tb_create_raw(PyTracebackObject *next, PyFrameObject *frame, int lasti,
49 int lineno)
50 {
51 PyTracebackObject *tb;
52 if ((next != NULL && !PyTraceBack_Check(next)) ||
53 frame == NULL || !PyFrame_Check(frame)) {
54 PyErr_BadInternalCall();
55 return NULL;
56 }
57 tb = PyObject_GC_New(PyTracebackObject, &PyTraceBack_Type);
58 if (tb != NULL) {
59 Py_XINCREF(next);
60 tb->tb_next = next;
61 Py_XINCREF(frame);
62 tb->tb_frame = frame;
63 tb->tb_lasti = lasti;
64 tb->tb_lineno = lineno;
65 PyObject_GC_Track(tb);
66 }
67 return (PyObject *)tb;
68 }
69
70 /*[clinic input]
71 @classmethod
72 TracebackType.__new__ as tb_new
73
74 tb_next: object
75 tb_frame: object(type='PyFrameObject *', subclass_of='&PyFrame_Type')
76 tb_lasti: int
77 tb_lineno: int
78
79 Create a new traceback object.
80 [clinic start generated code]*/
81
82 static PyObject *
tb_new_impl(PyTypeObject * type,PyObject * tb_next,PyFrameObject * tb_frame,int tb_lasti,int tb_lineno)83 tb_new_impl(PyTypeObject *type, PyObject *tb_next, PyFrameObject *tb_frame,
84 int tb_lasti, int tb_lineno)
85 /*[clinic end generated code: output=fa077debd72d861a input=01cbe8ec8783fca7]*/
86 {
87 if (tb_next == Py_None) {
88 tb_next = NULL;
89 } else if (!PyTraceBack_Check(tb_next)) {
90 return PyErr_Format(PyExc_TypeError,
91 "expected traceback object or None, got '%s'",
92 Py_TYPE(tb_next)->tp_name);
93 }
94
95 return tb_create_raw((PyTracebackObject *)tb_next, tb_frame, tb_lasti,
96 tb_lineno);
97 }
98
99 static PyObject *
tb_dir(PyTracebackObject * self,PyObject * Py_UNUSED (ignored))100 tb_dir(PyTracebackObject *self, PyObject *Py_UNUSED(ignored))
101 {
102 return Py_BuildValue("[ssss]", "tb_frame", "tb_next",
103 "tb_lasti", "tb_lineno");
104 }
105
106 static PyObject *
tb_next_get(PyTracebackObject * self,void * Py_UNUSED (_))107 tb_next_get(PyTracebackObject *self, void *Py_UNUSED(_))
108 {
109 PyObject* ret = (PyObject*)self->tb_next;
110 if (!ret) {
111 ret = Py_None;
112 }
113 Py_INCREF(ret);
114 return ret;
115 }
116
117 static int
tb_next_set(PyTracebackObject * self,PyObject * new_next,void * Py_UNUSED (_))118 tb_next_set(PyTracebackObject *self, PyObject *new_next, void *Py_UNUSED(_))
119 {
120 if (!new_next) {
121 PyErr_Format(PyExc_TypeError, "can't delete tb_next attribute");
122 return -1;
123 }
124
125 /* We accept None or a traceback object, and map None -> NULL (inverse of
126 tb_next_get) */
127 if (new_next == Py_None) {
128 new_next = NULL;
129 } else if (!PyTraceBack_Check(new_next)) {
130 PyErr_Format(PyExc_TypeError,
131 "expected traceback object, got '%s'",
132 Py_TYPE(new_next)->tp_name);
133 return -1;
134 }
135
136 /* Check for loops */
137 PyTracebackObject *cursor = (PyTracebackObject *)new_next;
138 while (cursor) {
139 if (cursor == self) {
140 PyErr_Format(PyExc_ValueError, "traceback loop detected");
141 return -1;
142 }
143 cursor = cursor->tb_next;
144 }
145
146 PyObject *old_next = (PyObject*)self->tb_next;
147 Py_XINCREF(new_next);
148 self->tb_next = (PyTracebackObject *)new_next;
149 Py_XDECREF(old_next);
150
151 return 0;
152 }
153
154
/* Methods of traceback objects: only __dir__, which takes no arguments. */
static PyMethodDef tb_methods[] = {
   {"__dir__", (PyCFunction)tb_dir, METH_NOARGS},
   {NULL, NULL, 0, NULL},
};
159
/* Read-only attributes mapped directly onto PyTracebackObject fields.
   Reading tb_frame is additionally flagged with PY_AUDIT_READ. */
static PyMemberDef tb_memberlist[] = {
    {"tb_frame",        T_OBJECT,       OFF(tb_frame),  READONLY|PY_AUDIT_READ},
    {"tb_lasti",        T_INT,          OFF(tb_lasti),  READONLY},
    {"tb_lineno",       T_INT,          OFF(tb_lineno), READONLY},
    {NULL}      /* Sentinel */
};
166
/* tb_next is the only writable attribute; it goes through tb_next_get and
   tb_next_set so that None <-> NULL mapping and loop detection apply. */
static PyGetSetDef tb_getsetters[] = {
    {"tb_next", (getter)tb_next_get, (setter)tb_next_set, NULL, NULL},
    {NULL}      /* Sentinel */
};
171
/* Destructor: untrack the object from the GC, release the references to
   the next entry and the frame, then free the memory.  The releases are
   wrapped in the Py_TRASHCAN_BEGIN/END macro pair. */
static void
tb_dealloc(PyTracebackObject *tb)
{
    /* Untrack first, before any field is released. */
    PyObject_GC_UnTrack(tb);
    Py_TRASHCAN_BEGIN(tb, tb_dealloc)
    Py_XDECREF(tb->tb_next);
    Py_XDECREF(tb->tb_frame);
    PyObject_GC_Del(tb);
    Py_TRASHCAN_END
}
182
/* GC traversal: report the two object references held by a traceback
   entry (the next entry and the frame) to `visit`. */
static int
tb_traverse(PyTracebackObject *tb, visitproc visit, void *arg)
{
    Py_VISIT(tb->tb_next);
    Py_VISIT(tb->tb_frame);
    return 0;
}
190
/* GC clear: drop the references to the next entry and the frame, leaving
   both fields NULL.  Always returns 0. */
static int
tb_clear(PyTracebackObject *tb)
{
    Py_CLEAR(tb->tb_next);
    Py_CLEAR(tb->tb_frame);
    return 0;
}
198
/* Type object for traceback entries.  Instances are GC-tracked
   (Py_TPFLAGS_HAVE_GC with tb_traverse/tb_clear), expose their fields
   through tb_memberlist/tb_getsetters and are created through tb_new. */
PyTypeObject PyTraceBack_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "traceback",
    sizeof(PyTracebackObject),
    0,
    (destructor)tb_dealloc,                     /*tp_dealloc*/
    0,                                          /*tp_vectorcall_offset*/
    0,                                          /*tp_getattr*/
    0,                                          /*tp_setattr*/
    0,                                          /*tp_as_async*/
    0,                                          /*tp_repr*/
    0,                                          /*tp_as_number*/
    0,                                          /*tp_as_sequence*/
    0,                                          /*tp_as_mapping*/
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    tb_new__doc__,                              /* tp_doc */
    (traverseproc)tb_traverse,                  /* tp_traverse */
    (inquiry)tb_clear,                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    tb_methods,                                 /* tp_methods */
    tb_memberlist,                              /* tp_members */
    tb_getsetters,                              /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    tb_new,                                     /* tp_new */
};
239
240
241 PyObject*
_PyTraceBack_FromFrame(PyObject * tb_next,PyFrameObject * frame)242 _PyTraceBack_FromFrame(PyObject *tb_next, PyFrameObject *frame)
243 {
244 assert(tb_next == NULL || PyTraceBack_Check(tb_next));
245 assert(frame != NULL);
246
247 return tb_create_raw((PyTracebackObject *)tb_next, frame, frame->f_frame->f_lasti*sizeof(_Py_CODEUNIT),
248 PyFrame_GetLineNumber(frame));
249 }
250
251
252 int
PyTraceBack_Here(PyFrameObject * frame)253 PyTraceBack_Here(PyFrameObject *frame)
254 {
255 PyObject *exc, *val, *tb, *newtb;
256 PyErr_Fetch(&exc, &val, &tb);
257 newtb = _PyTraceBack_FromFrame(tb, frame);
258 if (newtb == NULL) {
259 _PyErr_ChainExceptions(exc, val, tb);
260 return -1;
261 }
262 PyErr_Restore(exc, val, newtb);
263 Py_XDECREF(tb);
264 return 0;
265 }
266
267 /* Insert a frame into the traceback for (funcname, filename, lineno). */
_PyTraceback_Add(const char * funcname,const char * filename,int lineno)268 void _PyTraceback_Add(const char *funcname, const char *filename, int lineno)
269 {
270 PyObject *globals;
271 PyCodeObject *code;
272 PyFrameObject *frame;
273 PyObject *exc, *val, *tb;
274 PyThreadState *tstate = _PyThreadState_GET();
275
276 /* Save and clear the current exception. Python functions must not be
277 called with an exception set. Calling Python functions happens when
278 the codec of the filesystem encoding is implemented in pure Python. */
279 _PyErr_Fetch(tstate, &exc, &val, &tb);
280
281 globals = PyDict_New();
282 if (!globals)
283 goto error;
284 code = PyCode_NewEmpty(filename, funcname, lineno);
285 if (!code) {
286 Py_DECREF(globals);
287 goto error;
288 }
289 frame = PyFrame_New(tstate, code, globals, NULL);
290 Py_DECREF(globals);
291 Py_DECREF(code);
292 if (!frame)
293 goto error;
294 frame->f_lineno = lineno;
295
296 _PyErr_Restore(tstate, exc, val, tb);
297 PyTraceBack_Here(frame);
298 Py_DECREF(frame);
299 return;
300
301 error:
302 _PyErr_ChainExceptions(exc, val, tb);
303 }
304
/* Look for the source file `filename` in the directories listed in
 * sys.path.
 *
 * Only the last path component of `filename` (the part after the last SEP)
 * is used.  For each str entry of sys.path, "<entry>SEP<tail>" is assembled
 * in `namebuf` (capacity `namelen` bytes) and opened with io.open(..., "rb").
 *
 * Returns a new reference to the first file object that could be opened, or
 * NULL if none was found.  Errors raised inside this function are cleared
 * before it returns.
 */
static PyObject *
_Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *io)
{
    Py_ssize_t i;
    PyObject *binary;
    PyObject *v;
    Py_ssize_t npath;
    size_t taillen;
    PyObject *syspath;
    PyObject *path;
    const char* tail;
    PyObject *filebytes;
    const char* filepath;
    Py_ssize_t len;
    PyObject* result;

    filebytes = PyUnicode_EncodeFSDefault(filename);
    if (filebytes == NULL) {
        PyErr_Clear();
        return NULL;
    }
    filepath = PyBytes_AS_STRING(filebytes);

    /* Search tail of filename in sys.path before giving up */
    tail = strrchr(filepath, SEP);
    if (tail == NULL)
        tail = filepath;
    else
        tail++;
    taillen = strlen(tail);

    syspath = _PySys_GetObjectId(&PyId_path);
    if (syspath == NULL || !PyList_Check(syspath))
        goto error;
    npath = PyList_Size(syspath);

    for (i = 0; i < npath; i++) {
        v = PyList_GetItem(syspath, i);
        if (v == NULL) {
            /* The size was sampled before the loop; stop if the item can no
               longer be fetched. */
            PyErr_Clear();
            break;
        }
        if (!PyUnicode_Check(v))
            continue;
        path = PyUnicode_EncodeFSDefault(v);
        if (path == NULL) {
            PyErr_Clear();
            continue;
        }
        len = PyBytes_GET_SIZE(path);
        /* Room is needed for the directory, a separator, the tail and the
           terminating NUL. */
        if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) {
            Py_DECREF(path);
            continue; /* Too long */
        }
        strcpy(namebuf, PyBytes_AS_STRING(path));
        Py_DECREF(path);
        if (strlen(namebuf) != (size_t)len)
            continue; /* v contains '\0' */
        /* Append a separator unless the directory already ends with one. */
        if (len > 0 && namebuf[len-1] != SEP)
            namebuf[len++] = SEP;
        strcpy(namebuf+len, tail);

        binary = _PyObject_CallMethodId(io, &PyId_open, "ss", namebuf, "rb");
        if (binary != NULL) {
            result = binary;
            goto finally;
        }
        /* Could not open this candidate: try the next directory. */
        PyErr_Clear();
    }
    goto error;

error:
    result = NULL;
finally:
    Py_DECREF(filebytes);
    return result;
}
382
383 /* Writes indent spaces. Returns 0 on success and non-zero on failure.
384 */
385 int
_Py_WriteIndent(int indent,PyObject * f)386 _Py_WriteIndent(int indent, PyObject *f)
387 {
388 char buf[11] = " ";
389 assert(strlen(buf) == 10);
390 while (indent > 0) {
391 if (indent < 10) {
392 buf[indent] = '\0';
393 }
394 if (PyFile_WriteString(buf, f) < 0) {
395 return -1;
396 }
397 indent -= 10;
398 }
399 return 0;
400 }
401
402 /* Writes indent spaces, followed by the margin if it is not `\0`.
403 Returns 0 on success and non-zero on failure.
404 */
405 int
_Py_WriteIndentedMargin(int indent,const char * margin,PyObject * f)406 _Py_WriteIndentedMargin(int indent, const char *margin, PyObject *f)
407 {
408 if (_Py_WriteIndent(indent, f) < 0) {
409 return -1;
410 }
411 if (margin) {
412 if (PyFile_WriteString(margin, f) < 0) {
413 return -1;
414 }
415 }
416 return 0;
417 }
418
/* Print line number `lineno` of the source file `filename` to the file
 * object `f`.
 *
 * The output consists of the margin (see _Py_WriteIndentedMargin, using
 * `margin_indent` and `margin`), `indent` spaces, the source line with its
 * leading spaces, tabs and form feeds removed, and a newline.
 *
 * If `truncation` is not NULL it receives the number of leading characters
 * that were skipped minus `indent`.  If `line` is not NULL it receives a new
 * reference to the line as read from the file (before the leading whitespace
 * is removed); it is stored as soon as the line has been read, so it is also
 * set when a later write to `f` fails.
 *
 * Returns 0 when the line was printed, and also when nothing is printed
 * because `filename` is NULL, `filename` starts with '<' and ends with '>',
 * or the opened file cannot be prepared for decoding.  Returns -1 when the
 * io module cannot be imported, the file cannot be opened, the requested
 * line cannot be read, or writing to `f` fails.
 */
static int
display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int indent,
                                int margin_indent, const char *margin,
                                int *truncation, PyObject **line)
{
    int fd;
    int i;
    char *found_encoding;
    const char *encoding;
    PyObject *io;
    PyObject *binary;
    PyObject *fob = NULL;
    PyObject *lineobj = NULL;
    PyObject *res;
    char buf[MAXPATHLEN+1];
    int kind;
    const void *data;

    /* open the file */
    if (filename == NULL)
        return 0;

    /* Do not attempt to open things like <string> or <stdin> */
    assert(PyUnicode_Check(filename));
    if (PyUnicode_READ_CHAR(filename, 0) == '<') {
        Py_ssize_t len = PyUnicode_GET_LENGTH(filename);
        if (len > 0 && PyUnicode_READ_CHAR(filename, len - 1) == '>') {
            return 0;
        }
    }

    io = PyImport_ImportModuleNoBlock("io");
    if (io == NULL)
        return -1;
    binary = _PyObject_CallMethodId(io, &PyId_open, "Os", filename, "rb");

    if (binary == NULL) {
        PyErr_Clear();

        /* Opening the name as given failed: fall back to searching for the
           file's base name in sys.path. */
        binary = _Py_FindSourceFile(filename, buf, sizeof(buf), io);
        if (binary == NULL) {
            Py_DECREF(io);
            return -1;
        }
    }

    /* use the right encoding to decode the file as unicode */
    fd = PyObject_AsFileDescriptor(binary);
    if (fd < 0) {
        /* NOTE(review): this path reports success (0) without calling
           PyErr_Clear(); presumably PyObject_AsFileDescriptor() left an
           exception set -- confirm that callers clear it. */
        Py_DECREF(io);
        Py_DECREF(binary);
        return 0;
    }
    found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
    if (found_encoding == NULL)
        PyErr_Clear();
    /* Fall back to UTF-8 when no encoding could be determined. */
    encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
    /* Reset position */
    if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
        Py_DECREF(io);
        Py_DECREF(binary);
        PyMem_Free(found_encoding);
        return 0;
    }
    fob = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "Os", binary, encoding);
    Py_DECREF(io);
    PyMem_Free(found_encoding);

    if (fob == NULL) {
        PyErr_Clear();

        /* The text wrapper could not be created: close the binary file
           explicitly and give up without reporting an error. */
        res = _PyObject_CallMethodIdNoArgs(binary, &PyId_close);
        Py_DECREF(binary);
        if (res)
            Py_DECREF(res);
        else
            PyErr_Clear();
        return 0;
    }
    Py_DECREF(binary);

    /* get the line number lineno */
    /* Lines are read one by one; only the most recently read line is kept.
       A read failure ends the loop with lineobj == NULL. */
    for (i = 0; i < lineno; i++) {
        Py_XDECREF(lineobj);
        lineobj = PyFile_GetLine(fob, -1);
        if (!lineobj) {
            PyErr_Clear();
            break;
        }
    }
    /* Close the file; a failure to close is ignored. */
    res = _PyObject_CallMethodIdNoArgs(fob, &PyId_close);
    if (res) {
        Py_DECREF(res);
    }
    else {
        PyErr_Clear();
    }
    Py_DECREF(fob);
    if (!lineobj || !PyUnicode_Check(lineobj)) {
        Py_XDECREF(lineobj);
        return -1;
    }

    /* Hand the unmodified line to the caller (new reference). */
    if (line) {
        Py_INCREF(lineobj);
        *line = lineobj;
    }

    /* remove the indentation of the line */
    kind = PyUnicode_KIND(lineobj);
    data = PyUnicode_DATA(lineobj);
    for (i=0; i < PyUnicode_GET_LENGTH(lineobj); i++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
        if (ch != ' ' && ch != '\t' && ch != '\014')
            break;
    }
    /* i is now the number of leading whitespace characters. */
    if (i) {
        PyObject *truncated;
        truncated = PyUnicode_Substring(lineobj, i, PyUnicode_GET_LENGTH(lineobj));
        if (truncated) {
            Py_DECREF(lineobj);
            lineobj = truncated;
        } else {
            /* Keep the unstripped line if the substring cannot be built. */
            PyErr_Clear();
        }
    }

    if (truncation != NULL) {
        *truncation = i - indent;
    }

    if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
        goto error;
    }

    /* Write some spaces before the line */
    if (_Py_WriteIndent(indent, f) < 0) {
        goto error;
    }

    /* finally display the line */
    if (PyFile_WriteObject(lineobj, f, Py_PRINT_RAW) < 0) {
        goto error;
    }

    if (PyFile_WriteString("\n", f) < 0) {
        goto error;
    }

    Py_DECREF(lineobj);
    return 0;
error:
    Py_DECREF(lineobj);
    return -1;
}
574
575 int
_Py_DisplaySourceLine(PyObject * f,PyObject * filename,int lineno,int indent,int * truncation,PyObject ** line)576 _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent,
577 int *truncation, PyObject **line)
578 {
579 return display_source_line_with_margin(f, filename, lineno, indent, 0,
580 NULL, truncation, line);
581 }
582
583 /* AST based Traceback Specialization
584 *
 * When displaying a new traceback line, for certain syntactical constructs
 * (e.g. a subscript, an arithmetic operation) we try to create a representation
 * that separates the primary source of error from the rest.
588 *
589 * Example specialization of BinOp nodes:
590 * Traceback (most recent call last):
591 * File "/home/isidentical/cpython/cpython/t.py", line 10, in <module>
592 * add_values(1, 2, 'x', 3, 4)
593 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
594 * File "/home/isidentical/cpython/cpython/t.py", line 2, in add_values
595 * return a + b + c + d + e
596 * ~~~~~~^~~
 *  TypeError: unsupported operand type(s) for +: 'int' and 'str'
598 */
599
/* True if `c` is a space, a horizontal tab or a form feed.  The argument is
   evaluated up to three times. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
601
602 static int
extract_anchors_from_expr(const char * segment_str,expr_ty expr,Py_ssize_t * left_anchor,Py_ssize_t * right_anchor,char ** primary_error_char,char ** secondary_error_char)603 extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
604 char** primary_error_char, char** secondary_error_char)
605 {
606 switch (expr->kind) {
607 case BinOp_kind: {
608 expr_ty left = expr->v.BinOp.left;
609 expr_ty right = expr->v.BinOp.right;
610 for (int i = left->end_col_offset; i < right->col_offset; i++) {
611 if (IS_WHITESPACE(segment_str[i])) {
612 continue;
613 }
614
615 *left_anchor = i;
616 *right_anchor = i + 1;
617
618 // Check whether if this a two-character operator (e.g //)
619 if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) {
620 ++*right_anchor;
621 }
622
623 // Set the error characters
624 *primary_error_char = "~";
625 *secondary_error_char = "^";
626 break;
627 }
628 return 1;
629 }
630 case Subscript_kind: {
631 *left_anchor = expr->v.Subscript.value->end_col_offset;
632 *right_anchor = expr->v.Subscript.slice->end_col_offset + 1;
633
634 // Set the error characters
635 *primary_error_char = "~";
636 *secondary_error_char = "^";
637 return 1;
638 }
639 default:
640 return 0;
641 }
642 }
643
644 static int
extract_anchors_from_stmt(const char * segment_str,stmt_ty statement,Py_ssize_t * left_anchor,Py_ssize_t * right_anchor,char ** primary_error_char,char ** secondary_error_char)645 extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
646 char** primary_error_char, char** secondary_error_char)
647 {
648 switch (statement->kind) {
649 case Expr_kind: {
650 return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor,
651 primary_error_char, secondary_error_char);
652 }
653 default:
654 return 0;
655 }
656 }
657
658 static int
extract_anchors_from_line(PyObject * filename,PyObject * line,Py_ssize_t start_offset,Py_ssize_t end_offset,Py_ssize_t * left_anchor,Py_ssize_t * right_anchor,char ** primary_error_char,char ** secondary_error_char)659 extract_anchors_from_line(PyObject *filename, PyObject *line,
660 Py_ssize_t start_offset, Py_ssize_t end_offset,
661 Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
662 char** primary_error_char, char** secondary_error_char)
663 {
664 int res = -1;
665 PyArena *arena = NULL;
666 PyObject *segment = PyUnicode_Substring(line, start_offset, end_offset);
667 if (!segment) {
668 goto done;
669 }
670
671 const char *segment_str = PyUnicode_AsUTF8(segment);
672 if (!segment_str) {
673 goto done;
674 }
675
676 arena = _PyArena_New();
677 if (!arena) {
678 goto done;
679 }
680
681 PyCompilerFlags flags = _PyCompilerFlags_INIT;
682
683 _PyASTOptimizeState state;
684 state.optimize = _Py_GetConfig()->optimization_level;
685 state.ff_features = 0;
686
687 mod_ty module = _PyParser_ASTFromString(segment_str, filename, Py_file_input,
688 &flags, arena);
689 if (!module) {
690 goto done;
691 }
692 if (!_PyAST_Optimize(module, arena, &state)) {
693 goto done;
694 }
695
696 assert(module->kind == Module_kind);
697 if (asdl_seq_LEN(module->v.Module.body) == 1) {
698 stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
699 res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor,
700 primary_error_char, secondary_error_char);
701 } else {
702 res = 0;
703 }
704
705 done:
706 if (res > 0) {
707 *left_anchor += start_offset;
708 *right_anchor += start_offset;
709 }
710 Py_XDECREF(segment);
711 if (arena) {
712 _PyArena_Free(arena);
713 }
714 return res;
715 }
716
/* Number of spaces tb_displayline() asks for in front of a source line. */
#define _TRACEBACK_SOURCE_LINE_INDENT 4
718
719 static inline int
ignore_source_errors(void)720 ignore_source_errors(void) {
721 if (PyErr_Occurred()) {
722 if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
723 return -1;
724 }
725 PyErr_Clear();
726 }
727 return 0;
728 }
729
730 static inline int
print_error_location_carets(PyObject * f,int offset,Py_ssize_t start_offset,Py_ssize_t end_offset,Py_ssize_t right_start_offset,Py_ssize_t left_end_offset,const char * primary,const char * secondary)731 print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset,
732 Py_ssize_t right_start_offset, Py_ssize_t left_end_offset,
733 const char *primary, const char *secondary) {
734 int special_chars = (left_end_offset != -1 || right_start_offset != -1);
735 const char *str;
736 while (++offset <= end_offset) {
737 if (offset <= start_offset || offset > end_offset) {
738 str = " ";
739 } else if (special_chars && left_end_offset < offset && offset <= right_start_offset) {
740 str = secondary;
741 } else {
742 str = primary;
743 }
744 if (PyFile_WriteString(str, f) < 0) {
745 return -1;
746 }
747 }
748 if (PyFile_WriteString("\n", f) < 0) {
749 return -1;
750 }
751 return 0;
752 }
753
754 static int
tb_displayline(PyTracebackObject * tb,PyObject * f,PyObject * filename,int lineno,PyFrameObject * frame,PyObject * name,int margin_indent,const char * margin)755 tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
756 PyFrameObject *frame, PyObject *name, int margin_indent, const char *margin)
757 {
758 if (filename == NULL || name == NULL) {
759 return -1;
760 }
761
762 if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
763 return -1;
764 }
765
766 PyObject *line = PyUnicode_FromFormat(" File \"%U\", line %d, in %U\n",
767 filename, lineno, name);
768 if (line == NULL) {
769 return -1;
770 }
771
772 int res = PyFile_WriteObject(line, f, Py_PRINT_RAW);
773 Py_DECREF(line);
774 if (res < 0) {
775 return -1;
776 }
777
778 int err = 0;
779
780 int truncation = _TRACEBACK_SOURCE_LINE_INDENT;
781 PyObject* source_line = NULL;
782 int rc = display_source_line_with_margin(
783 f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
784 margin_indent, margin, &truncation, &source_line);
785 if (rc != 0 || !source_line) {
786 /* ignore errors since we can't report them, can we? */
787 err = ignore_source_errors();
788 goto done;
789 }
790
791 int code_offset = tb->tb_lasti;
792 PyCodeObject* code = frame->f_frame->f_code;
793
794 int start_line;
795 int end_line;
796 int start_col_byte_offset;
797 int end_col_byte_offset;
798 if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
799 &end_line, &end_col_byte_offset)) {
800 goto done;
801 }
802
803 if (start_line < 0 || end_line < 0
804 || start_col_byte_offset < 0
805 || end_col_byte_offset < 0)
806 {
807 goto done;
808 }
809
810 // When displaying errors, we will use the following generic structure:
811 //
812 // ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
813 // ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~
814 // | |-> left_end_offset | |-> left_offset
815 // |-> start_offset |-> right_start_offset
816 //
817 // In general we will only have (start_offset, end_offset) but we can gather more information
818 // by analyzing the AST of the text between *start_offset* and *end_offset*. If this succeeds
819 // we could get *left_end_offset* and *right_start_offset* and some selection of characters for
820 // the different ranges (primary_error_char and secondary_error_char). If we cannot obtain the
821 // AST information or we cannot identify special ranges within it, then left_end_offset and
822 // right_end_offset will be set to -1.
823
824 // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
825 assert(source_line);
826 Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
827 if (start_offset < 0) {
828 err = ignore_source_errors() < 0;
829 goto done;
830 }
831
832 Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
833 if (end_offset < 0) {
834 err = ignore_source_errors() < 0;
835 goto done;
836 }
837
838 Py_ssize_t left_end_offset = -1;
839 Py_ssize_t right_start_offset = -1;
840
841 char *primary_error_char = "^";
842 char *secondary_error_char = primary_error_char;
843
844 if (start_line == end_line) {
845 int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
846 &left_end_offset, &right_start_offset,
847 &primary_error_char, &secondary_error_char);
848 if (res < 0 && ignore_source_errors() < 0) {
849 goto done;
850 }
851 }
852 else {
853 // If this is a multi-line expression, then we will highlight until
854 // the last non-whitespace character.
855 const char *source_line_str = PyUnicode_AsUTF8(source_line);
856 if (!source_line_str) {
857 goto done;
858 }
859
860 Py_ssize_t i = PyUnicode_GET_LENGTH(source_line);
861 while (--i >= 0) {
862 if (!IS_WHITESPACE(source_line_str[i])) {
863 break;
864 }
865 }
866
867 end_offset = i + 1;
868 }
869
870 if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
871 err = -1;
872 goto done;
873 }
874
875 if (print_error_location_carets(f, truncation, start_offset, end_offset,
876 right_start_offset, left_end_offset,
877 primary_error_char, secondary_error_char) < 0) {
878 err = -1;
879 goto done;
880 }
881
882 done:
883 Py_XDECREF(source_line);
884 return err;
885 }
886
/* Number of consecutive identical traceback entries that tb_printinternal()
   prints in full before the rest is summarized as "Previous line
   repeated". */
static const int TB_RECURSIVE_CUTOFF = 3; // Also hardcoded in traceback.py.
888
889 static int
tb_print_line_repeated(PyObject * f,long cnt)890 tb_print_line_repeated(PyObject *f, long cnt)
891 {
892 cnt -= TB_RECURSIVE_CUTOFF;
893 PyObject *line = PyUnicode_FromFormat(
894 (cnt > 1)
895 ? " [Previous line repeated %ld more times]\n"
896 : " [Previous line repeated %ld more time]\n",
897 cnt);
898 if (line == NULL) {
899 return -1;
900 }
901 int err = PyFile_WriteObject(line, f, Py_PRINT_RAW);
902 Py_DECREF(line);
903 return err;
904 }
905
906 static int
tb_printinternal(PyTracebackObject * tb,PyObject * f,long limit,int indent,const char * margin)907 tb_printinternal(PyTracebackObject *tb, PyObject *f, long limit,
908 int indent, const char *margin)
909 {
910 PyCodeObject *code = NULL;
911 Py_ssize_t depth = 0;
912 PyObject *last_file = NULL;
913 int last_line = -1;
914 PyObject *last_name = NULL;
915 long cnt = 0;
916 PyTracebackObject *tb1 = tb;
917 while (tb1 != NULL) {
918 depth++;
919 tb1 = tb1->tb_next;
920 }
921 while (tb != NULL && depth > limit) {
922 depth--;
923 tb = tb->tb_next;
924 }
925 while (tb != NULL) {
926 code = PyFrame_GetCode(tb->tb_frame);
927 if (last_file == NULL ||
928 code->co_filename != last_file ||
929 last_line == -1 || tb->tb_lineno != last_line ||
930 last_name == NULL || code->co_name != last_name) {
931 if (cnt > TB_RECURSIVE_CUTOFF) {
932 if (tb_print_line_repeated(f, cnt) < 0) {
933 goto error;
934 }
935 }
936 last_file = code->co_filename;
937 last_line = tb->tb_lineno;
938 last_name = code->co_name;
939 cnt = 0;
940 }
941 cnt++;
942 if (cnt <= TB_RECURSIVE_CUTOFF) {
943 if (tb_displayline(tb, f, code->co_filename, tb->tb_lineno,
944 tb->tb_frame, code->co_name, indent, margin) < 0) {
945 goto error;
946 }
947
948 if (PyErr_CheckSignals() < 0) {
949 goto error;
950 }
951 }
952 Py_CLEAR(code);
953 tb = tb->tb_next;
954 }
955 if (cnt > TB_RECURSIVE_CUTOFF) {
956 if (tb_print_line_repeated(f, cnt) < 0) {
957 goto error;
958 }
959 }
960 return 0;
961 error:
962 Py_XDECREF(code);
963 return -1;
964 }
965
/* Default number of traceback entries printed when sys.tracebacklimit is
   not set to an int. */
#define PyTraceBack_LIMIT 1000
967
968 int
_PyTraceBack_Print_Indented(PyObject * v,int indent,const char * margin,const char * header_margin,const char * header,PyObject * f)969 _PyTraceBack_Print_Indented(PyObject *v, int indent, const char *margin,
970 const char *header_margin, const char *header, PyObject *f)
971 {
972 PyObject *limitv;
973 long limit = PyTraceBack_LIMIT;
974
975 if (v == NULL) {
976 return 0;
977 }
978 if (!PyTraceBack_Check(v)) {
979 PyErr_BadInternalCall();
980 return -1;
981 }
982 limitv = PySys_GetObject("tracebacklimit");
983 if (limitv && PyLong_Check(limitv)) {
984 int overflow;
985 limit = PyLong_AsLongAndOverflow(limitv, &overflow);
986 if (overflow > 0) {
987 limit = LONG_MAX;
988 }
989 else if (limit <= 0) {
990 return 0;
991 }
992 }
993 if (_Py_WriteIndentedMargin(indent, header_margin, f) < 0) {
994 return -1;
995 }
996
997 if (PyFile_WriteString(header, f) < 0) {
998 return -1;
999 }
1000
1001 if (tb_printinternal((PyTracebackObject *)v, f, limit, indent, margin) < 0) {
1002 return -1;
1003 }
1004
1005 return 0;
1006 }
1007
1008 int
PyTraceBack_Print(PyObject * v,PyObject * f)1009 PyTraceBack_Print(PyObject *v, PyObject *f)
1010 {
1011 int indent = 0;
1012 const char *margin = NULL;
1013 const char *header_margin = NULL;
1014 const char *header = EXCEPTION_TB_HEADER;
1015
1016 return _PyTraceBack_Print_Indented(v, indent, margin, header_margin, header, f);
1017 }
1018
/* Format the unsigned integer `value` as decimal and write it into the
   file fd.

   The digits are produced from the least significant one backwards into a
   fixed-size stack buffer and written with a single _Py_write_noraise()
   call.

   This function is signal safe. */

void
_Py_DumpDecimal(int fd, size_t value)
{
    /* Room for every decimal digit of a size_t plus the null byte:
       53/22 is an upper bound for log10(256), i.e. for the number of
       decimal digits needed per byte. */
    char buffer[1 + (sizeof(size_t)*53-1) / 22 + 1];
    char *const end = &buffer[Py_ARRAY_LENGTH(buffer) - 1];
    char *cursor = end;

    *cursor = '\0';
    for (;;) {
        --cursor;
        assert(cursor >= buffer);
        *cursor = '0' + (value % 10);
        value /= 10;
        if (!value) {
            break;
        }
    }

    _Py_write_noraise(fd, cursor, end - cursor);
}
1045
1046 /* Format an integer as hexadecimal with width digits into fd file descriptor.
1047 The function is signal safe. */
1048 void
_Py_DumpHexadecimal(int fd,uintptr_t value,Py_ssize_t width)1049 _Py_DumpHexadecimal(int fd, uintptr_t value, Py_ssize_t width)
1050 {
1051 char buffer[sizeof(uintptr_t) * 2 + 1], *ptr, *end;
1052 const Py_ssize_t size = Py_ARRAY_LENGTH(buffer) - 1;
1053
1054 if (width > size)
1055 width = size;
1056 /* it's ok if width is negative */
1057
1058 end = &buffer[size];
1059 ptr = end;
1060 *ptr = '\0';
1061 do {
1062 --ptr;
1063 assert(ptr >= buffer);
1064 *ptr = Py_hexdigits[value & 15];
1065 value >>= 4;
1066 } while ((end - ptr) < width || value);
1067
1068 _Py_write_noraise(fd, ptr, end - ptr);
1069 }
1070
/* Write the str object `text` to the file descriptor `fd` using only
 * printable ASCII.
 *
 * Characters in the range ' '..126 are written as they are; every other
 * character is written as \xHH, \uHHHH or \UHHHHHHHH depending on its value.
 * At most MAX_STRING_LENGTH characters are written; a longer string is cut
 * and followed by "...".
 *
 * Nothing is written if `text` is not a str object or if its character data
 * is missing.  The string's fields are read directly from the object layout.
 */
void
_Py_DumpASCII(int fd, PyObject *text)
{
    PyASCIIObject *ascii = (PyASCIIObject *)text;
    Py_ssize_t i, size;
    int truncated;
    int kind;
    void *data = NULL;
    wchar_t *wstr = NULL;
    Py_UCS4 ch;

    if (!PyUnicode_Check(text))
        return;

    /* Locate the character data according to the string representation. */
    size = ascii->length;
    kind = ascii->state.kind;
    if (kind == PyUnicode_WCHAR_KIND) {
        wstr = ((PyASCIIObject *)text)->wstr;
        if (wstr == NULL)
            return;
        size = ((PyCompactUnicodeObject *)text)->wstr_length;
    }
    else if (ascii->state.compact) {
        /* Compact strings store their data right after the header struct. */
        if (ascii->state.ascii)
            data = ((PyASCIIObject*)text) + 1;
        else
            data = ((PyCompactUnicodeObject*)text) + 1;
    }
    else {
        data = ((PyUnicodeObject *)text)->data.any;
        if (data == NULL)
            return;
    }

    if (MAX_STRING_LENGTH < size) {
        size = MAX_STRING_LENGTH;
        truncated = 1;
    }
    else {
        truncated = 0;
    }

    // Is an ASCII string?
    if (ascii->state.ascii) {
        assert(kind == PyUnicode_1BYTE_KIND);
        char *str = data;

        /* Check whether every character can be written unchanged. */
        int need_escape = 0;
        for (i=0; i < size; i++) {
            ch = str[i];
            if (!(' ' <= ch && ch <= 126)) {
                need_escape = 1;
                break;
            }
        }
        if (!need_escape) {
            // The string can be written with a single write() syscall
            _Py_write_noraise(fd, str, size);
            goto done;
        }
    }

    /* General case: write character by character, escaping as needed. */
    for (i=0; i < size; i++) {
        if (kind != PyUnicode_WCHAR_KIND)
            ch = PyUnicode_READ(kind, data, i);
        else
            ch = wstr[i];
        if (' ' <= ch && ch <= 126) {
            /* printable ASCII character */
            char c = (char)ch;
            _Py_write_noraise(fd, &c, 1);
        }
        else if (ch <= 0xff) {
            PUTS(fd, "\\x");
            _Py_DumpHexadecimal(fd, ch, 2);
        }
        else if (ch <= 0xffff) {
            PUTS(fd, "\\u");
            _Py_DumpHexadecimal(fd, ch, 4);
        }
        else {
            PUTS(fd, "\\U");
            _Py_DumpHexadecimal(fd, ch, 8);
        }
    }

done:
    if (truncated) {
        PUTS(fd, "...");
    }
}
1162
1163 /* Write a frame into the file fd: "File "xxx", line xxx in xxx".
1164
1165 This function is signal safe. */
1166
1167 static void
dump_frame(int fd,InterpreterFrame * frame)1168 dump_frame(int fd, InterpreterFrame *frame)
1169 {
1170 PyCodeObject *code = frame->f_code;
1171 PUTS(fd, " File ");
1172 if (code->co_filename != NULL
1173 && PyUnicode_Check(code->co_filename))
1174 {
1175 PUTS(fd, "\"");
1176 _Py_DumpASCII(fd, code->co_filename);
1177 PUTS(fd, "\"");
1178 } else {
1179 PUTS(fd, "???");
1180 }
1181
1182 int lineno = PyCode_Addr2Line(code, frame->f_lasti*sizeof(_Py_CODEUNIT));
1183 PUTS(fd, ", line ");
1184 if (lineno >= 0) {
1185 _Py_DumpDecimal(fd, (size_t)lineno);
1186 }
1187 else {
1188 PUTS(fd, "???");
1189 }
1190 PUTS(fd, " in ");
1191
1192 if (code->co_name != NULL
1193 && PyUnicode_Check(code->co_name)) {
1194 _Py_DumpASCII(fd, code->co_name);
1195 }
1196 else {
1197 PUTS(fd, "???");
1198 }
1199
1200 PUTS(fd, "\n");
1201 }
1202
1203 static void
dump_traceback(int fd,PyThreadState * tstate,int write_header)1204 dump_traceback(int fd, PyThreadState *tstate, int write_header)
1205 {
1206 InterpreterFrame *frame;
1207 unsigned int depth;
1208
1209 if (write_header) {
1210 PUTS(fd, "Stack (most recent call first):\n");
1211 }
1212
1213 frame = tstate->cframe->current_frame;
1214 if (frame == NULL) {
1215 PUTS(fd, " <no Python frame>\n");
1216 return;
1217 }
1218
1219 depth = 0;
1220 while (1) {
1221 if (MAX_FRAME_DEPTH <= depth) {
1222 PUTS(fd, " ...\n");
1223 break;
1224 }
1225 dump_frame(fd, frame);
1226 frame = frame->previous;
1227 if (frame == NULL) {
1228 break;
1229 }
1230 depth++;
1231 }
1232 }
1233
1234 /* Dump the traceback of a Python thread into fd. Use write() to write the
1235 traceback and retry if write() is interrupted by a signal (failed with
1236 EINTR), but don't call the Python signal handler.
1237
1238 The caller is responsible to call PyErr_CheckSignals() to call Python signal
1239 handlers if signals were received. */
1240 void
_Py_DumpTraceback(int fd,PyThreadState * tstate)1241 _Py_DumpTraceback(int fd, PyThreadState *tstate)
1242 {
1243 dump_traceback(fd, tstate, 1);
1244 }
1245
1246 /* Write the thread identifier into the file 'fd': "Current thread 0xHHHH:\" if
1247 is_current is true, "Thread 0xHHHH:\n" otherwise.
1248
1249 This function is signal safe. */
1250
1251 static void
write_thread_id(int fd,PyThreadState * tstate,int is_current)1252 write_thread_id(int fd, PyThreadState *tstate, int is_current)
1253 {
1254 if (is_current)
1255 PUTS(fd, "Current thread 0x");
1256 else
1257 PUTS(fd, "Thread 0x");
1258 _Py_DumpHexadecimal(fd,
1259 tstate->thread_id,
1260 sizeof(unsigned long) * 2);
1261 PUTS(fd, " (most recent call first):\n");
1262 }
1263
1264 /* Dump the traceback of all Python threads into fd. Use write() to write the
1265 traceback and retry if write() is interrupted by a signal (failed with
1266 EINTR), but don't call the Python signal handler.
1267
1268 The caller is responsible to call PyErr_CheckSignals() to call Python signal
1269 handlers if signals were received. */
1270 const char*
_Py_DumpTracebackThreads(int fd,PyInterpreterState * interp,PyThreadState * current_tstate)1271 _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp,
1272 PyThreadState *current_tstate)
1273 {
1274 PyThreadState *tstate;
1275 unsigned int nthreads;
1276
1277 if (current_tstate == NULL) {
1278 /* _Py_DumpTracebackThreads() is called from signal handlers by
1279 faulthandler.
1280
1281 SIGSEGV, SIGFPE, SIGABRT, SIGBUS and SIGILL are synchronous signals
1282 and are thus delivered to the thread that caused the fault. Get the
1283 Python thread state of the current thread.
1284
1285 PyThreadState_Get() doesn't give the state of the thread that caused
1286 the fault if the thread released the GIL, and so
1287 _PyThreadState_GET() cannot be used. Read the thread specific
1288 storage (TSS) instead: call PyGILState_GetThisThreadState(). */
1289 current_tstate = PyGILState_GetThisThreadState();
1290 }
1291
1292 if (interp == NULL) {
1293 if (current_tstate == NULL) {
1294 interp = _PyGILState_GetInterpreterStateUnsafe();
1295 if (interp == NULL) {
1296 /* We need the interpreter state to get Python threads */
1297 return "unable to get the interpreter state";
1298 }
1299 }
1300 else {
1301 interp = current_tstate->interp;
1302 }
1303 }
1304 assert(interp != NULL);
1305
1306 /* Get the current interpreter from the current thread */
1307 tstate = PyInterpreterState_ThreadHead(interp);
1308 if (tstate == NULL)
1309 return "unable to get the thread head state";
1310
1311 /* Dump the traceback of each thread */
1312 tstate = PyInterpreterState_ThreadHead(interp);
1313 nthreads = 0;
1314 _Py_BEGIN_SUPPRESS_IPH
1315 do
1316 {
1317 if (nthreads != 0)
1318 PUTS(fd, "\n");
1319 if (nthreads >= MAX_NTHREADS) {
1320 PUTS(fd, "...\n");
1321 break;
1322 }
1323 write_thread_id(fd, tstate, tstate == current_tstate);
1324 if (tstate == current_tstate && tstate->interp->gc.collecting) {
1325 PUTS(fd, " Garbage-collecting\n");
1326 }
1327 dump_traceback(fd, tstate, 0);
1328 tstate = PyThreadState_Next(tstate);
1329 nthreads++;
1330 } while (tstate != NULL);
1331 _Py_END_SUPPRESS_IPH
1332
1333 return NULL;
1334 }
1335
1336