1 
2 /* Traceback implementation */
3 
4 #include "Python.h"
5 
6 #include "code.h"                 // PyCode_Addr2Line etc
7 #include "frameobject.h"          // PyFrame_GetBack()
8 #include "pycore_ast.h"           // asdl_seq_*
9 #include "pycore_compile.h"       // _PyAST_Optimize
10 #include "pycore_fileutils.h"     // _Py_BEGIN_SUPPRESS_IPH
11 #include "pycore_frame.h"         // _PyFrame_GetCode()
12 #include "pycore_interp.h"        // PyInterpreterState.gc
13 #include "pycore_parser.h"        // _PyParser_ASTFromString
14 #include "pycore_pyarena.h"       // _PyArena_Free()
15 #include "pycore_pyerrors.h"      // _PyErr_Fetch()
16 #include "pycore_pystate.h"       // _PyThreadState_GET()
17 #include "pycore_traceback.h"     // EXCEPTION_TB_HEADER
18 #include "../Parser/pegen.h"      // _PyPegen_byte_offset_to_character_offset()
19 #include "structmember.h"         // PyMemberDef
20 #include "osdefs.h"               // SEP
21 #ifdef HAVE_FCNTL_H
22 #  include <fcntl.h>
23 #endif
24 
/* Byte offset of field `x` inside PyTracebackObject (used by the member
   table below). */
#define OFF(x) offsetof(PyTracebackObject, x)

/* Write the NUL-terminated string `str` to the file descriptor `fd` through
   _Py_write_noraise(). `str` is evaluated twice. */
#define PUTS(fd, str) _Py_write_noraise(fd, str, (int)strlen(str))
/* NOTE(review): the three limits below are not referenced in the part of the
   file reviewed here; confirm their users before changing the values. */
#define MAX_STRING_LENGTH 500
#define MAX_FRAME_DEPTH 100
#define MAX_NTHREADS 100
31 
/* Function from Parser/tokenizer.c.  It is called below with a file
   descriptor and the filename object; a NULL result is treated as "no
   encoding found" and a non-NULL result is released with PyMem_Free(). */
extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);

/* Static identifiers, referenced further down as &PyId_<name> when looking
   up attributes/methods (io.TextIOWrapper, file.close, io.open, sys.path). */
_Py_IDENTIFIER(TextIOWrapper);
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(open);
_Py_IDENTIFIER(path);
39 
40 /*[clinic input]
41 class TracebackType "PyTracebackObject *" "&PyTraceback_Type"
42 [clinic start generated code]*/
43 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=928fa06c10151120]*/
44 
45 #include "clinic/traceback.c.h"
46 
47 static PyObject *
tb_create_raw(PyTracebackObject * next,PyFrameObject * frame,int lasti,int lineno)48 tb_create_raw(PyTracebackObject *next, PyFrameObject *frame, int lasti,
49               int lineno)
50 {
51     PyTracebackObject *tb;
52     if ((next != NULL && !PyTraceBack_Check(next)) ||
53                     frame == NULL || !PyFrame_Check(frame)) {
54         PyErr_BadInternalCall();
55         return NULL;
56     }
57     tb = PyObject_GC_New(PyTracebackObject, &PyTraceBack_Type);
58     if (tb != NULL) {
59         Py_XINCREF(next);
60         tb->tb_next = next;
61         Py_XINCREF(frame);
62         tb->tb_frame = frame;
63         tb->tb_lasti = lasti;
64         tb->tb_lineno = lineno;
65         PyObject_GC_Track(tb);
66     }
67     return (PyObject *)tb;
68 }
69 
70 /*[clinic input]
71 @classmethod
72 TracebackType.__new__ as tb_new
73 
74   tb_next: object
75   tb_frame: object(type='PyFrameObject *', subclass_of='&PyFrame_Type')
76   tb_lasti: int
77   tb_lineno: int
78 
79 Create a new traceback object.
80 [clinic start generated code]*/
81 
82 static PyObject *
tb_new_impl(PyTypeObject * type,PyObject * tb_next,PyFrameObject * tb_frame,int tb_lasti,int tb_lineno)83 tb_new_impl(PyTypeObject *type, PyObject *tb_next, PyFrameObject *tb_frame,
84             int tb_lasti, int tb_lineno)
85 /*[clinic end generated code: output=fa077debd72d861a input=01cbe8ec8783fca7]*/
86 {
87     if (tb_next == Py_None) {
88         tb_next = NULL;
89     } else if (!PyTraceBack_Check(tb_next)) {
90         return PyErr_Format(PyExc_TypeError,
91                             "expected traceback object or None, got '%s'",
92                             Py_TYPE(tb_next)->tp_name);
93     }
94 
95     return tb_create_raw((PyTracebackObject *)tb_next, tb_frame, tb_lasti,
96                          tb_lineno);
97 }
98 
99 static PyObject *
tb_dir(PyTracebackObject * self,PyObject * Py_UNUSED (ignored))100 tb_dir(PyTracebackObject *self, PyObject *Py_UNUSED(ignored))
101 {
102     return Py_BuildValue("[ssss]", "tb_frame", "tb_next",
103                                    "tb_lasti", "tb_lineno");
104 }
105 
106 static PyObject *
tb_next_get(PyTracebackObject * self,void * Py_UNUSED (_))107 tb_next_get(PyTracebackObject *self, void *Py_UNUSED(_))
108 {
109     PyObject* ret = (PyObject*)self->tb_next;
110     if (!ret) {
111         ret = Py_None;
112     }
113     Py_INCREF(ret);
114     return ret;
115 }
116 
117 static int
tb_next_set(PyTracebackObject * self,PyObject * new_next,void * Py_UNUSED (_))118 tb_next_set(PyTracebackObject *self, PyObject *new_next, void *Py_UNUSED(_))
119 {
120     if (!new_next) {
121         PyErr_Format(PyExc_TypeError, "can't delete tb_next attribute");
122         return -1;
123     }
124 
125     /* We accept None or a traceback object, and map None -> NULL (inverse of
126        tb_next_get) */
127     if (new_next == Py_None) {
128         new_next = NULL;
129     } else if (!PyTraceBack_Check(new_next)) {
130         PyErr_Format(PyExc_TypeError,
131                      "expected traceback object, got '%s'",
132                      Py_TYPE(new_next)->tp_name);
133         return -1;
134     }
135 
136     /* Check for loops */
137     PyTracebackObject *cursor = (PyTracebackObject *)new_next;
138     while (cursor) {
139         if (cursor == self) {
140             PyErr_Format(PyExc_ValueError, "traceback loop detected");
141             return -1;
142         }
143         cursor = cursor->tb_next;
144     }
145 
146     PyObject *old_next = (PyObject*)self->tb_next;
147     Py_XINCREF(new_next);
148     self->tb_next = (PyTracebackObject *)new_next;
149     Py_XDECREF(old_next);
150 
151     return 0;
152 }
153 
154 
155 static PyMethodDef tb_methods[] = {
156    {"__dir__", (PyCFunction)tb_dir, METH_NOARGS},
157    {NULL, NULL, 0, NULL},
158 };
159 
160 static PyMemberDef tb_memberlist[] = {
161     {"tb_frame",        T_OBJECT,       OFF(tb_frame),  READONLY|PY_AUDIT_READ},
162     {"tb_lasti",        T_INT,          OFF(tb_lasti),  READONLY},
163     {"tb_lineno",       T_INT,          OFF(tb_lineno), READONLY},
164     {NULL}      /* Sentinel */
165 };
166 
167 static PyGetSetDef tb_getsetters[] = {
168     {"tb_next", (getter)tb_next_get, (setter)tb_next_set, NULL, NULL},
169     {NULL}      /* Sentinel */
170 };
171 
/* tp_dealloc slot: drop the references held by a traceback entry and free
   the entry itself. */
static void
tb_dealloc(PyTracebackObject *tb)
{
    /* Stop GC tracking before the fields are torn down. */
    PyObject_GC_UnTrack(tb);
    /* The teardown is bracketed by the trashcan macros.
       NOTE(review): presumably to bound C-stack recursion when a long
       tb_next chain is released -- see their definition in object.h. */
    Py_TRASHCAN_BEGIN(tb, tb_dealloc)
    Py_XDECREF(tb->tb_next);
    Py_XDECREF(tb->tb_frame);
    PyObject_GC_Del(tb);
    Py_TRASHCAN_END
}
182 
/* tp_traverse slot: present the two owned references (tb_next and tb_frame)
   to the visitor.  Py_VISIT relies on the parameters being named `visit`
   and `arg`. */
static int
tb_traverse(PyTracebackObject *tb, visitproc visit, void *arg)
{
    Py_VISIT(tb->tb_next);
    Py_VISIT(tb->tb_frame);
    return 0;
}
190 
/* tp_clear slot: release tb_next and tb_frame via Py_CLEAR.  Always
   returns 0. */
static int
tb_clear(PyTracebackObject *tb)
{
    Py_CLEAR(tb->tb_next);
    Py_CLEAR(tb->tb_frame);
    return 0;
}
198 
199 PyTypeObject PyTraceBack_Type = {
200     PyVarObject_HEAD_INIT(&PyType_Type, 0)
201     "traceback",
202     sizeof(PyTracebackObject),
203     0,
204     (destructor)tb_dealloc, /*tp_dealloc*/
205     0,                  /*tp_vectorcall_offset*/
206     0,    /*tp_getattr*/
207     0,                  /*tp_setattr*/
208     0,                  /*tp_as_async*/
209     0,                  /*tp_repr*/
210     0,                  /*tp_as_number*/
211     0,                  /*tp_as_sequence*/
212     0,                  /*tp_as_mapping*/
213     0,                  /* tp_hash */
214     0,                  /* tp_call */
215     0,                  /* tp_str */
216     PyObject_GenericGetAttr,                    /* tp_getattro */
217     0,                  /* tp_setattro */
218     0,                                          /* tp_as_buffer */
219     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
220     tb_new__doc__,                              /* tp_doc */
221     (traverseproc)tb_traverse,                  /* tp_traverse */
222     (inquiry)tb_clear,                          /* tp_clear */
223     0,                                          /* tp_richcompare */
224     0,                                          /* tp_weaklistoffset */
225     0,                                          /* tp_iter */
226     0,                                          /* tp_iternext */
227     tb_methods,         /* tp_methods */
228     tb_memberlist,      /* tp_members */
229     tb_getsetters,                              /* tp_getset */
230     0,                                          /* tp_base */
231     0,                                          /* tp_dict */
232     0,                                          /* tp_descr_get */
233     0,                                          /* tp_descr_set */
234     0,                                          /* tp_dictoffset */
235     0,                                          /* tp_init */
236     0,                                          /* tp_alloc */
237     tb_new,                                     /* tp_new */
238 };
239 
240 
241 PyObject*
_PyTraceBack_FromFrame(PyObject * tb_next,PyFrameObject * frame)242 _PyTraceBack_FromFrame(PyObject *tb_next, PyFrameObject *frame)
243 {
244     assert(tb_next == NULL || PyTraceBack_Check(tb_next));
245     assert(frame != NULL);
246 
247     return tb_create_raw((PyTracebackObject *)tb_next, frame, frame->f_frame->f_lasti*sizeof(_Py_CODEUNIT),
248                          PyFrame_GetLineNumber(frame));
249 }
250 
251 
252 int
PyTraceBack_Here(PyFrameObject * frame)253 PyTraceBack_Here(PyFrameObject *frame)
254 {
255     PyObject *exc, *val, *tb, *newtb;
256     PyErr_Fetch(&exc, &val, &tb);
257     newtb = _PyTraceBack_FromFrame(tb, frame);
258     if (newtb == NULL) {
259         _PyErr_ChainExceptions(exc, val, tb);
260         return -1;
261     }
262     PyErr_Restore(exc, val, newtb);
263     Py_XDECREF(tb);
264     return 0;
265 }
266 
267 /* Insert a frame into the traceback for (funcname, filename, lineno). */
_PyTraceback_Add(const char * funcname,const char * filename,int lineno)268 void _PyTraceback_Add(const char *funcname, const char *filename, int lineno)
269 {
270     PyObject *globals;
271     PyCodeObject *code;
272     PyFrameObject *frame;
273     PyObject *exc, *val, *tb;
274     PyThreadState *tstate = _PyThreadState_GET();
275 
276     /* Save and clear the current exception. Python functions must not be
277        called with an exception set. Calling Python functions happens when
278        the codec of the filesystem encoding is implemented in pure Python. */
279     _PyErr_Fetch(tstate, &exc, &val, &tb);
280 
281     globals = PyDict_New();
282     if (!globals)
283         goto error;
284     code = PyCode_NewEmpty(filename, funcname, lineno);
285     if (!code) {
286         Py_DECREF(globals);
287         goto error;
288     }
289     frame = PyFrame_New(tstate, code, globals, NULL);
290     Py_DECREF(globals);
291     Py_DECREF(code);
292     if (!frame)
293         goto error;
294     frame->f_lineno = lineno;
295 
296     _PyErr_Restore(tstate, exc, val, tb);
297     PyTraceBack_Here(frame);
298     Py_DECREF(frame);
299     return;
300 
301 error:
302     _PyErr_ChainExceptions(exc, val, tb);
303 }
304 
305 static PyObject *
_Py_FindSourceFile(PyObject * filename,char * namebuf,size_t namelen,PyObject * io)306 _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *io)
307 {
308     Py_ssize_t i;
309     PyObject *binary;
310     PyObject *v;
311     Py_ssize_t npath;
312     size_t taillen;
313     PyObject *syspath;
314     PyObject *path;
315     const char* tail;
316     PyObject *filebytes;
317     const char* filepath;
318     Py_ssize_t len;
319     PyObject* result;
320 
321     filebytes = PyUnicode_EncodeFSDefault(filename);
322     if (filebytes == NULL) {
323         PyErr_Clear();
324         return NULL;
325     }
326     filepath = PyBytes_AS_STRING(filebytes);
327 
328     /* Search tail of filename in sys.path before giving up */
329     tail = strrchr(filepath, SEP);
330     if (tail == NULL)
331         tail = filepath;
332     else
333         tail++;
334     taillen = strlen(tail);
335 
336     syspath = _PySys_GetObjectId(&PyId_path);
337     if (syspath == NULL || !PyList_Check(syspath))
338         goto error;
339     npath = PyList_Size(syspath);
340 
341     for (i = 0; i < npath; i++) {
342         v = PyList_GetItem(syspath, i);
343         if (v == NULL) {
344             PyErr_Clear();
345             break;
346         }
347         if (!PyUnicode_Check(v))
348             continue;
349         path = PyUnicode_EncodeFSDefault(v);
350         if (path == NULL) {
351             PyErr_Clear();
352             continue;
353         }
354         len = PyBytes_GET_SIZE(path);
355         if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) {
356             Py_DECREF(path);
357             continue; /* Too long */
358         }
359         strcpy(namebuf, PyBytes_AS_STRING(path));
360         Py_DECREF(path);
361         if (strlen(namebuf) != (size_t)len)
362             continue; /* v contains '\0' */
363         if (len > 0 && namebuf[len-1] != SEP)
364             namebuf[len++] = SEP;
365         strcpy(namebuf+len, tail);
366 
367         binary = _PyObject_CallMethodId(io, &PyId_open, "ss", namebuf, "rb");
368         if (binary != NULL) {
369             result = binary;
370             goto finally;
371         }
372         PyErr_Clear();
373     }
374     goto error;
375 
376 error:
377     result = NULL;
378 finally:
379     Py_DECREF(filebytes);
380     return result;
381 }
382 
383 /* Writes indent spaces. Returns 0 on success and non-zero on failure.
384  */
385 int
_Py_WriteIndent(int indent,PyObject * f)386 _Py_WriteIndent(int indent, PyObject *f)
387 {
388     char buf[11] = "          ";
389     assert(strlen(buf) == 10);
390     while (indent > 0) {
391         if (indent < 10) {
392             buf[indent] = '\0';
393         }
394         if (PyFile_WriteString(buf, f) < 0) {
395             return -1;
396         }
397         indent -= 10;
398     }
399     return 0;
400 }
401 
402 /* Writes indent spaces, followed by the margin if it is not `\0`.
403    Returns 0 on success and non-zero on failure.
404  */
405 int
_Py_WriteIndentedMargin(int indent,const char * margin,PyObject * f)406 _Py_WriteIndentedMargin(int indent, const char *margin, PyObject *f)
407 {
408     if (_Py_WriteIndent(indent, f) < 0) {
409         return -1;
410     }
411     if (margin) {
412         if (PyFile_WriteString(margin, f) < 0) {
413             return -1;
414         }
415     }
416     return 0;
417 }
418 
419 static int
display_source_line_with_margin(PyObject * f,PyObject * filename,int lineno,int indent,int margin_indent,const char * margin,int * truncation,PyObject ** line)420 display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int indent,
421                                 int margin_indent, const char *margin,
422                                 int *truncation, PyObject **line)
423 {
424     int fd;
425     int i;
426     char *found_encoding;
427     const char *encoding;
428     PyObject *io;
429     PyObject *binary;
430     PyObject *fob = NULL;
431     PyObject *lineobj = NULL;
432     PyObject *res;
433     char buf[MAXPATHLEN+1];
434     int kind;
435     const void *data;
436 
437     /* open the file */
438     if (filename == NULL)
439         return 0;
440 
441     /* Do not attempt to open things like <string> or <stdin> */
442     assert(PyUnicode_Check(filename));
443     if (PyUnicode_READ_CHAR(filename, 0) == '<') {
444         Py_ssize_t len = PyUnicode_GET_LENGTH(filename);
445         if (len > 0 && PyUnicode_READ_CHAR(filename, len - 1) == '>') {
446             return 0;
447         }
448     }
449 
450     io = PyImport_ImportModuleNoBlock("io");
451     if (io == NULL)
452         return -1;
453     binary = _PyObject_CallMethodId(io, &PyId_open, "Os", filename, "rb");
454 
455     if (binary == NULL) {
456         PyErr_Clear();
457 
458         binary = _Py_FindSourceFile(filename, buf, sizeof(buf), io);
459         if (binary == NULL) {
460             Py_DECREF(io);
461             return -1;
462         }
463     }
464 
465     /* use the right encoding to decode the file as unicode */
466     fd = PyObject_AsFileDescriptor(binary);
467     if (fd < 0) {
468         Py_DECREF(io);
469         Py_DECREF(binary);
470         return 0;
471     }
472     found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
473     if (found_encoding == NULL)
474         PyErr_Clear();
475     encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
476     /* Reset position */
477     if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
478         Py_DECREF(io);
479         Py_DECREF(binary);
480         PyMem_Free(found_encoding);
481         return 0;
482     }
483     fob = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "Os", binary, encoding);
484     Py_DECREF(io);
485     PyMem_Free(found_encoding);
486 
487     if (fob == NULL) {
488         PyErr_Clear();
489 
490         res = _PyObject_CallMethodIdNoArgs(binary, &PyId_close);
491         Py_DECREF(binary);
492         if (res)
493             Py_DECREF(res);
494         else
495             PyErr_Clear();
496         return 0;
497     }
498     Py_DECREF(binary);
499 
500     /* get the line number lineno */
501     for (i = 0; i < lineno; i++) {
502         Py_XDECREF(lineobj);
503         lineobj = PyFile_GetLine(fob, -1);
504         if (!lineobj) {
505             PyErr_Clear();
506             break;
507         }
508     }
509     res = _PyObject_CallMethodIdNoArgs(fob, &PyId_close);
510     if (res) {
511         Py_DECREF(res);
512     }
513     else {
514         PyErr_Clear();
515     }
516     Py_DECREF(fob);
517     if (!lineobj || !PyUnicode_Check(lineobj)) {
518         Py_XDECREF(lineobj);
519         return -1;
520     }
521 
522     if (line) {
523         Py_INCREF(lineobj);
524         *line = lineobj;
525     }
526 
527     /* remove the indentation of the line */
528     kind = PyUnicode_KIND(lineobj);
529     data = PyUnicode_DATA(lineobj);
530     for (i=0; i < PyUnicode_GET_LENGTH(lineobj); i++) {
531         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
532         if (ch != ' ' && ch != '\t' && ch != '\014')
533             break;
534     }
535     if (i) {
536         PyObject *truncated;
537         truncated = PyUnicode_Substring(lineobj, i, PyUnicode_GET_LENGTH(lineobj));
538         if (truncated) {
539             Py_DECREF(lineobj);
540             lineobj = truncated;
541         } else {
542             PyErr_Clear();
543         }
544     }
545 
546     if (truncation != NULL) {
547         *truncation = i - indent;
548     }
549 
550     if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
551         goto error;
552     }
553 
554     /* Write some spaces before the line */
555     if (_Py_WriteIndent(indent, f) < 0) {
556         goto error;
557     }
558 
559     /* finally display the line */
560     if (PyFile_WriteObject(lineobj, f, Py_PRINT_RAW) < 0) {
561         goto error;
562     }
563 
564     if (PyFile_WriteString("\n", f) < 0) {
565         goto error;
566     }
567 
568     Py_DECREF(lineobj);
569     return 0;
570 error:
571     Py_DECREF(lineobj);
572     return -1;
573 }
574 
575 int
_Py_DisplaySourceLine(PyObject * f,PyObject * filename,int lineno,int indent,int * truncation,PyObject ** line)576 _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent,
577                       int *truncation, PyObject **line)
578 {
579     return display_source_line_with_margin(f, filename, lineno, indent, 0,
580                                            NULL, truncation, line);
581 }
582 
583 /* AST based Traceback Specialization
584  *
 * When displaying a new traceback line, for certain syntactical constructs
 * (e.g. a subscript, an arithmetic operation) we try to create a representation
 * that separates the primary source of error from the rest.
588  *
589  * Example specialization of BinOp nodes:
590  *  Traceback (most recent call last):
591  *    File "/home/isidentical/cpython/cpython/t.py", line 10, in <module>
592  *      add_values(1, 2, 'x', 3, 4)
593  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
594  *    File "/home/isidentical/cpython/cpython/t.py", line 2, in add_values
595  *      return a + b + c + d + e
596  *             ~~~~~~^~~
 *  TypeError: unsupported operand type(s) for +: 'int' and 'str'
598  */
599 
/* True when `c` is a space, a tab or a form feed.  `c` may be evaluated up
   to three times, so pass an expression without side effects. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
601 
602 static int
extract_anchors_from_expr(const char * segment_str,expr_ty expr,Py_ssize_t * left_anchor,Py_ssize_t * right_anchor,char ** primary_error_char,char ** secondary_error_char)603 extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
604                           char** primary_error_char, char** secondary_error_char)
605 {
606     switch (expr->kind) {
607         case BinOp_kind: {
608             expr_ty left = expr->v.BinOp.left;
609             expr_ty right = expr->v.BinOp.right;
610             for (int i = left->end_col_offset; i < right->col_offset; i++) {
611                 if (IS_WHITESPACE(segment_str[i])) {
612                     continue;
613                 }
614 
615                 *left_anchor = i;
616                 *right_anchor = i + 1;
617 
618                 // Check whether if this a two-character operator (e.g //)
619                 if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) {
620                     ++*right_anchor;
621                 }
622 
623                 // Set the error characters
624                 *primary_error_char = "~";
625                 *secondary_error_char = "^";
626                 break;
627             }
628             return 1;
629         }
630         case Subscript_kind: {
631             *left_anchor = expr->v.Subscript.value->end_col_offset;
632             *right_anchor = expr->v.Subscript.slice->end_col_offset + 1;
633 
634             // Set the error characters
635             *primary_error_char = "~";
636             *secondary_error_char = "^";
637             return 1;
638         }
639         default:
640             return 0;
641     }
642 }
643 
644 static int
extract_anchors_from_stmt(const char * segment_str,stmt_ty statement,Py_ssize_t * left_anchor,Py_ssize_t * right_anchor,char ** primary_error_char,char ** secondary_error_char)645 extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
646                           char** primary_error_char, char** secondary_error_char)
647 {
648     switch (statement->kind) {
649         case Expr_kind: {
650             return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor,
651                                              primary_error_char, secondary_error_char);
652         }
653         default:
654             return 0;
655     }
656 }
657 
658 static int
extract_anchors_from_line(PyObject * filename,PyObject * line,Py_ssize_t start_offset,Py_ssize_t end_offset,Py_ssize_t * left_anchor,Py_ssize_t * right_anchor,char ** primary_error_char,char ** secondary_error_char)659 extract_anchors_from_line(PyObject *filename, PyObject *line,
660                           Py_ssize_t start_offset, Py_ssize_t end_offset,
661                           Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
662                           char** primary_error_char, char** secondary_error_char)
663 {
664     int res = -1;
665     PyArena *arena = NULL;
666     PyObject *segment = PyUnicode_Substring(line, start_offset, end_offset);
667     if (!segment) {
668         goto done;
669     }
670 
671     const char *segment_str = PyUnicode_AsUTF8(segment);
672     if (!segment_str) {
673         goto done;
674     }
675 
676     arena = _PyArena_New();
677     if (!arena) {
678         goto done;
679     }
680 
681     PyCompilerFlags flags = _PyCompilerFlags_INIT;
682 
683     _PyASTOptimizeState state;
684     state.optimize = _Py_GetConfig()->optimization_level;
685     state.ff_features = 0;
686 
687     mod_ty module = _PyParser_ASTFromString(segment_str, filename, Py_file_input,
688                                             &flags, arena);
689     if (!module) {
690         goto done;
691     }
692     if (!_PyAST_Optimize(module, arena, &state)) {
693         goto done;
694     }
695 
696     assert(module->kind == Module_kind);
697     if (asdl_seq_LEN(module->v.Module.body) == 1) {
698         stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
699         res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor,
700                                         primary_error_char, secondary_error_char);
701     } else {
702         res = 0;
703     }
704 
705 done:
706     if (res > 0) {
707         *left_anchor += start_offset;
708         *right_anchor += start_offset;
709     }
710     Py_XDECREF(segment);
711     if (arena) {
712         _PyArena_Free(arena);
713     }
714     return res;
715 }
716 
/* Number of spaces tb_displayline() asks for in front of a source line. */
#define _TRACEBACK_SOURCE_LINE_INDENT 4
718 
719 static inline int
ignore_source_errors(void)720 ignore_source_errors(void) {
721     if (PyErr_Occurred()) {
722         if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
723             return -1;
724         }
725         PyErr_Clear();
726     }
727     return 0;
728 }
729 
730 static inline int
print_error_location_carets(PyObject * f,int offset,Py_ssize_t start_offset,Py_ssize_t end_offset,Py_ssize_t right_start_offset,Py_ssize_t left_end_offset,const char * primary,const char * secondary)731 print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset,
732                             Py_ssize_t right_start_offset, Py_ssize_t left_end_offset,
733                             const char *primary, const char *secondary) {
734     int special_chars = (left_end_offset != -1 || right_start_offset != -1);
735     const char *str;
736     while (++offset <= end_offset) {
737         if (offset <= start_offset || offset > end_offset) {
738             str = " ";
739         } else if (special_chars && left_end_offset < offset && offset <= right_start_offset) {
740             str = secondary;
741         } else {
742             str = primary;
743         }
744         if (PyFile_WriteString(str, f) < 0) {
745             return -1;
746         }
747     }
748     if (PyFile_WriteString("\n", f) < 0) {
749         return -1;
750     }
751     return 0;
752 }
753 
754 static int
tb_displayline(PyTracebackObject * tb,PyObject * f,PyObject * filename,int lineno,PyFrameObject * frame,PyObject * name,int margin_indent,const char * margin)755 tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
756                PyFrameObject *frame, PyObject *name, int margin_indent, const char *margin)
757 {
758     if (filename == NULL || name == NULL) {
759         return -1;
760     }
761 
762     if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
763         return -1;
764     }
765 
766     PyObject *line = PyUnicode_FromFormat("  File \"%U\", line %d, in %U\n",
767                                           filename, lineno, name);
768     if (line == NULL) {
769         return -1;
770     }
771 
772     int res = PyFile_WriteObject(line, f, Py_PRINT_RAW);
773     Py_DECREF(line);
774     if (res < 0) {
775         return -1;
776     }
777 
778     int err = 0;
779 
780     int truncation = _TRACEBACK_SOURCE_LINE_INDENT;
781     PyObject* source_line = NULL;
782     int rc = display_source_line_with_margin(
783             f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
784             margin_indent, margin, &truncation, &source_line);
785     if (rc != 0 || !source_line) {
786         /* ignore errors since we can't report them, can we? */
787         err = ignore_source_errors();
788         goto done;
789     }
790 
791     int code_offset = tb->tb_lasti;
792     PyCodeObject* code = frame->f_frame->f_code;
793 
794     int start_line;
795     int end_line;
796     int start_col_byte_offset;
797     int end_col_byte_offset;
798     if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
799                               &end_line, &end_col_byte_offset)) {
800         goto done;
801     }
802 
803     if (start_line < 0 || end_line < 0
804         || start_col_byte_offset < 0
805         || end_col_byte_offset < 0)
806     {
807         goto done;
808     }
809 
810     // When displaying errors, we will use the following generic structure:
811     //
812     //  ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
813     //        ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~
814     //        |              |-> left_end_offset     |                  |-> left_offset
815     //        |-> start_offset                       |-> right_start_offset
816     //
817     // In general we will only have (start_offset, end_offset) but we can gather more information
818     // by analyzing the AST of the text between *start_offset* and *end_offset*. If this succeeds
819     // we could get *left_end_offset* and *right_start_offset* and some selection of characters for
820     // the different ranges (primary_error_char and secondary_error_char). If we cannot obtain the
821     // AST information or we cannot identify special ranges within it, then left_end_offset and
822     // right_end_offset will be set to -1.
823 
824     // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
825     assert(source_line);
826     Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
827     if (start_offset < 0) {
828         err = ignore_source_errors() < 0;
829         goto done;
830     }
831 
832     Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
833     if (end_offset < 0) {
834         err = ignore_source_errors() < 0;
835         goto done;
836     }
837 
838     Py_ssize_t left_end_offset = -1;
839     Py_ssize_t right_start_offset = -1;
840 
841     char *primary_error_char = "^";
842     char *secondary_error_char = primary_error_char;
843 
844     if (start_line == end_line) {
845         int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
846                                             &left_end_offset, &right_start_offset,
847                                             &primary_error_char, &secondary_error_char);
848         if (res < 0 && ignore_source_errors() < 0) {
849             goto done;
850         }
851     }
852     else {
853         // If this is a multi-line expression, then we will highlight until
854         // the last non-whitespace character.
855         const char *source_line_str = PyUnicode_AsUTF8(source_line);
856         if (!source_line_str) {
857             goto done;
858         }
859 
860         Py_ssize_t i = PyUnicode_GET_LENGTH(source_line);
861         while (--i >= 0) {
862             if (!IS_WHITESPACE(source_line_str[i])) {
863                 break;
864             }
865         }
866 
867         end_offset = i + 1;
868     }
869 
870     if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
871         err = -1;
872         goto done;
873     }
874 
875     if (print_error_location_carets(f, truncation, start_offset, end_offset,
876                                     right_start_offset, left_end_offset,
877                                     primary_error_char, secondary_error_char) < 0) {
878         err = -1;
879         goto done;
880     }
881 
882 done:
883     Py_XDECREF(source_line);
884     return err;
885 }
886 
/* Number of identical consecutive entries that are printed in full; further
   repeats are only counted and reported by tb_print_line_repeated(). */
static const int TB_RECURSIVE_CUTOFF = 3; // Also hardcoded in traceback.py.
888 
889 static int
tb_print_line_repeated(PyObject * f,long cnt)890 tb_print_line_repeated(PyObject *f, long cnt)
891 {
892     cnt -= TB_RECURSIVE_CUTOFF;
893     PyObject *line = PyUnicode_FromFormat(
894         (cnt > 1)
895           ? "  [Previous line repeated %ld more times]\n"
896           : "  [Previous line repeated %ld more time]\n",
897         cnt);
898     if (line == NULL) {
899         return -1;
900     }
901     int err = PyFile_WriteObject(line, f, Py_PRINT_RAW);
902     Py_DECREF(line);
903     return err;
904 }
905 
906 static int
tb_printinternal(PyTracebackObject * tb,PyObject * f,long limit,int indent,const char * margin)907 tb_printinternal(PyTracebackObject *tb, PyObject *f, long limit,
908                  int indent, const char *margin)
909 {
910     PyCodeObject *code = NULL;
911     Py_ssize_t depth = 0;
912     PyObject *last_file = NULL;
913     int last_line = -1;
914     PyObject *last_name = NULL;
915     long cnt = 0;
916     PyTracebackObject *tb1 = tb;
917     while (tb1 != NULL) {
918         depth++;
919         tb1 = tb1->tb_next;
920     }
921     while (tb != NULL && depth > limit) {
922         depth--;
923         tb = tb->tb_next;
924     }
925     while (tb != NULL) {
926         code = PyFrame_GetCode(tb->tb_frame);
927         if (last_file == NULL ||
928             code->co_filename != last_file ||
929             last_line == -1 || tb->tb_lineno != last_line ||
930             last_name == NULL || code->co_name != last_name) {
931             if (cnt > TB_RECURSIVE_CUTOFF) {
932                 if (tb_print_line_repeated(f, cnt) < 0) {
933                     goto error;
934                 }
935             }
936             last_file = code->co_filename;
937             last_line = tb->tb_lineno;
938             last_name = code->co_name;
939             cnt = 0;
940         }
941         cnt++;
942         if (cnt <= TB_RECURSIVE_CUTOFF) {
943             if (tb_displayline(tb, f, code->co_filename, tb->tb_lineno,
944                                tb->tb_frame, code->co_name, indent, margin) < 0) {
945                 goto error;
946             }
947 
948             if (PyErr_CheckSignals() < 0) {
949                 goto error;
950             }
951         }
952         Py_CLEAR(code);
953         tb = tb->tb_next;
954     }
955     if (cnt > TB_RECURSIVE_CUTOFF) {
956         if (tb_print_line_repeated(f, cnt) < 0) {
957             goto error;
958         }
959     }
960     return 0;
961 error:
962     Py_XDECREF(code);
963     return -1;
964 }
965 
966 #define PyTraceBack_LIMIT 1000
967 
968 int
_PyTraceBack_Print_Indented(PyObject * v,int indent,const char * margin,const char * header_margin,const char * header,PyObject * f)969 _PyTraceBack_Print_Indented(PyObject *v, int indent, const char *margin,
970                             const char *header_margin, const char *header, PyObject *f)
971 {
972     PyObject *limitv;
973     long limit = PyTraceBack_LIMIT;
974 
975     if (v == NULL) {
976         return 0;
977     }
978     if (!PyTraceBack_Check(v)) {
979         PyErr_BadInternalCall();
980         return -1;
981     }
982     limitv = PySys_GetObject("tracebacklimit");
983     if (limitv && PyLong_Check(limitv)) {
984         int overflow;
985         limit = PyLong_AsLongAndOverflow(limitv, &overflow);
986         if (overflow > 0) {
987             limit = LONG_MAX;
988         }
989         else if (limit <= 0) {
990             return 0;
991         }
992     }
993     if (_Py_WriteIndentedMargin(indent, header_margin, f) < 0) {
994         return -1;
995     }
996 
997     if (PyFile_WriteString(header, f) < 0) {
998         return -1;
999     }
1000 
1001     if (tb_printinternal((PyTracebackObject *)v, f, limit, indent, margin) < 0) {
1002         return -1;
1003     }
1004 
1005     return 0;
1006 }
1007 
1008 int
PyTraceBack_Print(PyObject * v,PyObject * f)1009 PyTraceBack_Print(PyObject *v, PyObject *f)
1010 {
1011     int indent = 0;
1012     const char *margin = NULL;
1013     const char *header_margin = NULL;
1014     const char *header = EXCEPTION_TB_HEADER;
1015 
1016     return _PyTraceBack_Print_Indented(v, indent, margin, header_margin, header, f);
1017 }
1018 
/* Format an unsigned integer (any size_t value) to decimal and write it
   into the file fd.

   This function is signal safe. */

void
_Py_DumpDecimal(int fd, size_t value)
{
    /* Room for every decimal digit of a size_t plus the trailing null byte.
       A size_t needs at most ceil(log10(256) * sizeof(size_t)) digits, and
       53/22 is an upper bound for log10(256). */
    char digits[1 + (sizeof(size_t)*53-1) / 22 + 1];
    char *const stop = &digits[Py_ARRAY_LENGTH(digits) - 1];
    char *cursor = stop;

    /* Fill the buffer backwards, least significant digit first; at least
       one digit is always produced, so 0 is written as "0". */
    *cursor = '\0';
    for (;;) {
        cursor--;
        assert(cursor >= digits);
        *cursor = '0' + (value % 10);
        value /= 10;
        if (value == 0) {
            break;
        }
    }

    _Py_write_noraise(fd, cursor, stop - cursor);
}
1045 
1046 /* Format an integer as hexadecimal with width digits into fd file descriptor.
1047    The function is signal safe. */
1048 void
_Py_DumpHexadecimal(int fd,uintptr_t value,Py_ssize_t width)1049 _Py_DumpHexadecimal(int fd, uintptr_t value, Py_ssize_t width)
1050 {
1051     char buffer[sizeof(uintptr_t) * 2 + 1], *ptr, *end;
1052     const Py_ssize_t size = Py_ARRAY_LENGTH(buffer) - 1;
1053 
1054     if (width > size)
1055         width = size;
1056     /* it's ok if width is negative */
1057 
1058     end = &buffer[size];
1059     ptr = end;
1060     *ptr = '\0';
1061     do {
1062         --ptr;
1063         assert(ptr >= buffer);
1064         *ptr = Py_hexdigits[value & 15];
1065         value >>= 4;
1066     } while ((end - ptr) < width || value);
1067 
1068     _Py_write_noraise(fd, ptr, end - ptr);
1069 }
1070 
1071 void
_Py_DumpASCII(int fd,PyObject * text)1072 _Py_DumpASCII(int fd, PyObject *text)
1073 {
1074     PyASCIIObject *ascii = (PyASCIIObject *)text;
1075     Py_ssize_t i, size;
1076     int truncated;
1077     int kind;
1078     void *data = NULL;
1079     wchar_t *wstr = NULL;
1080     Py_UCS4 ch;
1081 
1082     if (!PyUnicode_Check(text))
1083         return;
1084 
1085     size = ascii->length;
1086     kind = ascii->state.kind;
1087     if (kind == PyUnicode_WCHAR_KIND) {
1088         wstr = ((PyASCIIObject *)text)->wstr;
1089         if (wstr == NULL)
1090             return;
1091         size = ((PyCompactUnicodeObject *)text)->wstr_length;
1092     }
1093     else if (ascii->state.compact) {
1094         if (ascii->state.ascii)
1095             data = ((PyASCIIObject*)text) + 1;
1096         else
1097             data = ((PyCompactUnicodeObject*)text) + 1;
1098     }
1099     else {
1100         data = ((PyUnicodeObject *)text)->data.any;
1101         if (data == NULL)
1102             return;
1103     }
1104 
1105     if (MAX_STRING_LENGTH < size) {
1106         size = MAX_STRING_LENGTH;
1107         truncated = 1;
1108     }
1109     else {
1110         truncated = 0;
1111     }
1112 
1113     // Is an ASCII string?
1114     if (ascii->state.ascii) {
1115         assert(kind == PyUnicode_1BYTE_KIND);
1116         char *str = data;
1117 
1118         int need_escape = 0;
1119         for (i=0; i < size; i++) {
1120             ch = str[i];
1121             if (!(' ' <= ch && ch <= 126)) {
1122                 need_escape = 1;
1123                 break;
1124             }
1125         }
1126         if (!need_escape) {
1127             // The string can be written with a single write() syscall
1128             _Py_write_noraise(fd, str, size);
1129             goto done;
1130         }
1131     }
1132 
1133     for (i=0; i < size; i++) {
1134         if (kind != PyUnicode_WCHAR_KIND)
1135             ch = PyUnicode_READ(kind, data, i);
1136         else
1137             ch = wstr[i];
1138         if (' ' <= ch && ch <= 126) {
1139             /* printable ASCII character */
1140             char c = (char)ch;
1141             _Py_write_noraise(fd, &c, 1);
1142         }
1143         else if (ch <= 0xff) {
1144             PUTS(fd, "\\x");
1145             _Py_DumpHexadecimal(fd, ch, 2);
1146         }
1147         else if (ch <= 0xffff) {
1148             PUTS(fd, "\\u");
1149             _Py_DumpHexadecimal(fd, ch, 4);
1150         }
1151         else {
1152             PUTS(fd, "\\U");
1153             _Py_DumpHexadecimal(fd, ch, 8);
1154         }
1155     }
1156 
1157 done:
1158     if (truncated) {
1159         PUTS(fd, "...");
1160     }
1161 }
1162 
1163 /* Write a frame into the file fd: "File "xxx", line xxx in xxx".
1164 
1165    This function is signal safe. */
1166 
1167 static void
dump_frame(int fd,InterpreterFrame * frame)1168 dump_frame(int fd, InterpreterFrame *frame)
1169 {
1170     PyCodeObject *code = frame->f_code;
1171     PUTS(fd, "  File ");
1172     if (code->co_filename != NULL
1173         && PyUnicode_Check(code->co_filename))
1174     {
1175         PUTS(fd, "\"");
1176         _Py_DumpASCII(fd, code->co_filename);
1177         PUTS(fd, "\"");
1178     } else {
1179         PUTS(fd, "???");
1180     }
1181 
1182     int lineno = PyCode_Addr2Line(code, frame->f_lasti*sizeof(_Py_CODEUNIT));
1183     PUTS(fd, ", line ");
1184     if (lineno >= 0) {
1185         _Py_DumpDecimal(fd, (size_t)lineno);
1186     }
1187     else {
1188         PUTS(fd, "???");
1189     }
1190     PUTS(fd, " in ");
1191 
1192     if (code->co_name != NULL
1193        && PyUnicode_Check(code->co_name)) {
1194         _Py_DumpASCII(fd, code->co_name);
1195     }
1196     else {
1197         PUTS(fd, "???");
1198     }
1199 
1200     PUTS(fd, "\n");
1201 }
1202 
1203 static void
dump_traceback(int fd,PyThreadState * tstate,int write_header)1204 dump_traceback(int fd, PyThreadState *tstate, int write_header)
1205 {
1206     InterpreterFrame *frame;
1207     unsigned int depth;
1208 
1209     if (write_header) {
1210         PUTS(fd, "Stack (most recent call first):\n");
1211     }
1212 
1213     frame = tstate->cframe->current_frame;
1214     if (frame == NULL) {
1215         PUTS(fd, "  <no Python frame>\n");
1216         return;
1217     }
1218 
1219     depth = 0;
1220     while (1) {
1221         if (MAX_FRAME_DEPTH <= depth) {
1222             PUTS(fd, "  ...\n");
1223             break;
1224         }
1225         dump_frame(fd, frame);
1226         frame = frame->previous;
1227         if (frame == NULL) {
1228             break;
1229         }
1230         depth++;
1231     }
1232 }
1233 
1234 /* Dump the traceback of a Python thread into fd. Use write() to write the
1235    traceback and retry if write() is interrupted by a signal (failed with
1236    EINTR), but don't call the Python signal handler.
1237 
1238    The caller is responsible to call PyErr_CheckSignals() to call Python signal
1239    handlers if signals were received. */
1240 void
_Py_DumpTraceback(int fd,PyThreadState * tstate)1241 _Py_DumpTraceback(int fd, PyThreadState *tstate)
1242 {
1243     dump_traceback(fd, tstate, 1);
1244 }
1245 
1246 /* Write the thread identifier into the file 'fd': "Current thread 0xHHHH:\" if
1247    is_current is true, "Thread 0xHHHH:\n" otherwise.
1248 
1249    This function is signal safe. */
1250 
1251 static void
write_thread_id(int fd,PyThreadState * tstate,int is_current)1252 write_thread_id(int fd, PyThreadState *tstate, int is_current)
1253 {
1254     if (is_current)
1255         PUTS(fd, "Current thread 0x");
1256     else
1257         PUTS(fd, "Thread 0x");
1258     _Py_DumpHexadecimal(fd,
1259                         tstate->thread_id,
1260                         sizeof(unsigned long) * 2);
1261     PUTS(fd, " (most recent call first):\n");
1262 }
1263 
1264 /* Dump the traceback of all Python threads into fd. Use write() to write the
1265    traceback and retry if write() is interrupted by a signal (failed with
1266    EINTR), but don't call the Python signal handler.
1267 
1268    The caller is responsible to call PyErr_CheckSignals() to call Python signal
1269    handlers if signals were received. */
1270 const char*
_Py_DumpTracebackThreads(int fd,PyInterpreterState * interp,PyThreadState * current_tstate)1271 _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp,
1272                          PyThreadState *current_tstate)
1273 {
1274     PyThreadState *tstate;
1275     unsigned int nthreads;
1276 
1277     if (current_tstate == NULL) {
1278         /* _Py_DumpTracebackThreads() is called from signal handlers by
1279            faulthandler.
1280 
1281            SIGSEGV, SIGFPE, SIGABRT, SIGBUS and SIGILL are synchronous signals
1282            and are thus delivered to the thread that caused the fault. Get the
1283            Python thread state of the current thread.
1284 
1285            PyThreadState_Get() doesn't give the state of the thread that caused
1286            the fault if the thread released the GIL, and so
1287            _PyThreadState_GET() cannot be used. Read the thread specific
1288            storage (TSS) instead: call PyGILState_GetThisThreadState(). */
1289         current_tstate = PyGILState_GetThisThreadState();
1290     }
1291 
1292     if (interp == NULL) {
1293         if (current_tstate == NULL) {
1294             interp = _PyGILState_GetInterpreterStateUnsafe();
1295             if (interp == NULL) {
1296                 /* We need the interpreter state to get Python threads */
1297                 return "unable to get the interpreter state";
1298             }
1299         }
1300         else {
1301             interp = current_tstate->interp;
1302         }
1303     }
1304     assert(interp != NULL);
1305 
1306     /* Get the current interpreter from the current thread */
1307     tstate = PyInterpreterState_ThreadHead(interp);
1308     if (tstate == NULL)
1309         return "unable to get the thread head state";
1310 
1311     /* Dump the traceback of each thread */
1312     tstate = PyInterpreterState_ThreadHead(interp);
1313     nthreads = 0;
1314     _Py_BEGIN_SUPPRESS_IPH
1315     do
1316     {
1317         if (nthreads != 0)
1318             PUTS(fd, "\n");
1319         if (nthreads >= MAX_NTHREADS) {
1320             PUTS(fd, "...\n");
1321             break;
1322         }
1323         write_thread_id(fd, tstate, tstate == current_tstate);
1324         if (tstate == current_tstate && tstate->interp->gc.collecting) {
1325             PUTS(fd, "  Garbage-collecting\n");
1326         }
1327         dump_traceback(fd, tstate, 0);
1328         tstate = PyThreadState_Next(tstate);
1329         nthreads++;
1330     } while (tstate != NULL);
1331     _Py_END_SUPPRESS_IPH
1332 
1333     return NULL;
1334 }
1335 
1336