1 #ifndef Py_TOKENIZER_H
2 #define Py_TOKENIZER_H
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
6 
7 #include "object.h"
8 
9 /* Tokenizer interface */
10 
11 #include "token.h"      /* For token types */
12 
/* Capacity limits for the fixed-size stacks kept in struct tok_state. */
#define MAXINDENT 100   /* Max indentation level (indstack, altindstack) */
#define MAXLEVEL 200    /* Max parentheses level (parenstack, parenlinenostack) */
15 
/* Source-decoding state, stored in tok_state.decoding_state as part of the
   PEP 263 (source encoding) handling.
   NOTE(review): only STATE_NORMAL is documented in this header; the exact
   meaning of STATE_INIT and STATE_RAW should be confirmed against the
   tokenizer implementation. */
enum decoding_state {
    STATE_INIT,         /* first enumerator (value 0) */
    STATE_RAW,
    STATE_NORMAL        /* have a codec associated with input */
};
21 
22 /* Tokenizer state */
23 struct tok_state {
24     /* Input state; buf <= cur <= inp <= end */
25     /* NB an entire line is held in the buffer */
26     char *buf;          /* Input buffer, or NULL; malloc'ed if fp != NULL */
27     char *cur;          /* Next character in buffer */
28     char *inp;          /* End of data in buffer */
29     char *end;          /* End of input buffer if buf != NULL */
30     char *start;        /* Start of current token if not NULL */
31     int done;           /* E_OK normally, E_EOF at EOF, otherwise error code */
32     /* NB If done != E_OK, cur must be == inp!!! */
33     FILE *fp;           /* Rest of input; NULL if tokenizing a string */
34     int tabsize;        /* Tab spacing */
35     int indent;         /* Current indentation index */
36     int indstack[MAXINDENT];            /* Stack of indents */
37     int atbol;          /* Nonzero if at begin of new line */
38     int pendin;         /* Pending indents (if > 0) or dedents (if < 0) */
39     const char *prompt, *nextprompt;          /* For interactive prompting */
40     int lineno;         /* Current line number */
41     int first_lineno;   /* First line of a single line or multi line string
42                            expression (cf. issue 16806) */
43     int level;          /* () [] {} Parentheses nesting level */
44             /* Used to allow free continuations inside them */
45     char parenstack[MAXLEVEL];
46     int parenlinenostack[MAXLEVEL];
47     PyObject *filename;
48     /* Stuff for checking on different tab sizes */
49     int altindstack[MAXINDENT];         /* Stack of alternate indents */
50     /* Stuff for PEP 0263 */
51     enum decoding_state decoding_state;
52     int decoding_erred;         /* whether erred in decoding  */
53     int read_coding_spec;       /* whether 'coding:...' has been read  */
54     char *encoding;         /* Source encoding. */
55     int cont_line;          /* whether we are in a continuation line. */
56     const char* line_start;     /* pointer to start of current line */
57     const char* multi_line_start; /* pointer to start of first line of
58                                      a single line or multi line string
59                                      expression (cf. issue 16806) */
60     PyObject *decoding_readline; /* open(...).readline */
61     PyObject *decoding_buffer;
62     const char* enc;        /* Encoding for the current str. */
63     const char* str;
64     const char* input; /* Tokenizer's newline translated copy of the string. */
65 
66     int type_comments;      /* Whether to look for type comments */
67 
68     /* async/await related fields (still needed depending on feature_version) */
69     int async_hacks;     /* =1 if async/await aren't always keywords */
70     int async_def;        /* =1 if tokens are inside an 'async def' body. */
71     int async_def_indent; /* Indentation level of the outermost 'async def'. */
72     int async_def_nl;     /* =1 if the outermost 'async def' had at least one
73                              NEWLINE token after it. */
74 };
75 
/* Tokenizer entry points.
 *
 * NOTE(review): the parameter names below were added for readability and
 * are descriptive guesses; the definitions are not visible from this
 * header, so confirm each name and contract against the implementation.
 */

/* Create tokenizer state for a string; presumably `str` is the source text
   and the result is NULL on failure -- confirm. */
extern struct tok_state *PyTokenizer_FromString(const char *str,
                                                int exec_input);

/* Same shape as PyTokenizer_FromString; presumably `str` is already UTF-8
   encoded -- confirm. */
extern struct tok_state *PyTokenizer_FromUTF8(const char *str,
                                              int exec_input);

/* Create tokenizer state reading from a stdio stream; presumably `enc` is
   the encoding and `ps1`/`ps2` are the interactive prompts -- confirm. */
extern struct tok_state *PyTokenizer_FromFile(FILE *fp, const char *enc,
                                              const char *ps1,
                                              const char *ps2);

/* Release a tokenizer state obtained from one of the constructors above. */
extern void PyTokenizer_Free(struct tok_state *tok);

/* Fetch the next token; presumably returns the token type and stores the
   token's bounds through `p_start` / `p_end` -- confirm. */
extern int PyTokenizer_Get(struct tok_state *tok, char **p_start,
                           char **p_end);
82 
/* Alias: every use of the identifier tok_dump after this point is rewritten
   by the preprocessor to the _Py-prefixed name _Py_tok_dump. */
#define tok_dump _Py_tok_dump
84 
85 #ifdef __cplusplus
86 }
87 #endif
88 #endif /* !Py_TOKENIZER_H */
89