1 #ifndef Py_TOKENIZER_H
2 #define Py_TOKENIZER_H
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
6 
7 #include "object.h"
8 
9 /* Tokenizer interface */
10 
11 #include "token.h"      /* For token types */
12 
13 #define MAXINDENT 100   /* Max indentation level */
14 
15 /* Tokenizer state */
16 struct tok_state {
17     /* Input state; buf <= cur <= inp <= end */
18     /* NB an entire line is held in the buffer */
19     char *buf;          /* Input buffer, or NULL; malloc'ed if fp != NULL */
20     char *cur;          /* Next character in buffer */
21     char *inp;          /* End of data in buffer */
22     char *end;          /* End of input buffer if buf != NULL */
23     char *start;        /* Start of current token if not NULL */
24     int done;           /* E_OK normally, E_EOF at EOF, otherwise error code */
25     /* NB If done != E_OK, cur must be == inp!!! */
26     FILE *fp;           /* Rest of input; NULL if tokenizing a string */
27     int tabsize;        /* Tab spacing */
28     int indent;         /* Current indentation index */
29     int indstack[MAXINDENT];            /* Stack of indents */
30     int atbol;          /* Nonzero if at begin of new line */
31     int pendin;         /* Pending indents (if > 0) or dedents (if < 0) */
32     char *prompt, *nextprompt;          /* For interactive prompting */
33     int lineno;         /* Current line number */
34     int level;          /* () [] {} Parentheses nesting level */
35             /* Used to allow free continuations inside them */
36     /* Stuff for checking on different tab sizes */
37     const char *filename;       /* For error messages */
38     int altwarning;     /* Issue warning if alternate tabs don't match */
39     int alterror;       /* Issue error if alternate tabs don't match */
40     int alttabsize;     /* Alternate tab spacing */
41     int altindstack[MAXINDENT];         /* Stack of alternate indents */
42     /* Stuff for PEP 0263 */
43     int decoding_state;         /* -1:decoding, 0:init, 1:raw */
44     int decoding_erred;         /* whether erred in decoding  */
45     int read_coding_spec;       /* whether 'coding:...' has been read  */
46     char *encoding;
47     int cont_line;          /* whether we are in a continuation line. */
48     const char* line_start;     /* pointer to start of current line */
49 #ifndef PGEN
50     PyObject *decoding_readline; /* codecs.open(...).readline */
51     PyObject *decoding_buffer;
52 #endif
53     const char* enc;
54     const char* str;
55     const char* input; /* Tokenizer's newline translated copy of the string. */
56 };
57 
58 extern struct tok_state *PyTokenizer_FromString(const char *, int);
59 extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
60 extern void PyTokenizer_Free(struct tok_state *);
61 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
62 #if defined(PGEN) || defined(Py_USING_UNICODE)
63 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
64                                           int len, int *offset);
65 #endif
66 
67 #ifdef __cplusplus
68 }
69 #endif
70 #endif /* !Py_TOKENIZER_H */
71