1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; either version 2 of the License, or
6    (at your option) any later version.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 
18 #ifndef _UDM_LEX_H
19 #define _UDM_LEX_H
20 
21 
/*
  Lexeme types produced by the scanner.

  The enumerators fall into three groups:
  - Single-character punctuation: the value is the character code itself.
  - Multi-character operators, C/C++ keywords and a few special lexemes:
    consecutive values starting at UDM_LEX_COMMENT, which is pinned right
    after UDM_LEX_TILDE. In ASCII that keeps the whole group above every
    character code used in this enum, whether or not the optional
    punctuation group below is compiled in.
  - Literal, identifier and service lexemes: explicit letter codes.
*/
typedef enum
{
  UDM_LEX_EXCLAM=          '!',        /* 0x21 */
  UDM_LEX_HASH=            '#',        /* 0x23 */
  UDM_LEX_DOLLAR=          '$',        /* 0x24 */
  UDM_LEX_PERCENT=         '%',        /* 0x25 */
  UDM_LEX_AMPERSAND=       '&',        /* 0x26 */
  UDM_LEX_LP=              '(',        /* 0x28 */
  UDM_LEX_RP=              ')',        /* 0x29 */
  UDM_LEX_ASTERISK=        '*',        /* 0x2A */
  UDM_LEX_PLUS=            '+',        /* 0x2B */
  UDM_LEX_COMMA=           ',',        /* 0x2C */
  UDM_LEX_MINUS=           '-',        /* 0x2D */
  UDM_LEX_DOT=             '.',        /* 0x2E */
  UDM_LEX_SLASH=           '/',        /* 0x2F */
  UDM_LEX_COLON=           ':',        /* 0x3A */
  UDM_LEX_SEMICOLON=       ';',        /* 0x3B */
  UDM_LEX_LT=              '<',        /* 0x3C */
  UDM_LEX_EQ=              '=',        /* 0x3D */
  UDM_LEX_GT=              '>',        /* 0x3E */
  UDM_LEX_QUESTION=        '?',        /* 0x3F */
  UDM_LEX_AT=              '@',        /* 0x40 */
  UDM_LEX_LB=              '[',        /* 0x5B */
  UDM_LEX_RB=              ']',        /* 0x5D */
  UDM_LEX_CARET=           '^',        /* 0x5E */
  UDM_LEX_LCB=             '{',        /* 0x7B */
  UDM_LEX_VBAR=            '|',        /* 0x7C */
  UDM_LEX_RCB=             '}',        /* 0x7D */
  UDM_LEX_TILDE=           '~',        /* 0x7E */

#if UDM_EXTENDED_PUNCTUATION_NOT_USED
  UDM_LEX_SPACE=           ' ',        /* 0x20 */
  UDM_LEX_DOUBLE_QUOTE=    '"',        /* 0x22 */  /* Part of a double-quoted string   */
  UDM_LEX_QUOTE=           '\'',       /* 0x27 */  /* Part of a quoted string          */
  UDM_LEX_BACKSLASH=       '\\',       /* 0x5C */  /* Part of a [double] quoted string */
  UDM_LEX_UNDERSCORE=      '_',        /* 0x5F */  /* Part of identifier               */
  UDM_LEX_BACKTICK=        '`',        /* 0x60 */  /* Part of a back-ticked identifier */
#endif

  /*
    Pinned explicitly: an implicit value would continue from the last
    enumerator of the optional group above (0x60) when that group is enabled,
    and the sequence would then run into the letter codes at the end of
    this enum.
  */
  UDM_LEX_COMMENT=         UDM_LEX_TILDE + 1,
  UDM_LEX_INC,                         /* ++ */
  UDM_LEX_DEC,                         /* -- */
  UDM_LEX_MUL_EQ,                      /* *= */
  UDM_LEX_DIV_EQ,                      /* /= */
  UDM_LEX_REM_EQ,                      /* %= */
  UDM_LEX_INC_EQ,                      /* += */
  UDM_LEX_DEC_EQ,                      /* -= */
  UDM_LEX_AND_EQ,                      /* &= */
  UDM_LEX_OR_EQ,                       /* |= */
  UDM_LEX_XOR_EQ,                      /* ^= */
  UDM_LEX_LE,                          /* <= */
  UDM_LEX_GE,                          /* >= */
  UDM_LEX_EQ_EQ,                       /* == */
  UDM_LEX_NOT_EQ,                      /* != */
  UDM_LEX_LSHIFT_EQ,                   /* <<= */
  UDM_LEX_RSHIFT_EQ,                   /* >>= */

  UDM_LEX_BOOL_OR,                     /* || */
  UDM_LEX_BOOL_AND,                    /* && */

  UDM_LEX_LSHIFT,                      /* << */
  UDM_LEX_RSHIFT,                      /* >> */

  /* C data types */
  UDM_LEX_CHAR,
  UDM_LEX_DOUBLE,
  UDM_LEX_FLOAT,
  UDM_LEX_INT,
  UDM_LEX_LONG,
  UDM_LEX_SHORT,
  UDM_LEX_SIGNED,
  UDM_LEX_UNSIGNED,

  /* C keywords */
  UDM_LEX_AUTO,
  UDM_LEX_CASE,
  UDM_LEX_CONST,
  UDM_LEX_DEFAULT,
  UDM_LEX_ENUM,
  UDM_LEX_EXTERN,
  UDM_LEX_GOTO,
  UDM_LEX_REGISTER,
  UDM_LEX_RETURN,
  UDM_LEX_STATIC,
  UDM_LEX_STRUCT,
  UDM_LEX_SWITCH,
  UDM_LEX_TYPEDEF,
  UDM_LEX_UNION,
  UDM_LEX_VOLATILE,

  UDM_LEX_IF,                          /* if */
  UDM_LEX_ELSE,                        /* else */
  UDM_LEX_WHILE,                       /* while */
  UDM_LEX_FOR,                         /* for */
  UDM_LEX_DO,                          /* do */
  UDM_LEX_BREAK,                       /* break */
  UDM_LEX_CONTINUE,                    /* continue */
  UDM_LEX_SIZEOF,                      /* sizeof */

  /* C++ keywords */
  UDM_LEX_OPERATOR,

  UDM_LEX_COUT,                        /* cout */

  /* Literals, identifiers and service lexemes */
  UDM_LEX_CHAR_LITERAL,
  UDM_LEX_STRING=          's',
  UDM_LEX_ESCAPED_STRING=  'S',
  UDM_LEX_IDENT=           'I',
  UDM_LEX_TEXT=            'T',
  UDM_LEX_UINT=            'u',
  UDM_LEX_UNKNOWN=         'U',
  UDM_LEX_EOF=             'E'

} udm_lex_t;
136 
137 
/*
  Map a lexeme type to a string.
  NOTE(review): the implementation is not part of this header; presumably the
  result is a printable name of "lex" - confirm, together with what is
  returned for values that have no name.
*/
const char *UdmLex2str(udm_lex_t lex);
139 
140 
/*
  A single token: a text fragment together with its lexeme type.
  UDM_CONST_TOKEN is declared elsewhere; the scanner macros in this header
  access its "str" and "end" members.
*/
typedef struct lex_token_st
{
  UDM_CONST_TOKEN token;   /* Text of the token          */
  udm_lex_t type;          /* Lexeme type of the token   */
} UDM_LEX_TOKEN;
146 
147 
/*
  Scanner state: the text being parsed, the current read position in it,
  and the most recently scanned token.
*/
typedef struct lex_scanner_st
{
  UDM_LEX_TOKEN token;     /* The last scanned token, for look-ahead */
  UDM_CONST_TOKEN content; /* The entire text to parse      */
  const char *cur;         /* Current position in "content" */
} UDM_LEX_SCANNER;
154 
/*
  Scanner helper macros.

  The "s" argument is a pointer to a scanner with "cur", "content.str" and
  "content.end" members; it is evaluated exactly once by every macro except
  UdmLexScannerEOF, which evaluates it twice.

  UdmLexScannerEOF     - non-zero when "cur" has reached or passed "content.end"
  UdmLexScannerCur     - the current position
  UdmLexScannerShift   - advance the current position by one character
  UdmLexScannerShiftN  - advance the current position by "n" characters
  UdmLexScannerStr     - "content.str"
  UdmLexScannerEnd     - "content.end"
  UdmLexScannerIsSpace - non-zero when "x" is a space, tab, CR or LF

  UdmLexScannerIsSpace needs <string.h>. It searches exactly the four
  white space characters with memchr(): strchr() would also match the
  string terminator and so report '\0' as white space.
*/
#define UdmLexScannerEOF(s)    ((s)->cur >= (s)->content.end)
#define UdmLexScannerCur(s)    ((s)->cur)
#define UdmLexScannerShift(s)  ((s)->cur++)
#define UdmLexScannerShiftN(s,n) do { ((s)->cur+= (n)); } while (0)
#define UdmLexScannerStr(s)    ((s)->content.str)
#define UdmLexScannerEnd(s)    ((s)->content.end)
#define UdmLexScannerIsSpace(x)  (memchr(" \t\r\n", (x), 4) != NULL)
162 
/*
  Scanner functions. Only the declarations are visible in this header, so
  the notes below are derived from names and signatures.
  NOTE(review): the meaning of the udm_bool_t results, and how "s->cur" is
  left when a scan does not match, are not visible here - confirm both in
  the implementation.
*/

/* Presumably advances the scanner past white space - confirm. */
void UdmLexScannerSkipSpaces(UDM_LEX_SCANNER *s);

/* Presumably sets the scanner up to parse "length" bytes at "str" - confirm. */
void UdmLexScannerInit(UDM_LEX_SCANNER *s, const char *str, size_t length);

/*
  Scan routines for individual lexeme classes. Each takes the scanner "s"
  and a token "a"; presumably "a" receives the scanned token - confirm.
*/
udm_bool_t UdmLexScannerScanString(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a);
udm_bool_t UdmLexScannerScanEscapedString(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a);
udm_bool_t UdmLexScannerScanChar(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a);
udm_bool_t UdmLexScannerScanXMLIdentifier(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a);
udm_bool_t UdmLexScannerScanPunctuation(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a);
udm_bool_t UdmLexScannerScanIdentifier(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a);
udm_bool_t UdmLexScannerScanUnsignedNumber(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a);
udm_bool_t UdmLexScannerScanCOperator(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a);
udm_bool_t UdmLexScannerScanCComment(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a);

/*
  Presumably scans forward until "str" is found and types the scanner's
  token as "on_success" or "on_error" accordingly - confirm.
*/
udm_bool_t UdmLexScannerScanUntil(UDM_LEX_SCANNER *scanner,
                                  const UDM_CONST_STR *str,
                                  udm_lex_t on_success,
                                  udm_lex_t on_error);
#endif /* _UDM_LEX_H */
179