1 /* PSPP - a program for statistical analysis. 2 Copyright (C) 1997-9, 2000, 2010, 2011, 2013, 2014 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17 #ifndef LEXER_H 18 #define LEXER_H 1 19 20 #include <stdbool.h> 21 #include <stddef.h> 22 #include <unistd.h> 23 24 #include "data/identifier.h" 25 #include "data/variable.h" 26 #include "libpspp/cast.h" 27 #include "libpspp/compiler.h" 28 #include "libpspp/prompt.h" 29 30 struct lexer; 31 32 /* The syntax mode for which a syntax file is intended. */ 33 enum lex_syntax_mode 34 { 35 LEX_SYNTAX_AUTO, /* Try to guess intent. */ 36 LEX_SYNTAX_INTERACTIVE, /* Interactive mode. */ 37 LEX_SYNTAX_BATCH /* Batch mode. */ 38 }; 39 40 /* Handling of errors. */ 41 enum lex_error_mode 42 { 43 LEX_ERROR_TERMINAL, /* Discard input line and continue reading. */ 44 LEX_ERROR_CONTINUE, /* Continue to next command, except for 45 cascading failures. */ 46 LEX_ERROR_STOP /* Stop processing. */ 47 }; 48 49 /* Reads a single syntax file as a stream of bytes encoded in UTF-8. 50 51 Not opaque. */ 52 struct lex_reader 53 { 54 const struct lex_reader_class *class; 55 enum lex_syntax_mode syntax; 56 enum lex_error_mode error; 57 char *encoding; 58 char *file_name; /* NULL if not associated with a file. */ 59 int line_number; /* 1-based initial line number, 0 if none. */ 60 bool eof; 61 }; 62 63 /* An implementation of a lex_reader. */ 64 struct lex_reader_class 65 { 66 /* Reads up to N bytes of data from READER into N. Returns the positive 67 number of bytes read if successful, or zero at end of input or on 68 error. 69 70 STYLE provides a hint to interactive readers as to what kind of syntax 71 is being read right now. */ 72 size_t (*read) (struct lex_reader *reader, char *buf, size_t n, 73 enum prompt_style style); 74 75 /* Closes and destroys READER, releasing any allocated storage. 76 77 The caller will free the 'file_name' member of READER, so the 78 implementation should not do so. */ 79 void (*destroy) (struct lex_reader *reader); 80 }; 81 82 /* Helper functions for lex_reader. */ 83 void lex_reader_init (struct lex_reader *, const struct lex_reader_class *); 84 void lex_reader_set_file_name (struct lex_reader *, const char *file_name); 85 86 /* Creating various kinds of lex_readers. */ 87 struct lex_reader *lex_reader_for_file (const char *file_name, 88 const char *encoding, 89 enum lex_syntax_mode syntax, 90 enum lex_error_mode error); 91 struct lex_reader *lex_reader_for_string (const char *, const char *encoding); 92 struct lex_reader *lex_reader_for_format (const char *, const char *, ...) 93 PRINTF_FORMAT (1, 3); 94 struct lex_reader *lex_reader_for_substring_nocopy (struct substring, const char *encoding); 95 96 /* Initialization. */ 97 struct lexer *lex_create (void); 98 void lex_destroy (struct lexer *); 99 100 /* Files. */ 101 void lex_include (struct lexer *, struct lex_reader *); 102 void lex_append (struct lexer *, struct lex_reader *); 103 104 /* Advancing. */ 105 void lex_get (struct lexer *); 106 107 /* Token testing functions. */ 108 bool lex_is_number (const struct lexer *); 109 double lex_number (const struct lexer *); 110 bool lex_is_integer (const struct lexer *); 111 long lex_integer (const struct lexer *); 112 bool lex_is_string (const struct lexer *); 113 114 /* Token testing functions with lookahead. */ 115 bool lex_next_is_number (const struct lexer *, int n); 116 double lex_next_number (const struct lexer *, int n); 117 bool lex_next_is_integer (const struct lexer *, int n); 118 long lex_next_integer (const struct lexer *, int n); 119 bool lex_next_is_string (const struct lexer *, int n); 120 121 /* Token matching functions. */ 122 bool lex_match (struct lexer *, enum token_type); 123 bool lex_match_id (struct lexer *, const char *); 124 bool lex_match_id_n (struct lexer *, const char *, size_t n); 125 bool lex_match_int (struct lexer *, int); 126 bool lex_match_phrase (struct lexer *, const char *s); 127 128 /* Forcible matching functions. */ 129 bool lex_force_match (struct lexer *, enum token_type) WARN_UNUSED_RESULT; 130 bool lex_force_match_id (struct lexer *, const char *) WARN_UNUSED_RESULT; 131 bool lex_force_int (struct lexer *) WARN_UNUSED_RESULT; 132 bool lex_force_num (struct lexer *) WARN_UNUSED_RESULT; 133 bool lex_force_id (struct lexer *) WARN_UNUSED_RESULT; 134 bool lex_force_string (struct lexer *) WARN_UNUSED_RESULT; 135 bool lex_force_string_or_id (struct lexer *) WARN_UNUSED_RESULT; 136 137 /* Token accessors. */ 138 enum token_type lex_token (const struct lexer *); 139 double lex_tokval (const struct lexer *); 140 const char *lex_tokcstr (const struct lexer *); 141 struct substring lex_tokss (const struct lexer *); 142 143 /* Looking ahead. */ 144 const struct token *lex_next (const struct lexer *, int n); 145 enum token_type lex_next_token (const struct lexer *, int n); 146 const char *lex_next_tokcstr (const struct lexer *, int n); 147 double lex_next_tokval (const struct lexer *, int n); 148 struct substring lex_next_tokss (const struct lexer *, int n); 149 150 /* Current position. */ 151 int lex_get_first_line_number (const struct lexer *, int n); 152 int lex_get_last_line_number (const struct lexer *, int n); 153 int lex_get_first_column (const struct lexer *, int n); 154 int lex_get_last_column (const struct lexer *, int n); 155 const char *lex_get_file_name (const struct lexer *); 156 const char *lex_get_encoding (const struct lexer *); 157 158 /* Issuing errors. */ 159 void lex_error (struct lexer *, const char *, ...) PRINTF_FORMAT (2, 3); 160 void lex_next_error (struct lexer *, int n0, int n1, const char *, ...) 161 PRINTF_FORMAT (4, 5); 162 int lex_end_of_command (struct lexer *); 163 164 void lex_error_expecting (struct lexer *, const char *, ...) SENTINEL(0); 165 #define lex_error_expecting(...) \ 166 lex_error_expecting(__VA_ARGS__, NULL_SENTINEL) 167 168 void lex_sbc_only_once (const char *); 169 void lex_sbc_missing (const char *); 170 171 void lex_spec_only_once (struct lexer *, const char *subcommand, 172 const char *specification); 173 void lex_spec_missing (struct lexer *, const char *subcommand, 174 const char *specification); 175 176 void lex_error_valist (struct lexer *, const char *, va_list) 177 PRINTF_FORMAT (2, 0); 178 void lex_next_error_valist (struct lexer *lexer, int n0, int n1, 179 const char *format, va_list) 180 PRINTF_FORMAT (4, 0); 181 182 /* Error handling. */ 183 enum lex_syntax_mode lex_get_syntax_mode (const struct lexer *); 184 enum lex_error_mode lex_get_error_mode (const struct lexer *); 185 void lex_discard_rest_of_command (struct lexer *); 186 void lex_interactive_reset (struct lexer *); 187 void lex_discard_noninteractive (struct lexer *); 188 189 #endif /* lexer.h */ 190