1 // Copyright (c) 1999-2018 David Muse
2 // See the COPYING file for more information
3
4 #include <rudiments/csvsax.h>
5 //#define DEBUG_MESSAGES
6 #include <rudiments/debugprint.h>
7
// Positions of the parser within a csv document.  csvsax::parse() keeps
// the current position in csvsaxprivate::_state and csvsax::reset() returns
// it to HEADER_START.
enum csvstate {
	// nothing but leading line breaks has been read yet
	HEADER_START=0,
	// a header column is about to begin
	COLUMN_START=1,
	// the characters of a header column are being collected
	COLUMN=2,
	// a header column is complete and is being reported
	COLUMN_END=3,
	// the header line is complete
	HEADER_END=4,
	// the body is about to begin
	BODY_START=5,
	// a row is about to begin
	ROW_START=6,
	// a field is about to begin
	FIELD_START=7,
	// the characters of a field are being collected
	FIELD=8,
	// a field is complete and is being reported
	FIELD_END=9,
	// a row is complete
	ROW_END=10,
	// the end of the input has been reached
	BODY_END=11
};
22
23 class csvsaxprivate {
24 friend class csvsax;
25 private:
26 csvstate _state;
27 char _quote;
28 char _delimiter;
29 };
30
csvsax()31 csvsax::csvsax() : sax() {
32 pvt=new csvsaxprivate;
33 pvt->_quote='"';
34 pvt->_delimiter=',';
35 reset();
36 }
37
~csvsax()38 csvsax::~csvsax() {
39 delete pvt;
40 }
41
reset()42 void csvsax::reset() {
43 pvt->_state=HEADER_START;
44 sax::reset();
45 }
46
setQuote(char quote)47 void csvsax::setQuote(char quote) {
48 pvt->_quote=quote;
49 }
50
getQuote() const51 char csvsax::getQuote() const {
52 return pvt->_quote;
53 }
54
setDelimiter(char delimiter)55 void csvsax::setDelimiter(char delimiter) {
56 pvt->_delimiter=delimiter;
57 }
58
getDelimiter() const59 char csvsax::getDelimiter() const {
60 return pvt->_delimiter;
61 }
62
headerStart()63 bool csvsax::headerStart() {
64 // by default, just return success
65 debugPrintf("headerStart {\n");
66 return true;
67 }
68
column(const char * name,bool quoted)69 bool csvsax::column(const char *name, bool quoted) {
70 // by default, just return success
71 debugPrintf(" column: \"%s\" (%squoted)\n",name,(quoted)?"":"not ");
72 return true;
73 }
74
headerEnd()75 bool csvsax::headerEnd() {
76 // by default, just return success
77 debugPrintf("}\n");
78 return true;
79 }
80
bodyStart()81 bool csvsax::bodyStart() {
82 // by default, just return success
83 debugPrintf("bodyStart {\n");
84 return true;
85 }
86
rowStart()87 bool csvsax::rowStart() {
88 // by default, just return success
89 debugPrintf(" rowStart {\n");
90 return true;
91 }
92
field(const char * value,bool quoted)93 bool csvsax::field(const char *value, bool quoted) {
94 // by default, just return success
95 debugPrintf(" field: \"%s\" (%squoted)\n",
96 value,(quoted)?"":"not ");
97 return true;
98 }
99
rowEnd()100 bool csvsax::rowEnd() {
101 // by default, just return success
102 debugPrintf(" }\n");
103 return true;
104 }
105
bodyEnd()106 bool csvsax::bodyEnd() {
107 // by default, just return success
108 debugPrintf("}\n");
109 return true;
110 }
111
parse()112 bool csvsax::parse() {
113
114 stringbuffer current;
115 bool quoted=false;
116 bool inquotes=false;
117 bool ignore=false;
118 bool keepchar=false;
119 char ch='\0';
120
121 for (;;) {
122
123 // get a character
124 if (!keepchar) {
125 ch=getCharacter();
126 } else {
127 keepchar=false;
128 }
129
130 // at the very beginning of the file, skip leading \n or \r
131 // and return an error if the file/string was empty
132 if (pvt->_state==HEADER_START) {
133 if (ch=='\n' || ch=='\r') {
134 continue;
135 } else if (ch=='\0') {
136 return false;
137 }
138 }
139
140 // handle end of file/string
141 if (ch=='\0') {
142 break;
143 }
144
145 // handle various states
146 if (pvt->_state==HEADER_START) {
147 headerStart();
148 pvt->_state=COLUMN_START;
149 }
150 if (pvt->_state==COLUMN_START) {
151 quoted=(ch==pvt->_quote);
152 pvt->_state=COLUMN;
153 if (quoted) {
154 inquotes=true;
155 continue;
156 }
157 }
158 if (pvt->_state==COLUMN) {
159 if (inquotes) {
160 if (ch==pvt->_quote) {
161 ch=getCharacter();
162 if (ch!=pvt->_quote) {
163 inquotes=false;
164 keepchar=true;
165 ignore=true;
166 continue;
167 }
168 }
169 } else {
170 if (ch==pvt->_delimiter) {
171 pvt->_state=COLUMN_END;
172 column(current.getString(),quoted);
173 current.clear();
174 ignore=false;
175 pvt->_state=COLUMN_START;
176 continue;
177 } else if (ch=='\n') {
178 pvt->_state=COLUMN_END;
179 column(current.getString(),quoted);
180 current.clear();
181 ignore=false;
182 pvt->_state=HEADER_END;
183 headerEnd();
184 continue;
185 }
186 }
187 if (!ignore) {
188 current.append(ch);
189 }
190 continue;
191 }
192 if (pvt->_state==HEADER_END) {
193 pvt->_state=BODY_START;
194 bodyStart();
195 pvt->_state=ROW_START;
196 }
197 if (pvt->_state==ROW_START) {
198 if (ch=='\r') {
199 continue;
200 }
201 rowStart();
202 pvt->_state=FIELD_START;
203 }
204 if (pvt->_state==FIELD_START) {
205 quoted=(ch==pvt->_quote);
206 pvt->_state=FIELD;
207 if (quoted) {
208 inquotes=true;
209 continue;
210 }
211 }
212 if (pvt->_state==FIELD) {
213 if (inquotes) {
214 if (ch==pvt->_quote) {
215 ch=getCharacter();
216 if (ch!=pvt->_quote) {
217 inquotes=false;
218 keepchar=true;
219 ignore=true;
220 continue;
221 }
222 }
223 } else {
224 if (ch==pvt->_delimiter) {
225 pvt->_state=FIELD_END;
226 field(current.getString(),quoted);
227 current.clear();
228 ignore=false;
229 pvt->_state=FIELD_START;
230 continue;
231 } else if (ch=='\n') {
232 pvt->_state=FIELD_END;
233 field(current.getString(),quoted);
234 current.clear();
235 ignore=false;
236 pvt->_state=ROW_END;
237 rowEnd();
238 pvt->_state=ROW_START;
239 continue;
240 }
241 }
242 if (!ignore) {
243 current.append(ch);
244 }
245 continue;
246 }
247 }
248
249 // document parsed successfully
250 pvt->_state=BODY_END;
251 bodyEnd();
252 return true;
253 }
254