1 // Copyright (c) 1999-2018 David Muse
2 // See the COPYING file for more information
3 
4 #include <rudiments/csvsax.h>
5 //#define DEBUG_MESSAGES
6 #include <rudiments/debugprint.h>
7 
// States of the csvsax::parse() state machine.  The header line (column
// names) is handled by the first group of states, the rows that follow it
// by the second group.
enum csvstate {

	// header
	HEADER_START,	// initial state, nothing significant read yet
	COLUMN_START,	// next character begins a column name
	COLUMN,		// inside a column name
	COLUMN_END,	// set while column() is being called
	HEADER_END,	// set while headerEnd() is being called

	// body
	BODY_START,	// set while bodyStart() is being called
	ROW_START,	// next character begins a row
	FIELD_START,	// next character begins a field
	FIELD,		// inside a field
	FIELD_END,	// set while field() is being called
	ROW_END,	// set while rowEnd() is being called
	BODY_END	// set while bodyEnd() is being called
};
22 
23 class csvsaxprivate {
24 	friend class csvsax;
25 	private:
26 		csvstate	_state;
27 		char		_quote;
28 		char		_delimiter;
29 };
30 
csvsax()31 csvsax::csvsax() : sax() {
32 	pvt=new csvsaxprivate;
33 	pvt->_quote='"';
34 	pvt->_delimiter=',';
35 	reset();
36 }
37 
~csvsax()38 csvsax::~csvsax() {
39 	delete pvt;
40 }
41 
reset()42 void csvsax::reset() {
43 	pvt->_state=HEADER_START;
44 	sax::reset();
45 }
46 
setQuote(char quote)47 void csvsax::setQuote(char quote) {
48 	pvt->_quote=quote;
49 }
50 
getQuote() const51 char csvsax::getQuote() const {
52 	return pvt->_quote;
53 }
54 
setDelimiter(char delimiter)55 void csvsax::setDelimiter(char delimiter) {
56 	pvt->_delimiter=delimiter;
57 }
58 
getDelimiter() const59 char csvsax::getDelimiter() const {
60 	return pvt->_delimiter;
61 }
62 
headerStart()63 bool csvsax::headerStart() {
64 	// by default, just return success
65 	debugPrintf("headerStart {\n");
66 	return true;
67 }
68 
column(const char * name,bool quoted)69 bool csvsax::column(const char *name, bool quoted) {
70 	// by default, just return success
71 	debugPrintf("    column: \"%s\" (%squoted)\n",name,(quoted)?"":"not ");
72 	return true;
73 }
74 
headerEnd()75 bool csvsax::headerEnd() {
76 	// by default, just return success
77 	debugPrintf("}\n");
78 	return true;
79 }
80 
bodyStart()81 bool csvsax::bodyStart() {
82 	// by default, just return success
83 	debugPrintf("bodyStart {\n");
84 	return true;
85 }
86 
rowStart()87 bool csvsax::rowStart() {
88 	// by default, just return success
89 	debugPrintf("    rowStart {\n");
90 	return true;
91 }
92 
field(const char * value,bool quoted)93 bool csvsax::field(const char *value, bool quoted) {
94 	// by default, just return success
95 	debugPrintf("        field: \"%s\" (%squoted)\n",
96 					value,(quoted)?"":"not ");
97 	return true;
98 }
99 
rowEnd()100 bool csvsax::rowEnd() {
101 	// by default, just return success
102 	debugPrintf("    }\n");
103 	return true;
104 }
105 
bodyEnd()106 bool csvsax::bodyEnd() {
107 	// by default, just return success
108 	debugPrintf("}\n");
109 	return true;
110 }
111 
parse()112 bool csvsax::parse() {
113 
114 	stringbuffer	current;
115 	bool		quoted=false;
116 	bool		inquotes=false;
117 	bool		ignore=false;
118 	bool		keepchar=false;
119 	char		ch='\0';
120 
121 	for (;;) {
122 
123 		// get a character
124 		if (!keepchar) {
125 			ch=getCharacter();
126 		} else {
127 			keepchar=false;
128 		}
129 
130 		// at the very beginning of the file, skip leading \n or \r
131 		// and return an error if the file/string was empty
132 		if (pvt->_state==HEADER_START) {
133 			if (ch=='\n' || ch=='\r') {
134 				continue;
135 			} else if (ch=='\0') {
136 				return false;
137 			}
138 		}
139 
140 		// handle end of file/string
141 		if (ch=='\0') {
142 			break;
143 		}
144 
145 		// handle various states
146 		if (pvt->_state==HEADER_START) {
147 			headerStart();
148 			pvt->_state=COLUMN_START;
149 		}
150 		if (pvt->_state==COLUMN_START) {
151 			quoted=(ch==pvt->_quote);
152 			pvt->_state=COLUMN;
153 			if (quoted) {
154 				inquotes=true;
155 				continue;
156 			}
157 		}
158 		if (pvt->_state==COLUMN) {
159 			if (inquotes) {
160 				if (ch==pvt->_quote) {
161 					ch=getCharacter();
162 					if (ch!=pvt->_quote) {
163 						inquotes=false;
164 						keepchar=true;
165 						ignore=true;
166 						continue;
167 					}
168 				}
169 			} else {
170 				if (ch==pvt->_delimiter) {
171 					pvt->_state=COLUMN_END;
172 					column(current.getString(),quoted);
173 					current.clear();
174 					ignore=false;
175 					pvt->_state=COLUMN_START;
176 					continue;
177 				} else if (ch=='\n') {
178 					pvt->_state=COLUMN_END;
179 					column(current.getString(),quoted);
180 					current.clear();
181 					ignore=false;
182 					pvt->_state=HEADER_END;
183 					headerEnd();
184 					continue;
185 				}
186 			}
187 			if (!ignore) {
188 				current.append(ch);
189 			}
190 			continue;
191 		}
192 		if (pvt->_state==HEADER_END) {
193 			pvt->_state=BODY_START;
194 			bodyStart();
195 			pvt->_state=ROW_START;
196 		}
197 		if (pvt->_state==ROW_START) {
198 			if (ch=='\r') {
199 				continue;
200 			}
201 			rowStart();
202 			pvt->_state=FIELD_START;
203 		}
204 		if (pvt->_state==FIELD_START) {
205 			quoted=(ch==pvt->_quote);
206 			pvt->_state=FIELD;
207 			if (quoted) {
208 				inquotes=true;
209 				continue;
210 			}
211 		}
212 		if (pvt->_state==FIELD) {
213 			if (inquotes) {
214 				if (ch==pvt->_quote) {
215 					ch=getCharacter();
216 					if (ch!=pvt->_quote) {
217 						inquotes=false;
218 						keepchar=true;
219 						ignore=true;
220 						continue;
221 					}
222 				}
223 			} else {
224 				if (ch==pvt->_delimiter) {
225 					pvt->_state=FIELD_END;
226 					field(current.getString(),quoted);
227 					current.clear();
228 					ignore=false;
229 					pvt->_state=FIELD_START;
230 					continue;
231 				} else if (ch=='\n') {
232 					pvt->_state=FIELD_END;
233 					field(current.getString(),quoted);
234 					current.clear();
235 					ignore=false;
236 					pvt->_state=ROW_END;
237 					rowEnd();
238 					pvt->_state=ROW_START;
239 					continue;
240 				}
241 			}
242 			if (!ignore) {
243 				current.append(ch);
244 			}
245 			continue;
246 		}
247 	}
248 
249 	// document parsed successfully
250 	pvt->_state=BODY_END;
251 	bodyEnd();
252 	return true;
253 }
254