1 #include <stdio.h>
2 #include <string.h>
3 #include <slang.h>
4
/* Module declaration macro from slang.h for the module named "csv". */
SLANG_MODULE(csv);

/* Class id of CSV_Type.  It stays 0 until register_csv_type() stores the
 * id obtained from SLclass_get_class_id().
 */
static int CSV_Type_Id = 0;
8
9 typedef struct _CSV_Type CSV_Type;
10 struct _CSV_Type
11 {
12 char delimchar;
13 char quotechar;
14 SLang_Name_Type *read_callback;
15 SLang_Any_Type *callback_data;
16 #define CSV_SKIP_BLANK_ROWS 0x01
17 #define CSV_STOP_ON_BLANK_ROWS 0x02
18 #define BLANK_ROW_BEHAVIOR (CSV_SKIP_BLANK_ROWS|CSV_STOP_ON_BLANK_ROWS)
19 #define CSV_QUOTE_SOME 0x04
20 #define CSV_QUOTE_ALL 0x08
21 int flags;
22 };
23
/* Supply the default delimiter (',') and quote character ('"') for any of
 * the two that is still 0.  Always returns 0.
 */
static int check_special_chars (CSV_Type *csv)
{
   if (!csv->delimchar)
     csv->delimchar = ',';

   if (!csv->quotechar)
     csv->quotechar = '"';

   return 0;
}
30
/* Call the decoder's S-Lang read callback with csv->callback_data as its
 * only argument and fetch the result from the stack.
 *
 * Returns:
 *    1  the callback returned a string; *sptr holds it as an slstring
 *    0  the callback returned NULL; *sptr is NULL
 *   -1  an error occurred; *sptr is NULL
 */
static int execute_read_callback (CSV_Type *csv, char **sptr)
{
   char *str;

   *sptr = NULL;

   if (-1 == SLang_start_arg_list ())
     return -1;
   if (-1 == SLang_push_anytype (csv->callback_data))
     return -1;
   if (-1 == SLang_end_arg_list ())
     return -1;
   if (-1 == SLexecute_function (csv->read_callback))
     return -1;

   /* A NULL result is not an error: report it with a 0 return. */
   if (SLANG_NULL_TYPE == SLang_peek_at_stack ())
     {
        (void) SLang_pop_null ();
        return 0;
     }

   if (-1 == SLang_pop_slstring (&str))
     return -1;

   *sptr = str;
   return 1;
}
55
/* Growable list of the field values collected for one CSV row. */
typedef struct
{
   char **values;                      /* field strings created by store_value() */
   SLindex_Type num_allocated;         /* number of slots allocated in values */
   SLindex_Type num;                   /* number of slots in use */
}
Values_Array_Type;
63
/* Push the collected values onto the S-Lang stack as a 1-d String_Type
 * array.
 *
 * When no values were collected, NULL is pushed instead unless
 * allow_empty_array is non-zero, in which case an empty array is pushed.
 * The values buffer is handed to SLang_create_array() as the array data,
 * and on success the bookkeeping in av is cleared so that
 * free_values_array() has nothing left to release.
 *
 * Returns -1 on failure, otherwise the result of the push.
 */
static int push_values_array (Values_Array_Type *av, int allow_empty_array)
{
   SLang_Array_Type *at;

   if (av->num != 0)
     {
        /* Trim the buffer to the number of values actually stored. */
        char **trimmed;

        trimmed = (char **)SLrealloc ((char *)av->values, av->num*sizeof(char *));
        if (trimmed == NULL)
          return -1;
        av->values = trimmed;
     }
   else
     {
        if (!allow_empty_array)
          return SLang_push_null ();

        SLfree ((char *) av->values);
        av->values = NULL;
     }

   av->num_allocated = av->num;

   at = SLang_create_array (SLANG_STRING_TYPE, 0, av->values, &av->num, 1);
   if (NULL == at)
     return -1;

   av->values = NULL;
   av->num = 0;
   av->num_allocated = 0;

   return SLang_push_array (at, 1);
}
95
/* Zero every member of *av, giving an empty value list.  Always returns 0. */
static int init_values_array_type (Values_Array_Type *av)
{
   memset (av, 0, sizeof (*av));
   return 0;
}
101
/* Release every stored string and the values buffer itself.
 *
 * Afterwards *av is reset to the empty state, matching what
 * push_values_array() does when it gives the buffer away, so that a second
 * call on the same object is harmless instead of a double free.
 */
static void free_values_array (Values_Array_Type *av)
{
   SLindex_Type i, num;
   char **values;

   if (NULL == (values = av->values))
     return;

   num = av->num;
   for (i = 0; i < num; i++)
     SLang_free_slstring (values[i]);
   SLfree ((char *)values);

   /* Do not leave dangling pointers or stale counts behind. */
   av->values = NULL;
   av->num = 0;
   av->num_allocated = 0;
}
114
/* Append a copy of value (made with SLang_create_slstring) to the list,
 * enlarging the buffer by 256 slots whenever it is full.
 * Returns 0 on success, -1 on failure.
 */
static int store_value (Values_Array_Type *va, char *value)
{
   char *copy;

   if (va->num == va->num_allocated)
     {
        SLindex_Type new_count = va->num_allocated + 256;
        char **grown;

        grown = (char **)SLrealloc ((char *)va->values, new_count*sizeof(char *));
        if (grown == NULL)
          return -1;

        va->values = grown;
        va->num_allocated = new_count;
     }

   /* The slot is written even on failure; num is only advanced on success. */
   copy = SLang_create_slstring (value);
   va->values[va->num] = copy;
   if (copy == NULL)
     return -1;

   va->num++;
   return 0;
}
136
/* NEXT_CHAR(ch): fetch the next input character into ch.
 *
 * The macro expands inside decode_csv_row() and relies on its locals
 * line, line_ofs, do_read, status and csv, and on the return_error label.
 *
 *  - A new string is requested from the read callback when do_read is set
 *    or when the end of the current string (NUL) has been reached.
 *  - "\r\n" is collapsed to a single '\n'.
 *  - When the callback reports end of input (status 0), ch is set to 0.
 *  - When the callback fails (status -1), control jumps to return_error.
 *
 * NOTE(review): a '\r' that is not followed by '\n' also falls through to
 * the re-read path, so the rest of the current string is discarded --
 * confirm that this is intended.
 */
#define NEXT_CHAR(ch) \
   while (do_read \
          || (0 == (ch = line[line_ofs++])) \
          || (ch == '\r')) \
     { \
        if ((do_read == 0) && (ch == '\r') && (line[line_ofs] == '\n')) \
          { \
             line_ofs++; \
             ch = '\n'; \
             break; \
          } \
        SLang_free_slstring (line); \
        line = NULL; \
        status = execute_read_callback (csv, &line); \
        do_read = 0; \
        if (status == -1) \
          goto return_error; \
        line_ofs = 0; \
        if (status == 0) \
          { \
             ch = 0; \
             break; \
          } \
     }
161
/* Decode one CSV row, pulling input through the decoder's read callback,
 * and push the result onto the S-Lang stack:
 *
 *   - an array of strings holding the fields of the row;
 *   - an empty array when a blank row is met and flags selects
 *     CSV_STOP_ON_BLANK_ROWS;
 *   - NULL when the input ends before any field was seen.
 *
 * Only the BLANK_ROW_BEHAVIOR bits of flags are examined here.  Inside a
 * quoted field the delimiter and '\n' are literal and a doubled quote
 * character stands for one quote character.
 *
 * Returns -1 on error (nothing is pushed by this function), otherwise the
 * result of push_values_array().
 */
static int decode_csv_row (CSV_Type *csv, int flags)
{
   char *line;
   size_t line_ofs;
   char *value;
   size_t value_size, value_ofs;
   char delimchar, quotechar;
   int return_status;
   Values_Array_Type av;
   int do_read, in_quote;
   int blank_line_seen;
   int is_quoted;

   if (NULL == csv->read_callback)
     {
        SLang_verror (SL_InvalidParm_Error, "CSV decoder object has no read callback function");
        return -1;
     }

   if (-1 == init_values_array_type (&av))
     return -1;

   delimchar = csv->delimchar;
   quotechar = csv->quotechar;
   value_ofs = line_ofs = 0;
   value_size = 0;
   value = NULL;
   line = NULL;
   do_read = 1;                        /* the first NEXT_CHAR must fetch input */

   in_quote = 0;
   return_status = -1;
   blank_line_seen = 0;
   is_quoted = 0;
   while (1)
     {
        int status;
        char ch;

        /* Each pass stores at most one byte in value, so guaranteeing one
         * free byte here is sufficient.
         */
        if (value_ofs == value_size)
          {
             char *new_value;

             if (value_size < 64)
               value_size += 32;
             else if (value_size < 8192)
               value_size *= 2;
             else value_size += 8192;

             new_value = (char *)SLrealloc (value, value_size);
             if (new_value == NULL)
               goto return_error;
             value = new_value;
          }

        NEXT_CHAR(ch)

        if ((ch == quotechar) && quotechar)
          {
             if (in_quote)
               {
                  /* Either a doubled quote or the end of the quoted field. */
                  NEXT_CHAR(ch)
                  if (ch == quotechar)
                    {
                       value[value_ofs++] = ch;
                       continue;
                    }

                  if ((ch != delimchar) && (ch != 0) && (ch != '\n'))
                    {
                       SLang_verror (SL_Data_Error, "Expecting a delimiter after an end-quote character in field #%ld",
                                     (long)av.num+1);
                       goto return_error;
                    }
                  in_quote = 0;
                  /* drop: ch is now a delimiter, '\n' or 0 and is handled below */
               }
             else if (value_ofs != 0)
               {
                  SLang_verror (SL_Data_Error, "Misplaced quote character inside csv field #%ld",
                                (long)av.num+1);
                  goto return_error;
               }
             else
               {
                  in_quote = 1;
                  is_quoted = 1;
                  continue;
               }
          }

        if (ch == delimchar)
          {
             if (in_quote)
               {
                  value[value_ofs++] = ch;
                  continue;
               }
             value[value_ofs] = 0;
             if (-1 == store_value (&av, value))
               goto return_error;
             value_ofs = 0;
             continue;
          }

        if ((ch == 0) || (ch == '\n'))
          {
             if (in_quote)
               {
                  if (ch == '\n')
                    {
                       /* Embedded newline: keep it and continue the field
                        * with the next string from the callback.
                        */
                       value[value_ofs++] = ch;
                       do_read = 1;
                       continue;
                    }
                  SLang_verror (SL_Data_Error, "No closing quote seen parsing CSV data field #%ld",
                                (long)av.num+1);
                  goto return_error;
               }

             /* Store the last field of the row.  is_quoted is tested so
              * that a row made of a single quoted empty field ("") that is
              * terminated by the end of the input is still returned as one
              * empty field rather than being taken for the end of the data.
              */
             if ((ch == '\n') || (av.num != 0) || (value_ofs > 0) || is_quoted)
               {
                  if ((is_quoted == 0)
                      && (ch == '\n') && (av.num == 0) && (value_ofs == 0))
                    {
                       /* blank line */
                       int blank_line_behavior = (flags & BLANK_ROW_BEHAVIOR);
                       if (blank_line_behavior == CSV_SKIP_BLANK_ROWS)
                         {
                            do_read = 1;
                            continue;
                         }
                       if (blank_line_behavior == CSV_STOP_ON_BLANK_ROWS)
                         {
                            blank_line_seen = 1;
                            break;
                         }
                       /* otherwise the blank row yields one empty field */
                    }
                  value[value_ofs] = 0;
                  if (-1 == store_value (&av, value))
                    goto return_error;
               }
             break;                    /* done */
          }

        value[value_ofs++] = ch;
     }

   /* Get here if at end of line or file */
   return_status = push_values_array (&av, blank_line_seen);
   /* drop into the common cleanup */

return_error:
   SLfree (value);
   free_values_array(&av);
   if (line != NULL)
     SLang_free_slstring (line);
   return return_status;
}
320
/* Release a CSV object together with the callback data and callback
 * function it references.  A NULL pointer is accepted.
 */
static void free_csv_type (CSV_Type *csv)
{
   if (csv != NULL)
     {
        if (NULL != csv->callback_data)
          SLang_free_anytype (csv->callback_data);

        if (NULL != csv->read_callback)
          SLang_free_function (csv->read_callback);

        SLfree ((char *)csv);
     }
}
329
pop_csv_type(SLang_MMT_Type ** mmtp)330 static CSV_Type *pop_csv_type (SLang_MMT_Type **mmtp)
331 {
332 SLang_MMT_Type *mmt;
333
334 if (NULL == (mmt = SLang_pop_mmt (CSV_Type_Id)))
335 {
336 *mmtp = NULL;
337 return NULL;
338 }
339 *mmtp = mmt;
340 return (CSV_Type *)SLang_object_from_mmt (mmt);
341 }
342
343 /* Usage: obj = cvs_decoder_new (&read_callback, callback_data, delim, quote, flags) */
new_csv_decoder_intrin(void)344 static void new_csv_decoder_intrin (void)
345 {
346 CSV_Type *csv;
347 SLang_MMT_Type *mmt;
348
349 if (NULL == (csv = (CSV_Type *)SLmalloc(sizeof(CSV_Type))))
350 return;
351 memset ((char *)csv, 0, sizeof(CSV_Type));
352
353 if ((-1 == SLang_pop_int (&csv->flags))
354 ||(-1 == SLang_pop_char (&csv->quotechar))
355 || (-1 == SLang_pop_char (&csv->delimchar))
356 || (-1 == check_special_chars (csv))
357 || (-1 == SLang_pop_anytype (&csv->callback_data))
358 || (NULL == (csv->read_callback = SLang_pop_function ()))
359 || (NULL == (mmt = SLang_create_mmt (CSV_Type_Id, (VOID_STAR)csv))))
360 {
361 free_csv_type (csv);
362 return;
363 }
364
365 if (-1 == SLang_push_mmt (mmt))
366 SLang_free_mmt (mmt);
367 }
368
decode_csv_row_intrin(void)369 static void decode_csv_row_intrin (void)
370 {
371 CSV_Type *csv;
372 SLang_MMT_Type *mmt;
373 int flags = 0;
374 int has_flags = 0;
375
376 if (SLang_Num_Function_Args == 2)
377 {
378 if (-1 == SLang_pop_int (&flags))
379 return;
380
381 has_flags = 1;
382 }
383 if (NULL == (csv = pop_csv_type (&mmt)))
384 return;
385
386 if (has_flags == 0)
387 flags = csv->flags;
388
389 (void) decode_csv_row (csv, flags);
390 SLang_free_mmt (mmt);
391 }
392
393 /* returns a malloced string */
csv_encode(CSV_Type * csv,char ** fields,SLuindex_Type nfields,int flags)394 static char *csv_encode (CSV_Type *csv,
395 char **fields, SLuindex_Type nfields,
396 int flags)
397 {
398 char *encoded_str, *s;
399 size_t size;
400 SLuindex_Type i;
401 char delimchar, quotechar;
402 int quote_some, quote_all;
403 char *fieldflags;
404
405 delimchar = csv->delimchar;
406 quotechar = csv->quotechar;
407 quote_some = flags & (CSV_QUOTE_SOME|CSV_QUOTE_ALL);
408 quote_all = flags & CSV_QUOTE_ALL;
409
410 size = 0;
411 if (nfields > 1)
412 size += nfields-1; /* for delimiters */
413 size += 3; /* for CRLF\0 */
414
415 fieldflags = (char *)SLmalloc(nfields+1);
416 if (fieldflags == NULL)
417 return NULL;
418
419 for (i = 0; i < nfields; i++)
420 {
421 char ch, *f, *field = fields[i];
422 int needs_quote = 0;
423
424 fieldflags[i] = 0;
425 if ((field == NULL) || (*field == 0))
426 {
427 if (quote_some)
428 {
429 fieldflags[i] = 1;
430 size += 2;
431 }
432 continue;
433 }
434 f = field;
435 while ((ch = *f++) != 0)
436 {
437 size++;
438 if (ch == quotechar)
439 {
440 needs_quote=1;
441 size++;
442 continue;
443 }
444 if (ch == delimchar)
445 {
446 needs_quote = 1;
447 continue;
448 }
449
450 if ((unsigned char)ch > ' ')
451 continue;
452
453 if (ch == '\n')
454 {
455 #if 0
456 /* Do not insert a \r-- excel cannot deal with \r\n in a field.
457 * It requires \r\n only at the end of the CSV line
458 */
459 size++; /* for \r */
460 #endif
461 needs_quote = 1;
462 continue;
463 }
464 if (quote_some)
465 needs_quote = 1;
466 }
467
468 if (needs_quote || quote_all)
469 {
470 fieldflags[i] = 1;
471 size += 2;
472 }
473 }
474
475 if (NULL == (encoded_str = (char *)SLmalloc (size)))
476 {
477 SLfree (fieldflags);
478 return NULL;
479 }
480 s = encoded_str;
481
482 i = 0;
483 while (i < nfields)
484 {
485 char ch, *f, *field;
486 int needs_quote;
487
488 needs_quote = fieldflags[i];
489 field = fields[i];
490 i++;
491
492 if ((i > 1) && (i <= nfields))
493 *s++ = delimchar;
494
495 if (needs_quote) *s++ = quotechar;
496
497 if ((field == NULL) || (*field == 0))
498 {
499 if (needs_quote)
500 *s++ = quotechar;
501 continue;
502 }
503
504 f = field;
505 while ((ch = *f++) != 0)
506 {
507 if (ch == quotechar)
508 {
509 *s++ = ch;
510 *s++ = ch;
511 continue;
512 }
513
514 if (ch == '\n')
515 {
516 /* *s++ = '\r'; --- See above comment about excel and \r\n in a field*/
517 *s++ = ch;
518 continue;
519 }
520
521 *s++ = ch;
522 }
523 if (needs_quote)
524 *s++ = quotechar;
525 }
526
527 *s++ = '\r';
528 *s++ = '\n';
529 *s = 0;
530
531 SLfree (fieldflags);
532 return encoded_str;
533 }
534
encode_csv_row_intrin(void)535 static void encode_csv_row_intrin (void)
536 {
537 SLang_Array_Type *at;
538 CSV_Type *csv;
539 SLang_MMT_Type *mmt;
540 int flags;
541 int has_flags;
542 char *str;
543
544 if (SLang_Num_Function_Args == 3)
545 {
546 if (-1 == SLang_pop_int (&flags))
547 return;
548 has_flags = 1;
549 }
550 else has_flags = 0;
551
552 if (-1 == SLang_pop_array_of_type (&at, SLANG_STRING_TYPE))
553 return;
554
555 if (NULL == (csv = pop_csv_type (&mmt)))
556 {
557 SLang_free_array (at);
558 return;
559 }
560
561 if (0 == has_flags)
562 flags = csv->flags;
563
564 str = csv_encode (csv, (char **)at->data, at->num_elements, flags);
565 SLang_free_mmt (mmt);
566 SLang_free_array (at);
567 (void) SLang_push_malloced_string (str);
568 }
569
new_csv_encoder_intrin(void)570 static void new_csv_encoder_intrin (void)
571 {
572 CSV_Type *csv;
573 SLang_MMT_Type *mmt;
574
575 if (NULL == (csv = (CSV_Type *)SLmalloc(sizeof(CSV_Type))))
576 return;
577 memset ((char *)csv, 0, sizeof(CSV_Type));
578
579 if ((-1 == SLang_pop_int (&csv->flags))
580 ||(-1 == SLang_pop_char (&csv->quotechar))
581 || (-1 == SLang_pop_char (&csv->delimchar))
582 || (-1 == check_special_chars (csv))
583 || (NULL == (mmt = SLang_create_mmt (CSV_Type_Id, (VOID_STAR)csv))))
584 {
585 free_csv_type (csv);
586 return;
587 }
588
589 if (-1 == SLang_push_mmt (mmt))
590 SLang_free_mmt (mmt);
591 }
592
/* Placeholder type id handed to SLclass_patch_intrin_fun_table1() by
 * register_csv_type().  No entry in the table below refers to it.
 */
#define DUMMY_CSV_TYPE ((SLtype)-1)

/* Intrinsic functions added to the module's namespace.  All of them are
 * declared with no C-level arguments; each one pops its own arguments from
 * the S-Lang stack.
 */
static SLang_Intrin_Fun_Type Module_Intrinsics [] =
{
   MAKE_INTRINSIC_0("_csv_decoder_new", new_csv_decoder_intrin, SLANG_VOID_TYPE),
   MAKE_INTRINSIC_0("_csv_decode_row", decode_csv_row_intrin, SLANG_VOID_TYPE),
   MAKE_INTRINSIC_0("_csv_encoder_new", new_csv_encoder_intrin, SLANG_VOID_TYPE),
   MAKE_INTRINSIC_0("_csv_encode_row", encode_csv_row_intrin, SLANG_VOID_TYPE),
   SLANG_END_INTRIN_FUN_TABLE
};
602
/* Integer constants added to the module's namespace.  Note that the C
 * macro CSV_STOP_ON_BLANK_ROWS is exported under the shorter name
 * "CSV_STOP_BLANK_ROWS".
 */
static SLang_IConstant_Type Module_Constants [] =
{
   MAKE_ICONSTANT("CSV_SKIP_BLANK_ROWS", CSV_SKIP_BLANK_ROWS),
   MAKE_ICONSTANT("CSV_STOP_BLANK_ROWS", CSV_STOP_ON_BLANK_ROWS),
   MAKE_ICONSTANT("CSV_QUOTE_SOME", CSV_QUOTE_SOME),
   MAKE_ICONSTANT("CSV_QUOTE_ALL", CSV_QUOTE_ALL),
   SLANG_END_ICONST_TABLE
};
611
/* Destroy function registered for the CSV class: frees the CSV_Type that
 * f points to.  The type argument is not used.
 */
static void destroy_csv (SLtype type, VOID_STAR f)
{
   CSV_Type *csv = (CSV_Type *)f;

   (void) type;
   free_csv_type (csv);
}
617
register_csv_type(void)618 static int register_csv_type (void)
619 {
620 SLang_Class_Type *cl;
621
622 if (CSV_Type_Id != 0)
623 return 0;
624
625 if (NULL == (cl = SLclass_allocate_class ("CSV_Type")))
626 return -1;
627
628 if (-1 == SLclass_set_destroy_function (cl, destroy_csv))
629 return -1;
630
631 /* By registering as SLANG_VOID_TYPE, slang will dynamically allocate a
632 * type.
633 */
634 if (-1 == SLclass_register_class (cl, SLANG_VOID_TYPE, sizeof (CSV_Type), SLANG_CLASS_TYPE_MMT))
635 return -1;
636
637 CSV_Type_Id = SLclass_get_class_id (cl);
638 if (-1 == SLclass_patch_intrin_fun_table1 (Module_Intrinsics, DUMMY_CSV_TYPE, CSV_Type_Id))
639 return -1;
640
641 return 0;
642 }
643
init_csv_module_ns(char * ns_name)644 int init_csv_module_ns (char *ns_name)
645 {
646 SLang_NameSpace_Type *ns = SLns_create_namespace (ns_name);
647 if (ns == NULL)
648 return -1;
649
650 if (-1 == register_csv_type ())
651 return -1;
652
653 if ((-1 == SLns_add_intrin_fun_table (ns, Module_Intrinsics, NULL))
654 || (-1 == SLns_add_iconstant_table (ns, Module_Constants, NULL)))
655 return -1;
656
657 return 0;
658 }
659
/* This function is optional.  It is intentionally empty: this file
 * performs no work when the module is deinitialized.
 */
void deinit_csv_module (void)
{
}
664