1 #include <stdio.h>
2 #include <string.h>
3 #include <slang.h>
4 
/* Module boilerplate macro from slang.h for the module named "csv".
 * NOTE(review): presumably declares the init/deinit entry points defined
 * at the end of this file -- confirm against slang.h.
 */
SLANG_MODULE(csv);

/* Class id of the CSV object type.  It is 0 until register_csv_type has
 * run, after which it holds the value returned by SLclass_get_class_id.
 */
static int CSV_Type_Id = 0;
8 
9 typedef struct _CSV_Type CSV_Type;
10 struct _CSV_Type
11 {
12    char delimchar;
13    char quotechar;
14    SLang_Name_Type *read_callback;
15    SLang_Any_Type *callback_data;
16 #define CSV_SKIP_BLANK_ROWS	0x01
17 #define CSV_STOP_ON_BLANK_ROWS	0x02
18 #define BLANK_ROW_BEHAVIOR	(CSV_SKIP_BLANK_ROWS|CSV_STOP_ON_BLANK_ROWS)
19 #define CSV_QUOTE_SOME		0x04
20 #define CSV_QUOTE_ALL		0x08
21    int flags;
22 };
23 
/* Substitute the defaults for an unset (zero) delimiter or quote
 * character: ',' for the delimiter and '"' for the quote.
 * Always returns 0.
 */
static int check_special_chars (CSV_Type *csv)
{
   if (0 == csv->delimchar)
     csv->delimchar = ',';

   if (0 == csv->quotechar)
     csv->quotechar = '"';

   return 0;
}
30 
/* Call the decoder's read callback with callback_data as its only
 * argument and fetch the result from the stack.
 *
 * Returns:
 *    1  a string was returned; *sptr holds it as an slstring that the
 *       caller must release with SLang_free_slstring
 *    0  the callback returned NULL; *sptr is NULL
 *   -1  an error occurred; *sptr is NULL
 */
static int execute_read_callback (CSV_Type *csv, char **sptr)
{
   char *str;

   *sptr = NULL;

   if (-1 == SLang_start_arg_list ())
     return -1;
   if (-1 == SLang_push_anytype (csv->callback_data))
     return -1;
   if (-1 == SLang_end_arg_list ())
     return -1;
   if (-1 == SLexecute_function (csv->read_callback))
     return -1;

   /* A NULL result is the callback's way of saying there is no more input */
   if (SLANG_NULL_TYPE == SLang_peek_at_stack ())
     {
	(void) SLang_pop_null ();
	return 0;
     }

   if (-1 == SLang_pop_slstring (&str))
     return -1;

   *sptr = str;
   return 1;
}
55 
56 typedef struct
57 {
58    char **values;
59    SLindex_Type num_allocated;
60    SLindex_Type num;
61 }
62 Values_Array_Type;
63 
/* Push the collected values onto the stack as a 1-d array of strings.
 *
 * If no values were collected, NULL is pushed instead, unless
 * allow_empty_array is non-zero, in which case an empty array is pushed.
 * Once the array has been created, av is reset to an empty state so that
 * a later free_values_array call does not touch the buffer.
 *
 * Returns 0 upon success, or -1 upon failure.
 */
static int push_values_array (Values_Array_Type *av, int allow_empty_array)
{
   SLang_Array_Type *at;

   if (av->num != 0)
     {
	/* Trim the buffer down to exactly the number of values stored */
	char **trimmed;

	trimmed = (char **) SLrealloc ((char *)av->values, av->num*sizeof(char *));
	if (trimmed == NULL)
	  return -1;
	av->values = trimmed;
     }
   else
     {
	if (!allow_empty_array)
	  return SLang_push_null ();

	SLfree ((char *) av->values);
	av->values = NULL;
     }

   av->num_allocated = av->num;

   at = SLang_create_array (SLANG_STRING_TYPE, 0, av->values, &av->num, 1);
   if (NULL == at)
     return -1;

   /* Detach the buffer from av; it was handed to the array above */
   av->values = NULL;
   av->num = 0;
   av->num_allocated = 0;

   return SLang_push_array (at, 1);
}
95 
/* Reset av to the empty state: no buffer, zero capacity, zero values.
 * Always returns 0.
 */
static int init_values_array_type (Values_Array_Type *av)
{
   (void) memset ((char *)av, 0, sizeof (*av));
   return 0;
}
101 
/* Release every stored slstring and then the buffer itself.  Nothing is
 * done when av has no buffer.  The fields of av are not reset.
 */
static void free_values_array (Values_Array_Type *av)
{
   char **values = av->values;
   SLindex_Type idx, count;

   if (values == NULL)
     return;

   count = av->num;
   idx = 0;
   while (idx < count)
     {
	SLang_free_slstring (values[idx]);
	idx++;
     }

   SLfree ((char *)values);
}
114 
/* Append a copy of value (as an slstring) to va, growing the buffer in
 * steps of 256 elements when it is full.
 *
 * Returns 0 upon success, or -1 if the buffer could not be grown or the
 * slstring could not be created.
 */
static int store_value (Values_Array_Type *va, char *value)
{
   char *str;

   if (va->num == va->num_allocated)
     {
	SLindex_Type new_size = va->num_allocated + 256;
	char **new_values;

	new_values = (char **) SLrealloc ((char *)va->values, new_size*sizeof(char *));
	if (new_values == NULL)
	  return -1;

	va->values = new_values;
	va->num_allocated = new_size;
     }

   str = SLang_create_slstring (value);
   va->values[va->num] = str;
   if (str == NULL)
     return -1;

   va->num++;
   return 0;
}
136 
/* NEXT_CHAR(ch): fetch the next input character into ch, reading more
 * input through the read callback when needed.
 *
 * The expansion uses these names from the enclosing scope: csv, line,
 * line_ofs, do_read, status, and the label return_error.
 *
 * Behavior:
 *  - A new line is requested when do_read is set, when the end of the
 *    current line (NUL) is reached, or when a '\r' is seen.
 *  - "\r\n" is consumed as a unit and delivered as '\n'.
 *  - Before each request the current line is freed; do_read is cleared
 *    and line_ofs reset to 0 afterwards.
 *  - If the callback fails (-1), control jumps to return_error.
 *  - If the callback reports no more input (0), ch is set to 0 and line
 *    is left NULL.
 *
 * NOTE(review): a '\r' that is not followed by '\n' also discards the
 * rest of the current line and fetches a new one -- confirm that this is
 * intended.
 */
#define NEXT_CHAR(ch) \
   while (do_read \
	  || (0 == (ch = line[line_ofs++])) \
	  || (ch == '\r')) \
   { \
      if ((do_read == 0) && (ch == '\r') && (line[line_ofs] == '\n')) \
	{ \
	   line_ofs++; \
	   ch = '\n'; \
	   break; \
	} \
      SLang_free_slstring (line); \
      line = NULL; \
      status = execute_read_callback (csv, &line); \
      do_read = 0; \
      if (status == -1) \
	goto return_error; \
      line_ofs = 0; \
      if (status == 0) \
	{ \
	   ch = 0; \
	   break; \
	} \
   }
161 
/* Decode one CSV row, reading input via the object's read callback, and
 * push the result onto the stack.
 *
 * Parameters:
 *   csv    decoder object; it must have a read callback
 *   flags  only the BLANK_ROW_BEHAVIOR bits are examined here:
 *            CSV_SKIP_BLANK_ROWS     blank rows are silently skipped
 *            CSV_STOP_ON_BLANK_ROWS  a blank row ends decoding and an
 *                                    empty array is pushed
 *          With any other combination a blank row is returned as a row
 *          holding a single empty string.
 *
 * Pushed value: an array of strings holding the fields of the row, or
 * NULL when the input is exhausted before any field was seen.
 *
 * Returns the status of the push (0 upon success), or -1 upon error.
 *
 * A quoted field may span several lines.  Inside quotes, a doubled quote
 * character represents a single quote character.
 */
static int decode_csv_row (CSV_Type *csv, int flags)
{
   char *line;
   size_t line_ofs;
   char *value;
   size_t value_size, value_ofs;
   char delimchar, quotechar;
   int return_status;
   Values_Array_Type av;
   int do_read, in_quote;
   int blank_line_seen;
   int is_quoted;

   if (NULL == csv->read_callback)
     {
	SLang_verror (SL_InvalidParm_Error, "CSV decoder object has no read callback function");
	return -1;
     }

   if (-1 == init_values_array_type (&av))
     return -1;

   delimchar = csv->delimchar;
   quotechar = csv->quotechar;
   value_ofs = line_ofs = 0;
   value_size = 0;
   value = NULL;
   line = NULL;
   do_read = 1;			       /* forces NEXT_CHAR to fetch the first line */

   in_quote = 0;
   return_status = -1;
   blank_line_seen = 0;
   is_quoted = 0;		       /* set once any field of this row opens a quote */
   while (1)
     {
	int status;		       /* used by the NEXT_CHAR expansion */
	char ch;

	/* Each pass through the loop stores at most one byte in value
	 * (a character or the terminating NUL), so making room for one
	 * byte here is sufficient.
	 */
	if (value_ofs == value_size)
	  {
	     char *new_value;

	     if (value_size < 64)
	       value_size += 32;
	     else if (value_size < 8192)
	       value_size *= 2;
	     else value_size += 8192;

	     new_value = (char *)SLrealloc (value, value_size);
	     if (new_value == NULL)
	       goto return_error;
	     value = new_value;
	  }

	NEXT_CHAR(ch)

	if ((ch == quotechar) && quotechar)
	  {
	     if (in_quote)
	       {
		  /* Either a doubled quote or the end of the quoted field */
		  NEXT_CHAR(ch)
		  if (ch == quotechar)
		    {
		       value[value_ofs++] = ch;
		       continue;
		    }

		  if ((ch != delimchar) && (ch != 0) && (ch != '\n'))
		    {
		       SLang_verror (SL_Data_Error, "Expecting a delimiter after an end-quote character in field #%ld",
				    (long)av.num+1);
		       goto return_error;
		    }
		  in_quote = 0;
		  /* drop: ch is now a delimiter, newline, or 0 and is
		   * handled by the code below
		   */
	       }
	     else if (value_ofs != 0)
	       {
		  SLang_verror (SL_Data_Error, "Misplaced quote character inside csv field #%ld",
				(long)av.num+1);
		  goto return_error;
	       }
	     else
	       {
		  in_quote = 1;
		  is_quoted = 1;
		  continue;
	       }
	  }

	if (ch == delimchar)
	  {
	     if (in_quote)
	       {
		  value[value_ofs++] = ch;
		  continue;
	       }
	     value[value_ofs] = 0;
	     if (-1 == store_value (&av, value))
	       goto return_error;
	     value_ofs = 0;
	     continue;
	  }
	if ((ch == 0) || (ch == '\n'))
	  {
	     if (in_quote)
	       {
		  if (ch == '\n')
		    {
		       /* Embedded newline: keep it and continue with the next line */
		       value[value_ofs++] = ch;
		       do_read = 1;
		       continue;
		    }
		  SLang_verror (SL_Data_Error, "No closing quote seen parsing CSV data field #%ld",
				(long)av.num+1);
		  goto return_error;
	       }

	     /* Store the pending field unless the input ended with nothing
	      * at all seen for this row.  The is_quoted test keeps a final
	      * row that consists solely of a quoted empty field ("") and
	      * has no trailing newline from being mistaken for the end of
	      * the input.
	      */
	     if ((ch == '\n') || (av.num != 0) || (value_ofs > 0) || is_quoted)
	       {
		  if ((is_quoted == 0)
		      && (ch == '\n') && (av.num == 0) && (value_ofs == 0))
		    {
		       /* blank line */
		       int blank_line_behavior = (flags & BLANK_ROW_BEHAVIOR);
		       if (blank_line_behavior == CSV_SKIP_BLANK_ROWS)
			 {
			    do_read = 1;
			    continue;
			 }
		       if (blank_line_behavior == CSV_STOP_ON_BLANK_ROWS)
			 {
			    blank_line_seen = 1;
			    break;
			 }
		    }
		  value[value_ofs] = 0;
		  if (-1 == store_value (&av, value))
		    goto return_error;
	       }
	     break;		       /* done */
	  }

	value[value_ofs++] = ch;
     }

   /* Get here if at end of line or file */
   return_status = push_values_array (&av, blank_line_seen);
   /* drop */

return_error:
   SLfree (value);
   free_values_array(&av);
   if (line != NULL)
     SLang_free_slstring (line);
   return return_status;
}
320 
/* Release a CSV object together with the callback data and callback
 * function it references, if any.  A NULL csv is ignored.
 */
static void free_csv_type (CSV_Type *csv)
{
   if (NULL == csv)
     return;

   if (NULL != csv->callback_data)
     SLang_free_anytype (csv->callback_data);

   if (NULL != csv->read_callback)
     SLang_free_function (csv->read_callback);

   SLfree ((char *) csv);
}
329 
pop_csv_type(SLang_MMT_Type ** mmtp)330 static CSV_Type *pop_csv_type (SLang_MMT_Type **mmtp)
331 {
332    SLang_MMT_Type *mmt;
333 
334    if (NULL == (mmt = SLang_pop_mmt (CSV_Type_Id)))
335      {
336 	*mmtp = NULL;
337 	return NULL;
338      }
339    *mmtp = mmt;
340    return (CSV_Type *)SLang_object_from_mmt (mmt);
341 }
342 
343 /* Usage: obj = cvs_decoder_new (&read_callback, callback_data, delim, quote, flags) */
new_csv_decoder_intrin(void)344 static void new_csv_decoder_intrin (void)
345 {
346    CSV_Type *csv;
347    SLang_MMT_Type *mmt;
348 
349    if (NULL == (csv = (CSV_Type *)SLmalloc(sizeof(CSV_Type))))
350      return;
351    memset ((char *)csv, 0, sizeof(CSV_Type));
352 
353    if ((-1 == SLang_pop_int (&csv->flags))
354        ||(-1 == SLang_pop_char (&csv->quotechar))
355        || (-1 == SLang_pop_char (&csv->delimchar))
356        || (-1 == check_special_chars (csv))
357        || (-1 == SLang_pop_anytype (&csv->callback_data))
358        || (NULL == (csv->read_callback = SLang_pop_function ()))
359        || (NULL == (mmt = SLang_create_mmt (CSV_Type_Id, (VOID_STAR)csv))))
360      {
361 	free_csv_type (csv);
362 	return;
363      }
364 
365    if (-1 == SLang_push_mmt (mmt))
366      SLang_free_mmt (mmt);
367 }
368 
decode_csv_row_intrin(void)369 static void decode_csv_row_intrin (void)
370 {
371    CSV_Type *csv;
372    SLang_MMT_Type *mmt;
373    int flags = 0;
374    int has_flags = 0;
375 
376    if (SLang_Num_Function_Args == 2)
377      {
378 	if (-1 == SLang_pop_int (&flags))
379 	  return;
380 
381 	has_flags = 1;
382      }
383    if (NULL == (csv = pop_csv_type (&mmt)))
384      return;
385 
386    if (has_flags == 0)
387      flags = csv->flags;
388 
389    (void) decode_csv_row (csv, flags);
390    SLang_free_mmt (mmt);
391 }
392 
393 /* returns a malloced string */
csv_encode(CSV_Type * csv,char ** fields,SLuindex_Type nfields,int flags)394 static char *csv_encode (CSV_Type *csv,
395 			 char **fields, SLuindex_Type nfields,
396 			 int flags)
397 {
398    char *encoded_str, *s;
399    size_t size;
400    SLuindex_Type i;
401    char delimchar, quotechar;
402    int quote_some, quote_all;
403    char *fieldflags;
404 
405    delimchar = csv->delimchar;
406    quotechar = csv->quotechar;
407    quote_some = flags & (CSV_QUOTE_SOME|CSV_QUOTE_ALL);
408    quote_all = flags & CSV_QUOTE_ALL;
409 
410    size = 0;
411    if (nfields > 1)
412      size += nfields-1;		       /* for delimiters */
413    size += 3;			       /* for CRLF\0 */
414 
415    fieldflags = (char *)SLmalloc(nfields+1);
416    if (fieldflags == NULL)
417      return NULL;
418 
419    for (i = 0; i < nfields; i++)
420      {
421 	char ch, *f, *field = fields[i];
422 	int needs_quote = 0;
423 
424 	fieldflags[i] = 0;
425 	if ((field == NULL) || (*field == 0))
426 	  {
427 	     if (quote_some)
428 	       {
429 		  fieldflags[i] = 1;
430 		  size += 2;
431 	       }
432 	     continue;
433 	  }
434 	f = field;
435 	while ((ch = *f++) != 0)
436 	  {
437 	     size++;
438 	     if (ch == quotechar)
439 	       {
440 		  needs_quote=1;
441 		  size++;
442 		  continue;
443 	       }
444 	     if (ch == delimchar)
445 	       {
446 		  needs_quote = 1;
447 		  continue;
448 	       }
449 
450 	     if ((unsigned char)ch > ' ')
451 	       continue;
452 
453 	     if (ch == '\n')
454 	       {
455 #if 0
456 		  /* Do not insert a \r-- excel cannot deal with \r\n in a field.
457 		   * It requires \r\n only at the end of the CSV line
458 		   */
459 		  size++;	       /* for \r */
460 #endif
461 		  needs_quote = 1;
462 		  continue;
463 	       }
464 	     if (quote_some)
465 	       needs_quote = 1;
466 	  }
467 
468 	if (needs_quote || quote_all)
469 	  {
470 	     fieldflags[i] = 1;
471 	     size += 2;
472 	  }
473      }
474 
475    if (NULL == (encoded_str = (char *)SLmalloc (size)))
476      {
477 	SLfree (fieldflags);
478 	return NULL;
479      }
480    s = encoded_str;
481 
482    i = 0;
483    while (i < nfields)
484      {
485 	char ch, *f, *field;
486 	int needs_quote;
487 
488 	needs_quote = fieldflags[i];
489 	field = fields[i];
490 	i++;
491 
492 	if ((i > 1) && (i <= nfields))
493 	  *s++ = delimchar;
494 
495 	if (needs_quote) *s++ = quotechar;
496 
497 	if ((field == NULL) || (*field == 0))
498 	  {
499 	     if (needs_quote)
500 	       *s++ = quotechar;
501 	     continue;
502 	  }
503 
504 	f = field;
505 	while ((ch = *f++) != 0)
506 	  {
507 	     if (ch == quotechar)
508 	       {
509 		  *s++ = ch;
510 		  *s++ = ch;
511 		  continue;
512 	       }
513 
514 	     if (ch == '\n')
515 	       {
516 		  /* *s++ = '\r'; --- See above comment about excel and \r\n in a field*/
517 		  *s++ = ch;
518 		  continue;
519 	       }
520 
521 	     *s++ = ch;
522 	  }
523 	if (needs_quote)
524 	  *s++ = quotechar;
525      }
526 
527    *s++ = '\r';
528    *s++ = '\n';
529    *s = 0;
530 
531    SLfree (fieldflags);
532    return encoded_str;
533 }
534 
encode_csv_row_intrin(void)535 static void encode_csv_row_intrin (void)
536 {
537    SLang_Array_Type *at;
538    CSV_Type *csv;
539    SLang_MMT_Type *mmt;
540    int flags;
541    int has_flags;
542    char *str;
543 
544    if (SLang_Num_Function_Args == 3)
545      {
546 	if (-1 == SLang_pop_int (&flags))
547 	  return;
548 	has_flags = 1;
549      }
550    else has_flags = 0;
551 
552    if (-1 == SLang_pop_array_of_type (&at, SLANG_STRING_TYPE))
553      return;
554 
555    if (NULL == (csv = pop_csv_type (&mmt)))
556      {
557 	SLang_free_array (at);
558 	return;
559      }
560 
561    if (0 == has_flags)
562      flags = csv->flags;
563 
564    str = csv_encode (csv, (char **)at->data, at->num_elements, flags);
565    SLang_free_mmt (mmt);
566    SLang_free_array (at);
567    (void) SLang_push_malloced_string (str);
568 }
569 
new_csv_encoder_intrin(void)570 static void new_csv_encoder_intrin (void)
571 {
572    CSV_Type *csv;
573    SLang_MMT_Type *mmt;
574 
575    if (NULL == (csv = (CSV_Type *)SLmalloc(sizeof(CSV_Type))))
576      return;
577    memset ((char *)csv, 0, sizeof(CSV_Type));
578 
579    if ((-1 == SLang_pop_int (&csv->flags))
580        ||(-1 == SLang_pop_char (&csv->quotechar))
581        || (-1 == SLang_pop_char (&csv->delimchar))
582        || (-1 == check_special_chars (csv))
583        || (NULL == (mmt = SLang_create_mmt (CSV_Type_Id, (VOID_STAR)csv))))
584      {
585 	free_csv_type (csv);
586 	return;
587      }
588 
589    if (-1 == SLang_push_mmt (mmt))
590      SLang_free_mmt (mmt);
591 }
592 
/* Placeholder type value handed to SLclass_patch_intrin_fun_table1 by
 * register_csv_type.
 */
#define DUMMY_CSV_TYPE ((SLtype)-1)

/* Intrinsic functions exported by this module.  Each is declared with
 * MAKE_INTRINSIC_0 and a void return type; the functions pop their own
 * arguments from the stack and push their own results.
 */
static SLang_Intrin_Fun_Type Module_Intrinsics [] =
{
   MAKE_INTRINSIC_0("_csv_decoder_new", new_csv_decoder_intrin, SLANG_VOID_TYPE),
   MAKE_INTRINSIC_0("_csv_decode_row", decode_csv_row_intrin, SLANG_VOID_TYPE),
   MAKE_INTRINSIC_0("_csv_encoder_new", new_csv_encoder_intrin, SLANG_VOID_TYPE),
   MAKE_INTRINSIC_0("_csv_encode_row", encode_csv_row_intrin, SLANG_VOID_TYPE),
   SLANG_END_INTRIN_FUN_TABLE
};
602 
/* Integer constants exported by this module.  Note that the C macro
 * CSV_STOP_ON_BLANK_ROWS is exported under the name CSV_STOP_BLANK_ROWS
 * (without "ON").
 */
static SLang_IConstant_Type Module_Constants [] =
{
   MAKE_ICONSTANT("CSV_SKIP_BLANK_ROWS", CSV_SKIP_BLANK_ROWS),
   MAKE_ICONSTANT("CSV_STOP_BLANK_ROWS", CSV_STOP_ON_BLANK_ROWS),
   MAKE_ICONSTANT("CSV_QUOTE_SOME", CSV_QUOTE_SOME),
   MAKE_ICONSTANT("CSV_QUOTE_ALL", CSV_QUOTE_ALL),
   SLANG_END_ICONST_TABLE
};
611 
/* Destroy callback installed on the CSV class by register_csv_type.
 * Frees the CSV_Type that f points to; the type argument is unused.
 */
static void destroy_csv (SLtype type, VOID_STAR f)
{
   CSV_Type *csv = (CSV_Type *) f;

   (void) type;
   free_csv_type (csv);
}
617 
register_csv_type(void)618 static int register_csv_type (void)
619 {
620    SLang_Class_Type *cl;
621 
622    if (CSV_Type_Id != 0)
623      return 0;
624 
625    if (NULL == (cl = SLclass_allocate_class ("CSV_Type")))
626      return -1;
627 
628    if (-1 == SLclass_set_destroy_function (cl, destroy_csv))
629      return -1;
630 
631    /* By registering as SLANG_VOID_TYPE, slang will dynamically allocate a
632     * type.
633     */
634    if (-1 == SLclass_register_class (cl, SLANG_VOID_TYPE, sizeof (CSV_Type), SLANG_CLASS_TYPE_MMT))
635      return -1;
636 
637    CSV_Type_Id = SLclass_get_class_id (cl);
638    if (-1 == SLclass_patch_intrin_fun_table1 (Module_Intrinsics, DUMMY_CSV_TYPE, CSV_Type_Id))
639      return -1;
640 
641    return 0;
642 }
643 
init_csv_module_ns(char * ns_name)644 int init_csv_module_ns (char *ns_name)
645 {
646    SLang_NameSpace_Type *ns = SLns_create_namespace (ns_name);
647    if (ns == NULL)
648      return -1;
649 
650    if (-1 == register_csv_type ())
651      return -1;
652 
653    if ((-1 == SLns_add_intrin_fun_table (ns, Module_Intrinsics, NULL))
654        || (-1 == SLns_add_iconstant_table (ns, Module_Constants, NULL)))
655      return -1;
656 
657    return 0;
658 }
659 
/* Optional module hook.  This module has nothing to do here. */
void deinit_csv_module (void)
{
   /* intentionally empty */
}
664