1 /*  Copyright (c) 2007-2021 H.Merijn Brand.  All rights reserved.
2  *  Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
3  *  This program is free software; you can redistribute it and/or
4  *  modify it under the same terms as Perl itself.
5  */
6 #define PERL_NO_GET_CONTEXT
7 #include <EXTERN.h>
8 #include <perl.h>
9 #include <XSUB.h>
10 #define DPPP_PL_parser_NO_DUMMY
11 #define NEED_utf8_to_uvchr_buf
12 #define NEED_my_snprintf
13 #define NEED_pv_escape
14 #define NEED_pv_pretty
15 #ifndef PERLIO_F_UTF8
16 #  define PERLIO_F_UTF8	0x00008000
17 #  endif
18 #ifndef MAXINT
19 #  define MAXINT ((int)(~(unsigned)0 >> 1))
20 #  endif
21 #include "ppport.h"
22 #define is_utf8_sv(s) is_utf8_string ((U8 *)SvPV_nolen (s), SvCUR (s))
23 
/* Build-time configuration and character constants.
 *
 * MAINT_DEBUG: when greater than 1 the stderr-tracing variants of is_SEP ()
 * and is_QUOTE () are compiled in instead of the plain macros. */
24 #define MAINT_DEBUG	0
25 
/* Size in bytes of the csv_t.buffer staging area */
26 #define BUFFER_SIZE	1024
27 
/* NOTE(review): CSV_XS_TYPE_WARN and CSV_XS_TYPE_IV share the value 1; the
 * code using these is not visible here - presumably column type codes that
 * belong with csv_t.types, confirm against the users further down. */
28 #define CSV_XS_TYPE_WARN	1
29 #define CSV_XS_TYPE_PV		0
30 #define CSV_XS_TYPE_IV		1
31 #define CSV_XS_TYPE_NV		2
32 
33 /* maximum length for EOL, SEP, and QUOTE - keep in sync with .pm */
34 #define MAX_ATTR_LEN	16
35 
/* Bit flags; CSV_FLAGS_QUO is the bit cx_was_quoted () tests in a field's
 * entry of the flags array. The other bits are used outside the visible part
 * of the file. */
36 #define CSV_FLAGS_QUO		0x0001
37 #define CSV_FLAGS_BIN		0x0002
38 #define CSV_FLAGS_EIF		0x0004
39 #define CSV_FLAGS_MIS		0x0010
40 
/* Bits stored in csv_t.has_hooks; cx_SetupCsv () sets HOOK_AFTER_PARSE and
 * HOOK_BEFORE_PRINT when the matching callback is a code ref. */
41 #define HOOK_ERROR		0x0001
42 #define HOOK_AFTER_PARSE	0x0002
43 #define HOOK_BEFORE_PRINT	0x0004
44 
45 #ifdef __THW_370__
46 /* EBCDIC on os390 z/OS: IS_EBCDIC reads better than __THW_370__ */
47 #define IS_EBCDIC
48 #endif
49 
/* Character constants, written as C character literals so they follow the
 * platform's execution character set */
50 #define CH_TAB		'\t'
51 #define CH_NL		'\n'
52 #define CH_CR		'\r'
53 #define CH_SPACE	' '
54 #define CH_QUO		'"'
55 
/* is_csv_binary (ch): true when ch counts as a "binary" character.
 *
 * EBCDIC builds: ch is first translated through ebcdic2ascii[] (the result is
 * parked in the file-scope scratch byte `ec`); the translated code is binary
 * when it is below 0x20 or at/above 0x7f, except that CH_TAB never is, and
 * EOF always is.
 * Other builds: ch is binary when it is below CH_SPACE or at/above CH_DEL,
 * except for CH_TAB.
 *
 * Both variants evaluate `ch` more than once and do not parenthesize it, so
 * pass a plain variable. The EBCDIC variant writes the shared static `ec`. */
56 #ifdef IS_EBCDIC
57 #define CH_DEL		'\007'
58 static unsigned char ec, ebcdic2ascii[256] = {
59     0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f,
60     0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
61     0x10, 0x11, 0x12, 0x13, 0x9d, 0x0a, 0x08, 0x87,
62     0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f,
63     0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
64     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
65     0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
66     0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
67     0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
68     0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
69     0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
70     0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
71     0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
72     0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
73     0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
74     0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
75     0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
76     0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
77     0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
78     0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
79     0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
80     0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae,
81     0xac, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
82     0xbd, 0xbe, 0xdd, 0xa8, 0xaf, 0x5d, 0xb4, 0xd7,
83     0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
84     /*          v this 0xa0 really should be 0xad. Needed for UTF = binary */
85     0x48, 0x49, 0xa0, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
86     0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
87     0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff,
88     0x5c, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
89     0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
90     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
91     0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
92     };
93 #define is_csv_binary(ch) ((((ec = ebcdic2ascii[ch]) < 0x20 || ec >= 0x7f) && ch != CH_TAB) || ch == EOF)
94 #else
95 #define CH_DEL		'\177'
96 #define is_csv_binary(ch) ((ch < CH_SPACE || ch >= CH_DEL) && ch != CH_TAB)
97 #endif
/* CH_EOL / CH_SEP / CH_QUOTE expand to the first byte of the eol / sep / quo
 * arrays of a csv_t, so they need a variable named `csv` in scope and can be
 * used as lvalues. The CH_*X values lie outside the byte range: is_SEP () and
 * is_QUOTE () store CH_SEPX / CH_QUOTEX into the current character after
 * matching a multi-byte separator / quote, and is_EOL () tests for CH_EOLX. */
98 #define CH_EOLX		1215
99 #define CH_EOL		*csv->eol
100 #define CH_SEPX		8888
101 #define CH_SEP		*csv->sep
102 #define CH_QUOTEX	8889
103 #define CH_QUOTE	*csv->quo
104 
/* NOTE(review): presumably the bit in csv_t.useIO that marks EOF (the struct
 * comment says useIO is "Also used to indicate EOF"); its users are not
 * visible in this part of the file. */
105 #define useIO_EOF	0x10
106 
/* Perl-style negated if */
107 #define unless(expr)	if (!(expr))
108 
/* True when f is non-NULL and a reference to a value of SV type x. Get-magic
 * is run first when f is SvGMAGICAL; the `|| 1` makes the result of mg_get ()
 * irrelevant to the outcome. f is evaluated several times. */
109 #define _is_reftype(f,x) \
110     (f && ((SvGMAGICAL (f) && mg_get (f)) || 1) && SvROK (f) && SvTYPE (SvRV (f)) == x)
111 #define _is_arrayref(f) _is_reftype (f, SVt_PVAV)
112 #define _is_hashref(f)  _is_reftype (f, SVt_PVHV)
113 #define _is_coderef(f)  _is_reftype (f, SVt_PVCV)
114 
/* Reset a scalar: SvSetUndef passes a NULL string to sv_setpvn (),
 * SvSetEmpty stores a zero-length string through the set-magic-aware
 * sv_setpvn_mg (). */
115 #define SvSetUndef(sv)	sv_setpvn    (sv, NULL, 0)
116 #define SvSetEmpty(sv)	sv_setpvn_mg (sv, "",   0)
117 
/* Croak unless `self` is a reference to a hash, then point `hv` at that hash.
 * Needs variables named `self` and `hv` in scope. Expands to an if statement
 * followed by an assignment, so it is not safe as the body of an unbraced
 * if/else. */
118 #define CSV_XS_SELF					\
119     if (!self || !SvOK (self) || !SvROK (self) ||	\
120 	 SvTYPE (SvRV (self)) != SVt_PVHV)		\
121 	croak ("self is not a hash ref");		\
122     hv = (HV *)SvRV (self)
123 
/* Attribute selectors for cx_xs_cache_set (): the `idx` argument picks which
 * field of the cached csv_t gets updated.
 * NOTE(review): CACHE_ID_sep_len and CACHE_ID_formula are both 38. Only
 * CACHE_ID_formula has a case in the visible switch, so the duplicate does
 * not collide there, but confirm the numbering against the .pm before
 * reusing CACHE_ID_sep_len. */
124 /* Keep in sync with .pm! */
125 #define CACHE_ID_quote_char		0
126 #define CACHE_ID_escape_char		1
127 #define CACHE_ID_sep_char		2
128 #define CACHE_ID_binary			3
129 #define CACHE_ID_keep_meta_info		4
130 #define CACHE_ID_always_quote		5
131 #define CACHE_ID_allow_loose_quotes	6
132 #define CACHE_ID_allow_loose_escapes	7
133 #define CACHE_ID_allow_unquoted_escape	8
134 #define CACHE_ID_allow_whitespace	9
135 #define CACHE_ID_blank_is_undef		10
136 #define CACHE_ID_sep			39
137 #define CACHE_ID_sep_len		38
138 #define CACHE_ID_eol			11
139 #define CACHE_ID_eol_len		12
140 #define CACHE_ID_eol_is_cr		13
141 #define CACHE_ID_quo			15
142 #define CACHE_ID_quo_len		16
143 #define CACHE_ID_verbatim		22
144 #define CACHE_ID_empty_is_undef		23
145 #define CACHE_ID_auto_diag		24
146 #define CACHE_ID_quote_space		25
147 #define CACHE_ID_quote_empty		37
148 #define CACHE_ID__is_bound		26
149 #define CACHE_ID__has_ahead		30
150 #define CACHE_ID_escape_null		31
151 #define CACHE_ID_quote_binary		32
152 #define CACHE_ID_diag_verbose		33
153 #define CACHE_ID_has_error_input	34
154 #define CACHE_ID_decode_utf8		35
155 #define CACHE_ID__has_hooks		36
156 #define CACHE_ID_formula		38
157 #define CACHE_ID_strict			42
158 #define CACHE_ID_skip_empty_rows	43
159 #define CACHE_ID_undef_str		46
160 #define CACHE_ID_comment_str		54
161 #define CACHE_ID_types			62
163 #define	byte	unsigned char
164 #define ulng	unsigned long
/* Working state of one parser/writer object.
 *
 * cx_SetupCsv () fills this from the object hash on first use and stores a
 * raw byte copy of the whole struct in the hash under "_CACHE"; later calls
 * memcpy that copy back. The layout is therefore shared by every function
 * that copies the cache, and `cache` points at the bytes of that stored copy.
 *
 * - quote_char .. skip_empty_rows: one byte per object attribute.
 * - sep / quo / eol hold up to MAX_ATTR_LEN bytes each. sep_len and quo_len
 *   stay 0 for a single-byte separator / quote; eol_len is the real length.
 * - self is the object hash, pself the reference handed to error_diag.
 * - bound is the "_BOUND_COLUMNS" array ref, fetched when is_bound is set.
 * - buffer / used: staging area that cx_Print () flushes.
 * - types, undef_str and comment_str point into the string buffers of the
 *   corresponding attribute SVs; nothing here copies those strings. */
165 typedef struct {
166     byte	quote_char;
167     byte	escape_char;
168     byte	fld_idx;
169     byte	binary;
170 
171     byte	keep_meta_info;
172     byte	always_quote;
173     byte	useIO;		/* Also used to indicate EOF */
174     byte	eol_is_cr;
175 
176     byte	allow_loose_quotes;
177     byte	allow_loose_escapes;
178     byte	allow_unquoted_escape;
179     byte	allow_whitespace;
180 
181     byte	blank_is_undef;
182     byte	empty_is_undef;
183     byte	verbatim;
184     byte	auto_diag;
185 
186     byte	quote_space;
187     byte	escape_null;
188     byte	quote_binary;
189     byte	first_safe_char;
190 
191     byte	diag_verbose;
192     byte	has_error_input;
193     byte	decode_utf8;
194     byte	has_hooks;
195 
196     byte	quote_empty;
197     byte	formula;
198     byte	utf8;
199     byte	has_ahead;
200 
201     byte	eolx;
202     byte	strict;
203     short	strict_n;
204 
205     byte	skip_empty_rows;
206 
207     long	is_bound;
208     ulng	recno;
209 
210     byte *	cache;
211 
212     SV *	pself;	/* PL_self, for error_diag */
213     HV *	self;
214     SV *	bound;
215 
216     char *	types;
217 
218     byte	eol_len;
219     byte	sep_len;
220     byte	quo_len;
221     byte	types_len;
222 
223     char *	bptr;
224     SV *	tmp;
225     byte	undef_flg;
226     byte *	undef_str;
227     byte *	comment_str;
228     int		eol_pos;
229     STRLEN	size;
230     STRLEN	used;
231     byte	eol[MAX_ATTR_LEN];
232     byte	sep[MAX_ATTR_LEN];
233     byte	quo[MAX_ATTR_LEN];
234     char	buffer[BUFFER_SIZE];
235     } csv_t;
236 
/* Read option `o` (a string literal key) from the hash `self`.
 * bool_opt* yield SvTRUE of the stored value, num_opt* yield its SvIV; the
 * default `d` (0 for the short forms) is used when the key is absent.
 * Both need variables named `self` (HV *) and `svp` (SV **) in scope and
 * overwrite svp as a side effect. */
237 #define bool_opt_def(o,d) \
238     (((svp = hv_fetchs (self, o, FALSE)) && *svp) ? SvTRUE (*svp) : d)
239 #define bool_opt(o) bool_opt_def (o, 0)
240 #define num_opt_def(o,d) \
241     (((svp = hv_fetchs (self, o, FALSE)) && *svp) ? SvIV   (*svp) : d)
242 #define num_opt(o)  num_opt_def  (o, 0)
243 
/* One row of the diagnostics table: numeric error code and its message. */
typedef struct {
    int   xs_errno;
    char *xs_errstr;
    } xs_error_t;

/* Table of all diagnostics, looked up by code in cx_SvDiag ().
 * The last row (code 0, empty message) is the terminator of that linear
 * search and doubles as the entry returned for unknown codes, so it must
 * stay last. */
static const xs_error_t xs_errors[] =  {

    /* Generic errors */
    { 1000, "INI - constructor failed"						},
    { 1001, "INI - sep_char is equal to quote_char or escape_char"		},
    { 1002, "INI - allow_whitespace with escape_char or quote_char SP or TAB"	},
    { 1003, "INI - \\r or \\n in main attr not allowed"				},
    { 1004, "INI - callbacks should be undef or a hashref"			},
    { 1005, "INI - EOL too long"						},
    { 1006, "INI - SEP too long"						},
    { 1007, "INI - QUOTE too long"						},
    { 1008, "INI - SEP undefined"						},

    { 1010, "INI - the header is empty"						},
    { 1011, "INI - the header contains more than one valid separator"		},
    { 1012, "INI - the header contains an empty field"				},
    /* message used to read "nun-unique" */
    { 1013, "INI - the header contains non-unique fields"			},
    { 1014, "INI - header called on undefined stream"				},

    /* Syntax errors */
    { 1500, "PRM - Invalid/unsupported argument(s)"				},
    { 1501, "PRM - The key attribute is passed as an unsupported type"		},
    { 1502, "PRM - The value attribute is passed without the key attribute"	},
    { 1503, "PRM - The value attribute is passed as an unsupported type"	},

    /* Parse errors */
    { 2010, "ECR - QUO char inside quotes followed by CR not part of EOL"	},
    { 2011, "ECR - Characters after end of quoted field"			},
    { 2012, "EOF - End of data in parsing input stream"				},
    { 2013, "ESP - Specification error for fragments RFC7111"			},
    { 2014, "ENF - Inconsistent number of fields"				},

    /*  EIQ - Error Inside Quotes */
    { 2021, "EIQ - NL char inside quotes, binary off"				},
    { 2022, "EIQ - CR char inside quotes, binary off"				},
    { 2023, "EIQ - QUO character not allowed"					},
    { 2024, "EIQ - EOF cannot be escaped, not even inside quotes"		},
    { 2025, "EIQ - Loose unescaped escape"					},
    { 2026, "EIQ - Binary character inside quoted field, binary off"		},
    { 2027, "EIQ - Quoted field not terminated"					},

    /* EIF - Error Inside Field */
    { 2030, "EIF - NL char inside unquoted verbatim, binary off"		},
    { 2031, "EIF - CR char is first char of field, not part of EOL"		},
    { 2032, "EIF - CR char inside unquoted, not part of EOL"			},
    { 2034, "EIF - Loose unescaped quote"					},
    { 2035, "EIF - Escaped EOF in unquoted field"				},
    { 2036, "EIF - ESC error"							},
    { 2037, "EIF - Binary character in unquoted field, binary off"		},

    /* Combine errors */
    { 2110, "ECB - Binary character in Combine, binary off"			},

    /* IO errors */
    { 2200, "EIO - print to IO failed. See errno"				},

    /* Hash-Ref errors */
    { 3001, "EHR - Unsupported syntax for column_names ()"			},
    { 3002, "EHR - getline_hr () called before column_names ()"			},
    { 3003, "EHR - bind_columns () and column_names () fields count mismatch"	},
    { 3004, "EHR - bind_columns () only accepts refs to scalars"		},
    { 3006, "EHR - bind_columns () did not pass enough refs for parsed fields"	},
    { 3007, "EHR - bind_columns needs refs to writable scalars"			},
    { 3008, "EHR - unexpected error in bound fields"				},
    { 3009, "EHR - print_hr () called before column_names ()"			},
    { 3010, "EHR - print_hr () called with invalid arguments"			},

    { 4001, "PRM - The key does not exist as field in the data"			},

    { 5001, "PRM - The result does not match the output to append to"		},
    { 5002, "PRM - Unsupported output"						},

    {    0, "" },
    };
322 
/* File-scope state shared by all objects.
 * last_error: code of the most recent diagnostic; written by cx_SetDiag ()
 *             and reset to 0 at the start of cx_SetupCsv ().
 * m_print:    passed to call_sv (..., G_METHOD) in cx_Print (), i.e. it holds
 *             the name of the method invoked on the output handle.
 * m_getline:  not used in the visible part of the file - presumably the
 *             reading counterpart of m_print; confirm where it is set. */
323 static int last_error = 0;
324 static SV *m_getline, *m_print;
325 
/* True when c carries the CH_EOLX marker value */
326 #define is_EOL(c) (c == CH_EOLX)
327 
/* Separator test with side effects; needs `csv` in scope and an lvalue c.
 *
 * c must equal the first separator byte. With sep_len == 0 (single-byte
 * separator) that is the whole test. Otherwise at least sep_len - 1 more
 * bytes must be left in the input (size - used) and must equal the rest of
 * csv->sep; on a match csv->used is advanced past those bytes and c is
 * overwritten with CH_SEPX. */
328 #define __is_SEPX(c) (c == CH_SEP && (csv->sep_len == 0 || (\
329     csv->size - csv->used >= (STRLEN)csv->sep_len - 1			&&\
330     !memcmp (csv->bptr + csv->used, csv->sep + 1, csv->sep_len - 1)	&&\
331     (csv->used += csv->sep_len - 1)					&&\
332     (c = CH_SEPX))))
/* Tracing variant, compiled only with MAINT_DEBUG > 1.
 * NOTE(review): here c is a by-value parameter, so the CH_SEPX assignment
 * made by __is_SEPX () does not reach the caller's variable as it does in the
 * plain macro. The "%d" conversions are also applied to csv->size and
 * csv->used, which are STRLEN. */
333 #if MAINT_DEBUG > 1
_is_SEPX(unsigned int c,csv_t * csv,int line)334 static byte _is_SEPX (unsigned int c, csv_t *csv, int line) {
335     unsigned int b = __is_SEPX (c);
336     (void)fprintf (stderr, "# %4d - is_SEPX:\t%d (%d)\n", line, b, csv->sep_len);
337     if (csv->sep_len)
338 	(void)fprintf (stderr,
339 	    "# len: %d, siz: %d, usd: %d, c: %03x, *sep: %03x\n",
340 	    csv->sep_len, csv->size, csv->used, c, CH_SEP);
341     return b;
342     } /* _is_SEPX */
343 #define is_SEP(c)  _is_SEPX (c, csv, __LINE__)
344 #else
345 #define is_SEP(c) __is_SEPX (c)
346 #endif
347 
/* Quote test with side effects; needs `csv` in scope and an lvalue c.
 *
 * Always false when the first quote byte is 0 (no quote character
 * configured). Otherwise c must equal that byte; with quo_len == 0
 * (single-byte quote) that is the whole test. For a multi-byte quote the
 * remaining quo_len - 1 bytes must be available in the input and equal the
 * rest of csv->quo; on a match csv->used is advanced past them and c is
 * overwritten with CH_QUOTEX. */
348 #define __is_QUOTEX(c) (CH_QUOTE && c == CH_QUOTE && (csv->quo_len == 0 || (\
349     csv->size - csv->used >= (STRLEN)csv->quo_len - 1			&&\
350     !memcmp (csv->bptr + csv->used, csv->quo + 1, csv->quo_len - 1)	&&\
351     (csv->used += csv->quo_len - 1)					&&\
352     (c = CH_QUOTEX))))
/* Tracing variant, compiled only with MAINT_DEBUG > 1.
 * NOTE(review): c is a by-value parameter here, so the CH_QUOTEX assignment
 * does not reach the caller's variable as it does in the plain macro. The
 * "%d" conversions are also applied to the STRLEN fields size and used. */
353 #if MAINT_DEBUG > 1
_is_QUOTEX(unsigned int c,csv_t * csv,int line)354 static byte _is_QUOTEX (unsigned int c, csv_t *csv, int line) {
355     unsigned int b = __is_QUOTEX (c);
356     (void)fprintf (stderr, "# %4d - is_QUOTEX:\t%d (%d)\n", line, b, csv->quo_len);
357 
358     if (csv->quo_len)
359 	(void)fprintf (stderr,
360 	    "# len: %d, siz: %d, usd: %d, c: %03x, *quo: %03x\n",
361 	    csv->quo_len, csv->size, csv->used, c, CH_QUOTE);
362     return b;
363     } /* _is_QUOTEX */
364 #define is_QUOTE(c)  _is_QUOTEX (c, csv, __LINE__)
365 #else
366 #define is_QUOTE(c) __is_QUOTEX (c)
367 #endif
368 
/* True when ch is a space or a tab that is not currently in use as the first
 * separator byte, the first quote byte or the escape character. Needs `csv`
 * in scope; ch is evaluated several times. */
369 #define is_whitespace(ch) \
370     ( (ch) != CH_SEP           && \
371       (ch) != CH_QUOTE         && \
372       (ch) != csv->escape_char && \
373     ( (ch) == CH_SPACE || \
374       (ch) == CH_TAB \
375       ) \
376     )
377 
378 #define SvDiag(xse)		cx_SvDiag (aTHX_ xse)
cx_SvDiag(pTHX_ int xse)379 static SV *cx_SvDiag (pTHX_ int xse) {
380     int   i = 0;
381     SV   *err;
382 
383     while (xs_errors[i].xs_errno && xs_errors[i].xs_errno != xse) i++;
384     if ((err = newSVpv (xs_errors[i].xs_errstr, 0))) {
385 	(void)SvUPGRADE (err, SVt_PVIV);
386 	SvIV_set  (err, xse);
387 	SvIOK_on  (err);
388 	}
389     return (err);
390     } /* SvDiag */
391 
392 /* This function should be altered to deal with the optional extra argument
393  * that holds the replacement message */
394 #define SetDiag(csv,xse)	cx_SetDiag (aTHX_ csv, xse)
cx_SetDiag(pTHX_ csv_t * csv,int xse)395 static SV *cx_SetDiag (pTHX_ csv_t *csv, int xse) {
396     dSP;
397     SV *err   = SvDiag (xse);
398     SV *pself = csv->pself;
399 
400     last_error = xse;
401 	(void)hv_store (csv->self, "_ERROR_DIAG",  11, err,          0);
402     if (xse == 0) {
403 	(void)hv_store (csv->self, "_ERROR_POS",   10, newSViv  (0), 0);
404 	(void)hv_store (csv->self, "_ERROR_FLD",   10, newSViv  (0), 0);
405 	(void)hv_store (csv->self, "_ERROR_INPUT", 12, &PL_sv_undef, 0);
406 	csv->has_error_input = 0;
407 	}
408     if (xse == 2012) /* EOF */
409 	(void)hv_store (csv->self, "_EOF",          4, &PL_sv_yes,   0);
410     if (csv->auto_diag) {
411 	unless (_is_hashref (pself))
412 	    pself = newRV_inc ((SV *)csv->self);
413 	ENTER;
414 	PUSHMARK (SP);
415 	XPUSHs (pself);
416 	PUTBACK;
417 	call_pv ("Text::CSV_XS::error_diag", G_VOID | G_DISCARD);
418 	LEAVE;
419 	unless (pself == csv->pself)
420 	    sv_free (pself);
421 	}
422     return (err);
423     } /* SetDiag */
424 
425 #define xs_cache_set(hv,idx,val)	cx_xs_cache_set (aTHX_ hv, idx, val)
cx_xs_cache_set(pTHX_ HV * hv,int idx,SV * val)426 static void cx_xs_cache_set (pTHX_ HV *hv, int idx, SV *val) {
427     SV    **svp;
428     byte   *cache;
429 
430     csv_t   csvs;
431     csv_t  *csv = &csvs;
432 
433     IV      iv;
434     byte    bv;
435     char   *cp  = "\0";
436     STRLEN  len = 0;
437 
438     unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp)
439 	return;
440 
441     cache = (byte *)SvPV_nolen (*svp);
442     (void)memcpy (csv, cache, sizeof (csv_t));
443 
444     if (SvPOK (val))
445 	cp = SvPV (val, len);
446     if (SvIOK (val))
447 	iv = SvIV (val);
448     else if (SvNOK (val))	/* Needed for 5.6.x but safe for 5.8.x+ */
449 	iv = (IV)SvNV (val);	/* uncoverable statement ancient perl required */
450     else
451 	iv = *cp;
452     bv = (unsigned)iv & 0xff;
453 
454     switch (idx) {
455 
456 	/* single char/byte */
457 	case CACHE_ID_sep_char:
458 	    CH_SEP			= *cp;
459 	    csv->sep_len		= 0;
460 	    break;
461 
462 	case CACHE_ID_quote_char:
463 	    CH_QUOTE			= *cp;
464 	    csv->quo_len		= 0;
465 	    break;
466 
467 	case CACHE_ID_escape_char:           csv->escape_char           = *cp; break;
468 
469 	/* boolean/numeric */
470 	case CACHE_ID_binary:                csv->binary                = bv; break;
471 	case CACHE_ID_keep_meta_info:        csv->keep_meta_info        = bv; break;
472 	case CACHE_ID_always_quote:          csv->always_quote          = bv; break;
473 	case CACHE_ID_quote_empty:           csv->quote_empty           = bv; break;
474 	case CACHE_ID_quote_space:           csv->quote_space           = bv; break;
475 	case CACHE_ID_escape_null:           csv->escape_null           = bv; break;
476 	case CACHE_ID_quote_binary:          csv->quote_binary          = bv; break;
477 	case CACHE_ID_decode_utf8:           csv->decode_utf8           = bv; break;
478 	case CACHE_ID_allow_loose_escapes:   csv->allow_loose_escapes   = bv; break;
479 	case CACHE_ID_allow_loose_quotes:    csv->allow_loose_quotes    = bv; break;
480 	case CACHE_ID_allow_unquoted_escape: csv->allow_unquoted_escape = bv; break;
481 	case CACHE_ID_allow_whitespace:      csv->allow_whitespace      = bv; break;
482 	case CACHE_ID_blank_is_undef:        csv->blank_is_undef        = bv; break;
483 	case CACHE_ID_empty_is_undef:        csv->empty_is_undef        = bv; break;
484 	case CACHE_ID_formula:               csv->formula               = bv; break;
485 	case CACHE_ID_strict:                csv->strict                = bv; break;
486 	case CACHE_ID_verbatim:              csv->verbatim              = bv; break;
487 	case CACHE_ID_skip_empty_rows:       csv->skip_empty_rows       = bv; break;
488 	case CACHE_ID_auto_diag:             csv->auto_diag             = bv; break;
489 	case CACHE_ID_diag_verbose:          csv->diag_verbose          = bv; break;
490 	case CACHE_ID__has_ahead:            csv->has_ahead             = bv; break;
491 	case CACHE_ID__has_hooks:            csv->has_hooks             = bv; break;
492 	case CACHE_ID_has_error_input:       csv->has_error_input       = bv; break;
493 
494 	/* a 4-byte IV */
495 	case CACHE_ID__is_bound:             csv->is_bound              = iv; break;
496 
497 	/* string */
498 	case CACHE_ID_sep:
499 	    (void)memcpy (csv->sep, cp, len);
500 	    csv->sep_len = len == 1 ? 0 : len;
501 	    break;
502 
503 	case CACHE_ID_quo:
504 	    (void)memcpy (csv->quo, cp, len);
505 	    csv->quo_len = len == 1 ? 0 : len;
506 	    break;
507 
508 	case CACHE_ID_eol:
509 	    (void)memcpy (csv->eol, cp, len);
510 	    csv->eol_len   = len;
511 	    csv->eol_is_cr = len == 1 && *cp == CH_CR ? 1 : 0;
512 	    break;
513 
514 	case CACHE_ID_undef_str:
515 	    if (*cp) {
516 		csv->undef_str = (byte *)cp;
517 		if (SvUTF8 (val))
518 		    csv->undef_flg = 3;
519 		}
520 	    else {
521 		csv->undef_str = NULL;
522 		csv->undef_flg = 0;
523 		}
524 	    break;
525 
526 	case CACHE_ID_comment_str:
527 	    csv->comment_str = *cp ? (byte *)cp : NULL;
528 	    break;
529 
530 	case CACHE_ID_types:
531 	    if (cp && len) {
532 		csv->types     = cp;
533 		csv->types_len = len;
534 		}
535 	    else {
536 		csv->types     = NULL;
537 		csv->types_len = 0;
538 		}
539 	    break;
540 
541 	default:
542 	    warn ("Unknown cache index %d ignored\n", idx);
543 	}
544 
545     csv->cache = cache;
546     (void)memcpy (cache, csv, sizeof (csv_t));
547     } /* cache_set */
548 
549 #define _pretty_strl(csv)	cx_pretty_str (aTHX_ csv, strlen (csv))
550 #define _pretty_str(csv,xse)	cx_pretty_str (aTHX_ csv, xse)
cx_pretty_str(pTHX_ byte * s,STRLEN l)551 static char *cx_pretty_str (pTHX_ byte *s, STRLEN l) {
552     SV *dsv = sv_2mortal (newSVpvs (""));
553     return (pv_pretty (dsv, (char *)s, l, 0, NULL, NULL,
554 	    (PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_UNI_DETECT)));
555     } /* _pretty_str */
556 
/* One-line warn () helpers for cx_xs_cache_diag (); `trim` is the label.
 *   _cache_show_byte  value c as two hex digits and as decimal
 *   _cache_show_char  value c as hex and as an escaped one-byte string; c
 *                     must be an lvalue because its address is taken
 *   _cache_show_str   length l in decimal and the escaped l bytes at str
 * The value/length argument is expanded twice in each macro. */
557 #define _cache_show_byte(trim,c) \
558     warn ("  %-21s %02x:%3d\n", trim, c, c)
559 #define _cache_show_char(trim,c) \
560     warn ("  %-21s %02x:%s\n",  trim, c, _pretty_str (&c, 1))
561 #define _cache_show_str(trim,l,str) \
562     warn ("  %-21s %02d:%s\n",  trim, l, _pretty_str (str, l))
563 
564 #define xs_cache_diag(hv)	cx_xs_cache_diag (aTHX_ hv)
cx_xs_cache_diag(pTHX_ HV * hv)565 static void cx_xs_cache_diag (pTHX_ HV *hv) {
566     SV   **svp;
567     byte  *cache;
568     csv_t  csvs;
569     csv_t *csv = &csvs;
570 
571     unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp) {
572 	warn ("CACHE: invalid\n");
573 	return;
574 	}
575 
576     cache = (byte *)SvPV_nolen (*svp);
577     (void)memcpy (csv, cache, sizeof (csv_t));
578     warn ("CACHE:\n");
579     _cache_show_char ("quote_char",		CH_QUOTE);
580     _cache_show_char ("escape_char",		csv->escape_char);
581     _cache_show_char ("sep_char",		CH_SEP);
582     _cache_show_byte ("binary",			csv->binary);
583     _cache_show_byte ("decode_utf8",		csv->decode_utf8);
584 
585     _cache_show_byte ("allow_loose_escapes",	csv->allow_loose_escapes);
586     _cache_show_byte ("allow_loose_quotes",	csv->allow_loose_quotes);
587     _cache_show_byte ("allow_unquoted_escape",	csv->allow_unquoted_escape);
588     _cache_show_byte ("allow_whitespace",	csv->allow_whitespace);
589     _cache_show_byte ("always_quote",		csv->always_quote);
590     _cache_show_byte ("quote_empty",		csv->quote_empty);
591     _cache_show_byte ("quote_space",		csv->quote_space);
592     _cache_show_byte ("escape_null",		csv->escape_null);
593     _cache_show_byte ("quote_binary",		csv->quote_binary);
594     _cache_show_byte ("auto_diag",		csv->auto_diag);
595     _cache_show_byte ("diag_verbose",		csv->diag_verbose);
596     _cache_show_byte ("formula",		csv->formula);
597     _cache_show_byte ("strict",			csv->strict);
598     _cache_show_byte ("skip_empty_rows",	csv->skip_empty_rows);
599     _cache_show_byte ("has_error_input",	csv->has_error_input);
600     _cache_show_byte ("blank_is_undef",		csv->blank_is_undef);
601     _cache_show_byte ("empty_is_undef",		csv->empty_is_undef);
602     _cache_show_byte ("has_ahead",		csv->has_ahead);
603     _cache_show_byte ("keep_meta_info",		csv->keep_meta_info);
604     _cache_show_byte ("verbatim",		csv->verbatim);
605 
606     _cache_show_byte ("has_hooks",		csv->has_hooks);
607     _cache_show_byte ("eol_is_cr",		csv->eol_is_cr);
608     _cache_show_byte ("eol_len",		csv->eol_len);
609     _cache_show_str  ("eol",      csv->eol_len,	csv->eol);
610     _cache_show_byte ("sep_len",		csv->sep_len);
611     if (csv->sep_len > 1)
612 	_cache_show_str ("sep",   csv->sep_len,	csv->sep);
613     _cache_show_byte ("quo_len",		csv->quo_len);
614     if (csv->quo_len > 1)
615 	_cache_show_str ("quote", csv->quo_len,	csv->quo);
616     if (csv->types_len)
617 	_cache_show_str ("types", csv->types_len, (byte *)csv->types);
618     else
619 	_cache_show_str ("types", 0, (byte *)"");
620 
621     if (csv->bptr)
622 	_cache_show_str ("bptr", (int)strlen (csv->bptr), (byte *)csv->bptr);
623     if (csv->tmp && SvPOK (csv->tmp)) {
624 	char *s = SvPV_nolen (csv->tmp);
625 	_cache_show_str ("tmp",  (int)strlen (s), (byte *)s);
626 	}
627     } /* xs_cache_diag */
628 
629 #define set_eol_is_cr(csv)	cx_set_eol_is_cr (aTHX_ csv)
cx_set_eol_is_cr(pTHX_ csv_t * csv)630 static void cx_set_eol_is_cr (pTHX_ csv_t *csv) {
631     csv->eol[0]    = CH_CR;
632     csv->eol_is_cr = 1;
633     csv->eol_len   = 1;
634     (void)memcpy (csv->cache, csv, sizeof (csv_t));
635 
636     (void)hv_store (csv->self, "eol",  3, newSVpvn ((char *)csv->eol, 1), 0);
637     } /* set_eol_is_cr */
638 
639 #define SetupCsv(csv,self,pself)	cx_SetupCsv (aTHX_ csv, self, pself)
cx_SetupCsv(pTHX_ csv_t * csv,HV * self,SV * pself)640 static void cx_SetupCsv (pTHX_ csv_t *csv, HV *self, SV *pself) {
641     SV	       **svp;
642     STRLEN	 len;
643     char	*ptr;
644 
645     last_error = 0;
646 
647     if ((svp = hv_fetchs (self, "_CACHE", FALSE)) && *svp) {
648 	byte *cache = (byte *)SvPVX (*svp);
649 	(void)memcpy (csv, cache, sizeof (csv_t));
650 	}
651     else {
652 	SV *sv_cache;
653 
654 	(void)memset (csv, 0, sizeof (csv_t)); /* Reset everything */
655 
656 	csv->self  = self;
657 	csv->pself = pself;
658 
659 	CH_SEP = ',';
660 	if ((svp = hv_fetchs (self, "sep_char",       FALSE)) && *svp && SvOK (*svp))
661 	    CH_SEP = *SvPV (*svp, len);
662 	if ((svp = hv_fetchs (self, "sep",            FALSE)) && *svp && SvOK (*svp)) {
663 	    ptr = SvPV (*svp, len);
664 	    (void)memcpy (csv->sep, ptr, len);
665 	    if (len > 1)
666 		csv->sep_len = len;
667 	    }
668 
669 	CH_QUOTE = '"';
670 	if ((svp = hv_fetchs (self, "quote_char",     FALSE)) && *svp) {
671 	    if (SvOK (*svp)) {
672 		ptr = SvPV (*svp, len);
673 		CH_QUOTE = len ? *ptr : (char)0;
674 		}
675 	    else
676 		CH_QUOTE = (char)0;
677 	    }
678 	if ((svp = hv_fetchs (self, "quote",          FALSE)) && *svp && SvOK (*svp)) {
679 	    ptr = SvPV (*svp, len);
680 	    (void)memcpy (csv->quo, ptr, len);
681 	    if (len > 1)
682 		csv->quo_len = len;
683 	    }
684 
685 	csv->escape_char = '"';
686 	if ((svp = hv_fetchs (self, "escape_char",    FALSE)) && *svp) {
687 	    if (SvOK (*svp)) {
688 		ptr = SvPV (*svp, len);
689 		csv->escape_char = len ? *ptr : (char)0;
690 		}
691 	    else
692 		csv->escape_char = (char)0;
693 	    }
694 
695 	if ((svp = hv_fetchs (self, "eol",            FALSE)) && *svp && SvOK (*svp)) {
696 	    char *eol = SvPV (*svp, len);
697 	    (void)memcpy (csv->eol, eol, len);
698 	    csv->eol_len = len;
699 	    if (len == 1 && *csv->eol == CH_CR)
700 		csv->eol_is_cr = 1;
701 	    }
702 
703 	csv->undef_flg = 0;
704 	if ((svp = hv_fetchs (self, "undef_str",      FALSE)) && *svp && SvOK (*svp)) {
705 		/*if (sv && (SvOK (sv) || (
706 			(SvGMAGICAL (sv) && (mg_get (sv), 1) && SvOK (sv))))) {*/
707 	    csv->undef_str = (byte *)SvPV_nolen (*svp);
708 	    if (SvUTF8 (*svp))
709 		csv->undef_flg = 3;
710 	    }
711 	else
712 	    csv->undef_str = NULL;
713 
714 	if ((svp = hv_fetchs (self, "comment_str",    FALSE)) && *svp && SvOK (*svp))
715 	    csv->comment_str = (byte *)SvPV_nolen (*svp);
716 	else
717 	    csv->comment_str = NULL;
718 
719 	if ((svp = hv_fetchs (self, "_types",         FALSE)) && *svp && SvOK (*svp)) {
720 	    csv->types = SvPV (*svp, len);
721 	    csv->types_len = len;
722 	    }
723 
724 	if ((svp = hv_fetchs (self, "_is_bound",      FALSE)) && *svp && SvOK (*svp))
725 	    csv->is_bound = SvIV (*svp);
726 	if ((svp = hv_fetchs (self, "callbacks",      FALSE)) && _is_hashref (*svp)) {
727 	    HV *cb = (HV *)SvRV (*svp);
728 	    if ((svp = hv_fetchs (cb, "after_parse",  FALSE)) && _is_coderef (*svp))
729 		csv->has_hooks |= HOOK_AFTER_PARSE;
730 	    if ((svp = hv_fetchs (cb, "before_print", FALSE)) && _is_coderef (*svp))
731 		csv->has_hooks |= HOOK_BEFORE_PRINT;
732 	    }
733 
734 	csv->binary			= bool_opt ("binary");
735 	csv->decode_utf8		= bool_opt ("decode_utf8");
736 	csv->always_quote		= bool_opt ("always_quote");
737 	csv->strict			= bool_opt ("strict");
738 	csv->skip_empty_rows		= bool_opt ("skip_empty_rows");
739 	csv->quote_empty		= bool_opt ("quote_empty");
740 	csv->quote_space		= bool_opt_def ("quote_space",  1);
741 	csv->escape_null		= bool_opt_def ("escape_null",  1);
742 	csv->quote_binary		= bool_opt_def ("quote_binary", 1);
743 	csv->allow_loose_quotes		= bool_opt ("allow_loose_quotes");
744 	csv->allow_loose_escapes	= bool_opt ("allow_loose_escapes");
745 	csv->allow_unquoted_escape	= bool_opt ("allow_unquoted_escape");
746 	csv->allow_whitespace		= bool_opt ("allow_whitespace");
747 	csv->blank_is_undef		= bool_opt ("blank_is_undef");
748 	csv->empty_is_undef		= bool_opt ("empty_is_undef");
749 	csv->verbatim			= bool_opt ("verbatim");
750 
751 	csv->auto_diag			= num_opt ("auto_diag");
752 	csv->diag_verbose		= num_opt ("diag_verbose");
753 	csv->keep_meta_info		= num_opt ("keep_meta_info");
754 	csv->formula			= num_opt ("formula");
755 
756 	unless (csv->escape_char) csv->escape_null = 0;
757 
758 	sv_cache = newSVpvn ((char *)csv, sizeof (csv_t));
759 	csv->cache = (byte *)SvPVX (sv_cache);
760 	SvREADONLY_on (sv_cache);
761 
762 	(void)memcpy (csv->cache, csv, sizeof (csv_t));
763 
764 	(void)hv_store (self, "_CACHE", 6, sv_cache, 0);
765 	}
766 
767     csv->utf8 = 0;
768     csv->size = 0;
769     csv->used = 0;
770 
771     /* This is EBCDIC-safe, as it is used after translation */
772     csv->first_safe_char = csv->quote_space ? 0x21 : 0x20;
773 
774     if (csv->is_bound) {
775 	if ((svp = hv_fetchs (self, "_BOUND_COLUMNS", FALSE)) && _is_arrayref (*svp))
776 	    csv->bound = *svp;
777 	else
778 	    csv->is_bound = 0;
779 	}
780 
781     csv->eol_pos = -1;
782     csv->eolx = csv->eol_len
783 	? csv->verbatim || csv->eol_len >= 2
784 	    ? 1
785 	    : csv->eol[0] == CH_CR || csv->eol[0] == CH_NL
786 		? 0
787 		: 1
788 	: 0;
789     if (csv->sep_len > 1 && is_utf8_string ((U8 *)(csv->sep), csv->sep_len))
790 	csv->utf8 = 1;
791     if (csv->quo_len > 1 && is_utf8_string ((U8 *)(csv->quo), csv->quo_len))
792 	csv->utf8 = 1;
793     } /* SetupCsv */
794 
795 #define Print(csv,dst)		cx_Print (aTHX_ csv, dst)
cx_Print(pTHX_ csv_t * csv,SV * dst)796 static int cx_Print (pTHX_ csv_t *csv, SV *dst) {
797     int result;
798     int keep = 0;
799 
800     if (csv->useIO) {
801 	SV *tmp = sv_2mortal (newSVpvn (csv->buffer, csv->used));
802 	dSP;
803 	PUSHMARK (sp);
804 	EXTEND (sp, 2);
805 	PUSHs ((dst));
806 	if (csv->utf8) {
807 	    STRLEN	 len;
808 	    char	*ptr;
809 	    int		 j;
810 
811 	    ptr = SvPV (tmp, len);
812 	    while (len > 0 && !is_utf8_sv (tmp) && keep < 16) {
813 		ptr[--len] = (char)0;
814 		SvCUR_set (tmp, len);
815 		keep++;
816 		}
817 	    for (j = 0; j < keep; j++)
818 		csv->buffer[j] = csv->buffer[csv->used - keep + j];
819 	    SvUTF8_on (tmp);
820 	    }
821 	PUSHs (tmp);
822 	PUTBACK;
823 	result = call_sv (m_print, G_METHOD);
824 	SPAGAIN;
825 	if (result) {
826 	    result = POPi;
827 	    unless (result)
828 		(void)SetDiag (csv, 2200);
829 	    }
830 	PUTBACK;
831 	}
832     else {
833 	sv_catpvn (SvRV (dst), csv->buffer, csv->used);
834 	result = TRUE;
835 	}
836     if (csv->utf8 && !csv->useIO && csv->decode_utf8
837 		  && SvROK (dst) && is_utf8_sv (SvRV (dst)))
838 	SvUTF8_on (SvRV (dst));
839     csv->used = keep;
840     return result;
841     } /* Print */
842 
/* Append byte c to the staging buffer of csv, flushing to dst first when the
 * buffer already holds sizeof (buffer) - 1 bytes.
 * A failed flush executes `return FALSE` in the function that uses the
 * macro, so it can only be used where that is a valid return.
 * The expansion is a brace block rather than do { } while (0): a use followed
 * by `;` directly before an `else` will not compile. */
843 #define CSV_PUT(csv,dst,c) {				\
844     if ((csv)->used == sizeof ((csv)->buffer) - 1) {	\
845 	unless (Print ((csv), (dst)))			\
846 	    return FALSE;				\
847 	}						\
848     (csv)->buffer[(csv)->used++] = (c);			\
849     }
850 
851 #define bound_field(csv,i,keep)	cx_bound_field (aTHX_ csv, i, keep)
cx_bound_field(pTHX_ csv_t * csv,SSize_t i,int keep)852 static SV *cx_bound_field (pTHX_ csv_t *csv, SSize_t i, int keep) {
853     SV *sv = csv->bound;
854     AV *av;
855 
856     /* fprintf (stderr, "# New bind %d/%d\n", i, csv->is_bound);\ */
857     if (i >= csv->is_bound) {
858 	(void)SetDiag (csv, 3006);
859 	return (NULL);
860 	}
861 
862     if (sv && SvROK (sv)) {
863 	av = (AV *)(SvRV (sv));
864 	/* fprintf (stderr, "# Bind %d/%d/%d\n", i, csv->is_bound, av_len (av)); */
865 	sv = *av_fetch (av, i, FALSE);
866 	if (sv && SvROK (sv)) {
867 	    sv = SvRV (sv);
868 	    if (keep)
869 		return (sv);
870 
871 	    unless (SvREADONLY (sv)) {
872 		SvSetEmpty (sv);
873 		return (sv);
874 		}
875 	    }
876 	}
877     (void)SetDiag (csv, 3008);
878     return (NULL);
879     } /* bound_field */
880 
881 #define was_quoted(mf,idx)	cx_was_quoted (aTHX_ mf, idx)
cx_was_quoted(pTHX_ AV * mf,int idx)882 static int cx_was_quoted (pTHX_ AV *mf, int idx) {
883     SV **x = av_fetch (mf, idx, FALSE);
884     return (x && SvIOK (*x) && SvIV (*x) & CSV_FLAGS_QUO ? 1 : 0);
885     } /* was_quoted */
886 
887 #define _formula(csv,sv,len,f) cx_formula (aTHX_ csv, sv, len, f)
cx_formula(pTHX_ csv_t * csv,SV * sv,STRLEN * len,int f)888 static char *cx_formula (pTHX_ csv_t *csv, SV *sv, STRLEN *len, int f) {
889 
890     int fa = csv->formula;
891 
892     if (fa == 1) die   ("Formulas are forbidden\n");
893     if (fa == 2) croak ("Formulas are forbidden\n");
894 
895     if (fa == 3) {
896 	char *ptr = SvPV_nolen (sv);
897 	char  rec[40];
898 	char  field[128];
899 	SV  **svp;
900 
901 	if (csv->recno) (void)sprintf (rec, " in record %lu", csv->recno + 1);
902 	else           *rec = (char)0;
903 
904 	*field = (char)0;
905 	if ((svp = hv_fetchs (csv->self, "_COLUMN_NAMES", FALSE)) && _is_arrayref (*svp)) {
906 	    AV *avp = (AV *)SvRV (*svp);
907 	    if (avp && av_len (avp) >= (f - 1)) {
908 		SV **fnm = av_fetch (avp, f - 1, FALSE);
909 		if (fnm && *fnm && SvOK (*fnm))
910 		    (void)sprintf (field, " (column: '%.100s')", SvPV_nolen (*fnm));
911 		}
912 	    }
913 
914 	warn ("Field %d%s%s contains formula '%s'\n", f, field, rec, ptr);
915 	return ptr;
916 	}
917 
918     if (len) *len = 0;
919 
920     if (fa == 4) {
921 	unless (SvREADONLY (sv)) SvSetEmpty (sv);
922 	return "";
923 	}
924 
925     if (fa == 5) {
926 	unless (SvREADONLY (sv)) SvSetUndef (sv);
927 	return NULL;
928 	}
929 
930     if (fa == 6) {
931 	int result;
932 	SV **svp = hv_fetchs (csv->self, "_FORMULA_CB", FALSE);
933 	if (svp && _is_coderef (*svp)) {
934 	    dSP;
935 	    ENTER;
936 	    SAVE_DEFSV; /* local $_ */
937 	    DEFSV = sv;
938 	    PUSHMARK (SP);
939 	    PUTBACK;
940 	    result = call_sv (*svp, G_SCALAR);
941 	    SPAGAIN;
942 	    if (result)
943 		sv_setsv (sv, POPs);
944 	    PUTBACK;
945 	    LEAVE;
946 	    }
947 	return len ? SvPV (sv, *len) : SvPV_nolen (sv);
948 	}
949 
950     /* So far undefined behavior */
951     return NULL;
952     } /* _formula */
953 
954 #define Combine(csv,dst,fields)	cx_Combine (aTHX_ csv, dst, fields)
cx_Combine(pTHX_ csv_t * csv,SV * dst,AV * fields)955 static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields) {
956     SSize_t i, n;
957     int     bound = 0;
958     int     aq  = (int)csv->always_quote;
959     int     qe  = (int)csv->quote_empty;
960     int     kmi = (int)csv->keep_meta_info;
961     AV     *qm = NULL;
962 
963     n = (IV)av_len (fields);
964     if (n < 0 && csv->is_bound) {
965 	n = csv->is_bound - 1;
966 	bound = 1;
967 	}
968 
969     if (kmi >= 10) {
970 	SV **svp;
971 	if ((svp = hv_fetchs (csv->self, "_FFLAGS", FALSE)) && _is_arrayref (*svp)) {
972 	    AV *avp = (AV *)SvRV (*svp);
973 	    if (avp && av_len (avp) >= n)
974 		qm = avp;
975 	    }
976 	}
977 
978     for (i = 0; i <= n; i++) {
979 	SV     *sv;
980 	STRLEN  len = 0;
981 	char   *ptr = NULL;
982 
983 	if (i > 0) {
984 	    CSV_PUT (csv, dst, CH_SEP);
985 	    if (csv->sep_len) {
986 		int x;
987 		for (x = 1; x < (int)csv->sep_len; x++)
988 		    CSV_PUT (csv, dst, csv->sep[x]);
989 		}
990 	    }
991 
992 	if (bound)
993 	    sv = bound_field (csv, i, 1);
994 	else {
995 	    SV **svp = av_fetch (fields, i, FALSE);
996 	    sv = svp && *svp ? *svp : NULL;
997 	    }
998 
999 	if (sv && (SvOK (sv) || (
1000 		(SvGMAGICAL (sv) && (mg_get (sv), 1) && SvOK (sv))))) {
1001 
1002 	    int	    quoteMe;
1003 
1004 	    ptr = SvPV (sv, len);
1005 
1006 	    if (*ptr == '=' && csv->formula) {
1007 		unless (ptr = _formula (csv, sv, &len, i))
1008 		    continue;
1009 		}
1010 	    if (len == 0)
1011 		quoteMe = aq ? 1 : qe ? 1 : qm ? was_quoted (qm, i) : 0;
1012 	    else {
1013 
1014 		if (SvUTF8 (sv))  {
1015 		    csv->utf8   = 1;
1016 		    csv->binary = 1;
1017 		    }
1018 
1019 		quoteMe = aq ? 1 : qm ? was_quoted (qm, i) : 0;
1020 
1021 		/* Do we need quoting? We do quote, if the user requested
1022 		 * (always_quote), if binary or blank characters are found
1023 		 * and if the string contains quote or escape characters.
1024 		 */
1025 		if (!quoteMe &&
1026 		   ( quoteMe = (!SvIOK (sv) && !SvNOK (sv) && CH_QUOTE))) {
1027 		    char	*ptr2;
1028 		    STRLEN	 l;
1029 
1030 #if MAINT_DEBUG > 4
1031 		    (void)fprintf (stderr, "# Combine:\n");
1032 		    sv_dump (sv);
1033 #endif
1034 		    for (ptr2 = ptr, l = len; l; ++ptr2, --l) {
1035 			byte c = *ptr2;
1036 #ifdef IS_EBCDIC
1037 			byte x = ebcdic2ascii[c];
1038 #if MAINT_DEBUG > 4
1039 			(void)fprintf (stderr, " %02x", x);
1040 #endif
1041 #else
1042 			byte x = c;
1043 #endif
1044 
1045 			if ((CH_QUOTE          && c == CH_QUOTE)          ||
1046 			    (CH_SEP            && c == CH_SEP)            ||
1047 			    (csv->escape_char  && c == csv->escape_char)  ||
1048 			    (csv->quote_binary ? (x >= 0x7f && x <= 0xa0) ||
1049 						  x < csv->first_safe_char
1050 					       :  c == CH_NL || c == CH_CR ||
1051 						 (csv->quote_space && (
1052 						  c == CH_SPACE || c == CH_TAB)))) {
1053 			    /* Binary character */
1054 			    break;
1055 			    }
1056 			}
1057 #if defined(IS_EBCDIC) && MAINT_DEBUG > 4
1058 		    (void)fprintf (stderr, "\n");
1059 #endif
1060 		    quoteMe = (l > 0);
1061 		    }
1062 		}
1063 	    if (quoteMe) {
1064 		CSV_PUT (csv, dst, CH_QUOTE);
1065 		if (csv->quo_len) {
1066 		    int x;
1067 		    for (x = 1; x < (int)csv->quo_len; x++)
1068 			CSV_PUT (csv, dst, csv->quo[x]);
1069 		    }
1070 		}
1071 	    while (len-- > 0) {
1072 		char	c = *ptr++;
1073 		int	e = 0;
1074 
1075 		if (!csv->binary && is_csv_binary (c)) {
1076 		    SvREFCNT_inc (sv);
1077 		    csv->has_error_input = 1;
1078 		    unless (hv_store (csv->self, "_ERROR_INPUT", 12, sv, 0))
1079 			SvREFCNT_dec (sv); /* uncoverable statement memory fail */
1080 		    (void)SetDiag (csv, 2110);
1081 		    return FALSE;
1082 		    }
1083 		if (CH_QUOTE && (byte)c == CH_QUOTE && (csv->quo_len == 0 ||
1084 			 memcmp (ptr, csv->quo +1, csv->quo_len - 1) == 0))
1085 		    e = 1;
1086 		else
1087 		if (c == csv->escape_char && csv->escape_char)
1088 		    e = 1;
1089 		else
1090 		if (c == (char)0          && csv->escape_null) {
1091 		    e = 1;
1092 		    c = '0';
1093 		    }
1094 		if (e && csv->escape_char)
1095 		    CSV_PUT (csv, dst, csv->escape_char);
1096 		CSV_PUT (csv, dst, c);
1097 		}
1098 	    if (quoteMe) {
1099 		CSV_PUT (csv, dst, CH_QUOTE);
1100 		if (csv->quo_len) {
1101 		    int x;
1102 		    for (x = 1; x < (int)csv->quo_len; x++)
1103 			CSV_PUT (csv, dst, csv->quo[x]);
1104 		    }
1105 		}
1106 	    }
1107 	else {
1108 	    if (csv->undef_str) {
1109 		byte  *ptr = csv->undef_str;
1110 		STRLEN len = strlen ((char *)ptr);
1111 
1112 		if (csv->undef_flg) {
1113 		    csv->utf8   = 1;
1114 		    csv->binary = 1;
1115 		    }
1116 
1117 		while (len--)
1118 		    CSV_PUT (csv, dst, *ptr++);
1119 		}
1120 	    }
1121 	}
1122     if (csv->eol_len) {
1123 	STRLEN	len = csv->eol_len;
1124 	byte   *ptr = csv->eol;
1125 
1126 	while (len--)
1127 	    CSV_PUT (csv, dst, *ptr++);
1128 	}
1129     if (csv->used)
1130 	return Print (csv, dst);
1131     return TRUE;
1132     } /* Combine */
1133 
1134 #define ParseError(csv,xse,pos)	cx_ParseError (aTHX_ csv, xse, pos)
cx_ParseError(pTHX_ csv_t * csv,int xse,STRLEN pos)1135 static void cx_ParseError (pTHX_ csv_t *csv, int xse, STRLEN pos) {
1136     (void)hv_store (csv->self, "_ERROR_POS", 10, newSViv (pos), 0);
1137     (void)hv_store (csv->self, "_ERROR_FLD", 10, newSViv (csv->fld_idx), 0);
1138     if (csv->tmp) {
1139 	csv->has_error_input = 1;
1140 	if (hv_store (csv->self, "_ERROR_INPUT", 12, csv->tmp, 0))
1141 	    SvREFCNT_inc (csv->tmp);
1142 	}
1143     (void)SetDiag (csv, xse);
1144     } /* ParseError */
1145 
#define CsvGet(csv,src)		cx_CsvGet (aTHX_ csv, src)
/* Refill the input buffer of csv and return the next character.
 *
 * Called when the current buffer is exhausted.  Returns
 *  - EOF       when not reading from IO, or when no more data came in
 *              (useIO_EOF is then or-ed into csv->useIO)
 *  - CH_EOLX   when a stripped end-of-line has to be delivered
 *  - otherwise the first byte of the freshly read line
 */
static int cx_CsvGet (pTHX_ csv_t *csv, SV *src) {
    /* Without IO there is nothing to fetch more data from */
    unless (csv->useIO)
	return EOF;

    /* The previous line had its EOL stripped (eol_pos >= 0): hand that
     * EOL out now.  The buffer is replaced by the EOL string itself and
     * eol_pos is set to -2 to mark this state. */
    if (csv->tmp && csv->eol_pos >= 0) {
	csv->eol_pos = -2;
	sv_setpvn (csv->tmp, (char *)csv->eol, csv->eol_len);
	csv->bptr = SvPV (csv->tmp, csv->size);
	csv->used = 0;
	return CH_EOLX;
	}

    /* Call the getline method on src to read the next line */
    {	STRLEN		result;
	dSP;

	PUSHMARK (sp);
	EXTEND (sp, 1);
	PUSHs (src);
	PUTBACK;
	result = call_sv (m_getline, G_METHOD);
	SPAGAIN;
	csv->eol_pos = -1;
	csv->tmp = result ? POPs : NULL;
	PUTBACK;

#if MAINT_DEBUG > 4
	(void)fprintf (stderr, "getline () returned:\n");
	sv_dump (csv->tmp);
#endif
	}
    if (csv->tmp && SvOK (csv->tmp)) {
	STRLEN tmp_len;
	csv->bptr = SvPV (csv->tmp, tmp_len);
	csv->used = 0;
	csv->size = tmp_len;
	/* With eolx set, check whether the line ends in csv->eol by
	 * comparing both from the end backwards */
	if (csv->eolx && csv->size >= csv->eol_len) {
	    int i, match = 1;
	    for (i = 1; i <= (int)csv->eol_len; i++) {
		unless (csv->bptr[csv->size - i] == csv->eol[csv->eol_len - i]) {
		    match = 0;
		    break;
		    }
		}
	    if (match) {
#if MAINT_DEBUG > 4
		(void)fprintf (stderr, "# EOLX match, size: %d\n", csv->size);
#endif
		/* Cut the EOL off the line; unless verbatim, remember
		 * where it was so it is delivered as CH_EOLX later */
		csv->size -= csv->eol_len;
		unless (csv->verbatim)
		    csv->eol_pos = csv->size;
		csv->bptr[csv->size] = (char)0;
		SvCUR_set (csv->tmp, csv->size);
		/* The line held nothing but the EOL */
		unless (csv->verbatim || csv->size)
		    return CH_EOLX;
		}
	    }
	if (SvUTF8 (csv->tmp)) csv->utf8 = 1;
	/* tmp_len is the length as read, before any EOL was stripped */
	if (tmp_len)
	    return ((byte)csv->bptr[csv->used++]);
	}
    csv->useIO |= useIO_EOF;
    return EOF;
    } /* CsvGet */
1210 
/* ERROR_INSIDE_QUOTES (code) / ERROR_INSIDE_FIELD (code): abort the
 * enclosing function with a parse error.
 *
 * Both expand to the same brace block: the field under construction
 * (sv) is released unless columns are bound, the error is recorded
 * through ParseError () at position csv->used - 1, and the *enclosing*
 * function returns FALSE.  They rely on csv and sv being in scope at
 * the place of use.
 */
#define ERROR_INSIDE_QUOTES(diag_code) {	\
    unless (csv->is_bound) SvREFCNT_dec (sv);	\
    ParseError (csv, diag_code, csv->used - 1);	\
    return FALSE;				\
    }
#define ERROR_INSIDE_FIELD(diag_code) {		\
    unless (csv->is_bound) SvREFCNT_dec (sv);	\
    ParseError (csv, diag_code, csv->used - 1);	\
    return FALSE;				\
    }
1221 
/* Trace helpers used by the CSV_PUT_SV* and AV_PUSH macros.
 * With MAINT_DEBUG above 4 they print to stderr, tagged with the source
 * line they are expanded on; otherwise they expand to nothing.
 * PUT_RPT expects a variable c and PUSH_RPT a variable sv in scope.
 */
#if MAINT_DEBUG > 4
#define PUT_RPT       (void)fprintf (stderr, "# CSV_PUT  @ %4d: 0x%02x '%c'\n", __LINE__, c, isprint (c) ? c : '?')
#define PUT_SEPX_RPT1 (void)fprintf (stderr, "# PUT SEPX @ %4d\n", __LINE__)
#define PUT_SEPX_RPT2 (void)fprintf (stderr, "# Done putting SEPX\n")
#define PUT_QUOX_RPT1 (void)fprintf (stderr, "# PUT QUOX @ %4d\n", __LINE__)
#define PUT_QUOX_RPT2 (void)fprintf (stderr, "# Done putting QUOX\n")
#define PUT_EOLX_RPT1 (void)fprintf (stderr, "# PUT EOLX @ %4d\n", __LINE__)
#define PUT_EOLX_RPT2 (void)fprintf (stderr, "# Done putting EOLX\n")
#define PUSH_RPT      (void)fprintf (stderr, "# AV_PUSHd @ %4d\n", __LINE__); sv_dump (sv)
#else
#define PUT_RPT
#define PUT_SEPX_RPT1
#define PUT_SEPX_RPT2
#define PUT_QUOX_RPT1
#define PUT_QUOX_RPT2
#define PUT_EOLX_RPT1
#define PUT_EOLX_RPT2
#define PUSH_RPT
#endif
/* CSV_PUT_SV1 (c): append the single byte c to the field sv.
 * The string is grown to hold the byte plus one more, the byte is
 * written at the current end and the length is bumped by one.
 * Uses the variables sv and len of the place of use.
 */
#define CSV_PUT_SV1(c) {			\
    len = SvCUR ((sv));				\
    SvGROW ((sv), len + 2);			\
    *SvEND ((sv)) = c;				\
    PUT_RPT;					\
    SvCUR_set ((sv), len + 1);			\
    }
/* CSV_PUT_SV (c): append c to the field sv, expanding the pseudo
 * characters that stand for a multi-byte attribute:
 *  CH_EOLX    all bytes of csv->eol (csv->size is zeroed first when
 *             eol_pos is -2, and eol_pos is reset to -1 afterwards)
 *  CH_SEPX    all bytes of csv->sep
 *  CH_QUOTEX  all bytes of csv->quo
 * Anything else is appended as a single byte.
 * Expands to a brace block and uses csv, sv and len of the place of
 * use; it is also written without a trailing semicolon in places.
 */
#define CSV_PUT_SV(c) {				\
    if (c == CH_EOLX) {				\
	int x; PUT_EOLX_RPT1;			\
	if (csv->eol_pos == -2)			\
	    csv->size = 0;			\
	for (x = 0; x < (int)csv->eol_len; x++)	\
	    CSV_PUT_SV1 (csv->eol[x]);		\
	csv->eol_pos = -1;			\
	PUT_EOLX_RPT2;				\
	}					\
    else if (c == CH_SEPX) {			\
	int x; PUT_SEPX_RPT1;			\
	for (x = 0; x < (int)csv->sep_len; x++)	\
	    CSV_PUT_SV1 (csv->sep[x]);		\
	PUT_SEPX_RPT2;				\
	}					\
    else if (c == CH_QUOTEX) {			\
	int x; PUT_QUOX_RPT1;			\
	for (x = 0; x < (int)csv->quo_len; x++)	\
	    CSV_PUT_SV1 (csv->quo[x]);		\
	PUT_QUOX_RPT2;				\
	}					\
    else					\
	CSV_PUT_SV1 (c);			\
    }
1273 
/* CSV_GET1: next input character - taken from the current buffer while
 * bytes are left, otherwise obtained through CsvGet ().
 * Uses csv and src of the place of use.
 */
#define CSV_GET1 \
    (csv->used < csv->size ? (byte)csv->bptr[csv->used++] : CsvGet (csv, src))

/* CSV_GET is what the parser uses.  With MAINT_DEBUG above 3 it goes
 * through CSV_GET_ (), which traces the buffer state before and the
 * character after the read to stderr (l is the calling source line);
 * otherwise it is plain CSV_GET1.
 */
#if MAINT_DEBUG > 3
int CSV_GET_ (pTHX_ csv_t *csv, SV *src, int l) {
    int c;
    (void)fprintf (stderr, "# 1-CSV_GET @ %4d: (used: %d, size: %d, eol_pos: %d, eolx = %d)\n", l, csv->used, csv->size, csv->eol_pos, csv->eolx);
    c = CSV_GET1;
    (void)fprintf (stderr, "# 2-CSV_GET @ %4d: 0x%02x '%c'\n", l, c, isprint (c) ? c : '?');
    return (c);
    } /* CSV_GET_ */
#define CSV_GET CSV_GET_ (aTHX_ csv, src, __LINE__)
#else
#define CSV_GET CSV_GET1
#endif
1289 
/* AV_PUSH: finish the field collected in sv and hand it over.
 *
 * In order:
 *  - NUL-terminate the string and clear its UTF-8 flag
 *  - run _formula () when formula handling is on and the field starts
 *    with '='
 *  - an empty field becomes undef with empty_is_undef, or with
 *    blank_is_undef when the field was not quoted
 *  - otherwise trailing whitespace is stripped from an unquoted field
 *    when allow_whitespace is set, and the UTF-8 flag is turned on for a
 *    field flagged binary when decode_utf8 is set and either csv->utf8
 *    is set or the content is valid UTF-8
 *  - set magic is triggered, the field is pushed onto fields unless
 *    columns are bound, and sv is reset to NULL
 *  - the flags f are pushed onto fflags when keep_meta_info is set
 *  - waitingForField is set again
 *
 * Uses csv, sv, f, fnum, fields, fflags and waitingForField of the
 * place of use.
 */
#define AV_PUSH { \
    *SvEND (sv) = (char)0;						\
    SvUTF8_off (sv);							\
    if (csv->formula && SvCUR (sv) && *(SvPV_nolen (sv)) == '=')	\
	(void)_formula (csv, sv, NULL, fnum);				\
    if (SvCUR (sv) == 0 && (						\
	    csv->empty_is_undef ||					\
	    (!(f & CSV_FLAGS_QUO) && csv->blank_is_undef)))		\
	SvSetUndef (sv);						\
    else {								\
	if (csv->allow_whitespace && ! (f & CSV_FLAGS_QUO))		\
	    strip_trail_whitespace (sv);				\
	if (f & CSV_FLAGS_BIN && csv->decode_utf8			\
			      && (csv->utf8 || is_utf8_sv (sv)))	\
	    SvUTF8_on (sv);						\
	}								\
    SvSETMAGIC (sv);							\
    unless (csv->is_bound) av_push (fields, sv);			\
    PUSH_RPT;								\
    sv = NULL;								\
    if (csv->keep_meta_info && fflags)					\
	av_push (fflags, newSViv (f));					\
    waitingForField = 1;						\
    }
1314 
1315 #define strip_trail_whitespace(sv)	cx_strip_trail_whitespace (aTHX_ sv)
cx_strip_trail_whitespace(pTHX_ SV * sv)1316 static void cx_strip_trail_whitespace (pTHX_ SV *sv) {
1317     STRLEN len;
1318     char   *s = SvPV (sv, len);
1319     unless (s && len) return;
1320     while (s[len - 1] == CH_SPACE || s[len - 1] == CH_TAB)
1321 	s[--len] = (char)0;
1322     SvCUR_set (sv, len);
1323     } /* strip_trail_whitespace */
1324 
/* NewField: make sure there is a field to collect into.
 *
 * Does nothing when sv is already set.  Otherwise sv becomes the bound
 * scalar for column fnum when columns are bound, or a fresh empty
 * string.  fnum is incremented either way; when no scalar could be
 * obtained the *enclosing* function returns FALSE.  On success the
 * flags f are cleared and csv->fld_idx is incremented.
 *
 * Uses csv, sv, f and fnum of the place of use.
 */
#define NewField				\
    unless (sv) {				\
	if (csv->is_bound)			\
	    sv = bound_field (csv, fnum, 0);	\
	else					\
	    sv = newSVpvs ("");			\
	fnum++;					\
	unless (sv) return FALSE;		\
	f = 0; csv->fld_idx++;			\
	}
1335 
#if MAINT_DEBUG
/* Copy of the first characters seen by the parser, for debugging */
static char str_parsed[40];
#endif

#if MAINT_DEBUG > 1
/* Debug helper: printable form of the current separator.
 * A multi-byte separator is shown as a list of hex bytes, a single
 * byte one as the character plus its hex code.
 * The text lives in a static buffer that is overwritten by each call.
 */
static char *_sep_string (csv_t *csv) {
    /* static: the buffer is handed to the caller, so it has to outlive
     * this call */
    static char sep[64];
    if (csv->sep_len) {
	int x;
	/* each byte is written as "xx ", that is 3 characters */
	for (x = 0; x < csv->sep_len; x++)
	    (void)sprintf (sep + x * 3, "%02x ", csv->sep[x]);
	}
    else
	(void)sprintf (sep, "'%c' (0x%02x)", CH_SEP, CH_SEP);
    return sep;
    } /* _sep_string */
#endif
1353 
1354 #define Parse(csv,src,fields,fflags)	cx_Parse (aTHX_ csv, src, fields, fflags)
cx_Parse(pTHX_ csv_t * csv,SV * src,AV * fields,AV * fflags)1355 static int cx_Parse (pTHX_ csv_t *csv, SV *src, AV *fields, AV *fflags) {
1356     int		 c, f = 0;
1357     int		 waitingForField	= 1;
1358     SV		*sv			= NULL;
1359     STRLEN	 len;
1360     int		 seenSomething		= FALSE;
1361     int		 fnum			= 0;
1362     int		 spl			= -1;
1363 #if MAINT_DEBUG
1364     (void)memset (str_parsed, 0, 40);
1365 #endif
1366 
1367     csv->fld_idx = 0;
1368 
1369     while ((c = CSV_GET) != EOF) {
1370 
1371 	NewField;
1372 
1373 	seenSomething = TRUE;
1374 	spl++;
1375 #if MAINT_DEBUG
1376 	if (spl < 39) str_parsed[spl] = c;
1377 #endif
1378 restart:
1379 #if MAINT_DEBUG > 9
1380 	(void)fprintf (stderr, "# at restart: %d/%d/%03x pos %d = 0x%02x\n",
1381 	    waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c);
1382 #endif
1383 	if (is_SEP (c)) {
1384 #if MAINT_DEBUG > 1
1385 	    (void)fprintf (stderr, "# %d/%d/%03x pos %d = SEP %s\t%s\n",
1386 		waitingForField ? 1 : 0, sv ? 1 : 0, f, spl,
1387 		_sep_string (csv), _pretty_strl (csv->bptr + csv->used));
1388 #endif
1389 	    if (waitingForField) {
1390 		/* ,1,"foo, 3",,bar,
1391 		 * ^           ^
1392 		 */
1393 		if (csv->blank_is_undef || csv->empty_is_undef)
1394 		    SvSetUndef (sv);
1395 		else
1396 		    SvSetEmpty (sv);
1397 		unless (csv->is_bound)
1398 		    av_push (fields, sv);
1399 		sv = NULL;
1400 		if (csv->keep_meta_info && fflags)
1401 		    av_push (fflags, newSViv (f));
1402 		}
1403 	    else
1404 	    if (f & CSV_FLAGS_QUO) {
1405 		/* ,1,"foo, 3",,bar,
1406 		 *        ^
1407 		 */
1408 		CSV_PUT_SV (c)
1409 		}
1410 	    else {
1411 		/* ,1,"foo, 3",,bar,
1412 		 *   ^        ^    ^
1413 		 */
1414 		AV_PUSH;
1415 		}
1416 	    } /* SEP char */
1417 	else
1418 	if (is_QUOTE (c)) {
1419 #if MAINT_DEBUG > 1
1420 	    (void)fprintf (stderr, "# %d/%d/%03x pos %d = QUO '%c'\t\t%s\n",
1421 		waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
1422 		_pretty_strl (csv->bptr + csv->used));
1423 #endif
1424 	    if (waitingForField) {
1425 		/* ,1,"foo, 3",,bar,\r\n
1426 		 *    ^
1427 		 */
1428 		f |= CSV_FLAGS_QUO;
1429 		waitingForField = 0;
1430 		continue;
1431 		}
1432 
1433 	    if (f & CSV_FLAGS_QUO) {
1434 
1435 		/* ,1,"foo, 3",,bar,\r\n
1436 		 *           ^
1437 		 */
1438 
1439 		int quoesc = 0;
1440 		int c2 = CSV_GET;
1441 
1442 		if (csv->allow_whitespace) {
1443 		    /* , 1 , "foo, 3" , , bar , \r\n
1444 		     *               ^
1445 		     */
1446 		    while (is_whitespace (c2)) {
1447 			if (csv->allow_loose_quotes &&
1448 				!(csv->escape_char && c2 == csv->escape_char)) {
1449 			    /* This feels like a brittle fix for RT115953, where
1450 			     *  ["foo "bar" baz"] got parsed as [foo "bar"baz]
1451 			     * when both allow_whitespace and allow_loose_quotes
1452 			     * are true and escape does not equal quote
1453 			     */
1454 			    CSV_PUT_SV (c);
1455 			    c = c2;
1456 			    }
1457 			c2 = CSV_GET;
1458 			}
1459 		    }
1460 
1461 		if (is_SEP (c2)) {
1462 		    /* ,1,"foo, 3",,bar,\r\n
1463 		     *            ^
1464 		     */
1465 		    AV_PUSH;
1466 		    continue;
1467 		    }
1468 
1469 		if (c2 == CH_NL || c2 == CH_EOLX) {
1470 		    /* ,1,"foo, 3",,"bar"\n
1471 		     *                   ^
1472 		     */
1473 		    AV_PUSH;
1474 		    return TRUE;
1475 		    }
1476 
1477 		/* ---
1478 		 * if      QUOTE eq ESCAPE
1479 		 *    AND (    c2  eq QUOTE	1,"abc""def",2
1480 		 *         OR  c2  eq ESCAPE	1,"abc""def",2 (QUO eq ESC)
1481 		 *         OR  c2  eq NULL )	1,"abc"0def",2
1482 		 * ---
1483 		 */
1484 		if (csv->escape_char && c == csv->escape_char) {
1485 
1486 		    quoesc = 1;
1487 		    if (c2 == '0') {
1488 			/* ,1,"foo, 3"056",,bar,\r\n
1489 			 *            ^
1490 			 */
1491 			CSV_PUT_SV (0)
1492 			continue;
1493 			}
1494 
1495 		    if (is_QUOTE (c2)) {
1496 			/* ,1,"foo, 3""56",,bar,\r\n
1497 			 *            ^
1498 			 */
1499 			if (csv->utf8)
1500 			    f |= CSV_FLAGS_BIN;
1501 			CSV_PUT_SV (c2)
1502 			continue;
1503 			}
1504 
1505 		    if (csv->allow_loose_escapes && c2 != CH_CR) {
1506 			/* ,1,"foo, 3"56",,bar,\r\n
1507 			 *            ^
1508 			 */
1509 			CSV_PUT_SV (c);
1510 			c = c2;
1511 			goto restart;
1512 			}
1513 		    }
1514 
1515 		if (c2 == CH_CR) {
1516 		    int	c3;
1517 
1518 		    if (csv->eol_is_cr) {
1519 			/* ,1,"foo, 3"\r
1520 			 *            ^
1521 			 */
1522 			AV_PUSH;
1523 			return TRUE;
1524 			}
1525 
1526 		    c3 = CSV_GET;
1527 
1528 		    if (c3 == CH_NL) { /* \r is not optional before EOLX! */
1529 			/* ,1,"foo, 3"\r\n
1530 			 *              ^
1531 			 */
1532 			AV_PUSH;
1533 			return TRUE;
1534 			}
1535 
1536 		    if (csv->useIO && csv->eol_len == 0) {
1537 			if (c3 == CH_CR) { /* \r followed by an empty line */
1538 			    /* ,1,"foo, 3"\r\r
1539 			     *              ^
1540 			     */
1541 			    set_eol_is_cr (csv);
1542 			    goto EOLX;
1543 			    }
1544 
1545 			if (!is_csv_binary (c3)) {
1546 			    /* ,1,"foo\n 3",,"bar"\r
1547 			     * baz,4
1548 			     * ^
1549 			     */
1550 			    set_eol_is_cr (csv);
1551 			    csv->used--;
1552 			    csv->has_ahead++;
1553 			    AV_PUSH;
1554 			    return TRUE;
1555 			    }
1556 			}
1557 
1558 		    ParseError (csv, quoesc ? 2023 : 2010, csv->used - 2);
1559 		    return FALSE;
1560 		    }
1561 
1562 		if (c2 == EOF) {
1563 		    /* ,1,"foo, 3"
1564 		     *            ^
1565 		     */
1566 		    AV_PUSH;
1567 		    return TRUE;
1568 		    }
1569 
1570 		if (csv->allow_loose_quotes && !quoesc) {
1571 		    /* ,1,"foo, 3"456",,bar,\r\n
1572 		     *            ^
1573 		     */
1574 		    CSV_PUT_SV (c);
1575 		    c = c2;
1576 		    goto restart;
1577 		    }
1578 
1579 		/* 1,"foo" ",3
1580 		 *        ^
1581 		 */
1582 		if (quoesc) {
1583 		    csv->used--;
1584 		    ERROR_INSIDE_QUOTES (2023);
1585 		    }
1586 
1587 		ERROR_INSIDE_QUOTES (2011);
1588 		}
1589 
1590 	    /* !waitingForField, !InsideQuotes */
1591 	    if (csv->allow_loose_quotes) { /* 1,foo "boo" d'uh,1 */
1592 		f |= CSV_FLAGS_EIF;	/* Mark as error-in-field */
1593 		CSV_PUT_SV (c);
1594 		}
1595 	    else
1596 		ERROR_INSIDE_FIELD (2034);
1597 	    } /* QUO char */
1598 	else
1599 	if (c == csv->escape_char && csv->escape_char) {
1600 #if MAINT_DEBUG > 1
1601 	    (void)fprintf (stderr, "# %d/%d/%03x pos %d = ESC '%c'\t%s\n",
1602 		waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
1603 		_pretty_strl (csv->bptr + csv->used));
1604 #endif
1605 	    /* This means quote_char != escape_char */
1606 	    if (waitingForField) {
1607 		waitingForField = 0;
1608 		if (csv->allow_unquoted_escape) {
1609 		    /* The escape character is the first character of an
1610 		     * unquoted field */
1611 		    /* ... get and store next character */
1612 		    int c2 = CSV_GET;
1613 
1614 		    SvSetEmpty (sv);
1615 
1616 		    if (c2 == EOF) {
1617 			csv->used--;
1618 			ERROR_INSIDE_FIELD (2035);
1619 			}
1620 
1621 		    if (c2 == '0')
1622 			CSV_PUT_SV (0)
1623 		    else
1624 		    if ( is_QUOTE (c2) || is_SEP (c2) ||
1625 			 c2 == csv->escape_char || csv->allow_loose_escapes) {
1626 			if (csv->utf8)
1627 			    f |= CSV_FLAGS_BIN;
1628 			CSV_PUT_SV (c2)
1629 			}
1630 		    else {
1631 			csv->used--;
1632 			ERROR_INSIDE_QUOTES (2025);
1633 			}
1634 		    }
1635 		}
1636 	    else
1637 	    if (f & CSV_FLAGS_QUO) {
1638 		int c2 = CSV_GET;
1639 
1640 		if (c2 == EOF) {
1641 		    csv->used--;
1642 		    ERROR_INSIDE_QUOTES (2024);
1643 		    }
1644 
1645 		if (c2 == '0')
1646 		    CSV_PUT_SV (0)
1647 		else
1648 		if ( is_QUOTE (c2) || is_SEP (c2) ||
1649 		     c2 == csv->escape_char || csv->allow_loose_escapes) {
1650 		    if (csv->utf8)
1651 			f |= CSV_FLAGS_BIN;
1652 		    CSV_PUT_SV (c2)
1653 		    }
1654 		else {
1655 		    csv->used--;
1656 		    ERROR_INSIDE_QUOTES (2025);
1657 		    }
1658 		}
1659 	    else
1660 	    if (sv) {
1661 		int c2 = CSV_GET;
1662 
1663 		if (c2 == EOF) {
1664 		    csv->used--;
1665 		    ERROR_INSIDE_FIELD (2035);
1666 		    }
1667 
1668 		CSV_PUT_SV (c2);
1669 		}
1670 	    else
1671 		ERROR_INSIDE_FIELD (2036); /* uncoverable statement I think there's no way to get here */
1672 	    } /* ESC char */
1673 	else
1674 	if (c == CH_NL || is_EOL (c)) {
1675 EOLX:
1676 #if MAINT_DEBUG > 1
1677 	    (void)fprintf (stderr, "# %d/%d/%03x pos %d = NL\t%s\n",
1678 		waitingForField ? 1 : 0, sv ? 1 : 0, f, spl,
1679 		_pretty_strl (csv->bptr + csv->used));
1680 #endif
1681 	    if (fnum == 1 && f == 0 && SvCUR (sv) == 0 && csv->skip_empty_rows) {
1682 		csv->fld_idx = 0;
1683 		c = CSV_GET;
1684 		if (c == EOF) {
1685 		    sv_free (sv);
1686 		    sv = NULL;
1687 		    waitingForField = 0;
1688 		    break;
1689 		    }
1690 		goto restart;
1691 		}
1692 
1693 	    if (waitingForField) {
1694 		/* ,1,"foo, 3",,bar,
1695 		 *                  ^
1696 		 */
1697 		if (csv->blank_is_undef || csv->empty_is_undef)
1698 		    SvSetUndef (sv);
1699 		else
1700 		    SvSetEmpty (sv);
1701 		unless (csv->is_bound)
1702 		    av_push (fields, sv);
1703 		if (csv->keep_meta_info && fflags)
1704 		    av_push (fflags, newSViv (f));
1705 		return TRUE;
1706 		}
1707 
1708 	    if (f & CSV_FLAGS_QUO) {
1709 		/* ,1,"foo\n 3",,bar,
1710 		 *        ^
1711 		 */
1712 		f |= CSV_FLAGS_BIN;
1713 		unless (csv->binary)
1714 		    ERROR_INSIDE_QUOTES (2021);
1715 
1716 		CSV_PUT_SV (c);
1717 		}
1718 	    else
1719 	    if (csv->verbatim) {
1720 		/* ,1,foo\n 3,,bar,
1721 		 * This feature should be deprecated
1722 		 */
1723 		f |= CSV_FLAGS_BIN;
1724 		unless (csv->binary)
1725 		    ERROR_INSIDE_FIELD (2030);
1726 
1727 		CSV_PUT_SV (c);
1728 		}
1729 	    else {
1730 		/* sep=,
1731 		 *      ^
1732 		 */
1733 		if (csv->recno == 0 && csv->fld_idx == 1 && csv->useIO &&
1734 			(csv->bptr[0] == 's' || csv->bptr[0] == 'S') &&
1735 			(csv->bptr[1] == 'e' || csv->bptr[1] == 'E') &&
1736 			(csv->bptr[2] == 'p' || csv->bptr[2] == 'P') &&
1737 			 csv->bptr[3] == '=') {
1738 		    char *sep = csv->bptr + 4;
1739 		    int   lnu = csv->used - 5;
1740 		    if (lnu <= MAX_ATTR_LEN) {
1741 			sep[lnu] = (char)0;
1742 			(void)memcpy (csv->sep, sep, lnu);
1743 			csv->sep_len = lnu == 1 ? 0 : lnu;
1744 			return Parse (csv, src, fields, fflags);
1745 			}
1746 		    }
1747 
1748 		/* ,1,"foo\n 3",,bar
1749 		 *                  ^
1750 		 */
1751 		AV_PUSH;
1752 		return TRUE;
1753 		}
1754 	    } /* CH_NL */
1755 	else
1756 	if (c == CH_CR && !(csv->verbatim)) {
1757 #if MAINT_DEBUG > 1
1758 	    (void)fprintf (stderr, "# %d/%d/%03x pos %d = CR\n",
1759 		waitingForField ? 1 : 0, sv ? 1 : 0, f, spl);
1760 #endif
1761 	    if (waitingForField) {
1762 		int	c2;
1763 
1764 		if (csv->eol_is_cr) {
1765 		    /* ,1,"foo\n 3",,bar,\r
1766 		     *                   ^
1767 		     */
1768 		    c = CH_NL;
1769 		    goto EOLX;
1770 		    }
1771 
1772 		c2 = CSV_GET;
1773 
1774 		if (c2 == EOF) {
1775 		    /* ,1,"foo\n 3",,bar,\r
1776 		     *                     ^
1777 		     */
1778 		    c = EOF;
1779 
1780 #if MAINT_DEBUG > 9
1781 		    (void)fprintf (stderr, "# (%d) ... CR EOF 0x%x\n",
1782 			seenSomething, c);
1783 #endif
1784 		    unless (seenSomething)
1785 			break;
1786 		    goto restart;
1787 		    }
1788 
1789 		if (c2 == CH_NL) { /* \r is not optional before EOLX! */
1790 		    /* ,1,"foo\n 3",,bar,\r\n
1791 		     *                     ^
1792 		     */
1793 		    c = c2;
1794 		    goto EOLX;
1795 		    }
1796 
1797 		if (csv->useIO && csv->eol_len == 0) {
1798 		    if (c2 == CH_CR) { /* \r followed by an empty line */
1799 			/* ,1,"foo\n 3",,bar,\r\r
1800 			 *                     ^
1801 			 */
1802 			set_eol_is_cr (csv);
1803 			goto EOLX;
1804 			}
1805 
1806 		    waitingForField = 0;
1807 
1808 		    if (!is_csv_binary (c2)) {
1809 			/* ,1,"foo\n 3",,bar,\r
1810 			 * baz,4
1811 			 * ^
1812 			 */
1813 			set_eol_is_cr (csv);
1814 			csv->used--;
1815 			csv->has_ahead++;
1816 			if (fnum == 1 && f == 0 && SvCUR (sv) == 0 && csv->skip_empty_rows) {
1817 			    csv->fld_idx = 0;
1818 			    c = CSV_GET;
1819 			    if (c == EOF) {
1820 				sv_free (sv);
1821 				sv = NULL;
1822 				waitingForField = 0;
1823 				break;
1824 				}
1825 			    goto restart;
1826 			    }
1827 			AV_PUSH;
1828 			return TRUE;
1829 			}
1830 		    }
1831 
1832 		/* ,1,"foo\n 3",,bar,\r\t
1833 		 *                     ^
1834 		 */
1835 		csv->used--;
1836 		ERROR_INSIDE_FIELD (2031);
1837 		}
1838 
1839 	    if (f & CSV_FLAGS_QUO) {
1840 		/* ,1,"foo\r 3",,bar,\r\t
1841 		 *        ^
1842 		 */
1843 		f |= CSV_FLAGS_BIN;
1844 		unless (csv->binary)
1845 		    ERROR_INSIDE_QUOTES (2022);
1846 
1847 		CSV_PUT_SV (c);
1848 		}
1849 	    else {
1850 		int	c2;
1851 
1852 		if (csv->eol_is_cr) {
1853 		    /* ,1,"foo\n 3",,bar\r
1854 		     *                  ^
1855 		     */
1856 		    goto EOLX;
1857 		    }
1858 
1859 		c2 = CSV_GET;
1860 
1861 		if (c2 == CH_NL) { /* \r is not optional before EOLX! */
1862 		    /* ,1,"foo\n 3",,bar\r\n
1863 		     *                    ^
1864 		     */
1865 		    goto EOLX;
1866 		    }
1867 
1868 		if (csv->useIO && csv->eol_len == 0) {
1869 		    if (!is_csv_binary (c2)
1870 			    /* ,1,"foo\n 3",,bar\r
1871 			     * baz,4
1872 			     * ^
1873 			     */
1874 			|| c2 == CH_CR) {
1875 			    /* ,1,"foo\n 3",,bar,\r\r
1876 			     *                     ^
1877 			     */
1878 			set_eol_is_cr (csv);
1879 			csv->used--;
1880 			csv->has_ahead++;
1881 			if (fnum == 1 && f == 0 && SvCUR (sv) == 0 && csv->skip_empty_rows) {
1882 			    csv->fld_idx = 0;
1883 			    c = CSV_GET;
1884 			    if (c == EOF) {
1885 				sv_free (sv);
1886 				sv = NULL;
1887 				waitingForField = 0;
1888 				break;
1889 				}
1890 			    goto restart;
1891 			    }
1892 			AV_PUSH;
1893 			return TRUE;
1894 			}
1895 		    }
1896 
1897 		/* ,1,"foo\n 3",,bar\r\t
1898 		 *                    ^
1899 		 */
1900 		ERROR_INSIDE_FIELD (2032);
1901 		}
1902 	    } /* CH_CR */
1903 	else {
1904 #if MAINT_DEBUG > 1
1905 	    (void)fprintf (stderr, "# %d/%d/%03x pos %d = CCC '%c'\t\t%s\n",
1906 		waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
1907 		_pretty_strl (csv->bptr + csv->used));
1908 #endif
1909 	    /* Needed for non-IO parse, where EOL is not set during read */
1910 	    if (csv->eolx && c == CH_EOL &&
1911 		 csv->size - csv->used >= (STRLEN)csv->eol_len - 1 &&
1912 		 !memcmp (csv->bptr + csv->used, csv->eol + 1, csv->eol_len - 1) &&
1913 		 (csv->used += csv->eol_len - 1)) {
1914 		c = CH_EOLX;
1915 #if MAINT_DEBUG > 5
1916 		(void)fprintf (stderr, "# -> EOLX (0x%x)\n", c);
1917 #endif
1918 		goto EOLX;
1919 		}
1920 
1921 	    if (waitingForField) {
1922 		if (csv->comment_str && !f && !spl && c == *csv->comment_str) {
1923 		    STRLEN cl = strlen ((char *)csv->comment_str);
1924 
1925 #if MAINT_DEBUG > 5
1926 		    (void)fprintf (stderr,
1927 			"COMMENT? cl = %d, size = %d, used = %d\n",
1928 			cl, csv->size, csv->used);
1929 #endif
1930 		    if (cl == 1 || (
1931 		       (csv->size - csv->used >= cl - 1 &&
1932 			 !memcmp (csv->bptr + csv->used, csv->comment_str + 1, cl - 1) &&
1933 			 (csv->used += cl - 1)))) {
1934 			csv->used    = csv->size;
1935 			csv->fld_idx = 0;
1936 			c = CSV_GET;
1937 #if MAINT_DEBUG > 5
1938 			(void)fprintf (stderr, "# COMMENT, SKIPPED\n");
1939 #endif
1940 			goto restart;
1941 			}
1942 		    }
1943 
1944 		if (csv->allow_whitespace && is_whitespace (c)) {
1945 		    do {
1946 			c = CSV_GET;
1947 #if MAINT_DEBUG > 5
1948 			(void)fprintf (stderr, "# WS next got (0x%x)\n", c);
1949 #endif
1950 			} while (is_whitespace (c));
1951 		    if (c == EOF)
1952 			break;
1953 		    goto restart;
1954 		    }
1955 		waitingForField = 0;
1956 		goto restart;
1957 		}
1958 
1959 #if MAINT_DEBUG > 5
1960 	    (void)fprintf (stderr, "# %sc 0x%x is%s binary %s utf8\n",
1961 		f & CSV_FLAGS_QUO ? "quoted " : "", c,
1962 		is_csv_binary (c) ? "" : " not",
1963 		csv->utf8 ? "is" : "not");
1964 #endif
1965 	    if (f & CSV_FLAGS_QUO) {
1966 		if (is_csv_binary (c)) {
1967 		    f |= CSV_FLAGS_BIN;
1968 		    unless (csv->binary || csv->utf8)
1969 			ERROR_INSIDE_QUOTES (2026);
1970 		    }
1971 		CSV_PUT_SV (c);
1972 		}
1973 	    else {
1974 		if (is_csv_binary (c)) {
1975 		    if (csv->useIO && c == EOF)
1976 			break;
1977 		    f |= CSV_FLAGS_BIN;
1978 		    unless (csv->binary || csv->utf8)
1979 			ERROR_INSIDE_FIELD (2037);
1980 		    }
1981 		CSV_PUT_SV (c);
1982 		}
1983 	    }
1984 
1985 	/* continue */
1986 	if (csv->verbatim && csv->useIO && csv->used == csv->size)
1987 	    break;
1988 	}
1989 
1990     if (waitingForField) {
1991 	if (seenSomething || !csv->useIO) {
1992 	    NewField;
1993 	    if (csv->blank_is_undef || csv->empty_is_undef)
1994 		SvSetUndef (sv);
1995 	    else
1996 		SvSetEmpty (sv);
1997 	    unless (csv->is_bound)
1998 		av_push (fields, sv);
1999 	    if (csv->keep_meta_info && fflags)
2000 		av_push (fflags, newSViv (f));
2001 	    return TRUE;
2002 	    }
2003 
2004 	(void)SetDiag (csv, 2012);
2005 	return FALSE;
2006 	}
2007 
2008     if (f & CSV_FLAGS_QUO)
2009 	ERROR_INSIDE_QUOTES (2027);
2010 
2011     if (sv)
2012 	AV_PUSH;
2013     return TRUE;
2014     } /* Parse */
2015 
/* Run the user callback registered under cb_name, if there is one.
 *
 * hv      - object hash; its "callbacks" entry has to be a hash ref
 * cb_name - key looked up inside that callbacks hash
 * av      - array handed to the callback as its second argument
 *
 * The callback is invoked in scalar context with references to hv and av.
 * Returns 0 when no usable code ref is found, when call_sv () reports no
 * return value, or when the callback returned a reference to the string
 * "skip"; otherwise the count reported by call_sv ().
 */
static int hook (pTHX_ HV *hv, char *cb_name, AV *av) {
    SV **entry;
    HV  *callbacks;
    int  count;

#if MAINT_DEBUG > 1
    (void)fprintf (stderr, "# HOOK %s %x\n", cb_name, av);
#endif
    entry = hv_fetchs (hv, "callbacks", FALSE);
    if (!entry || !_is_hashref (*entry))
	return 0; /* uncoverable statement defensive programming */

    callbacks = (HV *)SvRV (*entry);
    entry     = hv_fetch (callbacks, cb_name, strlen (cb_name), FALSE);
    if (!entry || !_is_coderef (*entry))
	return 0;

    {   dSP;
	ENTER;
	SAVETMPS;
	PUSHMARK (SP);
	/* both references are pushed as mortals; FREETMPS releases them */
	mXPUSHs (newRV_inc ((SV *)hv));
	mXPUSHs (newRV_inc ((SV *)av));
	PUTBACK;
	count = call_sv (*entry, G_SCALAR);
	SPAGAIN;
	if (count) {
	    SV *ret = POPs;
	    if (SvROK (ret)) {
		SV *target = SvRV (ret);
		/* a returned \"skip" is reported to the caller as 0 */
		if (target && SvPOK (target) &&
			strcmp (SvPV_nolen (target), "skip") == 0)
		    count = 0;
		}
	    }
	PUTBACK;
	FREETMPS;
	LEAVE;
	}
    return count;
    } /* hook */
2054 
2055 #define c_xsParse(csv,hv,av,avf,src,useIO)	cx_c_xsParse (aTHX_ csv, hv, av, avf, src, useIO)
cx_c_xsParse(pTHX_ csv_t csv,HV * hv,AV * av,AV * avf,SV * src,bool useIO)2056 static int cx_c_xsParse (pTHX_ csv_t csv, HV *hv, AV *av, AV *avf, SV *src, bool useIO) {
2057     int	result, ahead = 0;
2058     SV	*pos = NULL;
2059 
2060     ENTER;
2061     if (csv.eolx || csv.eol_is_cr) {
2062 	/* local $/ = $eol */
2063 	SAVEGENERICSV (PL_rs);
2064 	PL_rs = newSVpvn ((char *)csv.eol, csv.eol_len);
2065 	}
2066 
2067     if ((csv.useIO = useIO)) {
2068 	csv.tmp = NULL;
2069 
2070 	if ((ahead = csv.has_ahead)) {
2071 	    SV **svp;
2072 	    if ((svp = hv_fetchs (hv, "_AHEAD", FALSE)) && *svp) {
2073 		csv.bptr = SvPV (csv.tmp = *svp, csv.size);
2074 		csv.used = 0;
2075 		if (pos && SvIV (pos) > (IV)csv.size)
2076 		    sv_setiv (pos, SvIV (pos) - csv.size);
2077 		}
2078 	    }
2079 	}
2080     else {
2081 	csv.tmp  = src;
2082 	csv.utf8 = SvUTF8 (src) ? 1 : 0;
2083 	csv.bptr = SvPV (src, csv.size);
2084 	}
2085     if (csv.has_error_input) {
2086 	(void)hv_store (hv, "_ERROR_INPUT", 12, &PL_sv_undef, 0);
2087 	csv.has_error_input = 0;
2088 	}
2089 
2090     result = Parse (&csv, src, av, avf);
2091     (void)hv_store (hv, "_RECNO", 6, newSViv (++csv.recno), 0);
2092     (void)hv_store (hv, "_EOF",   4, &PL_sv_no,             0);
2093 
2094     if (csv.strict) {
2095 	unless (csv.strict_n) csv.strict_n = (short)csv.fld_idx;
2096 	if (csv.fld_idx != csv.strict_n) {
2097 	    unless (csv.useIO & useIO_EOF)
2098 		ParseError (&csv, 2014, csv.used);
2099 	    if (last_error) /* an error callback can reset and accept */
2100 		result = FALSE;
2101 	    }
2102 	}
2103 
2104     if (csv.useIO) {
2105 	if (csv.tmp && csv.used < csv.size && csv.has_ahead) {
2106 	    SV *sv = newSVpvn (csv.bptr + csv.used, csv.size - csv.used);
2107 	    (void)hv_store  (hv, "_AHEAD", 6, sv, 0);
2108 	    }
2109 	else {
2110 	    csv.has_ahead = 0;
2111 	    if (csv.useIO & useIO_EOF)
2112 		(void)hv_store (hv, "_EOF", 4, &PL_sv_yes, 0);
2113 	    }
2114 	/* csv.cache[CACHE_ID__has_ahead] = csv.has_ahead; */
2115 	(void)memcpy (csv.cache, &csv, sizeof (csv_t));
2116 
2117 	if (avf) {
2118 	    if (csv.keep_meta_info)
2119 		(void)hv_store  (hv, "_FFLAGS", 7, newRV_noinc ((SV *)avf), 0);
2120 	    else {
2121 		av_undef (avf);
2122 		sv_free ((SV *)avf);
2123 		}
2124 	    }
2125 	}
2126     else /* just copy the cache */
2127 	(void)memcpy (csv.cache, &csv, sizeof (csv_t));
2128 
2129     if (result && csv.types) {
2130 	STRLEN	i;
2131 	STRLEN	len = av_len (av);
2132 	SV    **svp;
2133 
2134 	for (i = 0; i <= len && i <= csv.types_len; i++) {
2135 	    if ((svp = av_fetch (av, i, FALSE)) && *svp && SvOK (*svp)) {
2136 		switch (csv.types[i]) {
2137 		    case CSV_XS_TYPE_IV:
2138 #ifdef CSV_XS_TYPE_WARN
2139 			sv_setiv (*svp, SvIV (*svp));
2140 #else
2141 			if (SvTRUE (*svp))
2142 			    sv_setiv (*svp, SvIV (*svp));
2143 			else
2144 			    sv_setiv (*svp, 0);
2145 #endif
2146 			break;
2147 
2148 		    case CSV_XS_TYPE_NV:
2149 #ifdef CSV_XS_TYPE_WARN
2150 			sv_setnv (*svp, SvNV (*svp));
2151 #else
2152 			if (SvTRUE (*svp))
2153 			    sv_setnv (*svp, SvNV (*svp));
2154 			else
2155 			    sv_setnv (*svp, 0.0);
2156 #endif
2157 			break;
2158 
2159 		    default:
2160 			break;
2161 		    }
2162 		}
2163 	    }
2164 	}
2165 
2166     LEAVE;
2167 
2168     return result;
2169     } /* c_xsParse */
2170 
2171 #define xsParse(self,hv,av,avf,src,useIO)	cx_xsParse (aTHX_ self, hv, av, avf, src, useIO)
cx_xsParse(pTHX_ SV * self,HV * hv,AV * av,AV * avf,SV * src,bool useIO)2172 static int cx_xsParse (pTHX_ SV *self, HV *hv, AV *av, AV *avf, SV *src, bool useIO) {
2173     csv_t	csv;
2174     int		state;
2175     SetupCsv (&csv, hv, self);
2176     state = c_xsParse (csv, hv, av, avf, src, useIO);
2177     if (state && csv.has_hooks & HOOK_AFTER_PARSE)
2178 	(void)hook (aTHX_ hv, "after_parse", av);
2179     return (state || !last_error);
2180     } /* xsParse */
2181 
2182 /* API also offers av_clear and av_undef, but they have more overhead */
2183 #define av_empty(av)	cx_av_empty (aTHX_ av)
cx_av_empty(pTHX_ AV * av)2184 static void cx_av_empty (pTHX_ AV *av) {
2185     while (av_len (av) >= 0)
2186 	sv_free (av_pop (av));
2187     } /* av_empty */
2188 
2189 #define xsParse_all(self,hv,io,off,len)		cx_xsParse_all (aTHX_ self, hv, io, off, len)
cx_xsParse_all(pTHX_ SV * self,HV * hv,SV * io,SV * off,SV * len)2190 static SV *cx_xsParse_all (pTHX_ SV *self, HV *hv, SV *io, SV *off, SV *len) {
2191     csv_t	csv;
2192     int		n = 0, skip = 0, length = MAXINT, tail = MAXINT;
2193     AV		*avr = newAV ();
2194     AV		*row = newAV ();
2195 
2196     SetupCsv (&csv, hv, self);
2197 
2198     if (SvIOK (off)) {
2199 	skip = SvIV (off);
2200 	if (skip < 0) {
2201 	    tail = -skip;
2202 	    skip = -1;
2203 	    }
2204 	}
2205     if (SvIOK (len))
2206 	length = SvIV (len);
2207 
2208     while (c_xsParse (csv, hv, row, NULL, io, 1)) {
2209 
2210 	SetupCsv (&csv, hv, self);
2211 
2212 	if (skip > 0) {
2213 	    skip--;
2214 	    av_empty (row); /* re-use */
2215 	    continue;
2216 	    }
2217 
2218 	if (n++ >= tail) {
2219 	    SvREFCNT_dec (av_shift (avr));
2220 	    n--;
2221 	    }
2222 
2223 	if (csv.has_hooks & HOOK_AFTER_PARSE) {
2224 	    unless (hook (aTHX_ hv, "after_parse", row)) {
2225 		av_empty (row); /* re-use */
2226 		continue;
2227 		}
2228 	    }
2229 	av_push (avr, newRV_noinc ((SV *)row));
2230 
2231 	if (n >= length && skip >= 0)
2232 	    break; /* We have enough */
2233 
2234 	row = newAV ();
2235 	}
2236     while (n > length) {
2237 	SvREFCNT_dec (av_pop (avr));
2238 	n--;
2239 	}
2240 
2241     return (SV *)sv_2mortal (newRV_noinc ((SV *)avr));
2242     } /* xsParse_all */
2243 
2244 #define xsCombine(self,hv,av,io,useIO)	cx_xsCombine (aTHX_ self, hv, av, io, useIO)
cx_xsCombine(pTHX_ SV * self,HV * hv,AV * av,SV * io,bool useIO)2245 static int cx_xsCombine (pTHX_ SV *self, HV *hv, AV *av, SV *io, bool useIO) {
2246     csv_t	csv;
2247     int		result;
2248 #if (PERL_BCDVERSION >= 0x5008000)
2249     SV		*ors = PL_ors_sv;
2250 #endif
2251 
2252     SetupCsv (&csv, hv, self);
2253     csv.useIO = useIO;
2254 #if (PERL_BCDVERSION >= 0x5008000)
2255     if (*csv.eol)
2256 	PL_ors_sv = NULL;
2257 #endif
2258     if (useIO && csv.has_hooks & HOOK_BEFORE_PRINT)
2259 	(void)hook (aTHX_ hv, "before_print", av);
2260     result = Combine (&csv, io, av);
2261 #if (PERL_BCDVERSION >= 0x5008000)
2262     PL_ors_sv = ors;
2263 #endif
2264     if (result && !useIO && csv.utf8)
2265 	sv_utf8_upgrade (io);
2266     return result;
2267     } /* xsCombine */
2268 
MODULE = Text::CSV_XS		PACKAGE = Text::CSV_XS

PROTOTYPES: DISABLE

BOOT:
    /* Create the SVs holding the strings "getline" and "print" (presumably
     * the method names used when calling back into IO handles - confirm at
     * the call sites) and load IO::Handle without importing from it. */
    m_getline = newSVpvs ("getline");
    m_print   = newSVpvs ("print");
    Perl_load_module (aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvs ("IO::Handle"), NULL, NULL, NULL);
2277 
void
SetDiag (self, xse, ...)
    SV		*self
    int		 xse

  PPCODE:
    /* Set the diagnostic for error code xse and return the diagnostic SV.
     * With a reference as self the error is recorded through SetDiag ();
     * otherwise only last_error is set and a mortal diagnostic is
     * returned.  An optional third argument holding a string replaces
     * the message text when xse is non-zero. */
    HV		*hv;
    csv_t	csv;

    if (SvOK (self) && SvROK (self)) {
	CSV_XS_SELF;
	SetupCsv (&csv, hv, self);
	ST (0) = SetDiag (&csv, xse);
	}
    else {
	last_error = xse;
	ST (0) = sv_2mortal (SvDiag (xse));
	}

    /* ST (2) only exists when a third argument was actually passed */
    if (xse && items > 2 && SvPOK (ST (2))) {
	sv_setpvn (ST (0),  SvPVX (ST (2)), SvCUR (ST (2)));
	/* sv_setpvn leaves a plain string; flag the integer slot valid
	 * again (presumably to keep the numeric error code - confirm) */
	SvIOK_on  (ST (0));
	}

    XSRETURN (1);
    /* XS SetDiag */
2304 
void
error_input (self)
    SV		*self

  PPCODE:
    /* Return the _ERROR_INPUT entry of the object hash when self is a
     * hash ref and the entry exists and is defined; otherwise return a
     * fresh mortal undef. */
    SV **svp = NULL;

    if (self && SvOK (self) && SvROK (self) && SvTYPE (SvRV (self)) == SVt_PVHV)
	svp = hv_fetchs ((HV *)SvRV (self), "_ERROR_INPUT", FALSE);

    /* hv_fetchs returns NULL for a missing key, so check before use */
    if (svp && *svp && SvOK (*svp))
	ST (0) = *svp;
    else
	ST (0) = sv_2mortal (newSV (0)); /* mortal: do not leak the SV */

    XSRETURN (1);
    /* XS error_input */
2323 
void
Combine (self, dst, fields, useIO)
    SV		*self
    SV		*dst
    SV		*fields
    bool	 useIO

  PPCODE:
    /* Combine the array referenced by fields into dst.
     * Returns true on success and undef on failure. */
    HV	*hv;
    AV	*av;
    int	 ok;

    CSV_XS_SELF;
    av = (AV *)SvRV (fields);
    ok = xsCombine (self, hv, av, dst, useIO);
    if (ok)
	ST (0) = &PL_sv_yes;
    else
	ST (0) = &PL_sv_undef;
    XSRETURN (1);
    /* XS Combine */
2340 
void
Parse (self, src, fields, fflags)
    SV		*self
    SV		*src
    SV		*fields
    SV		*fflags

  PPCODE:
    /* Parse the string src into the arrays referenced by fields and
     * fflags (useIO is 0).  Returns true on success, false otherwise. */
    HV	*hv;
    AV	*av;
    AV	*avf;
    int	 ok;

    CSV_XS_SELF;
    av  = (AV *)SvRV (fields);
    avf = (AV *)SvRV (fflags);

    ok = xsParse (self, hv, av, avf, src, 0);
    if (ok)
	ST (0) = &PL_sv_yes;
    else
	ST (0) = &PL_sv_no;
    XSRETURN (1);
    /* XS Parse */
2360 
void
print (self, io, fields)
    SV		*self
    SV		*io
    SV		*fields

  PPCODE:
    /* Combine fields and write the record to io.
     * fields is an array ref, or the literal undef for an empty record;
     * anything else croaks.  Returns true on success, false otherwise. */
    HV	 *hv;
    AV	 *av;

    CSV_XS_SELF;
    if (fields == &PL_sv_undef) {
	/* mortal, so the placeholder array is released after the call
	 * instead of leaking */
	av = (AV *)sv_2mortal ((SV *)newAV ());
	}
    else {
	unless (_is_arrayref (fields))
	    croak ("Expected fields to be an array ref");

	av = (AV *)SvRV (fields);
	}

    ST (0) = xsCombine (self, hv, av, io, 1) ? &PL_sv_yes : &PL_sv_no;
    XSRETURN (1);
    /* XS print */
2384 
void
getline (self, io)
    SV		*self
    SV		*io

  PPCODE:
    /* Read and parse one record from io.
     * Returns a mortal reference to the array of fields when xsParse ()
     * reports success, undef otherwise. */
    HV	*hv;
    AV	*av;
    AV	*avf;

    CSV_XS_SELF;
    av  = newAV ();
    avf = newAV (); /* handed to xsParse; not touched again here */
    if (xsParse (self, hv, av, avf, io, 1))
	ST (0) = sv_2mortal (newRV_noinc ((SV *)av));
    else {
	/* nothing took ownership of av: free it instead of leaking it */
	SvREFCNT_dec ((SV *)av);
	ST (0) = &PL_sv_undef;
	}
    XSRETURN (1);
    /* XS getline */
2403 
void
getline_all (self, io, ...)
    SV		*self
    SV		*io

  PPCODE:
    /* Read all records from io; the optional third and fourth arguments
     * are passed on as offset and length and default to undef. */
    HV	*hv;
    SV	*offset = &PL_sv_undef;
    SV	*length = &PL_sv_undef;

    CSV_XS_SELF;

    if (items > 2)
	offset = ST (2);
    if (items > 3)
	length = ST (3);

    ST (0) = xsParse_all (self, hv, io, offset, length);
    XSRETURN (1);
    /* XS getline_all */
2421 
void
_cache_set (self, idx, val)
    SV		*self
    int		 idx
    SV		*val

  PPCODE:
    /* Pass idx and val on to xs_cache_set () together with hv, which
     * CSV_XS_SELF (defined elsewhere) is expected to derive from self.
     * NOTE(review): ST (0) is never assigned here, so XSRETURN (1)
     * presumably hands back whatever already sits in the first stack
     * slot - confirm that callers ignore the return value. */
    HV	*hv;

    CSV_XS_SELF;
    xs_cache_set (hv, idx, val);
    XSRETURN (1);
    /* XS _cache_set */
2435 
void
_cache_diag (self)
    SV		*self

  PPCODE:
    /* Call xs_cache_diag () with hv, which CSV_XS_SELF (defined
     * elsewhere) is expected to derive from self.
     * NOTE(review): ST (0) is never assigned here, so XSRETURN (1)
     * presumably hands back whatever already sits in the first stack
     * slot - confirm that callers ignore the return value. */
    HV	*hv;

    CSV_XS_SELF;
    xs_cache_diag (hv);
    XSRETURN (1);
    /* XS _cache_diag */
2447