1 /* csv module */
2 
3 /*
4 
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module.  Users should not use this module directly, but import the csv.py
7 module instead.
8 
9 */
10 
/* Version string of this extension module (its consumers are not in this
 * part of the file). */
#define MODULE_VERSION "1.0"
12 
13 #include "Python.h"
14 #include "structmember.h"
15 
16 
/*
 * Per-module state, stored in the module object and retrieved with
 * PyModule_GetState().
 */
typedef struct {
    PyObject *error_obj;   /* CSV exception; raised via PyErr_Format() */
    PyObject *dialects;   /* Dialect registry; looked up as a dict */
    long field_limit;   /* max parsed field size, in characters */
} _csvstate;

/* Fetch the _csvstate block attached to module object `o`.  The function-like
 * macro deliberately shares its name with the typedef above. */
#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24 
25 static int
_csv_clear(PyObject * m)26 _csv_clear(PyObject *m)
27 {
28     Py_CLEAR(_csvstate(m)->error_obj);
29     Py_CLEAR(_csvstate(m)->dialects);
30     return 0;
31 }
32 
33 static int
_csv_traverse(PyObject * m,visitproc visit,void * arg)34 _csv_traverse(PyObject *m, visitproc visit, void *arg)
35 {
36     Py_VISIT(_csvstate(m)->error_obj);
37     Py_VISIT(_csvstate(m)->dialects);
38     return 0;
39 }
40 
41 static void
_csv_free(void * m)42 _csv_free(void *m)
43 {
44    _csv_clear((PyObject *)m);
45 }
46 
/* Forward declaration; needed by _csvstate_global below. */
static struct PyModuleDef _csvmodule;

/*
 * State of the _csv module found through the interpreter's module table,
 * for code that has no module object at hand.
 * NOTE(review): the result of PyState_FindModule() is not checked here --
 * confirm that it cannot be NULL at any call site.
 */
#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
50 
/*
 * States of the reader's per-character state machine.  The transitions are
 * implemented in parse_process_char(); START_RECORD is both the initial
 * state and the state that signals a finished record to Reader_iternext().
 */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState;
56 
/*
 * Quoting policies accepted for a dialect's "quoting" option.  The values
 * are compared against plain ints in dialect_check_quoting().
 */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
60 
/* Pairs a QuoteStyle value with its symbolic name. */
typedef struct {
    QuoteStyle style;   /* enum value */
    const char *name;   /* name of the value; NULL marks the table end */
} StyleDesc;
65 
/*
 * Table of all valid quoting styles.  The final zeroed entry has a NULL
 * name and acts as the terminator that dialect_check_quoting() stops at.
 */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
73 
/*
 * Instance layout of _csv.Dialect: the dialect options after conversion to
 * C values by dialect_new().  A Py_UCS4 option equal to 0 means "not set"
 * and is reported to Python as None by the attribute getters.
 */
typedef struct {
    PyObject_HEAD

    int doublequote;            /* is " represented by ""? */
    Py_UCS4 delimiter;       /* field separator */
    Py_UCS4 quotechar;       /* quote character */
    Py_UCS4 escapechar;      /* escape character */
    int skipinitialspace;       /* ignore spaces following delimiter? */
    PyObject *lineterminator; /* string to write between records (owned) */
    int quoting;                /* style of quoting to write (a QuoteStyle) */

    int strict;                 /* raise exception on bad CSV */
} DialectObj;
87 
/* Forward declaration; the type object is defined after dialect_new(). */
static PyTypeObject Dialect_Type;
89 
/*
 * Instance layout of _csv.reader: the input iterator, the dialect and the
 * state of the record currently being parsed.
 */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;           /* field list for current record */
    ParserState state;          /* current CSV parse state */
    Py_UCS4 *field;             /* temporary buffer (PyMem allocated) */
    Py_ssize_t field_size;      /* size of allocated buffer, in Py_UCS4 units */
    Py_ssize_t field_len;       /* length of current field */
    int numeric_field;          /* treat field as numeric (convert to float) */
    unsigned long line_num;     /* Source-file line number */
} ReaderObj;
105 
/* Forward declaration; the type object is defined after the reader code. */
static PyTypeObject Reader_Type;

/* Exact type test: true only for Reader_Type itself, not for subtypes. */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
109 
/*
 * Instance layout of the writer object.  The code that uses it is not in
 * this part of the file; the field notes below are the original ones.
 */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    Py_UCS4 *rec;            /* buffer for parser.join */
    Py_ssize_t rec_size;        /* size of allocated record */
    Py_ssize_t rec_len;         /* length of record */
    int num_fields;             /* number of fields in record */
} WriterObj;
122 
/* Forward declaration; the definition is not in this part of the file. */
static PyTypeObject Writer_Type;
124 
125 /*
126  * DIALECT class
127  */
128 
129 static PyObject *
get_dialect_from_registry(PyObject * name_obj)130 get_dialect_from_registry(PyObject * name_obj)
131 {
132     PyObject *dialect_obj;
133 
134     dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
135     if (dialect_obj == NULL) {
136         if (!PyErr_Occurred())
137             PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
138     }
139     else
140         Py_INCREF(dialect_obj);
141     return dialect_obj;
142 }
143 
144 static PyObject *
get_string(PyObject * str)145 get_string(PyObject *str)
146 {
147     Py_XINCREF(str);
148     return str;
149 }
150 
151 static PyObject *
get_nullchar_as_None(Py_UCS4 c)152 get_nullchar_as_None(Py_UCS4 c)
153 {
154     if (c == '\0') {
155         Py_RETURN_NONE;
156     }
157     else
158         return PyUnicode_FromOrdinal(c);
159 }
160 
161 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))162 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
163 {
164     return get_string(self->lineterminator);
165 }
166 
167 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))168 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
169 {
170     return get_nullchar_as_None(self->delimiter);
171 }
172 
173 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))174 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
175 {
176     return get_nullchar_as_None(self->escapechar);
177 }
178 
179 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))180 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
181 {
182     return get_nullchar_as_None(self->quotechar);
183 }
184 
185 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))186 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
187 {
188     return PyLong_FromLong(self->quoting);
189 }
190 
191 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)192 _set_bool(const char *name, int *target, PyObject *src, int dflt)
193 {
194     if (src == NULL)
195         *target = dflt;
196     else {
197         int b = PyObject_IsTrue(src);
198         if (b < 0)
199             return -1;
200         *target = b;
201     }
202     return 0;
203 }
204 
205 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)206 _set_int(const char *name, int *target, PyObject *src, int dflt)
207 {
208     if (src == NULL)
209         *target = dflt;
210     else {
211         int value;
212         if (!PyLong_CheckExact(src)) {
213             PyErr_Format(PyExc_TypeError,
214                          "\"%s\" must be an integer", name);
215             return -1;
216         }
217         value = _PyLong_AsInt(src);
218         if (value == -1 && PyErr_Occurred()) {
219             return -1;
220         }
221         *target = value;
222     }
223     return 0;
224 }
225 
226 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)227 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
228 {
229     if (src == NULL)
230         *target = dflt;
231     else {
232         *target = '\0';
233         if (src != Py_None) {
234             Py_ssize_t len;
235             if (!PyUnicode_Check(src)) {
236                 PyErr_Format(PyExc_TypeError,
237                     "\"%s\" must be string, not %.200s", name,
238                     src->ob_type->tp_name);
239                 return -1;
240             }
241             len = PyUnicode_GetLength(src);
242             if (len > 1) {
243                 PyErr_Format(PyExc_TypeError,
244                     "\"%s\" must be a 1-character string",
245                     name);
246                 return -1;
247             }
248             /* PyUnicode_READY() is called in PyUnicode_GetLength() */
249             if (len > 0)
250                 *target = PyUnicode_READ_CHAR(src, 0);
251         }
252     }
253     return 0;
254 }
255 
256 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)257 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
258 {
259     if (src == NULL)
260         *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
261     else {
262         if (src == Py_None)
263             *target = NULL;
264         else if (!PyUnicode_Check(src)) {
265             PyErr_Format(PyExc_TypeError,
266                          "\"%s\" must be a string", name);
267             return -1;
268         }
269         else {
270             if (PyUnicode_READY(src) == -1)
271                 return -1;
272             Py_INCREF(src);
273             Py_XSETREF(*target, src);
274         }
275     }
276     return 0;
277 }
278 
279 static int
dialect_check_quoting(int quoting)280 dialect_check_quoting(int quoting)
281 {
282     const StyleDesc *qs;
283 
284     for (qs = quote_styles; qs->name; qs++) {
285         if ((int)qs->style == quoting)
286             return 0;
287     }
288     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
289     return -1;
290 }
291 
/* Byte offset of field `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/*
 * Dialect attributes served directly from the C struct: three int fields,
 * all read-only.  Installed as tp_members of Dialect_Type.
 */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    { "strict",             T_INT, D_OFF(strict), READONLY },
    { NULL }
};
300 
/*
 * Dialect attributes that need conversion to a Python object.  Only a
 * getter is supplied for each entry; the setter slot is left zeroed.
 * Installed as tp_getset of Dialect_Type.
 */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
309 
310 static void
Dialect_dealloc(DialectObj * self)311 Dialect_dealloc(DialectObj *self)
312 {
313     Py_XDECREF(self->lineterminator);
314     Py_TYPE(self)->tp_free((PyObject *)self);
315 }
316 
/*
 * Keyword names accepted by dialect_new().  The order must match the order
 * of the output pointers passed to PyArg_ParseTupleAndKeywords() there.
 */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
329 
330 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)331 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
332 {
333     DialectObj *self;
334     PyObject *ret = NULL;
335     PyObject *dialect = NULL;
336     PyObject *delimiter = NULL;
337     PyObject *doublequote = NULL;
338     PyObject *escapechar = NULL;
339     PyObject *lineterminator = NULL;
340     PyObject *quotechar = NULL;
341     PyObject *quoting = NULL;
342     PyObject *skipinitialspace = NULL;
343     PyObject *strict = NULL;
344 
345     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
346                                      "|OOOOOOOOO", dialect_kws,
347                                      &dialect,
348                                      &delimiter,
349                                      &doublequote,
350                                      &escapechar,
351                                      &lineterminator,
352                                      &quotechar,
353                                      &quoting,
354                                      &skipinitialspace,
355                                      &strict))
356         return NULL;
357 
358     if (dialect != NULL) {
359         if (PyUnicode_Check(dialect)) {
360             dialect = get_dialect_from_registry(dialect);
361             if (dialect == NULL)
362                 return NULL;
363         }
364         else
365             Py_INCREF(dialect);
366         /* Can we reuse this instance? */
367         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
368             delimiter == NULL &&
369             doublequote == NULL &&
370             escapechar == NULL &&
371             lineterminator == NULL &&
372             quotechar == NULL &&
373             quoting == NULL &&
374             skipinitialspace == NULL &&
375             strict == NULL)
376             return dialect;
377     }
378 
379     self = (DialectObj *)type->tp_alloc(type, 0);
380     if (self == NULL) {
381         Py_XDECREF(dialect);
382         return NULL;
383     }
384     self->lineterminator = NULL;
385 
386     Py_XINCREF(delimiter);
387     Py_XINCREF(doublequote);
388     Py_XINCREF(escapechar);
389     Py_XINCREF(lineterminator);
390     Py_XINCREF(quotechar);
391     Py_XINCREF(quoting);
392     Py_XINCREF(skipinitialspace);
393     Py_XINCREF(strict);
394     if (dialect != NULL) {
395 #define DIALECT_GETATTR(v, n) \
396         if (v == NULL) \
397             v = PyObject_GetAttrString(dialect, n)
398         DIALECT_GETATTR(delimiter, "delimiter");
399         DIALECT_GETATTR(doublequote, "doublequote");
400         DIALECT_GETATTR(escapechar, "escapechar");
401         DIALECT_GETATTR(lineterminator, "lineterminator");
402         DIALECT_GETATTR(quotechar, "quotechar");
403         DIALECT_GETATTR(quoting, "quoting");
404         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
405         DIALECT_GETATTR(strict, "strict");
406         PyErr_Clear();
407     }
408 
409     /* check types and convert to C values */
410 #define DIASET(meth, name, target, src, dflt) \
411     if (meth(name, target, src, dflt)) \
412         goto err
413     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
414     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
415     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
416     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
417     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
418     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
419     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
420     DIASET(_set_bool, "strict", &self->strict, strict, 0);
421 
422     /* validate options */
423     if (dialect_check_quoting(self->quoting))
424         goto err;
425     if (self->delimiter == 0) {
426         PyErr_SetString(PyExc_TypeError,
427                         "\"delimiter\" must be a 1-character string");
428         goto err;
429     }
430     if (quotechar == Py_None && quoting == NULL)
431         self->quoting = QUOTE_NONE;
432     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
433         PyErr_SetString(PyExc_TypeError,
434                         "quotechar must be set if quoting enabled");
435         goto err;
436     }
437     if (self->lineterminator == 0) {
438         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
439         goto err;
440     }
441 
442     ret = (PyObject *)self;
443     Py_INCREF(self);
444 err:
445     Py_XDECREF(self);
446     Py_XDECREF(dialect);
447     Py_XDECREF(delimiter);
448     Py_XDECREF(doublequote);
449     Py_XDECREF(escapechar);
450     Py_XDECREF(lineterminator);
451     Py_XDECREF(quotechar);
452     Py_XDECREF(quoting);
453     Py_XDECREF(skipinitialspace);
454     Py_XDECREF(strict);
455     return ret;
456 }
457 
458 
/* Docstring used as tp_doc of Dialect_Type. */
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");
463 
/*
 * Type object for _csv.Dialect.  It can be subclassed (Py_TPFLAGS_BASETYPE),
 * is created through dialect_new, and exposes its options via
 * Dialect_memberlist and Dialect_getsetlist.  It does not set the GC flag
 * and defines no traverse/clear slots.
 */
static PyTypeObject Dialect_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.Dialect",                         /* tp_name */
    sizeof(DialectObj),                     /* tp_basicsize */
    0,                                      /* tp_itemsize */
    /*  methods  */
    (destructor)Dialect_dealloc,            /* tp_dealloc */
    (printfunc)0,                           /* tp_print */
    (getattrfunc)0,                         /* tp_getattr */
    (setattrfunc)0,                         /* tp_setattr */
    0,                                      /* tp_reserved */
    (reprfunc)0,                            /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    (hashfunc)0,                            /* tp_hash */
    (ternaryfunc)0,                         /* tp_call */
    (reprfunc)0,                                /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    Dialect_Type_doc,                       /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    0,                                          /* tp_methods */
    Dialect_memberlist,                     /* tp_members */
    Dialect_getsetlist,                     /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    dialect_new,                                /* tp_new */
    0,                                          /* tp_free */
};
506 
507 /*
508  * Return an instance of the dialect type, given a Python instance or kwarg
509  * description of the dialect
510  */
511 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)512 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
513 {
514     PyObject *type = (PyObject *)&Dialect_Type;
515     if (dialect_inst) {
516         return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
517     }
518     else {
519         return _PyObject_FastCallDict(type, NULL, 0, kwargs);
520     }
521 }
522 
523 /*
524  * READER
525  */
526 static int
parse_save_field(ReaderObj * self)527 parse_save_field(ReaderObj *self)
528 {
529     PyObject *field;
530 
531     field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
532                                       (void *) self->field, self->field_len);
533     if (field == NULL)
534         return -1;
535     self->field_len = 0;
536     if (self->numeric_field) {
537         PyObject *tmp;
538 
539         self->numeric_field = 0;
540         tmp = PyNumber_Float(field);
541         Py_DECREF(field);
542         if (tmp == NULL)
543             return -1;
544         field = tmp;
545     }
546     if (PyList_Append(self->fields, field) < 0) {
547         Py_DECREF(field);
548         return -1;
549     }
550     Py_DECREF(field);
551     return 0;
552 }
553 
554 static int
parse_grow_buff(ReaderObj * self)555 parse_grow_buff(ReaderObj *self)
556 {
557     assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
558 
559     Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
560     Py_UCS4 *field_new = self->field;
561     PyMem_Resize(field_new, Py_UCS4, field_size_new);
562     if (field_new == NULL) {
563         PyErr_NoMemory();
564         return 0;
565     }
566     self->field = field_new;
567     self->field_size = field_size_new;
568     return 1;
569 }
570 
571 static int
parse_add_char(ReaderObj * self,Py_UCS4 c)572 parse_add_char(ReaderObj *self, Py_UCS4 c)
573 {
574     if (self->field_len >= _csvstate_global->field_limit) {
575         PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
576                      _csvstate_global->field_limit);
577         return -1;
578     }
579     if (self->field_len == self->field_size && !parse_grow_buff(self))
580         return -1;
581     self->field[self->field_len++] = c;
582     return 0;
583 }
584 
/*
 * Advance the reader's state machine by one character.
 *
 * `c` is either a character of the current input line or 0.  The value 0 is
 * an end-of-line marker: Reader_iternext() rejects NUL characters found in
 * the input and passes 0 exactly once after the last character of each
 * line.  Characters are collected with parse_add_char(), and finished
 * fields are appended to self->fields with parse_save_field().
 *
 * A record is complete when the state is back at START_RECORD after the
 * end-of-line marker has been processed.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
parse_process_char(ReaderObj *self, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* character following the escape character in an unquoted field */
        if (c == '\n' || c=='\r') {
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* escape character was the last one on the line: store a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* the end-of-line marker right after an escaped CR/LF does not end
           the record; any other character is handled as in IN_FIELD */
        if (c == '\0')
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: keep the state so that the field
               continues on the next input line */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* escape character was the last one on the line: store a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the stray character and continue as an
               unquoted field */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line break only further CR/LF characters or the
           end-of-line marker are acceptable */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
765 
766 static int
parse_reset(ReaderObj * self)767 parse_reset(ReaderObj *self)
768 {
769     Py_XSETREF(self->fields, PyList_New(0));
770     if (self->fields == NULL)
771         return -1;
772     self->field_len = 0;
773     self->state = START_RECORD;
774     self->numeric_field = 0;
775     return 0;
776 }
777 
778 static PyObject *
Reader_iternext(ReaderObj * self)779 Reader_iternext(ReaderObj *self)
780 {
781     PyObject *fields = NULL;
782     Py_UCS4 c;
783     Py_ssize_t pos, linelen;
784     unsigned int kind;
785     void *data;
786     PyObject *lineobj;
787 
788     if (parse_reset(self) < 0)
789         return NULL;
790     do {
791         lineobj = PyIter_Next(self->input_iter);
792         if (lineobj == NULL) {
793             /* End of input OR exception */
794             if (!PyErr_Occurred() && (self->field_len != 0 ||
795                                       self->state == IN_QUOTED_FIELD)) {
796                 if (self->dialect->strict)
797                     PyErr_SetString(_csvstate_global->error_obj,
798                                     "unexpected end of data");
799                 else if (parse_save_field(self) >= 0)
800                     break;
801             }
802             return NULL;
803         }
804         if (!PyUnicode_Check(lineobj)) {
805             PyErr_Format(_csvstate_global->error_obj,
806                          "iterator should return strings, "
807                          "not %.200s "
808                          "(did you open the file in text mode?)",
809                          lineobj->ob_type->tp_name
810                 );
811             Py_DECREF(lineobj);
812             return NULL;
813         }
814         if (PyUnicode_READY(lineobj) == -1) {
815             Py_DECREF(lineobj);
816             return NULL;
817         }
818         ++self->line_num;
819         kind = PyUnicode_KIND(lineobj);
820         data = PyUnicode_DATA(lineobj);
821         pos = 0;
822         linelen = PyUnicode_GET_LENGTH(lineobj);
823         while (linelen--) {
824             c = PyUnicode_READ(kind, data, pos);
825             if (c == '\0') {
826                 Py_DECREF(lineobj);
827                 PyErr_Format(_csvstate_global->error_obj,
828                              "line contains NUL");
829                 goto err;
830             }
831             if (parse_process_char(self, c) < 0) {
832                 Py_DECREF(lineobj);
833                 goto err;
834             }
835             pos++;
836         }
837         Py_DECREF(lineobj);
838         if (parse_process_char(self, 0) < 0)
839             goto err;
840     } while (self->state != START_RECORD);
841 
842     fields = self->fields;
843     self->fields = NULL;
844 err:
845     return fields;
846 }
847 
848 static void
Reader_dealloc(ReaderObj * self)849 Reader_dealloc(ReaderObj *self)
850 {
851     PyObject_GC_UnTrack(self);
852     Py_XDECREF(self->dialect);
853     Py_XDECREF(self->input_iter);
854     Py_XDECREF(self->fields);
855     if (self->field != NULL)
856         PyMem_Free(self->field);
857     PyObject_GC_Del(self);
858 }
859 
/*
 * tp_traverse of the reader: present every object reference held by the
 * reader to the `visit` callback.  Py_VISIT relies on the parameter names
 * `visit` and `arg`.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
868 
/*
 * tp_clear of the reader: drop the object references held by the reader.
 * The PyMem field buffer is not touched here; Reader_dealloc() frees it.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
877 
/* Docstring used as tp_doc of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
884 
/* Method table of the reader type; it holds only the terminating entry. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of field `x` inside ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)

/*
 * Read-only reader attributes served directly from the C struct: the
 * dialect object and the count of input lines consumed so far.
 */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
895 
896 
897 static PyTypeObject Reader_Type = {
898     PyVarObject_HEAD_INIT(NULL, 0)
899     "_csv.reader",                          /*tp_name*/
900     sizeof(ReaderObj),                      /*tp_basicsize*/
901     0,                                      /*tp_itemsize*/
902     /* methods */
903     (destructor)Reader_dealloc,             /*tp_dealloc*/
904     (printfunc)0,                           /*tp_print*/
905     (getattrfunc)0,                         /*tp_getattr*/
906     (setattrfunc)0,                         /*tp_setattr*/
907     0,                                     /*tp_reserved*/
908     (reprfunc)0,                            /*tp_repr*/
909     0,                                      /*tp_as_number*/
910     0,                                      /*tp_as_sequence*/
911     0,                                      /*tp_as_mapping*/
912     (hashfunc)0,                            /*tp_hash*/
913     (ternaryfunc)0,                         /*tp_call*/
914     (reprfunc)0,                                /*tp_str*/
915     0,                                      /*tp_getattro*/
916     0,                                      /*tp_setattro*/
917     0,                                      /*tp_as_buffer*/
918     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
919         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
920     Reader_Type_doc,                        /*tp_doc*/
921     (traverseproc)Reader_traverse,          /*tp_traverse*/
922     (inquiry)Reader_clear,                  /*tp_clear*/
923     0,                                      /*tp_richcompare*/
924     0,                                      /*tp_weaklistoffset*/
925     PyObject_SelfIter,                          /*tp_iter*/
926     (getiterfunc)Reader_iternext,           /*tp_iternext*/
927     Reader_methods,                         /*tp_methods*/
928     Reader_memberlist,                      /*tp_members*/
929     0,                                      /*tp_getset*/
930 
931 };
932 
933 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)934 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
935 {
936     PyObject * iterator, * dialect = NULL;
937     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
938 
939     if (!self)
940         return NULL;
941 
942     self->dialect = NULL;
943     self->fields = NULL;
944     self->input_iter = NULL;
945     self->field = NULL;
946     self->field_size = 0;
947     self->line_num = 0;
948 
949     if (parse_reset(self) < 0) {
950         Py_DECREF(self);
951         return NULL;
952     }
953 
954     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
955         Py_DECREF(self);
956         return NULL;
957     }
958     self->input_iter = PyObject_GetIter(iterator);
959     if (self->input_iter == NULL) {
960         PyErr_SetString(PyExc_TypeError,
961                         "argument 1 must be an iterator");
962         Py_DECREF(self);
963         return NULL;
964     }
965     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
966     if (self->dialect == NULL) {
967         Py_DECREF(self);
968         return NULL;
969     }
970 
971     PyObject_GC_Track(self);
972     return (PyObject *)self;
973 }
974 
975 /*
976  * WRITER
977  */
978 /* ---------------------------------------------------------------- */
/* Begin a new output record: zero the count of characters and fields
 * accumulated so far.  The record buffer itself (rec / rec_size) is not
 * touched.
 */
static void
join_reset(WriterObj *self)
{
    self->rec_len = 0;
    self->num_fields = 0;
}
985 
/* Growth granularity of the writer's record buffer, in Py_UCS4 elements:
 * join_check_rec_size() sizes the buffer to a multiple of this value.
 */
#define MEM_INCR 32768
987 
988 /* Calculate new record length or append field to record.  Return new
989  * record length.
990  */
991 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)992 join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
993                  Py_ssize_t field_len, int *quoted,
994                  int copy_phase)
995 {
996     DialectObj *dialect = self->dialect;
997     int i;
998     Py_ssize_t rec_len;
999 
1000 #define INCLEN \
1001     do {\
1002         if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
1003             goto overflow; \
1004         } \
1005         rec_len++; \
1006     } while(0)
1007 
1008 #define ADDCH(c)                                \
1009     do {\
1010         if (copy_phase) \
1011             self->rec[rec_len] = c;\
1012         INCLEN;\
1013     } while(0)
1014 
1015     rec_len = self->rec_len;
1016 
1017     /* If this is not the first field we need a field separator */
1018     if (self->num_fields > 0)
1019         ADDCH(dialect->delimiter);
1020 
1021     /* Handle preceding quote */
1022     if (copy_phase && *quoted)
1023         ADDCH(dialect->quotechar);
1024 
1025     /* Copy/count field data */
1026     /* If field is null just pass over */
1027     for (i = 0; field_data && (i < field_len); i++) {
1028         Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1029         int want_escape = 0;
1030 
1031         if (c == dialect->delimiter ||
1032             c == dialect->escapechar ||
1033             c == dialect->quotechar  ||
1034             PyUnicode_FindChar(
1035                 dialect->lineterminator, c, 0,
1036                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1037             if (dialect->quoting == QUOTE_NONE)
1038                 want_escape = 1;
1039             else {
1040                 if (c == dialect->quotechar) {
1041                     if (dialect->doublequote)
1042                         ADDCH(dialect->quotechar);
1043                     else
1044                         want_escape = 1;
1045                 }
1046                 if (!want_escape)
1047                     *quoted = 1;
1048             }
1049             if (want_escape) {
1050                 if (!dialect->escapechar) {
1051                     PyErr_Format(_csvstate_global->error_obj,
1052                                  "need to escape, but no escapechar set");
1053                     return -1;
1054                 }
1055                 ADDCH(dialect->escapechar);
1056             }
1057         }
1058         /* Copy field character into record buffer.
1059          */
1060         ADDCH(c);
1061     }
1062 
1063     if (*quoted) {
1064         if (copy_phase)
1065             ADDCH(dialect->quotechar);
1066         else {
1067             INCLEN; /* starting quote */
1068             INCLEN; /* ending quote */
1069         }
1070     }
1071     return rec_len;
1072 
1073   overflow:
1074     PyErr_NoMemory();
1075     return -1;
1076 #undef ADDCH
1077 #undef INCLEN
1078 }
1079 
1080 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1081 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1082 {
1083     assert(rec_len >= 0);
1084 
1085     if (rec_len > self->rec_size) {
1086         size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1087         Py_UCS4 *rec_new = self->rec;
1088         PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1089         if (rec_new == NULL) {
1090             PyErr_NoMemory();
1091             return 0;
1092         }
1093         self->rec = rec_new;
1094         self->rec_size = (Py_ssize_t)rec_size_new;
1095     }
1096     return 1;
1097 }
1098 
1099 static int
join_append(WriterObj * self,PyObject * field,int quoted)1100 join_append(WriterObj *self, PyObject *field, int quoted)
1101 {
1102     unsigned int field_kind = -1;
1103     void *field_data = NULL;
1104     Py_ssize_t field_len = 0;
1105     Py_ssize_t rec_len;
1106 
1107     if (field != NULL) {
1108         if (PyUnicode_READY(field) == -1)
1109             return 0;
1110         field_kind = PyUnicode_KIND(field);
1111         field_data = PyUnicode_DATA(field);
1112         field_len = PyUnicode_GET_LENGTH(field);
1113     }
1114     rec_len = join_append_data(self, field_kind, field_data, field_len,
1115                                &quoted, 0);
1116     if (rec_len < 0)
1117         return 0;
1118 
1119     /* grow record buffer if necessary */
1120     if (!join_check_rec_size(self, rec_len))
1121         return 0;
1122 
1123     self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1124                                      &quoted, 1);
1125     self->num_fields++;
1126 
1127     return 1;
1128 }
1129 
1130 static int
join_append_lineterminator(WriterObj * self)1131 join_append_lineterminator(WriterObj *self)
1132 {
1133     Py_ssize_t terminator_len, i;
1134     unsigned int term_kind;
1135     void *term_data;
1136 
1137     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1138     if (terminator_len == -1)
1139         return 0;
1140 
1141     /* grow record buffer if necessary */
1142     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1143         return 0;
1144 
1145     term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1146     term_data = PyUnicode_DATA(self->dialect->lineterminator);
1147     for (i = 0; i < terminator_len; i++)
1148         self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1149     self->rec_len += terminator_len;
1150 
1151     return 1;
1152 }
1153 
/* Docstring of the writer's writerow() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(iterable)\n"
"\n"
"Construct and write a CSV record from an iterable of fields.  Non-string\n"
"elements will be converted to string.");
1159 
1160 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1161 csv_writerow(WriterObj *self, PyObject *seq)
1162 {
1163     DialectObj *dialect = self->dialect;
1164     PyObject *iter, *field, *line, *result;
1165 
1166     iter = PyObject_GetIter(seq);
1167     if (iter == NULL)
1168         return PyErr_Format(_csvstate_global->error_obj,
1169                             "iterable expected, not %.200s",
1170                             seq->ob_type->tp_name);
1171 
1172     /* Join all fields in internal buffer.
1173      */
1174     join_reset(self);
1175     while ((field = PyIter_Next(iter))) {
1176         int append_ok;
1177         int quoted;
1178 
1179         switch (dialect->quoting) {
1180         case QUOTE_NONNUMERIC:
1181             quoted = !PyNumber_Check(field);
1182             break;
1183         case QUOTE_ALL:
1184             quoted = 1;
1185             break;
1186         default:
1187             quoted = 0;
1188             break;
1189         }
1190 
1191         if (PyUnicode_Check(field)) {
1192             append_ok = join_append(self, field, quoted);
1193             Py_DECREF(field);
1194         }
1195         else if (field == Py_None) {
1196             append_ok = join_append(self, NULL, quoted);
1197             Py_DECREF(field);
1198         }
1199         else {
1200             PyObject *str;
1201 
1202             str = PyObject_Str(field);
1203             Py_DECREF(field);
1204             if (str == NULL) {
1205                 Py_DECREF(iter);
1206                 return NULL;
1207             }
1208             append_ok = join_append(self, str, quoted);
1209             Py_DECREF(str);
1210         }
1211         if (!append_ok) {
1212             Py_DECREF(iter);
1213             return NULL;
1214         }
1215     }
1216     Py_DECREF(iter);
1217     if (PyErr_Occurred())
1218         return NULL;
1219 
1220     if (self->num_fields > 0 && self->rec_len == 0) {
1221         if (dialect->quoting == QUOTE_NONE) {
1222             PyErr_Format(_csvstate_global->error_obj,
1223                 "single empty field record must be quoted");
1224             return NULL;
1225         }
1226         self->num_fields--;
1227         if (!join_append(self, NULL, 1))
1228             return NULL;
1229     }
1230 
1231     /* Add line terminator.
1232      */
1233     if (!join_append_lineterminator(self))
1234         return NULL;
1235 
1236     line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1237                                      (void *) self->rec, self->rec_len);
1238     if (line == NULL)
1239         return NULL;
1240     result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1241     Py_DECREF(line);
1242     return result;
1243 }
1244 
/* Docstring of the writer's writerows() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file.  Non-string\n"
"elements will be converted to string.");
1250 
1251 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1252 csv_writerows(WriterObj *self, PyObject *seqseq)
1253 {
1254     PyObject *row_iter, *row_obj, *result;
1255 
1256     row_iter = PyObject_GetIter(seqseq);
1257     if (row_iter == NULL) {
1258         PyErr_SetString(PyExc_TypeError,
1259                         "writerows() argument must be iterable");
1260         return NULL;
1261     }
1262     while ((row_obj = PyIter_Next(row_iter))) {
1263         result = csv_writerow(self, row_obj);
1264         Py_DECREF(row_obj);
1265         if (!result) {
1266             Py_DECREF(row_iter);
1267             return NULL;
1268         }
1269         else
1270              Py_DECREF(result);
1271     }
1272     Py_DECREF(row_iter);
1273     if (PyErr_Occurred())
1274         return NULL;
1275     Py_RETURN_NONE;
1276 }
1277 
1278 static struct PyMethodDef Writer_methods[] = {
1279     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1280     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1281     { NULL, NULL }
1282 };
1283 
1284 #define W_OFF(x) offsetof(WriterObj, x)
1285 
1286 static struct PyMemberDef Writer_memberlist[] = {
1287     { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1288     { NULL }
1289 };
1290 
1291 static void
Writer_dealloc(WriterObj * self)1292 Writer_dealloc(WriterObj *self)
1293 {
1294     PyObject_GC_UnTrack(self);
1295     Py_XDECREF(self->dialect);
1296     Py_XDECREF(self->writeline);
1297     if (self->rec != NULL)
1298         PyMem_Free(self->rec);
1299     PyObject_GC_Del(self);
1300 }
1301 
/* tp_traverse handler for writer objects: reports the two object
 * references a writer holds (dialect and writeline) to the GC visitor.
 * Returns 0 unless a visit call returns non-zero (see Py_VISIT).
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1309 
/* tp_clear handler for writer objects: releases the dialect and writeline
 * references and leaves both slots NULL.  The record buffer is not
 * touched here.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1317 
/* Docstring installed in the tp_doc slot of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1324 
/* Type object for "_csv.writer".
 *
 * The type takes part in cyclic garbage collection (Writer_traverse /
 * Writer_clear) and may be subclassed.  It defines no iterator slots.
 * The slots are initialised positionally, so the order of the entries
 * must not change.
 */
static PyTypeObject Writer_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.writer",                          /*tp_name*/
    sizeof(WriterObj),                      /*tp_basicsize*/
    0,                                      /*tp_itemsize*/
    /* methods */
    (destructor)Writer_dealloc,             /*tp_dealloc*/
    (printfunc)0,                           /*tp_print*/
    (getattrfunc)0,                         /*tp_getattr*/
    (setattrfunc)0,                         /*tp_setattr*/
    0,                                      /*tp_reserved*/
    (reprfunc)0,                            /*tp_repr*/
    0,                                      /*tp_as_number*/
    0,                                      /*tp_as_sequence*/
    0,                                      /*tp_as_mapping*/
    (hashfunc)0,                            /*tp_hash*/
    (ternaryfunc)0,                         /*tp_call*/
    (reprfunc)0,                            /*tp_str*/
    0,                                      /*tp_getattro*/
    0,                                      /*tp_setattro*/
    0,                                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
    Writer_Type_doc,                        /*tp_doc*/
    (traverseproc)Writer_traverse,          /*tp_traverse*/
    (inquiry)Writer_clear,                  /*tp_clear*/
    0,                                      /*tp_richcompare*/
    0,                                      /*tp_weaklistoffset*/
    (getiterfunc)0,                         /*tp_iter*/
    (getiterfunc)0,                         /*tp_iternext*/
    Writer_methods,                         /*tp_methods*/
    Writer_memberlist,                      /*tp_members*/
    0,                                      /*tp_getset*/
};
1359 
1360 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1361 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1362 {
1363     PyObject * output_file, * dialect = NULL;
1364     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1365     _Py_IDENTIFIER(write);
1366 
1367     if (!self)
1368         return NULL;
1369 
1370     self->dialect = NULL;
1371     self->writeline = NULL;
1372 
1373     self->rec = NULL;
1374     self->rec_size = 0;
1375     self->rec_len = 0;
1376     self->num_fields = 0;
1377 
1378     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1379         Py_DECREF(self);
1380         return NULL;
1381     }
1382     self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
1383     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1384         PyErr_SetString(PyExc_TypeError,
1385                         "argument 1 must have a \"write\" method");
1386         Py_DECREF(self);
1387         return NULL;
1388     }
1389     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1390     if (self->dialect == NULL) {
1391         Py_DECREF(self);
1392         return NULL;
1393     }
1394     PyObject_GC_Track(self);
1395     return (PyObject *)self;
1396 }
1397 
1398 /*
1399  * DIALECT REGISTRY
1400  */
1401 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1402 csv_list_dialects(PyObject *module, PyObject *args)
1403 {
1404     return PyDict_Keys(_csvstate_global->dialects);
1405 }
1406 
1407 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1408 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1409 {
1410     PyObject *name_obj, *dialect_obj = NULL;
1411     PyObject *dialect;
1412 
1413     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1414         return NULL;
1415     if (!PyUnicode_Check(name_obj)) {
1416         PyErr_SetString(PyExc_TypeError,
1417                         "dialect name must be a string");
1418         return NULL;
1419     }
1420     if (PyUnicode_READY(name_obj) == -1)
1421         return NULL;
1422     dialect = _call_dialect(dialect_obj, kwargs);
1423     if (dialect == NULL)
1424         return NULL;
1425     if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1426         Py_DECREF(dialect);
1427         return NULL;
1428     }
1429     Py_DECREF(dialect);
1430     Py_RETURN_NONE;
1431 }
1432 
1433 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1434 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1435 {
1436     if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1437         return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1438     Py_RETURN_NONE;
1439 }
1440 
/* get_dialect(name): thin wrapper that forwards name_obj to
 * get_dialect_from_registry() and returns its result unchanged.
 */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1446 
1447 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1448 csv_field_size_limit(PyObject *module, PyObject *args)
1449 {
1450     PyObject *new_limit = NULL;
1451     long old_limit = _csvstate_global->field_limit;
1452 
1453     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1454         return NULL;
1455     if (new_limit != NULL) {
1456         if (!PyLong_CheckExact(new_limit)) {
1457             PyErr_Format(PyExc_TypeError,
1458                          "limit must be an integer");
1459             return NULL;
1460         }
1461         _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1462         if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1463             _csvstate_global->field_limit = old_limit;
1464             return NULL;
1465         }
1466     }
1467     return PyLong_FromLong(old_limit);
1468 }
1469 
1470 /*
1471  * MODULE
1472  */
1473 
/* Module docstring; passed to the module definition (_csvmodule) as its
 * documentation string.
 */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the \n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the \n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should \n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape \n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1531 
/* Docstring of the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1546 
/* Docstring of the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1560 
1561 PyDoc_STRVAR(csv_list_dialects_doc,
1562 "Return a list of all know dialect names.\n"
1563 "    names = csv.list_dialects()");
1564 
/* Docstring of the module-level get_dialect() function. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
"    dialect = csv.get_dialect(name)");
1568 
/* Docstring of the module-level register_dialect() function. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
"    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1572 
/* Docstring of the module-level unregister_dialect() function. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
"    csv.unregister_dialect(name)");
1576 
/* Docstring of the module-level field_size_limit() function. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
"    csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1583 
1584 static struct PyMethodDef csv_methods[] = {
1585     { "reader", (PyCFunction)csv_reader,
1586         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1587     { "writer", (PyCFunction)csv_writer,
1588         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1589     { "list_dialects", (PyCFunction)csv_list_dialects,
1590         METH_NOARGS, csv_list_dialects_doc},
1591     { "register_dialect", (PyCFunction)csv_register_dialect,
1592         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1593     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1594         METH_O, csv_unregister_dialect_doc},
1595     { "get_dialect", (PyCFunction)csv_get_dialect,
1596         METH_O, csv_get_dialect_doc},
1597     { "field_size_limit", (PyCFunction)csv_field_size_limit,
1598         METH_VARARGS, csv_field_size_limit_doc},
1599     { NULL, NULL }
1600 };
1601 
/* Module definition.  The fields are initialised positionally; the
 * per-module state is a _csvstate, managed by the three GC/cleanup hooks
 * at the end.
 */
static struct PyModuleDef _csvmodule = {
    PyModuleDef_HEAD_INIT,
    "_csv",                 /* module name */
    csv_module_doc,         /* module docstring */
    sizeof(_csvstate),      /* size of the per-module state */
    csv_methods,            /* module-level functions */
    NULL,                   /* unused slot (NOTE(review): m_reload or m_slots
                               depending on the Python version - confirm) */
    _csv_traverse,          /* GC traversal of the module state */
    _csv_clear,             /* GC clearing of the module state */
    _csv_free               /* called when the module is deallocated */
};
1613 
1614 PyMODINIT_FUNC
PyInit__csv(void)1615 PyInit__csv(void)
1616 {
1617     PyObject *module;
1618     const StyleDesc *style;
1619 
1620     if (PyType_Ready(&Dialect_Type) < 0)
1621         return NULL;
1622 
1623     if (PyType_Ready(&Reader_Type) < 0)
1624         return NULL;
1625 
1626     if (PyType_Ready(&Writer_Type) < 0)
1627         return NULL;
1628 
1629     /* Create the module and add the functions */
1630     module = PyModule_Create(&_csvmodule);
1631     if (module == NULL)
1632         return NULL;
1633 
1634     /* Add version to the module. */
1635     if (PyModule_AddStringConstant(module, "__version__",
1636                                    MODULE_VERSION) == -1)
1637         return NULL;
1638 
1639     /* Set the field limit */
1640     _csvstate(module)->field_limit = 128 * 1024;
1641     /* Do I still need to add this var to the Module Dict? */
1642 
1643     /* Add _dialects dictionary */
1644     _csvstate(module)->dialects = PyDict_New();
1645     if (_csvstate(module)->dialects == NULL)
1646         return NULL;
1647     Py_INCREF(_csvstate(module)->dialects);
1648     if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
1649         return NULL;
1650 
1651     /* Add quote styles into dictionary */
1652     for (style = quote_styles; style->name; style++) {
1653         if (PyModule_AddIntConstant(module, style->name,
1654                                     style->style) == -1)
1655             return NULL;
1656     }
1657 
1658     /* Add the Dialect type */
1659     Py_INCREF(&Dialect_Type);
1660     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1661         return NULL;
1662 
1663     /* Add the CSV exception object to the module. */
1664     _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1665     if (_csvstate(module)->error_obj == NULL)
1666         return NULL;
1667     Py_INCREF(_csvstate(module)->error_obj);
1668     PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
1669     return module;
1670 }
1671