1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 */
10
11 #define MODULE_VERSION "1.0"
12
13 #include "Python.h"
14 #include "structmember.h"
15
16
/* Per-module state block; it is reached through PyModule_GetState(). */
typedef struct {
    PyObject *error_obj;   /* CSV exception */
    PyObject *dialects;    /* Dialect registry */
    long field_limit;      /* max parsed field size */
} _csvstate;

/* Fetch the state block of module object `o`. */
#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25 static int
_csv_clear(PyObject * m)26 _csv_clear(PyObject *m)
27 {
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31 }
32
33 static int
_csv_traverse(PyObject * m,visitproc visit,void * arg)34 _csv_traverse(PyObject *m, visitproc visit, void *arg)
35 {
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39 }
40
41 static void
_csv_free(void * m)42 _csv_free(void *m)
43 {
44 _csv_clear((PyObject *)m);
45 }
46
/* Forward declaration; needed by the lookup macro below. */
static struct PyModuleDef _csvmodule;

/* State block of the module found through PyState_FindModule(&_csvmodule).
   Each expansion performs the module lookup again. */
#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
50
/* States of the reader state machine driven by parse_process_char(). */
typedef enum {
    START_RECORD,            /* start of record */
    START_FIELD,             /* expecting field */
    ESCAPED_CHAR,            /* escapechar seen outside a quoted field */
    IN_FIELD,                /* in unquoted field */
    IN_QUOTED_FIELD,         /* in quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escapechar seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,   /* doublequote - seen a quote in a quoted field */
    EAT_CRNL,                /* line ending seen; only CR/LF or end of line may follow */
    AFTER_ESCAPED_CRNL       /* an escaped CR or LF was just added to the field */
} ParserState;
56
/* Values accepted for a dialect's "quoting" option; the set is validated by
   dialect_check_quoting() against the quote_styles table. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
60
/* Pairs a QuoteStyle value with its textual name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;
65
/* Table of known quoting styles.  The final zeroed entry (name == NULL)
   terminates the table; dialect_check_quoting() stops scanning there. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
73
/* Instance layout of _csv.Dialect.  Character options hold 0 when unset
   (see _set_char() and get_nullchar_as_None()). */
typedef struct {
    PyObject_HEAD

    int doublequote;            /* is " represented by ""? */
    Py_UCS4 delimiter;          /* field separator */
    Py_UCS4 quotechar;          /* quote character */
    Py_UCS4 escapechar;         /* escape character */
    int skipinitialspace;       /* ignore spaces following delimiter? */
    PyObject *lineterminator;   /* string to write between records */
    int quoting;                /* style of quoting to write */

    int strict;                 /* raise exception on bad CSV */
} DialectObj;

/* Forward declaration; the type object is defined after dialect_new(). */
static PyTypeObject Dialect_Type;
89
/* Instance layout of _csv.reader. */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;       /* field list for current record */
    ParserState state;      /* current CSV parse state */
    Py_UCS4 *field;         /* temporary buffer */
    Py_ssize_t field_size;  /* size of allocated buffer */
    Py_ssize_t field_len;   /* length of current field */
    int numeric_field;      /* treat field as numeric */
    unsigned long line_num; /* Source-file line number */
} ReaderObj;

/* Forward declaration; the type object is defined after the reader code. */
static PyTypeObject Reader_Type;

/* Exact type test: true only when v's type is Reader_Type itself. */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
109
/* Instance layout of the CSV writer object. */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    Py_UCS4 *rec;           /* buffer for parser.join */
    Py_ssize_t rec_size;    /* size of allocated record */
    Py_ssize_t rec_len;     /* length of record */
    int num_fields;         /* number of fields in record */
} WriterObj;

/* Forward declaration of the writer type object. */
static PyTypeObject Writer_Type;
124
125 /*
126 * DIALECT class
127 */
128
129 static PyObject *
get_dialect_from_registry(PyObject * name_obj)130 get_dialect_from_registry(PyObject * name_obj)
131 {
132 PyObject *dialect_obj;
133
134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
142 }
143
144 static PyObject *
get_string(PyObject * str)145 get_string(PyObject *str)
146 {
147 Py_XINCREF(str);
148 return str;
149 }
150
151 static PyObject *
get_nullchar_as_None(Py_UCS4 c)152 get_nullchar_as_None(Py_UCS4 c)
153 {
154 if (c == '\0') {
155 Py_RETURN_NONE;
156 }
157 else
158 return PyUnicode_FromOrdinal(c);
159 }
160
161 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))162 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
163 {
164 return get_string(self->lineterminator);
165 }
166
167 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))168 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
169 {
170 return get_nullchar_as_None(self->delimiter);
171 }
172
173 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))174 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
175 {
176 return get_nullchar_as_None(self->escapechar);
177 }
178
179 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))180 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
181 {
182 return get_nullchar_as_None(self->quotechar);
183 }
184
185 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))186 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
187 {
188 return PyLong_FromLong(self->quoting);
189 }
190
191 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)192 _set_bool(const char *name, int *target, PyObject *src, int dflt)
193 {
194 if (src == NULL)
195 *target = dflt;
196 else {
197 int b = PyObject_IsTrue(src);
198 if (b < 0)
199 return -1;
200 *target = b;
201 }
202 return 0;
203 }
204
205 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)206 _set_int(const char *name, int *target, PyObject *src, int dflt)
207 {
208 if (src == NULL)
209 *target = dflt;
210 else {
211 int value;
212 if (!PyLong_CheckExact(src)) {
213 PyErr_Format(PyExc_TypeError,
214 "\"%s\" must be an integer", name);
215 return -1;
216 }
217 value = _PyLong_AsInt(src);
218 if (value == -1 && PyErr_Occurred()) {
219 return -1;
220 }
221 *target = value;
222 }
223 return 0;
224 }
225
226 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)227 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
228 {
229 if (src == NULL)
230 *target = dflt;
231 else {
232 *target = '\0';
233 if (src != Py_None) {
234 Py_ssize_t len;
235 if (!PyUnicode_Check(src)) {
236 PyErr_Format(PyExc_TypeError,
237 "\"%s\" must be string, not %.200s", name,
238 src->ob_type->tp_name);
239 return -1;
240 }
241 len = PyUnicode_GetLength(src);
242 if (len > 1) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be a 1-character string",
245 name);
246 return -1;
247 }
248 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
249 if (len > 0)
250 *target = PyUnicode_READ_CHAR(src, 0);
251 }
252 }
253 return 0;
254 }
255
256 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)257 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
258 {
259 if (src == NULL)
260 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
261 else {
262 if (src == Py_None)
263 *target = NULL;
264 else if (!PyUnicode_Check(src)) {
265 PyErr_Format(PyExc_TypeError,
266 "\"%s\" must be a string", name);
267 return -1;
268 }
269 else {
270 if (PyUnicode_READY(src) == -1)
271 return -1;
272 Py_INCREF(src);
273 Py_XSETREF(*target, src);
274 }
275 }
276 return 0;
277 }
278
279 static int
dialect_check_quoting(int quoting)280 dialect_check_quoting(int quoting)
281 {
282 const StyleDesc *qs;
283
284 for (qs = quote_styles; qs->name; qs++) {
285 if ((int)qs->style == quoting)
286 return 0;
287 }
288 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
289 return -1;
290 }
291
/* Byte offset of member x inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only int attributes served straight from the DialectObj fields. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    { "strict",             T_INT, D_OFF(strict), READONLY },
    { NULL }
};
300
/* Computed attributes of Dialect.  Only getters are supplied; the setter
   slots are left zero-initialized. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",         (getter)Dialect_get_escapechar},
    { "lineterminator",     (getter)Dialect_get_lineterminator},
    { "quotechar",          (getter)Dialect_get_quotechar},
    { "quoting",            (getter)Dialect_get_quoting},
    {NULL},
};
309
310 static void
Dialect_dealloc(DialectObj * self)311 Dialect_dealloc(DialectObj *self)
312 {
313 Py_XDECREF(self->lineterminator);
314 Py_TYPE(self)->tp_free((PyObject *)self);
315 }
316
/* Keyword names accepted by dialect_new(); the order must match the output
   pointers passed to PyArg_ParseTupleAndKeywords() there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
329
330 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)331 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
332 {
333 DialectObj *self;
334 PyObject *ret = NULL;
335 PyObject *dialect = NULL;
336 PyObject *delimiter = NULL;
337 PyObject *doublequote = NULL;
338 PyObject *escapechar = NULL;
339 PyObject *lineterminator = NULL;
340 PyObject *quotechar = NULL;
341 PyObject *quoting = NULL;
342 PyObject *skipinitialspace = NULL;
343 PyObject *strict = NULL;
344
345 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
346 "|OOOOOOOOO", dialect_kws,
347 &dialect,
348 &delimiter,
349 &doublequote,
350 &escapechar,
351 &lineterminator,
352 "echar,
353 "ing,
354 &skipinitialspace,
355 &strict))
356 return NULL;
357
358 if (dialect != NULL) {
359 if (PyUnicode_Check(dialect)) {
360 dialect = get_dialect_from_registry(dialect);
361 if (dialect == NULL)
362 return NULL;
363 }
364 else
365 Py_INCREF(dialect);
366 /* Can we reuse this instance? */
367 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
368 delimiter == NULL &&
369 doublequote == NULL &&
370 escapechar == NULL &&
371 lineterminator == NULL &&
372 quotechar == NULL &&
373 quoting == NULL &&
374 skipinitialspace == NULL &&
375 strict == NULL)
376 return dialect;
377 }
378
379 self = (DialectObj *)type->tp_alloc(type, 0);
380 if (self == NULL) {
381 Py_XDECREF(dialect);
382 return NULL;
383 }
384 self->lineterminator = NULL;
385
386 Py_XINCREF(delimiter);
387 Py_XINCREF(doublequote);
388 Py_XINCREF(escapechar);
389 Py_XINCREF(lineterminator);
390 Py_XINCREF(quotechar);
391 Py_XINCREF(quoting);
392 Py_XINCREF(skipinitialspace);
393 Py_XINCREF(strict);
394 if (dialect != NULL) {
395 #define DIALECT_GETATTR(v, n) \
396 if (v == NULL) \
397 v = PyObject_GetAttrString(dialect, n)
398 DIALECT_GETATTR(delimiter, "delimiter");
399 DIALECT_GETATTR(doublequote, "doublequote");
400 DIALECT_GETATTR(escapechar, "escapechar");
401 DIALECT_GETATTR(lineterminator, "lineterminator");
402 DIALECT_GETATTR(quotechar, "quotechar");
403 DIALECT_GETATTR(quoting, "quoting");
404 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
405 DIALECT_GETATTR(strict, "strict");
406 PyErr_Clear();
407 }
408
409 /* check types and convert to C values */
410 #define DIASET(meth, name, target, src, dflt) \
411 if (meth(name, target, src, dflt)) \
412 goto err
413 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
414 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
415 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
416 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
417 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
418 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
419 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
420 DIASET(_set_bool, "strict", &self->strict, strict, 0);
421
422 /* validate options */
423 if (dialect_check_quoting(self->quoting))
424 goto err;
425 if (self->delimiter == 0) {
426 PyErr_SetString(PyExc_TypeError,
427 "\"delimiter\" must be a 1-character string");
428 goto err;
429 }
430 if (quotechar == Py_None && quoting == NULL)
431 self->quoting = QUOTE_NONE;
432 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
433 PyErr_SetString(PyExc_TypeError,
434 "quotechar must be set if quoting enabled");
435 goto err;
436 }
437 if (self->lineterminator == 0) {
438 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
439 goto err;
440 }
441
442 ret = (PyObject *)self;
443 Py_INCREF(self);
444 err:
445 Py_XDECREF(self);
446 Py_XDECREF(dialect);
447 Py_XDECREF(delimiter);
448 Py_XDECREF(doublequote);
449 Py_XDECREF(escapechar);
450 Py_XDECREF(lineterminator);
451 Py_XDECREF(quotechar);
452 Py_XDECREF(quoting);
453 Py_XDECREF(skipinitialspace);
454 Py_XDECREF(strict);
455 return ret;
456 }
457
458
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");

/* Type object for _csv.Dialect.  The initializer is positional, so entries
   must stay in PyTypeObject slot order.  Instances are created by
   dialect_new() and destroyed by Dialect_dealloc(). */
static PyTypeObject Dialect_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.Dialect",                         /* tp_name */
    sizeof(DialectObj),                     /* tp_basicsize */
    0,                                      /* tp_itemsize */
    /*  methods  */
    (destructor)Dialect_dealloc,            /* tp_dealloc */
    (printfunc)0,                           /* tp_print */
    (getattrfunc)0,                         /* tp_getattr */
    (setattrfunc)0,                         /* tp_setattr */
    0,                                      /* tp_reserved */
    (reprfunc)0,                            /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    (hashfunc)0,                            /* tp_hash */
    (ternaryfunc)0,                         /* tp_call */
    (reprfunc)0,                            /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    Dialect_Type_doc,                       /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    0,                                      /* tp_methods */
    Dialect_memberlist,                     /* tp_members */
    Dialect_getsetlist,                     /* tp_getset */
    0,                                      /* tp_base */
    0,                                      /* tp_dict */
    0,                                      /* tp_descr_get */
    0,                                      /* tp_descr_set */
    0,                                      /* tp_dictoffset */
    0,                                      /* tp_init */
    0,                                      /* tp_alloc */
    dialect_new,                            /* tp_new */
    0,                                      /* tp_free */
};
506
507 /*
508 * Return an instance of the dialect type, given a Python instance or kwarg
509 * description of the dialect
510 */
511 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)512 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
513 {
514 PyObject *type = (PyObject *)&Dialect_Type;
515 if (dialect_inst) {
516 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
517 }
518 else {
519 return _PyObject_FastCallDict(type, NULL, 0, kwargs);
520 }
521 }
522
523 /*
524 * READER
525 */
526 static int
parse_save_field(ReaderObj * self)527 parse_save_field(ReaderObj *self)
528 {
529 PyObject *field;
530
531 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
532 (void *) self->field, self->field_len);
533 if (field == NULL)
534 return -1;
535 self->field_len = 0;
536 if (self->numeric_field) {
537 PyObject *tmp;
538
539 self->numeric_field = 0;
540 tmp = PyNumber_Float(field);
541 Py_DECREF(field);
542 if (tmp == NULL)
543 return -1;
544 field = tmp;
545 }
546 if (PyList_Append(self->fields, field) < 0) {
547 Py_DECREF(field);
548 return -1;
549 }
550 Py_DECREF(field);
551 return 0;
552 }
553
554 static int
parse_grow_buff(ReaderObj * self)555 parse_grow_buff(ReaderObj *self)
556 {
557 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
558
559 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
560 Py_UCS4 *field_new = self->field;
561 PyMem_Resize(field_new, Py_UCS4, field_size_new);
562 if (field_new == NULL) {
563 PyErr_NoMemory();
564 return 0;
565 }
566 self->field = field_new;
567 self->field_size = field_size_new;
568 return 1;
569 }
570
571 static int
parse_add_char(ReaderObj * self,Py_UCS4 c)572 parse_add_char(ReaderObj *self, Py_UCS4 c)
573 {
574 if (self->field_len >= _csvstate_global->field_limit) {
575 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
576 _csvstate_global->field_limit);
577 return -1;
578 }
579 if (self->field_len == self->field_size && !parse_grow_buff(self))
580 return -1;
581 self->field[self->field_len++] = c;
582 return 0;
583 }
584
/*
 * Advance the reader state machine by one character.
 *
 * c == '\0' is the end-of-line marker: Reader_iternext() passes 0 after the
 * last character of every input line and rejects lines that contain a real
 * NUL, so a NUL never reaches this function as data.
 *
 * Depending on self->state the character is stored with parse_add_char(),
 * terminates the current field via parse_save_field(), or only changes the
 * state.  A record is complete when the state returns to START_RECORD.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
parse_process_char(ReaderObj *self, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* previous character was the escapechar (outside quotes) */
        if (c == '\n' || c=='\r') {
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* escapechar at end of line: store a newline in its place */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* end-of-line marker right after an escaped CR/LF: stay in this
           state, so the record is not finished yet */
        if (c == '\0')
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: remain in the quoted field */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* escapechar at end of line: store a newline in its place */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the character and continue unquoted */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line ending: only more CR/LF or the end-of-line marker
           are acceptable */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
765
766 static int
parse_reset(ReaderObj * self)767 parse_reset(ReaderObj *self)
768 {
769 Py_XSETREF(self->fields, PyList_New(0));
770 if (self->fields == NULL)
771 return -1;
772 self->field_len = 0;
773 self->state = START_RECORD;
774 self->numeric_field = 0;
775 return 0;
776 }
777
778 static PyObject *
Reader_iternext(ReaderObj * self)779 Reader_iternext(ReaderObj *self)
780 {
781 PyObject *fields = NULL;
782 Py_UCS4 c;
783 Py_ssize_t pos, linelen;
784 unsigned int kind;
785 void *data;
786 PyObject *lineobj;
787
788 if (parse_reset(self) < 0)
789 return NULL;
790 do {
791 lineobj = PyIter_Next(self->input_iter);
792 if (lineobj == NULL) {
793 /* End of input OR exception */
794 if (!PyErr_Occurred() && (self->field_len != 0 ||
795 self->state == IN_QUOTED_FIELD)) {
796 if (self->dialect->strict)
797 PyErr_SetString(_csvstate_global->error_obj,
798 "unexpected end of data");
799 else if (parse_save_field(self) >= 0)
800 break;
801 }
802 return NULL;
803 }
804 if (!PyUnicode_Check(lineobj)) {
805 PyErr_Format(_csvstate_global->error_obj,
806 "iterator should return strings, "
807 "not %.200s "
808 "(did you open the file in text mode?)",
809 lineobj->ob_type->tp_name
810 );
811 Py_DECREF(lineobj);
812 return NULL;
813 }
814 if (PyUnicode_READY(lineobj) == -1) {
815 Py_DECREF(lineobj);
816 return NULL;
817 }
818 ++self->line_num;
819 kind = PyUnicode_KIND(lineobj);
820 data = PyUnicode_DATA(lineobj);
821 pos = 0;
822 linelen = PyUnicode_GET_LENGTH(lineobj);
823 while (linelen--) {
824 c = PyUnicode_READ(kind, data, pos);
825 if (c == '\0') {
826 Py_DECREF(lineobj);
827 PyErr_Format(_csvstate_global->error_obj,
828 "line contains NUL");
829 goto err;
830 }
831 if (parse_process_char(self, c) < 0) {
832 Py_DECREF(lineobj);
833 goto err;
834 }
835 pos++;
836 }
837 Py_DECREF(lineobj);
838 if (parse_process_char(self, 0) < 0)
839 goto err;
840 } while (self->state != START_RECORD);
841
842 fields = self->fields;
843 self->fields = NULL;
844 err:
845 return fields;
846 }
847
848 static void
Reader_dealloc(ReaderObj * self)849 Reader_dealloc(ReaderObj *self)
850 {
851 PyObject_GC_UnTrack(self);
852 Py_XDECREF(self->dialect);
853 Py_XDECREF(self->input_iter);
854 Py_XDECREF(self->fields);
855 if (self->field != NULL)
856 PyMem_Free(self->field);
857 PyObject_GC_Del(self);
858 }
859
/* tp_traverse for the reader: report every object reference the reader
   holds to the visitor.  Py_VISIT returns early when the callback yields a
   non-zero value. */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
868
/* tp_clear for the reader: drop the object references it holds.  The raw
   field buffer is not an object and is released in Reader_dealloc(). */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
877
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);

/* The reader defines no methods of its own; the table holds only the
   terminating sentinel. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of member x inside ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes served straight from the ReaderObj fields. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
895
896
897 static PyTypeObject Reader_Type = {
898 PyVarObject_HEAD_INIT(NULL, 0)
899 "_csv.reader", /*tp_name*/
900 sizeof(ReaderObj), /*tp_basicsize*/
901 0, /*tp_itemsize*/
902 /* methods */
903 (destructor)Reader_dealloc, /*tp_dealloc*/
904 (printfunc)0, /*tp_print*/
905 (getattrfunc)0, /*tp_getattr*/
906 (setattrfunc)0, /*tp_setattr*/
907 0, /*tp_reserved*/
908 (reprfunc)0, /*tp_repr*/
909 0, /*tp_as_number*/
910 0, /*tp_as_sequence*/
911 0, /*tp_as_mapping*/
912 (hashfunc)0, /*tp_hash*/
913 (ternaryfunc)0, /*tp_call*/
914 (reprfunc)0, /*tp_str*/
915 0, /*tp_getattro*/
916 0, /*tp_setattro*/
917 0, /*tp_as_buffer*/
918 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
919 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
920 Reader_Type_doc, /*tp_doc*/
921 (traverseproc)Reader_traverse, /*tp_traverse*/
922 (inquiry)Reader_clear, /*tp_clear*/
923 0, /*tp_richcompare*/
924 0, /*tp_weaklistoffset*/
925 PyObject_SelfIter, /*tp_iter*/
926 (getiterfunc)Reader_iternext, /*tp_iternext*/
927 Reader_methods, /*tp_methods*/
928 Reader_memberlist, /*tp_members*/
929 0, /*tp_getset*/
930
931 };
932
933 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)934 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
935 {
936 PyObject * iterator, * dialect = NULL;
937 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
938
939 if (!self)
940 return NULL;
941
942 self->dialect = NULL;
943 self->fields = NULL;
944 self->input_iter = NULL;
945 self->field = NULL;
946 self->field_size = 0;
947 self->line_num = 0;
948
949 if (parse_reset(self) < 0) {
950 Py_DECREF(self);
951 return NULL;
952 }
953
954 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
955 Py_DECREF(self);
956 return NULL;
957 }
958 self->input_iter = PyObject_GetIter(iterator);
959 if (self->input_iter == NULL) {
960 PyErr_SetString(PyExc_TypeError,
961 "argument 1 must be an iterator");
962 Py_DECREF(self);
963 return NULL;
964 }
965 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
966 if (self->dialect == NULL) {
967 Py_DECREF(self);
968 return NULL;
969 }
970
971 PyObject_GC_Track(self);
972 return (PyObject *)self;
973 }
974
975 /*
976 * WRITER
977 */
978 /* ---------------------------------------------------------------- */
979 static void
join_reset(WriterObj * self)980 join_reset(WriterObj *self)
981 {
982 self->rec_len = 0;
983 self->num_fields = 0;
984 }
985
986 #define MEM_INCR 32768
987
988 /* Calculate new record length or append field to record. Return new
989 * record length.
990 */
991 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)992 join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
993 Py_ssize_t field_len, int *quoted,
994 int copy_phase)
995 {
996 DialectObj *dialect = self->dialect;
997 int i;
998 Py_ssize_t rec_len;
999
1000 #define INCLEN \
1001 do {\
1002 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1003 goto overflow; \
1004 } \
1005 rec_len++; \
1006 } while(0)
1007
1008 #define ADDCH(c) \
1009 do {\
1010 if (copy_phase) \
1011 self->rec[rec_len] = c;\
1012 INCLEN;\
1013 } while(0)
1014
1015 rec_len = self->rec_len;
1016
1017 /* If this is not the first field we need a field separator */
1018 if (self->num_fields > 0)
1019 ADDCH(dialect->delimiter);
1020
1021 /* Handle preceding quote */
1022 if (copy_phase && *quoted)
1023 ADDCH(dialect->quotechar);
1024
1025 /* Copy/count field data */
1026 /* If field is null just pass over */
1027 for (i = 0; field_data && (i < field_len); i++) {
1028 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1029 int want_escape = 0;
1030
1031 if (c == dialect->delimiter ||
1032 c == dialect->escapechar ||
1033 c == dialect->quotechar ||
1034 PyUnicode_FindChar(
1035 dialect->lineterminator, c, 0,
1036 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1037 if (dialect->quoting == QUOTE_NONE)
1038 want_escape = 1;
1039 else {
1040 if (c == dialect->quotechar) {
1041 if (dialect->doublequote)
1042 ADDCH(dialect->quotechar);
1043 else
1044 want_escape = 1;
1045 }
1046 if (!want_escape)
1047 *quoted = 1;
1048 }
1049 if (want_escape) {
1050 if (!dialect->escapechar) {
1051 PyErr_Format(_csvstate_global->error_obj,
1052 "need to escape, but no escapechar set");
1053 return -1;
1054 }
1055 ADDCH(dialect->escapechar);
1056 }
1057 }
1058 /* Copy field character into record buffer.
1059 */
1060 ADDCH(c);
1061 }
1062
1063 if (*quoted) {
1064 if (copy_phase)
1065 ADDCH(dialect->quotechar);
1066 else {
1067 INCLEN; /* starting quote */
1068 INCLEN; /* ending quote */
1069 }
1070 }
1071 return rec_len;
1072
1073 overflow:
1074 PyErr_NoMemory();
1075 return -1;
1076 #undef ADDCH
1077 #undef INCLEN
1078 }
1079
1080 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1081 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1082 {
1083 assert(rec_len >= 0);
1084
1085 if (rec_len > self->rec_size) {
1086 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1087 Py_UCS4 *rec_new = self->rec;
1088 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1089 if (rec_new == NULL) {
1090 PyErr_NoMemory();
1091 return 0;
1092 }
1093 self->rec = rec_new;
1094 self->rec_size = (Py_ssize_t)rec_size_new;
1095 }
1096 return 1;
1097 }
1098
1099 static int
join_append(WriterObj * self,PyObject * field,int quoted)1100 join_append(WriterObj *self, PyObject *field, int quoted)
1101 {
1102 unsigned int field_kind = -1;
1103 void *field_data = NULL;
1104 Py_ssize_t field_len = 0;
1105 Py_ssize_t rec_len;
1106
1107 if (field != NULL) {
1108 if (PyUnicode_READY(field) == -1)
1109 return 0;
1110 field_kind = PyUnicode_KIND(field);
1111 field_data = PyUnicode_DATA(field);
1112 field_len = PyUnicode_GET_LENGTH(field);
1113 }
1114 rec_len = join_append_data(self, field_kind, field_data, field_len,
1115 "ed, 0);
1116 if (rec_len < 0)
1117 return 0;
1118
1119 /* grow record buffer if necessary */
1120 if (!join_check_rec_size(self, rec_len))
1121 return 0;
1122
1123 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1124 "ed, 1);
1125 self->num_fields++;
1126
1127 return 1;
1128 }
1129
1130 static int
join_append_lineterminator(WriterObj * self)1131 join_append_lineterminator(WriterObj *self)
1132 {
1133 Py_ssize_t terminator_len, i;
1134 unsigned int term_kind;
1135 void *term_data;
1136
1137 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1138 if (terminator_len == -1)
1139 return 0;
1140
1141 /* grow record buffer if necessary */
1142 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1143 return 0;
1144
1145 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1146 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1147 for (i = 0; i < terminator_len; i++)
1148 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1149 self->rec_len += terminator_len;
1150
1151 return 1;
1152 }
1153
1154 PyDoc_STRVAR(csv_writerow_doc,
1155 "writerow(iterable)\n"
1156 "\n"
1157 "Construct and write a CSV record from an iterable of fields. Non-string\n"
1158 "elements will be converted to string.");
1159
1160 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1161 csv_writerow(WriterObj *self, PyObject *seq)
1162 {
1163 DialectObj *dialect = self->dialect;
1164 PyObject *iter, *field, *line, *result;
1165
1166 iter = PyObject_GetIter(seq);
1167 if (iter == NULL)
1168 return PyErr_Format(_csvstate_global->error_obj,
1169 "iterable expected, not %.200s",
1170 seq->ob_type->tp_name);
1171
1172 /* Join all fields in internal buffer.
1173 */
1174 join_reset(self);
1175 while ((field = PyIter_Next(iter))) {
1176 int append_ok;
1177 int quoted;
1178
1179 switch (dialect->quoting) {
1180 case QUOTE_NONNUMERIC:
1181 quoted = !PyNumber_Check(field);
1182 break;
1183 case QUOTE_ALL:
1184 quoted = 1;
1185 break;
1186 default:
1187 quoted = 0;
1188 break;
1189 }
1190
1191 if (PyUnicode_Check(field)) {
1192 append_ok = join_append(self, field, quoted);
1193 Py_DECREF(field);
1194 }
1195 else if (field == Py_None) {
1196 append_ok = join_append(self, NULL, quoted);
1197 Py_DECREF(field);
1198 }
1199 else {
1200 PyObject *str;
1201
1202 str = PyObject_Str(field);
1203 Py_DECREF(field);
1204 if (str == NULL) {
1205 Py_DECREF(iter);
1206 return NULL;
1207 }
1208 append_ok = join_append(self, str, quoted);
1209 Py_DECREF(str);
1210 }
1211 if (!append_ok) {
1212 Py_DECREF(iter);
1213 return NULL;
1214 }
1215 }
1216 Py_DECREF(iter);
1217 if (PyErr_Occurred())
1218 return NULL;
1219
1220 if (self->num_fields > 0 && self->rec_len == 0) {
1221 if (dialect->quoting == QUOTE_NONE) {
1222 PyErr_Format(_csvstate_global->error_obj,
1223 "single empty field record must be quoted");
1224 return NULL;
1225 }
1226 self->num_fields--;
1227 if (!join_append(self, NULL, 1))
1228 return NULL;
1229 }
1230
1231 /* Add line terminator.
1232 */
1233 if (!join_append_lineterminator(self))
1234 return NULL;
1235
1236 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1237 (void *) self->rec, self->rec_len);
1238 if (line == NULL)
1239 return NULL;
1240 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1241 Py_DECREF(line);
1242 return result;
1243 }
1244
1245 PyDoc_STRVAR(csv_writerows_doc,
1246 "writerows(iterable of iterables)\n"
1247 "\n"
1248 "Construct and write a series of iterables to a csv file. Non-string\n"
1249 "elements will be converted to string.");
1250
1251 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1252 csv_writerows(WriterObj *self, PyObject *seqseq)
1253 {
1254 PyObject *row_iter, *row_obj, *result;
1255
1256 row_iter = PyObject_GetIter(seqseq);
1257 if (row_iter == NULL) {
1258 PyErr_SetString(PyExc_TypeError,
1259 "writerows() argument must be iterable");
1260 return NULL;
1261 }
1262 while ((row_obj = PyIter_Next(row_iter))) {
1263 result = csv_writerow(self, row_obj);
1264 Py_DECREF(row_obj);
1265 if (!result) {
1266 Py_DECREF(row_iter);
1267 return NULL;
1268 }
1269 else
1270 Py_DECREF(result);
1271 }
1272 Py_DECREF(row_iter);
1273 if (PyErr_Occurred())
1274 return NULL;
1275 Py_RETURN_NONE;
1276 }
1277
1278 static struct PyMethodDef Writer_methods[] = {
1279 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1280 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1281 { NULL, NULL }
1282 };
1283
1284 #define W_OFF(x) offsetof(WriterObj, x)
1285
1286 static struct PyMemberDef Writer_memberlist[] = {
1287 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1288 { NULL }
1289 };
1290
1291 static void
Writer_dealloc(WriterObj * self)1292 Writer_dealloc(WriterObj *self)
1293 {
1294 PyObject_GC_UnTrack(self);
1295 Py_XDECREF(self->dialect);
1296 Py_XDECREF(self->writeline);
1297 if (self->rec != NULL)
1298 PyMem_Free(self->rec);
1299 PyObject_GC_Del(self);
1300 }
1301
1302 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1303 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1304 {
1305 Py_VISIT(self->dialect);
1306 Py_VISIT(self->writeline);
1307 return 0;
1308 }
1309
1310 static int
Writer_clear(WriterObj * self)1311 Writer_clear(WriterObj *self)
1312 {
1313 Py_CLEAR(self->dialect);
1314 Py_CLEAR(self->writeline);
1315 return 0;
1316 }
1317
1318 PyDoc_STRVAR(Writer_Type_doc,
1319 "CSV writer\n"
1320 "\n"
1321 "Writer objects are responsible for generating tabular data\n"
1322 "in CSV format from sequence input.\n"
1323 );
1324
1325 static PyTypeObject Writer_Type = {
1326 PyVarObject_HEAD_INIT(NULL, 0)
1327 "_csv.writer", /*tp_name*/
1328 sizeof(WriterObj), /*tp_basicsize*/
1329 0, /*tp_itemsize*/
1330 /* methods */
1331 (destructor)Writer_dealloc, /*tp_dealloc*/
1332 (printfunc)0, /*tp_print*/
1333 (getattrfunc)0, /*tp_getattr*/
1334 (setattrfunc)0, /*tp_setattr*/
1335 0, /*tp_reserved*/
1336 (reprfunc)0, /*tp_repr*/
1337 0, /*tp_as_number*/
1338 0, /*tp_as_sequence*/
1339 0, /*tp_as_mapping*/
1340 (hashfunc)0, /*tp_hash*/
1341 (ternaryfunc)0, /*tp_call*/
1342 (reprfunc)0, /*tp_str*/
1343 0, /*tp_getattro*/
1344 0, /*tp_setattro*/
1345 0, /*tp_as_buffer*/
1346 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1347 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1348 Writer_Type_doc,
1349 (traverseproc)Writer_traverse, /*tp_traverse*/
1350 (inquiry)Writer_clear, /*tp_clear*/
1351 0, /*tp_richcompare*/
1352 0, /*tp_weaklistoffset*/
1353 (getiterfunc)0, /*tp_iter*/
1354 (getiterfunc)0, /*tp_iternext*/
1355 Writer_methods, /*tp_methods*/
1356 Writer_memberlist, /*tp_members*/
1357 0, /*tp_getset*/
1358 };
1359
1360 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1361 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1362 {
1363 PyObject * output_file, * dialect = NULL;
1364 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1365 _Py_IDENTIFIER(write);
1366
1367 if (!self)
1368 return NULL;
1369
1370 self->dialect = NULL;
1371 self->writeline = NULL;
1372
1373 self->rec = NULL;
1374 self->rec_size = 0;
1375 self->rec_len = 0;
1376 self->num_fields = 0;
1377
1378 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1379 Py_DECREF(self);
1380 return NULL;
1381 }
1382 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
1383 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1384 PyErr_SetString(PyExc_TypeError,
1385 "argument 1 must have a \"write\" method");
1386 Py_DECREF(self);
1387 return NULL;
1388 }
1389 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1390 if (self->dialect == NULL) {
1391 Py_DECREF(self);
1392 return NULL;
1393 }
1394 PyObject_GC_Track(self);
1395 return (PyObject *)self;
1396 }
1397
1398 /*
1399 * DIALECT REGISTRY
1400 */
1401 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1402 csv_list_dialects(PyObject *module, PyObject *args)
1403 {
1404 return PyDict_Keys(_csvstate_global->dialects);
1405 }
1406
1407 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1408 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1409 {
1410 PyObject *name_obj, *dialect_obj = NULL;
1411 PyObject *dialect;
1412
1413 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1414 return NULL;
1415 if (!PyUnicode_Check(name_obj)) {
1416 PyErr_SetString(PyExc_TypeError,
1417 "dialect name must be a string");
1418 return NULL;
1419 }
1420 if (PyUnicode_READY(name_obj) == -1)
1421 return NULL;
1422 dialect = _call_dialect(dialect_obj, kwargs);
1423 if (dialect == NULL)
1424 return NULL;
1425 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1426 Py_DECREF(dialect);
1427 return NULL;
1428 }
1429 Py_DECREF(dialect);
1430 Py_RETURN_NONE;
1431 }
1432
1433 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1434 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1435 {
1436 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1437 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1438 Py_RETURN_NONE;
1439 }
1440
1441 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1442 csv_get_dialect(PyObject *module, PyObject *name_obj)
1443 {
1444 return get_dialect_from_registry(name_obj);
1445 }
1446
1447 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1448 csv_field_size_limit(PyObject *module, PyObject *args)
1449 {
1450 PyObject *new_limit = NULL;
1451 long old_limit = _csvstate_global->field_limit;
1452
1453 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1454 return NULL;
1455 if (new_limit != NULL) {
1456 if (!PyLong_CheckExact(new_limit)) {
1457 PyErr_Format(PyExc_TypeError,
1458 "limit must be an integer");
1459 return NULL;
1460 }
1461 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1462 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1463 _csvstate_global->field_limit = old_limit;
1464 return NULL;
1465 }
1466 }
1467 return PyLong_FromLong(old_limit);
1468 }
1469
1470 /*
1471 * MODULE
1472 */
1473
1474 PyDoc_STRVAR(csv_module_doc,
1475 "CSV parsing and writing.\n"
1476 "\n"
1477 "This module provides classes that assist in the reading and writing\n"
1478 "of Comma Separated Value (CSV) files, and implements the interface\n"
1479 "described by PEP 305. Although many CSV files are simple to parse,\n"
1480 "the format is not formally defined by a stable specification and\n"
1481 "is subtle enough that parsing lines of a CSV file with something\n"
1482 "like line.split(\",\") is bound to fail. The module supports three\n"
1483 "basic APIs: reading, writing, and registration of dialects.\n"
1484 "\n"
1485 "\n"
1486 "DIALECT REGISTRATION:\n"
1487 "\n"
1488 "Readers and writers support a dialect argument, which is a convenient\n"
1489 "handle on a group of settings. When the dialect argument is a string,\n"
1490 "it identifies one of the dialects previously registered with the module.\n"
1491 "If it is a class or instance, the attributes of the argument are used as\n"
1492 "the settings for the reader or writer:\n"
1493 "\n"
1494 " class excel:\n"
1495 " delimiter = ','\n"
1496 " quotechar = '\"'\n"
1497 " escapechar = None\n"
1498 " doublequote = True\n"
1499 " skipinitialspace = False\n"
1500 " lineterminator = '\\r\\n'\n"
1501 " quoting = QUOTE_MINIMAL\n"
1502 "\n"
1503 "SETTINGS:\n"
1504 "\n"
1505 " * quotechar - specifies a one-character string to use as the \n"
1506 " quoting character. It defaults to '\"'.\n"
1507 " * delimiter - specifies a one-character string to use as the \n"
1508 " field separator. It defaults to ','.\n"
1509 " * skipinitialspace - specifies how to interpret whitespace which\n"
1510 " immediately follows a delimiter. It defaults to False, which\n"
1511 " means that whitespace immediately following a delimiter is part\n"
1512 " of the following field.\n"
1513 " * lineterminator - specifies the character sequence which should \n"
1514 " terminate rows.\n"
1515 " * quoting - controls when quotes should be generated by the writer.\n"
1516 " It can take on any of the following module constants:\n"
1517 "\n"
1518 " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1519 " field contains either the quotechar or the delimiter\n"
1520 " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1521 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1522 " fields which do not parse as integers or floating point\n"
1523 " numbers.\n"
1524 " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1525 " * escapechar - specifies a one-character string used to escape \n"
1526 " the delimiter when quoting is set to QUOTE_NONE.\n"
1527 " * doublequote - controls the handling of quotes inside fields. When\n"
1528 " True, two consecutive quotes are interpreted as one during read,\n"
1529 " and when writing, each quote character embedded in the data is\n"
1530 " written as two quotes\n");
1531
1532 PyDoc_STRVAR(csv_reader_doc,
1533 " csv_reader = reader(iterable [, dialect='excel']\n"
1534 " [optional keyword args])\n"
1535 " for row in csv_reader:\n"
1536 " process(row)\n"
1537 "\n"
1538 "The \"iterable\" argument can be any object that returns a line\n"
1539 "of input for each iteration, such as a file object or a list. The\n"
1540 "optional \"dialect\" parameter is discussed below. The function\n"
1541 "also accepts optional keyword arguments which override settings\n"
1542 "provided by the dialect.\n"
1543 "\n"
1544 "The returned object is an iterator. Each iteration returns a row\n"
1545 "of the CSV file (which can span multiple input lines).\n");
1546
1547 PyDoc_STRVAR(csv_writer_doc,
1548 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1549 " [optional keyword args])\n"
1550 " for row in sequence:\n"
1551 " csv_writer.writerow(row)\n"
1552 "\n"
1553 " [or]\n"
1554 "\n"
1555 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1556 " [optional keyword args])\n"
1557 " csv_writer.writerows(rows)\n"
1558 "\n"
1559 "The \"fileobj\" argument can be any object that supports the file API.\n");
1560
1561 PyDoc_STRVAR(csv_list_dialects_doc,
1562 "Return a list of all know dialect names.\n"
1563 " names = csv.list_dialects()");
1564
1565 PyDoc_STRVAR(csv_get_dialect_doc,
1566 "Return the dialect instance associated with name.\n"
1567 " dialect = csv.get_dialect(name)");
1568
1569 PyDoc_STRVAR(csv_register_dialect_doc,
1570 "Create a mapping from a string name to a dialect class.\n"
1571 " dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1572
1573 PyDoc_STRVAR(csv_unregister_dialect_doc,
1574 "Delete the name/dialect mapping associated with a string name.\n"
1575 " csv.unregister_dialect(name)");
1576
1577 PyDoc_STRVAR(csv_field_size_limit_doc,
1578 "Sets an upper limit on parsed fields.\n"
1579 " csv.field_size_limit([limit])\n"
1580 "\n"
1581 "Returns old limit. If limit is not given, no new limit is set and\n"
1582 "the old limit is returned");
1583
1584 static struct PyMethodDef csv_methods[] = {
1585 { "reader", (PyCFunction)csv_reader,
1586 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1587 { "writer", (PyCFunction)csv_writer,
1588 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1589 { "list_dialects", (PyCFunction)csv_list_dialects,
1590 METH_NOARGS, csv_list_dialects_doc},
1591 { "register_dialect", (PyCFunction)csv_register_dialect,
1592 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1593 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1594 METH_O, csv_unregister_dialect_doc},
1595 { "get_dialect", (PyCFunction)csv_get_dialect,
1596 METH_O, csv_get_dialect_doc},
1597 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1598 METH_VARARGS, csv_field_size_limit_doc},
1599 { NULL, NULL }
1600 };
1601
1602 static struct PyModuleDef _csvmodule = {
1603 PyModuleDef_HEAD_INIT,
1604 "_csv",
1605 csv_module_doc,
1606 sizeof(_csvstate),
1607 csv_methods,
1608 NULL,
1609 _csv_traverse,
1610 _csv_clear,
1611 _csv_free
1612 };
1613
1614 PyMODINIT_FUNC
PyInit__csv(void)1615 PyInit__csv(void)
1616 {
1617 PyObject *module;
1618 const StyleDesc *style;
1619
1620 if (PyType_Ready(&Dialect_Type) < 0)
1621 return NULL;
1622
1623 if (PyType_Ready(&Reader_Type) < 0)
1624 return NULL;
1625
1626 if (PyType_Ready(&Writer_Type) < 0)
1627 return NULL;
1628
1629 /* Create the module and add the functions */
1630 module = PyModule_Create(&_csvmodule);
1631 if (module == NULL)
1632 return NULL;
1633
1634 /* Add version to the module. */
1635 if (PyModule_AddStringConstant(module, "__version__",
1636 MODULE_VERSION) == -1)
1637 return NULL;
1638
1639 /* Set the field limit */
1640 _csvstate(module)->field_limit = 128 * 1024;
1641 /* Do I still need to add this var to the Module Dict? */
1642
1643 /* Add _dialects dictionary */
1644 _csvstate(module)->dialects = PyDict_New();
1645 if (_csvstate(module)->dialects == NULL)
1646 return NULL;
1647 Py_INCREF(_csvstate(module)->dialects);
1648 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
1649 return NULL;
1650
1651 /* Add quote styles into dictionary */
1652 for (style = quote_styles; style->name; style++) {
1653 if (PyModule_AddIntConstant(module, style->name,
1654 style->style) == -1)
1655 return NULL;
1656 }
1657
1658 /* Add the Dialect type */
1659 Py_INCREF(&Dialect_Type);
1660 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1661 return NULL;
1662
1663 /* Add the CSV exception object to the module. */
1664 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1665 if (_csvstate(module)->error_obj == NULL)
1666 return NULL;
1667 Py_INCREF(_csvstate(module)->error_obj);
1668 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
1669 return module;
1670 }
1671