12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844 |
- /* csv module */
- /*
- This module provides the low-level underpinnings of a CSV reading/writing
- module. Users should not use this module directly, but import the csv.py
- module instead.
- */
- #define MODULE_VERSION "1.0"
- #include "Python.h"
- #include "structmember.h" // PyMemberDef
- #include <stdbool.h>
- /*[clinic input]
- module _csv
- [clinic start generated code]*/
- /*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
- #include "clinic/_csv.c.h"
- #define NOT_SET ((Py_UCS4)-1)
- #define EOL ((Py_UCS4)-2)
- typedef struct {
- PyObject *error_obj; /* CSV exception */
- PyObject *dialects; /* Dialect registry */
- PyTypeObject *dialect_type;
- PyTypeObject *reader_type;
- PyTypeObject *writer_type;
- long field_limit; /* max parsed field size */
- PyObject *str_write;
- } _csvstate;
- static struct PyModuleDef _csvmodule;
- static inline _csvstate*
- get_csv_state(PyObject *module)
- {
- void *state = PyModule_GetState(module);
- assert(state != NULL);
- return (_csvstate *)state;
- }
- static int
- _csv_clear(PyObject *module)
- {
- _csvstate *module_state = PyModule_GetState(module);
- Py_CLEAR(module_state->error_obj);
- Py_CLEAR(module_state->dialects);
- Py_CLEAR(module_state->dialect_type);
- Py_CLEAR(module_state->reader_type);
- Py_CLEAR(module_state->writer_type);
- Py_CLEAR(module_state->str_write);
- return 0;
- }
- static int
- _csv_traverse(PyObject *module, visitproc visit, void *arg)
- {
- _csvstate *module_state = PyModule_GetState(module);
- Py_VISIT(module_state->error_obj);
- Py_VISIT(module_state->dialects);
- Py_VISIT(module_state->dialect_type);
- Py_VISIT(module_state->reader_type);
- Py_VISIT(module_state->writer_type);
- return 0;
- }
- static void
- _csv_free(void *module)
- {
- _csv_clear((PyObject *)module);
- }
/* States of the reader's character-by-character parser. */
typedef enum {
    START_RECORD,            /* at the start of a record */
    START_FIELD,             /* expecting a field */
    ESCAPED_CHAR,            /* escape character seen outside quotes */
    IN_FIELD,                /* inside an unquoted field */
    IN_QUOTED_FIELD,         /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,   /* quote character seen inside a quoted field */
    EAT_CRNL,                /* skipping newline characters up to EOL */
    AFTER_ESCAPED_CRNL       /* an escaped CR/LF was just stored */
} ParserState;
/* Quoting styles a dialect may select; the numeric values follow this order. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE,
    QUOTE_STRINGS,
    QUOTE_NOTNULL
} QuoteStyle;
- typedef struct {
- QuoteStyle style;
- const char *name;
- } StyleDesc;
- static const StyleDesc quote_styles[] = {
- { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
- { QUOTE_ALL, "QUOTE_ALL" },
- { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
- { QUOTE_NONE, "QUOTE_NONE" },
- { QUOTE_STRINGS, "QUOTE_STRINGS" },
- { QUOTE_NOTNULL, "QUOTE_NOTNULL" },
- { 0 }
- };
- typedef struct {
- PyObject_HEAD
- char doublequote; /* is " represented by ""? */
- char skipinitialspace; /* ignore spaces following delimiter? */
- char strict; /* raise exception on bad CSV */
- int quoting; /* style of quoting to write */
- Py_UCS4 delimiter; /* field separator */
- Py_UCS4 quotechar; /* quote character */
- Py_UCS4 escapechar; /* escape character */
- PyObject *lineterminator; /* string to write between records */
- } DialectObj;
- typedef struct {
- PyObject_HEAD
- PyObject *input_iter; /* iterate over this for input lines */
- DialectObj *dialect; /* parsing dialect */
- PyObject *fields; /* field list for current record */
- ParserState state; /* current CSV parse state */
- Py_UCS4 *field; /* temporary buffer */
- Py_ssize_t field_size; /* size of allocated buffer */
- Py_ssize_t field_len; /* length of current field */
- int numeric_field; /* treat field as numeric */
- unsigned long line_num; /* Source-file line number */
- } ReaderObj;
- typedef struct {
- PyObject_HEAD
- PyObject *write; /* write output lines to this file */
- DialectObj *dialect; /* parsing dialect */
- Py_UCS4 *rec; /* buffer for parser.join */
- Py_ssize_t rec_size; /* size of allocated record */
- Py_ssize_t rec_len; /* length of record */
- int num_fields; /* number of fields in record */
- PyObject *error_obj; /* cached error object */
- } WriterObj;
- /*
- * DIALECT class
- */
- static PyObject *
- get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
- {
- PyObject *dialect_obj;
- dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
- if (dialect_obj == NULL) {
- if (!PyErr_Occurred())
- PyErr_Format(module_state->error_obj, "unknown dialect");
- }
- else
- Py_INCREF(dialect_obj);
- return dialect_obj;
- }
- static PyObject *
- get_char_or_None(Py_UCS4 c)
- {
- if (c == NOT_SET) {
- Py_RETURN_NONE;
- }
- else
- return PyUnicode_FromOrdinal(c);
- }
- static PyObject *
- Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
- {
- return Py_XNewRef(self->lineterminator);
- }
- static PyObject *
- Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
- {
- return get_char_or_None(self->delimiter);
- }
- static PyObject *
- Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
- {
- return get_char_or_None(self->escapechar);
- }
- static PyObject *
- Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
- {
- return get_char_or_None(self->quotechar);
- }
- static PyObject *
- Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
- {
- return PyLong_FromLong(self->quoting);
- }
- static int
- _set_bool(const char *name, char *target, PyObject *src, bool dflt)
- {
- if (src == NULL)
- *target = dflt;
- else {
- int b = PyObject_IsTrue(src);
- if (b < 0)
- return -1;
- *target = (char)b;
- }
- return 0;
- }
- static int
- _set_int(const char *name, int *target, PyObject *src, int dflt)
- {
- if (src == NULL)
- *target = dflt;
- else {
- int value;
- if (!PyLong_CheckExact(src)) {
- PyErr_Format(PyExc_TypeError,
- "\"%s\" must be an integer", name);
- return -1;
- }
- value = _PyLong_AsInt(src);
- if (value == -1 && PyErr_Occurred()) {
- return -1;
- }
- *target = value;
- }
- return 0;
- }
- static int
- _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
- {
- if (src == NULL) {
- *target = dflt;
- }
- else {
- *target = NOT_SET;
- if (src != Py_None) {
- if (!PyUnicode_Check(src)) {
- PyErr_Format(PyExc_TypeError,
- "\"%s\" must be string or None, not %.200s", name,
- Py_TYPE(src)->tp_name);
- return -1;
- }
- Py_ssize_t len = PyUnicode_GetLength(src);
- if (len < 0) {
- return -1;
- }
- if (len != 1) {
- PyErr_Format(PyExc_TypeError,
- "\"%s\" must be a 1-character string",
- name);
- return -1;
- }
- /* PyUnicode_READY() is called in PyUnicode_GetLength() */
- *target = PyUnicode_READ_CHAR(src, 0);
- }
- }
- return 0;
- }
- static int
- _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
- {
- if (src == NULL) {
- *target = dflt;
- }
- else {
- if (!PyUnicode_Check(src)) {
- PyErr_Format(PyExc_TypeError,
- "\"%s\" must be string, not %.200s", name,
- Py_TYPE(src)->tp_name);
- return -1;
- }
- Py_ssize_t len = PyUnicode_GetLength(src);
- if (len < 0) {
- return -1;
- }
- if (len != 1) {
- PyErr_Format(PyExc_TypeError,
- "\"%s\" must be a 1-character string",
- name);
- return -1;
- }
- /* PyUnicode_READY() is called in PyUnicode_GetLength() */
- *target = PyUnicode_READ_CHAR(src, 0);
- }
- return 0;
- }
- static int
- _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
- {
- if (src == NULL)
- *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
- else {
- if (src == Py_None)
- *target = NULL;
- else if (!PyUnicode_Check(src)) {
- PyErr_Format(PyExc_TypeError,
- "\"%s\" must be a string", name);
- return -1;
- }
- else {
- if (PyUnicode_READY(src) == -1)
- return -1;
- Py_XSETREF(*target, Py_NewRef(src));
- }
- }
- return 0;
- }
- static int
- dialect_check_quoting(int quoting)
- {
- const StyleDesc *qs;
- for (qs = quote_styles; qs->name; qs++) {
- if ((int)qs->style == quoting)
- return 0;
- }
- PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
- return -1;
- }
- #define D_OFF(x) offsetof(DialectObj, x)
- static struct PyMemberDef Dialect_memberlist[] = {
- { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
- { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
- { "strict", T_BOOL, D_OFF(strict), READONLY },
- { NULL }
- };
- static PyGetSetDef Dialect_getsetlist[] = {
- { "delimiter", (getter)Dialect_get_delimiter},
- { "escapechar", (getter)Dialect_get_escapechar},
- { "lineterminator", (getter)Dialect_get_lineterminator},
- { "quotechar", (getter)Dialect_get_quotechar},
- { "quoting", (getter)Dialect_get_quoting},
- {NULL},
- };
- static void
- Dialect_dealloc(DialectObj *self)
- {
- PyTypeObject *tp = Py_TYPE(self);
- PyObject_GC_UnTrack(self);
- tp->tp_clear((PyObject *)self);
- PyObject_GC_Del(self);
- Py_DECREF(tp);
- }
- static char *dialect_kws[] = {
- "dialect",
- "delimiter",
- "doublequote",
- "escapechar",
- "lineterminator",
- "quotechar",
- "quoting",
- "skipinitialspace",
- "strict",
- NULL
- };
- static _csvstate *
- _csv_state_from_type(PyTypeObject *type, const char *name)
- {
- PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
- if (module == NULL) {
- return NULL;
- }
- _csvstate *module_state = PyModule_GetState(module);
- if (module_state == NULL) {
- PyErr_Format(PyExc_SystemError,
- "%s: No _csv module state found", name);
- return NULL;
- }
- return module_state;
- }
- static PyObject *
- dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
- {
- DialectObj *self;
- PyObject *ret = NULL;
- PyObject *dialect = NULL;
- PyObject *delimiter = NULL;
- PyObject *doublequote = NULL;
- PyObject *escapechar = NULL;
- PyObject *lineterminator = NULL;
- PyObject *quotechar = NULL;
- PyObject *quoting = NULL;
- PyObject *skipinitialspace = NULL;
- PyObject *strict = NULL;
- if (!PyArg_ParseTupleAndKeywords(args, kwargs,
- "|OOOOOOOOO", dialect_kws,
- &dialect,
- &delimiter,
- &doublequote,
- &escapechar,
- &lineterminator,
- "echar,
- "ing,
- &skipinitialspace,
- &strict))
- return NULL;
- _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
- if (module_state == NULL) {
- return NULL;
- }
- if (dialect != NULL) {
- if (PyUnicode_Check(dialect)) {
- dialect = get_dialect_from_registry(dialect, module_state);
- if (dialect == NULL)
- return NULL;
- }
- else
- Py_INCREF(dialect);
- /* Can we reuse this instance? */
- if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
- delimiter == NULL &&
- doublequote == NULL &&
- escapechar == NULL &&
- lineterminator == NULL &&
- quotechar == NULL &&
- quoting == NULL &&
- skipinitialspace == NULL &&
- strict == NULL)
- return dialect;
- }
- self = (DialectObj *)type->tp_alloc(type, 0);
- if (self == NULL) {
- Py_CLEAR(dialect);
- return NULL;
- }
- self->lineterminator = NULL;
- Py_XINCREF(delimiter);
- Py_XINCREF(doublequote);
- Py_XINCREF(escapechar);
- Py_XINCREF(lineterminator);
- Py_XINCREF(quotechar);
- Py_XINCREF(quoting);
- Py_XINCREF(skipinitialspace);
- Py_XINCREF(strict);
- if (dialect != NULL) {
- #define DIALECT_GETATTR(v, n) \
- do { \
- if (v == NULL) { \
- v = PyObject_GetAttrString(dialect, n); \
- if (v == NULL) \
- PyErr_Clear(); \
- } \
- } while (0)
- DIALECT_GETATTR(delimiter, "delimiter");
- DIALECT_GETATTR(doublequote, "doublequote");
- DIALECT_GETATTR(escapechar, "escapechar");
- DIALECT_GETATTR(lineterminator, "lineterminator");
- DIALECT_GETATTR(quotechar, "quotechar");
- DIALECT_GETATTR(quoting, "quoting");
- DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
- DIALECT_GETATTR(strict, "strict");
- }
- /* check types and convert to C values */
- #define DIASET(meth, name, target, src, dflt) \
- if (meth(name, target, src, dflt)) \
- goto err
- DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
- DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
- DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
- DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
- DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
- DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
- DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
- DIASET(_set_bool, "strict", &self->strict, strict, false);
- /* validate options */
- if (dialect_check_quoting(self->quoting))
- goto err;
- if (self->delimiter == NOT_SET) {
- PyErr_SetString(PyExc_TypeError,
- "\"delimiter\" must be a 1-character string");
- goto err;
- }
- if (quotechar == Py_None && quoting == NULL)
- self->quoting = QUOTE_NONE;
- if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
- PyErr_SetString(PyExc_TypeError,
- "quotechar must be set if quoting enabled");
- goto err;
- }
- if (self->lineterminator == NULL) {
- PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
- goto err;
- }
- ret = Py_NewRef(self);
- err:
- Py_CLEAR(self);
- Py_CLEAR(dialect);
- Py_CLEAR(delimiter);
- Py_CLEAR(doublequote);
- Py_CLEAR(escapechar);
- Py_CLEAR(lineterminator);
- Py_CLEAR(quotechar);
- Py_CLEAR(quoting);
- Py_CLEAR(skipinitialspace);
- Py_CLEAR(strict);
- return ret;
- }
- /* Since dialect is now a heap type, it inherits pickling method for
- * protocol 0 and 1 from object, therefore it needs to be overridden */
- PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
- static PyObject *
- Dialect_reduce(PyObject *self, PyObject *args) {
- PyErr_Format(PyExc_TypeError,
- "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
- return NULL;
- }
- static struct PyMethodDef dialect_methods[] = {
- {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
- {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
- {NULL, NULL}
- };
- PyDoc_STRVAR(Dialect_Type_doc,
- "CSV dialect\n"
- "\n"
- "The Dialect type records CSV parsing and generation options.\n");
- static int
- Dialect_clear(DialectObj *self)
- {
- Py_CLEAR(self->lineterminator);
- return 0;
- }
- static int
- Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
- {
- Py_VISIT(self->lineterminator);
- Py_VISIT(Py_TYPE(self));
- return 0;
- }
- static PyType_Slot Dialect_Type_slots[] = {
- {Py_tp_doc, (char*)Dialect_Type_doc},
- {Py_tp_members, Dialect_memberlist},
- {Py_tp_getset, Dialect_getsetlist},
- {Py_tp_new, dialect_new},
- {Py_tp_methods, dialect_methods},
- {Py_tp_dealloc, Dialect_dealloc},
- {Py_tp_clear, Dialect_clear},
- {Py_tp_traverse, Dialect_traverse},
- {0, NULL}
- };
- PyType_Spec Dialect_Type_spec = {
- .name = "_csv.Dialect",
- .basicsize = sizeof(DialectObj),
- .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
- Py_TPFLAGS_IMMUTABLETYPE),
- .slots = Dialect_Type_slots,
- };
- /*
- * Return an instance of the dialect type, given a Python instance or kwarg
- * description of the dialect
- */
- static PyObject *
- _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
- {
- PyObject *type = (PyObject *)module_state->dialect_type;
- if (dialect_inst) {
- return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
- }
- else {
- return PyObject_VectorcallDict(type, NULL, 0, kwargs);
- }
- }
- /*
- * READER
- */
- static int
- parse_save_field(ReaderObj *self)
- {
- PyObject *field;
- field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
- (void *) self->field, self->field_len);
- if (field == NULL)
- return -1;
- self->field_len = 0;
- if (self->numeric_field) {
- PyObject *tmp;
- self->numeric_field = 0;
- tmp = PyNumber_Float(field);
- Py_DECREF(field);
- if (tmp == NULL)
- return -1;
- field = tmp;
- }
- if (PyList_Append(self->fields, field) < 0) {
- Py_DECREF(field);
- return -1;
- }
- Py_DECREF(field);
- return 0;
- }
- static int
- parse_grow_buff(ReaderObj *self)
- {
- assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
- Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
- Py_UCS4 *field_new = self->field;
- PyMem_Resize(field_new, Py_UCS4, field_size_new);
- if (field_new == NULL) {
- PyErr_NoMemory();
- return 0;
- }
- self->field = field_new;
- self->field_size = field_size_new;
- return 1;
- }
- static int
- parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
- {
- if (self->field_len >= module_state->field_limit) {
- PyErr_Format(module_state->error_obj,
- "field larger than field limit (%ld)",
- module_state->field_limit);
- return -1;
- }
- if (self->field_len == self->field_size && !parse_grow_buff(self))
- return -1;
- self->field[self->field_len++] = c;
- return 0;
- }
- static int
- parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
- {
- DialectObj *dialect = self->dialect;
- switch (self->state) {
- case START_RECORD:
- /* start of record */
- if (c == EOL)
- /* empty line - return [] */
- break;
- else if (c == '\n' || c == '\r') {
- self->state = EAT_CRNL;
- break;
- }
- /* normal character - handle as START_FIELD */
- self->state = START_FIELD;
- /* fallthru */
- case START_FIELD:
- /* expecting field */
- if (c == '\n' || c == '\r' || c == EOL) {
- /* save empty field - return [fields] */
- if (parse_save_field(self) < 0)
- return -1;
- self->state = (c == EOL ? START_RECORD : EAT_CRNL);
- }
- else if (c == dialect->quotechar &&
- dialect->quoting != QUOTE_NONE) {
- /* start quoted field */
- self->state = IN_QUOTED_FIELD;
- }
- else if (c == dialect->escapechar) {
- /* possible escaped character */
- self->state = ESCAPED_CHAR;
- }
- else if (c == ' ' && dialect->skipinitialspace)
- /* ignore spaces at start of field */
- ;
- else if (c == dialect->delimiter) {
- /* save empty field */
- if (parse_save_field(self) < 0)
- return -1;
- }
- else {
- /* begin new unquoted field */
- if (dialect->quoting == QUOTE_NONNUMERIC)
- self->numeric_field = 1;
- if (parse_add_char(self, module_state, c) < 0)
- return -1;
- self->state = IN_FIELD;
- }
- break;
- case ESCAPED_CHAR:
- if (c == '\n' || c=='\r') {
- if (parse_add_char(self, module_state, c) < 0)
- return -1;
- self->state = AFTER_ESCAPED_CRNL;
- break;
- }
- if (c == EOL)
- c = '\n';
- if (parse_add_char(self, module_state, c) < 0)
- return -1;
- self->state = IN_FIELD;
- break;
- case AFTER_ESCAPED_CRNL:
- if (c == EOL)
- break;
- /*fallthru*/
- case IN_FIELD:
- /* in unquoted field */
- if (c == '\n' || c == '\r' || c == EOL) {
- /* end of line - return [fields] */
- if (parse_save_field(self) < 0)
- return -1;
- self->state = (c == EOL ? START_RECORD : EAT_CRNL);
- }
- else if (c == dialect->escapechar) {
- /* possible escaped character */
- self->state = ESCAPED_CHAR;
- }
- else if (c == dialect->delimiter) {
- /* save field - wait for new field */
- if (parse_save_field(self) < 0)
- return -1;
- self->state = START_FIELD;
- }
- else {
- /* normal character - save in field */
- if (parse_add_char(self, module_state, c) < 0)
- return -1;
- }
- break;
- case IN_QUOTED_FIELD:
- /* in quoted field */
- if (c == EOL)
- ;
- else if (c == dialect->escapechar) {
- /* Possible escape character */
- self->state = ESCAPE_IN_QUOTED_FIELD;
- }
- else if (c == dialect->quotechar &&
- dialect->quoting != QUOTE_NONE) {
- if (dialect->doublequote) {
- /* doublequote; " represented by "" */
- self->state = QUOTE_IN_QUOTED_FIELD;
- }
- else {
- /* end of quote part of field */
- self->state = IN_FIELD;
- }
- }
- else {
- /* normal character - save in field */
- if (parse_add_char(self, module_state, c) < 0)
- return -1;
- }
- break;
- case ESCAPE_IN_QUOTED_FIELD:
- if (c == EOL)
- c = '\n';
- if (parse_add_char(self, module_state, c) < 0)
- return -1;
- self->state = IN_QUOTED_FIELD;
- break;
- case QUOTE_IN_QUOTED_FIELD:
- /* doublequote - seen a quote in a quoted field */
- if (dialect->quoting != QUOTE_NONE &&
- c == dialect->quotechar) {
- /* save "" as " */
- if (parse_add_char(self, module_state, c) < 0)
- return -1;
- self->state = IN_QUOTED_FIELD;
- }
- else if (c == dialect->delimiter) {
- /* save field - wait for new field */
- if (parse_save_field(self) < 0)
- return -1;
- self->state = START_FIELD;
- }
- else if (c == '\n' || c == '\r' || c == EOL) {
- /* end of line - return [fields] */
- if (parse_save_field(self) < 0)
- return -1;
- self->state = (c == EOL ? START_RECORD : EAT_CRNL);
- }
- else if (!dialect->strict) {
- if (parse_add_char(self, module_state, c) < 0)
- return -1;
- self->state = IN_FIELD;
- }
- else {
- /* illegal */
- PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
- dialect->delimiter,
- dialect->quotechar);
- return -1;
- }
- break;
- case EAT_CRNL:
- if (c == '\n' || c == '\r')
- ;
- else if (c == EOL)
- self->state = START_RECORD;
- else {
- PyErr_Format(module_state->error_obj,
- "new-line character seen in unquoted field - "
- "do you need to open the file with newline=''?");
- return -1;
- }
- break;
- }
- return 0;
- }
- static int
- parse_reset(ReaderObj *self)
- {
- Py_XSETREF(self->fields, PyList_New(0));
- if (self->fields == NULL)
- return -1;
- self->field_len = 0;
- self->state = START_RECORD;
- self->numeric_field = 0;
- return 0;
- }
- static PyObject *
- Reader_iternext(ReaderObj *self)
- {
- PyObject *fields = NULL;
- Py_UCS4 c;
- Py_ssize_t pos, linelen;
- int kind;
- const void *data;
- PyObject *lineobj;
- _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
- "Reader.__next__");
- if (module_state == NULL) {
- return NULL;
- }
- if (parse_reset(self) < 0)
- return NULL;
- do {
- lineobj = PyIter_Next(self->input_iter);
- if (lineobj == NULL) {
- /* End of input OR exception */
- if (!PyErr_Occurred() && (self->field_len != 0 ||
- self->state == IN_QUOTED_FIELD)) {
- if (self->dialect->strict)
- PyErr_SetString(module_state->error_obj,
- "unexpected end of data");
- else if (parse_save_field(self) >= 0)
- break;
- }
- return NULL;
- }
- if (!PyUnicode_Check(lineobj)) {
- PyErr_Format(module_state->error_obj,
- "iterator should return strings, "
- "not %.200s "
- "(the file should be opened in text mode)",
- Py_TYPE(lineobj)->tp_name
- );
- Py_DECREF(lineobj);
- return NULL;
- }
- if (PyUnicode_READY(lineobj) == -1) {
- Py_DECREF(lineobj);
- return NULL;
- }
- ++self->line_num;
- kind = PyUnicode_KIND(lineobj);
- data = PyUnicode_DATA(lineobj);
- pos = 0;
- linelen = PyUnicode_GET_LENGTH(lineobj);
- while (linelen--) {
- c = PyUnicode_READ(kind, data, pos);
- if (parse_process_char(self, module_state, c) < 0) {
- Py_DECREF(lineobj);
- goto err;
- }
- pos++;
- }
- Py_DECREF(lineobj);
- if (parse_process_char(self, module_state, EOL) < 0)
- goto err;
- } while (self->state != START_RECORD);
- fields = self->fields;
- self->fields = NULL;
- err:
- return fields;
- }
- static void
- Reader_dealloc(ReaderObj *self)
- {
- PyTypeObject *tp = Py_TYPE(self);
- PyObject_GC_UnTrack(self);
- tp->tp_clear((PyObject *)self);
- if (self->field != NULL) {
- PyMem_Free(self->field);
- self->field = NULL;
- }
- PyObject_GC_Del(self);
- Py_DECREF(tp);
- }
- static int
- Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
- {
- Py_VISIT(self->dialect);
- Py_VISIT(self->input_iter);
- Py_VISIT(self->fields);
- Py_VISIT(Py_TYPE(self));
- return 0;
- }
- static int
- Reader_clear(ReaderObj *self)
- {
- Py_CLEAR(self->dialect);
- Py_CLEAR(self->input_iter);
- Py_CLEAR(self->fields);
- return 0;
- }
- PyDoc_STRVAR(Reader_Type_doc,
- "CSV reader\n"
- "\n"
- "Reader objects are responsible for reading and parsing tabular data\n"
- "in CSV format.\n"
- );
- static struct PyMethodDef Reader_methods[] = {
- { NULL, NULL }
- };
- #define R_OFF(x) offsetof(ReaderObj, x)
- static struct PyMemberDef Reader_memberlist[] = {
- { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
- { "line_num", T_ULONG, R_OFF(line_num), READONLY },
- { NULL }
- };
- static PyType_Slot Reader_Type_slots[] = {
- {Py_tp_doc, (char*)Reader_Type_doc},
- {Py_tp_traverse, Reader_traverse},
- {Py_tp_iter, PyObject_SelfIter},
- {Py_tp_iternext, Reader_iternext},
- {Py_tp_methods, Reader_methods},
- {Py_tp_members, Reader_memberlist},
- {Py_tp_clear, Reader_clear},
- {Py_tp_dealloc, Reader_dealloc},
- {0, NULL}
- };
- PyType_Spec Reader_Type_spec = {
- .name = "_csv.reader",
- .basicsize = sizeof(ReaderObj),
- .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
- Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
- .slots = Reader_Type_slots
- };
- static PyObject *
- csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
- {
- PyObject * iterator, * dialect = NULL;
- _csvstate *module_state = get_csv_state(module);
- ReaderObj * self = PyObject_GC_New(
- ReaderObj,
- module_state->reader_type);
- if (!self)
- return NULL;
- self->dialect = NULL;
- self->fields = NULL;
- self->input_iter = NULL;
- self->field = NULL;
- self->field_size = 0;
- self->line_num = 0;
- if (parse_reset(self) < 0) {
- Py_DECREF(self);
- return NULL;
- }
- if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
- Py_DECREF(self);
- return NULL;
- }
- self->input_iter = PyObject_GetIter(iterator);
- if (self->input_iter == NULL) {
- Py_DECREF(self);
- return NULL;
- }
- self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
- keyword_args);
- if (self->dialect == NULL) {
- Py_DECREF(self);
- return NULL;
- }
- PyObject_GC_Track(self);
- return (PyObject *)self;
- }
- /*
- * WRITER
- */
- /* ---------------------------------------------------------------- */
- static void
- join_reset(WriterObj *self)
- {
- self->rec_len = 0;
- self->num_fields = 0;
- }
- #define MEM_INCR 32768
- /* Calculate new record length or append field to record. Return new
- * record length.
- */
- static Py_ssize_t
- join_append_data(WriterObj *self, int field_kind, const void *field_data,
- Py_ssize_t field_len, int *quoted,
- int copy_phase)
- {
- DialectObj *dialect = self->dialect;
- int i;
- Py_ssize_t rec_len;
- #define INCLEN \
- do {\
- if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
- goto overflow; \
- } \
- rec_len++; \
- } while(0)
- #define ADDCH(c) \
- do {\
- if (copy_phase) \
- self->rec[rec_len] = c;\
- INCLEN;\
- } while(0)
- rec_len = self->rec_len;
- /* If this is not the first field we need a field separator */
- if (self->num_fields > 0)
- ADDCH(dialect->delimiter);
- /* Handle preceding quote */
- if (copy_phase && *quoted)
- ADDCH(dialect->quotechar);
- /* Copy/count field data */
- /* If field is null just pass over */
- for (i = 0; field_data && (i < field_len); i++) {
- Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
- int want_escape = 0;
- if (c == dialect->delimiter ||
- c == dialect->escapechar ||
- c == dialect->quotechar ||
- c == '\n' ||
- c == '\r' ||
- PyUnicode_FindChar(
- dialect->lineterminator, c, 0,
- PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
- if (dialect->quoting == QUOTE_NONE)
- want_escape = 1;
- else {
- if (c == dialect->quotechar) {
- if (dialect->doublequote)
- ADDCH(dialect->quotechar);
- else
- want_escape = 1;
- }
- else if (c == dialect->escapechar) {
- want_escape = 1;
- }
- if (!want_escape)
- *quoted = 1;
- }
- if (want_escape) {
- if (dialect->escapechar == NOT_SET) {
- PyErr_Format(self->error_obj,
- "need to escape, but no escapechar set");
- return -1;
- }
- ADDCH(dialect->escapechar);
- }
- }
- /* Copy field character into record buffer.
- */
- ADDCH(c);
- }
- if (*quoted) {
- if (copy_phase)
- ADDCH(dialect->quotechar);
- else {
- INCLEN; /* starting quote */
- INCLEN; /* ending quote */
- }
- }
- return rec_len;
- overflow:
- PyErr_NoMemory();
- return -1;
- #undef ADDCH
- #undef INCLEN
- }
- static int
- join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
- {
- assert(rec_len >= 0);
- if (rec_len > self->rec_size) {
- size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
- Py_UCS4 *rec_new = self->rec;
- PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
- if (rec_new == NULL) {
- PyErr_NoMemory();
- return 0;
- }
- self->rec = rec_new;
- self->rec_size = (Py_ssize_t)rec_size_new;
- }
- return 1;
- }
- static int
- join_append(WriterObj *self, PyObject *field, int quoted)
- {
- DialectObj *dialect = self->dialect;
- int field_kind = -1;
- const void *field_data = NULL;
- Py_ssize_t field_len = 0;
- Py_ssize_t rec_len;
- if (field != NULL) {
- if (PyUnicode_READY(field) == -1)
- return 0;
- field_kind = PyUnicode_KIND(field);
- field_data = PyUnicode_DATA(field);
- field_len = PyUnicode_GET_LENGTH(field);
- }
- if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) {
- if (dialect->quoting == QUOTE_NONE ||
- (field == NULL &&
- (dialect->quoting == QUOTE_STRINGS ||
- dialect->quoting == QUOTE_NOTNULL)))
- {
- PyErr_Format(self->error_obj,
- "empty field must be quoted if delimiter is a space "
- "and skipinitialspace is true");
- return 0;
- }
- quoted = 1;
- }
- rec_len = join_append_data(self, field_kind, field_data, field_len,
- "ed, 0);
- if (rec_len < 0)
- return 0;
- /* grow record buffer if necessary */
- if (!join_check_rec_size(self, rec_len))
- return 0;
- self->rec_len = join_append_data(self, field_kind, field_data, field_len,
- "ed, 1);
- self->num_fields++;
- return 1;
- }
- static int
- join_append_lineterminator(WriterObj *self)
- {
- Py_ssize_t terminator_len, i;
- int term_kind;
- const void *term_data;
- terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
- if (terminator_len == -1)
- return 0;
- /* grow record buffer if necessary */
- if (!join_check_rec_size(self, self->rec_len + terminator_len))
- return 0;
- term_kind = PyUnicode_KIND(self->dialect->lineterminator);
- term_data = PyUnicode_DATA(self->dialect->lineterminator);
- for (i = 0; i < terminator_len; i++)
- self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
- self->rec_len += terminator_len;
- return 1;
- }
- PyDoc_STRVAR(csv_writerow_doc,
- "writerow(iterable)\n"
- "\n"
- "Construct and write a CSV record from an iterable of fields. Non-string\n"
- "elements will be converted to string.");
- static PyObject *
- csv_writerow(WriterObj *self, PyObject *seq)
- {
- DialectObj *dialect = self->dialect;
- PyObject *iter, *field, *line, *result;
- bool null_field = false;
- iter = PyObject_GetIter(seq);
- if (iter == NULL) {
- if (PyErr_ExceptionMatches(PyExc_TypeError)) {
- PyErr_Format(self->error_obj,
- "iterable expected, not %.200s",
- Py_TYPE(seq)->tp_name);
- }
- return NULL;
- }
- /* Join all fields in internal buffer.
- */
- join_reset(self);
- while ((field = PyIter_Next(iter))) {
- int append_ok;
- int quoted;
- switch (dialect->quoting) {
- case QUOTE_NONNUMERIC:
- quoted = !PyNumber_Check(field);
- break;
- case QUOTE_ALL:
- quoted = 1;
- break;
- case QUOTE_STRINGS:
- quoted = PyUnicode_Check(field);
- break;
- case QUOTE_NOTNULL:
- quoted = field != Py_None;
- break;
- default:
- quoted = 0;
- break;
- }
- null_field = (field == Py_None);
- if (PyUnicode_Check(field)) {
- append_ok = join_append(self, field, quoted);
- Py_DECREF(field);
- }
- else if (null_field) {
- append_ok = join_append(self, NULL, quoted);
- Py_DECREF(field);
- }
- else {
- PyObject *str;
- str = PyObject_Str(field);
- Py_DECREF(field);
- if (str == NULL) {
- Py_DECREF(iter);
- return NULL;
- }
- append_ok = join_append(self, str, quoted);
- Py_DECREF(str);
- }
- if (!append_ok) {
- Py_DECREF(iter);
- return NULL;
- }
- }
- Py_DECREF(iter);
- if (PyErr_Occurred())
- return NULL;
- if (self->num_fields > 0 && self->rec_len == 0) {
- if (dialect->quoting == QUOTE_NONE ||
- (null_field &&
- (dialect->quoting == QUOTE_STRINGS ||
- dialect->quoting == QUOTE_NOTNULL)))
- {
- PyErr_Format(self->error_obj,
- "single empty field record must be quoted");
- return NULL;
- }
- self->num_fields--;
- if (!join_append(self, NULL, 1))
- return NULL;
- }
- /* Add line terminator.
- */
- if (!join_append_lineterminator(self)) {
- return NULL;
- }
- line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
- (void *) self->rec, self->rec_len);
- if (line == NULL) {
- return NULL;
- }
- result = PyObject_CallOneArg(self->write, line);
- Py_DECREF(line);
- return result;
- }
- PyDoc_STRVAR(csv_writerows_doc,
- "writerows(iterable of iterables)\n"
- "\n"
- "Construct and write a series of iterables to a csv file. Non-string\n"
- "elements will be converted to string.");
- static PyObject *
- csv_writerows(WriterObj *self, PyObject *seqseq)
- {
- PyObject *row_iter, *row_obj, *result;
- row_iter = PyObject_GetIter(seqseq);
- if (row_iter == NULL) {
- return NULL;
- }
- while ((row_obj = PyIter_Next(row_iter))) {
- result = csv_writerow(self, row_obj);
- Py_DECREF(row_obj);
- if (!result) {
- Py_DECREF(row_iter);
- return NULL;
- }
- else
- Py_DECREF(result);
- }
- Py_DECREF(row_iter);
- if (PyErr_Occurred())
- return NULL;
- Py_RETURN_NONE;
- }
- static struct PyMethodDef Writer_methods[] = {
- { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
- { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
- { NULL, NULL }
- };
- #define W_OFF(x) offsetof(WriterObj, x)
- static struct PyMemberDef Writer_memberlist[] = {
- { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
- { NULL }
- };
- static int
- Writer_traverse(WriterObj *self, visitproc visit, void *arg)
- {
- Py_VISIT(self->dialect);
- Py_VISIT(self->write);
- Py_VISIT(self->error_obj);
- Py_VISIT(Py_TYPE(self));
- return 0;
- }
- static int
- Writer_clear(WriterObj *self)
- {
- Py_CLEAR(self->dialect);
- Py_CLEAR(self->write);
- Py_CLEAR(self->error_obj);
- return 0;
- }
- static void
- Writer_dealloc(WriterObj *self)
- {
- PyTypeObject *tp = Py_TYPE(self);
- PyObject_GC_UnTrack(self);
- tp->tp_clear((PyObject *)self);
- if (self->rec != NULL) {
- PyMem_Free(self->rec);
- }
- PyObject_GC_Del(self);
- Py_DECREF(tp);
- }
- PyDoc_STRVAR(Writer_Type_doc,
- "CSV writer\n"
- "\n"
- "Writer objects are responsible for generating tabular data\n"
- "in CSV format from sequence input.\n"
- );
- static PyType_Slot Writer_Type_slots[] = {
- {Py_tp_doc, (char*)Writer_Type_doc},
- {Py_tp_traverse, Writer_traverse},
- {Py_tp_clear, Writer_clear},
- {Py_tp_dealloc, Writer_dealloc},
- {Py_tp_methods, Writer_methods},
- {Py_tp_members, Writer_memberlist},
- {0, NULL}
- };
- PyType_Spec Writer_Type_spec = {
- .name = "_csv.writer",
- .basicsize = sizeof(WriterObj),
- .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
- Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
- .slots = Writer_Type_slots,
- };
- static PyObject *
- csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
- {
- PyObject * output_file, * dialect = NULL;
- _csvstate *module_state = get_csv_state(module);
- WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
- if (!self)
- return NULL;
- self->dialect = NULL;
- self->write = NULL;
- self->rec = NULL;
- self->rec_size = 0;
- self->rec_len = 0;
- self->num_fields = 0;
- self->error_obj = Py_NewRef(module_state->error_obj);
- if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
- Py_DECREF(self);
- return NULL;
- }
- if (_PyObject_LookupAttr(output_file,
- module_state->str_write,
- &self->write) < 0) {
- Py_DECREF(self);
- return NULL;
- }
- if (self->write == NULL || !PyCallable_Check(self->write)) {
- PyErr_SetString(PyExc_TypeError,
- "argument 1 must have a \"write\" method");
- Py_DECREF(self);
- return NULL;
- }
- self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
- keyword_args);
- if (self->dialect == NULL) {
- Py_DECREF(self);
- return NULL;
- }
- PyObject_GC_Track(self);
- return (PyObject *)self;
- }
- /*
- * DIALECT REGISTRY
- */
- /*[clinic input]
- _csv.list_dialects
- Return a list of all known dialect names.
- names = csv.list_dialects()
- [clinic start generated code]*/
- static PyObject *
- _csv_list_dialects_impl(PyObject *module)
- /*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
- {
- return PyDict_Keys(get_csv_state(module)->dialects);
- }
- static PyObject *
- csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
- {
- PyObject *name_obj, *dialect_obj = NULL;
- _csvstate *module_state = get_csv_state(module);
- PyObject *dialect;
- if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
- return NULL;
- if (!PyUnicode_Check(name_obj)) {
- PyErr_SetString(PyExc_TypeError,
- "dialect name must be a string");
- return NULL;
- }
- if (PyUnicode_READY(name_obj) == -1)
- return NULL;
- dialect = _call_dialect(module_state, dialect_obj, kwargs);
- if (dialect == NULL)
- return NULL;
- if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
- Py_DECREF(dialect);
- return NULL;
- }
- Py_DECREF(dialect);
- Py_RETURN_NONE;
- }
- /*[clinic input]
- _csv.unregister_dialect
- name: object
- Delete the name/dialect mapping associated with a string name.
- csv.unregister_dialect(name)
- [clinic start generated code]*/
- static PyObject *
- _csv_unregister_dialect_impl(PyObject *module, PyObject *name)
- /*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
- {
- _csvstate *module_state = get_csv_state(module);
- if (PyDict_DelItem(module_state->dialects, name) < 0) {
- if (PyErr_ExceptionMatches(PyExc_KeyError)) {
- PyErr_Format(module_state->error_obj, "unknown dialect");
- }
- return NULL;
- }
- Py_RETURN_NONE;
- }
- /*[clinic input]
- _csv.get_dialect
- name: object
- Return the dialect instance associated with name.
- dialect = csv.get_dialect(name)
- [clinic start generated code]*/
- static PyObject *
- _csv_get_dialect_impl(PyObject *module, PyObject *name)
- /*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
- {
- return get_dialect_from_registry(name, get_csv_state(module));
- }
- /*[clinic input]
- _csv.field_size_limit
- new_limit: object = NULL
- Sets an upper limit on parsed fields.
- csv.field_size_limit([limit])
- Returns old limit. If limit is not given, no new limit is set and
- the old limit is returned
- [clinic start generated code]*/
- static PyObject *
- _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
- /*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
- {
- _csvstate *module_state = get_csv_state(module);
- long old_limit = module_state->field_limit;
- if (new_limit != NULL) {
- if (!PyLong_CheckExact(new_limit)) {
- PyErr_Format(PyExc_TypeError,
- "limit must be an integer");
- return NULL;
- }
- module_state->field_limit = PyLong_AsLong(new_limit);
- if (module_state->field_limit == -1 && PyErr_Occurred()) {
- module_state->field_limit = old_limit;
- return NULL;
- }
- }
- return PyLong_FromLong(old_limit);
- }
- static PyType_Slot error_slots[] = {
- {0, NULL},
- };
- PyType_Spec error_spec = {
- .name = "_csv.Error",
- .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
- .slots = error_slots,
- };
- /*
- * MODULE
- */
- PyDoc_STRVAR(csv_module_doc,
- "CSV parsing and writing.\n"
- "\n"
- "This module provides classes that assist in the reading and writing\n"
- "of Comma Separated Value (CSV) files, and implements the interface\n"
- "described by PEP 305. Although many CSV files are simple to parse,\n"
- "the format is not formally defined by a stable specification and\n"
- "is subtle enough that parsing lines of a CSV file with something\n"
- "like line.split(\",\") is bound to fail. The module supports three\n"
- "basic APIs: reading, writing, and registration of dialects.\n"
- "\n"
- "\n"
- "DIALECT REGISTRATION:\n"
- "\n"
- "Readers and writers support a dialect argument, which is a convenient\n"
- "handle on a group of settings. When the dialect argument is a string,\n"
- "it identifies one of the dialects previously registered with the module.\n"
- "If it is a class or instance, the attributes of the argument are used as\n"
- "the settings for the reader or writer:\n"
- "\n"
- " class excel:\n"
- " delimiter = ','\n"
- " quotechar = '\"'\n"
- " escapechar = None\n"
- " doublequote = True\n"
- " skipinitialspace = False\n"
- " lineterminator = '\\r\\n'\n"
- " quoting = QUOTE_MINIMAL\n"
- "\n"
- "SETTINGS:\n"
- "\n"
- " * quotechar - specifies a one-character string to use as the\n"
- " quoting character. It defaults to '\"'.\n"
- " * delimiter - specifies a one-character string to use as the\n"
- " field separator. It defaults to ','.\n"
- " * skipinitialspace - specifies how to interpret spaces which\n"
- " immediately follow a delimiter. It defaults to False, which\n"
- " means that spaces immediately following a delimiter is part\n"
- " of the following field.\n"
- " * lineterminator - specifies the character sequence which should\n"
- " terminate rows.\n"
- " * quoting - controls when quotes should be generated by the writer.\n"
- " It can take on any of the following module constants:\n"
- "\n"
- " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
- " field contains either the quotechar or the delimiter\n"
- " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
- " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
- " fields which do not parse as integers or floating point\n"
- " numbers.\n"
- " csv.QUOTE_STRINGS means that quotes are always placed around\n"
- " fields which are strings. Note that the Python value None\n"
- " is not a string.\n"
- " csv.QUOTE_NOTNULL means that quotes are only placed around fields\n"
- " that are not the Python value None.\n"
- " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
- " * escapechar - specifies a one-character string used to escape\n"
- " the delimiter when quoting is set to QUOTE_NONE.\n"
- " * doublequote - controls the handling of quotes inside fields. When\n"
- " True, two consecutive quotes are interpreted as one during read,\n"
- " and when writing, each quote character embedded in the data is\n"
- " written as two quotes\n");
- PyDoc_STRVAR(csv_reader_doc,
- " csv_reader = reader(iterable [, dialect='excel']\n"
- " [optional keyword args])\n"
- " for row in csv_reader:\n"
- " process(row)\n"
- "\n"
- "The \"iterable\" argument can be any object that returns a line\n"
- "of input for each iteration, such as a file object or a list. The\n"
- "optional \"dialect\" parameter is discussed below. The function\n"
- "also accepts optional keyword arguments which override settings\n"
- "provided by the dialect.\n"
- "\n"
- "The returned object is an iterator. Each iteration returns a row\n"
- "of the CSV file (which can span multiple input lines).\n");
- PyDoc_STRVAR(csv_writer_doc,
- " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
- " [optional keyword args])\n"
- " for row in sequence:\n"
- " csv_writer.writerow(row)\n"
- "\n"
- " [or]\n"
- "\n"
- " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
- " [optional keyword args])\n"
- " csv_writer.writerows(rows)\n"
- "\n"
- "The \"fileobj\" argument can be any object that supports the file API.\n");
- PyDoc_STRVAR(csv_register_dialect_doc,
- "Create a mapping from a string name to a dialect class.\n"
- " dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
- static struct PyMethodDef csv_methods[] = {
- { "reader", _PyCFunction_CAST(csv_reader),
- METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
- { "writer", _PyCFunction_CAST(csv_writer),
- METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
- { "register_dialect", _PyCFunction_CAST(csv_register_dialect),
- METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
- _CSV_LIST_DIALECTS_METHODDEF
- _CSV_UNREGISTER_DIALECT_METHODDEF
- _CSV_GET_DIALECT_METHODDEF
- _CSV_FIELD_SIZE_LIMIT_METHODDEF
- { NULL, NULL }
- };
- static int
- csv_exec(PyObject *module) {
- const StyleDesc *style;
- PyObject *temp;
- _csvstate *module_state = get_csv_state(module);
- temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
- module_state->dialect_type = (PyTypeObject *)temp;
- if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
- return -1;
- }
- temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
- module_state->reader_type = (PyTypeObject *)temp;
- if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
- return -1;
- }
- temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
- module_state->writer_type = (PyTypeObject *)temp;
- if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
- return -1;
- }
- /* Add version to the module. */
- if (PyModule_AddStringConstant(module, "__version__",
- MODULE_VERSION) == -1) {
- return -1;
- }
- /* Set the field limit */
- module_state->field_limit = 128 * 1024;
- /* Add _dialects dictionary */
- module_state->dialects = PyDict_New();
- if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
- return -1;
- }
- /* Add quote styles into dictionary */
- for (style = quote_styles; style->name; style++) {
- if (PyModule_AddIntConstant(module, style->name,
- style->style) == -1)
- return -1;
- }
- /* Add the CSV exception object to the module. */
- PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
- if (bases == NULL) {
- return -1;
- }
- module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
- bases);
- Py_DECREF(bases);
- if (module_state->error_obj == NULL) {
- return -1;
- }
- if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
- return -1;
- }
- module_state->str_write = PyUnicode_InternFromString("write");
- if (module_state->str_write == NULL) {
- return -1;
- }
- return 0;
- }
- static PyModuleDef_Slot csv_slots[] = {
- {Py_mod_exec, csv_exec},
- {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
- {0, NULL}
- };
- static struct PyModuleDef _csvmodule = {
- PyModuleDef_HEAD_INIT,
- "_csv",
- csv_module_doc,
- sizeof(_csvstate),
- csv_methods,
- csv_slots,
- _csv_traverse,
- _csv_clear,
- _csv_free
- };
- PyMODINIT_FUNC
- PyInit__csv(void)
- {
- return PyModuleDef_Init(&_csvmodule);
- }
|