12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288 |
- /*
- unicode_format.h -- implementation of str.format().
- */
- #include "pycore_floatobject.h" // _PyFloat_FormatAdvancedWriter()
- /************************************************************************/
- /*********** Global data structures and forward declarations *********/
- /************************************************************************/
- /*
- A SubString consists of the characters between two string or
- unicode pointers.
- */
- typedef struct {
- PyObject *str; /* borrowed reference */
- Py_ssize_t start, end;
- } SubString;
/* Records which style of positional field numbering a format string uses. */
typedef enum {
    ANS_INIT = 0,   /* no numbered or empty field name seen so far */
    ANS_AUTO = 1,   /* empty field names: indices are assigned automatically */
    ANS_MANUAL = 2  /* explicit numeric field names */
} AutoNumberState;
- /* Keeps track of our auto-numbering state, and which number field we're on */
- typedef struct {
- AutoNumberState an_state;
- int an_field_number;
- } AutoNumber;
- /* forward declaration for recursion */
- static PyObject *
- build_string(SubString *input, PyObject *args, PyObject *kwargs,
- int recursion_depth, AutoNumber *auto_number);
- /************************************************************************/
- /************************** Utility functions ************************/
- /************************************************************************/
- static void
- AutoNumber_Init(AutoNumber *auto_number)
- {
- auto_number->an_state = ANS_INIT;
- auto_number->an_field_number = 0;
- }
- /* fill in a SubString from a pointer and length */
- Py_LOCAL_INLINE(void)
- SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
- {
- str->str = s;
- str->start = start;
- str->end = end;
- }
- /* return a new string. if str->str is NULL, return None */
- Py_LOCAL_INLINE(PyObject *)
- SubString_new_object(SubString *str)
- {
- if (str->str == NULL)
- Py_RETURN_NONE;
- return PyUnicode_Substring(str->str, str->start, str->end);
- }
- /* return a new string. if str->str is NULL, return a new empty string */
- Py_LOCAL_INLINE(PyObject *)
- SubString_new_object_or_empty(SubString *str)
- {
- if (str->str == NULL) {
- return PyUnicode_New(0, 0);
- }
- return SubString_new_object(str);
- }
- /* Return 1 if an error has been detected switching between automatic
- field numbering and manual field specification, else return 0. Set
- ValueError on error. */
- static int
- autonumber_state_error(AutoNumberState state, int field_name_is_empty)
- {
- if (state == ANS_MANUAL) {
- if (field_name_is_empty) {
- PyErr_SetString(PyExc_ValueError, "cannot switch from "
- "manual field specification to "
- "automatic field numbering");
- return 1;
- }
- }
- else {
- if (!field_name_is_empty) {
- PyErr_SetString(PyExc_ValueError, "cannot switch from "
- "automatic field numbering to "
- "manual field specification");
- return 1;
- }
- }
- return 0;
- }
- /************************************************************************/
- /*********** Format string parsing -- integers and identifiers *********/
- /************************************************************************/
- static Py_ssize_t
- get_integer(const SubString *str)
- {
- Py_ssize_t accumulator = 0;
- Py_ssize_t digitval;
- Py_ssize_t i;
- /* empty string is an error */
- if (str->start >= str->end)
- return -1;
- for (i = str->start; i < str->end; i++) {
- digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
- if (digitval < 0)
- return -1;
- /*
- Detect possible overflow before it happens:
- accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
- accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
- */
- if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
- PyErr_Format(PyExc_ValueError,
- "Too many decimal digits in format string");
- return -1;
- }
- accumulator = accumulator * 10 + digitval;
- }
- return accumulator;
- }
- /************************************************************************/
- /******** Functions to get field objects and specification strings ******/
- /************************************************************************/
- /* do the equivalent of obj.name */
- static PyObject *
- getattr(PyObject *obj, SubString *name)
- {
- PyObject *newobj;
- PyObject *str = SubString_new_object(name);
- if (str == NULL)
- return NULL;
- newobj = PyObject_GetAttr(obj, str);
- Py_DECREF(str);
- return newobj;
- }
- /* do the equivalent of obj[idx], where obj is a sequence */
- static PyObject *
- getitem_sequence(PyObject *obj, Py_ssize_t idx)
- {
- return PySequence_GetItem(obj, idx);
- }
- /* do the equivalent of obj[idx], where obj is not a sequence */
- static PyObject *
- getitem_idx(PyObject *obj, Py_ssize_t idx)
- {
- PyObject *newobj;
- PyObject *idx_obj = PyLong_FromSsize_t(idx);
- if (idx_obj == NULL)
- return NULL;
- newobj = PyObject_GetItem(obj, idx_obj);
- Py_DECREF(idx_obj);
- return newobj;
- }
- /* do the equivalent of obj[name] */
- static PyObject *
- getitem_str(PyObject *obj, SubString *name)
- {
- PyObject *newobj;
- PyObject *str = SubString_new_object(name);
- if (str == NULL)
- return NULL;
- newobj = PyObject_GetItem(obj, str);
- Py_DECREF(str);
- return newobj;
- }
- typedef struct {
- /* the entire string we're parsing. we assume that someone else
- is managing its lifetime, and that it will exist for the
- lifetime of the iterator. can be empty */
- SubString str;
- /* index to where we are inside field_name */
- Py_ssize_t index;
- } FieldNameIterator;
- static int
- FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
- Py_ssize_t start, Py_ssize_t end)
- {
- SubString_init(&self->str, s, start, end);
- self->index = start;
- return 1;
- }
- static int
- _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
- {
- Py_UCS4 c;
- name->str = self->str.str;
- name->start = self->index;
- /* return everything until '.' or '[' */
- while (self->index < self->str.end) {
- c = PyUnicode_READ_CHAR(self->str.str, self->index++);
- switch (c) {
- case '[':
- case '.':
- /* backup so that we this character will be seen next time */
- self->index--;
- break;
- default:
- continue;
- }
- break;
- }
- /* end of string is okay */
- name->end = self->index;
- return 1;
- }
- static int
- _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
- {
- int bracket_seen = 0;
- Py_UCS4 c;
- name->str = self->str.str;
- name->start = self->index;
- /* return everything until ']' */
- while (self->index < self->str.end) {
- c = PyUnicode_READ_CHAR(self->str.str, self->index++);
- switch (c) {
- case ']':
- bracket_seen = 1;
- break;
- default:
- continue;
- }
- break;
- }
- /* make sure we ended with a ']' */
- if (!bracket_seen) {
- PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
- return 0;
- }
- /* end of string is okay */
- /* don't include the ']' */
- name->end = self->index-1;
- return 1;
- }
- /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
- static int
- FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
- Py_ssize_t *name_idx, SubString *name)
- {
- /* check at end of input */
- if (self->index >= self->str.end)
- return 1;
- switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
- case '.':
- *is_attribute = 1;
- if (_FieldNameIterator_attr(self, name) == 0)
- return 0;
- *name_idx = -1;
- break;
- case '[':
- *is_attribute = 0;
- if (_FieldNameIterator_item(self, name) == 0)
- return 0;
- *name_idx = get_integer(name);
- if (*name_idx == -1 && PyErr_Occurred())
- return 0;
- break;
- default:
- /* Invalid character follows ']' */
- PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
- "follow ']' in format field specifier");
- return 0;
- }
- /* empty string is an error */
- if (name->start == name->end) {
- PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
- return 0;
- }
- return 2;
- }
- /* input: field_name
- output: 'first' points to the part before the first '[' or '.'
- 'first_idx' is -1 if 'first' is not an integer, otherwise
- it's the value of first converted to an integer
- 'rest' is an iterator to return the rest
- */
- static int
- field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
- Py_ssize_t *first_idx, FieldNameIterator *rest,
- AutoNumber *auto_number)
- {
- Py_UCS4 c;
- Py_ssize_t i = start;
- int field_name_is_empty;
- int using_numeric_index;
- /* find the part up until the first '.' or '[' */
- while (i < end) {
- switch (c = PyUnicode_READ_CHAR(str, i++)) {
- case '[':
- case '.':
- /* backup so that we this character is available to the
- "rest" iterator */
- i--;
- break;
- default:
- continue;
- }
- break;
- }
- /* set up the return values */
- SubString_init(first, str, start, i);
- FieldNameIterator_init(rest, str, i, end);
- /* see if "first" is an integer, in which case it's used as an index */
- *first_idx = get_integer(first);
- if (*first_idx == -1 && PyErr_Occurred())
- return 0;
- field_name_is_empty = first->start >= first->end;
- /* If the field name is omitted or if we have a numeric index
- specified, then we're doing numeric indexing into args. */
- using_numeric_index = field_name_is_empty || *first_idx != -1;
- /* We always get here exactly one time for each field we're
- processing. And we get here in field order (counting by left
- braces). So this is the perfect place to handle automatic field
- numbering if the field name is omitted. */
- /* Check if we need to do the auto-numbering. It's not needed if
- we're called from string.Format routines, because it's handled
- in that class by itself. */
- if (auto_number) {
- /* Initialize our auto numbering state if this is the first
- time we're either auto-numbering or manually numbering. */
- if (auto_number->an_state == ANS_INIT && using_numeric_index)
- auto_number->an_state = field_name_is_empty ?
- ANS_AUTO : ANS_MANUAL;
- /* Make sure our state is consistent with what we're doing
- this time through. Only check if we're using a numeric
- index. */
- if (using_numeric_index)
- if (autonumber_state_error(auto_number->an_state,
- field_name_is_empty))
- return 0;
- /* Zero length field means we want to do auto-numbering of the
- fields. */
- if (field_name_is_empty)
- *first_idx = (auto_number->an_field_number)++;
- }
- return 1;
- }
- /*
- get_field_object returns the object inside {}, before the
- format_spec. It handles getindex and getattr lookups and consumes
- the entire input string.
- */
- static PyObject *
- get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
- AutoNumber *auto_number)
- {
- PyObject *obj = NULL;
- int ok;
- int is_attribute;
- SubString name;
- SubString first;
- Py_ssize_t index;
- FieldNameIterator rest;
- if (!field_name_split(input->str, input->start, input->end, &first,
- &index, &rest, auto_number)) {
- goto error;
- }
- if (index == -1) {
- /* look up in kwargs */
- PyObject *key = SubString_new_object(&first);
- if (key == NULL) {
- goto error;
- }
- if (kwargs == NULL) {
- PyErr_SetObject(PyExc_KeyError, key);
- Py_DECREF(key);
- goto error;
- }
- /* Use PyObject_GetItem instead of PyDict_GetItem because this
- code is no longer just used with kwargs. It might be passed
- a non-dict when called through format_map. */
- obj = PyObject_GetItem(kwargs, key);
- Py_DECREF(key);
- if (obj == NULL) {
- goto error;
- }
- }
- else {
- /* If args is NULL, we have a format string with a positional field
- with only kwargs to retrieve it from. This can only happen when
- used with format_map(), where positional arguments are not
- allowed. */
- if (args == NULL) {
- PyErr_SetString(PyExc_ValueError, "Format string contains "
- "positional fields");
- goto error;
- }
- /* look up in args */
- obj = PySequence_GetItem(args, index);
- if (obj == NULL) {
- PyErr_Format(PyExc_IndexError,
- "Replacement index %zd out of range for positional "
- "args tuple",
- index);
- goto error;
- }
- }
- /* iterate over the rest of the field_name */
- while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
- &name)) == 2) {
- PyObject *tmp;
- if (is_attribute)
- /* getattr lookup "." */
- tmp = getattr(obj, &name);
- else
- /* getitem lookup "[]" */
- if (index == -1)
- tmp = getitem_str(obj, &name);
- else
- if (PySequence_Check(obj))
- tmp = getitem_sequence(obj, index);
- else
- /* not a sequence */
- tmp = getitem_idx(obj, index);
- if (tmp == NULL)
- goto error;
- /* assign to obj */
- Py_SETREF(obj, tmp);
- }
- /* end of iterator, this is the non-error case */
- if (ok == 1)
- return obj;
- error:
- Py_XDECREF(obj);
- return NULL;
- }
- /************************************************************************/
- /***************** Field rendering functions **************************/
- /************************************************************************/
- /*
- render_field() is the main function in this section. It takes the
- field object and field specification string generated by
- get_field_and_spec, and renders the field into the output string.
- render_field calls fieldobj.__format__(format_spec) method, and
- appends to the output.
- */
- static int
- render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
- {
- int ok = 0;
- PyObject *result = NULL;
- PyObject *format_spec_object = NULL;
- int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
- int err;
- /* If we know the type exactly, skip the lookup of __format__ and just
- call the formatter directly. */
- if (PyUnicode_CheckExact(fieldobj))
- formatter = _PyUnicode_FormatAdvancedWriter;
- else if (PyLong_CheckExact(fieldobj))
- formatter = _PyLong_FormatAdvancedWriter;
- else if (PyFloat_CheckExact(fieldobj))
- formatter = _PyFloat_FormatAdvancedWriter;
- else if (PyComplex_CheckExact(fieldobj))
- formatter = _PyComplex_FormatAdvancedWriter;
- if (formatter) {
- /* we know exactly which formatter will be called when __format__ is
- looked up, so call it directly, instead. */
- err = formatter(writer, fieldobj, format_spec->str,
- format_spec->start, format_spec->end);
- return (err == 0);
- }
- else {
- /* We need to create an object out of the pointers we have, because
- __format__ takes a string/unicode object for format_spec. */
- if (format_spec->str)
- format_spec_object = PyUnicode_Substring(format_spec->str,
- format_spec->start,
- format_spec->end);
- else
- format_spec_object = PyUnicode_New(0, 0);
- if (format_spec_object == NULL)
- goto done;
- result = PyObject_Format(fieldobj, format_spec_object);
- }
- if (result == NULL)
- goto done;
- if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
- goto done;
- ok = 1;
- done:
- Py_XDECREF(format_spec_object);
- Py_XDECREF(result);
- return ok;
- }
- static int
- parse_field(SubString *str, SubString *field_name, SubString *format_spec,
- int *format_spec_needs_expanding, Py_UCS4 *conversion)
- {
- /* Note this function works if the field name is zero length,
- which is good. Zero length field names are handled later, in
- field_name_split. */
- Py_UCS4 c = 0;
- /* initialize these, as they may be empty */
- *conversion = '\0';
- SubString_init(format_spec, NULL, 0, 0);
- /* Search for the field name. it's terminated by the end of
- the string, or a ':' or '!' */
- field_name->str = str->str;
- field_name->start = str->start;
- while (str->start < str->end) {
- switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
- case '{':
- PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
- return 0;
- case '[':
- for (; str->start < str->end; str->start++)
- if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
- break;
- continue;
- case '}':
- case ':':
- case '!':
- break;
- default:
- continue;
- }
- break;
- }
- field_name->end = str->start - 1;
- if (c == '!' || c == ':') {
- Py_ssize_t count;
- /* we have a format specifier and/or a conversion */
- /* don't include the last character */
- /* see if there's a conversion specifier */
- if (c == '!') {
- /* there must be another character present */
- if (str->start >= str->end) {
- PyErr_SetString(PyExc_ValueError,
- "end of string while looking for conversion "
- "specifier");
- return 0;
- }
- *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
- if (str->start < str->end) {
- c = PyUnicode_READ_CHAR(str->str, str->start++);
- if (c == '}')
- return 1;
- if (c != ':') {
- PyErr_SetString(PyExc_ValueError,
- "expected ':' after conversion specifier");
- return 0;
- }
- }
- }
- format_spec->str = str->str;
- format_spec->start = str->start;
- count = 1;
- while (str->start < str->end) {
- switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
- case '{':
- *format_spec_needs_expanding = 1;
- count++;
- break;
- case '}':
- count--;
- if (count == 0) {
- format_spec->end = str->start - 1;
- return 1;
- }
- break;
- default:
- break;
- }
- }
- PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
- return 0;
- }
- else if (c != '}') {
- PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
- return 0;
- }
- return 1;
- }
- /************************************************************************/
- /******* Output string allocation and escape-to-markup processing ******/
- /************************************************************************/
- /* MarkupIterator breaks the string into pieces of either literal
- text, or things inside {} that need to be marked up. it is
- designed to make it easy to wrap a Python iterator around it, for
- use with the Formatter class */
- typedef struct {
- SubString str;
- } MarkupIterator;
- static int
- MarkupIterator_init(MarkupIterator *self, PyObject *str,
- Py_ssize_t start, Py_ssize_t end)
- {
- SubString_init(&self->str, str, start, end);
- return 1;
- }
- /* returns 0 on error, 1 on non-error termination, and 2 if it got a
- string (or something to be expanded) */
- static int
- MarkupIterator_next(MarkupIterator *self, SubString *literal,
- int *field_present, SubString *field_name,
- SubString *format_spec, Py_UCS4 *conversion,
- int *format_spec_needs_expanding)
- {
- int at_end;
- Py_UCS4 c = 0;
- Py_ssize_t start;
- Py_ssize_t len;
- int markup_follows = 0;
- /* initialize all of the output variables */
- SubString_init(literal, NULL, 0, 0);
- SubString_init(field_name, NULL, 0, 0);
- SubString_init(format_spec, NULL, 0, 0);
- *conversion = '\0';
- *format_spec_needs_expanding = 0;
- *field_present = 0;
- /* No more input, end of iterator. This is the normal exit
- path. */
- if (self->str.start >= self->str.end)
- return 1;
- start = self->str.start;
- /* First read any literal text. Read until the end of string, an
- escaped '{' or '}', or an unescaped '{'. In order to never
- allocate memory and so I can just pass pointers around, if
- there's an escaped '{' or '}' then we'll return the literal
- including the brace, but no format object. The next time
- through, we'll return the rest of the literal, skipping past
- the second consecutive brace. */
- while (self->str.start < self->str.end) {
- switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
- case '{':
- case '}':
- markup_follows = 1;
- break;
- default:
- continue;
- }
- break;
- }
- at_end = self->str.start >= self->str.end;
- len = self->str.start - start;
- if ((c == '}') && (at_end ||
- (c != PyUnicode_READ_CHAR(self->str.str,
- self->str.start)))) {
- PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
- "in format string");
- return 0;
- }
- if (at_end && c == '{') {
- PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
- "in format string");
- return 0;
- }
- if (!at_end) {
- if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
- /* escaped } or {, skip it in the input. there is no
- markup object following us, just this literal text */
- self->str.start++;
- markup_follows = 0;
- }
- else
- len--;
- }
- /* record the literal text */
- literal->str = self->str.str;
- literal->start = start;
- literal->end = start + len;
- if (!markup_follows)
- return 2;
- /* this is markup; parse the field */
- *field_present = 1;
- if (!parse_field(&self->str, field_name, format_spec,
- format_spec_needs_expanding, conversion))
- return 0;
- return 2;
- }
- /* do the !r or !s conversion on obj */
- static PyObject *
- do_conversion(PyObject *obj, Py_UCS4 conversion)
- {
- /* XXX in pre-3.0, do we need to convert this to unicode, since it
- might have returned a string? */
- switch (conversion) {
- case 'r':
- return PyObject_Repr(obj);
- case 's':
- return PyObject_Str(obj);
- case 'a':
- return PyObject_ASCII(obj);
- default:
- if (conversion > 32 && conversion < 127) {
- /* It's the ASCII subrange; casting to char is safe
- (assuming the execution character set is an ASCII
- superset). */
- PyErr_Format(PyExc_ValueError,
- "Unknown conversion specifier %c",
- (char)conversion);
- } else
- PyErr_Format(PyExc_ValueError,
- "Unknown conversion specifier \\x%x",
- (unsigned int)conversion);
- return NULL;
- }
- }
- /* given:
- {field_name!conversion:format_spec}
- compute the result and write it to output.
- format_spec_needs_expanding is an optimization. if it's false,
- just output the string directly, otherwise recursively expand the
- format_spec string.
- field_name is allowed to be zero length, in which case we
- are doing auto field numbering.
- */
- static int
- output_markup(SubString *field_name, SubString *format_spec,
- int format_spec_needs_expanding, Py_UCS4 conversion,
- _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
- int recursion_depth, AutoNumber *auto_number)
- {
- PyObject *tmp = NULL;
- PyObject *fieldobj = NULL;
- SubString expanded_format_spec;
- SubString *actual_format_spec;
- int result = 0;
- /* convert field_name to an object */
- fieldobj = get_field_object(field_name, args, kwargs, auto_number);
- if (fieldobj == NULL)
- goto done;
- if (conversion != '\0') {
- tmp = do_conversion(fieldobj, conversion);
- if (tmp == NULL || PyUnicode_READY(tmp) == -1)
- goto done;
- /* do the assignment, transferring ownership: fieldobj = tmp */
- Py_SETREF(fieldobj, tmp);
- tmp = NULL;
- }
- /* if needed, recursively compute the format_spec */
- if (format_spec_needs_expanding) {
- tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
- auto_number);
- if (tmp == NULL || PyUnicode_READY(tmp) == -1)
- goto done;
- /* note that in the case we're expanding the format string,
- tmp must be kept around until after the call to
- render_field. */
- SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
- actual_format_spec = &expanded_format_spec;
- }
- else
- actual_format_spec = format_spec;
- if (render_field(fieldobj, actual_format_spec, writer) == 0)
- goto done;
- result = 1;
- done:
- Py_XDECREF(fieldobj);
- Py_XDECREF(tmp);
- return result;
- }
- /*
- do_markup is the top-level loop for the format() method. It
- searches through the format string for escapes to markup codes, and
- calls other functions to move non-markup text to the output,
- and to perform the markup to the output.
- */
- static int
- do_markup(SubString *input, PyObject *args, PyObject *kwargs,
- _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
- {
- MarkupIterator iter;
- int format_spec_needs_expanding;
- int result;
- int field_present;
- SubString literal;
- SubString field_name;
- SubString format_spec;
- Py_UCS4 conversion;
- MarkupIterator_init(&iter, input->str, input->start, input->end);
- while ((result = MarkupIterator_next(&iter, &literal, &field_present,
- &field_name, &format_spec,
- &conversion,
- &format_spec_needs_expanding)) == 2) {
- if (literal.end != literal.start) {
- if (!field_present && iter.str.start == iter.str.end)
- writer->overallocate = 0;
- if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
- literal.start, literal.end) < 0)
- return 0;
- }
- if (field_present) {
- if (iter.str.start == iter.str.end)
- writer->overallocate = 0;
- if (!output_markup(&field_name, &format_spec,
- format_spec_needs_expanding, conversion, writer,
- args, kwargs, recursion_depth, auto_number))
- return 0;
- }
- }
- return result;
- }
- /*
- build_string allocates the output string and then
- calls do_markup to do the heavy lifting.
- */
- static PyObject *
- build_string(SubString *input, PyObject *args, PyObject *kwargs,
- int recursion_depth, AutoNumber *auto_number)
- {
- _PyUnicodeWriter writer;
- /* check the recursion level */
- if (recursion_depth <= 0) {
- PyErr_SetString(PyExc_ValueError,
- "Max string recursion exceeded");
- return NULL;
- }
- _PyUnicodeWriter_Init(&writer);
- writer.overallocate = 1;
- writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
- if (!do_markup(input, args, kwargs, &writer, recursion_depth,
- auto_number)) {
- _PyUnicodeWriter_Dealloc(&writer);
- return NULL;
- }
- return _PyUnicodeWriter_Finish(&writer);
- }
- /************************************************************************/
- /*********** main routine ***********************************************/
- /************************************************************************/
- /* this is the main entry point */
- static PyObject *
- do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
- {
- SubString input;
- /* PEP 3101 says only 2 levels, so that
- "{0:{1}}".format('abc', 's') # works
- "{0:{1:{2}}}".format('abc', 's', '') # fails
- */
- int recursion_depth = 2;
- AutoNumber auto_number;
- if (PyUnicode_READY(self) == -1)
- return NULL;
- AutoNumber_Init(&auto_number);
- SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
- return build_string(&input, args, kwargs, recursion_depth, &auto_number);
- }
- static PyObject *
- do_string_format_map(PyObject *self, PyObject *obj)
- {
- return do_string_format(self, NULL, obj);
- }
- /************************************************************************/
- /*********** formatteriterator ******************************************/
- /************************************************************************/
- /* This is used to implement string.Formatter.vparse(). It exists so
- Formatter can share code with the built in unicode.format() method.
- It's really just a wrapper around MarkupIterator that is callable
- from Python. */
- typedef struct {
- PyObject_HEAD
- PyObject *str;
- MarkupIterator it_markup;
- } formatteriterobject;
- static void
- formatteriter_dealloc(formatteriterobject *it)
- {
- Py_XDECREF(it->str);
- PyObject_Free(it);
- }
- /* returns a tuple:
- (literal, field_name, format_spec, conversion)
- literal is any literal text to output. might be zero length
- field_name is the string before the ':'. might be None
- format_spec is the string after the ':'. mibht be None
- conversion is either None, or the string after the '!'
- */
- static PyObject *
- formatteriter_next(formatteriterobject *it)
- {
- SubString literal;
- SubString field_name;
- SubString format_spec;
- Py_UCS4 conversion;
- int format_spec_needs_expanding;
- int field_present;
- int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
- &field_name, &format_spec, &conversion,
- &format_spec_needs_expanding);
- /* all of the SubString objects point into it->str, so no
- memory management needs to be done on them */
- assert(0 <= result && result <= 2);
- if (result == 0 || result == 1)
- /* if 0, error has already been set, if 1, iterator is empty */
- return NULL;
- else {
- PyObject *literal_str = NULL;
- PyObject *field_name_str = NULL;
- PyObject *format_spec_str = NULL;
- PyObject *conversion_str = NULL;
- PyObject *tuple = NULL;
- literal_str = SubString_new_object(&literal);
- if (literal_str == NULL)
- goto done;
- field_name_str = SubString_new_object(&field_name);
- if (field_name_str == NULL)
- goto done;
- /* if field_name is non-zero length, return a string for
- format_spec (even if zero length), else return None */
- format_spec_str = (field_present ?
- SubString_new_object_or_empty :
- SubString_new_object)(&format_spec);
- if (format_spec_str == NULL)
- goto done;
- /* if the conversion is not specified, return a None,
- otherwise create a one length string with the conversion
- character */
- if (conversion == '\0') {
- conversion_str = Py_NewRef(Py_None);
- }
- else
- conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
- &conversion, 1);
- if (conversion_str == NULL)
- goto done;
- tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
- conversion_str);
- done:
- Py_XDECREF(literal_str);
- Py_XDECREF(field_name_str);
- Py_XDECREF(format_spec_str);
- Py_XDECREF(conversion_str);
- return tuple;
- }
- }
- static PyMethodDef formatteriter_methods[] = {
- {NULL, NULL} /* sentinel */
- };
- static PyTypeObject PyFormatterIter_Type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "formatteriterator", /* tp_name */
- sizeof(formatteriterobject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)formatteriter_dealloc, /* tp_dealloc */
- 0, /* tp_vectorcall_offset */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_as_async */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- PyObject_SelfIter, /* tp_iter */
- (iternextfunc)formatteriter_next, /* tp_iternext */
- formatteriter_methods, /* tp_methods */
- 0,
- };
- /* unicode_formatter_parser is used to implement
- string.Formatter.vformat. it parses a string and returns tuples
- describing the parsed elements. It's a wrapper around
- stringlib/string_format.h's MarkupIterator */
- static PyObject *
- formatter_parser(PyObject *ignored, PyObject *self)
- {
- formatteriterobject *it;
- if (!PyUnicode_Check(self)) {
- PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
- return NULL;
- }
- if (PyUnicode_READY(self) == -1)
- return NULL;
- it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
- if (it == NULL)
- return NULL;
- /* take ownership, give the object to the iterator */
- it->str = Py_NewRef(self);
- /* initialize the contained MarkupIterator */
- MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
- return (PyObject *)it;
- }
/************************************************************************/
/*********** fieldnameiterator ******************************************/
/************************************************************************/
- /* This is used to implement string.Formatter.vparse(). It parses the
- field name into attribute and item values. It's a Python-callable
- wrapper around FieldNameIterator */
- typedef struct {
- PyObject_HEAD
- PyObject *str;
- FieldNameIterator it_field;
- } fieldnameiterobject;
- static void
- fieldnameiter_dealloc(fieldnameiterobject *it)
- {
- Py_XDECREF(it->str);
- PyObject_Free(it);
- }
- /* returns a tuple:
- (is_attr, value)
- is_attr is true if we used attribute syntax (e.g., '.foo')
- false if we used index syntax (e.g., '[foo]')
- value is an integer or string
- */
- static PyObject *
- fieldnameiter_next(fieldnameiterobject *it)
- {
- int result;
- int is_attr;
- Py_ssize_t idx;
- SubString name;
- result = FieldNameIterator_next(&it->it_field, &is_attr,
- &idx, &name);
- if (result == 0 || result == 1)
- /* if 0, error has already been set, if 1, iterator is empty */
- return NULL;
- else {
- PyObject* result = NULL;
- PyObject* is_attr_obj = NULL;
- PyObject* obj = NULL;
- is_attr_obj = PyBool_FromLong(is_attr);
- if (is_attr_obj == NULL)
- goto done;
- /* either an integer or a string */
- if (idx != -1)
- obj = PyLong_FromSsize_t(idx);
- else
- obj = SubString_new_object(&name);
- if (obj == NULL)
- goto done;
- /* return a tuple of values */
- result = PyTuple_Pack(2, is_attr_obj, obj);
- done:
- Py_XDECREF(is_attr_obj);
- Py_XDECREF(obj);
- return result;
- }
- }
- static PyMethodDef fieldnameiter_methods[] = {
- {NULL, NULL} /* sentinel */
- };
- static PyTypeObject PyFieldNameIter_Type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "fieldnameiterator", /* tp_name */
- sizeof(fieldnameiterobject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)fieldnameiter_dealloc, /* tp_dealloc */
- 0, /* tp_vectorcall_offset */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_as_async */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- PyObject_SelfIter, /* tp_iter */
- (iternextfunc)fieldnameiter_next, /* tp_iternext */
- fieldnameiter_methods, /* tp_methods */
- 0};
- /* unicode_formatter_field_name_split is used to implement
- string.Formatter.vformat. it takes a PEP 3101 "field name", and
- returns a tuple of (first, rest): "first", the part before the
- first '.' or '['; and "rest", an iterator for the rest of the field
- name. it's a wrapper around stringlib/string_format.h's
- field_name_split. The iterator it returns is a
- FieldNameIterator */
- static PyObject *
- formatter_field_name_split(PyObject *ignored, PyObject *self)
- {
- SubString first;
- Py_ssize_t first_idx;
- fieldnameiterobject *it;
- PyObject *first_obj = NULL;
- PyObject *result = NULL;
- if (!PyUnicode_Check(self)) {
- PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
- return NULL;
- }
- if (PyUnicode_READY(self) == -1)
- return NULL;
- it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
- if (it == NULL)
- return NULL;
- /* take ownership, give the object to the iterator. this is
- just to keep the field_name alive */
- it->str = Py_NewRef(self);
- /* Pass in auto_number = NULL. We'll return an empty string for
- first_obj in that case. */
- if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
- &first, &first_idx, &it->it_field, NULL))
- goto done;
- /* first becomes an integer, if possible; else a string */
- if (first_idx != -1)
- first_obj = PyLong_FromSsize_t(first_idx);
- else
- /* convert "first" into a string object */
- first_obj = SubString_new_object(&first);
- if (first_obj == NULL)
- goto done;
- /* return a tuple of values */
- result = PyTuple_Pack(2, first_obj, it);
- done:
- Py_XDECREF(it);
- Py_XDECREF(first_obj);
- return result;
- }
|