12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365 |
- /*
- An implementation of Text I/O as defined by PEP 3116 - "New I/O"
- Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
- Written by Amaury Forgeot d'Arc and Antoine Pitrou
- */
- #define PY_SSIZE_T_CLEAN
- #include "Python.h"
- #include "pycore_interp.h" // PyInterpreterState.fs_codec
- #include "pycore_long.h" // _PyLong_GetZero()
- #include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
- #include "pycore_object.h"
- #include "pycore_pystate.h" // _PyInterpreterState_GET()
- #include "structmember.h" // PyMemberDef
- #include "_iomodule.h"
- /*[clinic input]
- module _io
- class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
- class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
- class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
- [clinic start generated code]*/
- /*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
- typedef struct nldecoder_object nldecoder_object;
- typedef struct textio textio;
- #define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
- #include "clinic/textio.c.h"
- #undef clinic_state
- /* TextIOBase */
- PyDoc_STRVAR(textiobase_doc,
- "Base class for text I/O.\n"
- "\n"
- "This class provides a character and line based interface to stream\n"
- "I/O. There is no readinto method because Python's character strings\n"
- "are immutable.\n"
- );
- static PyObject *
- _unsupported(_PyIO_State *state, const char *message)
- {
- PyErr_SetString(state->unsupported_operation, message);
- return NULL;
- }
- /*[clinic input]
- _io._TextIOBase.detach
- cls: defining_class
- /
- Separate the underlying buffer from the TextIOBase and return it.
- After the underlying buffer has been detached, the TextIO is in an unusable state.
- [clinic start generated code]*/
- static PyObject *
- _io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
- /*[clinic end generated code: output=50915f40c609eaa4 input=987ca3640d0a3776]*/
- {
- _PyIO_State *state = get_io_state_by_cls(cls);
- return _unsupported(state, "detach");
- }
- /*[clinic input]
- _io._TextIOBase.read
- cls: defining_class
- size: int(unused=True) = -1
- /
- Read at most size characters from stream.
- Read from underlying buffer until we have size characters or we hit EOF.
- If size is negative or omitted, read until EOF.
- [clinic start generated code]*/
- static PyObject *
- _io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
- int Py_UNUSED(size))
- /*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
- {
- _PyIO_State *state = get_io_state_by_cls(cls);
- return _unsupported(state, "read");
- }
- /*[clinic input]
- _io._TextIOBase.readline
- cls: defining_class
- size: int(unused=True) = -1
- /
- Read until newline or EOF.
- Return an empty string if EOF is hit immediately.
- If size is specified, at most size characters will be read.
- [clinic start generated code]*/
- static PyObject *
- _io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
- int Py_UNUSED(size))
- /*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
- {
- _PyIO_State *state = get_io_state_by_cls(cls);
- return _unsupported(state, "readline");
- }
- /*[clinic input]
- _io._TextIOBase.write
- cls: defining_class
- s: str(unused=True)
- /
- Write string s to stream.
- Return the number of characters written
- (which is always equal to the length of the string).
- [clinic start generated code]*/
- static PyObject *
- _io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
- const char *Py_UNUSED(s))
- /*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
- {
- _PyIO_State *state = get_io_state_by_cls(cls);
- return _unsupported(state, "write");
- }
- PyDoc_STRVAR(textiobase_encoding_doc,
- "Encoding of the text stream.\n"
- "\n"
- "Subclasses should override.\n"
- );
- static PyObject *
- textiobase_encoding_get(PyObject *self, void *context)
- {
- Py_RETURN_NONE;
- }
- PyDoc_STRVAR(textiobase_newlines_doc,
- "Line endings translated so far.\n"
- "\n"
- "Only line endings translated during reading are considered.\n"
- "\n"
- "Subclasses should override.\n"
- );
- static PyObject *
- textiobase_newlines_get(PyObject *self, void *context)
- {
- Py_RETURN_NONE;
- }
- PyDoc_STRVAR(textiobase_errors_doc,
- "The error setting of the decoder or encoder.\n"
- "\n"
- "Subclasses should override.\n"
- );
- static PyObject *
- textiobase_errors_get(PyObject *self, void *context)
- {
- Py_RETURN_NONE;
- }
- static PyMethodDef textiobase_methods[] = {
- _IO__TEXTIOBASE_DETACH_METHODDEF
- _IO__TEXTIOBASE_READ_METHODDEF
- _IO__TEXTIOBASE_READLINE_METHODDEF
- _IO__TEXTIOBASE_WRITE_METHODDEF
- {NULL, NULL}
- };
- static PyGetSetDef textiobase_getset[] = {
- {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
- {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
- {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
- {NULL}
- };
- static PyType_Slot textiobase_slots[] = {
- {Py_tp_doc, (void *)textiobase_doc},
- {Py_tp_methods, textiobase_methods},
- {Py_tp_getset, textiobase_getset},
- {0, NULL},
- };
- /* Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
- PyType_Spec textiobase_spec = {
- .name = "_io._TextIOBase",
- .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
- Py_TPFLAGS_IMMUTABLETYPE),
- .slots = textiobase_slots,
- };
- /* IncrementalNewlineDecoder */
- struct nldecoder_object {
- PyObject_HEAD
- PyObject *decoder;
- PyObject *errors;
- unsigned int pendingcr: 1;
- unsigned int translate: 1;
- unsigned int seennl: 3;
- };
- /*[clinic input]
- _io.IncrementalNewlineDecoder.__init__
- decoder: object
- translate: bool
- errors: object(c_default="NULL") = "strict"
- Codec used when reading a file in universal newlines mode.
- It wraps another incremental decoder, translating \r\n and \r into \n.
- It also records the types of newlines encountered. When used with
- translate=False, it ensures that the newline sequence is returned in
- one piece. When used with decoder=None, it expects unicode strings as
- decode input and translates newlines without first invoking an external
- decoder.
- [clinic start generated code]*/
- static int
- _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
- PyObject *decoder, int translate,
- PyObject *errors)
- /*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
- {
- if (errors == NULL) {
- errors = Py_NewRef(&_Py_ID(strict));
- }
- else {
- errors = Py_NewRef(errors);
- }
- Py_XSETREF(self->errors, errors);
- Py_XSETREF(self->decoder, Py_NewRef(decoder));
- self->translate = translate ? 1 : 0;
- self->seennl = 0;
- self->pendingcr = 0;
- return 0;
- }
- static int
- incrementalnewlinedecoder_traverse(nldecoder_object *self, visitproc visit,
- void *arg)
- {
- Py_VISIT(Py_TYPE(self));
- Py_VISIT(self->decoder);
- Py_VISIT(self->errors);
- return 0;
- }
- static int
- incrementalnewlinedecoder_clear(nldecoder_object *self)
- {
- Py_CLEAR(self->decoder);
- Py_CLEAR(self->errors);
- return 0;
- }
- static void
- incrementalnewlinedecoder_dealloc(nldecoder_object *self)
- {
- PyTypeObject *tp = Py_TYPE(self);
- _PyObject_GC_UNTRACK(self);
- (void)incrementalnewlinedecoder_clear(self);
- tp->tp_free((PyObject *)self);
- Py_DECREF(tp);
- }
- static int
- check_decoded(PyObject *decoded)
- {
- if (decoded == NULL)
- return -1;
- if (!PyUnicode_Check(decoded)) {
- PyErr_Format(PyExc_TypeError,
- "decoder should return a string result, not '%.200s'",
- Py_TYPE(decoded)->tp_name);
- Py_DECREF(decoded);
- return -1;
- }
- if (PyUnicode_READY(decoded) < 0) {
- Py_DECREF(decoded);
- return -1;
- }
- return 0;
- }
/* Guard for IncrementalNewlineDecoder methods: if __init__() has not run
 * (self->errors is still NULL), set ValueError and make the *enclosing
 * function* return NULL.
 *
 * Must be used as a statement, followed by a semicolon, inside a function
 * returning a pointer.  The do/while(0) wrapper makes the expansion a
 * single statement, so it is safe in unbraced if/else bodies; the
 * argument is parenthesized so any pointer expression may be passed.
 */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
/* Bit flags stored in nldecoder_object.seennl, one per newline style. */
#define SEEN_CR   (1 << 0)   /* a lone "\r" */
#define SEEN_LF   (1 << 1)   /* a lone "\n" */
#define SEEN_CRLF (1 << 2)   /* a "\r\n" pair */
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
- PyObject *
- _PyIncrementalNewlineDecoder_decode(PyObject *myself,
- PyObject *input, int final)
- {
- PyObject *output;
- Py_ssize_t output_len;
- nldecoder_object *self = (nldecoder_object *) myself;
- CHECK_INITIALIZED_DECODER(self);
- /* decode input (with the eventual \r from a previous pass) */
- if (self->decoder != Py_None) {
- output = PyObject_CallMethodObjArgs(self->decoder,
- &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
- }
- else {
- output = Py_NewRef(input);
- }
- if (check_decoded(output) < 0)
- return NULL;
- output_len = PyUnicode_GET_LENGTH(output);
- if (self->pendingcr && (final || output_len > 0)) {
- /* Prefix output with CR */
- int kind;
- PyObject *modified;
- char *out;
- modified = PyUnicode_New(output_len + 1,
- PyUnicode_MAX_CHAR_VALUE(output));
- if (modified == NULL)
- goto error;
- kind = PyUnicode_KIND(modified);
- out = PyUnicode_DATA(modified);
- PyUnicode_WRITE(kind, out, 0, '\r');
- memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
- Py_SETREF(output, modified); /* output remains ready */
- self->pendingcr = 0;
- output_len++;
- }
- /* retain last \r even when not translating data:
- * then readline() is sure to get \r\n in one pass
- */
- if (!final) {
- if (output_len > 0
- && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
- {
- PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
- if (modified == NULL)
- goto error;
- Py_SETREF(output, modified);
- self->pendingcr = 1;
- }
- }
- /* Record which newlines are read and do newline translation if desired,
- all in one pass. */
- {
- const void *in_str;
- Py_ssize_t len;
- int seennl = self->seennl;
- int only_lf = 0;
- int kind;
- in_str = PyUnicode_DATA(output);
- len = PyUnicode_GET_LENGTH(output);
- kind = PyUnicode_KIND(output);
- if (len == 0)
- return output;
- /* If, up to now, newlines are consistently \n, do a quick check
- for the \r *byte* with the libc's optimized memchr.
- */
- if (seennl == SEEN_LF || seennl == 0) {
- only_lf = (memchr(in_str, '\r', kind * len) == NULL);
- }
- if (only_lf) {
- /* If not already seen, quick scan for a possible "\n" character.
- (there's nothing else to be done, even when in translation mode)
- */
- if (seennl == 0 &&
- memchr(in_str, '\n', kind * len) != NULL) {
- if (kind == PyUnicode_1BYTE_KIND)
- seennl |= SEEN_LF;
- else {
- Py_ssize_t i = 0;
- for (;;) {
- Py_UCS4 c;
- /* Fast loop for non-control characters */
- while (PyUnicode_READ(kind, in_str, i) > '\n')
- i++;
- c = PyUnicode_READ(kind, in_str, i++);
- if (c == '\n') {
- seennl |= SEEN_LF;
- break;
- }
- if (i >= len)
- break;
- }
- }
- }
- /* Finished: we have scanned for newlines, and none of them
- need translating */
- }
- else if (!self->translate) {
- Py_ssize_t i = 0;
- /* We have already seen all newline types, no need to scan again */
- if (seennl == SEEN_ALL)
- goto endscan;
- for (;;) {
- Py_UCS4 c;
- /* Fast loop for non-control characters */
- while (PyUnicode_READ(kind, in_str, i) > '\r')
- i++;
- c = PyUnicode_READ(kind, in_str, i++);
- if (c == '\n')
- seennl |= SEEN_LF;
- else if (c == '\r') {
- if (PyUnicode_READ(kind, in_str, i) == '\n') {
- seennl |= SEEN_CRLF;
- i++;
- }
- else
- seennl |= SEEN_CR;
- }
- if (i >= len)
- break;
- if (seennl == SEEN_ALL)
- break;
- }
- endscan:
- ;
- }
- else {
- void *translated;
- int kind = PyUnicode_KIND(output);
- const void *in_str = PyUnicode_DATA(output);
- Py_ssize_t in, out;
- /* XXX: Previous in-place translation here is disabled as
- resizing is not possible anymore */
- /* We could try to optimize this so that we only do a copy
- when there is something to translate. On the other hand,
- we already know there is a \r byte, so chances are high
- that something needs to be done. */
- translated = PyMem_Malloc(kind * len);
- if (translated == NULL) {
- PyErr_NoMemory();
- goto error;
- }
- in = out = 0;
- for (;;) {
- Py_UCS4 c;
- /* Fast loop for non-control characters */
- while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
- PyUnicode_WRITE(kind, translated, out++, c);
- if (c == '\n') {
- PyUnicode_WRITE(kind, translated, out++, c);
- seennl |= SEEN_LF;
- continue;
- }
- if (c == '\r') {
- if (PyUnicode_READ(kind, in_str, in) == '\n') {
- in++;
- seennl |= SEEN_CRLF;
- }
- else
- seennl |= SEEN_CR;
- PyUnicode_WRITE(kind, translated, out++, '\n');
- continue;
- }
- if (in > len)
- break;
- PyUnicode_WRITE(kind, translated, out++, c);
- }
- Py_DECREF(output);
- output = PyUnicode_FromKindAndData(kind, translated, out);
- PyMem_Free(translated);
- if (!output)
- return NULL;
- }
- self->seennl |= seennl;
- }
- return output;
- error:
- Py_DECREF(output);
- return NULL;
- }
- /*[clinic input]
- _io.IncrementalNewlineDecoder.decode
- input: object
- final: bool = False
- [clinic start generated code]*/
- static PyObject *
- _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
- PyObject *input, int final)
- /*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/
- {
- return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
- }
- /*[clinic input]
- _io.IncrementalNewlineDecoder.getstate
- [clinic start generated code]*/
- static PyObject *
- _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
- /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
- {
- PyObject *buffer;
- unsigned long long flag;
- CHECK_INITIALIZED_DECODER(self);
- if (self->decoder != Py_None) {
- PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
- &_Py_ID(getstate));
- if (state == NULL)
- return NULL;
- if (!PyTuple_Check(state)) {
- PyErr_SetString(PyExc_TypeError,
- "illegal decoder state");
- Py_DECREF(state);
- return NULL;
- }
- if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
- &buffer, &flag))
- {
- Py_DECREF(state);
- return NULL;
- }
- Py_INCREF(buffer);
- Py_DECREF(state);
- }
- else {
- buffer = PyBytes_FromString("");
- flag = 0;
- }
- flag <<= 1;
- if (self->pendingcr)
- flag |= 1;
- return Py_BuildValue("NK", buffer, flag);
- }
- /*[clinic input]
- _io.IncrementalNewlineDecoder.setstate
- state: object
- /
- [clinic start generated code]*/
- static PyObject *
- _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
- PyObject *state)
- /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
- {
- PyObject *buffer;
- unsigned long long flag;
- CHECK_INITIALIZED_DECODER(self);
- if (!PyTuple_Check(state)) {
- PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
- return NULL;
- }
- if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
- &buffer, &flag))
- {
- return NULL;
- }
- self->pendingcr = (int) (flag & 1);
- flag >>= 1;
- if (self->decoder != Py_None) {
- return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
- "((OK))", buffer, flag);
- }
- else {
- Py_RETURN_NONE;
- }
- }
- /*[clinic input]
- _io.IncrementalNewlineDecoder.reset
- [clinic start generated code]*/
- static PyObject *
- _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
- /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
- {
- CHECK_INITIALIZED_DECODER(self);
- self->seennl = 0;
- self->pendingcr = 0;
- if (self->decoder != Py_None)
- return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
- else
- Py_RETURN_NONE;
- }
- static PyObject *
- incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
- {
- CHECK_INITIALIZED_DECODER(self);
- switch (self->seennl) {
- case SEEN_CR:
- return PyUnicode_FromString("\r");
- case SEEN_LF:
- return PyUnicode_FromString("\n");
- case SEEN_CRLF:
- return PyUnicode_FromString("\r\n");
- case SEEN_CR | SEEN_LF:
- return Py_BuildValue("ss", "\r", "\n");
- case SEEN_CR | SEEN_CRLF:
- return Py_BuildValue("ss", "\r", "\r\n");
- case SEEN_LF | SEEN_CRLF:
- return Py_BuildValue("ss", "\n", "\r\n");
- case SEEN_CR | SEEN_LF | SEEN_CRLF:
- return Py_BuildValue("sss", "\r", "\n", "\r\n");
- default:
- Py_RETURN_NONE;
- }
- }
- /* TextIOWrapper */
- typedef PyObject *
- (*encodefunc_t)(PyObject *, PyObject *);
- struct textio
- {
- PyObject_HEAD
- int ok; /* initialized? */
- int detached;
- Py_ssize_t chunk_size;
- PyObject *buffer;
- PyObject *encoding;
- PyObject *encoder;
- PyObject *decoder;
- PyObject *readnl;
- PyObject *errors;
- const char *writenl; /* ASCII-encoded; NULL stands for \n */
- char line_buffering;
- char write_through;
- char readuniversal;
- char readtranslate;
- char writetranslate;
- char seekable;
- char has_read1;
- char telling;
- char finalizing;
- /* Specialized encoding func (see below) */
- encodefunc_t encodefunc;
- /* Whether or not it's the start of the stream */
- char encoding_start_of_stream;
- /* Reads and writes are internally buffered in order to speed things up.
- However, any read will first flush the write buffer if itsn't empty.
- Please also note that text to be written is first encoded before being
- buffered. This is necessary so that encoding errors are immediately
- reported to the caller, but it unfortunately means that the
- IncrementalEncoder (whose encode() method is always written in Python)
- becomes a bottleneck for small writes.
- */
- PyObject *decoded_chars; /* buffer for text returned from decoder */
- Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
- PyObject *pending_bytes; // data waiting to be written.
- // ascii unicode, bytes, or list of them.
- Py_ssize_t pending_bytes_count;
- /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
- * dec_flags is the second (integer) item of the decoder state and
- * next_input is the chunk of input bytes that comes next after the
- * snapshot point. We use this to reconstruct decoder states in tell().
- */
- PyObject *snapshot;
- /* Bytes-to-characters ratio for the current chunk. Serves as input for
- the heuristic in tell(). */
- double b2cratio;
- /* Cache raw object if it's a FileIO object */
- PyObject *raw;
- PyObject *weakreflist;
- PyObject *dict;
- _PyIO_State *state;
- };
- static void
- textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
- /* A couple of specialized cases in order to bypass the slow incremental
- encoding methods for the most popular encodings. */
- static PyObject *
- ascii_encode(textio *self, PyObject *text)
- {
- return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
- }
- static PyObject *
- utf16be_encode(textio *self, PyObject *text)
- {
- return _PyUnicode_EncodeUTF16(text,
- PyUnicode_AsUTF8(self->errors), 1);
- }
- static PyObject *
- utf16le_encode(textio *self, PyObject *text)
- {
- return _PyUnicode_EncodeUTF16(text,
- PyUnicode_AsUTF8(self->errors), -1);
- }
- static PyObject *
- utf16_encode(textio *self, PyObject *text)
- {
- if (!self->encoding_start_of_stream) {
- /* Skip the BOM and use native byte ordering */
- #if PY_BIG_ENDIAN
- return utf16be_encode(self, text);
- #else
- return utf16le_encode(self, text);
- #endif
- }
- return _PyUnicode_EncodeUTF16(text,
- PyUnicode_AsUTF8(self->errors), 0);
- }
- static PyObject *
- utf32be_encode(textio *self, PyObject *text)
- {
- return _PyUnicode_EncodeUTF32(text,
- PyUnicode_AsUTF8(self->errors), 1);
- }
- static PyObject *
- utf32le_encode(textio *self, PyObject *text)
- {
- return _PyUnicode_EncodeUTF32(text,
- PyUnicode_AsUTF8(self->errors), -1);
- }
- static PyObject *
- utf32_encode(textio *self, PyObject *text)
- {
- if (!self->encoding_start_of_stream) {
- /* Skip the BOM and use native byte ordering */
- #if PY_BIG_ENDIAN
- return utf32be_encode(self, text);
- #else
- return utf32le_encode(self, text);
- #endif
- }
- return _PyUnicode_EncodeUTF32(text,
- PyUnicode_AsUTF8(self->errors), 0);
- }
- static PyObject *
- utf8_encode(textio *self, PyObject *text)
- {
- return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
- }
- static PyObject *
- latin1_encode(textio *self, PyObject *text)
- {
- return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
- }
- // Return true when encoding can be skipped when text is ascii.
- static inline int
- is_asciicompat_encoding(encodefunc_t f)
- {
- return f == (encodefunc_t) ascii_encode
- || f == (encodefunc_t) latin1_encode
- || f == (encodefunc_t) utf8_encode;
- }
- /* Map normalized encoding names onto the specialized encoding funcs */
- typedef struct {
- const char *name;
- encodefunc_t encodefunc;
- } encodefuncentry;
- static const encodefuncentry encodefuncs[] = {
- {"ascii", (encodefunc_t) ascii_encode},
- {"iso8859-1", (encodefunc_t) latin1_encode},
- {"utf-8", (encodefunc_t) utf8_encode},
- {"utf-16-be", (encodefunc_t) utf16be_encode},
- {"utf-16-le", (encodefunc_t) utf16le_encode},
- {"utf-16", (encodefunc_t) utf16_encode},
- {"utf-32-be", (encodefunc_t) utf32be_encode},
- {"utf-32-le", (encodefunc_t) utf32le_encode},
- {"utf-32", (encodefunc_t) utf32_encode},
- {NULL, NULL}
- };
- static int
- validate_newline(const char *newline)
- {
- if (newline && newline[0] != '\0'
- && !(newline[0] == '\n' && newline[1] == '\0')
- && !(newline[0] == '\r' && newline[1] == '\0')
- && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
- PyErr_Format(PyExc_ValueError,
- "illegal newline value: %s", newline);
- return -1;
- }
- return 0;
- }
- static int
- set_newline(textio *self, const char *newline)
- {
- PyObject *old = self->readnl;
- if (newline == NULL) {
- self->readnl = NULL;
- }
- else {
- self->readnl = PyUnicode_FromString(newline);
- if (self->readnl == NULL) {
- self->readnl = old;
- return -1;
- }
- }
- self->readuniversal = (newline == NULL || newline[0] == '\0');
- self->readtranslate = (newline == NULL);
- self->writetranslate = (newline == NULL || newline[0] != '\0');
- if (!self->readuniversal && self->readnl != NULL) {
- // validate_newline() accepts only ASCII newlines.
- assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
- self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
- if (strcmp(self->writenl, "\n") == 0) {
- self->writenl = NULL;
- }
- }
- else {
- #ifdef MS_WINDOWS
- self->writenl = "\r\n";
- #else
- self->writenl = NULL;
- #endif
- }
- Py_XDECREF(old);
- return 0;
- }
- static int
- _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
- const char *errors)
- {
- PyObject *res;
- int r;
- res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
- if (res == NULL)
- return -1;
- r = PyObject_IsTrue(res);
- Py_DECREF(res);
- if (r == -1)
- return -1;
- if (r != 1)
- return 0;
- Py_CLEAR(self->decoder);
- self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
- if (self->decoder == NULL)
- return -1;
- if (self->readuniversal) {
- _PyIO_State *state = self->state;
- PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
- (PyObject *)state->PyIncrementalNewlineDecoder_Type,
- self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
- if (incrementalDecoder == NULL)
- return -1;
- Py_XSETREF(self->decoder, incrementalDecoder);
- }
- return 0;
- }
- static PyObject*
- _textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
- int eof)
- {
- PyObject *chars;
- if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
- chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
- else
- chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
- eof ? Py_True : Py_False, NULL);
- if (check_decoded(chars) < 0)
- // check_decoded already decreases refcount
- return NULL;
- return chars;
- }
- static int
- _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
- const char *errors)
- {
- PyObject *res;
- int r;
- res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
- if (res == NULL)
- return -1;
- r = PyObject_IsTrue(res);
- Py_DECREF(res);
- if (r == -1)
- return -1;
- if (r != 1)
- return 0;
- Py_CLEAR(self->encoder);
- self->encodefunc = NULL;
- self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
- if (self->encoder == NULL)
- return -1;
- /* Get the normalized named of the codec */
- if (_PyObject_LookupAttr(codec_info, &_Py_ID(name), &res) < 0) {
- return -1;
- }
- if (res != NULL && PyUnicode_Check(res)) {
- const encodefuncentry *e = encodefuncs;
- while (e->name != NULL) {
- if (_PyUnicode_EqualToASCIIString(res, e->name)) {
- self->encodefunc = e->encodefunc;
- break;
- }
- e++;
- }
- }
- Py_XDECREF(res);
- return 0;
- }
- static int
- _textiowrapper_fix_encoder_state(textio *self)
- {
- if (!self->seekable || !self->encoder) {
- return 0;
- }
- self->encoding_start_of_stream = 1;
- PyObject *cookieObj = PyObject_CallMethodNoArgs(
- self->buffer, &_Py_ID(tell));
- if (cookieObj == NULL) {
- return -1;
- }
- int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
- Py_DECREF(cookieObj);
- if (cmp < 0) {
- return -1;
- }
- if (cmp == 0) {
- self->encoding_start_of_stream = 0;
- PyObject *res = PyObject_CallMethodOneArg(
- self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
- if (res == NULL) {
- return -1;
- }
- Py_DECREF(res);
- }
- return 0;
- }
- static int
- io_check_errors(PyObject *errors)
- {
- assert(errors != NULL && errors != Py_None);
- PyInterpreterState *interp = _PyInterpreterState_GET();
- #ifndef Py_DEBUG
- /* In release mode, only check in development mode (-X dev) */
- if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
- return 0;
- }
- #else
- /* Always check in debug mode */
- #endif
- /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
- before_PyUnicode_InitEncodings() is called. */
- if (!interp->unicode.fs_codec.encoding) {
- return 0;
- }
- Py_ssize_t name_length;
- const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
- if (name == NULL) {
- return -1;
- }
- if (strlen(name) != (size_t)name_length) {
- PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
- return -1;
- }
- PyObject *handler = PyCodec_LookupError(name);
- if (handler != NULL) {
- Py_DECREF(handler);
- return 0;
- }
- return -1;
- }
- /*[clinic input]
- _io.TextIOWrapper.__init__
- buffer: object
- encoding: str(accept={str, NoneType}) = None
- errors: object = None
- newline: str(accept={str, NoneType}) = None
- line_buffering: bool = False
- write_through: bool = False
- Character and line based layer over a BufferedIOBase object, buffer.
- encoding gives the name of the encoding that the stream will be
- decoded or encoded with. It defaults to locale.getencoding().
- errors determines the strictness of encoding and decoding (see
- help(codecs.Codec) or the documentation for codecs.register) and
- defaults to "strict".
- newline controls how line endings are handled. It can be None, '',
- '\n', '\r', and '\r\n'. It works as follows:
- * On input, if newline is None, universal newlines mode is
- enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
- these are translated into '\n' before being returned to the
- caller. If it is '', universal newline mode is enabled, but line
- endings are returned to the caller untranslated. If it has any of
- the other legal values, input lines are only terminated by the given
- string, and the line ending is returned to the caller untranslated.
- * On output, if newline is None, any '\n' characters written are
- translated to the system default line separator, os.linesep. If
- newline is '' or '\n', no translation takes place. If newline is any
- of the other legal values, any '\n' characters written are translated
- to the given string.
- If line_buffering is True, a call to flush is implied when a call to
- write contains a newline character.
- [clinic start generated code]*/
- static int
- _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
- const char *encoding, PyObject *errors,
- const char *newline, int line_buffering,
- int write_through)
- /*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
- {
- PyObject *raw, *codec_info = NULL;
- PyObject *res;
- int r;
- self->ok = 0;
- self->detached = 0;
- if (encoding == NULL) {
- PyInterpreterState *interp = _PyInterpreterState_GET();
- if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
- if (PyErr_WarnEx(PyExc_EncodingWarning,
- "'encoding' argument not specified", 1)) {
- return -1;
- }
- }
- }
- if (errors == Py_None) {
- errors = &_Py_ID(strict);
- }
- else if (!PyUnicode_Check(errors)) {
- // Check 'errors' argument here because Argument Clinic doesn't support
- // 'str(accept={str, NoneType})' converter.
- PyErr_Format(
- PyExc_TypeError,
- "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
- Py_TYPE(errors)->tp_name);
- return -1;
- }
- else if (io_check_errors(errors)) {
- return -1;
- }
- Py_ssize_t errors_len;
- const char *errors_str = PyUnicode_AsUTF8AndSize(errors, &errors_len);
- if (errors_str == NULL) {
- return -1;
- }
- if (strlen(errors_str) != (size_t)errors_len) {
- PyErr_SetString(PyExc_ValueError, "embedded null character");
- return -1;
- }
- if (validate_newline(newline) < 0) {
- return -1;
- }
- Py_CLEAR(self->buffer);
- Py_CLEAR(self->encoding);
- Py_CLEAR(self->encoder);
- Py_CLEAR(self->decoder);
- Py_CLEAR(self->readnl);
- Py_CLEAR(self->decoded_chars);
- Py_CLEAR(self->pending_bytes);
- Py_CLEAR(self->snapshot);
- Py_CLEAR(self->errors);
- Py_CLEAR(self->raw);
- self->decoded_chars_used = 0;
- self->pending_bytes_count = 0;
- self->encodefunc = NULL;
- self->b2cratio = 0.0;
- if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
- _Py_DECLARE_STR(utf_8, "utf-8");
- self->encoding = Py_NewRef(&_Py_STR(utf_8));
- }
- else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
- self->encoding = _Py_GetLocaleEncodingObject();
- if (self->encoding == NULL) {
- goto error;
- }
- assert(PyUnicode_Check(self->encoding));
- }
- if (self->encoding != NULL) {
- encoding = PyUnicode_AsUTF8(self->encoding);
- if (encoding == NULL)
- goto error;
- }
- else if (encoding != NULL) {
- self->encoding = PyUnicode_FromString(encoding);
- if (self->encoding == NULL)
- goto error;
- }
- else {
- PyErr_SetString(PyExc_OSError,
- "could not determine default encoding");
- goto error;
- }
- /* Check we have been asked for a real text encoding */
- codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
- if (codec_info == NULL) {
- Py_CLEAR(self->encoding);
- goto error;
- }
- /* XXX: Failures beyond this point have the potential to leak elements
- * of the partially constructed object (like self->encoding)
- */
- self->errors = Py_NewRef(errors);
- self->chunk_size = 8192;
- self->line_buffering = line_buffering;
- self->write_through = write_through;
- if (set_newline(self, newline) < 0) {
- goto error;
- }
- self->buffer = Py_NewRef(buffer);
- /* Build the decoder object */
- _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
- self->state = state;
- if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
- goto error;
- /* Build the encoder object */
- if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
- goto error;
- /* Finished sorting out the codec details */
- Py_CLEAR(codec_info);
- if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
- Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
- Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
- {
- if (_PyObject_LookupAttr(buffer, &_Py_ID(raw), &raw) < 0)
- goto error;
- /* Cache the raw FileIO object to speed up 'closed' checks */
- if (raw != NULL) {
- if (Py_IS_TYPE(raw, state->PyFileIO_Type))
- self->raw = raw;
- else
- Py_DECREF(raw);
- }
- }
- res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
- if (res == NULL)
- goto error;
- r = PyObject_IsTrue(res);
- Py_DECREF(res);
- if (r < 0)
- goto error;
- self->seekable = self->telling = r;
- r = _PyObject_LookupAttr(buffer, &_Py_ID(read1), &res);
- if (r < 0) {
- goto error;
- }
- Py_XDECREF(res);
- self->has_read1 = r;
- self->encoding_start_of_stream = 0;
- if (_textiowrapper_fix_encoder_state(self) < 0) {
- goto error;
- }
- self->ok = 1;
- return 0;
- error:
- Py_XDECREF(codec_info);
- return -1;
- }
- /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
- * -1 on error.
- */
- static int
- convert_optional_bool(PyObject *obj, int default_value)
- {
- long v;
- if (obj == Py_None) {
- v = default_value;
- }
- else {
- v = PyLong_AsLong(obj);
- if (v == -1 && PyErr_Occurred())
- return -1;
- }
- return v != 0;
- }
- static int
- textiowrapper_change_encoding(textio *self, PyObject *encoding,
- PyObject *errors, int newline_changed)
- {
- /* Use existing settings where new settings are not specified */
- if (encoding == Py_None && errors == Py_None && !newline_changed) {
- return 0; // no change
- }
- if (encoding == Py_None) {
- encoding = self->encoding;
- if (errors == Py_None) {
- errors = self->errors;
- }
- Py_INCREF(encoding);
- }
- else {
- if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
- encoding = _Py_GetLocaleEncodingObject();
- if (encoding == NULL) {
- return -1;
- }
- } else {
- Py_INCREF(encoding);
- }
- if (errors == Py_None) {
- errors = &_Py_ID(strict);
- }
- }
- Py_INCREF(errors);
- const char *c_encoding = PyUnicode_AsUTF8(encoding);
- if (c_encoding == NULL) {
- Py_DECREF(encoding);
- Py_DECREF(errors);
- return -1;
- }
- const char *c_errors = PyUnicode_AsUTF8(errors);
- if (c_errors == NULL) {
- Py_DECREF(encoding);
- Py_DECREF(errors);
- return -1;
- }
- // Create new encoder & decoder
- PyObject *codec_info = _PyCodec_LookupTextEncoding(
- c_encoding, "codecs.open()");
- if (codec_info == NULL) {
- Py_DECREF(encoding);
- Py_DECREF(errors);
- return -1;
- }
- if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
- _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
- Py_DECREF(codec_info);
- Py_DECREF(encoding);
- Py_DECREF(errors);
- return -1;
- }
- Py_DECREF(codec_info);
- Py_SETREF(self->encoding, encoding);
- Py_SETREF(self->errors, errors);
- return _textiowrapper_fix_encoder_state(self);
- }
- /*[clinic input]
- _io.TextIOWrapper.reconfigure
- *
- encoding: object = None
- errors: object = None
- newline as newline_obj: object(c_default="NULL") = None
- line_buffering as line_buffering_obj: object = None
- write_through as write_through_obj: object = None
- Reconfigure the text stream with new parameters.
- This also does an implicit stream flush.
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
- PyObject *errors, PyObject *newline_obj,
- PyObject *line_buffering_obj,
- PyObject *write_through_obj)
- /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
- {
- int line_buffering;
- int write_through;
- const char *newline = NULL;
- if (encoding != Py_None && !PyUnicode_Check(encoding)) {
- PyErr_Format(PyExc_TypeError,
- "reconfigure() argument 'encoding' must be str or None, not %s",
- Py_TYPE(encoding)->tp_name);
- return NULL;
- }
- if (errors != Py_None && !PyUnicode_Check(errors)) {
- PyErr_Format(PyExc_TypeError,
- "reconfigure() argument 'errors' must be str or None, not %s",
- Py_TYPE(errors)->tp_name);
- return NULL;
- }
- if (newline_obj != NULL && newline_obj != Py_None &&
- !PyUnicode_Check(newline_obj))
- {
- PyErr_Format(PyExc_TypeError,
- "reconfigure() argument 'newline' must be str or None, not %s",
- Py_TYPE(newline_obj)->tp_name);
- return NULL;
- }
- /* Check if something is in the read buffer */
- if (self->decoded_chars != NULL) {
- if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
- _unsupported(self->state,
- "It is not possible to set the encoding or newline "
- "of stream after the first read");
- return NULL;
- }
- }
- if (newline_obj != NULL && newline_obj != Py_None) {
- newline = PyUnicode_AsUTF8(newline_obj);
- if (newline == NULL || validate_newline(newline) < 0) {
- return NULL;
- }
- }
- line_buffering = convert_optional_bool(line_buffering_obj,
- self->line_buffering);
- if (line_buffering < 0) {
- return NULL;
- }
- write_through = convert_optional_bool(write_through_obj,
- self->write_through);
- if (write_through < 0) {
- return NULL;
- }
- PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
- if (res == NULL) {
- return NULL;
- }
- Py_DECREF(res);
- self->b2cratio = 0;
- if (newline_obj != NULL && set_newline(self, newline) < 0) {
- return NULL;
- }
- if (textiowrapper_change_encoding(
- self, encoding, errors, newline_obj != NULL) < 0) {
- return NULL;
- }
- self->line_buffering = line_buffering;
- self->write_through = write_through;
- Py_RETURN_NONE;
- }
- static int
- textiowrapper_clear(textio *self)
- {
- self->ok = 0;
- Py_CLEAR(self->buffer);
- Py_CLEAR(self->encoding);
- Py_CLEAR(self->encoder);
- Py_CLEAR(self->decoder);
- Py_CLEAR(self->readnl);
- Py_CLEAR(self->decoded_chars);
- Py_CLEAR(self->pending_bytes);
- Py_CLEAR(self->snapshot);
- Py_CLEAR(self->errors);
- Py_CLEAR(self->raw);
- Py_CLEAR(self->dict);
- return 0;
- }
- static void
- textiowrapper_dealloc(textio *self)
- {
- PyTypeObject *tp = Py_TYPE(self);
- self->finalizing = 1;
- if (_PyIOBase_finalize((PyObject *) self) < 0)
- return;
- self->ok = 0;
- _PyObject_GC_UNTRACK(self);
- if (self->weakreflist != NULL)
- PyObject_ClearWeakRefs((PyObject *)self);
- (void)textiowrapper_clear(self);
- tp->tp_free((PyObject *)self);
- Py_DECREF(tp);
- }
- static int
- textiowrapper_traverse(textio *self, visitproc visit, void *arg)
- {
- Py_VISIT(Py_TYPE(self));
- Py_VISIT(self->buffer);
- Py_VISIT(self->encoding);
- Py_VISIT(self->encoder);
- Py_VISIT(self->decoder);
- Py_VISIT(self->readnl);
- Py_VISIT(self->decoded_chars);
- Py_VISIT(self->pending_bytes);
- Py_VISIT(self->snapshot);
- Py_VISIT(self->errors);
- Py_VISIT(self->raw);
- Py_VISIT(self->dict);
- return 0;
- }
- static PyObject *
- textiowrapper_closed_get(textio *self, void *context);
/* Return NULL from the enclosing function, with ValueError set, when the
   stream is closed.  Shortcuts keep the common case fast: for exact
   TextIOWrapper instances the cached raw FileIO object is queried directly
   when available, otherwise the `closed` getter is used.  Other types go
   through the generic _PyIOBase_check_closed(). */
#define CHECK_CLOSED(self) \
    do { \
        if (!Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int _cc_closed; \
            if (self->raw != NULL) { \
                _cc_closed = _PyFileIO_closed(self->raw); \
            } \
            else { \
                PyObject *_cc_res = textiowrapper_closed_get(self, NULL); \
                if (_cc_res == NULL) \
                    return NULL; \
                _cc_closed = PyObject_IsTrue(_cc_res); \
                Py_DECREF(_cc_res); \
                if (_cc_closed < 0) \
                    return NULL; \
            } \
            if (_cc_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
/* Return NULL from the enclosing function, with ValueError set, when the
   object has not been successfully initialized (ok <= 0).
   Wrapped in do { } while (0) so the macro acts as a single statement and
   cannot capture a following `else`.  Use with a trailing semicolon. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
/* Return NULL from the enclosing function, with ValueError set, when the
   object is uninitialized or its underlying buffer has been detached.
   Wrapped in do { } while (0) so that both checks stay together when the
   macro is used as the body of an unbraced `if`.  Use with a trailing
   semicolon. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)
/* Variant of CHECK_ATTACHED for functions returning int: returns -1, with
   ValueError set, when the object is uninitialized or detached.
   Wrapped in do { } while (0) so the macro acts as a single statement and
   cannot capture a following `else`.  Use with a trailing semicolon. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
- /*[clinic input]
- _io.TextIOWrapper.detach
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_detach_impl(textio *self)
- /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
- {
- PyObject *buffer, *res;
- CHECK_ATTACHED(self);
- res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
- if (res == NULL)
- return NULL;
- Py_DECREF(res);
- buffer = self->buffer;
- self->buffer = NULL;
- self->detached = 1;
- return buffer;
- }
- /* Flush the internal write buffer. This doesn't explicitly flush the
- underlying buffered object, though. */
- static int
- _textiowrapper_writeflush(textio *self)
- {
- if (self->pending_bytes == NULL)
- return 0;
- PyObject *pending = self->pending_bytes;
- PyObject *b;
- if (PyBytes_Check(pending)) {
- b = Py_NewRef(pending);
- }
- else if (PyUnicode_Check(pending)) {
- assert(PyUnicode_IS_ASCII(pending));
- assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
- b = PyBytes_FromStringAndSize(
- PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
- if (b == NULL) {
- return -1;
- }
- }
- else {
- assert(PyList_Check(pending));
- b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
- if (b == NULL) {
- return -1;
- }
- char *buf = PyBytes_AsString(b);
- Py_ssize_t pos = 0;
- for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
- PyObject *obj = PyList_GET_ITEM(pending, i);
- char *src;
- Py_ssize_t len;
- if (PyUnicode_Check(obj)) {
- assert(PyUnicode_IS_ASCII(obj));
- src = PyUnicode_DATA(obj);
- len = PyUnicode_GET_LENGTH(obj);
- }
- else {
- assert(PyBytes_Check(obj));
- if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
- Py_DECREF(b);
- return -1;
- }
- }
- memcpy(buf + pos, src, len);
- pos += len;
- }
- assert(pos == self->pending_bytes_count);
- }
- self->pending_bytes_count = 0;
- self->pending_bytes = NULL;
- Py_DECREF(pending);
- PyObject *ret;
- do {
- ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
- } while (ret == NULL && _PyIO_trap_eintr());
- Py_DECREF(b);
- // NOTE: We cleared buffer but we don't know how many bytes are actually written
- // when an error occurred.
- if (ret == NULL)
- return -1;
- Py_DECREF(ret);
- return 0;
- }
- /*[clinic input]
- _io.TextIOWrapper.write
- text: unicode
- /
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
- /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
- {
- PyObject *ret;
- PyObject *b;
- Py_ssize_t textlen;
- int haslf = 0;
- int needflush = 0, text_needflush = 0;
- if (PyUnicode_READY(text) == -1)
- return NULL;
- CHECK_ATTACHED(self);
- CHECK_CLOSED(self);
- if (self->encoder == NULL) {
- return _unsupported(self->state, "not writable");
- }
- Py_INCREF(text);
- textlen = PyUnicode_GET_LENGTH(text);
- if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
- if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
- haslf = 1;
- if (haslf && self->writetranslate && self->writenl != NULL) {
- PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
- "ss", "\n", self->writenl);
- Py_DECREF(text);
- if (newtext == NULL)
- return NULL;
- text = newtext;
- }
- if (self->write_through)
- text_needflush = 1;
- if (self->line_buffering &&
- (haslf ||
- PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
- needflush = 1;
- /* XXX What if we were just reading? */
- if (self->encodefunc != NULL) {
- if (PyUnicode_IS_ASCII(text) &&
- // See bpo-43260
- PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
- is_asciicompat_encoding(self->encodefunc)) {
- b = Py_NewRef(text);
- }
- else {
- b = (*self->encodefunc)((PyObject *) self, text);
- }
- self->encoding_start_of_stream = 0;
- }
- else {
- b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
- }
- Py_DECREF(text);
- if (b == NULL)
- return NULL;
- if (b != text && !PyBytes_Check(b)) {
- PyErr_Format(PyExc_TypeError,
- "encoder should return a bytes object, not '%.200s'",
- Py_TYPE(b)->tp_name);
- Py_DECREF(b);
- return NULL;
- }
- Py_ssize_t bytes_len;
- if (b == text) {
- bytes_len = PyUnicode_GET_LENGTH(b);
- }
- else {
- bytes_len = PyBytes_GET_SIZE(b);
- }
- // We should avoid concatinating huge data.
- // Flush the buffer before adding b to the buffer if b is not small.
- // https://github.com/python/cpython/issues/87426
- if (bytes_len >= self->chunk_size) {
- // _textiowrapper_writeflush() calls buffer.write().
- // self->pending_bytes can be appended during buffer->write()
- // or other thread.
- // We need to loop until buffer becomes empty.
- // https://github.com/python/cpython/issues/118138
- // https://github.com/python/cpython/issues/119506
- while (self->pending_bytes != NULL) {
- if (_textiowrapper_writeflush(self) < 0) {
- Py_DECREF(b);
- return NULL;
- }
- }
- }
- if (self->pending_bytes == NULL) {
- assert(self->pending_bytes_count == 0);
- self->pending_bytes = b;
- }
- else if (!PyList_CheckExact(self->pending_bytes)) {
- PyObject *list = PyList_New(2);
- if (list == NULL) {
- Py_DECREF(b);
- return NULL;
- }
- // Since Python 3.12, allocating GC object won't trigger GC and release
- // GIL. See https://github.com/python/cpython/issues/97922
- assert(!PyList_CheckExact(self->pending_bytes));
- PyList_SET_ITEM(list, 0, self->pending_bytes);
- PyList_SET_ITEM(list, 1, b);
- self->pending_bytes = list;
- }
- else {
- if (PyList_Append(self->pending_bytes, b) < 0) {
- Py_DECREF(b);
- return NULL;
- }
- Py_DECREF(b);
- }
- self->pending_bytes_count += bytes_len;
- if (self->pending_bytes_count >= self->chunk_size || needflush ||
- text_needflush) {
- if (_textiowrapper_writeflush(self) < 0)
- return NULL;
- }
- if (needflush) {
- ret = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
- if (ret == NULL)
- return NULL;
- Py_DECREF(ret);
- }
- if (self->snapshot != NULL) {
- textiowrapper_set_decoded_chars(self, NULL);
- Py_CLEAR(self->snapshot);
- }
- if (self->decoder) {
- ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
- if (ret == NULL)
- return NULL;
- Py_DECREF(ret);
- }
- return PyLong_FromSsize_t(textlen);
- }
- /* Steal a reference to chars and store it in the decoded_char buffer;
- */
- static void
- textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
- {
- Py_XSETREF(self->decoded_chars, chars);
- self->decoded_chars_used = 0;
- }
- static PyObject *
- textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
- {
- PyObject *chars;
- Py_ssize_t avail;
- if (self->decoded_chars == NULL)
- return PyUnicode_FromStringAndSize(NULL, 0);
- /* decoded_chars is guaranteed to be "ready". */
- avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
- - self->decoded_chars_used);
- assert(avail >= 0);
- if (n < 0 || n > avail)
- n = avail;
- if (self->decoded_chars_used > 0 || n < avail) {
- chars = PyUnicode_Substring(self->decoded_chars,
- self->decoded_chars_used,
- self->decoded_chars_used + n);
- if (chars == NULL)
- return NULL;
- }
- else {
- chars = Py_NewRef(self->decoded_chars);
- }
- self->decoded_chars_used += n;
- return chars;
- }
- /* Read and decode the next chunk of data from the BufferedReader.
- */
- static int
- textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
- {
- PyObject *dec_buffer = NULL;
- PyObject *dec_flags = NULL;
- PyObject *input_chunk = NULL;
- Py_buffer input_chunk_buf;
- PyObject *decoded_chars, *chunk_size;
- Py_ssize_t nbytes, nchars;
- int eof;
- /* The return value is True unless EOF was reached. The decoded string is
- * placed in self._decoded_chars (replacing its previous value). The
- * entire input chunk is sent to the decoder, though some of it may remain
- * buffered in the decoder, yet to be converted.
- */
- if (self->decoder == NULL) {
- _unsupported(self->state, "not readable");
- return -1;
- }
- if (self->telling) {
- /* To prepare for tell(), we need to snapshot a point in the file
- * where the decoder's input buffer is empty.
- */
- PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
- &_Py_ID(getstate));
- if (state == NULL)
- return -1;
- /* Given this, we know there was a valid snapshot point
- * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
- */
- if (!PyTuple_Check(state)) {
- PyErr_SetString(PyExc_TypeError,
- "illegal decoder state");
- Py_DECREF(state);
- return -1;
- }
- if (!PyArg_ParseTuple(state,
- "OO;illegal decoder state", &dec_buffer, &dec_flags))
- {
- Py_DECREF(state);
- return -1;
- }
- if (!PyBytes_Check(dec_buffer)) {
- PyErr_Format(PyExc_TypeError,
- "illegal decoder state: the first item should be a "
- "bytes object, not '%.200s'",
- Py_TYPE(dec_buffer)->tp_name);
- Py_DECREF(state);
- return -1;
- }
- Py_INCREF(dec_buffer);
- Py_INCREF(dec_flags);
- Py_DECREF(state);
- }
- /* Read a chunk, decode it, and put the result in self._decoded_chars. */
- if (size_hint > 0) {
- size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
- }
- chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
- if (chunk_size == NULL)
- goto fail;
- input_chunk = PyObject_CallMethodOneArg(self->buffer,
- (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
- chunk_size);
- Py_DECREF(chunk_size);
- if (input_chunk == NULL)
- goto fail;
- if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
- PyErr_Format(PyExc_TypeError,
- "underlying %s() should have returned a bytes-like object, "
- "not '%.200s'", (self->has_read1 ? "read1": "read"),
- Py_TYPE(input_chunk)->tp_name);
- goto fail;
- }
- nbytes = input_chunk_buf.len;
- eof = (nbytes == 0);
- decoded_chars = _textiowrapper_decode(self->state, self->decoder,
- input_chunk, eof);
- PyBuffer_Release(&input_chunk_buf);
- if (decoded_chars == NULL)
- goto fail;
- textiowrapper_set_decoded_chars(self, decoded_chars);
- nchars = PyUnicode_GET_LENGTH(decoded_chars);
- if (nchars > 0)
- self->b2cratio = (double) nbytes / nchars;
- else
- self->b2cratio = 0.0;
- if (nchars > 0)
- eof = 0;
- if (self->telling) {
- /* At the snapshot point, len(dec_buffer) bytes before the read, the
- * next input to be decoded is dec_buffer + input_chunk.
- */
- PyObject *next_input = dec_buffer;
- PyBytes_Concat(&next_input, input_chunk);
- dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
- if (next_input == NULL) {
- goto fail;
- }
- PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
- if (snapshot == NULL) {
- dec_flags = NULL;
- goto fail;
- }
- Py_XSETREF(self->snapshot, snapshot);
- }
- Py_DECREF(input_chunk);
- return (eof == 0);
- fail:
- Py_XDECREF(dec_buffer);
- Py_XDECREF(dec_flags);
- Py_XDECREF(input_chunk);
- return -1;
- }
- /*[clinic input]
- _io.TextIOWrapper.read
- size as n: Py_ssize_t(accept={int, NoneType}) = -1
- /
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
- /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
- {
- PyObject *result = NULL, *chunks = NULL;
- CHECK_ATTACHED(self);
- CHECK_CLOSED(self);
- if (self->decoder == NULL) {
- return _unsupported(self->state, "not readable");
- }
- if (_textiowrapper_writeflush(self) < 0)
- return NULL;
- if (n < 0) {
- /* Read everything */
- PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
- PyObject *decoded;
- if (bytes == NULL)
- goto fail;
- _PyIO_State *state = self->state;
- if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
- decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
- bytes, 1);
- else
- decoded = PyObject_CallMethodObjArgs(
- self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
- Py_DECREF(bytes);
- if (check_decoded(decoded) < 0)
- goto fail;
- result = textiowrapper_get_decoded_chars(self, -1);
- if (result == NULL) {
- Py_DECREF(decoded);
- return NULL;
- }
- PyUnicode_AppendAndDel(&result, decoded);
- if (result == NULL)
- goto fail;
- if (self->snapshot != NULL) {
- textiowrapper_set_decoded_chars(self, NULL);
- Py_CLEAR(self->snapshot);
- }
- return result;
- }
- else {
- int res = 1;
- Py_ssize_t remaining = n;
- result = textiowrapper_get_decoded_chars(self, n);
- if (result == NULL)
- goto fail;
- if (PyUnicode_READY(result) == -1)
- goto fail;
- remaining -= PyUnicode_GET_LENGTH(result);
- /* Keep reading chunks until we have n characters to return */
- while (remaining > 0) {
- res = textiowrapper_read_chunk(self, remaining);
- if (res < 0) {
- /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
- when EINTR occurs so we needn't do it ourselves. */
- if (_PyIO_trap_eintr()) {
- continue;
- }
- goto fail;
- }
- if (res == 0) /* EOF */
- break;
- if (chunks == NULL) {
- chunks = PyList_New(0);
- if (chunks == NULL)
- goto fail;
- }
- if (PyUnicode_GET_LENGTH(result) > 0 &&
- PyList_Append(chunks, result) < 0)
- goto fail;
- Py_DECREF(result);
- result = textiowrapper_get_decoded_chars(self, remaining);
- if (result == NULL)
- goto fail;
- remaining -= PyUnicode_GET_LENGTH(result);
- }
- if (chunks != NULL) {
- if (result != NULL && PyList_Append(chunks, result) < 0)
- goto fail;
- _Py_DECLARE_STR(empty, "");
- Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
- if (result == NULL)
- goto fail;
- Py_CLEAR(chunks);
- }
- return result;
- }
- fail:
- Py_XDECREF(result);
- Py_XDECREF(chunks);
- return NULL;
- }
- /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
- that is to the NUL character. Otherwise the function will produce
- incorrect results. */
- static const char *
- find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
- {
- if (kind == PyUnicode_1BYTE_KIND) {
- assert(ch < 256);
- return (char *) memchr((const void *) s, (char) ch, end - s);
- }
- for (;;) {
- while (PyUnicode_READ(kind, s, 0) > ch)
- s += kind;
- if (PyUnicode_READ(kind, s, 0) == ch)
- return s;
- if (s == end)
- return NULL;
- s += kind;
- }
- }
- Py_ssize_t
- _PyIO_find_line_ending(
- int translated, int universal, PyObject *readnl,
- int kind, const char *start, const char *end, Py_ssize_t *consumed)
- {
- Py_ssize_t len = (end - start)/kind;
- if (translated) {
- /* Newlines are already translated, only search for \n */
- const char *pos = find_control_char(kind, start, end, '\n');
- if (pos != NULL)
- return (pos - start)/kind + 1;
- else {
- *consumed = len;
- return -1;
- }
- }
- else if (universal) {
- /* Universal newline search. Find any of \r, \r\n, \n
- * The decoder ensures that \r\n are not split in two pieces
- */
- const char *s = start;
- for (;;) {
- Py_UCS4 ch;
- /* Fast path for non-control chars. The loop always ends
- since the Unicode string is NUL-terminated. */
- while (PyUnicode_READ(kind, s, 0) > '\r')
- s += kind;
- if (s >= end) {
- *consumed = len;
- return -1;
- }
- ch = PyUnicode_READ(kind, s, 0);
- s += kind;
- if (ch == '\n')
- return (s - start)/kind;
- if (ch == '\r') {
- if (PyUnicode_READ(kind, s, 0) == '\n')
- return (s - start)/kind + 1;
- else
- return (s - start)/kind;
- }
- }
- }
- else {
- /* Non-universal mode. */
- Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
- const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
- /* Assume that readnl is an ASCII character. */
- assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
- if (readnl_len == 1) {
- const char *pos = find_control_char(kind, start, end, nl[0]);
- if (pos != NULL)
- return (pos - start)/kind + 1;
- *consumed = len;
- return -1;
- }
- else {
- const char *s = start;
- const char *e = end - (readnl_len - 1)*kind;
- const char *pos;
- if (e < s)
- e = s;
- while (s < e) {
- Py_ssize_t i;
- const char *pos = find_control_char(kind, s, end, nl[0]);
- if (pos == NULL || pos >= e)
- break;
- for (i = 1; i < readnl_len; i++) {
- if (PyUnicode_READ(kind, pos, i) != nl[i])
- break;
- }
- if (i == readnl_len)
- return (pos - start)/kind + readnl_len;
- s = pos + kind;
- }
- pos = find_control_char(kind, e, end, nl[0]);
- if (pos == NULL)
- *consumed = len;
- else
- *consumed = (pos - start)/kind;
- return -1;
- }
- }
- }
- static PyObject *
- _textiowrapper_readline(textio *self, Py_ssize_t limit)
- {
- PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
- Py_ssize_t start, endpos, chunked, offset_to_buffer;
- int res;
- CHECK_CLOSED(self);
- if (_textiowrapper_writeflush(self) < 0)
- return NULL;
- chunked = 0;
- while (1) {
- const char *ptr;
- Py_ssize_t line_len;
- int kind;
- Py_ssize_t consumed = 0;
- /* First, get some data if necessary */
- res = 1;
- while (!self->decoded_chars ||
- !PyUnicode_GET_LENGTH(self->decoded_chars)) {
- res = textiowrapper_read_chunk(self, 0);
- if (res < 0) {
- /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
- when EINTR occurs so we needn't do it ourselves. */
- if (_PyIO_trap_eintr()) {
- continue;
- }
- goto error;
- }
- if (res == 0)
- break;
- }
- if (res == 0) {
- /* end of file */
- textiowrapper_set_decoded_chars(self, NULL);
- Py_CLEAR(self->snapshot);
- start = endpos = offset_to_buffer = 0;
- break;
- }
- if (remaining == NULL) {
- line = Py_NewRef(self->decoded_chars);
- start = self->decoded_chars_used;
- offset_to_buffer = 0;
- }
- else {
- assert(self->decoded_chars_used == 0);
- line = PyUnicode_Concat(remaining, self->decoded_chars);
- start = 0;
- offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
- Py_CLEAR(remaining);
- if (line == NULL)
- goto error;
- if (PyUnicode_READY(line) == -1)
- goto error;
- }
- ptr = PyUnicode_DATA(line);
- line_len = PyUnicode_GET_LENGTH(line);
- kind = PyUnicode_KIND(line);
- endpos = _PyIO_find_line_ending(
- self->readtranslate, self->readuniversal, self->readnl,
- kind,
- ptr + kind * start,
- ptr + kind * line_len,
- &consumed);
- if (endpos >= 0) {
- endpos += start;
- if (limit >= 0 && (endpos - start) + chunked >= limit)
- endpos = start + limit - chunked;
- break;
- }
- /* We can put aside up to `endpos` */
- endpos = consumed + start;
- if (limit >= 0 && (endpos - start) + chunked >= limit) {
- /* Didn't find line ending, but reached length limit */
- endpos = start + limit - chunked;
- break;
- }
- if (endpos > start) {
- /* No line ending seen yet - put aside current data */
- PyObject *s;
- if (chunks == NULL) {
- chunks = PyList_New(0);
- if (chunks == NULL)
- goto error;
- }
- s = PyUnicode_Substring(line, start, endpos);
- if (s == NULL)
- goto error;
- if (PyList_Append(chunks, s) < 0) {
- Py_DECREF(s);
- goto error;
- }
- chunked += PyUnicode_GET_LENGTH(s);
- Py_DECREF(s);
- }
- /* There may be some remaining bytes we'll have to prepend to the
- next chunk of data */
- if (endpos < line_len) {
- remaining = PyUnicode_Substring(line, endpos, line_len);
- if (remaining == NULL)
- goto error;
- }
- Py_CLEAR(line);
- /* We have consumed the buffer */
- textiowrapper_set_decoded_chars(self, NULL);
- }
- if (line != NULL) {
- /* Our line ends in the current buffer */
- self->decoded_chars_used = endpos - offset_to_buffer;
- if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
- PyObject *s = PyUnicode_Substring(line, start, endpos);
- Py_CLEAR(line);
- if (s == NULL)
- goto error;
- line = s;
- }
- }
- if (remaining != NULL) {
- if (chunks == NULL) {
- chunks = PyList_New(0);
- if (chunks == NULL)
- goto error;
- }
- if (PyList_Append(chunks, remaining) < 0)
- goto error;
- Py_CLEAR(remaining);
- }
- if (chunks != NULL) {
- if (line != NULL) {
- if (PyList_Append(chunks, line) < 0)
- goto error;
- Py_DECREF(line);
- }
- line = PyUnicode_Join(&_Py_STR(empty), chunks);
- if (line == NULL)
- goto error;
- Py_CLEAR(chunks);
- }
- if (line == NULL) {
- line = Py_NewRef(&_Py_STR(empty));
- }
- return line;
- error:
- Py_XDECREF(chunks);
- Py_XDECREF(remaining);
- Py_XDECREF(line);
- return NULL;
- }
- /*[clinic input]
- _io.TextIOWrapper.readline
- size: Py_ssize_t = -1
- /
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
- /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
- {
- CHECK_ATTACHED(self);
- return _textiowrapper_readline(self, size);
- }
- /* Seek and Tell */
- typedef struct {
- Py_off_t start_pos;
- int dec_flags;
- int bytes_to_feed;
- int chars_to_skip;
- char need_eof;
- } cookie_type;
/*
   Cookies are packed into / unpacked from an intermediate byte string that
   is converted with _PyLong_FromByteArray() / _PyLong_AsByteArray().
   COOKIE_BUF_LEN is the size of that byte string and the OFF_* macros give
   the offset at which each cookie_type field is stored in it.  Every
   offset is expressed relative to the field stored just before it.
*/

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
- static int
- textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
- {
- unsigned char buffer[COOKIE_BUF_LEN];
- PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
- if (cookieLong == NULL)
- return -1;
- if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
- PY_LITTLE_ENDIAN, 0) < 0) {
- Py_DECREF(cookieLong);
- return -1;
- }
- Py_DECREF(cookieLong);
- memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
- memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
- memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
- memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
- memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
- return 0;
- }
- static PyObject *
- textiowrapper_build_cookie(cookie_type *cookie)
- {
- unsigned char buffer[COOKIE_BUF_LEN];
- memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
- memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
- memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
- memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
- memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
- return _PyLong_FromByteArray(buffer, sizeof(buffer),
- PY_LITTLE_ENDIAN, 0);
- }
- static int
- _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
- {
- PyObject *res;
- /* When seeking to the start of the stream, we call decoder.reset()
- rather than decoder.getstate().
- This is for a few decoders such as utf-16 for which the state value
- at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
- utf-16, that we are expecting a BOM).
- */
- if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
- res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
- }
- else {
- res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
- "((yi))", "", cookie->dec_flags);
- }
- if (res == NULL) {
- return -1;
- }
- Py_DECREF(res);
- return 0;
- }
- static int
- _textiowrapper_encoder_reset(textio *self, int start_of_stream)
- {
- PyObject *res;
- if (start_of_stream) {
- res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
- self->encoding_start_of_stream = 1;
- }
- else {
- res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
- _PyLong_GetZero());
- self->encoding_start_of_stream = 0;
- }
- if (res == NULL)
- return -1;
- Py_DECREF(res);
- return 0;
- }
- static int
- _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
- {
- /* Same as _textiowrapper_decoder_setstate() above. */
- return _textiowrapper_encoder_reset(
- self, cookie->start_pos == 0 && cookie->dec_flags == 0);
- }
- /*[clinic input]
- _io.TextIOWrapper.seek
- cookie as cookieObj: object
- Zero or an opaque number returned by tell().
- whence: int(c_default='0') = os.SEEK_SET
- The relative position to seek from.
- /
- Set the stream position, and return the new stream position.
- Four operations are supported, given by the following argument
- combinations:
- - seek(0, SEEK_SET): Rewind to the start of the stream.
- - seek(cookie, SEEK_SET): Restore a previous position;
- 'cookie' must be a number returned by tell().
- - seek(0, SEEK_END): Fast-forward to the end of the stream.
- - seek(0, SEEK_CUR): Leave the current stream position unchanged.
- Any other argument combinations are invalid,
- and may raise exceptions.
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
- /*[clinic end generated code: output=0a15679764e2d04d input=0f68adcb02cf2823]*/
- {
- PyObject *posobj;
- cookie_type cookie;
- PyObject *res;
- int cmp;
- PyObject *snapshot;
- CHECK_ATTACHED(self);
- CHECK_CLOSED(self);
- Py_INCREF(cookieObj);
- if (!self->seekable) {
- _unsupported(self->state, "underlying stream is not seekable");
- goto fail;
- }
- PyObject *zero = _PyLong_GetZero(); // borrowed reference
- switch (whence) {
- case SEEK_CUR:
- /* seek relative to current position */
- cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
- if (cmp < 0)
- goto fail;
- if (cmp == 0) {
- _unsupported(self->state, "can't do nonzero cur-relative seeks");
- goto fail;
- }
- /* Seeking to the current position should attempt to
- * sync the underlying buffer with the current position.
- */
- Py_DECREF(cookieObj);
- cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
- if (cookieObj == NULL)
- goto fail;
- break;
- case SEEK_END:
- /* seek relative to end of file */
- cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
- if (cmp < 0)
- goto fail;
- if (cmp == 0) {
- _unsupported(self->state, "can't do nonzero end-relative seeks");
- goto fail;
- }
- res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
- if (res == NULL)
- goto fail;
- Py_DECREF(res);
- textiowrapper_set_decoded_chars(self, NULL);
- Py_CLEAR(self->snapshot);
- if (self->decoder) {
- res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
- if (res == NULL)
- goto fail;
- Py_DECREF(res);
- }
- res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
- Py_CLEAR(cookieObj);
- if (res == NULL)
- goto fail;
- if (self->encoder) {
- /* If seek() == 0, we are at the start of stream, otherwise not */
- cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
- if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
- Py_DECREF(res);
- goto fail;
- }
- }
- return res;
- case SEEK_SET:
- break;
- default:
- PyErr_Format(PyExc_ValueError,
- "invalid whence (%d, should be %d, %d or %d)", whence,
- SEEK_SET, SEEK_CUR, SEEK_END);
- goto fail;
- }
- cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
- if (cmp < 0)
- goto fail;
- if (cmp == 1) {
- PyErr_Format(PyExc_ValueError,
- "negative seek position %R", cookieObj);
- goto fail;
- }
- res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
- if (res == NULL)
- goto fail;
- Py_DECREF(res);
- /* The strategy of seek() is to go back to the safe start point
- * and replay the effect of read(chars_to_skip) from there.
- */
- if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
- goto fail;
- /* Seek back to the safe start point. */
- posobj = PyLong_FromOff_t(cookie.start_pos);
- if (posobj == NULL)
- goto fail;
- res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
- Py_DECREF(posobj);
- if (res == NULL)
- goto fail;
- Py_DECREF(res);
- textiowrapper_set_decoded_chars(self, NULL);
- Py_CLEAR(self->snapshot);
- /* Restore the decoder to its state from the safe start point. */
- if (self->decoder) {
- if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
- goto fail;
- }
- if (cookie.chars_to_skip) {
- /* Just like _read_chunk, feed the decoder and save a snapshot. */
- PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
- "i", cookie.bytes_to_feed);
- PyObject *decoded;
- if (input_chunk == NULL)
- goto fail;
- if (!PyBytes_Check(input_chunk)) {
- PyErr_Format(PyExc_TypeError,
- "underlying read() should have returned a bytes "
- "object, not '%.200s'",
- Py_TYPE(input_chunk)->tp_name);
- Py_DECREF(input_chunk);
- goto fail;
- }
- snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
- if (snapshot == NULL) {
- goto fail;
- }
- Py_XSETREF(self->snapshot, snapshot);
- decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
- input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
- if (check_decoded(decoded) < 0)
- goto fail;
- textiowrapper_set_decoded_chars(self, decoded);
- /* Skip chars_to_skip of the decoded characters. */
- if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
- PyErr_SetString(PyExc_OSError, "can't restore logical file position");
- goto fail;
- }
- self->decoded_chars_used = cookie.chars_to_skip;
- }
- else {
- snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
- if (snapshot == NULL)
- goto fail;
- Py_XSETREF(self->snapshot, snapshot);
- }
- /* Finally, reset the encoder (merely useful for proper BOM handling) */
- if (self->encoder) {
- if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
- goto fail;
- }
- return cookieObj;
- fail:
- Py_XDECREF(cookieObj);
- return NULL;
- }
- /*[clinic input]
- _io.TextIOWrapper.tell
- Return the stream position as an opaque number.
- The return value of tell() can be given as input to seek(), to restore a
- previous stream position.
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_tell_impl(textio *self)
- /*[clinic end generated code: output=4f168c08bf34ad5f input=0852d627d76fb520]*/
- {
- PyObject *res;
- PyObject *posobj = NULL;
- cookie_type cookie = {0,0,0,0,0};
- PyObject *next_input;
- Py_ssize_t chars_to_skip, chars_decoded;
- Py_ssize_t skip_bytes, skip_back;
- PyObject *saved_state = NULL;
- const char *input, *input_end;
- Py_ssize_t dec_buffer_len;
- int dec_flags;
- CHECK_ATTACHED(self);
- CHECK_CLOSED(self);
- if (!self->seekable) {
- _unsupported(self->state, "underlying stream is not seekable");
- goto fail;
- }
- if (!self->telling) {
- PyErr_SetString(PyExc_OSError,
- "telling position disabled by next() call");
- goto fail;
- }
- if (_textiowrapper_writeflush(self) < 0)
- return NULL;
- res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
- if (res == NULL)
- goto fail;
- Py_DECREF(res);
- posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
- if (posobj == NULL)
- goto fail;
- if (self->decoder == NULL || self->snapshot == NULL) {
- assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
- return posobj;
- }
- #if defined(HAVE_LARGEFILE_SUPPORT)
- cookie.start_pos = PyLong_AsLongLong(posobj);
- #else
- cookie.start_pos = PyLong_AsLong(posobj);
- #endif
- Py_DECREF(posobj);
- if (PyErr_Occurred())
- goto fail;
- /* Skip backward to the snapshot point (see _read_chunk). */
- assert(PyTuple_Check(self->snapshot));
- if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
- goto fail;
- assert (PyBytes_Check(next_input));
- cookie.start_pos -= PyBytes_GET_SIZE(next_input);
- /* How many decoded characters have been used up since the snapshot? */
- if (self->decoded_chars_used == 0) {
- /* We haven't moved from the snapshot point. */
- return textiowrapper_build_cookie(&cookie);
- }
- chars_to_skip = self->decoded_chars_used;
- /* Decoder state will be restored at the end */
- saved_state = PyObject_CallMethodNoArgs(self->decoder,
- &_Py_ID(getstate));
- if (saved_state == NULL)
- goto fail;
- #define DECODER_GETSTATE() do { \
- PyObject *dec_buffer; \
- PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
- &_Py_ID(getstate)); \
- if (_state == NULL) \
- goto fail; \
- if (!PyTuple_Check(_state)) { \
- PyErr_SetString(PyExc_TypeError, \
- "illegal decoder state"); \
- Py_DECREF(_state); \
- goto fail; \
- } \
- if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
- &dec_buffer, &dec_flags)) \
- { \
- Py_DECREF(_state); \
- goto fail; \
- } \
- if (!PyBytes_Check(dec_buffer)) { \
- PyErr_Format(PyExc_TypeError, \
- "illegal decoder state: the first item should be a " \
- "bytes object, not '%.200s'", \
- Py_TYPE(dec_buffer)->tp_name); \
- Py_DECREF(_state); \
- goto fail; \
- } \
- dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
- Py_DECREF(_state); \
- } while (0)
- #define DECODER_DECODE(start, len, res) do { \
- PyObject *_decoded = _PyObject_CallMethod( \
- self->decoder, &_Py_ID(decode), "y#", start, len); \
- if (check_decoded(_decoded) < 0) \
- goto fail; \
- res = PyUnicode_GET_LENGTH(_decoded); \
- Py_DECREF(_decoded); \
- } while (0)
- /* Fast search for an acceptable start point, close to our
- current pos */
- skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
- skip_back = 1;
- assert(skip_back <= PyBytes_GET_SIZE(next_input));
- input = PyBytes_AS_STRING(next_input);
- while (skip_bytes > 0) {
- /* Decode up to temptative start point */
- if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
- goto fail;
- DECODER_DECODE(input, skip_bytes, chars_decoded);
- if (chars_decoded <= chars_to_skip) {
- DECODER_GETSTATE();
- if (dec_buffer_len == 0) {
- /* Before pos and no bytes buffered in decoder => OK */
- cookie.dec_flags = dec_flags;
- chars_to_skip -= chars_decoded;
- break;
- }
- /* Skip back by buffered amount and reset heuristic */
- skip_bytes -= dec_buffer_len;
- skip_back = 1;
- }
- else {
- /* We're too far ahead, skip back a bit */
- skip_bytes -= skip_back;
- skip_back *= 2;
- }
- }
- if (skip_bytes <= 0) {
- skip_bytes = 0;
- if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
- goto fail;
- }
- /* Note our initial start point. */
- cookie.start_pos += skip_bytes;
- cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
- if (chars_to_skip == 0)
- goto finally;
- /* We should be close to the desired position. Now feed the decoder one
- * byte at a time until we reach the `chars_to_skip` target.
- * As we go, note the nearest "safe start point" before the current
- * location (a point where the decoder has nothing buffered, so seek()
- * can safely start from there and advance to this location).
- */
- chars_decoded = 0;
- input = PyBytes_AS_STRING(next_input);
- input_end = input + PyBytes_GET_SIZE(next_input);
- input += skip_bytes;
- while (input < input_end) {
- Py_ssize_t n;
- DECODER_DECODE(input, (Py_ssize_t)1, n);
- /* We got n chars for 1 byte */
- chars_decoded += n;
- cookie.bytes_to_feed += 1;
- DECODER_GETSTATE();
- if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
- /* Decoder buffer is empty, so this is a safe start point. */
- cookie.start_pos += cookie.bytes_to_feed;
- chars_to_skip -= chars_decoded;
- cookie.dec_flags = dec_flags;
- cookie.bytes_to_feed = 0;
- chars_decoded = 0;
- }
- if (chars_decoded >= chars_to_skip)
- break;
- input++;
- }
- if (input == input_end) {
- /* We didn't get enough decoded data; signal EOF to get more. */
- PyObject *decoded = _PyObject_CallMethod(
- self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
- if (check_decoded(decoded) < 0)
- goto fail;
- chars_decoded += PyUnicode_GET_LENGTH(decoded);
- Py_DECREF(decoded);
- cookie.need_eof = 1;
- if (chars_decoded < chars_to_skip) {
- PyErr_SetString(PyExc_OSError,
- "can't reconstruct logical file position");
- goto fail;
- }
- }
- finally:
- res = PyObject_CallMethodOneArg(
- self->decoder, &_Py_ID(setstate), saved_state);
- Py_DECREF(saved_state);
- if (res == NULL)
- return NULL;
- Py_DECREF(res);
- /* The returned cookie corresponds to the last safe start point. */
- cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
- return textiowrapper_build_cookie(&cookie);
- fail:
- if (saved_state) {
- PyObject *exc = PyErr_GetRaisedException();
- res = PyObject_CallMethodOneArg(
- self->decoder, &_Py_ID(setstate), saved_state);
- _PyErr_ChainExceptions1(exc);
- Py_DECREF(saved_state);
- Py_XDECREF(res);
- }
- return NULL;
- }
- /*[clinic input]
- _io.TextIOWrapper.truncate
- pos: object = None
- /
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
- /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
- {
- PyObject *res;
- CHECK_ATTACHED(self)
- res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
- if (res == NULL)
- return NULL;
- Py_DECREF(res);
- return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
- }
- static PyObject *
- textiowrapper_repr(textio *self)
- {
- PyObject *nameobj, *modeobj, *res, *s;
- int status;
- CHECK_INITIALIZED(self);
- res = PyUnicode_FromString("<_io.TextIOWrapper");
- if (res == NULL)
- return NULL;
- status = Py_ReprEnter((PyObject *)self);
- if (status != 0) {
- if (status > 0) {
- PyErr_Format(PyExc_RuntimeError,
- "reentrant call inside %s.__repr__",
- Py_TYPE(self)->tp_name);
- }
- goto error;
- }
- if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) {
- if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
- goto error;
- }
- /* Ignore ValueError raised if the underlying stream was detached */
- PyErr_Clear();
- }
- if (nameobj != NULL) {
- s = PyUnicode_FromFormat(" name=%R", nameobj);
- Py_DECREF(nameobj);
- if (s == NULL)
- goto error;
- PyUnicode_AppendAndDel(&res, s);
- if (res == NULL)
- goto error;
- }
- if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) {
- goto error;
- }
- if (modeobj != NULL) {
- s = PyUnicode_FromFormat(" mode=%R", modeobj);
- Py_DECREF(modeobj);
- if (s == NULL)
- goto error;
- PyUnicode_AppendAndDel(&res, s);
- if (res == NULL)
- goto error;
- }
- s = PyUnicode_FromFormat("%U encoding=%R>",
- res, self->encoding);
- Py_DECREF(res);
- if (status == 0) {
- Py_ReprLeave((PyObject *)self);
- }
- return s;
- error:
- Py_XDECREF(res);
- if (status == 0) {
- Py_ReprLeave((PyObject *)self);
- }
- return NULL;
- }
- /* Inquiries */
- /*[clinic input]
- _io.TextIOWrapper.fileno
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_fileno_impl(textio *self)
- /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
- {
- CHECK_ATTACHED(self);
- return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
- }
- /*[clinic input]
- _io.TextIOWrapper.seekable
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_seekable_impl(textio *self)
- /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
- {
- CHECK_ATTACHED(self);
- return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
- }
- /*[clinic input]
- _io.TextIOWrapper.readable
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_readable_impl(textio *self)
- /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
- {
- CHECK_ATTACHED(self);
- return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
- }
- /*[clinic input]
- _io.TextIOWrapper.writable
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_writable_impl(textio *self)
- /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
- {
- CHECK_ATTACHED(self);
- return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
- }
- /*[clinic input]
- _io.TextIOWrapper.isatty
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_isatty_impl(textio *self)
- /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
- {
- CHECK_ATTACHED(self);
- return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
- }
- /*[clinic input]
- _io.TextIOWrapper.flush
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_flush_impl(textio *self)
- /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
- {
- CHECK_ATTACHED(self);
- CHECK_CLOSED(self);
- self->telling = self->seekable;
- if (_textiowrapper_writeflush(self) < 0)
- return NULL;
- return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
- }
- /*[clinic input]
- _io.TextIOWrapper.close
- [clinic start generated code]*/
- static PyObject *
- _io_TextIOWrapper_close_impl(textio *self)
- /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
- {
- PyObject *res;
- int r;
- CHECK_ATTACHED(self);
- res = textiowrapper_closed_get(self, NULL);
- if (res == NULL)
- return NULL;
- r = PyObject_IsTrue(res);
- Py_DECREF(res);
- if (r < 0)
- return NULL;
- if (r > 0) {
- Py_RETURN_NONE; /* stream already closed */
- }
- else {
- PyObject *exc = NULL;
- if (self->finalizing) {
- res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
- (PyObject *)self);
- if (res) {
- Py_DECREF(res);
- }
- else {
- PyErr_Clear();
- }
- }
- res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
- if (res == NULL) {
- exc = PyErr_GetRaisedException();
- }
- else {
- Py_DECREF(res);
- }
- res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
- if (exc != NULL) {
- _PyErr_ChainExceptions1(exc);
- Py_CLEAR(res);
- }
- return res;
- }
- }
- static PyObject *
- textiowrapper_iternext(textio *self)
- {
- PyObject *line;
- CHECK_ATTACHED(self);
- self->telling = 0;
- if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
- /* Skip method call overhead for speed */
- line = _textiowrapper_readline(self, -1);
- }
- else {
- line = PyObject_CallMethodNoArgs((PyObject *)self,
- &_Py_ID(readline));
- if (line && !PyUnicode_Check(line)) {
- PyErr_Format(PyExc_OSError,
- "readline() should have returned a str object, "
- "not '%.200s'", Py_TYPE(line)->tp_name);
- Py_DECREF(line);
- return NULL;
- }
- }
- if (line == NULL || PyUnicode_READY(line) == -1)
- return NULL;
- if (PyUnicode_GET_LENGTH(line) == 0) {
- /* Reached EOF or would have blocked */
- Py_DECREF(line);
- Py_CLEAR(self->snapshot);
- self->telling = self->seekable;
- return NULL;
- }
- return line;
- }
- static PyObject *
- textiowrapper_name_get(textio *self, void *context)
- {
- CHECK_ATTACHED(self);
- return PyObject_GetAttr(self->buffer, &_Py_ID(name));
- }
- static PyObject *
- textiowrapper_closed_get(textio *self, void *context)
- {
- CHECK_ATTACHED(self);
- return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
- }
- static PyObject *
- textiowrapper_newlines_get(textio *self, void *context)
- {
- PyObject *res;
- CHECK_ATTACHED(self);
- if (self->decoder == NULL ||
- _PyObject_LookupAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
- {
- Py_RETURN_NONE;
- }
- return res;
- }
- static PyObject *
- textiowrapper_errors_get(textio *self, void *context)
- {
- CHECK_INITIALIZED(self);
- return Py_NewRef(self->errors);
- }
- static PyObject *
- textiowrapper_chunk_size_get(textio *self, void *context)
- {
- CHECK_ATTACHED(self);
- return PyLong_FromSsize_t(self->chunk_size);
- }
- static int
- textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
- {
- Py_ssize_t n;
- CHECK_ATTACHED_INT(self);
- if (arg == NULL) {
- PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
- return -1;
- }
- n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
- if (n == -1 && PyErr_Occurred())
- return -1;
- if (n <= 0) {
- PyErr_SetString(PyExc_ValueError,
- "a strictly positive integer is required");
- return -1;
- }
- self->chunk_size = n;
- return 0;
- }
- static PyMethodDef incrementalnewlinedecoder_methods[] = {
- _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
- _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
- _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
- _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
- {NULL}
- };
- static PyGetSetDef incrementalnewlinedecoder_getset[] = {
- {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
- {NULL}
- };
- static PyType_Slot nldecoder_slots[] = {
- {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
- {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
- {Py_tp_methods, incrementalnewlinedecoder_methods},
- {Py_tp_getset, incrementalnewlinedecoder_getset},
- {Py_tp_traverse, incrementalnewlinedecoder_traverse},
- {Py_tp_clear, incrementalnewlinedecoder_clear},
- {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
- {0, NULL},
- };
- PyType_Spec nldecoder_spec = {
- .name = "_io.IncrementalNewlineDecoder",
- .basicsize = sizeof(nldecoder_object),
- .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
- Py_TPFLAGS_IMMUTABLETYPE),
- .slots = nldecoder_slots,
- };
- static PyMethodDef textiowrapper_methods[] = {
- _IO_TEXTIOWRAPPER_DETACH_METHODDEF
- _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
- _IO_TEXTIOWRAPPER_WRITE_METHODDEF
- _IO_TEXTIOWRAPPER_READ_METHODDEF
- _IO_TEXTIOWRAPPER_READLINE_METHODDEF
- _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
- _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
- _IO_TEXTIOWRAPPER_FILENO_METHODDEF
- _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
- _IO_TEXTIOWRAPPER_READABLE_METHODDEF
- _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
- _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
- _IO_TEXTIOWRAPPER_SEEK_METHODDEF
- _IO_TEXTIOWRAPPER_TELL_METHODDEF
- _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
- {"__reduce__", _PyIOBase_cannot_pickle, METH_VARARGS},
- {"__reduce_ex__", _PyIOBase_cannot_pickle, METH_VARARGS},
- {NULL, NULL}
- };
- static PyMemberDef textiowrapper_members[] = {
- {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
- {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
- {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
- {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
- {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
- {"__weaklistoffset__", T_PYSSIZET, offsetof(textio, weakreflist), READONLY},
- {"__dictoffset__", T_PYSSIZET, offsetof(textio, dict), READONLY},
- {NULL}
- };
- static PyGetSetDef textiowrapper_getset[] = {
- {"name", (getter)textiowrapper_name_get, NULL, NULL},
- {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
- /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
- */
- {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
- {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
- {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
- (setter)textiowrapper_chunk_size_set, NULL},
- {NULL}
- };
- PyType_Slot textiowrapper_slots[] = {
- {Py_tp_dealloc, textiowrapper_dealloc},
- {Py_tp_repr, textiowrapper_repr},
- {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
- {Py_tp_traverse, textiowrapper_traverse},
- {Py_tp_clear, textiowrapper_clear},
- {Py_tp_iternext, textiowrapper_iternext},
- {Py_tp_methods, textiowrapper_methods},
- {Py_tp_members, textiowrapper_members},
- {Py_tp_getset, textiowrapper_getset},
- {Py_tp_init, _io_TextIOWrapper___init__},
- {0, NULL},
- };
- PyType_Spec textiowrapper_spec = {
- .name = "_io.TextIOWrapper",
- .basicsize = sizeof(textio),
- .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
- Py_TPFLAGS_IMMUTABLETYPE),
- .slots = textiowrapper_slots,
- };
|