textio.c 99 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365
  1. /*
  2. An implementation of Text I/O as defined by PEP 3116 - "New I/O"
  3. Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
  4. Written by Amaury Forgeot d'Arc and Antoine Pitrou
  5. */
  6. #define PY_SSIZE_T_CLEAN
  7. #include "Python.h"
  8. #include "pycore_interp.h" // PyInterpreterState.fs_codec
  9. #include "pycore_long.h" // _PyLong_GetZero()
  10. #include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
  11. #include "pycore_object.h"
  12. #include "pycore_pystate.h" // _PyInterpreterState_GET()
  13. #include "structmember.h" // PyMemberDef
  14. #include "_iomodule.h"
  15. /*[clinic input]
  16. module _io
  17. class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
  18. class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
  19. class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
  20. [clinic start generated code]*/
  21. /*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
  22. typedef struct nldecoder_object nldecoder_object;
  23. typedef struct textio textio;
  24. #define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
  25. #include "clinic/textio.c.h"
  26. #undef clinic_state
  27. /* TextIOBase */
  28. PyDoc_STRVAR(textiobase_doc,
  29. "Base class for text I/O.\n"
  30. "\n"
  31. "This class provides a character and line based interface to stream\n"
  32. "I/O. There is no readinto method because Python's character strings\n"
  33. "are immutable.\n"
  34. );
  35. static PyObject *
  36. _unsupported(_PyIO_State *state, const char *message)
  37. {
  38. PyErr_SetString(state->unsupported_operation, message);
  39. return NULL;
  40. }
  41. /*[clinic input]
  42. _io._TextIOBase.detach
  43. cls: defining_class
  44. /
  45. Separate the underlying buffer from the TextIOBase and return it.
  46. After the underlying buffer has been detached, the TextIO is in an unusable state.
  47. [clinic start generated code]*/
  48. static PyObject *
  49. _io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
  50. /*[clinic end generated code: output=50915f40c609eaa4 input=987ca3640d0a3776]*/
  51. {
  52. _PyIO_State *state = get_io_state_by_cls(cls);
  53. return _unsupported(state, "detach");
  54. }
  55. /*[clinic input]
  56. _io._TextIOBase.read
  57. cls: defining_class
  58. size: int(unused=True) = -1
  59. /
  60. Read at most size characters from stream.
  61. Read from underlying buffer until we have size characters or we hit EOF.
  62. If size is negative or omitted, read until EOF.
  63. [clinic start generated code]*/
  64. static PyObject *
  65. _io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
  66. int Py_UNUSED(size))
  67. /*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
  68. {
  69. _PyIO_State *state = get_io_state_by_cls(cls);
  70. return _unsupported(state, "read");
  71. }
  72. /*[clinic input]
  73. _io._TextIOBase.readline
  74. cls: defining_class
  75. size: int(unused=True) = -1
  76. /
  77. Read until newline or EOF.
  78. Return an empty string if EOF is hit immediately.
  79. If size is specified, at most size characters will be read.
  80. [clinic start generated code]*/
  81. static PyObject *
  82. _io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
  83. int Py_UNUSED(size))
  84. /*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
  85. {
  86. _PyIO_State *state = get_io_state_by_cls(cls);
  87. return _unsupported(state, "readline");
  88. }
  89. /*[clinic input]
  90. _io._TextIOBase.write
  91. cls: defining_class
  92. s: str(unused=True)
  93. /
  94. Write string s to stream.
  95. Return the number of characters written
  96. (which is always equal to the length of the string).
  97. [clinic start generated code]*/
  98. static PyObject *
  99. _io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
  100. const char *Py_UNUSED(s))
  101. /*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
  102. {
  103. _PyIO_State *state = get_io_state_by_cls(cls);
  104. return _unsupported(state, "write");
  105. }
  106. PyDoc_STRVAR(textiobase_encoding_doc,
  107. "Encoding of the text stream.\n"
  108. "\n"
  109. "Subclasses should override.\n"
  110. );
  111. static PyObject *
  112. textiobase_encoding_get(PyObject *self, void *context)
  113. {
  114. Py_RETURN_NONE;
  115. }
  116. PyDoc_STRVAR(textiobase_newlines_doc,
  117. "Line endings translated so far.\n"
  118. "\n"
  119. "Only line endings translated during reading are considered.\n"
  120. "\n"
  121. "Subclasses should override.\n"
  122. );
  123. static PyObject *
  124. textiobase_newlines_get(PyObject *self, void *context)
  125. {
  126. Py_RETURN_NONE;
  127. }
  128. PyDoc_STRVAR(textiobase_errors_doc,
  129. "The error setting of the decoder or encoder.\n"
  130. "\n"
  131. "Subclasses should override.\n"
  132. );
  133. static PyObject *
  134. textiobase_errors_get(PyObject *self, void *context)
  135. {
  136. Py_RETURN_NONE;
  137. }
  138. static PyMethodDef textiobase_methods[] = {
  139. _IO__TEXTIOBASE_DETACH_METHODDEF
  140. _IO__TEXTIOBASE_READ_METHODDEF
  141. _IO__TEXTIOBASE_READLINE_METHODDEF
  142. _IO__TEXTIOBASE_WRITE_METHODDEF
  143. {NULL, NULL}
  144. };
  145. static PyGetSetDef textiobase_getset[] = {
  146. {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
  147. {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
  148. {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
  149. {NULL}
  150. };
  151. static PyType_Slot textiobase_slots[] = {
  152. {Py_tp_doc, (void *)textiobase_doc},
  153. {Py_tp_methods, textiobase_methods},
  154. {Py_tp_getset, textiobase_getset},
  155. {0, NULL},
  156. };
  157. /* Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
  158. PyType_Spec textiobase_spec = {
  159. .name = "_io._TextIOBase",
  160. .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
  161. Py_TPFLAGS_IMMUTABLETYPE),
  162. .slots = textiobase_slots,
  163. };
  164. /* IncrementalNewlineDecoder */
  165. struct nldecoder_object {
  166. PyObject_HEAD
  167. PyObject *decoder;
  168. PyObject *errors;
  169. unsigned int pendingcr: 1;
  170. unsigned int translate: 1;
  171. unsigned int seennl: 3;
  172. };
  173. /*[clinic input]
  174. _io.IncrementalNewlineDecoder.__init__
  175. decoder: object
  176. translate: bool
  177. errors: object(c_default="NULL") = "strict"
  178. Codec used when reading a file in universal newlines mode.
  179. It wraps another incremental decoder, translating \r\n and \r into \n.
  180. It also records the types of newlines encountered. When used with
  181. translate=False, it ensures that the newline sequence is returned in
  182. one piece. When used with decoder=None, it expects unicode strings as
  183. decode input and translates newlines without first invoking an external
  184. decoder.
  185. [clinic start generated code]*/
  186. static int
  187. _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
  188. PyObject *decoder, int translate,
  189. PyObject *errors)
  190. /*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
  191. {
  192. if (errors == NULL) {
  193. errors = Py_NewRef(&_Py_ID(strict));
  194. }
  195. else {
  196. errors = Py_NewRef(errors);
  197. }
  198. Py_XSETREF(self->errors, errors);
  199. Py_XSETREF(self->decoder, Py_NewRef(decoder));
  200. self->translate = translate ? 1 : 0;
  201. self->seennl = 0;
  202. self->pendingcr = 0;
  203. return 0;
  204. }
  205. static int
  206. incrementalnewlinedecoder_traverse(nldecoder_object *self, visitproc visit,
  207. void *arg)
  208. {
  209. Py_VISIT(Py_TYPE(self));
  210. Py_VISIT(self->decoder);
  211. Py_VISIT(self->errors);
  212. return 0;
  213. }
  214. static int
  215. incrementalnewlinedecoder_clear(nldecoder_object *self)
  216. {
  217. Py_CLEAR(self->decoder);
  218. Py_CLEAR(self->errors);
  219. return 0;
  220. }
  221. static void
  222. incrementalnewlinedecoder_dealloc(nldecoder_object *self)
  223. {
  224. PyTypeObject *tp = Py_TYPE(self);
  225. _PyObject_GC_UNTRACK(self);
  226. (void)incrementalnewlinedecoder_clear(self);
  227. tp->tp_free((PyObject *)self);
  228. Py_DECREF(tp);
  229. }
  230. static int
  231. check_decoded(PyObject *decoded)
  232. {
  233. if (decoded == NULL)
  234. return -1;
  235. if (!PyUnicode_Check(decoded)) {
  236. PyErr_Format(PyExc_TypeError,
  237. "decoder should return a string result, not '%.200s'",
  238. Py_TYPE(decoded)->tp_name);
  239. Py_DECREF(decoded);
  240. return -1;
  241. }
  242. if (PyUnicode_READY(decoded) < 0) {
  243. Py_DECREF(decoded);
  244. return -1;
  245. }
  246. return 0;
  247. }
  /* Guard for IncrementalNewlineDecoder entry points: if __init__ has not
     run (self->errors is still NULL), set ValueError and return NULL from
     the *calling* function.

     The body is wrapped in do { } while (0) so the macro expands to a single
     statement that needs its trailing semicolon and cannot capture a
     following `else`; the argument is parenthesized so any pointer
     expression can be passed. */
  #define CHECK_INITIALIZED_DECODER(self) \
      do { \
          if ((self)->errors == NULL) { \
              PyErr_SetString(PyExc_ValueError, \
                              "IncrementalNewlineDecoder.__init__() not called"); \
              return NULL; \
          } \
      } while (0)
  /* Bit flags recording which newline kinds have been observed. */
  #define SEEN_CR   (1 << 0)
  #define SEEN_LF   (1 << 1)
  #define SEEN_CRLF (1 << 2)
  #define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
  /* Decode one chunk and post-process its newlines.
   *
   * `myself` is used as an nldecoder_object.  If self->decoder is not None,
   * its decode(input, final) method is called; otherwise `input` itself is
   * taken as the decoded text.  The result must be a str.
   *
   * A trailing '\r' is held back (self->pendingcr) when `final` is false and
   * is prepended again on a later call that is final or yields text.  The
   * text is then scanned to record the newline kinds seen in self->seennl
   * and, if self->translate is set, "\r" and "\r\n" are rewritten as "\n".
   *
   * Returns a new reference to a str, or NULL with an exception set.
   */
  258. PyObject *
  259. _PyIncrementalNewlineDecoder_decode(PyObject *myself,
  260. PyObject *input, int final)
  261. {
  262. PyObject *output;
  263. Py_ssize_t output_len;
  264. nldecoder_object *self = (nldecoder_object *) myself;
  265. CHECK_INITIALIZED_DECODER(self);
  266. /* decode input (with the eventual \r from a previous pass) */
  267. if (self->decoder != Py_None) {
  268. output = PyObject_CallMethodObjArgs(self->decoder,
  269. &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
  270. }
  271. else {
  272. output = Py_NewRef(input);
  273. }
  /* On failure check_decoded() has already released `output` (if any). */
  274. if (check_decoded(output) < 0)
  275. return NULL;
  276. output_len = PyUnicode_GET_LENGTH(output);
  277. if (self->pendingcr && (final || output_len > 0)) {
  278. /* Prefix output with CR */
  279. int kind;
  280. PyObject *modified;
  281. char *out;
  282. modified = PyUnicode_New(output_len + 1,
  283. PyUnicode_MAX_CHAR_VALUE(output));
  284. if (modified == NULL)
  285. goto error;
  286. kind = PyUnicode_KIND(modified);
  287. out = PyUnicode_DATA(modified);
  288. PyUnicode_WRITE(kind, out, 0, '\r');
  /* `out` is a char pointer, so `out + kind` addresses character index 1. */
  289. memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
  290. Py_SETREF(output, modified); /* output remains ready */
  291. self->pendingcr = 0;
  292. output_len++;
  293. }
  294. /* retain last \r even when not translating data:
  295. * then readline() is sure to get \r\n in one pass
  296. */
  297. if (!final) {
  298. if (output_len > 0
  299. && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
  300. {
  301. PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
  302. if (modified == NULL)
  303. goto error;
  304. Py_SETREF(output, modified);
  305. self->pendingcr = 1;
  306. }
  307. }
  308. /* Record which newlines are read and do newline translation if desired,
  309. all in one pass. */
  310. {
  311. const void *in_str;
  312. Py_ssize_t len;
  313. int seennl = self->seennl;
  314. int only_lf = 0;
  315. int kind;
  316. in_str = PyUnicode_DATA(output);
  317. len = PyUnicode_GET_LENGTH(output);
  318. kind = PyUnicode_KIND(output);
  /* Empty text: nothing to scan, and self->seennl is left unchanged. */
  319. if (len == 0)
  320. return output;
  321. /* If, up to now, newlines are consistently \n, do a quick check
  322. for the \r *byte* with the libc's optimized memchr.
  323. */
  324. if (seennl == SEEN_LF || seennl == 0) {
  /* The search is byte-wise over kind * len bytes, so for kinds wider than
     one byte a hit may be a byte of some other character; that only means
     one of the character-by-character paths below is taken. */
  325. only_lf = (memchr(in_str, '\r', kind * len) == NULL);
  326. }
  327. if (only_lf) {
  328. /* If not already seen, quick scan for a possible "\n" character.
  329. (there's nothing else to be done, even when in translation mode)
  330. */
  331. if (seennl == 0 &&
  332. memchr(in_str, '\n', kind * len) != NULL) {
  333. if (kind == PyUnicode_1BYTE_KIND)
  334. seennl |= SEEN_LF;
  335. else {
  336. Py_ssize_t i = 0;
  337. for (;;) {
  338. Py_UCS4 c;
  339. /* Fast loop for non-control characters */
  /* NOTE(review): this inner loop has no bounds check; it presumably relies
     on the string data being followed by a NUL terminator that stops it at
     index len -- confirm.  The same applies to the fast loops below. */
  340. while (PyUnicode_READ(kind, in_str, i) > '\n')
  341. i++;
  342. c = PyUnicode_READ(kind, in_str, i++);
  343. if (c == '\n') {
  344. seennl |= SEEN_LF;
  345. break;
  346. }
  347. if (i >= len)
  348. break;
  349. }
  350. }
  351. }
  352. /* Finished: we have scanned for newlines, and none of them
  353. need translating */
  354. }
  355. else if (!self->translate) {
  356. Py_ssize_t i = 0;
  357. /* We have already seen all newline types, no need to scan again */
  358. if (seennl == SEEN_ALL)
  359. goto endscan;
  360. for (;;) {
  361. Py_UCS4 c;
  362. /* Fast loop for non-control characters */
  363. while (PyUnicode_READ(kind, in_str, i) > '\r')
  364. i++;
  365. c = PyUnicode_READ(kind, in_str, i++);
  366. if (c == '\n')
  367. seennl |= SEEN_LF;
  368. else if (c == '\r') {
  369. if (PyUnicode_READ(kind, in_str, i) == '\n') {
  370. seennl |= SEEN_CRLF;
  371. i++;
  372. }
  373. else
  374. seennl |= SEEN_CR;
  375. }
  376. if (i >= len)
  377. break;
  378. if (seennl == SEEN_ALL)
  379. break;
  380. }
  381. endscan:
  382. ;
  383. }
  384. else {
  385. void *translated;
  /* These two declarations shadow the `kind` and `in_str` of the enclosing
     block and are initialized from the same `output` object. */
  386. int kind = PyUnicode_KIND(output);
  387. const void *in_str = PyUnicode_DATA(output);
  388. Py_ssize_t in, out;
  389. /* XXX: Previous in-place translation here is disabled as
  390. resizing is not possible anymore */
  391. /* We could try to optimize this so that we only do a copy
  392. when there is something to translate. On the other hand,
  393. we already know there is a \r byte, so chances are high
  394. that something needs to be done. */
  /* Each input character produces at most one output character (and "\r\n"
     produces one), so kind * len bytes are enough for the result. */
  395. translated = PyMem_Malloc(kind * len);
  396. if (translated == NULL) {
  397. PyErr_NoMemory();
  398. goto error;
  399. }
  400. in = out = 0;
  401. for (;;) {
  402. Py_UCS4 c;
  403. /* Fast loop for non-control characters */
  404. while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
  405. PyUnicode_WRITE(kind, translated, out++, c);
  406. if (c == '\n') {
  407. PyUnicode_WRITE(kind, translated, out++, c);
  408. seennl |= SEEN_LF;
  409. continue;
  410. }
  411. if (c == '\r') {
  412. if (PyUnicode_READ(kind, in_str, in) == '\n') {
  413. in++;
  414. seennl |= SEEN_CRLF;
  415. }
  416. else
  417. seennl |= SEEN_CR;
  418. PyUnicode_WRITE(kind, translated, out++, '\n');
  419. continue;
  420. }
  /* `in` was already advanced past the character just read, so in > len
     means `c` came from index len or beyond, i.e. past the last character. */
  421. if (in > len)
  422. break;
  423. PyUnicode_WRITE(kind, translated, out++, c);
  424. }
  425. Py_DECREF(output);
  426. output = PyUnicode_FromKindAndData(kind, translated, out);
  427. PyMem_Free(translated);
  /* The previous `output` was released above, so a plain return is enough. */
  428. if (!output)
  429. return NULL;
  430. }
  431. self->seennl |= seennl;
  432. }
  433. return output;
  434. error:
  435. Py_DECREF(output);
  436. return NULL;
  437. }
  438. /*[clinic input]
  439. _io.IncrementalNewlineDecoder.decode
  440. input: object
  441. final: bool = False
  442. [clinic start generated code]*/
  443. static PyObject *
  444. _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
  445. PyObject *input, int final)
  446. /*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/
  447. {
  448. return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
  449. }
  450. /*[clinic input]
  451. _io.IncrementalNewlineDecoder.getstate
  452. [clinic start generated code]*/
  453. static PyObject *
  454. _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
  455. /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
  456. {
  457. PyObject *buffer;
  458. unsigned long long flag;
  459. CHECK_INITIALIZED_DECODER(self);
  460. if (self->decoder != Py_None) {
  461. PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
  462. &_Py_ID(getstate));
  463. if (state == NULL)
  464. return NULL;
  465. if (!PyTuple_Check(state)) {
  466. PyErr_SetString(PyExc_TypeError,
  467. "illegal decoder state");
  468. Py_DECREF(state);
  469. return NULL;
  470. }
  471. if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
  472. &buffer, &flag))
  473. {
  474. Py_DECREF(state);
  475. return NULL;
  476. }
  477. Py_INCREF(buffer);
  478. Py_DECREF(state);
  479. }
  480. else {
  481. buffer = PyBytes_FromString("");
  482. flag = 0;
  483. }
  484. flag <<= 1;
  485. if (self->pendingcr)
  486. flag |= 1;
  487. return Py_BuildValue("NK", buffer, flag);
  488. }
  489. /*[clinic input]
  490. _io.IncrementalNewlineDecoder.setstate
  491. state: object
  492. /
  493. [clinic start generated code]*/
  494. static PyObject *
  495. _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
  496. PyObject *state)
  497. /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
  498. {
  499. PyObject *buffer;
  500. unsigned long long flag;
  501. CHECK_INITIALIZED_DECODER(self);
  502. if (!PyTuple_Check(state)) {
  503. PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
  504. return NULL;
  505. }
  506. if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
  507. &buffer, &flag))
  508. {
  509. return NULL;
  510. }
  511. self->pendingcr = (int) (flag & 1);
  512. flag >>= 1;
  513. if (self->decoder != Py_None) {
  514. return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
  515. "((OK))", buffer, flag);
  516. }
  517. else {
  518. Py_RETURN_NONE;
  519. }
  520. }
  521. /*[clinic input]
  522. _io.IncrementalNewlineDecoder.reset
  523. [clinic start generated code]*/
  524. static PyObject *
  525. _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
  526. /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
  527. {
  528. CHECK_INITIALIZED_DECODER(self);
  529. self->seennl = 0;
  530. self->pendingcr = 0;
  531. if (self->decoder != Py_None)
  532. return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
  533. else
  534. Py_RETURN_NONE;
  535. }
  536. static PyObject *
  537. incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
  538. {
  539. CHECK_INITIALIZED_DECODER(self);
  540. switch (self->seennl) {
  541. case SEEN_CR:
  542. return PyUnicode_FromString("\r");
  543. case SEEN_LF:
  544. return PyUnicode_FromString("\n");
  545. case SEEN_CRLF:
  546. return PyUnicode_FromString("\r\n");
  547. case SEEN_CR | SEEN_LF:
  548. return Py_BuildValue("ss", "\r", "\n");
  549. case SEEN_CR | SEEN_CRLF:
  550. return Py_BuildValue("ss", "\r", "\r\n");
  551. case SEEN_LF | SEEN_CRLF:
  552. return Py_BuildValue("ss", "\n", "\r\n");
  553. case SEEN_CR | SEEN_LF | SEEN_CRLF:
  554. return Py_BuildValue("sss", "\r", "\n", "\r\n");
  555. default:
  556. Py_RETURN_NONE;
  557. }
  558. }
  559. /* TextIOWrapper */
  560. typedef PyObject *
  561. (*encodefunc_t)(PyObject *, PyObject *);
  562. struct textio
  563. {
  564. PyObject_HEAD
  565. int ok; /* initialized? */
  566. int detached;
  567. Py_ssize_t chunk_size;
  568. PyObject *buffer;
  569. PyObject *encoding;
  570. PyObject *encoder;
  571. PyObject *decoder;
  572. PyObject *readnl;
  573. PyObject *errors;
  574. const char *writenl; /* ASCII-encoded; NULL stands for \n */
  575. char line_buffering;
  576. char write_through;
  577. char readuniversal;
  578. char readtranslate;
  579. char writetranslate;
  580. char seekable;
  581. char has_read1;
  582. char telling;
  583. char finalizing;
  584. /* Specialized encoding func (see below) */
  585. encodefunc_t encodefunc;
  586. /* Whether or not it's the start of the stream */
  587. char encoding_start_of_stream;
  588. /* Reads and writes are internally buffered in order to speed things up.
  589. However, any read will first flush the write buffer if itsn't empty.
  590. Please also note that text to be written is first encoded before being
  591. buffered. This is necessary so that encoding errors are immediately
  592. reported to the caller, but it unfortunately means that the
  593. IncrementalEncoder (whose encode() method is always written in Python)
  594. becomes a bottleneck for small writes.
  595. */
  596. PyObject *decoded_chars; /* buffer for text returned from decoder */
  597. Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
  598. PyObject *pending_bytes; // data waiting to be written.
  599. // ascii unicode, bytes, or list of them.
  600. Py_ssize_t pending_bytes_count;
  601. /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
  602. * dec_flags is the second (integer) item of the decoder state and
  603. * next_input is the chunk of input bytes that comes next after the
  604. * snapshot point. We use this to reconstruct decoder states in tell().
  605. */
  606. PyObject *snapshot;
  607. /* Bytes-to-characters ratio for the current chunk. Serves as input for
  608. the heuristic in tell(). */
  609. double b2cratio;
  610. /* Cache raw object if it's a FileIO object */
  611. PyObject *raw;
  612. PyObject *weakreflist;
  613. PyObject *dict;
  614. _PyIO_State *state;
  615. };
  616. static void
  617. textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
  618. /* A couple of specialized cases in order to bypass the slow incremental
  619. encoding methods for the most popular encodings. */
  620. static PyObject *
  621. ascii_encode(textio *self, PyObject *text)
  622. {
  623. return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
  624. }
  625. static PyObject *
  626. utf16be_encode(textio *self, PyObject *text)
  627. {
  628. return _PyUnicode_EncodeUTF16(text,
  629. PyUnicode_AsUTF8(self->errors), 1);
  630. }
  631. static PyObject *
  632. utf16le_encode(textio *self, PyObject *text)
  633. {
  634. return _PyUnicode_EncodeUTF16(text,
  635. PyUnicode_AsUTF8(self->errors), -1);
  636. }
  637. static PyObject *
  638. utf16_encode(textio *self, PyObject *text)
  639. {
  640. if (!self->encoding_start_of_stream) {
  641. /* Skip the BOM and use native byte ordering */
  642. #if PY_BIG_ENDIAN
  643. return utf16be_encode(self, text);
  644. #else
  645. return utf16le_encode(self, text);
  646. #endif
  647. }
  648. return _PyUnicode_EncodeUTF16(text,
  649. PyUnicode_AsUTF8(self->errors), 0);
  650. }
  651. static PyObject *
  652. utf32be_encode(textio *self, PyObject *text)
  653. {
  654. return _PyUnicode_EncodeUTF32(text,
  655. PyUnicode_AsUTF8(self->errors), 1);
  656. }
  657. static PyObject *
  658. utf32le_encode(textio *self, PyObject *text)
  659. {
  660. return _PyUnicode_EncodeUTF32(text,
  661. PyUnicode_AsUTF8(self->errors), -1);
  662. }
  663. static PyObject *
  664. utf32_encode(textio *self, PyObject *text)
  665. {
  666. if (!self->encoding_start_of_stream) {
  667. /* Skip the BOM and use native byte ordering */
  668. #if PY_BIG_ENDIAN
  669. return utf32be_encode(self, text);
  670. #else
  671. return utf32le_encode(self, text);
  672. #endif
  673. }
  674. return _PyUnicode_EncodeUTF32(text,
  675. PyUnicode_AsUTF8(self->errors), 0);
  676. }
  677. static PyObject *
  678. utf8_encode(textio *self, PyObject *text)
  679. {
  680. return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
  681. }
  682. static PyObject *
  683. latin1_encode(textio *self, PyObject *text)
  684. {
  685. return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
  686. }
  687. // Return true when encoding can be skipped when text is ascii.
  688. static inline int
  689. is_asciicompat_encoding(encodefunc_t f)
  690. {
  691. return f == (encodefunc_t) ascii_encode
  692. || f == (encodefunc_t) latin1_encode
  693. || f == (encodefunc_t) utf8_encode;
  694. }
  695. /* Map normalized encoding names onto the specialized encoding funcs */
  696. typedef struct {
  697. const char *name;
  698. encodefunc_t encodefunc;
  699. } encodefuncentry;
  700. static const encodefuncentry encodefuncs[] = {
  701. {"ascii", (encodefunc_t) ascii_encode},
  702. {"iso8859-1", (encodefunc_t) latin1_encode},
  703. {"utf-8", (encodefunc_t) utf8_encode},
  704. {"utf-16-be", (encodefunc_t) utf16be_encode},
  705. {"utf-16-le", (encodefunc_t) utf16le_encode},
  706. {"utf-16", (encodefunc_t) utf16_encode},
  707. {"utf-32-be", (encodefunc_t) utf32be_encode},
  708. {"utf-32-le", (encodefunc_t) utf32le_encode},
  709. {"utf-32", (encodefunc_t) utf32_encode},
  710. {NULL, NULL}
  711. };
  712. static int
  713. validate_newline(const char *newline)
  714. {
  715. if (newline && newline[0] != '\0'
  716. && !(newline[0] == '\n' && newline[1] == '\0')
  717. && !(newline[0] == '\r' && newline[1] == '\0')
  718. && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
  719. PyErr_Format(PyExc_ValueError,
  720. "illegal newline value: %s", newline);
  721. return -1;
  722. }
  723. return 0;
  724. }
  725. static int
  726. set_newline(textio *self, const char *newline)
  727. {
  728. PyObject *old = self->readnl;
  729. if (newline == NULL) {
  730. self->readnl = NULL;
  731. }
  732. else {
  733. self->readnl = PyUnicode_FromString(newline);
  734. if (self->readnl == NULL) {
  735. self->readnl = old;
  736. return -1;
  737. }
  738. }
  739. self->readuniversal = (newline == NULL || newline[0] == '\0');
  740. self->readtranslate = (newline == NULL);
  741. self->writetranslate = (newline == NULL || newline[0] != '\0');
  742. if (!self->readuniversal && self->readnl != NULL) {
  743. // validate_newline() accepts only ASCII newlines.
  744. assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
  745. self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
  746. if (strcmp(self->writenl, "\n") == 0) {
  747. self->writenl = NULL;
  748. }
  749. }
  750. else {
  751. #ifdef MS_WINDOWS
  752. self->writenl = "\r\n";
  753. #else
  754. self->writenl = NULL;
  755. #endif
  756. }
  757. Py_XDECREF(old);
  758. return 0;
  759. }
  760. static int
  761. _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
  762. const char *errors)
  763. {
  764. PyObject *res;
  765. int r;
  766. res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
  767. if (res == NULL)
  768. return -1;
  769. r = PyObject_IsTrue(res);
  770. Py_DECREF(res);
  771. if (r == -1)
  772. return -1;
  773. if (r != 1)
  774. return 0;
  775. Py_CLEAR(self->decoder);
  776. self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
  777. if (self->decoder == NULL)
  778. return -1;
  779. if (self->readuniversal) {
  780. _PyIO_State *state = self->state;
  781. PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
  782. (PyObject *)state->PyIncrementalNewlineDecoder_Type,
  783. self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
  784. if (incrementalDecoder == NULL)
  785. return -1;
  786. Py_XSETREF(self->decoder, incrementalDecoder);
  787. }
  788. return 0;
  789. }
  790. static PyObject*
  791. _textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
  792. int eof)
  793. {
  794. PyObject *chars;
  795. if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
  796. chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
  797. else
  798. chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
  799. eof ? Py_True : Py_False, NULL);
  800. if (check_decoded(chars) < 0)
  801. // check_decoded already decreases refcount
  802. return NULL;
  803. return chars;
  804. }
  805. static int
  806. _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
  807. const char *errors)
  808. {
  809. PyObject *res;
  810. int r;
  811. res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
  812. if (res == NULL)
  813. return -1;
  814. r = PyObject_IsTrue(res);
  815. Py_DECREF(res);
  816. if (r == -1)
  817. return -1;
  818. if (r != 1)
  819. return 0;
  820. Py_CLEAR(self->encoder);
  821. self->encodefunc = NULL;
  822. self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
  823. if (self->encoder == NULL)
  824. return -1;
  825. /* Get the normalized named of the codec */
  826. if (_PyObject_LookupAttr(codec_info, &_Py_ID(name), &res) < 0) {
  827. return -1;
  828. }
  829. if (res != NULL && PyUnicode_Check(res)) {
  830. const encodefuncentry *e = encodefuncs;
  831. while (e->name != NULL) {
  832. if (_PyUnicode_EqualToASCIIString(res, e->name)) {
  833. self->encodefunc = e->encodefunc;
  834. break;
  835. }
  836. e++;
  837. }
  838. }
  839. Py_XDECREF(res);
  840. return 0;
  841. }
  842. static int
  843. _textiowrapper_fix_encoder_state(textio *self)
  844. {
  845. if (!self->seekable || !self->encoder) {
  846. return 0;
  847. }
  848. self->encoding_start_of_stream = 1;
  849. PyObject *cookieObj = PyObject_CallMethodNoArgs(
  850. self->buffer, &_Py_ID(tell));
  851. if (cookieObj == NULL) {
  852. return -1;
  853. }
  854. int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
  855. Py_DECREF(cookieObj);
  856. if (cmp < 0) {
  857. return -1;
  858. }
  859. if (cmp == 0) {
  860. self->encoding_start_of_stream = 0;
  861. PyObject *res = PyObject_CallMethodOneArg(
  862. self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
  863. if (res == NULL) {
  864. return -1;
  865. }
  866. Py_DECREF(res);
  867. }
  868. return 0;
  869. }
  870. static int
  871. io_check_errors(PyObject *errors)
  872. {
  873. assert(errors != NULL && errors != Py_None);
  874. PyInterpreterState *interp = _PyInterpreterState_GET();
  875. #ifndef Py_DEBUG
  876. /* In release mode, only check in development mode (-X dev) */
  877. if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
  878. return 0;
  879. }
  880. #else
  881. /* Always check in debug mode */
  882. #endif
  883. /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
  884. before_PyUnicode_InitEncodings() is called. */
  885. if (!interp->unicode.fs_codec.encoding) {
  886. return 0;
  887. }
  888. Py_ssize_t name_length;
  889. const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
  890. if (name == NULL) {
  891. return -1;
  892. }
  893. if (strlen(name) != (size_t)name_length) {
  894. PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
  895. return -1;
  896. }
  897. PyObject *handler = PyCodec_LookupError(name);
  898. if (handler != NULL) {
  899. Py_DECREF(handler);
  900. return 0;
  901. }
  902. return -1;
  903. }
  904. /*[clinic input]
  905. _io.TextIOWrapper.__init__
  906. buffer: object
  907. encoding: str(accept={str, NoneType}) = None
  908. errors: object = None
  909. newline: str(accept={str, NoneType}) = None
  910. line_buffering: bool = False
  911. write_through: bool = False
  912. Character and line based layer over a BufferedIOBase object, buffer.
  913. encoding gives the name of the encoding that the stream will be
  914. decoded or encoded with. It defaults to locale.getencoding().
  915. errors determines the strictness of encoding and decoding (see
  916. help(codecs.Codec) or the documentation for codecs.register) and
  917. defaults to "strict".
  918. newline controls how line endings are handled. It can be None, '',
  919. '\n', '\r', and '\r\n'. It works as follows:
  920. * On input, if newline is None, universal newlines mode is
  921. enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
  922. these are translated into '\n' before being returned to the
  923. caller. If it is '', universal newline mode is enabled, but line
  924. endings are returned to the caller untranslated. If it has any of
  925. the other legal values, input lines are only terminated by the given
  926. string, and the line ending is returned to the caller untranslated.
  927. * On output, if newline is None, any '\n' characters written are
  928. translated to the system default line separator, os.linesep. If
  929. newline is '' or '\n', no translation takes place. If newline is any
  930. of the other legal values, any '\n' characters written are translated
  931. to the given string.
  932. If line_buffering is True, a call to flush is implied when a call to
  933. write contains a newline character.
  934. [clinic start generated code]*/
  935. static int
  936. _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
  937. const char *encoding, PyObject *errors,
  938. const char *newline, int line_buffering,
  939. int write_through)
  940. /*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
  941. {
  942. PyObject *raw, *codec_info = NULL;
  943. PyObject *res;
  944. int r;
  945. self->ok = 0;
  946. self->detached = 0;
  947. if (encoding == NULL) {
  948. PyInterpreterState *interp = _PyInterpreterState_GET();
  949. if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
  950. if (PyErr_WarnEx(PyExc_EncodingWarning,
  951. "'encoding' argument not specified", 1)) {
  952. return -1;
  953. }
  954. }
  955. }
  956. if (errors == Py_None) {
  957. errors = &_Py_ID(strict);
  958. }
  959. else if (!PyUnicode_Check(errors)) {
  960. // Check 'errors' argument here because Argument Clinic doesn't support
  961. // 'str(accept={str, NoneType})' converter.
  962. PyErr_Format(
  963. PyExc_TypeError,
  964. "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
  965. Py_TYPE(errors)->tp_name);
  966. return -1;
  967. }
  968. else if (io_check_errors(errors)) {
  969. return -1;
  970. }
  971. Py_ssize_t errors_len;
  972. const char *errors_str = PyUnicode_AsUTF8AndSize(errors, &errors_len);
  973. if (errors_str == NULL) {
  974. return -1;
  975. }
  976. if (strlen(errors_str) != (size_t)errors_len) {
  977. PyErr_SetString(PyExc_ValueError, "embedded null character");
  978. return -1;
  979. }
  980. if (validate_newline(newline) < 0) {
  981. return -1;
  982. }
  983. Py_CLEAR(self->buffer);
  984. Py_CLEAR(self->encoding);
  985. Py_CLEAR(self->encoder);
  986. Py_CLEAR(self->decoder);
  987. Py_CLEAR(self->readnl);
  988. Py_CLEAR(self->decoded_chars);
  989. Py_CLEAR(self->pending_bytes);
  990. Py_CLEAR(self->snapshot);
  991. Py_CLEAR(self->errors);
  992. Py_CLEAR(self->raw);
  993. self->decoded_chars_used = 0;
  994. self->pending_bytes_count = 0;
  995. self->encodefunc = NULL;
  996. self->b2cratio = 0.0;
  997. if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
  998. _Py_DECLARE_STR(utf_8, "utf-8");
  999. self->encoding = Py_NewRef(&_Py_STR(utf_8));
  1000. }
  1001. else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
  1002. self->encoding = _Py_GetLocaleEncodingObject();
  1003. if (self->encoding == NULL) {
  1004. goto error;
  1005. }
  1006. assert(PyUnicode_Check(self->encoding));
  1007. }
  1008. if (self->encoding != NULL) {
  1009. encoding = PyUnicode_AsUTF8(self->encoding);
  1010. if (encoding == NULL)
  1011. goto error;
  1012. }
  1013. else if (encoding != NULL) {
  1014. self->encoding = PyUnicode_FromString(encoding);
  1015. if (self->encoding == NULL)
  1016. goto error;
  1017. }
  1018. else {
  1019. PyErr_SetString(PyExc_OSError,
  1020. "could not determine default encoding");
  1021. goto error;
  1022. }
  1023. /* Check we have been asked for a real text encoding */
  1024. codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
  1025. if (codec_info == NULL) {
  1026. Py_CLEAR(self->encoding);
  1027. goto error;
  1028. }
  1029. /* XXX: Failures beyond this point have the potential to leak elements
  1030. * of the partially constructed object (like self->encoding)
  1031. */
  1032. self->errors = Py_NewRef(errors);
  1033. self->chunk_size = 8192;
  1034. self->line_buffering = line_buffering;
  1035. self->write_through = write_through;
  1036. if (set_newline(self, newline) < 0) {
  1037. goto error;
  1038. }
  1039. self->buffer = Py_NewRef(buffer);
  1040. /* Build the decoder object */
  1041. _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
  1042. self->state = state;
  1043. if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
  1044. goto error;
  1045. /* Build the encoder object */
  1046. if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
  1047. goto error;
  1048. /* Finished sorting out the codec details */
  1049. Py_CLEAR(codec_info);
  1050. if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
  1051. Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
  1052. Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
  1053. {
  1054. if (_PyObject_LookupAttr(buffer, &_Py_ID(raw), &raw) < 0)
  1055. goto error;
  1056. /* Cache the raw FileIO object to speed up 'closed' checks */
  1057. if (raw != NULL) {
  1058. if (Py_IS_TYPE(raw, state->PyFileIO_Type))
  1059. self->raw = raw;
  1060. else
  1061. Py_DECREF(raw);
  1062. }
  1063. }
  1064. res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
  1065. if (res == NULL)
  1066. goto error;
  1067. r = PyObject_IsTrue(res);
  1068. Py_DECREF(res);
  1069. if (r < 0)
  1070. goto error;
  1071. self->seekable = self->telling = r;
  1072. r = _PyObject_LookupAttr(buffer, &_Py_ID(read1), &res);
  1073. if (r < 0) {
  1074. goto error;
  1075. }
  1076. Py_XDECREF(res);
  1077. self->has_read1 = r;
  1078. self->encoding_start_of_stream = 0;
  1079. if (_textiowrapper_fix_encoder_state(self) < 0) {
  1080. goto error;
  1081. }
  1082. self->ok = 1;
  1083. return 0;
  1084. error:
  1085. Py_XDECREF(codec_info);
  1086. return -1;
  1087. }
  1088. /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
  1089. * -1 on error.
  1090. */
  1091. static int
  1092. convert_optional_bool(PyObject *obj, int default_value)
  1093. {
  1094. long v;
  1095. if (obj == Py_None) {
  1096. v = default_value;
  1097. }
  1098. else {
  1099. v = PyLong_AsLong(obj);
  1100. if (v == -1 && PyErr_Occurred())
  1101. return -1;
  1102. }
  1103. return v != 0;
  1104. }
  1105. static int
  1106. textiowrapper_change_encoding(textio *self, PyObject *encoding,
  1107. PyObject *errors, int newline_changed)
  1108. {
  1109. /* Use existing settings where new settings are not specified */
  1110. if (encoding == Py_None && errors == Py_None && !newline_changed) {
  1111. return 0; // no change
  1112. }
  1113. if (encoding == Py_None) {
  1114. encoding = self->encoding;
  1115. if (errors == Py_None) {
  1116. errors = self->errors;
  1117. }
  1118. Py_INCREF(encoding);
  1119. }
  1120. else {
  1121. if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
  1122. encoding = _Py_GetLocaleEncodingObject();
  1123. if (encoding == NULL) {
  1124. return -1;
  1125. }
  1126. } else {
  1127. Py_INCREF(encoding);
  1128. }
  1129. if (errors == Py_None) {
  1130. errors = &_Py_ID(strict);
  1131. }
  1132. }
  1133. Py_INCREF(errors);
  1134. const char *c_encoding = PyUnicode_AsUTF8(encoding);
  1135. if (c_encoding == NULL) {
  1136. Py_DECREF(encoding);
  1137. Py_DECREF(errors);
  1138. return -1;
  1139. }
  1140. const char *c_errors = PyUnicode_AsUTF8(errors);
  1141. if (c_errors == NULL) {
  1142. Py_DECREF(encoding);
  1143. Py_DECREF(errors);
  1144. return -1;
  1145. }
  1146. // Create new encoder & decoder
  1147. PyObject *codec_info = _PyCodec_LookupTextEncoding(
  1148. c_encoding, "codecs.open()");
  1149. if (codec_info == NULL) {
  1150. Py_DECREF(encoding);
  1151. Py_DECREF(errors);
  1152. return -1;
  1153. }
  1154. if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
  1155. _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
  1156. Py_DECREF(codec_info);
  1157. Py_DECREF(encoding);
  1158. Py_DECREF(errors);
  1159. return -1;
  1160. }
  1161. Py_DECREF(codec_info);
  1162. Py_SETREF(self->encoding, encoding);
  1163. Py_SETREF(self->errors, errors);
  1164. return _textiowrapper_fix_encoder_state(self);
  1165. }
  1166. /*[clinic input]
  1167. _io.TextIOWrapper.reconfigure
  1168. *
  1169. encoding: object = None
  1170. errors: object = None
  1171. newline as newline_obj: object(c_default="NULL") = None
  1172. line_buffering as line_buffering_obj: object = None
  1173. write_through as write_through_obj: object = None
  1174. Reconfigure the text stream with new parameters.
  1175. This also does an implicit stream flush.
  1176. [clinic start generated code]*/
  1177. static PyObject *
  1178. _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
  1179. PyObject *errors, PyObject *newline_obj,
  1180. PyObject *line_buffering_obj,
  1181. PyObject *write_through_obj)
  1182. /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
  1183. {
  1184. int line_buffering;
  1185. int write_through;
  1186. const char *newline = NULL;
  1187. if (encoding != Py_None && !PyUnicode_Check(encoding)) {
  1188. PyErr_Format(PyExc_TypeError,
  1189. "reconfigure() argument 'encoding' must be str or None, not %s",
  1190. Py_TYPE(encoding)->tp_name);
  1191. return NULL;
  1192. }
  1193. if (errors != Py_None && !PyUnicode_Check(errors)) {
  1194. PyErr_Format(PyExc_TypeError,
  1195. "reconfigure() argument 'errors' must be str or None, not %s",
  1196. Py_TYPE(errors)->tp_name);
  1197. return NULL;
  1198. }
  1199. if (newline_obj != NULL && newline_obj != Py_None &&
  1200. !PyUnicode_Check(newline_obj))
  1201. {
  1202. PyErr_Format(PyExc_TypeError,
  1203. "reconfigure() argument 'newline' must be str or None, not %s",
  1204. Py_TYPE(newline_obj)->tp_name);
  1205. return NULL;
  1206. }
  1207. /* Check if something is in the read buffer */
  1208. if (self->decoded_chars != NULL) {
  1209. if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
  1210. _unsupported(self->state,
  1211. "It is not possible to set the encoding or newline "
  1212. "of stream after the first read");
  1213. return NULL;
  1214. }
  1215. }
  1216. if (newline_obj != NULL && newline_obj != Py_None) {
  1217. newline = PyUnicode_AsUTF8(newline_obj);
  1218. if (newline == NULL || validate_newline(newline) < 0) {
  1219. return NULL;
  1220. }
  1221. }
  1222. line_buffering = convert_optional_bool(line_buffering_obj,
  1223. self->line_buffering);
  1224. if (line_buffering < 0) {
  1225. return NULL;
  1226. }
  1227. write_through = convert_optional_bool(write_through_obj,
  1228. self->write_through);
  1229. if (write_through < 0) {
  1230. return NULL;
  1231. }
  1232. PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
  1233. if (res == NULL) {
  1234. return NULL;
  1235. }
  1236. Py_DECREF(res);
  1237. self->b2cratio = 0;
  1238. if (newline_obj != NULL && set_newline(self, newline) < 0) {
  1239. return NULL;
  1240. }
  1241. if (textiowrapper_change_encoding(
  1242. self, encoding, errors, newline_obj != NULL) < 0) {
  1243. return NULL;
  1244. }
  1245. self->line_buffering = line_buffering;
  1246. self->write_through = write_through;
  1247. Py_RETURN_NONE;
  1248. }
  1249. static int
  1250. textiowrapper_clear(textio *self)
  1251. {
  1252. self->ok = 0;
  1253. Py_CLEAR(self->buffer);
  1254. Py_CLEAR(self->encoding);
  1255. Py_CLEAR(self->encoder);
  1256. Py_CLEAR(self->decoder);
  1257. Py_CLEAR(self->readnl);
  1258. Py_CLEAR(self->decoded_chars);
  1259. Py_CLEAR(self->pending_bytes);
  1260. Py_CLEAR(self->snapshot);
  1261. Py_CLEAR(self->errors);
  1262. Py_CLEAR(self->raw);
  1263. Py_CLEAR(self->dict);
  1264. return 0;
  1265. }
  1266. static void
  1267. textiowrapper_dealloc(textio *self)
  1268. {
  1269. PyTypeObject *tp = Py_TYPE(self);
  1270. self->finalizing = 1;
  1271. if (_PyIOBase_finalize((PyObject *) self) < 0)
  1272. return;
  1273. self->ok = 0;
  1274. _PyObject_GC_UNTRACK(self);
  1275. if (self->weakreflist != NULL)
  1276. PyObject_ClearWeakRefs((PyObject *)self);
  1277. (void)textiowrapper_clear(self);
  1278. tp->tp_free((PyObject *)self);
  1279. Py_DECREF(tp);
  1280. }
  1281. static int
  1282. textiowrapper_traverse(textio *self, visitproc visit, void *arg)
  1283. {
  1284. Py_VISIT(Py_TYPE(self));
  1285. Py_VISIT(self->buffer);
  1286. Py_VISIT(self->encoding);
  1287. Py_VISIT(self->encoder);
  1288. Py_VISIT(self->decoder);
  1289. Py_VISIT(self->readnl);
  1290. Py_VISIT(self->decoded_chars);
  1291. Py_VISIT(self->pending_bytes);
  1292. Py_VISIT(self->snapshot);
  1293. Py_VISIT(self->errors);
  1294. Py_VISIT(self->raw);
  1295. Py_VISIT(self->dict);
  1296. return 0;
  1297. }
  1298. static PyObject *
  1299. textiowrapper_closed_get(textio *self, void *context);
  1300. /* This macro takes some shortcuts to make the common case faster. */
  1301. #define CHECK_CLOSED(self) \
  1302. do { \
  1303. int r; \
  1304. PyObject *_res; \
  1305. if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
  1306. if (self->raw != NULL) \
  1307. r = _PyFileIO_closed(self->raw); \
  1308. else { \
  1309. _res = textiowrapper_closed_get(self, NULL); \
  1310. if (_res == NULL) \
  1311. return NULL; \
  1312. r = PyObject_IsTrue(_res); \
  1313. Py_DECREF(_res); \
  1314. if (r < 0) \
  1315. return NULL; \
  1316. } \
  1317. if (r > 0) { \
  1318. PyErr_SetString(PyExc_ValueError, \
  1319. "I/O operation on closed file."); \
  1320. return NULL; \
  1321. } \
  1322. } \
  1323. else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
  1324. return NULL; \
  1325. } while (0)
  1326. #define CHECK_INITIALIZED(self) \
  1327. if (self->ok <= 0) { \
  1328. PyErr_SetString(PyExc_ValueError, \
  1329. "I/O operation on uninitialized object"); \
  1330. return NULL; \
  1331. }
  1332. #define CHECK_ATTACHED(self) \
  1333. CHECK_INITIALIZED(self); \
  1334. if (self->detached) { \
  1335. PyErr_SetString(PyExc_ValueError, \
  1336. "underlying buffer has been detached"); \
  1337. return NULL; \
  1338. }
  1339. #define CHECK_ATTACHED_INT(self) \
  1340. if (self->ok <= 0) { \
  1341. PyErr_SetString(PyExc_ValueError, \
  1342. "I/O operation on uninitialized object"); \
  1343. return -1; \
  1344. } else if (self->detached) { \
  1345. PyErr_SetString(PyExc_ValueError, \
  1346. "underlying buffer has been detached"); \
  1347. return -1; \
  1348. }
  1349. /*[clinic input]
  1350. _io.TextIOWrapper.detach
  1351. [clinic start generated code]*/
  1352. static PyObject *
  1353. _io_TextIOWrapper_detach_impl(textio *self)
  1354. /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
  1355. {
  1356. PyObject *buffer, *res;
  1357. CHECK_ATTACHED(self);
  1358. res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
  1359. if (res == NULL)
  1360. return NULL;
  1361. Py_DECREF(res);
  1362. buffer = self->buffer;
  1363. self->buffer = NULL;
  1364. self->detached = 1;
  1365. return buffer;
  1366. }
  1367. /* Flush the internal write buffer. This doesn't explicitly flush the
  1368. underlying buffered object, though. */
  1369. static int
  1370. _textiowrapper_writeflush(textio *self)
  1371. {
  1372. if (self->pending_bytes == NULL)
  1373. return 0;
  1374. PyObject *pending = self->pending_bytes;
  1375. PyObject *b;
  1376. if (PyBytes_Check(pending)) {
  1377. b = Py_NewRef(pending);
  1378. }
  1379. else if (PyUnicode_Check(pending)) {
  1380. assert(PyUnicode_IS_ASCII(pending));
  1381. assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
  1382. b = PyBytes_FromStringAndSize(
  1383. PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
  1384. if (b == NULL) {
  1385. return -1;
  1386. }
  1387. }
  1388. else {
  1389. assert(PyList_Check(pending));
  1390. b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
  1391. if (b == NULL) {
  1392. return -1;
  1393. }
  1394. char *buf = PyBytes_AsString(b);
  1395. Py_ssize_t pos = 0;
  1396. for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
  1397. PyObject *obj = PyList_GET_ITEM(pending, i);
  1398. char *src;
  1399. Py_ssize_t len;
  1400. if (PyUnicode_Check(obj)) {
  1401. assert(PyUnicode_IS_ASCII(obj));
  1402. src = PyUnicode_DATA(obj);
  1403. len = PyUnicode_GET_LENGTH(obj);
  1404. }
  1405. else {
  1406. assert(PyBytes_Check(obj));
  1407. if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
  1408. Py_DECREF(b);
  1409. return -1;
  1410. }
  1411. }
  1412. memcpy(buf + pos, src, len);
  1413. pos += len;
  1414. }
  1415. assert(pos == self->pending_bytes_count);
  1416. }
  1417. self->pending_bytes_count = 0;
  1418. self->pending_bytes = NULL;
  1419. Py_DECREF(pending);
  1420. PyObject *ret;
  1421. do {
  1422. ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
  1423. } while (ret == NULL && _PyIO_trap_eintr());
  1424. Py_DECREF(b);
  1425. // NOTE: We cleared buffer but we don't know how many bytes are actually written
  1426. // when an error occurred.
  1427. if (ret == NULL)
  1428. return -1;
  1429. Py_DECREF(ret);
  1430. return 0;
  1431. }
  1432. /*[clinic input]
  1433. _io.TextIOWrapper.write
  1434. text: unicode
  1435. /
  1436. [clinic start generated code]*/
  1437. static PyObject *
  1438. _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
  1439. /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
  1440. {
  1441. PyObject *ret;
  1442. PyObject *b;
  1443. Py_ssize_t textlen;
  1444. int haslf = 0;
  1445. int needflush = 0, text_needflush = 0;
  1446. if (PyUnicode_READY(text) == -1)
  1447. return NULL;
  1448. CHECK_ATTACHED(self);
  1449. CHECK_CLOSED(self);
  1450. if (self->encoder == NULL) {
  1451. return _unsupported(self->state, "not writable");
  1452. }
  1453. Py_INCREF(text);
  1454. textlen = PyUnicode_GET_LENGTH(text);
  1455. if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
  1456. if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
  1457. haslf = 1;
  1458. if (haslf && self->writetranslate && self->writenl != NULL) {
  1459. PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
  1460. "ss", "\n", self->writenl);
  1461. Py_DECREF(text);
  1462. if (newtext == NULL)
  1463. return NULL;
  1464. text = newtext;
  1465. }
  1466. if (self->write_through)
  1467. text_needflush = 1;
  1468. if (self->line_buffering &&
  1469. (haslf ||
  1470. PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
  1471. needflush = 1;
  1472. /* XXX What if we were just reading? */
  1473. if (self->encodefunc != NULL) {
  1474. if (PyUnicode_IS_ASCII(text) &&
  1475. // See bpo-43260
  1476. PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
  1477. is_asciicompat_encoding(self->encodefunc)) {
  1478. b = Py_NewRef(text);
  1479. }
  1480. else {
  1481. b = (*self->encodefunc)((PyObject *) self, text);
  1482. }
  1483. self->encoding_start_of_stream = 0;
  1484. }
  1485. else {
  1486. b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
  1487. }
  1488. Py_DECREF(text);
  1489. if (b == NULL)
  1490. return NULL;
  1491. if (b != text && !PyBytes_Check(b)) {
  1492. PyErr_Format(PyExc_TypeError,
  1493. "encoder should return a bytes object, not '%.200s'",
  1494. Py_TYPE(b)->tp_name);
  1495. Py_DECREF(b);
  1496. return NULL;
  1497. }
  1498. Py_ssize_t bytes_len;
  1499. if (b == text) {
  1500. bytes_len = PyUnicode_GET_LENGTH(b);
  1501. }
  1502. else {
  1503. bytes_len = PyBytes_GET_SIZE(b);
  1504. }
  1505. // We should avoid concatinating huge data.
  1506. // Flush the buffer before adding b to the buffer if b is not small.
  1507. // https://github.com/python/cpython/issues/87426
  1508. if (bytes_len >= self->chunk_size) {
  1509. // _textiowrapper_writeflush() calls buffer.write().
  1510. // self->pending_bytes can be appended during buffer->write()
  1511. // or other thread.
  1512. // We need to loop until buffer becomes empty.
  1513. // https://github.com/python/cpython/issues/118138
  1514. // https://github.com/python/cpython/issues/119506
  1515. while (self->pending_bytes != NULL) {
  1516. if (_textiowrapper_writeflush(self) < 0) {
  1517. Py_DECREF(b);
  1518. return NULL;
  1519. }
  1520. }
  1521. }
  1522. if (self->pending_bytes == NULL) {
  1523. assert(self->pending_bytes_count == 0);
  1524. self->pending_bytes = b;
  1525. }
  1526. else if (!PyList_CheckExact(self->pending_bytes)) {
  1527. PyObject *list = PyList_New(2);
  1528. if (list == NULL) {
  1529. Py_DECREF(b);
  1530. return NULL;
  1531. }
  1532. // Since Python 3.12, allocating GC object won't trigger GC and release
  1533. // GIL. See https://github.com/python/cpython/issues/97922
  1534. assert(!PyList_CheckExact(self->pending_bytes));
  1535. PyList_SET_ITEM(list, 0, self->pending_bytes);
  1536. PyList_SET_ITEM(list, 1, b);
  1537. self->pending_bytes = list;
  1538. }
  1539. else {
  1540. if (PyList_Append(self->pending_bytes, b) < 0) {
  1541. Py_DECREF(b);
  1542. return NULL;
  1543. }
  1544. Py_DECREF(b);
  1545. }
  1546. self->pending_bytes_count += bytes_len;
  1547. if (self->pending_bytes_count >= self->chunk_size || needflush ||
  1548. text_needflush) {
  1549. if (_textiowrapper_writeflush(self) < 0)
  1550. return NULL;
  1551. }
  1552. if (needflush) {
  1553. ret = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
  1554. if (ret == NULL)
  1555. return NULL;
  1556. Py_DECREF(ret);
  1557. }
  1558. if (self->snapshot != NULL) {
  1559. textiowrapper_set_decoded_chars(self, NULL);
  1560. Py_CLEAR(self->snapshot);
  1561. }
  1562. if (self->decoder) {
  1563. ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
  1564. if (ret == NULL)
  1565. return NULL;
  1566. Py_DECREF(ret);
  1567. }
  1568. return PyLong_FromSsize_t(textlen);
  1569. }
  1570. /* Steal a reference to chars and store it in the decoded_char buffer;
  1571. */
  1572. static void
  1573. textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
  1574. {
  1575. Py_XSETREF(self->decoded_chars, chars);
  1576. self->decoded_chars_used = 0;
  1577. }
  1578. static PyObject *
  1579. textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
  1580. {
  1581. PyObject *chars;
  1582. Py_ssize_t avail;
  1583. if (self->decoded_chars == NULL)
  1584. return PyUnicode_FromStringAndSize(NULL, 0);
  1585. /* decoded_chars is guaranteed to be "ready". */
  1586. avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
  1587. - self->decoded_chars_used);
  1588. assert(avail >= 0);
  1589. if (n < 0 || n > avail)
  1590. n = avail;
  1591. if (self->decoded_chars_used > 0 || n < avail) {
  1592. chars = PyUnicode_Substring(self->decoded_chars,
  1593. self->decoded_chars_used,
  1594. self->decoded_chars_used + n);
  1595. if (chars == NULL)
  1596. return NULL;
  1597. }
  1598. else {
  1599. chars = Py_NewRef(self->decoded_chars);
  1600. }
  1601. self->decoded_chars_used += n;
  1602. return chars;
  1603. }
  1604. /* Read and decode the next chunk of data from the BufferedReader.
  1605. */
  1606. static int
  1607. textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
  1608. {
  1609. PyObject *dec_buffer = NULL;
  1610. PyObject *dec_flags = NULL;
  1611. PyObject *input_chunk = NULL;
  1612. Py_buffer input_chunk_buf;
  1613. PyObject *decoded_chars, *chunk_size;
  1614. Py_ssize_t nbytes, nchars;
  1615. int eof;
  1616. /* The return value is True unless EOF was reached. The decoded string is
  1617. * placed in self._decoded_chars (replacing its previous value). The
  1618. * entire input chunk is sent to the decoder, though some of it may remain
  1619. * buffered in the decoder, yet to be converted.
  1620. */
  1621. if (self->decoder == NULL) {
  1622. _unsupported(self->state, "not readable");
  1623. return -1;
  1624. }
  1625. if (self->telling) {
  1626. /* To prepare for tell(), we need to snapshot a point in the file
  1627. * where the decoder's input buffer is empty.
  1628. */
  1629. PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
  1630. &_Py_ID(getstate));
  1631. if (state == NULL)
  1632. return -1;
  1633. /* Given this, we know there was a valid snapshot point
  1634. * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
  1635. */
  1636. if (!PyTuple_Check(state)) {
  1637. PyErr_SetString(PyExc_TypeError,
  1638. "illegal decoder state");
  1639. Py_DECREF(state);
  1640. return -1;
  1641. }
  1642. if (!PyArg_ParseTuple(state,
  1643. "OO;illegal decoder state", &dec_buffer, &dec_flags))
  1644. {
  1645. Py_DECREF(state);
  1646. return -1;
  1647. }
  1648. if (!PyBytes_Check(dec_buffer)) {
  1649. PyErr_Format(PyExc_TypeError,
  1650. "illegal decoder state: the first item should be a "
  1651. "bytes object, not '%.200s'",
  1652. Py_TYPE(dec_buffer)->tp_name);
  1653. Py_DECREF(state);
  1654. return -1;
  1655. }
  1656. Py_INCREF(dec_buffer);
  1657. Py_INCREF(dec_flags);
  1658. Py_DECREF(state);
  1659. }
  1660. /* Read a chunk, decode it, and put the result in self._decoded_chars. */
  1661. if (size_hint > 0) {
  1662. size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
  1663. }
  1664. chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
  1665. if (chunk_size == NULL)
  1666. goto fail;
  1667. input_chunk = PyObject_CallMethodOneArg(self->buffer,
  1668. (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
  1669. chunk_size);
  1670. Py_DECREF(chunk_size);
  1671. if (input_chunk == NULL)
  1672. goto fail;
  1673. if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
  1674. PyErr_Format(PyExc_TypeError,
  1675. "underlying %s() should have returned a bytes-like object, "
  1676. "not '%.200s'", (self->has_read1 ? "read1": "read"),
  1677. Py_TYPE(input_chunk)->tp_name);
  1678. goto fail;
  1679. }
  1680. nbytes = input_chunk_buf.len;
  1681. eof = (nbytes == 0);
  1682. decoded_chars = _textiowrapper_decode(self->state, self->decoder,
  1683. input_chunk, eof);
  1684. PyBuffer_Release(&input_chunk_buf);
  1685. if (decoded_chars == NULL)
  1686. goto fail;
  1687. textiowrapper_set_decoded_chars(self, decoded_chars);
  1688. nchars = PyUnicode_GET_LENGTH(decoded_chars);
  1689. if (nchars > 0)
  1690. self->b2cratio = (double) nbytes / nchars;
  1691. else
  1692. self->b2cratio = 0.0;
  1693. if (nchars > 0)
  1694. eof = 0;
  1695. if (self->telling) {
  1696. /* At the snapshot point, len(dec_buffer) bytes before the read, the
  1697. * next input to be decoded is dec_buffer + input_chunk.
  1698. */
  1699. PyObject *next_input = dec_buffer;
  1700. PyBytes_Concat(&next_input, input_chunk);
  1701. dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
  1702. if (next_input == NULL) {
  1703. goto fail;
  1704. }
  1705. PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
  1706. if (snapshot == NULL) {
  1707. dec_flags = NULL;
  1708. goto fail;
  1709. }
  1710. Py_XSETREF(self->snapshot, snapshot);
  1711. }
  1712. Py_DECREF(input_chunk);
  1713. return (eof == 0);
  1714. fail:
  1715. Py_XDECREF(dec_buffer);
  1716. Py_XDECREF(dec_flags);
  1717. Py_XDECREF(input_chunk);
  1718. return -1;
  1719. }
  1720. /*[clinic input]
  1721. _io.TextIOWrapper.read
  1722. size as n: Py_ssize_t(accept={int, NoneType}) = -1
  1723. /
  1724. [clinic start generated code]*/
  1725. static PyObject *
  1726. _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
  1727. /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
  1728. {
  1729. PyObject *result = NULL, *chunks = NULL;
  1730. CHECK_ATTACHED(self);
  1731. CHECK_CLOSED(self);
  1732. if (self->decoder == NULL) {
  1733. return _unsupported(self->state, "not readable");
  1734. }
  1735. if (_textiowrapper_writeflush(self) < 0)
  1736. return NULL;
  1737. if (n < 0) {
  1738. /* Read everything */
  1739. PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
  1740. PyObject *decoded;
  1741. if (bytes == NULL)
  1742. goto fail;
  1743. _PyIO_State *state = self->state;
  1744. if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
  1745. decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
  1746. bytes, 1);
  1747. else
  1748. decoded = PyObject_CallMethodObjArgs(
  1749. self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
  1750. Py_DECREF(bytes);
  1751. if (check_decoded(decoded) < 0)
  1752. goto fail;
  1753. result = textiowrapper_get_decoded_chars(self, -1);
  1754. if (result == NULL) {
  1755. Py_DECREF(decoded);
  1756. return NULL;
  1757. }
  1758. PyUnicode_AppendAndDel(&result, decoded);
  1759. if (result == NULL)
  1760. goto fail;
  1761. if (self->snapshot != NULL) {
  1762. textiowrapper_set_decoded_chars(self, NULL);
  1763. Py_CLEAR(self->snapshot);
  1764. }
  1765. return result;
  1766. }
  1767. else {
  1768. int res = 1;
  1769. Py_ssize_t remaining = n;
  1770. result = textiowrapper_get_decoded_chars(self, n);
  1771. if (result == NULL)
  1772. goto fail;
  1773. if (PyUnicode_READY(result) == -1)
  1774. goto fail;
  1775. remaining -= PyUnicode_GET_LENGTH(result);
  1776. /* Keep reading chunks until we have n characters to return */
  1777. while (remaining > 0) {
  1778. res = textiowrapper_read_chunk(self, remaining);
  1779. if (res < 0) {
  1780. /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
  1781. when EINTR occurs so we needn't do it ourselves. */
  1782. if (_PyIO_trap_eintr()) {
  1783. continue;
  1784. }
  1785. goto fail;
  1786. }
  1787. if (res == 0) /* EOF */
  1788. break;
  1789. if (chunks == NULL) {
  1790. chunks = PyList_New(0);
  1791. if (chunks == NULL)
  1792. goto fail;
  1793. }
  1794. if (PyUnicode_GET_LENGTH(result) > 0 &&
  1795. PyList_Append(chunks, result) < 0)
  1796. goto fail;
  1797. Py_DECREF(result);
  1798. result = textiowrapper_get_decoded_chars(self, remaining);
  1799. if (result == NULL)
  1800. goto fail;
  1801. remaining -= PyUnicode_GET_LENGTH(result);
  1802. }
  1803. if (chunks != NULL) {
  1804. if (result != NULL && PyList_Append(chunks, result) < 0)
  1805. goto fail;
  1806. _Py_DECLARE_STR(empty, "");
  1807. Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
  1808. if (result == NULL)
  1809. goto fail;
  1810. Py_CLEAR(chunks);
  1811. }
  1812. return result;
  1813. }
  1814. fail:
  1815. Py_XDECREF(result);
  1816. Py_XDECREF(chunks);
  1817. return NULL;
  1818. }
  1819. /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
  1820. that is to the NUL character. Otherwise the function will produce
  1821. incorrect results. */
  1822. static const char *
  1823. find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
  1824. {
  1825. if (kind == PyUnicode_1BYTE_KIND) {
  1826. assert(ch < 256);
  1827. return (char *) memchr((const void *) s, (char) ch, end - s);
  1828. }
  1829. for (;;) {
  1830. while (PyUnicode_READ(kind, s, 0) > ch)
  1831. s += kind;
  1832. if (PyUnicode_READ(kind, s, 0) == ch)
  1833. return s;
  1834. if (s == end)
  1835. return NULL;
  1836. s += kind;
  1837. }
  1838. }
  1839. Py_ssize_t
  1840. _PyIO_find_line_ending(
  1841. int translated, int universal, PyObject *readnl,
  1842. int kind, const char *start, const char *end, Py_ssize_t *consumed)
  1843. {
  1844. Py_ssize_t len = (end - start)/kind;
  1845. if (translated) {
  1846. /* Newlines are already translated, only search for \n */
  1847. const char *pos = find_control_char(kind, start, end, '\n');
  1848. if (pos != NULL)
  1849. return (pos - start)/kind + 1;
  1850. else {
  1851. *consumed = len;
  1852. return -1;
  1853. }
  1854. }
  1855. else if (universal) {
  1856. /* Universal newline search. Find any of \r, \r\n, \n
  1857. * The decoder ensures that \r\n are not split in two pieces
  1858. */
  1859. const char *s = start;
  1860. for (;;) {
  1861. Py_UCS4 ch;
  1862. /* Fast path for non-control chars. The loop always ends
  1863. since the Unicode string is NUL-terminated. */
  1864. while (PyUnicode_READ(kind, s, 0) > '\r')
  1865. s += kind;
  1866. if (s >= end) {
  1867. *consumed = len;
  1868. return -1;
  1869. }
  1870. ch = PyUnicode_READ(kind, s, 0);
  1871. s += kind;
  1872. if (ch == '\n')
  1873. return (s - start)/kind;
  1874. if (ch == '\r') {
  1875. if (PyUnicode_READ(kind, s, 0) == '\n')
  1876. return (s - start)/kind + 1;
  1877. else
  1878. return (s - start)/kind;
  1879. }
  1880. }
  1881. }
  1882. else {
  1883. /* Non-universal mode. */
  1884. Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
  1885. const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
  1886. /* Assume that readnl is an ASCII character. */
  1887. assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
  1888. if (readnl_len == 1) {
  1889. const char *pos = find_control_char(kind, start, end, nl[0]);
  1890. if (pos != NULL)
  1891. return (pos - start)/kind + 1;
  1892. *consumed = len;
  1893. return -1;
  1894. }
  1895. else {
  1896. const char *s = start;
  1897. const char *e = end - (readnl_len - 1)*kind;
  1898. const char *pos;
  1899. if (e < s)
  1900. e = s;
  1901. while (s < e) {
  1902. Py_ssize_t i;
  1903. const char *pos = find_control_char(kind, s, end, nl[0]);
  1904. if (pos == NULL || pos >= e)
  1905. break;
  1906. for (i = 1; i < readnl_len; i++) {
  1907. if (PyUnicode_READ(kind, pos, i) != nl[i])
  1908. break;
  1909. }
  1910. if (i == readnl_len)
  1911. return (pos - start)/kind + readnl_len;
  1912. s = pos + kind;
  1913. }
  1914. pos = find_control_char(kind, e, end, nl[0]);
  1915. if (pos == NULL)
  1916. *consumed = len;
  1917. else
  1918. *consumed = (pos - start)/kind;
  1919. return -1;
  1920. }
  1921. }
  1922. }
  1923. static PyObject *
  1924. _textiowrapper_readline(textio *self, Py_ssize_t limit)
  1925. {
  1926. PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
  1927. Py_ssize_t start, endpos, chunked, offset_to_buffer;
  1928. int res;
  1929. CHECK_CLOSED(self);
  1930. if (_textiowrapper_writeflush(self) < 0)
  1931. return NULL;
  1932. chunked = 0;
  1933. while (1) {
  1934. const char *ptr;
  1935. Py_ssize_t line_len;
  1936. int kind;
  1937. Py_ssize_t consumed = 0;
  1938. /* First, get some data if necessary */
  1939. res = 1;
  1940. while (!self->decoded_chars ||
  1941. !PyUnicode_GET_LENGTH(self->decoded_chars)) {
  1942. res = textiowrapper_read_chunk(self, 0);
  1943. if (res < 0) {
  1944. /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
  1945. when EINTR occurs so we needn't do it ourselves. */
  1946. if (_PyIO_trap_eintr()) {
  1947. continue;
  1948. }
  1949. goto error;
  1950. }
  1951. if (res == 0)
  1952. break;
  1953. }
  1954. if (res == 0) {
  1955. /* end of file */
  1956. textiowrapper_set_decoded_chars(self, NULL);
  1957. Py_CLEAR(self->snapshot);
  1958. start = endpos = offset_to_buffer = 0;
  1959. break;
  1960. }
  1961. if (remaining == NULL) {
  1962. line = Py_NewRef(self->decoded_chars);
  1963. start = self->decoded_chars_used;
  1964. offset_to_buffer = 0;
  1965. }
  1966. else {
  1967. assert(self->decoded_chars_used == 0);
  1968. line = PyUnicode_Concat(remaining, self->decoded_chars);
  1969. start = 0;
  1970. offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
  1971. Py_CLEAR(remaining);
  1972. if (line == NULL)
  1973. goto error;
  1974. if (PyUnicode_READY(line) == -1)
  1975. goto error;
  1976. }
  1977. ptr = PyUnicode_DATA(line);
  1978. line_len = PyUnicode_GET_LENGTH(line);
  1979. kind = PyUnicode_KIND(line);
  1980. endpos = _PyIO_find_line_ending(
  1981. self->readtranslate, self->readuniversal, self->readnl,
  1982. kind,
  1983. ptr + kind * start,
  1984. ptr + kind * line_len,
  1985. &consumed);
  1986. if (endpos >= 0) {
  1987. endpos += start;
  1988. if (limit >= 0 && (endpos - start) + chunked >= limit)
  1989. endpos = start + limit - chunked;
  1990. break;
  1991. }
  1992. /* We can put aside up to `endpos` */
  1993. endpos = consumed + start;
  1994. if (limit >= 0 && (endpos - start) + chunked >= limit) {
  1995. /* Didn't find line ending, but reached length limit */
  1996. endpos = start + limit - chunked;
  1997. break;
  1998. }
  1999. if (endpos > start) {
  2000. /* No line ending seen yet - put aside current data */
  2001. PyObject *s;
  2002. if (chunks == NULL) {
  2003. chunks = PyList_New(0);
  2004. if (chunks == NULL)
  2005. goto error;
  2006. }
  2007. s = PyUnicode_Substring(line, start, endpos);
  2008. if (s == NULL)
  2009. goto error;
  2010. if (PyList_Append(chunks, s) < 0) {
  2011. Py_DECREF(s);
  2012. goto error;
  2013. }
  2014. chunked += PyUnicode_GET_LENGTH(s);
  2015. Py_DECREF(s);
  2016. }
  2017. /* There may be some remaining bytes we'll have to prepend to the
  2018. next chunk of data */
  2019. if (endpos < line_len) {
  2020. remaining = PyUnicode_Substring(line, endpos, line_len);
  2021. if (remaining == NULL)
  2022. goto error;
  2023. }
  2024. Py_CLEAR(line);
  2025. /* We have consumed the buffer */
  2026. textiowrapper_set_decoded_chars(self, NULL);
  2027. }
  2028. if (line != NULL) {
  2029. /* Our line ends in the current buffer */
  2030. self->decoded_chars_used = endpos - offset_to_buffer;
  2031. if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
  2032. PyObject *s = PyUnicode_Substring(line, start, endpos);
  2033. Py_CLEAR(line);
  2034. if (s == NULL)
  2035. goto error;
  2036. line = s;
  2037. }
  2038. }
  2039. if (remaining != NULL) {
  2040. if (chunks == NULL) {
  2041. chunks = PyList_New(0);
  2042. if (chunks == NULL)
  2043. goto error;
  2044. }
  2045. if (PyList_Append(chunks, remaining) < 0)
  2046. goto error;
  2047. Py_CLEAR(remaining);
  2048. }
  2049. if (chunks != NULL) {
  2050. if (line != NULL) {
  2051. if (PyList_Append(chunks, line) < 0)
  2052. goto error;
  2053. Py_DECREF(line);
  2054. }
  2055. line = PyUnicode_Join(&_Py_STR(empty), chunks);
  2056. if (line == NULL)
  2057. goto error;
  2058. Py_CLEAR(chunks);
  2059. }
  2060. if (line == NULL) {
  2061. line = Py_NewRef(&_Py_STR(empty));
  2062. }
  2063. return line;
  2064. error:
  2065. Py_XDECREF(chunks);
  2066. Py_XDECREF(remaining);
  2067. Py_XDECREF(line);
  2068. return NULL;
  2069. }
  2070. /*[clinic input]
  2071. _io.TextIOWrapper.readline
  2072. size: Py_ssize_t = -1
  2073. /
  2074. [clinic start generated code]*/
  2075. static PyObject *
  2076. _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
  2077. /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
  2078. {
  2079. CHECK_ATTACHED(self);
  2080. return _textiowrapper_readline(self, size);
  2081. }
  2082. /* Seek and Tell */
  2083. typedef struct {
  2084. Py_off_t start_pos;
  2085. int dec_flags;
  2086. int bytes_to_feed;
  2087. int chars_to_skip;
  2088. char need_eof;
  2089. } cookie_type;
  2090. /*
  2091. To speed up cookie packing/unpacking, we store the fields in a temporary
  2092. string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
  2093. The following macros define at which offsets in the intermediary byte
  2094. string the various CookieStruct fields will be stored.
  2095. */
  2096. #define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
  2097. #if PY_BIG_ENDIAN
  2098. /* We want the least significant byte of start_pos to also be the least
  2099. significant byte of the cookie, which means that in big-endian mode we
  2100. must copy the fields in reverse order. */
  2101. # define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
  2102. # define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
  2103. # define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
  2104. # define OFF_CHARS_TO_SKIP (sizeof(char))
  2105. # define OFF_NEED_EOF 0
  2106. #else
  2107. /* Little-endian mode: the least significant byte of start_pos will
  2108. naturally end up the least significant byte of the cookie. */
  2109. # define OFF_START_POS 0
  2110. # define OFF_DEC_FLAGS (sizeof(Py_off_t))
  2111. # define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
  2112. # define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
  2113. # define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
  2114. #endif
  2115. static int
  2116. textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
  2117. {
  2118. unsigned char buffer[COOKIE_BUF_LEN];
  2119. PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
  2120. if (cookieLong == NULL)
  2121. return -1;
  2122. if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
  2123. PY_LITTLE_ENDIAN, 0) < 0) {
  2124. Py_DECREF(cookieLong);
  2125. return -1;
  2126. }
  2127. Py_DECREF(cookieLong);
  2128. memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
  2129. memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
  2130. memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
  2131. memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
  2132. memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
  2133. return 0;
  2134. }
  2135. static PyObject *
  2136. textiowrapper_build_cookie(cookie_type *cookie)
  2137. {
  2138. unsigned char buffer[COOKIE_BUF_LEN];
  2139. memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
  2140. memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
  2141. memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
  2142. memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
  2143. memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
  2144. return _PyLong_FromByteArray(buffer, sizeof(buffer),
  2145. PY_LITTLE_ENDIAN, 0);
  2146. }
  2147. static int
  2148. _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
  2149. {
  2150. PyObject *res;
  2151. /* When seeking to the start of the stream, we call decoder.reset()
  2152. rather than decoder.getstate().
  2153. This is for a few decoders such as utf-16 for which the state value
  2154. at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
  2155. utf-16, that we are expecting a BOM).
  2156. */
  2157. if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
  2158. res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
  2159. }
  2160. else {
  2161. res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
  2162. "((yi))", "", cookie->dec_flags);
  2163. }
  2164. if (res == NULL) {
  2165. return -1;
  2166. }
  2167. Py_DECREF(res);
  2168. return 0;
  2169. }
  2170. static int
  2171. _textiowrapper_encoder_reset(textio *self, int start_of_stream)
  2172. {
  2173. PyObject *res;
  2174. if (start_of_stream) {
  2175. res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
  2176. self->encoding_start_of_stream = 1;
  2177. }
  2178. else {
  2179. res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
  2180. _PyLong_GetZero());
  2181. self->encoding_start_of_stream = 0;
  2182. }
  2183. if (res == NULL)
  2184. return -1;
  2185. Py_DECREF(res);
  2186. return 0;
  2187. }
  2188. static int
  2189. _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
  2190. {
  2191. /* Same as _textiowrapper_decoder_setstate() above. */
  2192. return _textiowrapper_encoder_reset(
  2193. self, cookie->start_pos == 0 && cookie->dec_flags == 0);
  2194. }
  2195. /*[clinic input]
  2196. _io.TextIOWrapper.seek
  2197. cookie as cookieObj: object
  2198. Zero or an opaque number returned by tell().
  2199. whence: int(c_default='0') = os.SEEK_SET
  2200. The relative position to seek from.
  2201. /
  2202. Set the stream position, and return the new stream position.
  2203. Four operations are supported, given by the following argument
  2204. combinations:
  2205. - seek(0, SEEK_SET): Rewind to the start of the stream.
  2206. - seek(cookie, SEEK_SET): Restore a previous position;
  2207. 'cookie' must be a number returned by tell().
  2208. - seek(0, SEEK_END): Fast-forward to the end of the stream.
  2209. - seek(0, SEEK_CUR): Leave the current stream position unchanged.
  2210. Any other argument combinations are invalid,
  2211. and may raise exceptions.
  2212. [clinic start generated code]*/
  2213. static PyObject *
  2214. _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
  2215. /*[clinic end generated code: output=0a15679764e2d04d input=0f68adcb02cf2823]*/
  2216. {
  2217. PyObject *posobj;
  2218. cookie_type cookie;
  2219. PyObject *res;
  2220. int cmp;
  2221. PyObject *snapshot;
  2222. CHECK_ATTACHED(self);
  2223. CHECK_CLOSED(self);
  2224. Py_INCREF(cookieObj);
  2225. if (!self->seekable) {
  2226. _unsupported(self->state, "underlying stream is not seekable");
  2227. goto fail;
  2228. }
  2229. PyObject *zero = _PyLong_GetZero(); // borrowed reference
  2230. switch (whence) {
  2231. case SEEK_CUR:
  2232. /* seek relative to current position */
  2233. cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
  2234. if (cmp < 0)
  2235. goto fail;
  2236. if (cmp == 0) {
  2237. _unsupported(self->state, "can't do nonzero cur-relative seeks");
  2238. goto fail;
  2239. }
  2240. /* Seeking to the current position should attempt to
  2241. * sync the underlying buffer with the current position.
  2242. */
  2243. Py_DECREF(cookieObj);
  2244. cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
  2245. if (cookieObj == NULL)
  2246. goto fail;
  2247. break;
  2248. case SEEK_END:
  2249. /* seek relative to end of file */
  2250. cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
  2251. if (cmp < 0)
  2252. goto fail;
  2253. if (cmp == 0) {
  2254. _unsupported(self->state, "can't do nonzero end-relative seeks");
  2255. goto fail;
  2256. }
  2257. res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
  2258. if (res == NULL)
  2259. goto fail;
  2260. Py_DECREF(res);
  2261. textiowrapper_set_decoded_chars(self, NULL);
  2262. Py_CLEAR(self->snapshot);
  2263. if (self->decoder) {
  2264. res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
  2265. if (res == NULL)
  2266. goto fail;
  2267. Py_DECREF(res);
  2268. }
  2269. res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
  2270. Py_CLEAR(cookieObj);
  2271. if (res == NULL)
  2272. goto fail;
  2273. if (self->encoder) {
  2274. /* If seek() == 0, we are at the start of stream, otherwise not */
  2275. cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
  2276. if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
  2277. Py_DECREF(res);
  2278. goto fail;
  2279. }
  2280. }
  2281. return res;
  2282. case SEEK_SET:
  2283. break;
  2284. default:
  2285. PyErr_Format(PyExc_ValueError,
  2286. "invalid whence (%d, should be %d, %d or %d)", whence,
  2287. SEEK_SET, SEEK_CUR, SEEK_END);
  2288. goto fail;
  2289. }
  2290. cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
  2291. if (cmp < 0)
  2292. goto fail;
  2293. if (cmp == 1) {
  2294. PyErr_Format(PyExc_ValueError,
  2295. "negative seek position %R", cookieObj);
  2296. goto fail;
  2297. }
  2298. res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
  2299. if (res == NULL)
  2300. goto fail;
  2301. Py_DECREF(res);
  2302. /* The strategy of seek() is to go back to the safe start point
  2303. * and replay the effect of read(chars_to_skip) from there.
  2304. */
  2305. if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
  2306. goto fail;
  2307. /* Seek back to the safe start point. */
  2308. posobj = PyLong_FromOff_t(cookie.start_pos);
  2309. if (posobj == NULL)
  2310. goto fail;
  2311. res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
  2312. Py_DECREF(posobj);
  2313. if (res == NULL)
  2314. goto fail;
  2315. Py_DECREF(res);
  2316. textiowrapper_set_decoded_chars(self, NULL);
  2317. Py_CLEAR(self->snapshot);
  2318. /* Restore the decoder to its state from the safe start point. */
  2319. if (self->decoder) {
  2320. if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
  2321. goto fail;
  2322. }
  2323. if (cookie.chars_to_skip) {
  2324. /* Just like _read_chunk, feed the decoder and save a snapshot. */
  2325. PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
  2326. "i", cookie.bytes_to_feed);
  2327. PyObject *decoded;
  2328. if (input_chunk == NULL)
  2329. goto fail;
  2330. if (!PyBytes_Check(input_chunk)) {
  2331. PyErr_Format(PyExc_TypeError,
  2332. "underlying read() should have returned a bytes "
  2333. "object, not '%.200s'",
  2334. Py_TYPE(input_chunk)->tp_name);
  2335. Py_DECREF(input_chunk);
  2336. goto fail;
  2337. }
  2338. snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
  2339. if (snapshot == NULL) {
  2340. goto fail;
  2341. }
  2342. Py_XSETREF(self->snapshot, snapshot);
  2343. decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
  2344. input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
  2345. if (check_decoded(decoded) < 0)
  2346. goto fail;
  2347. textiowrapper_set_decoded_chars(self, decoded);
  2348. /* Skip chars_to_skip of the decoded characters. */
  2349. if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
  2350. PyErr_SetString(PyExc_OSError, "can't restore logical file position");
  2351. goto fail;
  2352. }
  2353. self->decoded_chars_used = cookie.chars_to_skip;
  2354. }
  2355. else {
  2356. snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
  2357. if (snapshot == NULL)
  2358. goto fail;
  2359. Py_XSETREF(self->snapshot, snapshot);
  2360. }
  2361. /* Finally, reset the encoder (merely useful for proper BOM handling) */
  2362. if (self->encoder) {
  2363. if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
  2364. goto fail;
  2365. }
  2366. return cookieObj;
  2367. fail:
  2368. Py_XDECREF(cookieObj);
  2369. return NULL;
  2370. }
  2371. /*[clinic input]
  2372. _io.TextIOWrapper.tell
  2373. Return the stream position as an opaque number.
  2374. The return value of tell() can be given as input to seek(), to restore a
  2375. previous stream position.
  2376. [clinic start generated code]*/
  static PyObject *
  _io_TextIOWrapper_tell_impl(textio *self)
  /*[clinic end generated code: output=4f168c08bf34ad5f input=0852d627d76fb520]*/
  {
      /* Compute the opaque position value returned by tell().
       *
       * When there is no decoder or no snapshot, the result is simply what
       * self->buffer.tell() returned.  Otherwise a cookie is assembled from
       * start_pos, dec_flags, bytes_to_feed, chars_to_skip and need_eof and
       * packed by textiowrapper_build_cookie().
       *
       * The decoder is re-run over the snapshot's input bytes to locate a
       * point where it has nothing buffered; its state is captured in
       * saved_state first and put back on both the `finally` and the `fail`
       * exits.
       *
       * Returns a new reference, or NULL with an exception set.
       */
      PyObject *res;
      PyObject *posobj = NULL;
      cookie_type cookie = {0,0,0,0,0};
      PyObject *next_input;
      Py_ssize_t chars_to_skip, chars_decoded;
      Py_ssize_t skip_bytes, skip_back;
      PyObject *saved_state = NULL;
      const char *input, *input_end;
      /* dec_buffer_len and dec_flags are outputs of DECODER_GETSTATE(). */
      Py_ssize_t dec_buffer_len;
      int dec_flags;

      CHECK_ATTACHED(self);
      CHECK_CLOSED(self);

      if (!self->seekable) {
          _unsupported(self->state, "underlying stream is not seekable");
          goto fail;
      }
      if (!self->telling) {
          PyErr_SetString(PyExc_OSError,
                          "telling position disabled by next() call");
          goto fail;
      }

      /* saved_state is still NULL here, so returning directly is equivalent
         to `goto fail`. */
      if (_textiowrapper_writeflush(self) < 0)
          return NULL;
      res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
      if (res == NULL)
          goto fail;
      Py_DECREF(res);

      posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
      if (posobj == NULL)
          goto fail;

      if (self->decoder == NULL || self->snapshot == NULL) {
          /* No decoding state to account for: the buffer position is the
             answer, and ownership of posobj passes to the caller. */
          assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
          return posobj;
      }

  #if defined(HAVE_LARGEFILE_SUPPORT)
      cookie.start_pos = PyLong_AsLongLong(posobj);
  #else
      cookie.start_pos = PyLong_AsLong(posobj);
  #endif
      /* posobj is released before the error check, so `fail` never has to
         deal with it. */
      Py_DECREF(posobj);
      if (PyErr_Occurred())
          goto fail;

      /* Skip backward to the snapshot point (see _read_chunk). */
      assert(PyTuple_Check(self->snapshot));
      /* NOTE(review): the "O" format hands back a reference borrowed from
         self->snapshot; next_input is never INCREF'd in this function. */
      if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
          goto fail;

      assert (PyBytes_Check(next_input));

      cookie.start_pos -= PyBytes_GET_SIZE(next_input);

      /* How many decoded characters have been used up since the snapshot? */
      if (self->decoded_chars_used == 0)  {
          /* We haven't moved from the snapshot point. */
          return textiowrapper_build_cookie(&cookie);
      }

      chars_to_skip = self->decoded_chars_used;

      /* Decoder state will be restored at the end */
      saved_state = PyObject_CallMethodNoArgs(self->decoder,
                                               &_Py_ID(getstate));
      if (saved_state == NULL)
          goto fail;

  /* DECODER_GETSTATE(): call decoder.getstate(), validate that it is a
     (bytes, int) tuple, and store the length of the bytes item in
     dec_buffer_len and the int in dec_flags.  Jumps to `fail` on error. */
  #define DECODER_GETSTATE() do { \
          PyObject *dec_buffer; \
          PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
              &_Py_ID(getstate)); \
          if (_state == NULL) \
              goto fail; \
          if (!PyTuple_Check(_state)) { \
              PyErr_SetString(PyExc_TypeError, \
                              "illegal decoder state"); \
              Py_DECREF(_state); \
              goto fail; \
          } \
          if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
                                &dec_buffer, &dec_flags)) \
          { \
              Py_DECREF(_state); \
              goto fail; \
          } \
          if (!PyBytes_Check(dec_buffer)) { \
              PyErr_Format(PyExc_TypeError, \
                           "illegal decoder state: the first item should be a " \
                           "bytes object, not '%.200s'", \
                           Py_TYPE(dec_buffer)->tp_name); \
              Py_DECREF(_state); \
              goto fail; \
          } \
          dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
          Py_DECREF(_state); \
      } while (0)

  /* DECODER_DECODE(start, len, res): feed `len` bytes starting at `start`
     to decoder.decode() and store the number of characters produced in
     `res`.  Jumps to `fail` when check_decoded() reports an error. */
  #define DECODER_DECODE(start, len, res) do { \
          PyObject *_decoded = _PyObject_CallMethod( \
              self->decoder, &_Py_ID(decode), "y#", start, len); \
          if (check_decoded(_decoded) < 0) \
              goto fail; \
          res = PyUnicode_GET_LENGTH(_decoded); \
          Py_DECREF(_decoded); \
      } while (0)

      /* Fast search for an acceptable start point, close to our
         current pos */
      /* The first guess scales the consumed character count by b2cratio;
         skip_back is the step used to retreat after an overshoot. */
      skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
      skip_back = 1;
      assert(skip_back <= PyBytes_GET_SIZE(next_input));
      input = PyBytes_AS_STRING(next_input);
      while (skip_bytes > 0) {
          /* Decode up to tentative start point */
          if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
              goto fail;
          DECODER_DECODE(input, skip_bytes, chars_decoded);
          if (chars_decoded <= chars_to_skip) {
              DECODER_GETSTATE();
              if (dec_buffer_len == 0) {
                  /* Before pos and no bytes buffered in decoder => OK */
                  cookie.dec_flags = dec_flags;
                  chars_to_skip -= chars_decoded;
                  break;
              }
              /* Skip back by buffered amount and reset heuristic */
              skip_bytes -= dec_buffer_len;
              skip_back = 1;
          }
          else {
              /* We're too far ahead, skip back a bit */
              /* The retreat step doubles on every consecutive overshoot. */
              skip_bytes -= skip_back;
              skip_back *= 2;
          }
      }
      if (skip_bytes <= 0) {
          /* The search ran out of bytes without finding a start point:
             start from the snapshot itself with the decoder reset to it. */
          skip_bytes = 0;
          if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
              goto fail;
      }

      /* Note our initial start point. */
      cookie.start_pos += skip_bytes;
      cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
      if (chars_to_skip == 0)
          goto finally;

      /* We should be close to the desired position.  Now feed the decoder one
       * byte at a time until we reach the `chars_to_skip` target.
       * As we go, note the nearest "safe start point" before the current
       * location (a point where the decoder has nothing buffered, so seek()
       * can safely start from there and advance to this location).
       */
      chars_decoded = 0;
      input = PyBytes_AS_STRING(next_input);
      input_end = input + PyBytes_GET_SIZE(next_input);
      input += skip_bytes;
      while (input < input_end) {
          Py_ssize_t n;

          DECODER_DECODE(input, (Py_ssize_t)1, n);
          /* We got n chars for 1 byte */
          chars_decoded += n;
          /* bytes_to_feed counts the bytes fed since the last safe start
             point; it is reset whenever a new one is recorded below. */
          cookie.bytes_to_feed += 1;
          DECODER_GETSTATE();

          if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
              /* Decoder buffer is empty, so this is a safe start point. */
              cookie.start_pos += cookie.bytes_to_feed;
              chars_to_skip -= chars_decoded;
              cookie.dec_flags = dec_flags;
              cookie.bytes_to_feed = 0;
              chars_decoded = 0;
          }
          if (chars_decoded >= chars_to_skip)
              break;
          input++;
      }
      if (input == input_end) {
          /* We didn't get enough decoded data; signal EOF to get more. */
          PyObject *decoded = _PyObject_CallMethod(
              self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
          if (check_decoded(decoded) < 0)
              goto fail;
          chars_decoded += PyUnicode_GET_LENGTH(decoded);
          Py_DECREF(decoded);
          cookie.need_eof = 1;

          if (chars_decoded < chars_to_skip) {
              PyErr_SetString(PyExc_OSError,
                              "can't reconstruct logical file position");
              goto fail;
          }
      }

  finally:
      /* Success exit: put the decoder back into the state captured in
         saved_state before building the result. */
      res = PyObject_CallMethodOneArg(
              self->decoder, &_Py_ID(setstate), saved_state);
      Py_DECREF(saved_state);
      if (res == NULL)
          return NULL;
      Py_DECREF(res);

      /* The returned cookie corresponds to the last safe start point. */
      cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
      return textiowrapper_build_cookie(&cookie);

  fail:
      /* Error exit.  If the decoder state was captured, restore it; the
         pending exception is fetched first and handed back through
         _PyErr_ChainExceptions1() after the setstate() call. */
      if (saved_state) {
          PyObject *exc = PyErr_GetRaisedException();
          res = PyObject_CallMethodOneArg(
                  self->decoder, &_Py_ID(setstate), saved_state);
          _PyErr_ChainExceptions1(exc);
          Py_DECREF(saved_state);
          Py_XDECREF(res);
      }
      return NULL;
  }
  2581. /*[clinic input]
  2582. _io.TextIOWrapper.truncate
  2583. pos: object = None
  2584. /
  2585. [clinic start generated code]*/
  2586. static PyObject *
  2587. _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
  2588. /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
  2589. {
  2590. PyObject *res;
  2591. CHECK_ATTACHED(self)
  2592. res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
  2593. if (res == NULL)
  2594. return NULL;
  2595. Py_DECREF(res);
  2596. return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
  2597. }
  2598. static PyObject *
  2599. textiowrapper_repr(textio *self)
  2600. {
  2601. PyObject *nameobj, *modeobj, *res, *s;
  2602. int status;
  2603. CHECK_INITIALIZED(self);
  2604. res = PyUnicode_FromString("<_io.TextIOWrapper");
  2605. if (res == NULL)
  2606. return NULL;
  2607. status = Py_ReprEnter((PyObject *)self);
  2608. if (status != 0) {
  2609. if (status > 0) {
  2610. PyErr_Format(PyExc_RuntimeError,
  2611. "reentrant call inside %s.__repr__",
  2612. Py_TYPE(self)->tp_name);
  2613. }
  2614. goto error;
  2615. }
  2616. if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) {
  2617. if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
  2618. goto error;
  2619. }
  2620. /* Ignore ValueError raised if the underlying stream was detached */
  2621. PyErr_Clear();
  2622. }
  2623. if (nameobj != NULL) {
  2624. s = PyUnicode_FromFormat(" name=%R", nameobj);
  2625. Py_DECREF(nameobj);
  2626. if (s == NULL)
  2627. goto error;
  2628. PyUnicode_AppendAndDel(&res, s);
  2629. if (res == NULL)
  2630. goto error;
  2631. }
  2632. if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) {
  2633. goto error;
  2634. }
  2635. if (modeobj != NULL) {
  2636. s = PyUnicode_FromFormat(" mode=%R", modeobj);
  2637. Py_DECREF(modeobj);
  2638. if (s == NULL)
  2639. goto error;
  2640. PyUnicode_AppendAndDel(&res, s);
  2641. if (res == NULL)
  2642. goto error;
  2643. }
  2644. s = PyUnicode_FromFormat("%U encoding=%R>",
  2645. res, self->encoding);
  2646. Py_DECREF(res);
  2647. if (status == 0) {
  2648. Py_ReprLeave((PyObject *)self);
  2649. }
  2650. return s;
  2651. error:
  2652. Py_XDECREF(res);
  2653. if (status == 0) {
  2654. Py_ReprLeave((PyObject *)self);
  2655. }
  2656. return NULL;
  2657. }
  2658. /* Inquiries */
  2659. /*[clinic input]
  2660. _io.TextIOWrapper.fileno
  2661. [clinic start generated code]*/
  2662. static PyObject *
  2663. _io_TextIOWrapper_fileno_impl(textio *self)
  2664. /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
  2665. {
  2666. CHECK_ATTACHED(self);
  2667. return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
  2668. }
  2669. /*[clinic input]
  2670. _io.TextIOWrapper.seekable
  2671. [clinic start generated code]*/
  2672. static PyObject *
  2673. _io_TextIOWrapper_seekable_impl(textio *self)
  2674. /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
  2675. {
  2676. CHECK_ATTACHED(self);
  2677. return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
  2678. }
  2679. /*[clinic input]
  2680. _io.TextIOWrapper.readable
  2681. [clinic start generated code]*/
  2682. static PyObject *
  2683. _io_TextIOWrapper_readable_impl(textio *self)
  2684. /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
  2685. {
  2686. CHECK_ATTACHED(self);
  2687. return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
  2688. }
  2689. /*[clinic input]
  2690. _io.TextIOWrapper.writable
  2691. [clinic start generated code]*/
  2692. static PyObject *
  2693. _io_TextIOWrapper_writable_impl(textio *self)
  2694. /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
  2695. {
  2696. CHECK_ATTACHED(self);
  2697. return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
  2698. }
  2699. /*[clinic input]
  2700. _io.TextIOWrapper.isatty
  2701. [clinic start generated code]*/
  2702. static PyObject *
  2703. _io_TextIOWrapper_isatty_impl(textio *self)
  2704. /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
  2705. {
  2706. CHECK_ATTACHED(self);
  2707. return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
  2708. }
  2709. /*[clinic input]
  2710. _io.TextIOWrapper.flush
  2711. [clinic start generated code]*/
  2712. static PyObject *
  2713. _io_TextIOWrapper_flush_impl(textio *self)
  2714. /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
  2715. {
  2716. CHECK_ATTACHED(self);
  2717. CHECK_CLOSED(self);
  2718. self->telling = self->seekable;
  2719. if (_textiowrapper_writeflush(self) < 0)
  2720. return NULL;
  2721. return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
  2722. }
  2723. /*[clinic input]
  2724. _io.TextIOWrapper.close
  2725. [clinic start generated code]*/
  2726. static PyObject *
  2727. _io_TextIOWrapper_close_impl(textio *self)
  2728. /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
  2729. {
  2730. PyObject *res;
  2731. int r;
  2732. CHECK_ATTACHED(self);
  2733. res = textiowrapper_closed_get(self, NULL);
  2734. if (res == NULL)
  2735. return NULL;
  2736. r = PyObject_IsTrue(res);
  2737. Py_DECREF(res);
  2738. if (r < 0)
  2739. return NULL;
  2740. if (r > 0) {
  2741. Py_RETURN_NONE; /* stream already closed */
  2742. }
  2743. else {
  2744. PyObject *exc = NULL;
  2745. if (self->finalizing) {
  2746. res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
  2747. (PyObject *)self);
  2748. if (res) {
  2749. Py_DECREF(res);
  2750. }
  2751. else {
  2752. PyErr_Clear();
  2753. }
  2754. }
  2755. res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
  2756. if (res == NULL) {
  2757. exc = PyErr_GetRaisedException();
  2758. }
  2759. else {
  2760. Py_DECREF(res);
  2761. }
  2762. res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
  2763. if (exc != NULL) {
  2764. _PyErr_ChainExceptions1(exc);
  2765. Py_CLEAR(res);
  2766. }
  2767. return res;
  2768. }
  2769. }
  2770. static PyObject *
  2771. textiowrapper_iternext(textio *self)
  2772. {
  2773. PyObject *line;
  2774. CHECK_ATTACHED(self);
  2775. self->telling = 0;
  2776. if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
  2777. /* Skip method call overhead for speed */
  2778. line = _textiowrapper_readline(self, -1);
  2779. }
  2780. else {
  2781. line = PyObject_CallMethodNoArgs((PyObject *)self,
  2782. &_Py_ID(readline));
  2783. if (line && !PyUnicode_Check(line)) {
  2784. PyErr_Format(PyExc_OSError,
  2785. "readline() should have returned a str object, "
  2786. "not '%.200s'", Py_TYPE(line)->tp_name);
  2787. Py_DECREF(line);
  2788. return NULL;
  2789. }
  2790. }
  2791. if (line == NULL || PyUnicode_READY(line) == -1)
  2792. return NULL;
  2793. if (PyUnicode_GET_LENGTH(line) == 0) {
  2794. /* Reached EOF or would have blocked */
  2795. Py_DECREF(line);
  2796. Py_CLEAR(self->snapshot);
  2797. self->telling = self->seekable;
  2798. return NULL;
  2799. }
  2800. return line;
  2801. }
  2802. static PyObject *
  2803. textiowrapper_name_get(textio *self, void *context)
  2804. {
  2805. CHECK_ATTACHED(self);
  2806. return PyObject_GetAttr(self->buffer, &_Py_ID(name));
  2807. }
  2808. static PyObject *
  2809. textiowrapper_closed_get(textio *self, void *context)
  2810. {
  2811. CHECK_ATTACHED(self);
  2812. return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
  2813. }
  2814. static PyObject *
  2815. textiowrapper_newlines_get(textio *self, void *context)
  2816. {
  2817. PyObject *res;
  2818. CHECK_ATTACHED(self);
  2819. if (self->decoder == NULL ||
  2820. _PyObject_LookupAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
  2821. {
  2822. Py_RETURN_NONE;
  2823. }
  2824. return res;
  2825. }
  2826. static PyObject *
  2827. textiowrapper_errors_get(textio *self, void *context)
  2828. {
  2829. CHECK_INITIALIZED(self);
  2830. return Py_NewRef(self->errors);
  2831. }
  2832. static PyObject *
  2833. textiowrapper_chunk_size_get(textio *self, void *context)
  2834. {
  2835. CHECK_ATTACHED(self);
  2836. return PyLong_FromSsize_t(self->chunk_size);
  2837. }
  2838. static int
  2839. textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
  2840. {
  2841. Py_ssize_t n;
  2842. CHECK_ATTACHED_INT(self);
  2843. if (arg == NULL) {
  2844. PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
  2845. return -1;
  2846. }
  2847. n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
  2848. if (n == -1 && PyErr_Occurred())
  2849. return -1;
  2850. if (n <= 0) {
  2851. PyErr_SetString(PyExc_ValueError,
  2852. "a strictly positive integer is required");
  2853. return -1;
  2854. }
  2855. self->chunk_size = n;
  2856. return 0;
  2857. }
  /* Method table for IncrementalNewlineDecoder: decode, getstate, setstate
     and reset.  Each entry is a *_METHODDEF macro defined outside this part
     of the file (presumably in the Argument Clinic generated header). */
  static PyMethodDef incrementalnewlinedecoder_methods[] = {
      _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
      _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
      _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
      _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
      {NULL}  /* sentinel */
  };
  /* Computed attributes of IncrementalNewlineDecoder.  `newlines` has a
     getter only (the setter slot is NULL). */
  static PyGetSetDef incrementalnewlinedecoder_getset[] = {
      {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
      {NULL}  /* sentinel */
  };
  /* Type slots for IncrementalNewlineDecoder, referenced by nldecoder_spec.
     Wires up deallocation, GC traverse/clear, __init__, the docstring and
     the method and getset tables above. */
  static PyType_Slot nldecoder_slots[] = {
      {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
      {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
      {Py_tp_methods, incrementalnewlinedecoder_methods},
      {Py_tp_getset, incrementalnewlinedecoder_getset},
      {Py_tp_traverse, incrementalnewlinedecoder_traverse},
      {Py_tp_clear, incrementalnewlinedecoder_clear},
      {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
      {0, NULL},  /* sentinel */
  };
  /* Type specification for _io.IncrementalNewlineDecoder.  Not static, so it
     is visible to other translation units.  The type is subclassable
     (BASETYPE), takes part in garbage collection (HAVE_GC) and is flagged
     immutable (IMMUTABLETYPE). */
  PyType_Spec nldecoder_spec = {
      .name = "_io.IncrementalNewlineDecoder",
      .basicsize = sizeof(nldecoder_object),
      .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
                Py_TPFLAGS_IMMUTABLETYPE),
      .slots = nldecoder_slots,
  };
  /* Method table for TextIOWrapper.  The *_METHODDEF entries are macros
     defined outside this part of the file (presumably in the Argument Clinic
     generated header). */
  static PyMethodDef textiowrapper_methods[] = {
      _IO_TEXTIOWRAPPER_DETACH_METHODDEF
      _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
      _IO_TEXTIOWRAPPER_WRITE_METHODDEF
      _IO_TEXTIOWRAPPER_READ_METHODDEF
      _IO_TEXTIOWRAPPER_READLINE_METHODDEF
      _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
      _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

      _IO_TEXTIOWRAPPER_FILENO_METHODDEF
      _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
      _IO_TEXTIOWRAPPER_READABLE_METHODDEF
      _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
      _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

      _IO_TEXTIOWRAPPER_SEEK_METHODDEF
      _IO_TEXTIOWRAPPER_TELL_METHODDEF
      _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF

      /* Both pickling hooks go to _PyIOBase_cannot_pickle (judging by the
         name, it refuses to pickle the object -- confirm at its definition). */
      {"__reduce__", _PyIOBase_cannot_pickle, METH_VARARGS},
      {"__reduce_ex__", _PyIOBase_cannot_pickle, METH_VARARGS},
      {NULL, NULL}  /* sentinel */
  };
  /* Struct fields of `textio` exposed directly as Python attributes.
     Everything is READONLY except `_finalizing`, whose flags are 0 and which
     is therefore writable from Python. */
  static PyMemberDef textiowrapper_members[] = {
      {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
      {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
      {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
      {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
      {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
      /* Special member names: they give the offsets of the weak reference
         list and of the instance __dict__ inside the struct. */
      {"__weaklistoffset__", T_PYSSIZET, offsetof(textio, weakreflist), READONLY},
      {"__dictoffset__", T_PYSSIZET, offsetof(textio, dict), READONLY},
      {NULL}  /* sentinel */
  };
  /* Computed attributes of TextIOWrapper.  All are read-only (NULL setter)
     except `_CHUNK_SIZE`, which also has a setter. */
  static PyGetSetDef textiowrapper_getset[] = {
      {"name", (getter)textiowrapper_name_get, NULL, NULL},
      {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
      /* Disabled entry, kept as found; no `mode` getter is registered here. */
  /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
  */
      {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
      {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
      {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                      (setter)textiowrapper_chunk_size_set, NULL},
      {NULL}  /* sentinel */
  };
  /* Type slots for TextIOWrapper, referenced by textiowrapper_spec.  Wires up
     deallocation, repr, GC traverse/clear, iteration (tp_iternext),
     __init__, the docstring and the methods, members and getset tables
     above. */
  PyType_Slot textiowrapper_slots[] = {
      {Py_tp_dealloc, textiowrapper_dealloc},
      {Py_tp_repr, textiowrapper_repr},
      {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
      {Py_tp_traverse, textiowrapper_traverse},
      {Py_tp_clear, textiowrapper_clear},
      {Py_tp_iternext, textiowrapper_iternext},
      {Py_tp_methods, textiowrapper_methods},
      {Py_tp_members, textiowrapper_members},
      {Py_tp_getset, textiowrapper_getset},
      {Py_tp_init, _io_TextIOWrapper___init__},
      {0, NULL},  /* sentinel */
  };
  /* Type specification for _io.TextIOWrapper.  Not static, so it is visible
     to other translation units.  The type is subclassable (BASETYPE), takes
     part in garbage collection (HAVE_GC) and is flagged immutable
     (IMMUTABLETYPE). */
  PyType_Spec textiowrapper_spec = {
      .name = "_io.TextIOWrapper",
      .basicsize = sizeof(textio),
      .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
                Py_TPFLAGS_IMMUTABLETYPE),
      .slots = textiowrapper_slots,
  };