_elementtree.c 123 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485
  1. /*--------------------------------------------------------------------
  2. * Licensed to PSF under a Contributor Agreement.
  3. * See https://www.python.org/psf/license for licensing details.
  4. *
  5. * _elementtree - C accelerator for xml.etree.ElementTree
  6. * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
  7. * Copyright (c) 1999-2009 by Fredrik Lundh.
  8. *
  9. * info@pythonware.com
  10. * http://www.pythonware.com
  11. *--------------------------------------------------------------------
  12. */
  13. #define PY_SSIZE_T_CLEAN
  14. #include "Python.h"
  15. #include "structmember.h" // PyMemberDef
  16. #include "expat.h"
  17. #include "pyexpat.h"
  18. /* -------------------------------------------------------------------- */
  19. /* configuration */
  20. /* An element can hold this many children without extra memory
  21. allocations. */
  22. #define STATIC_CHILDREN 4
  23. /* For best performance, choose a value so that 80-90% of all nodes
  24. have no more than the given number of children. Set this to zero
  25. to minimize the size of the element structure itself (this only
  26. helps if you have lots of leaf nodes with attributes). */
  27. /* Also note that pymalloc always allocates blocks in multiples of
  28. eight bytes. For the current C version of ElementTree, this means
  29. that the number of children should be an even number, at least on
  30. 32-bit platforms. */
  31. /* -------------------------------------------------------------------- */
  32. /* compiler tweaks */
  33. #if defined(_MSC_VER)
  34. #define LOCAL(type) static __inline type __fastcall
  35. #else
  36. #define LOCAL(type) static type
  37. #endif
  /* Helpers for the 'join' flag kept in the lowest bit of the text and tail
     object pointers.  Every use of text or tail as an object pointer has to
     go through JOIN_OBJ; the ElementObject definition explains what the flag
     means.

     JOIN_GET(p)        -> the flag bit of p (0 or 1)
     JOIN_OBJ(p)        -> p with the flag bit masked off, as a PyObject*
     JOIN_SET(p, flag)  -> the object pointer of p or'ed with flag */
  #define JOIN_GET(p) ((uintptr_t)(p) & 1)
  #define JOIN_OBJ(p) ((PyObject *)((uintptr_t)(p) & ~(uintptr_t)1))
  #define JOIN_SET(p, flag) ((void *)((uintptr_t)(JOIN_OBJ(p)) | (flag)))
  45. /* Py_SETREF for a PyObject* that uses a join flag. */
  46. Py_LOCAL_INLINE(void)
  47. _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
  48. {
  49. PyObject *tmp = JOIN_OBJ(*p);
  50. *p = new_joined_ptr;
  51. Py_DECREF(tmp);
  52. }
  53. /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
  54. * reference since this function sets it to NULL.
  55. */
  56. static void _clear_joined_ptr(PyObject **p)
  57. {
  58. if (*p) {
  59. _set_joined_ptr(p, NULL);
  60. }
  61. }
  62. /* Per-module state; PEP 3121 */
  63. typedef struct {
  64. PyObject *parseerror_obj;
  65. PyObject *deepcopy_obj;
  66. PyObject *elementpath_obj;
  67. PyObject *comment_factory;
  68. PyObject *pi_factory;
  69. /* Interned strings */
  70. PyObject *str_text;
  71. PyObject *str_tail;
  72. PyObject *str_append;
  73. PyObject *str_find;
  74. PyObject *str_findtext;
  75. PyObject *str_findall;
  76. PyObject *str_iterfind;
  77. PyObject *str_doctype;
  78. /* Types defined by this extension */
  79. PyTypeObject *Element_Type;
  80. PyTypeObject *ElementIter_Type;
  81. PyTypeObject *TreeBuilder_Type;
  82. PyTypeObject *XMLParser_Type;
  83. PyObject *expat_capsule;
  84. struct PyExpat_CAPI *expat_capi;
  85. } elementtreestate;
  86. static struct PyModuleDef elementtreemodule;
  87. /* Given a module object (assumed to be _elementtree), get its per-module
  88. * state.
  89. */
  90. static inline elementtreestate*
  91. get_elementtree_state(PyObject *module)
  92. {
  93. void *state = PyModule_GetState(module);
  94. assert(state != NULL);
  95. return (elementtreestate *)state;
  96. }
  97. static inline elementtreestate *
  98. get_elementtree_state_by_cls(PyTypeObject *cls)
  99. {
  100. void *state = PyType_GetModuleState(cls);
  101. assert(state != NULL);
  102. return (elementtreestate *)state;
  103. }
  104. static inline elementtreestate *
  105. get_elementtree_state_by_type(PyTypeObject *tp)
  106. {
  107. PyObject *mod = PyType_GetModuleByDef(tp, &elementtreemodule);
  108. assert(mod != NULL);
  109. return get_elementtree_state(mod);
  110. }
  111. static int
  112. elementtree_clear(PyObject *m)
  113. {
  114. elementtreestate *st = get_elementtree_state(m);
  115. Py_CLEAR(st->parseerror_obj);
  116. Py_CLEAR(st->deepcopy_obj);
  117. Py_CLEAR(st->elementpath_obj);
  118. Py_CLEAR(st->comment_factory);
  119. Py_CLEAR(st->pi_factory);
  120. // Interned strings
  121. Py_CLEAR(st->str_append);
  122. Py_CLEAR(st->str_find);
  123. Py_CLEAR(st->str_findall);
  124. Py_CLEAR(st->str_findtext);
  125. Py_CLEAR(st->str_iterfind);
  126. Py_CLEAR(st->str_tail);
  127. Py_CLEAR(st->str_text);
  128. Py_CLEAR(st->str_doctype);
  129. // Heap types
  130. Py_CLEAR(st->Element_Type);
  131. Py_CLEAR(st->ElementIter_Type);
  132. Py_CLEAR(st->TreeBuilder_Type);
  133. Py_CLEAR(st->XMLParser_Type);
  134. Py_CLEAR(st->expat_capsule);
  135. st->expat_capi = NULL;
  136. return 0;
  137. }
  138. static int
  139. elementtree_traverse(PyObject *m, visitproc visit, void *arg)
  140. {
  141. elementtreestate *st = get_elementtree_state(m);
  142. Py_VISIT(st->parseerror_obj);
  143. Py_VISIT(st->deepcopy_obj);
  144. Py_VISIT(st->elementpath_obj);
  145. Py_VISIT(st->comment_factory);
  146. Py_VISIT(st->pi_factory);
  147. // Heap types
  148. Py_VISIT(st->Element_Type);
  149. Py_VISIT(st->ElementIter_Type);
  150. Py_VISIT(st->TreeBuilder_Type);
  151. Py_VISIT(st->XMLParser_Type);
  152. Py_VISIT(st->expat_capsule);
  153. return 0;
  154. }
  155. static void
  156. elementtree_free(void *m)
  157. {
  158. elementtree_clear((PyObject *)m);
  159. }
  160. /* helpers */
  161. LOCAL(PyObject*)
  162. list_join(PyObject* list)
  163. {
  164. /* join list elements */
  165. PyObject* joiner;
  166. PyObject* result;
  167. joiner = PyUnicode_FromStringAndSize("", 0);
  168. if (!joiner)
  169. return NULL;
  170. result = PyUnicode_Join(joiner, list);
  171. Py_DECREF(joiner);
  172. return result;
  173. }
  174. /* Is the given object an empty dictionary?
  175. */
  176. static int
  177. is_empty_dict(PyObject *obj)
  178. {
  179. return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
  180. }
  181. /* -------------------------------------------------------------------- */
  182. /* the Element type */
  183. typedef struct {
  184. /* attributes (a dictionary object), or NULL if no attributes */
  185. PyObject* attrib;
  186. /* child elements */
  187. Py_ssize_t length; /* actual number of items */
  188. Py_ssize_t allocated; /* allocated items */
  189. /* this either points to _children or to a malloced buffer */
  190. PyObject* *children;
  191. PyObject* _children[STATIC_CHILDREN];
  192. } ElementObjectExtra;
  193. typedef struct {
  194. PyObject_HEAD
  195. /* element tag (a string). */
  196. PyObject* tag;
  197. /* text before first child. note that this is a tagged pointer;
  198. use JOIN_OBJ to get the object pointer. the join flag is used
  199. to distinguish lists created by the tree builder from lists
  200. assigned to the attribute by application code; the former
  201. should be joined before being returned to the user, the latter
  202. should be left intact. */
  203. PyObject* text;
  204. /* text after this element, in parent. note that this is a tagged
  205. pointer; use JOIN_OBJ to get the object pointer. */
  206. PyObject* tail;
  207. ElementObjectExtra* extra;
  208. PyObject *weakreflist; /* For tp_weaklistoffset */
  209. } ElementObject;
  210. #define Element_CheckExact(st, op) Py_IS_TYPE(op, (st)->Element_Type)
  211. #define Element_Check(st, op) PyObject_TypeCheck(op, (st)->Element_Type)
  212. /* -------------------------------------------------------------------- */
  213. /* Element constructors and destructor */
  214. LOCAL(int)
  215. create_extra(ElementObject* self, PyObject* attrib)
  216. {
  217. self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
  218. if (!self->extra) {
  219. PyErr_NoMemory();
  220. return -1;
  221. }
  222. self->extra->attrib = Py_XNewRef(attrib);
  223. self->extra->length = 0;
  224. self->extra->allocated = STATIC_CHILDREN;
  225. self->extra->children = self->extra->_children;
  226. return 0;
  227. }
  228. LOCAL(void)
  229. dealloc_extra(ElementObjectExtra *extra)
  230. {
  231. Py_ssize_t i;
  232. if (!extra)
  233. return;
  234. Py_XDECREF(extra->attrib);
  235. for (i = 0; i < extra->length; i++)
  236. Py_DECREF(extra->children[i]);
  237. if (extra->children != extra->_children)
  238. PyObject_Free(extra->children);
  239. PyObject_Free(extra);
  240. }
  241. LOCAL(void)
  242. clear_extra(ElementObject* self)
  243. {
  244. ElementObjectExtra *myextra;
  245. if (!self->extra)
  246. return;
  247. /* Avoid DECREFs calling into this code again (cycles, etc.)
  248. */
  249. myextra = self->extra;
  250. self->extra = NULL;
  251. dealloc_extra(myextra);
  252. }
  253. /* Convenience internal function to create new Element objects with the given
  254. * tag and attributes.
  255. */
  256. LOCAL(PyObject*)
  257. create_new_element(elementtreestate *st, PyObject *tag, PyObject *attrib)
  258. {
  259. ElementObject* self;
  260. self = PyObject_GC_New(ElementObject, st->Element_Type);
  261. if (self == NULL)
  262. return NULL;
  263. self->extra = NULL;
  264. self->tag = Py_NewRef(tag);
  265. self->text = Py_NewRef(Py_None);
  266. self->tail = Py_NewRef(Py_None);
  267. self->weakreflist = NULL;
  268. PyObject_GC_Track(self);
  269. if (attrib != NULL && !is_empty_dict(attrib)) {
  270. if (create_extra(self, attrib) < 0) {
  271. Py_DECREF(self);
  272. return NULL;
  273. }
  274. }
  275. return (PyObject*) self;
  276. }
  277. static PyObject *
  278. element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  279. {
  280. ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
  281. if (e != NULL) {
  282. e->tag = Py_NewRef(Py_None);
  283. e->text = Py_NewRef(Py_None);
  284. e->tail = Py_NewRef(Py_None);
  285. e->extra = NULL;
  286. e->weakreflist = NULL;
  287. }
  288. return (PyObject *)e;
  289. }
  290. /* Helper function for extracting the attrib dictionary from a keywords dict.
  291. * This is required by some constructors/functions in this module that can
  292. * either accept attrib as a keyword argument or all attributes splashed
  293. * directly into *kwds.
  294. *
  295. * Return a dictionary with the content of kwds merged into the content of
  296. * attrib. If there is no attrib keyword, return a copy of kwds.
  297. */
  298. static PyObject*
  299. get_attrib_from_keywords(PyObject *kwds)
  300. {
  301. PyObject *attrib_str = PyUnicode_FromString("attrib");
  302. if (attrib_str == NULL) {
  303. return NULL;
  304. }
  305. PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
  306. if (attrib) {
  307. /* If attrib was found in kwds, copy its value and remove it from
  308. * kwds
  309. */
  310. if (!PyDict_Check(attrib)) {
  311. Py_DECREF(attrib_str);
  312. PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
  313. Py_TYPE(attrib)->tp_name);
  314. return NULL;
  315. }
  316. attrib = PyDict_Copy(attrib);
  317. if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
  318. Py_SETREF(attrib, NULL);
  319. }
  320. }
  321. else if (!PyErr_Occurred()) {
  322. attrib = PyDict_New();
  323. }
  324. Py_DECREF(attrib_str);
  325. if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
  326. Py_DECREF(attrib);
  327. return NULL;
  328. }
  329. return attrib;
  330. }
  331. /*[clinic input]
  332. module _elementtree
  333. class _elementtree.Element "ElementObject *" "clinic_state()->Element_Type"
  334. class _elementtree.TreeBuilder "TreeBuilderObject *" "clinic_state()->TreeBuilder_Type"
  335. class _elementtree.XMLParser "XMLParserObject *" "clinic_state()->XMLParser_Type"
  336. [clinic start generated code]*/
  337. /*[clinic end generated code: output=da39a3ee5e6b4b0d input=6c83ea832d2b0ef1]*/
  338. static int
  339. element_init(PyObject *self, PyObject *args, PyObject *kwds)
  340. {
  341. PyObject *tag;
  342. PyObject *attrib = NULL;
  343. ElementObject *self_elem;
  344. if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
  345. return -1;
  346. if (attrib) {
  347. /* attrib passed as positional arg */
  348. attrib = PyDict_Copy(attrib);
  349. if (!attrib)
  350. return -1;
  351. if (kwds) {
  352. if (PyDict_Update(attrib, kwds) < 0) {
  353. Py_DECREF(attrib);
  354. return -1;
  355. }
  356. }
  357. } else if (kwds) {
  358. /* have keywords args */
  359. attrib = get_attrib_from_keywords(kwds);
  360. if (!attrib)
  361. return -1;
  362. }
  363. self_elem = (ElementObject *)self;
  364. if (attrib != NULL && !is_empty_dict(attrib)) {
  365. if (create_extra(self_elem, attrib) < 0) {
  366. Py_DECREF(attrib);
  367. return -1;
  368. }
  369. }
  370. /* We own a reference to attrib here and it's no longer needed. */
  371. Py_XDECREF(attrib);
  372. /* Replace the objects already pointed to by tag, text and tail. */
  373. Py_XSETREF(self_elem->tag, Py_NewRef(tag));
  374. _set_joined_ptr(&self_elem->text, Py_NewRef(Py_None));
  375. _set_joined_ptr(&self_elem->tail, Py_NewRef(Py_None));
  376. return 0;
  377. }
  378. LOCAL(int)
  379. element_resize(ElementObject* self, Py_ssize_t extra)
  380. {
  381. Py_ssize_t size;
  382. PyObject* *children;
  383. assert(extra >= 0);
  384. /* make sure self->children can hold the given number of extra
  385. elements. set an exception and return -1 if allocation failed */
  386. if (!self->extra) {
  387. if (create_extra(self, NULL) < 0)
  388. return -1;
  389. }
  390. size = self->extra->length + extra; /* never overflows */
  391. if (size > self->extra->allocated) {
  392. /* use Python 2.4's list growth strategy */
  393. size = (size >> 3) + (size < 9 ? 3 : 6) + size;
  394. /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
  395. * which needs at least 4 bytes.
  396. * Although it's a false alarm always assume at least one child to
  397. * be safe.
  398. */
  399. size = size ? size : 1;
  400. if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
  401. goto nomemory;
  402. if (self->extra->children != self->extra->_children) {
  403. /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
  404. * "children", which needs at least 4 bytes. Although it's a
  405. * false alarm always assume at least one child to be safe.
  406. */
  407. children = PyObject_Realloc(self->extra->children,
  408. size * sizeof(PyObject*));
  409. if (!children)
  410. goto nomemory;
  411. } else {
  412. children = PyObject_Malloc(size * sizeof(PyObject*));
  413. if (!children)
  414. goto nomemory;
  415. /* copy existing children from static area to malloc buffer */
  416. memcpy(children, self->extra->children,
  417. self->extra->length * sizeof(PyObject*));
  418. }
  419. self->extra->children = children;
  420. self->extra->allocated = size;
  421. }
  422. return 0;
  423. nomemory:
  424. PyErr_NoMemory();
  425. return -1;
  426. }
  427. LOCAL(void)
  428. raise_type_error(PyObject *element)
  429. {
  430. PyErr_Format(PyExc_TypeError,
  431. "expected an Element, not \"%.200s\"",
  432. Py_TYPE(element)->tp_name);
  433. }
  434. LOCAL(int)
  435. element_add_subelement(elementtreestate *st, ElementObject *self,
  436. PyObject *element)
  437. {
  438. /* add a child element to a parent */
  439. if (!Element_Check(st, element)) {
  440. raise_type_error(element);
  441. return -1;
  442. }
  443. if (element_resize(self, 1) < 0)
  444. return -1;
  445. self->extra->children[self->extra->length] = Py_NewRef(element);
  446. self->extra->length++;
  447. return 0;
  448. }
  449. LOCAL(PyObject*)
  450. element_get_attrib(ElementObject* self)
  451. {
  452. /* return borrowed reference to attrib dictionary */
  453. /* note: this function assumes that the extra section exists */
  454. PyObject* res = self->extra->attrib;
  455. if (!res) {
  456. /* create missing dictionary */
  457. res = self->extra->attrib = PyDict_New();
  458. }
  459. return res;
  460. }
  461. LOCAL(PyObject*)
  462. element_get_text(ElementObject* self)
  463. {
  464. /* return borrowed reference to text attribute */
  465. PyObject *res = self->text;
  466. if (JOIN_GET(res)) {
  467. res = JOIN_OBJ(res);
  468. if (PyList_CheckExact(res)) {
  469. PyObject *tmp = list_join(res);
  470. if (!tmp)
  471. return NULL;
  472. self->text = tmp;
  473. Py_SETREF(res, tmp);
  474. }
  475. }
  476. return res;
  477. }
  478. LOCAL(PyObject*)
  479. element_get_tail(ElementObject* self)
  480. {
  481. /* return borrowed reference to text attribute */
  482. PyObject *res = self->tail;
  483. if (JOIN_GET(res)) {
  484. res = JOIN_OBJ(res);
  485. if (PyList_CheckExact(res)) {
  486. PyObject *tmp = list_join(res);
  487. if (!tmp)
  488. return NULL;
  489. self->tail = tmp;
  490. Py_SETREF(res, tmp);
  491. }
  492. }
  493. return res;
  494. }
  495. static PyObject*
  496. subelement(PyObject *self, PyObject *args, PyObject *kwds)
  497. {
  498. PyObject* elem;
  499. elementtreestate *st = get_elementtree_state(self);
  500. ElementObject* parent;
  501. PyObject* tag;
  502. PyObject* attrib = NULL;
  503. if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
  504. st->Element_Type, &parent, &tag,
  505. &PyDict_Type, &attrib)) {
  506. return NULL;
  507. }
  508. if (attrib) {
  509. /* attrib passed as positional arg */
  510. attrib = PyDict_Copy(attrib);
  511. if (!attrib)
  512. return NULL;
  513. if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
  514. Py_DECREF(attrib);
  515. return NULL;
  516. }
  517. } else if (kwds) {
  518. /* have keyword args */
  519. attrib = get_attrib_from_keywords(kwds);
  520. if (!attrib)
  521. return NULL;
  522. } else {
  523. /* no attrib arg, no kwds, so no attribute */
  524. }
  525. elem = create_new_element(st, tag, attrib);
  526. Py_XDECREF(attrib);
  527. if (elem == NULL)
  528. return NULL;
  529. if (element_add_subelement(st, parent, elem) < 0) {
  530. Py_DECREF(elem);
  531. return NULL;
  532. }
  533. return elem;
  534. }
  535. static int
  536. element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
  537. {
  538. Py_VISIT(Py_TYPE(self));
  539. Py_VISIT(self->tag);
  540. Py_VISIT(JOIN_OBJ(self->text));
  541. Py_VISIT(JOIN_OBJ(self->tail));
  542. if (self->extra) {
  543. Py_ssize_t i;
  544. Py_VISIT(self->extra->attrib);
  545. for (i = 0; i < self->extra->length; ++i)
  546. Py_VISIT(self->extra->children[i]);
  547. }
  548. return 0;
  549. }
  static int
  element_gc_clear(ElementObject *self)
  {
      /* Drop every reference held by the element: the tag, the text and
         tail pointers, and the whole extra section.  Always returns 0.
         Also called from element_dealloc(). */
      Py_CLEAR(self->tag);
      _clear_joined_ptr(&self->text);
      _clear_joined_ptr(&self->tail);

      /* After dropping all references from extra, it's no longer valid anyway,
       * so fully deallocate it.
       */
      clear_extra(self);
      return 0;
  }
  static void
  element_dealloc(ElementObject* self)
  {
      /* Deallocator: untrack from the GC, clear weak references, release
         everything the element owns, then free the object itself.  The work
         is bracketed by the Py_TRASHCAN_BEGIN/END macros. */
      PyTypeObject *tp = Py_TYPE(self);

      /* bpo-31095: UnTrack is needed before calling any callbacks */
      PyObject_GC_UnTrack(self);
      Py_TRASHCAN_BEGIN(self, element_dealloc)

      if (self->weakreflist != NULL)
          PyObject_ClearWeakRefs((PyObject *) self);

      /* element_gc_clear clears all references and deallocates extra
      */
      element_gc_clear(self);

      /* `tp` was read before the object is freed, so it is still usable
         here.  NOTE(review): the DECREF presumably releases the instance's
         reference to its heap type -- confirm against the type definition. */
      tp->tp_free((PyObject *)self);
      Py_DECREF(tp);
      Py_TRASHCAN_END
  }
  578. /* -------------------------------------------------------------------- */
  579. /*[clinic input]
  580. _elementtree.Element.append
  581. cls: defining_class
  582. subelement: object(subclass_of='clinic_state()->Element_Type')
  583. /
  584. [clinic start generated code]*/
  585. static PyObject *
  586. _elementtree_Element_append_impl(ElementObject *self, PyTypeObject *cls,
  587. PyObject *subelement)
  588. /*[clinic end generated code: output=d00923711ea317fc input=8baf92679f9717b8]*/
  589. {
  590. elementtreestate *st = get_elementtree_state_by_cls(cls);
  591. if (element_add_subelement(st, self, subelement) < 0)
  592. return NULL;
  593. Py_RETURN_NONE;
  594. }
  595. /*[clinic input]
  596. _elementtree.Element.clear
  597. [clinic start generated code]*/
  598. static PyObject *
  599. _elementtree_Element_clear_impl(ElementObject *self)
  600. /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
  601. {
  602. clear_extra(self);
  603. _set_joined_ptr(&self->text, Py_NewRef(Py_None));
  604. _set_joined_ptr(&self->tail, Py_NewRef(Py_None));
  605. Py_RETURN_NONE;
  606. }
  607. /*[clinic input]
  608. _elementtree.Element.__copy__
  609. cls: defining_class
  610. /
  611. [clinic start generated code]*/
  612. static PyObject *
  613. _elementtree_Element___copy___impl(ElementObject *self, PyTypeObject *cls)
  614. /*[clinic end generated code: output=da22894421ff2b36 input=91edb92d9f441213]*/
  615. {
  616. Py_ssize_t i;
  617. ElementObject* element;
  618. elementtreestate *st = get_elementtree_state_by_cls(cls);
  619. element = (ElementObject*) create_new_element(
  620. st, self->tag, self->extra ? self->extra->attrib : NULL);
  621. if (!element)
  622. return NULL;
  623. Py_INCREF(JOIN_OBJ(self->text));
  624. _set_joined_ptr(&element->text, self->text);
  625. Py_INCREF(JOIN_OBJ(self->tail));
  626. _set_joined_ptr(&element->tail, self->tail);
  627. assert(!element->extra || !element->extra->length);
  628. if (self->extra) {
  629. if (element_resize(element, self->extra->length) < 0) {
  630. Py_DECREF(element);
  631. return NULL;
  632. }
  633. for (i = 0; i < self->extra->length; i++) {
  634. element->extra->children[i] = Py_NewRef(self->extra->children[i]);
  635. }
  636. assert(!element->extra->length);
  637. element->extra->length = self->extra->length;
  638. }
  639. return (PyObject*) element;
  640. }
  641. /* Helper for a deep copy. */
  642. LOCAL(PyObject *) deepcopy(elementtreestate *, PyObject *, PyObject *);
  643. /*[clinic input]
  644. _elementtree.Element.__deepcopy__
  645. memo: object(subclass_of="&PyDict_Type")
  646. /
  647. [clinic start generated code]*/
  648. static PyObject *
  649. _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
  650. /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
  651. {
  652. Py_ssize_t i;
  653. ElementObject* element;
  654. PyObject* tag;
  655. PyObject* attrib;
  656. PyObject* text;
  657. PyObject* tail;
  658. PyObject* id;
  659. PyTypeObject *tp = Py_TYPE(self);
  660. elementtreestate *st = get_elementtree_state_by_type(tp);
  661. tag = deepcopy(st, self->tag, memo);
  662. if (!tag)
  663. return NULL;
  664. if (self->extra && self->extra->attrib) {
  665. attrib = deepcopy(st, self->extra->attrib, memo);
  666. if (!attrib) {
  667. Py_DECREF(tag);
  668. return NULL;
  669. }
  670. } else {
  671. attrib = NULL;
  672. }
  673. element = (ElementObject*) create_new_element(st, tag, attrib);
  674. Py_DECREF(tag);
  675. Py_XDECREF(attrib);
  676. if (!element)
  677. return NULL;
  678. text = deepcopy(st, JOIN_OBJ(self->text), memo);
  679. if (!text)
  680. goto error;
  681. _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
  682. tail = deepcopy(st, JOIN_OBJ(self->tail), memo);
  683. if (!tail)
  684. goto error;
  685. _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
  686. assert(!element->extra || !element->extra->length);
  687. if (self->extra) {
  688. if (element_resize(element, self->extra->length) < 0)
  689. goto error;
  690. for (i = 0; i < self->extra->length; i++) {
  691. PyObject* child = deepcopy(st, self->extra->children[i], memo);
  692. if (!child || !Element_Check(st, child)) {
  693. if (child) {
  694. raise_type_error(child);
  695. Py_DECREF(child);
  696. }
  697. element->extra->length = i;
  698. goto error;
  699. }
  700. element->extra->children[i] = child;
  701. }
  702. assert(!element->extra->length);
  703. element->extra->length = self->extra->length;
  704. }
  705. /* add object to memo dictionary (so deepcopy won't visit it again) */
  706. id = PyLong_FromSsize_t((uintptr_t) self);
  707. if (!id)
  708. goto error;
  709. i = PyDict_SetItem(memo, id, (PyObject*) element);
  710. Py_DECREF(id);
  711. if (i < 0)
  712. goto error;
  713. return (PyObject*) element;
  714. error:
  715. Py_DECREF(element);
  716. return NULL;
  717. }
  718. LOCAL(PyObject *)
  719. deepcopy(elementtreestate *st, PyObject *object, PyObject *memo)
  720. {
  721. /* do a deep copy of the given object */
  722. PyObject *stack[2];
  723. /* Fast paths */
  724. if (object == Py_None || PyUnicode_CheckExact(object)) {
  725. return Py_NewRef(object);
  726. }
  727. if (Py_REFCNT(object) == 1) {
  728. if (PyDict_CheckExact(object)) {
  729. PyObject *key, *value;
  730. Py_ssize_t pos = 0;
  731. int simple = 1;
  732. while (PyDict_Next(object, &pos, &key, &value)) {
  733. if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
  734. simple = 0;
  735. break;
  736. }
  737. }
  738. if (simple)
  739. return PyDict_Copy(object);
  740. /* Fall through to general case */
  741. }
  742. else if (Element_CheckExact(st, object)) {
  743. return _elementtree_Element___deepcopy___impl(
  744. (ElementObject *)object, memo);
  745. }
  746. }
  747. /* General case */
  748. if (!st->deepcopy_obj) {
  749. PyErr_SetString(PyExc_RuntimeError,
  750. "deepcopy helper not found");
  751. return NULL;
  752. }
  753. stack[0] = object;
  754. stack[1] = memo;
  755. return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
  756. }
  757. /*[clinic input]
  758. _elementtree.Element.__sizeof__ -> size_t
  759. [clinic start generated code]*/
  760. static size_t
  761. _elementtree_Element___sizeof___impl(ElementObject *self)
  762. /*[clinic end generated code: output=baae4e7ae9fe04ec input=54e298c501f3e0d0]*/
  763. {
  764. size_t result = _PyObject_SIZE(Py_TYPE(self));
  765. if (self->extra) {
  766. result += sizeof(ElementObjectExtra);
  767. if (self->extra->children != self->extra->_children) {
  768. result += (size_t)self->extra->allocated * sizeof(PyObject*);
  769. }
  770. }
  771. return result;
  772. }
  /* dict keys for getstate/setstate.  __getstate__ emits a dict with exactly
   * these keys, and __setstate__ accepts them as keyword names.
   */
  #define PICKLED_TAG "tag"             /* the element's tag object */
  #define PICKLED_CHILDREN "_children"  /* list of child elements */
  #define PICKLED_ATTRIB "attrib"       /* attribute dict */
  #define PICKLED_TAIL "tail"           /* tail object */
  #define PICKLED_TEXT "text"           /* text object */
  779. /* __getstate__ returns a fabricated instance dict as in the pure-Python
  780. * Element implementation, for interoperability/interchangeability. This
  781. * makes the pure-Python implementation details an API, but (a) there aren't
  782. * any unnecessary structures there; and (b) it buys compatibility with 3.2
  783. * pickles. See issue #16076.
  784. */
  785. /*[clinic input]
  786. _elementtree.Element.__getstate__
  787. [clinic start generated code]*/
  788. static PyObject *
  789. _elementtree_Element___getstate___impl(ElementObject *self)
  790. /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
  791. {
  792. Py_ssize_t i;
  793. PyObject *children, *attrib;
  794. /* Build a list of children. */
  795. children = PyList_New(self->extra ? self->extra->length : 0);
  796. if (!children)
  797. return NULL;
  798. for (i = 0; i < PyList_GET_SIZE(children); i++) {
  799. PyObject *child = Py_NewRef(self->extra->children[i]);
  800. PyList_SET_ITEM(children, i, child);
  801. }
  802. if (self->extra && self->extra->attrib) {
  803. attrib = Py_NewRef(self->extra->attrib);
  804. }
  805. else {
  806. attrib = PyDict_New();
  807. if (!attrib) {
  808. Py_DECREF(children);
  809. return NULL;
  810. }
  811. }
  812. return Py_BuildValue("{sOsNsNsOsO}",
  813. PICKLED_TAG, self->tag,
  814. PICKLED_CHILDREN, children,
  815. PICKLED_ATTRIB, attrib,
  816. PICKLED_TEXT, JOIN_OBJ(self->text),
  817. PICKLED_TAIL, JOIN_OBJ(self->tail));
  818. }
  819. static PyObject *
  820. element_setstate_from_attributes(elementtreestate *st,
  821. ElementObject *self,
  822. PyObject *tag,
  823. PyObject *attrib,
  824. PyObject *text,
  825. PyObject *tail,
  826. PyObject *children)
  827. {
  828. Py_ssize_t i, nchildren;
  829. ElementObjectExtra *oldextra = NULL;
  830. if (!tag) {
  831. PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
  832. return NULL;
  833. }
  834. Py_XSETREF(self->tag, Py_NewRef(tag));
  835. text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
  836. Py_INCREF(JOIN_OBJ(text));
  837. _set_joined_ptr(&self->text, text);
  838. tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
  839. Py_INCREF(JOIN_OBJ(tail));
  840. _set_joined_ptr(&self->tail, tail);
  841. /* Handle ATTRIB and CHILDREN. */
  842. if (!children && !attrib) {
  843. Py_RETURN_NONE;
  844. }
  845. /* Compute 'nchildren'. */
  846. if (children) {
  847. if (!PyList_Check(children)) {
  848. PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
  849. return NULL;
  850. }
  851. nchildren = PyList_GET_SIZE(children);
  852. /* (Re-)allocate 'extra'.
  853. Avoid DECREFs calling into this code again (cycles, etc.)
  854. */
  855. oldextra = self->extra;
  856. self->extra = NULL;
  857. if (element_resize(self, nchildren)) {
  858. assert(!self->extra || !self->extra->length);
  859. clear_extra(self);
  860. self->extra = oldextra;
  861. return NULL;
  862. }
  863. assert(self->extra);
  864. assert(self->extra->allocated >= nchildren);
  865. if (oldextra) {
  866. assert(self->extra->attrib == NULL);
  867. self->extra->attrib = oldextra->attrib;
  868. oldextra->attrib = NULL;
  869. }
  870. /* Copy children */
  871. for (i = 0; i < nchildren; i++) {
  872. PyObject *child = PyList_GET_ITEM(children, i);
  873. if (!Element_Check(st, child)) {
  874. raise_type_error(child);
  875. self->extra->length = i;
  876. dealloc_extra(oldextra);
  877. return NULL;
  878. }
  879. self->extra->children[i] = Py_NewRef(child);
  880. }
  881. assert(!self->extra->length);
  882. self->extra->length = nchildren;
  883. }
  884. else {
  885. if (element_resize(self, 0)) {
  886. return NULL;
  887. }
  888. }
  889. /* Stash attrib. */
  890. Py_XSETREF(self->extra->attrib, Py_XNewRef(attrib));
  891. dealloc_extra(oldextra);
  892. Py_RETURN_NONE;
  893. }
  894. /* __setstate__ for Element instance from the Python implementation.
  895. * 'state' should be the instance dict.
  896. */
  897. static PyObject *
  898. element_setstate_from_Python(elementtreestate *st, ElementObject *self,
  899. PyObject *state)
  900. {
  901. static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
  902. PICKLED_TAIL, PICKLED_CHILDREN, 0};
  903. PyObject *args;
  904. PyObject *tag, *attrib, *text, *tail, *children;
  905. PyObject *retval;
  906. tag = attrib = text = tail = children = NULL;
  907. args = PyTuple_New(0);
  908. if (!args)
  909. return NULL;
  910. if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
  911. &attrib, &text, &tail, &children))
  912. retval = element_setstate_from_attributes(st, self, tag, attrib, text,
  913. tail, children);
  914. else
  915. retval = NULL;
  916. Py_DECREF(args);
  917. return retval;
  918. }
  919. /*[clinic input]
  920. _elementtree.Element.__setstate__
  921. cls: defining_class
  922. state: object
  923. /
  924. [clinic start generated code]*/
  925. static PyObject *
  926. _elementtree_Element___setstate___impl(ElementObject *self,
  927. PyTypeObject *cls, PyObject *state)
  928. /*[clinic end generated code: output=598bfb5730f71509 input=13830488d35d51f7]*/
  929. {
  930. if (!PyDict_CheckExact(state)) {
  931. PyErr_Format(PyExc_TypeError,
  932. "Don't know how to unpickle \"%.200R\" as an Element",
  933. state);
  934. return NULL;
  935. }
  936. else {
  937. elementtreestate *st = get_elementtree_state_by_cls(cls);
  938. return element_setstate_from_Python(st, self, state);
  939. }
  940. }
  941. LOCAL(int)
  942. checkpath(PyObject* tag)
  943. {
  944. Py_ssize_t i;
  945. int check = 1;
  946. /* check if a tag contains an xpath character */
  947. #define PATHCHAR(ch) \
  948. (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
  949. if (PyUnicode_Check(tag)) {
  950. const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
  951. const void *data = PyUnicode_DATA(tag);
  952. int kind = PyUnicode_KIND(tag);
  953. if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
  954. PyUnicode_READ(kind, data, 1) == '}' || (
  955. PyUnicode_READ(kind, data, 1) == '*' &&
  956. PyUnicode_READ(kind, data, 2) == '}'))) {
  957. /* wildcard: '{}tag' or '{*}tag' */
  958. return 1;
  959. }
  960. for (i = 0; i < len; i++) {
  961. Py_UCS4 ch = PyUnicode_READ(kind, data, i);
  962. if (ch == '{')
  963. check = 0;
  964. else if (ch == '}')
  965. check = 1;
  966. else if (check && PATHCHAR(ch))
  967. return 1;
  968. }
  969. return 0;
  970. }
  971. if (PyBytes_Check(tag)) {
  972. const char *p = PyBytes_AS_STRING(tag);
  973. const Py_ssize_t len = PyBytes_GET_SIZE(tag);
  974. if (len >= 3 && p[0] == '{' && (
  975. p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
  976. /* wildcard: '{}tag' or '{*}tag' */
  977. return 1;
  978. }
  979. for (i = 0; i < len; i++) {
  980. if (p[i] == '{')
  981. check = 0;
  982. else if (p[i] == '}')
  983. check = 1;
  984. else if (check && PATHCHAR(p[i]))
  985. return 1;
  986. }
  987. return 0;
  988. }
  989. return 1; /* unknown type; might be path expression */
  990. }
  991. /*[clinic input]
  992. _elementtree.Element.extend
  993. cls: defining_class
  994. elements: object
  995. /
  996. [clinic start generated code]*/
  997. static PyObject *
  998. _elementtree_Element_extend_impl(ElementObject *self, PyTypeObject *cls,
  999. PyObject *elements)
  1000. /*[clinic end generated code: output=3e86d37fac542216 input=6479b1b5379d09ae]*/
  1001. {
  1002. PyObject* seq;
  1003. Py_ssize_t i;
  1004. seq = PySequence_Fast(elements, "'elements' must be an iterable");
  1005. if (!seq) {
  1006. return NULL;
  1007. }
  1008. elementtreestate *st = get_elementtree_state_by_cls(cls);
  1009. for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
  1010. PyObject* element = Py_NewRef(PySequence_Fast_GET_ITEM(seq, i));
  1011. if (element_add_subelement(st, self, element) < 0) {
  1012. Py_DECREF(seq);
  1013. Py_DECREF(element);
  1014. return NULL;
  1015. }
  1016. Py_DECREF(element);
  1017. }
  1018. Py_DECREF(seq);
  1019. Py_RETURN_NONE;
  1020. }
  1021. /*[clinic input]
  1022. _elementtree.Element.find
  1023. cls: defining_class
  1024. /
  1025. path: object
  1026. namespaces: object = None
  1027. [clinic start generated code]*/
  1028. static PyObject *
  1029. _elementtree_Element_find_impl(ElementObject *self, PyTypeObject *cls,
  1030. PyObject *path, PyObject *namespaces)
  1031. /*[clinic end generated code: output=18f77d393c9fef1b input=94df8a83f956acc6]*/
  1032. {
  1033. Py_ssize_t i;
  1034. elementtreestate *st = get_elementtree_state_by_cls(cls);
  1035. if (checkpath(path) || namespaces != Py_None) {
  1036. return PyObject_CallMethodObjArgs(
  1037. st->elementpath_obj, st->str_find, self, path, namespaces, NULL
  1038. );
  1039. }
  1040. if (!self->extra)
  1041. Py_RETURN_NONE;
  1042. for (i = 0; i < self->extra->length; i++) {
  1043. PyObject* item = self->extra->children[i];
  1044. int rc;
  1045. assert(Element_Check(st, item));
  1046. Py_INCREF(item);
  1047. rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
  1048. if (rc > 0)
  1049. return item;
  1050. Py_DECREF(item);
  1051. if (rc < 0)
  1052. return NULL;
  1053. }
  1054. Py_RETURN_NONE;
  1055. }
  1056. /*[clinic input]
  1057. _elementtree.Element.findtext
  1058. cls: defining_class
  1059. /
  1060. path: object
  1061. default: object = None
  1062. namespaces: object = None
  1063. [clinic start generated code]*/
  1064. static PyObject *
  1065. _elementtree_Element_findtext_impl(ElementObject *self, PyTypeObject *cls,
  1066. PyObject *path, PyObject *default_value,
  1067. PyObject *namespaces)
  1068. /*[clinic end generated code: output=6af7a2d96aac32cb input=32f252099f62a3d2]*/
  1069. {
  1070. Py_ssize_t i;
  1071. elementtreestate *st = get_elementtree_state_by_cls(cls);
  1072. if (checkpath(path) || namespaces != Py_None)
  1073. return PyObject_CallMethodObjArgs(
  1074. st->elementpath_obj, st->str_findtext,
  1075. self, path, default_value, namespaces, NULL
  1076. );
  1077. if (!self->extra) {
  1078. return Py_NewRef(default_value);
  1079. }
  1080. for (i = 0; i < self->extra->length; i++) {
  1081. PyObject *item = self->extra->children[i];
  1082. int rc;
  1083. assert(Element_Check(st, item));
  1084. Py_INCREF(item);
  1085. rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
  1086. if (rc > 0) {
  1087. PyObject* text = element_get_text((ElementObject*)item);
  1088. if (text == Py_None) {
  1089. Py_DECREF(item);
  1090. return PyUnicode_New(0, 0);
  1091. }
  1092. Py_XINCREF(text);
  1093. Py_DECREF(item);
  1094. return text;
  1095. }
  1096. Py_DECREF(item);
  1097. if (rc < 0)
  1098. return NULL;
  1099. }
  1100. return Py_NewRef(default_value);
  1101. }
  1102. /*[clinic input]
  1103. _elementtree.Element.findall
  1104. cls: defining_class
  1105. /
  1106. path: object
  1107. namespaces: object = None
  1108. [clinic start generated code]*/
  1109. static PyObject *
  1110. _elementtree_Element_findall_impl(ElementObject *self, PyTypeObject *cls,
  1111. PyObject *path, PyObject *namespaces)
  1112. /*[clinic end generated code: output=65e39a1208f3b59e input=7aa0db45673fc9a5]*/
  1113. {
  1114. Py_ssize_t i;
  1115. PyObject* out;
  1116. elementtreestate *st = get_elementtree_state_by_cls(cls);
  1117. if (checkpath(path) || namespaces != Py_None) {
  1118. return PyObject_CallMethodObjArgs(
  1119. st->elementpath_obj, st->str_findall, self, path, namespaces, NULL
  1120. );
  1121. }
  1122. out = PyList_New(0);
  1123. if (!out)
  1124. return NULL;
  1125. if (!self->extra)
  1126. return out;
  1127. for (i = 0; i < self->extra->length; i++) {
  1128. PyObject* item = self->extra->children[i];
  1129. int rc;
  1130. assert(Element_Check(st, item));
  1131. Py_INCREF(item);
  1132. rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
  1133. if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
  1134. Py_DECREF(item);
  1135. Py_DECREF(out);
  1136. return NULL;
  1137. }
  1138. Py_DECREF(item);
  1139. }
  1140. return out;
  1141. }
  1142. /*[clinic input]
  1143. _elementtree.Element.iterfind
  1144. cls: defining_class
  1145. /
  1146. path: object
  1147. namespaces: object = None
  1148. [clinic start generated code]*/
  1149. static PyObject *
  1150. _elementtree_Element_iterfind_impl(ElementObject *self, PyTypeObject *cls,
  1151. PyObject *path, PyObject *namespaces)
  1152. /*[clinic end generated code: output=be5c3f697a14e676 input=88766875a5c9a88b]*/
  1153. {
  1154. PyObject* tag = path;
  1155. elementtreestate *st = get_elementtree_state_by_cls(cls);
  1156. return PyObject_CallMethodObjArgs(
  1157. st->elementpath_obj, st->str_iterfind, self, tag, namespaces, NULL);
  1158. }
  1159. /*[clinic input]
  1160. _elementtree.Element.get
  1161. key: object
  1162. default: object = None
  1163. [clinic start generated code]*/
  1164. static PyObject *
  1165. _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
  1166. PyObject *default_value)
  1167. /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
  1168. {
  1169. if (self->extra && self->extra->attrib) {
  1170. PyObject *attrib = Py_NewRef(self->extra->attrib);
  1171. PyObject *value = Py_XNewRef(PyDict_GetItemWithError(attrib, key));
  1172. Py_DECREF(attrib);
  1173. if (value != NULL || PyErr_Occurred()) {
  1174. return value;
  1175. }
  1176. }
  1177. return Py_NewRef(default_value);
  1178. }
  1179. static PyObject *
  1180. create_elementiter(elementtreestate *st, ElementObject *self, PyObject *tag,
  1181. int gettext);
  1182. /*[clinic input]
  1183. _elementtree.Element.iter
  1184. cls: defining_class
  1185. /
  1186. tag: object = None
  1187. [clinic start generated code]*/
  1188. static PyObject *
  1189. _elementtree_Element_iter_impl(ElementObject *self, PyTypeObject *cls,
  1190. PyObject *tag)
  1191. /*[clinic end generated code: output=bff29dc5d4566c68 input=f6944c48d3f84c58]*/
  1192. {
  1193. if (PyUnicode_Check(tag)) {
  1194. if (PyUnicode_READY(tag) < 0)
  1195. return NULL;
  1196. if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
  1197. tag = Py_None;
  1198. }
  1199. else if (PyBytes_Check(tag)) {
  1200. if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
  1201. tag = Py_None;
  1202. }
  1203. elementtreestate *st = get_elementtree_state_by_cls(cls);
  1204. return create_elementiter(st, self, tag, 0);
  1205. }
  1206. /*[clinic input]
  1207. _elementtree.Element.itertext
  1208. cls: defining_class
  1209. /
  1210. [clinic start generated code]*/
  1211. static PyObject *
  1212. _elementtree_Element_itertext_impl(ElementObject *self, PyTypeObject *cls)
  1213. /*[clinic end generated code: output=fdeb2a3bca0ae063 input=a1ef1f0fc872a586]*/
  1214. {
  1215. elementtreestate *st = get_elementtree_state_by_cls(cls);
  1216. return create_elementiter(st, self, Py_None, 1);
  1217. }
  1218. static PyObject*
  1219. element_getitem(PyObject* self_, Py_ssize_t index)
  1220. {
  1221. ElementObject* self = (ElementObject*) self_;
  1222. if (!self->extra || index < 0 || index >= self->extra->length) {
  1223. PyErr_SetString(
  1224. PyExc_IndexError,
  1225. "child index out of range"
  1226. );
  1227. return NULL;
  1228. }
  1229. return Py_NewRef(self->extra->children[index]);
  1230. }
  1231. static int
  1232. element_bool(PyObject* self_)
  1233. {
  1234. ElementObject* self = (ElementObject*) self_;
  1235. if (PyErr_WarnEx(PyExc_DeprecationWarning,
  1236. "Testing an element's truth value will always return True "
  1237. "in future versions. Use specific 'len(elem)' or "
  1238. "'elem is not None' test instead.",
  1239. 1) < 0) {
  1240. return -1;
  1241. };
  1242. if (self->extra ? self->extra->length : 0) {
  1243. return 1;
  1244. }
  1245. return 0;
  1246. }
  1247. /*[clinic input]
  1248. _elementtree.Element.insert
  1249. index: Py_ssize_t
  1250. subelement: object(subclass_of='clinic_state()->Element_Type')
  1251. /
  1252. [clinic start generated code]*/
  1253. static PyObject *
  1254. _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
  1255. PyObject *subelement)
  1256. /*[clinic end generated code: output=990adfef4d424c0b input=9530f4905aa401ca]*/
  1257. {
  1258. Py_ssize_t i;
  1259. if (!self->extra) {
  1260. if (create_extra(self, NULL) < 0)
  1261. return NULL;
  1262. }
  1263. if (index < 0) {
  1264. index += self->extra->length;
  1265. if (index < 0)
  1266. index = 0;
  1267. }
  1268. if (index > self->extra->length)
  1269. index = self->extra->length;
  1270. if (element_resize(self, 1) < 0)
  1271. return NULL;
  1272. for (i = self->extra->length; i > index; i--)
  1273. self->extra->children[i] = self->extra->children[i-1];
  1274. self->extra->children[index] = Py_NewRef(subelement);
  1275. self->extra->length++;
  1276. Py_RETURN_NONE;
  1277. }
  1278. /*[clinic input]
  1279. _elementtree.Element.items
  1280. [clinic start generated code]*/
  1281. static PyObject *
  1282. _elementtree_Element_items_impl(ElementObject *self)
  1283. /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
  1284. {
  1285. if (!self->extra || !self->extra->attrib)
  1286. return PyList_New(0);
  1287. return PyDict_Items(self->extra->attrib);
  1288. }
  1289. /*[clinic input]
  1290. _elementtree.Element.keys
  1291. [clinic start generated code]*/
  1292. static PyObject *
  1293. _elementtree_Element_keys_impl(ElementObject *self)
  1294. /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
  1295. {
  1296. if (!self->extra || !self->extra->attrib)
  1297. return PyList_New(0);
  1298. return PyDict_Keys(self->extra->attrib);
  1299. }
  1300. static Py_ssize_t
  1301. element_length(ElementObject* self)
  1302. {
  1303. if (!self->extra)
  1304. return 0;
  1305. return self->extra->length;
  1306. }
  1307. /*[clinic input]
  1308. _elementtree.Element.makeelement
  1309. cls: defining_class
  1310. tag: object
  1311. attrib: object(subclass_of='&PyDict_Type')
  1312. /
  1313. [clinic start generated code]*/
  1314. static PyObject *
  1315. _elementtree_Element_makeelement_impl(ElementObject *self, PyTypeObject *cls,
  1316. PyObject *tag, PyObject *attrib)
  1317. /*[clinic end generated code: output=d50bb17a47077d47 input=589829dab92f26e8]*/
  1318. {
  1319. PyObject* elem;
  1320. attrib = PyDict_Copy(attrib);
  1321. if (!attrib)
  1322. return NULL;
  1323. elementtreestate *st = get_elementtree_state_by_cls(cls);
  1324. elem = create_new_element(st, tag, attrib);
  1325. Py_DECREF(attrib);
  1326. return elem;
  1327. }
  1328. /*[clinic input]
  1329. _elementtree.Element.remove
  1330. subelement: object(subclass_of='clinic_state()->Element_Type')
  1331. /
  1332. [clinic start generated code]*/
  1333. static PyObject *
  1334. _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
  1335. /*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/
  1336. {
  1337. Py_ssize_t i;
  1338. int rc;
  1339. PyObject *found;
  1340. if (!self->extra) {
  1341. /* element has no children, so raise exception */
  1342. PyErr_SetString(
  1343. PyExc_ValueError,
  1344. "list.remove(x): x not in list"
  1345. );
  1346. return NULL;
  1347. }
  1348. for (i = 0; i < self->extra->length; i++) {
  1349. if (self->extra->children[i] == subelement)
  1350. break;
  1351. rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
  1352. if (rc > 0)
  1353. break;
  1354. if (rc < 0)
  1355. return NULL;
  1356. }
  1357. if (i >= self->extra->length) {
  1358. /* subelement is not in children, so raise exception */
  1359. PyErr_SetString(
  1360. PyExc_ValueError,
  1361. "list.remove(x): x not in list"
  1362. );
  1363. return NULL;
  1364. }
  1365. found = self->extra->children[i];
  1366. self->extra->length--;
  1367. for (; i < self->extra->length; i++)
  1368. self->extra->children[i] = self->extra->children[i+1];
  1369. Py_DECREF(found);
  1370. Py_RETURN_NONE;
  1371. }
  1372. static PyObject*
  1373. element_repr(ElementObject* self)
  1374. {
  1375. int status;
  1376. if (self->tag == NULL)
  1377. return PyUnicode_FromFormat("<Element at %p>", self);
  1378. status = Py_ReprEnter((PyObject *)self);
  1379. if (status == 0) {
  1380. PyObject *res;
  1381. res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
  1382. Py_ReprLeave((PyObject *)self);
  1383. return res;
  1384. }
  1385. if (status > 0)
  1386. PyErr_Format(PyExc_RuntimeError,
  1387. "reentrant call inside %s.__repr__",
  1388. Py_TYPE(self)->tp_name);
  1389. return NULL;
  1390. }
  1391. /*[clinic input]
  1392. _elementtree.Element.set
  1393. key: object
  1394. value: object
  1395. /
  1396. [clinic start generated code]*/
  1397. static PyObject *
  1398. _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
  1399. PyObject *value)
  1400. /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
  1401. {
  1402. PyObject* attrib;
  1403. if (!self->extra) {
  1404. if (create_extra(self, NULL) < 0)
  1405. return NULL;
  1406. }
  1407. attrib = element_get_attrib(self);
  1408. if (!attrib)
  1409. return NULL;
  1410. if (PyDict_SetItem(attrib, key, value) < 0)
  1411. return NULL;
  1412. Py_RETURN_NONE;
  1413. }
  1414. static int
  1415. element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
  1416. {
  1417. ElementObject* self = (ElementObject*) self_;
  1418. Py_ssize_t i;
  1419. PyObject* old;
  1420. if (!self->extra || index < 0 || index >= self->extra->length) {
  1421. PyErr_SetString(
  1422. PyExc_IndexError,
  1423. "child assignment index out of range");
  1424. return -1;
  1425. }
  1426. old = self->extra->children[index];
  1427. if (item) {
  1428. PyTypeObject *tp = Py_TYPE(self);
  1429. elementtreestate *st = get_elementtree_state_by_type(tp);
  1430. if (!Element_Check(st, item)) {
  1431. raise_type_error(item);
  1432. return -1;
  1433. }
  1434. self->extra->children[index] = Py_NewRef(item);
  1435. } else {
  1436. self->extra->length--;
  1437. for (i = index; i < self->extra->length; i++)
  1438. self->extra->children[i] = self->extra->children[i+1];
  1439. }
  1440. Py_DECREF(old);
  1441. return 0;
  1442. }
  1443. static PyObject*
  1444. element_subscr(PyObject* self_, PyObject* item)
  1445. {
  1446. ElementObject* self = (ElementObject*) self_;
  1447. if (PyIndex_Check(item)) {
  1448. Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
  1449. if (i == -1 && PyErr_Occurred()) {
  1450. return NULL;
  1451. }
  1452. if (i < 0 && self->extra)
  1453. i += self->extra->length;
  1454. return element_getitem(self_, i);
  1455. }
  1456. else if (PySlice_Check(item)) {
  1457. Py_ssize_t start, stop, step, slicelen, i;
  1458. size_t cur;
  1459. PyObject* list;
  1460. if (!self->extra)
  1461. return PyList_New(0);
  1462. if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
  1463. return NULL;
  1464. }
  1465. slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
  1466. step);
  1467. if (slicelen <= 0)
  1468. return PyList_New(0);
  1469. else {
  1470. list = PyList_New(slicelen);
  1471. if (!list)
  1472. return NULL;
  1473. for (cur = start, i = 0; i < slicelen;
  1474. cur += step, i++) {
  1475. PyObject* item = Py_NewRef(self->extra->children[cur]);
  1476. PyList_SET_ITEM(list, i, item);
  1477. }
  1478. return list;
  1479. }
  1480. }
  1481. else {
  1482. PyErr_SetString(PyExc_TypeError,
  1483. "element indices must be integers");
  1484. return NULL;
  1485. }
  1486. }
  1487. static int
  1488. element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
  1489. {
  1490. ElementObject* self = (ElementObject*) self_;
  1491. if (PyIndex_Check(item)) {
  1492. Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
  1493. if (i == -1 && PyErr_Occurred()) {
  1494. return -1;
  1495. }
  1496. if (i < 0 && self->extra)
  1497. i += self->extra->length;
  1498. return element_setitem(self_, i, value);
  1499. }
  1500. else if (PySlice_Check(item)) {
  1501. Py_ssize_t start, stop, step, slicelen, newlen, i;
  1502. size_t cur;
  1503. PyObject* recycle = NULL;
  1504. PyObject* seq;
  1505. if (!self->extra) {
  1506. if (create_extra(self, NULL) < 0)
  1507. return -1;
  1508. }
  1509. if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
  1510. return -1;
  1511. }
  1512. slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
  1513. step);
  1514. if (value == NULL) {
  1515. /* Delete slice */
  1516. size_t cur;
  1517. Py_ssize_t i;
  1518. if (slicelen <= 0)
  1519. return 0;
  1520. /* Since we're deleting, the direction of the range doesn't matter,
  1521. * so for simplicity make it always ascending.
  1522. */
  1523. if (step < 0) {
  1524. stop = start + 1;
  1525. start = stop + step * (slicelen - 1) - 1;
  1526. step = -step;
  1527. }
  1528. assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
  1529. /* recycle is a list that will contain all the children
  1530. * scheduled for removal.
  1531. */
  1532. if (!(recycle = PyList_New(slicelen))) {
  1533. return -1;
  1534. }
  1535. /* This loop walks over all the children that have to be deleted,
  1536. * with cur pointing at them. num_moved is the amount of children
  1537. * until the next deleted child that have to be "shifted down" to
  1538. * occupy the deleted's places.
  1539. * Note that in the ith iteration, shifting is done i+i places down
  1540. * because i children were already removed.
  1541. */
  1542. for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
  1543. /* Compute how many children have to be moved, clipping at the
  1544. * list end.
  1545. */
  1546. Py_ssize_t num_moved = step - 1;
  1547. if (cur + step >= (size_t)self->extra->length) {
  1548. num_moved = self->extra->length - cur - 1;
  1549. }
  1550. PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
  1551. memmove(
  1552. self->extra->children + cur - i,
  1553. self->extra->children + cur + 1,
  1554. num_moved * sizeof(PyObject *));
  1555. }
  1556. /* Leftover "tail" after the last removed child */
  1557. cur = start + (size_t)slicelen * step;
  1558. if (cur < (size_t)self->extra->length) {
  1559. memmove(
  1560. self->extra->children + cur - slicelen,
  1561. self->extra->children + cur,
  1562. (self->extra->length - cur) * sizeof(PyObject *));
  1563. }
  1564. self->extra->length -= slicelen;
  1565. /* Discard the recycle list with all the deleted sub-elements */
  1566. Py_DECREF(recycle);
  1567. return 0;
  1568. }
  1569. /* A new slice is actually being assigned */
  1570. seq = PySequence_Fast(value, "assignment expects an iterable");
  1571. if (!seq) {
  1572. return -1;
  1573. }
  1574. newlen = PySequence_Fast_GET_SIZE(seq);
  1575. if (step != 1 && newlen != slicelen)
  1576. {
  1577. Py_DECREF(seq);
  1578. PyErr_Format(PyExc_ValueError,
  1579. "attempt to assign sequence of size %zd "
  1580. "to extended slice of size %zd",
  1581. newlen, slicelen
  1582. );
  1583. return -1;
  1584. }
  1585. /* Resize before creating the recycle bin, to prevent refleaks. */
  1586. if (newlen > slicelen) {
  1587. if (element_resize(self, newlen - slicelen) < 0) {
  1588. Py_DECREF(seq);
  1589. return -1;
  1590. }
  1591. }
  1592. PyTypeObject *tp = Py_TYPE(self);
  1593. elementtreestate *st = get_elementtree_state_by_type(tp);
  1594. for (i = 0; i < newlen; i++) {
  1595. PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
  1596. if (!Element_Check(st, element)) {
  1597. raise_type_error(element);
  1598. Py_DECREF(seq);
  1599. return -1;
  1600. }
  1601. }
  1602. if (slicelen > 0) {
  1603. /* to avoid recursive calls to this method (via decref), move
  1604. old items to the recycle bin here, and get rid of them when
  1605. we're done modifying the element */
  1606. recycle = PyList_New(slicelen);
  1607. if (!recycle) {
  1608. Py_DECREF(seq);
  1609. return -1;
  1610. }
  1611. for (cur = start, i = 0; i < slicelen;
  1612. cur += step, i++)
  1613. PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
  1614. }
  1615. if (newlen < slicelen) {
  1616. /* delete slice */
  1617. for (i = stop; i < self->extra->length; i++)
  1618. self->extra->children[i + newlen - slicelen] = self->extra->children[i];
  1619. } else if (newlen > slicelen) {
  1620. /* insert slice */
  1621. for (i = self->extra->length-1; i >= stop; i--)
  1622. self->extra->children[i + newlen - slicelen] = self->extra->children[i];
  1623. }
  1624. /* replace the slice */
  1625. for (cur = start, i = 0; i < newlen;
  1626. cur += step, i++) {
  1627. PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
  1628. self->extra->children[cur] = Py_NewRef(element);
  1629. }
  1630. self->extra->length += newlen - slicelen;
  1631. Py_DECREF(seq);
  1632. /* discard the recycle bin, and everything in it */
  1633. Py_XDECREF(recycle);
  1634. return 0;
  1635. }
  1636. else {
  1637. PyErr_SetString(PyExc_TypeError,
  1638. "element indices must be integers");
  1639. return -1;
  1640. }
  1641. }
  1642. static PyObject*
  1643. element_tag_getter(ElementObject *self, void *closure)
  1644. {
  1645. PyObject *res = self->tag;
  1646. return Py_NewRef(res);
  1647. }
  1648. static PyObject*
  1649. element_text_getter(ElementObject *self, void *closure)
  1650. {
  1651. PyObject *res = element_get_text(self);
  1652. return Py_XNewRef(res);
  1653. }
  1654. static PyObject*
  1655. element_tail_getter(ElementObject *self, void *closure)
  1656. {
  1657. PyObject *res = element_get_tail(self);
  1658. return Py_XNewRef(res);
  1659. }
  1660. static PyObject*
  1661. element_attrib_getter(ElementObject *self, void *closure)
  1662. {
  1663. PyObject *res;
  1664. if (!self->extra) {
  1665. if (create_extra(self, NULL) < 0)
  1666. return NULL;
  1667. }
  1668. res = element_get_attrib(self);
  1669. return Py_XNewRef(res);
  1670. }
  1671. /* macro for setter validation */
  1672. #define _VALIDATE_ATTR_VALUE(V) \
  1673. if ((V) == NULL) { \
  1674. PyErr_SetString( \
  1675. PyExc_AttributeError, \
  1676. "can't delete element attribute"); \
  1677. return -1; \
  1678. }
  1679. static int
  1680. element_tag_setter(ElementObject *self, PyObject *value, void *closure)
  1681. {
  1682. _VALIDATE_ATTR_VALUE(value);
  1683. Py_SETREF(self->tag, Py_NewRef(value));
  1684. return 0;
  1685. }
  1686. static int
  1687. element_text_setter(ElementObject *self, PyObject *value, void *closure)
  1688. {
  1689. _VALIDATE_ATTR_VALUE(value);
  1690. _set_joined_ptr(&self->text, Py_NewRef(value));
  1691. return 0;
  1692. }
  1693. static int
  1694. element_tail_setter(ElementObject *self, PyObject *value, void *closure)
  1695. {
  1696. _VALIDATE_ATTR_VALUE(value);
  1697. _set_joined_ptr(&self->tail, Py_NewRef(value));
  1698. return 0;
  1699. }
  1700. static int
  1701. element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
  1702. {
  1703. _VALIDATE_ATTR_VALUE(value);
  1704. if (!PyDict_Check(value)) {
  1705. PyErr_Format(PyExc_TypeError,
  1706. "attrib must be dict, not %.200s",
  1707. Py_TYPE(value)->tp_name);
  1708. return -1;
  1709. }
  1710. if (!self->extra) {
  1711. if (create_extra(self, NULL) < 0)
  1712. return -1;
  1713. }
  1714. Py_XSETREF(self->extra->attrib, Py_NewRef(value));
  1715. return 0;
  1716. }
  1717. /******************************* Element iterator ****************************/
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */

/* One entry of the iterator's parent stack. */
typedef struct ParentLocator_t {
    /* Element whose children are being walked; the stack owns a strong
     * reference (taken in parent_stack_push_new). */
    ElementObject *parent;
    /* Index into parent's children of the next child to visit. */
    Py_ssize_t child_index;
} ParentLocator;
/* Iterator object used for pre-order traversal of an element tree. */
typedef struct {
    PyObject_HEAD
    /* Array of stack entries; allocated in create_elementiter and grown in
     * parent_stack_push_new. */
    ParentLocator *parent_stack;
    /* Number of entries of parent_stack currently in use. */
    Py_ssize_t parent_stack_used;
    /* Number of entries parent_stack has room for. */
    Py_ssize_t parent_stack_size;
    /* Element the iteration starts from (strong reference); set to NULL by
     * elementiter_next once it has been handed over to the traversal. */
    ElementObject *root_element;
    /* Tag compared against each element's tag; Py_None makes
     * elementiter_next return every element. */
    PyObject *sought_tag;
    /* Non-zero: yield text/tail values instead of elements
     * (see elementiter_next). */
    int gettext;
} ElementIterObject;
  1739. static void
  1740. elementiter_dealloc(ElementIterObject *it)
  1741. {
  1742. PyTypeObject *tp = Py_TYPE(it);
  1743. Py_ssize_t i = it->parent_stack_used;
  1744. it->parent_stack_used = 0;
  1745. /* bpo-31095: UnTrack is needed before calling any callbacks */
  1746. PyObject_GC_UnTrack(it);
  1747. while (i--)
  1748. Py_XDECREF(it->parent_stack[i].parent);
  1749. PyMem_Free(it->parent_stack);
  1750. Py_XDECREF(it->sought_tag);
  1751. Py_XDECREF(it->root_element);
  1752. tp->tp_free(it);
  1753. Py_DECREF(tp);
  1754. }
  1755. static int
  1756. elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
  1757. {
  1758. Py_ssize_t i = it->parent_stack_used;
  1759. while (i--)
  1760. Py_VISIT(it->parent_stack[i].parent);
  1761. Py_VISIT(it->root_element);
  1762. Py_VISIT(it->sought_tag);
  1763. Py_VISIT(Py_TYPE(it));
  1764. return 0;
  1765. }
  1766. /* Helper function for elementiter_next. Add a new parent to the parent stack.
  1767. */
  1768. static int
  1769. parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
  1770. {
  1771. ParentLocator *item;
  1772. if (it->parent_stack_used >= it->parent_stack_size) {
  1773. Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
  1774. ParentLocator *parent_stack = it->parent_stack;
  1775. PyMem_Resize(parent_stack, ParentLocator, new_size);
  1776. if (parent_stack == NULL)
  1777. return -1;
  1778. it->parent_stack = parent_stack;
  1779. it->parent_stack_size = new_size;
  1780. }
  1781. item = it->parent_stack + it->parent_stack_used++;
  1782. item->parent = (ElementObject*)Py_NewRef(parent);
  1783. item->child_index = 0;
  1784. return 0;
  1785. }
  1786. static PyObject *
  1787. elementiter_next(ElementIterObject *it)
  1788. {
  1789. /* Sub-element iterator.
  1790. *
  1791. * A short note on gettext: this function serves both the iter() and
  1792. * itertext() methods to avoid code duplication. However, there are a few
  1793. * small differences in the way these iterations work. Namely:
  1794. * - itertext() only yields text from nodes that have it, and continues
  1795. * iterating when a node doesn't have text (so it doesn't return any
  1796. * node like iter())
  1797. * - itertext() also has to handle tail, after finishing with all the
  1798. * children of a node.
  1799. */
  1800. int rc;
  1801. ElementObject *elem;
  1802. PyObject *text;
  1803. while (1) {
  1804. /* Handle the case reached in the beginning and end of iteration, where
  1805. * the parent stack is empty. If root_element is NULL and we're here, the
  1806. * iterator is exhausted.
  1807. */
  1808. if (!it->parent_stack_used) {
  1809. if (!it->root_element) {
  1810. PyErr_SetNone(PyExc_StopIteration);
  1811. return NULL;
  1812. }
  1813. elem = it->root_element; /* steals a reference */
  1814. it->root_element = NULL;
  1815. }
  1816. else {
  1817. /* See if there are children left to traverse in the current parent. If
  1818. * yes, visit the next child. If not, pop the stack and try again.
  1819. */
  1820. ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
  1821. Py_ssize_t child_index = item->child_index;
  1822. ElementObjectExtra *extra;
  1823. elem = item->parent;
  1824. extra = elem->extra;
  1825. if (!extra || child_index >= extra->length) {
  1826. it->parent_stack_used--;
  1827. /* Note that extra condition on it->parent_stack_used here;
  1828. * this is because itertext() is supposed to only return *inner*
  1829. * text, not text following the element it began iteration with.
  1830. */
  1831. if (it->gettext && it->parent_stack_used) {
  1832. text = element_get_tail(elem);
  1833. goto gettext;
  1834. }
  1835. Py_DECREF(elem);
  1836. continue;
  1837. }
  1838. #ifndef NDEBUG
  1839. PyTypeObject *tp = Py_TYPE(it);
  1840. elementtreestate *st = get_elementtree_state_by_type(tp);
  1841. assert(Element_Check(st, extra->children[child_index]));
  1842. #endif
  1843. elem = (ElementObject *)Py_NewRef(extra->children[child_index]);
  1844. item->child_index++;
  1845. }
  1846. if (parent_stack_push_new(it, elem) < 0) {
  1847. Py_DECREF(elem);
  1848. PyErr_NoMemory();
  1849. return NULL;
  1850. }
  1851. if (it->gettext) {
  1852. text = element_get_text(elem);
  1853. goto gettext;
  1854. }
  1855. if (it->sought_tag == Py_None)
  1856. return (PyObject *)elem;
  1857. rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
  1858. if (rc > 0)
  1859. return (PyObject *)elem;
  1860. Py_DECREF(elem);
  1861. if (rc < 0)
  1862. return NULL;
  1863. continue;
  1864. gettext:
  1865. if (!text) {
  1866. Py_DECREF(elem);
  1867. return NULL;
  1868. }
  1869. if (text == Py_None) {
  1870. Py_DECREF(elem);
  1871. }
  1872. else {
  1873. Py_INCREF(text);
  1874. Py_DECREF(elem);
  1875. rc = PyObject_IsTrue(text);
  1876. if (rc > 0)
  1877. return text;
  1878. Py_DECREF(text);
  1879. if (rc < 0)
  1880. return NULL;
  1881. }
  1882. }
  1883. return NULL;
  1884. }
/* Slot table of the element iterator type: deallocation, GC traversal and
 * the iterator protocol (iter() returns the iterator itself). */
static PyType_Slot elementiter_slots[] = {
    {Py_tp_dealloc, elementiter_dealloc},
    {Py_tp_traverse, elementiter_traverse},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, elementiter_next},
    {0, NULL},  /* sentinel */
};
/* Type specification of the element iterator: a GC-enabled, immutable type
 * that cannot be instantiated from Python code. */
static PyType_Spec elementiter_spec = {
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    .name = "_elementtree._element_iterator",
    .basicsize = sizeof(ElementIterObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = elementiter_slots,
};
  1901. #define INIT_PARENT_STACK_SIZE 8
  1902. static PyObject *
  1903. create_elementiter(elementtreestate *st, ElementObject *self, PyObject *tag,
  1904. int gettext)
  1905. {
  1906. ElementIterObject *it;
  1907. it = PyObject_GC_New(ElementIterObject, st->ElementIter_Type);
  1908. if (!it)
  1909. return NULL;
  1910. it->sought_tag = Py_NewRef(tag);
  1911. it->gettext = gettext;
  1912. it->root_element = (ElementObject*)Py_NewRef(self);
  1913. it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
  1914. if (it->parent_stack == NULL) {
  1915. Py_DECREF(it);
  1916. PyErr_NoMemory();
  1917. return NULL;
  1918. }
  1919. it->parent_stack_used = 0;
  1920. it->parent_stack_size = INIT_PARENT_STACK_SIZE;
  1921. PyObject_GC_Track(it);
  1922. return (PyObject *)it;
  1923. }
  1924. /* ==================================================================== */
  1925. /* the tree builder type */
/* State of a TreeBuilder instance.  All PyObject pointers below are owned
 * references that are visited in treebuilder_gc_traverse and released in
 * treebuilder_gc_clear. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */
    PyObject *last_for_tail; /* most recently created node that takes a tail */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* Factories set by __init__; NULL means "not set". */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* Flags taken from the __init__ arguments of the same name; forced to 0
     * when the corresponding factory is unset. */
    char insert_comments;
    char insert_pis;
    elementtreestate *state; /* module state, looked up in treebuilder_new */
} TreeBuilderObject;

/* True when `op` is exactly a TreeBuilder (subclasses do not match). */
#define TreeBuilder_CheckExact(st, op) Py_IS_TYPE((op), (st)->TreeBuilder_Type)
  1951. /* -------------------------------------------------------------------- */
  1952. /* constructor and destructor */
  1953. static PyObject *
  1954. treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  1955. {
  1956. TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
  1957. if (t != NULL) {
  1958. t->root = NULL;
  1959. t->this = Py_NewRef(Py_None);
  1960. t->last = Py_NewRef(Py_None);
  1961. t->data = NULL;
  1962. t->element_factory = NULL;
  1963. t->comment_factory = NULL;
  1964. t->pi_factory = NULL;
  1965. t->stack = PyList_New(20);
  1966. if (!t->stack) {
  1967. Py_DECREF(t->this);
  1968. Py_DECREF(t->last);
  1969. Py_DECREF((PyObject *) t);
  1970. return NULL;
  1971. }
  1972. t->index = 0;
  1973. t->events_append = NULL;
  1974. t->start_event_obj = t->end_event_obj = NULL;
  1975. t->start_ns_event_obj = t->end_ns_event_obj = NULL;
  1976. t->comment_event_obj = t->pi_event_obj = NULL;
  1977. t->insert_comments = t->insert_pis = 0;
  1978. t->state = get_elementtree_state_by_type(type);
  1979. }
  1980. return (PyObject *)t;
  1981. }
  1982. /*[clinic input]
  1983. _elementtree.TreeBuilder.__init__
  1984. element_factory: object = None
  1985. *
  1986. comment_factory: object = None
  1987. pi_factory: object = None
  1988. insert_comments: bool = False
  1989. insert_pis: bool = False
  1990. [clinic start generated code]*/
  1991. static int
  1992. _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
  1993. PyObject *element_factory,
  1994. PyObject *comment_factory,
  1995. PyObject *pi_factory,
  1996. int insert_comments, int insert_pis)
  1997. /*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
  1998. {
  1999. if (element_factory != Py_None) {
  2000. Py_XSETREF(self->element_factory, Py_NewRef(element_factory));
  2001. } else {
  2002. Py_CLEAR(self->element_factory);
  2003. }
  2004. if (comment_factory == Py_None) {
  2005. elementtreestate *st = self->state;
  2006. comment_factory = st->comment_factory;
  2007. }
  2008. if (comment_factory) {
  2009. Py_XSETREF(self->comment_factory, Py_NewRef(comment_factory));
  2010. self->insert_comments = insert_comments;
  2011. } else {
  2012. Py_CLEAR(self->comment_factory);
  2013. self->insert_comments = 0;
  2014. }
  2015. if (pi_factory == Py_None) {
  2016. elementtreestate *st = self->state;
  2017. pi_factory = st->pi_factory;
  2018. }
  2019. if (pi_factory) {
  2020. Py_XSETREF(self->pi_factory, Py_NewRef(pi_factory));
  2021. self->insert_pis = insert_pis;
  2022. } else {
  2023. Py_CLEAR(self->pi_factory);
  2024. self->insert_pis = 0;
  2025. }
  2026. return 0;
  2027. }
/* GC traversal of a TreeBuilder: reports the object's type and every object
 * reference stored in it to `visit`.  Py_VISIT returns early from this
 * function when `visit` reports a non-zero result. */
static int
treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    /* event objects */
    Py_VISIT(self->pi_event_obj);
    Py_VISIT(self->comment_event_obj);
    Py_VISIT(self->end_ns_event_obj);
    Py_VISIT(self->start_ns_event_obj);
    Py_VISIT(self->end_event_obj);
    Py_VISIT(self->start_event_obj);
    Py_VISIT(self->events_append);
    /* tree state */
    Py_VISIT(self->root);
    Py_VISIT(self->this);
    Py_VISIT(self->last);
    Py_VISIT(self->last_for_tail);
    Py_VISIT(self->data);
    Py_VISIT(self->stack);
    /* factories */
    Py_VISIT(self->pi_factory);
    Py_VISIT(self->comment_factory);
    Py_VISIT(self->element_factory);
    return 0;
}
/* Release every object reference held by a TreeBuilder and reset the fields
 * to NULL.  Also called from treebuilder_dealloc.  Always returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event objects */
    Py_CLEAR(self->pi_event_obj);
    Py_CLEAR(self->comment_event_obj);
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    /* tree state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->last_for_tail);
    Py_CLEAR(self->this);
    /* factories */
    Py_CLEAR(self->pi_factory);
    Py_CLEAR(self->comment_factory);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
  2071. static void
  2072. treebuilder_dealloc(TreeBuilderObject *self)
  2073. {
  2074. PyTypeObject *tp = Py_TYPE(self);
  2075. PyObject_GC_UnTrack(self);
  2076. treebuilder_gc_clear(self);
  2077. tp->tp_free(self);
  2078. Py_DECREF(tp);
  2079. }
  2080. /* -------------------------------------------------------------------- */
  2081. /* helpers for handling of arbitrary element-like objects */
  2082. /*[clinic input]
  2083. _elementtree._set_factories
  2084. comment_factory: object
  2085. pi_factory: object
  2086. /
  2087. Change the factories used to create comments and processing instructions.
  2088. For internal use only.
  2089. [clinic start generated code]*/
  2090. static PyObject *
  2091. _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
  2092. PyObject *pi_factory)
  2093. /*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
  2094. {
  2095. elementtreestate *st = get_elementtree_state(module);
  2096. PyObject *old;
  2097. if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
  2098. PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
  2099. Py_TYPE(comment_factory)->tp_name);
  2100. return NULL;
  2101. }
  2102. if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
  2103. PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
  2104. Py_TYPE(pi_factory)->tp_name);
  2105. return NULL;
  2106. }
  2107. old = PyTuple_Pack(2,
  2108. st->comment_factory ? st->comment_factory : Py_None,
  2109. st->pi_factory ? st->pi_factory : Py_None);
  2110. if (comment_factory == Py_None) {
  2111. Py_CLEAR(st->comment_factory);
  2112. } else {
  2113. Py_XSETREF(st->comment_factory, Py_NewRef(comment_factory));
  2114. }
  2115. if (pi_factory == Py_None) {
  2116. Py_CLEAR(st->pi_factory);
  2117. } else {
  2118. Py_XSETREF(st->pi_factory, Py_NewRef(pi_factory));
  2119. }
  2120. return old;
  2121. }
  2122. static int
  2123. treebuilder_extend_element_text_or_tail(elementtreestate *st, PyObject *element,
  2124. PyObject **data, PyObject **dest,
  2125. PyObject *name)
  2126. {
  2127. /* Fast paths for the "almost always" cases. */
  2128. if (Element_CheckExact(st, element)) {
  2129. PyObject *dest_obj = JOIN_OBJ(*dest);
  2130. if (dest_obj == Py_None) {
  2131. *dest = JOIN_SET(*data, PyList_CheckExact(*data));
  2132. *data = NULL;
  2133. Py_DECREF(dest_obj);
  2134. return 0;
  2135. }
  2136. else if (JOIN_GET(*dest)) {
  2137. if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
  2138. return -1;
  2139. }
  2140. Py_CLEAR(*data);
  2141. return 0;
  2142. }
  2143. }
  2144. /* Fallback for the non-Element / non-trivial cases. */
  2145. {
  2146. int r;
  2147. PyObject* joined;
  2148. PyObject* previous = PyObject_GetAttr(element, name);
  2149. if (!previous)
  2150. return -1;
  2151. joined = list_join(*data);
  2152. if (!joined) {
  2153. Py_DECREF(previous);
  2154. return -1;
  2155. }
  2156. if (previous != Py_None) {
  2157. PyObject *tmp = PyNumber_Add(previous, joined);
  2158. Py_DECREF(joined);
  2159. Py_DECREF(previous);
  2160. if (!tmp)
  2161. return -1;
  2162. joined = tmp;
  2163. } else {
  2164. Py_DECREF(previous);
  2165. }
  2166. r = PyObject_SetAttr(element, name, joined);
  2167. Py_DECREF(joined);
  2168. if (r < 0)
  2169. return -1;
  2170. Py_CLEAR(*data);
  2171. return 0;
  2172. }
  2173. }
  2174. LOCAL(int)
  2175. treebuilder_flush_data(TreeBuilderObject* self)
  2176. {
  2177. if (!self->data) {
  2178. return 0;
  2179. }
  2180. elementtreestate *st = self->state;
  2181. if (!self->last_for_tail) {
  2182. PyObject *element = self->last;
  2183. return treebuilder_extend_element_text_or_tail(
  2184. st, element, &self->data,
  2185. &((ElementObject *) element)->text, st->str_text);
  2186. }
  2187. else {
  2188. PyObject *element = self->last_for_tail;
  2189. return treebuilder_extend_element_text_or_tail(
  2190. st, element, &self->data,
  2191. &((ElementObject *) element)->tail, st->str_tail);
  2192. }
  2193. }
  2194. static int
  2195. treebuilder_add_subelement(elementtreestate *st, PyObject *element,
  2196. PyObject *child)
  2197. {
  2198. if (Element_CheckExact(st, element)) {
  2199. ElementObject *elem = (ElementObject *) element;
  2200. return element_add_subelement(st, elem, child);
  2201. }
  2202. else {
  2203. PyObject *res;
  2204. res = PyObject_CallMethodOneArg(element, st->str_append, child);
  2205. if (res == NULL)
  2206. return -1;
  2207. Py_DECREF(res);
  2208. return 0;
  2209. }
  2210. }
  2211. LOCAL(int)
  2212. treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
  2213. PyObject *node)
  2214. {
  2215. if (action != NULL) {
  2216. PyObject *res;
  2217. PyObject *event = PyTuple_Pack(2, action, node);
  2218. if (event == NULL)
  2219. return -1;
  2220. res = PyObject_CallOneArg(self->events_append, event);
  2221. Py_DECREF(event);
  2222. if (res == NULL)
  2223. return -1;
  2224. Py_DECREF(res);
  2225. }
  2226. return 0;
  2227. }
  2228. /* -------------------------------------------------------------------- */
  2229. /* handlers */
  2230. LOCAL(PyObject*)
  2231. treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
  2232. PyObject* attrib)
  2233. {
  2234. PyObject* node;
  2235. PyObject* this;
  2236. elementtreestate *st = self->state;
  2237. if (treebuilder_flush_data(self) < 0) {
  2238. return NULL;
  2239. }
  2240. if (!self->element_factory) {
  2241. node = create_new_element(st, tag, attrib);
  2242. }
  2243. else if (attrib == NULL) {
  2244. attrib = PyDict_New();
  2245. if (!attrib)
  2246. return NULL;
  2247. node = PyObject_CallFunctionObjArgs(self->element_factory,
  2248. tag, attrib, NULL);
  2249. Py_DECREF(attrib);
  2250. }
  2251. else {
  2252. node = PyObject_CallFunctionObjArgs(self->element_factory,
  2253. tag, attrib, NULL);
  2254. }
  2255. if (!node) {
  2256. return NULL;
  2257. }
  2258. this = self->this;
  2259. Py_CLEAR(self->last_for_tail);
  2260. if (this != Py_None) {
  2261. if (treebuilder_add_subelement(st, this, node) < 0) {
  2262. goto error;
  2263. }
  2264. } else {
  2265. if (self->root) {
  2266. PyErr_SetString(
  2267. st->parseerror_obj,
  2268. "multiple elements on top level"
  2269. );
  2270. goto error;
  2271. }
  2272. self->root = Py_NewRef(node);
  2273. }
  2274. if (self->index < PyList_GET_SIZE(self->stack)) {
  2275. if (PyList_SetItem(self->stack, self->index, this) < 0)
  2276. goto error;
  2277. Py_INCREF(this);
  2278. } else {
  2279. if (PyList_Append(self->stack, this) < 0)
  2280. goto error;
  2281. }
  2282. self->index++;
  2283. Py_SETREF(self->this, Py_NewRef(node));
  2284. Py_SETREF(self->last, Py_NewRef(node));
  2285. if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
  2286. goto error;
  2287. return node;
  2288. error:
  2289. Py_DECREF(node);
  2290. return NULL;
  2291. }
  2292. LOCAL(PyObject*)
  2293. treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
  2294. {
  2295. if (!self->data) {
  2296. if (self->last == Py_None) {
  2297. /* ignore calls to data before the first call to start */
  2298. Py_RETURN_NONE;
  2299. }
  2300. /* store the first item as is */
  2301. self->data = Py_NewRef(data);
  2302. } else {
  2303. /* more than one item; use a list to collect items */
  2304. if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
  2305. PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
  2306. /* XXX this code path unused in Python 3? */
  2307. /* expat often generates single character data sections; handle
  2308. the most common case by resizing the existing string... */
  2309. Py_ssize_t size = PyBytes_GET_SIZE(self->data);
  2310. if (_PyBytes_Resize(&self->data, size + 1) < 0)
  2311. return NULL;
  2312. PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
  2313. } else if (PyList_CheckExact(self->data)) {
  2314. if (PyList_Append(self->data, data) < 0)
  2315. return NULL;
  2316. } else {
  2317. PyObject* list = PyList_New(2);
  2318. if (!list)
  2319. return NULL;
  2320. PyList_SET_ITEM(list, 0, Py_NewRef(self->data));
  2321. PyList_SET_ITEM(list, 1, Py_NewRef(data));
  2322. Py_SETREF(self->data, list);
  2323. }
  2324. }
  2325. Py_RETURN_NONE;
  2326. }
  2327. LOCAL(PyObject*)
  2328. treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
  2329. {
  2330. PyObject* item;
  2331. if (treebuilder_flush_data(self) < 0) {
  2332. return NULL;
  2333. }
  2334. if (self->index == 0) {
  2335. PyErr_SetString(
  2336. PyExc_IndexError,
  2337. "pop from empty stack"
  2338. );
  2339. return NULL;
  2340. }
  2341. item = self->last;
  2342. self->last = Py_NewRef(self->this);
  2343. Py_XSETREF(self->last_for_tail, self->last);
  2344. self->index--;
  2345. self->this = Py_NewRef(PyList_GET_ITEM(self->stack, self->index));
  2346. Py_DECREF(item);
  2347. if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
  2348. return NULL;
  2349. return Py_NewRef(self->last);
  2350. }
  2351. LOCAL(PyObject*)
  2352. treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
  2353. {
  2354. PyObject* comment;
  2355. PyObject* this;
  2356. if (treebuilder_flush_data(self) < 0) {
  2357. return NULL;
  2358. }
  2359. if (self->comment_factory) {
  2360. comment = PyObject_CallOneArg(self->comment_factory, text);
  2361. if (!comment)
  2362. return NULL;
  2363. this = self->this;
  2364. if (self->insert_comments && this != Py_None) {
  2365. if (treebuilder_add_subelement(self->state, this, comment) < 0) {
  2366. goto error;
  2367. }
  2368. Py_XSETREF(self->last_for_tail, Py_NewRef(comment));
  2369. }
  2370. } else {
  2371. comment = Py_NewRef(text);
  2372. }
  2373. if (self->events_append && self->comment_event_obj) {
  2374. if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
  2375. goto error;
  2376. }
  2377. return comment;
  2378. error:
  2379. Py_DECREF(comment);
  2380. return NULL;
  2381. }
  2382. LOCAL(PyObject*)
  2383. treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
  2384. {
  2385. PyObject* pi;
  2386. PyObject* this;
  2387. PyObject* stack[2] = {target, text};
  2388. if (treebuilder_flush_data(self) < 0) {
  2389. return NULL;
  2390. }
  2391. if (self->pi_factory) {
  2392. pi = _PyObject_FastCall(self->pi_factory, stack, 2);
  2393. if (!pi) {
  2394. return NULL;
  2395. }
  2396. this = self->this;
  2397. if (self->insert_pis && this != Py_None) {
  2398. if (treebuilder_add_subelement(self->state, this, pi) < 0) {
  2399. goto error;
  2400. }
  2401. Py_XSETREF(self->last_for_tail, Py_NewRef(pi));
  2402. }
  2403. } else {
  2404. pi = PyTuple_Pack(2, target, text);
  2405. if (!pi) {
  2406. return NULL;
  2407. }
  2408. }
  2409. if (self->events_append && self->pi_event_obj) {
  2410. if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
  2411. goto error;
  2412. }
  2413. return pi;
  2414. error:
  2415. Py_DECREF(pi);
  2416. return NULL;
  2417. }
  2418. LOCAL(PyObject*)
  2419. treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
  2420. {
  2421. PyObject* parcel;
  2422. if (self->events_append && self->start_ns_event_obj) {
  2423. parcel = PyTuple_Pack(2, prefix, uri);
  2424. if (!parcel) {
  2425. return NULL;
  2426. }
  2427. if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
  2428. Py_DECREF(parcel);
  2429. return NULL;
  2430. }
  2431. Py_DECREF(parcel);
  2432. }
  2433. Py_RETURN_NONE;
  2434. }
  2435. LOCAL(PyObject*)
  2436. treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
  2437. {
  2438. if (self->events_append && self->end_ns_event_obj) {
  2439. if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
  2440. return NULL;
  2441. }
  2442. }
  2443. Py_RETURN_NONE;
  2444. }
  2445. /* -------------------------------------------------------------------- */
  2446. /* methods (in alphabetical order) */
  2447. /*[clinic input]
  2448. _elementtree.TreeBuilder.data
  2449. data: object
  2450. /
  2451. [clinic start generated code]*/
  2452. static PyObject *
  2453. _elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
  2454. /*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
  2455. {
  2456. return treebuilder_handle_data(self, data);
  2457. }
  2458. /*[clinic input]
  2459. _elementtree.TreeBuilder.end
  2460. tag: object
  2461. /
  2462. [clinic start generated code]*/
  2463. static PyObject *
  2464. _elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
  2465. /*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
  2466. {
  2467. return treebuilder_handle_end(self, tag);
  2468. }
  2469. /*[clinic input]
  2470. _elementtree.TreeBuilder.comment
  2471. text: object
  2472. /
  2473. [clinic start generated code]*/
  2474. static PyObject *
  2475. _elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
  2476. /*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
  2477. {
  2478. return treebuilder_handle_comment(self, text);
  2479. }
  2480. /*[clinic input]
  2481. _elementtree.TreeBuilder.pi
  2482. target: object
  2483. text: object = None
  2484. /
  2485. [clinic start generated code]*/
  2486. static PyObject *
  2487. _elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
  2488. PyObject *text)
  2489. /*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
  2490. {
  2491. return treebuilder_handle_pi(self, target, text);
  2492. }
  2493. LOCAL(PyObject*)
  2494. treebuilder_done(TreeBuilderObject* self)
  2495. {
  2496. PyObject* res;
  2497. /* FIXME: check stack size? */
  2498. if (self->root)
  2499. res = self->root;
  2500. else
  2501. res = Py_None;
  2502. return Py_NewRef(res);
  2503. }
  2504. /*[clinic input]
  2505. _elementtree.TreeBuilder.close
  2506. [clinic start generated code]*/
  2507. static PyObject *
  2508. _elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
  2509. /*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
  2510. {
  2511. return treebuilder_done(self);
  2512. }
  2513. /*[clinic input]
  2514. _elementtree.TreeBuilder.start
  2515. tag: object
  2516. attrs: object(subclass_of='&PyDict_Type')
  2517. /
  2518. [clinic start generated code]*/
  2519. static PyObject *
  2520. _elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
  2521. PyObject *attrs)
  2522. /*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
  2523. {
  2524. return treebuilder_handle_start(self, tag, attrs);
  2525. }
  2526. /* ==================================================================== */
  2527. /* the expat interface */
/* Look up an expat entry point through the function table stored in the
   module state (st->expat_capi). */
#define EXPAT(st, func) ((st)->expat_capi->func)

/* Allocator hooks handed to the expat parser constructor so that the
   parser's memory comes from the Python object allocator. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};

/* Instance layout of the XMLParser type. */
typedef struct {
    PyObject_HEAD

    /* underlying expat parser; NULL until __init__ creates it */
    XML_Parser parser;

    /* object receiving the parse events (set in __init__) */
    PyObject *target;
    /* dict consulted by the default handler to resolve entity names */
    PyObject *entity;
    /* dict caching raw expat names (bytes) -> decoded universal names */
    PyObject *names;

    /* Methods looked up on the target in __init__; each one stays NULL
       when the target lacks the corresponding attribute. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;
    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

    /* module state, obtained from elementtree_module in tp_new */
    elementtreestate *state;
    /* strong reference to the owning module (taken in tp_new) */
    PyObject *elementtree_module;
} XMLParserObject;
  2549. /* helpers */
  2550. LOCAL(PyObject*)
  2551. makeuniversal(XMLParserObject* self, const char* string)
  2552. {
  2553. /* convert a UTF-8 tag/attribute name from the expat parser
  2554. to a universal name string */
  2555. Py_ssize_t size = (Py_ssize_t) strlen(string);
  2556. PyObject* key;
  2557. PyObject* value;
  2558. /* look the 'raw' name up in the names dictionary */
  2559. key = PyBytes_FromStringAndSize(string, size);
  2560. if (!key)
  2561. return NULL;
  2562. value = Py_XNewRef(PyDict_GetItemWithError(self->names, key));
  2563. if (value == NULL && !PyErr_Occurred()) {
  2564. /* new name. convert to universal name, and decode as
  2565. necessary */
  2566. PyObject* tag;
  2567. char* p;
  2568. Py_ssize_t i;
  2569. /* look for namespace separator */
  2570. for (i = 0; i < size; i++)
  2571. if (string[i] == '}')
  2572. break;
  2573. if (i != size) {
  2574. /* convert to universal name */
  2575. tag = PyBytes_FromStringAndSize(NULL, size+1);
  2576. if (tag == NULL) {
  2577. Py_DECREF(key);
  2578. return NULL;
  2579. }
  2580. p = PyBytes_AS_STRING(tag);
  2581. p[0] = '{';
  2582. memcpy(p+1, string, size);
  2583. size++;
  2584. } else {
  2585. /* plain name; use key as tag */
  2586. tag = Py_NewRef(key);
  2587. }
  2588. /* decode universal name */
  2589. p = PyBytes_AS_STRING(tag);
  2590. value = PyUnicode_DecodeUTF8(p, size, "strict");
  2591. Py_DECREF(tag);
  2592. if (!value) {
  2593. Py_DECREF(key);
  2594. return NULL;
  2595. }
  2596. /* add to names dictionary */
  2597. if (PyDict_SetItem(self->names, key, value) < 0) {
  2598. Py_DECREF(key);
  2599. Py_DECREF(value);
  2600. return NULL;
  2601. }
  2602. }
  2603. Py_DECREF(key);
  2604. return value;
  2605. }
  2606. /* Set the ParseError exception with the given parameters.
  2607. * If message is not NULL, it's used as the error string. Otherwise, the
  2608. * message string is the default for the given error_code.
  2609. */
  2610. static void
  2611. expat_set_error(elementtreestate *st, enum XML_Error error_code,
  2612. Py_ssize_t line, Py_ssize_t column, const char *message)
  2613. {
  2614. PyObject *errmsg, *error, *position, *code;
  2615. errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
  2616. message ? message : EXPAT(st, ErrorString)(error_code),
  2617. line, column);
  2618. if (errmsg == NULL)
  2619. return;
  2620. error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
  2621. Py_DECREF(errmsg);
  2622. if (!error)
  2623. return;
  2624. /* Add code and position attributes */
  2625. code = PyLong_FromLong((long)error_code);
  2626. if (!code) {
  2627. Py_DECREF(error);
  2628. return;
  2629. }
  2630. if (PyObject_SetAttrString(error, "code", code) == -1) {
  2631. Py_DECREF(error);
  2632. Py_DECREF(code);
  2633. return;
  2634. }
  2635. Py_DECREF(code);
  2636. position = Py_BuildValue("(nn)", line, column);
  2637. if (!position) {
  2638. Py_DECREF(error);
  2639. return;
  2640. }
  2641. if (PyObject_SetAttrString(error, "position", position) == -1) {
  2642. Py_DECREF(error);
  2643. Py_DECREF(position);
  2644. return;
  2645. }
  2646. Py_DECREF(position);
  2647. PyErr_SetObject(st->parseerror_obj, error);
  2648. Py_DECREF(error);
  2649. }
  2650. /* -------------------------------------------------------------------- */
  2651. /* handlers */
  2652. static void
  2653. expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
  2654. int data_len)
  2655. {
  2656. PyObject* key;
  2657. PyObject* value;
  2658. PyObject* res;
  2659. if (data_len < 2 || data_in[0] != '&')
  2660. return;
  2661. if (PyErr_Occurred())
  2662. return;
  2663. key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
  2664. if (!key)
  2665. return;
  2666. value = PyDict_GetItemWithError(self->entity, key);
  2667. elementtreestate *st = self->state;
  2668. if (value) {
  2669. if (TreeBuilder_CheckExact(st, self->target))
  2670. res = treebuilder_handle_data(
  2671. (TreeBuilderObject*) self->target, value
  2672. );
  2673. else if (self->handle_data)
  2674. res = PyObject_CallOneArg(self->handle_data, value);
  2675. else
  2676. res = NULL;
  2677. Py_XDECREF(res);
  2678. } else if (!PyErr_Occurred()) {
  2679. /* Report the first error, not the last */
  2680. char message[128] = "undefined entity ";
  2681. strncat(message, data_in, data_len < 100?data_len:100);
  2682. expat_set_error(
  2683. st,
  2684. XML_ERROR_UNDEFINED_ENTITY,
  2685. EXPAT(st, GetErrorLineNumber)(self->parser),
  2686. EXPAT(st, GetErrorColumnNumber)(self->parser),
  2687. message
  2688. );
  2689. }
  2690. Py_DECREF(key);
  2691. }
  2692. static void
  2693. expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
  2694. const XML_Char **attrib_in)
  2695. {
  2696. PyObject* res;
  2697. PyObject* tag;
  2698. PyObject* attrib;
  2699. int ok;
  2700. if (PyErr_Occurred())
  2701. return;
  2702. /* tag name */
  2703. tag = makeuniversal(self, tag_in);
  2704. if (!tag)
  2705. return; /* parser will look for errors */
  2706. /* attributes */
  2707. if (attrib_in[0]) {
  2708. attrib = PyDict_New();
  2709. if (!attrib) {
  2710. Py_DECREF(tag);
  2711. return;
  2712. }
  2713. while (attrib_in[0] && attrib_in[1]) {
  2714. PyObject* key = makeuniversal(self, attrib_in[0]);
  2715. if (key == NULL) {
  2716. Py_DECREF(attrib);
  2717. Py_DECREF(tag);
  2718. return;
  2719. }
  2720. PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
  2721. if (value == NULL) {
  2722. Py_DECREF(key);
  2723. Py_DECREF(attrib);
  2724. Py_DECREF(tag);
  2725. return;
  2726. }
  2727. ok = PyDict_SetItem(attrib, key, value);
  2728. Py_DECREF(value);
  2729. Py_DECREF(key);
  2730. if (ok < 0) {
  2731. Py_DECREF(attrib);
  2732. Py_DECREF(tag);
  2733. return;
  2734. }
  2735. attrib_in += 2;
  2736. }
  2737. } else {
  2738. attrib = NULL;
  2739. }
  2740. elementtreestate *st = self->state;
  2741. if (TreeBuilder_CheckExact(st, self->target)) {
  2742. /* shortcut */
  2743. res = treebuilder_handle_start((TreeBuilderObject*) self->target,
  2744. tag, attrib);
  2745. }
  2746. else if (self->handle_start) {
  2747. if (attrib == NULL) {
  2748. attrib = PyDict_New();
  2749. if (!attrib) {
  2750. Py_DECREF(tag);
  2751. return;
  2752. }
  2753. }
  2754. res = PyObject_CallFunctionObjArgs(self->handle_start,
  2755. tag, attrib, NULL);
  2756. } else
  2757. res = NULL;
  2758. Py_DECREF(tag);
  2759. Py_XDECREF(attrib);
  2760. Py_XDECREF(res);
  2761. }
  2762. static void
  2763. expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
  2764. int data_len)
  2765. {
  2766. PyObject* data;
  2767. PyObject* res;
  2768. if (PyErr_Occurred())
  2769. return;
  2770. data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
  2771. if (!data)
  2772. return; /* parser will look for errors */
  2773. elementtreestate *st = self->state;
  2774. if (TreeBuilder_CheckExact(st, self->target))
  2775. /* shortcut */
  2776. res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
  2777. else if (self->handle_data)
  2778. res = PyObject_CallOneArg(self->handle_data, data);
  2779. else
  2780. res = NULL;
  2781. Py_DECREF(data);
  2782. Py_XDECREF(res);
  2783. }
  2784. static void
  2785. expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
  2786. {
  2787. PyObject* tag;
  2788. PyObject* res = NULL;
  2789. if (PyErr_Occurred())
  2790. return;
  2791. elementtreestate *st = self->state;
  2792. if (TreeBuilder_CheckExact(st, self->target))
  2793. /* shortcut */
  2794. /* the standard tree builder doesn't look at the end tag */
  2795. res = treebuilder_handle_end(
  2796. (TreeBuilderObject*) self->target, Py_None
  2797. );
  2798. else if (self->handle_end) {
  2799. tag = makeuniversal(self, tag_in);
  2800. if (tag) {
  2801. res = PyObject_CallOneArg(self->handle_end, tag);
  2802. Py_DECREF(tag);
  2803. }
  2804. }
  2805. Py_XDECREF(res);
  2806. }
  2807. static void
  2808. expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
  2809. const XML_Char *uri_in)
  2810. {
  2811. PyObject* res = NULL;
  2812. PyObject* uri;
  2813. PyObject* prefix;
  2814. PyObject* stack[2];
  2815. if (PyErr_Occurred())
  2816. return;
  2817. if (!uri_in)
  2818. uri_in = "";
  2819. if (!prefix_in)
  2820. prefix_in = "";
  2821. elementtreestate *st = self->state;
  2822. if (TreeBuilder_CheckExact(st, self->target)) {
  2823. /* shortcut - TreeBuilder does not actually implement .start_ns() */
  2824. TreeBuilderObject *target = (TreeBuilderObject*) self->target;
  2825. if (target->events_append && target->start_ns_event_obj) {
  2826. prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
  2827. if (!prefix)
  2828. return;
  2829. uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
  2830. if (!uri) {
  2831. Py_DECREF(prefix);
  2832. return;
  2833. }
  2834. res = treebuilder_handle_start_ns(target, prefix, uri);
  2835. Py_DECREF(uri);
  2836. Py_DECREF(prefix);
  2837. }
  2838. } else if (self->handle_start_ns) {
  2839. prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
  2840. if (!prefix)
  2841. return;
  2842. uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
  2843. if (!uri) {
  2844. Py_DECREF(prefix);
  2845. return;
  2846. }
  2847. stack[0] = prefix;
  2848. stack[1] = uri;
  2849. res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
  2850. Py_DECREF(uri);
  2851. Py_DECREF(prefix);
  2852. }
  2853. Py_XDECREF(res);
  2854. }
  2855. static void
  2856. expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
  2857. {
  2858. PyObject *res = NULL;
  2859. PyObject* prefix;
  2860. if (PyErr_Occurred())
  2861. return;
  2862. if (!prefix_in)
  2863. prefix_in = "";
  2864. elementtreestate *st = self->state;
  2865. if (TreeBuilder_CheckExact(st, self->target)) {
  2866. /* shortcut - TreeBuilder does not actually implement .end_ns() */
  2867. TreeBuilderObject *target = (TreeBuilderObject*) self->target;
  2868. if (target->events_append && target->end_ns_event_obj) {
  2869. res = treebuilder_handle_end_ns(target, Py_None);
  2870. }
  2871. } else if (self->handle_end_ns) {
  2872. prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
  2873. if (!prefix)
  2874. return;
  2875. res = PyObject_CallOneArg(self->handle_end_ns, prefix);
  2876. Py_DECREF(prefix);
  2877. }
  2878. Py_XDECREF(res);
  2879. }
  2880. static void
  2881. expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
  2882. {
  2883. PyObject* comment;
  2884. PyObject* res;
  2885. if (PyErr_Occurred())
  2886. return;
  2887. elementtreestate *st = self->state;
  2888. if (TreeBuilder_CheckExact(st, self->target)) {
  2889. /* shortcut */
  2890. TreeBuilderObject *target = (TreeBuilderObject*) self->target;
  2891. comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
  2892. if (!comment)
  2893. return; /* parser will look for errors */
  2894. res = treebuilder_handle_comment(target, comment);
  2895. Py_XDECREF(res);
  2896. Py_DECREF(comment);
  2897. } else if (self->handle_comment) {
  2898. comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
  2899. if (!comment)
  2900. return;
  2901. res = PyObject_CallOneArg(self->handle_comment, comment);
  2902. Py_XDECREF(res);
  2903. Py_DECREF(comment);
  2904. }
  2905. }
  2906. static void
  2907. expat_start_doctype_handler(XMLParserObject *self,
  2908. const XML_Char *doctype_name,
  2909. const XML_Char *sysid,
  2910. const XML_Char *pubid,
  2911. int has_internal_subset)
  2912. {
  2913. PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
  2914. PyObject *res;
  2915. if (PyErr_Occurred())
  2916. return;
  2917. doctype_name_obj = makeuniversal(self, doctype_name);
  2918. if (!doctype_name_obj)
  2919. return;
  2920. if (sysid) {
  2921. sysid_obj = makeuniversal(self, sysid);
  2922. if (!sysid_obj) {
  2923. Py_DECREF(doctype_name_obj);
  2924. return;
  2925. }
  2926. } else {
  2927. sysid_obj = Py_NewRef(Py_None);
  2928. }
  2929. if (pubid) {
  2930. pubid_obj = makeuniversal(self, pubid);
  2931. if (!pubid_obj) {
  2932. Py_DECREF(doctype_name_obj);
  2933. Py_DECREF(sysid_obj);
  2934. return;
  2935. }
  2936. } else {
  2937. pubid_obj = Py_NewRef(Py_None);
  2938. }
  2939. elementtreestate *st = self->state;
  2940. /* If the target has a handler for doctype, call it. */
  2941. if (self->handle_doctype) {
  2942. res = PyObject_CallFunctionObjArgs(self->handle_doctype,
  2943. doctype_name_obj, pubid_obj,
  2944. sysid_obj, NULL);
  2945. Py_XDECREF(res);
  2946. }
  2947. else if (_PyObject_LookupAttr((PyObject *)self, st->str_doctype, &res) > 0) {
  2948. (void)PyErr_WarnEx(PyExc_RuntimeWarning,
  2949. "The doctype() method of XMLParser is ignored. "
  2950. "Define doctype() method on the TreeBuilder target.",
  2951. 1);
  2952. Py_DECREF(res);
  2953. }
  2954. Py_DECREF(doctype_name_obj);
  2955. Py_DECREF(pubid_obj);
  2956. Py_DECREF(sysid_obj);
  2957. }
  2958. static void
  2959. expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
  2960. const XML_Char* data_in)
  2961. {
  2962. PyObject* pi_target;
  2963. PyObject* data;
  2964. PyObject* res;
  2965. PyObject* stack[2];
  2966. if (PyErr_Occurred())
  2967. return;
  2968. elementtreestate *st = self->state;
  2969. if (TreeBuilder_CheckExact(st, self->target)) {
  2970. /* shortcut */
  2971. TreeBuilderObject *target = (TreeBuilderObject*) self->target;
  2972. if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
  2973. pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
  2974. if (!pi_target)
  2975. goto error;
  2976. data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
  2977. if (!data)
  2978. goto error;
  2979. res = treebuilder_handle_pi(target, pi_target, data);
  2980. Py_XDECREF(res);
  2981. Py_DECREF(data);
  2982. Py_DECREF(pi_target);
  2983. }
  2984. } else if (self->handle_pi) {
  2985. pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
  2986. if (!pi_target)
  2987. goto error;
  2988. data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
  2989. if (!data)
  2990. goto error;
  2991. stack[0] = pi_target;
  2992. stack[1] = data;
  2993. res = _PyObject_FastCall(self->handle_pi, stack, 2);
  2994. Py_XDECREF(res);
  2995. Py_DECREF(data);
  2996. Py_DECREF(pi_target);
  2997. }
  2998. return;
  2999. error:
  3000. Py_XDECREF(pi_target);
  3001. return;
  3002. }
  3003. /* -------------------------------------------------------------------- */
  3004. static PyObject *
  3005. xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  3006. {
  3007. XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
  3008. if (self) {
  3009. self->parser = NULL;
  3010. self->target = self->entity = self->names = NULL;
  3011. self->handle_start_ns = self->handle_end_ns = NULL;
  3012. self->handle_start = self->handle_data = self->handle_end = NULL;
  3013. self->handle_comment = self->handle_pi = self->handle_close = NULL;
  3014. self->handle_doctype = NULL;
  3015. self->elementtree_module = PyType_GetModuleByDef(type, &elementtreemodule);
  3016. assert(self->elementtree_module != NULL);
  3017. Py_INCREF(self->elementtree_module);
  3018. // See gh-111784 for explanation why is reference to module needed here.
  3019. self->state = get_elementtree_state(self->elementtree_module);
  3020. }
  3021. return (PyObject *)self;
  3022. }
  3023. static int
  3024. ignore_attribute_error(PyObject *value)
  3025. {
  3026. if (value == NULL) {
  3027. if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
  3028. return -1;
  3029. }
  3030. PyErr_Clear();
  3031. }
  3032. return 0;
  3033. }
  3034. /*[clinic input]
  3035. _elementtree.XMLParser.__init__
  3036. *
  3037. target: object = None
  3038. encoding: str(accept={str, NoneType}) = None
  3039. [clinic start generated code]*/
  3040. static int
  3041. _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
  3042. const char *encoding)
  3043. /*[clinic end generated code: output=3ae45ec6cdf344e4 input=7e716dd6e4f3e439]*/
  3044. {
  3045. self->entity = PyDict_New();
  3046. if (!self->entity)
  3047. return -1;
  3048. self->names = PyDict_New();
  3049. if (!self->names) {
  3050. Py_CLEAR(self->entity);
  3051. return -1;
  3052. }
  3053. elementtreestate *st = self->state;
  3054. self->parser = EXPAT(st, ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
  3055. if (!self->parser) {
  3056. Py_CLEAR(self->entity);
  3057. Py_CLEAR(self->names);
  3058. PyErr_NoMemory();
  3059. return -1;
  3060. }
  3061. /* expat < 2.1.0 has no XML_SetHashSalt() */
  3062. if (EXPAT(st, SetHashSalt) != NULL) {
  3063. EXPAT(st, SetHashSalt)(self->parser,
  3064. (unsigned long)_Py_HashSecret.expat.hashsalt);
  3065. }
  3066. if (target != Py_None) {
  3067. Py_INCREF(target);
  3068. } else {
  3069. target = treebuilder_new(st->TreeBuilder_Type, NULL, NULL);
  3070. if (!target) {
  3071. Py_CLEAR(self->entity);
  3072. Py_CLEAR(self->names);
  3073. return -1;
  3074. }
  3075. }
  3076. self->target = target;
  3077. self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
  3078. if (ignore_attribute_error(self->handle_start_ns)) {
  3079. return -1;
  3080. }
  3081. self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
  3082. if (ignore_attribute_error(self->handle_end_ns)) {
  3083. return -1;
  3084. }
  3085. self->handle_start = PyObject_GetAttrString(target, "start");
  3086. if (ignore_attribute_error(self->handle_start)) {
  3087. return -1;
  3088. }
  3089. self->handle_data = PyObject_GetAttrString(target, "data");
  3090. if (ignore_attribute_error(self->handle_data)) {
  3091. return -1;
  3092. }
  3093. self->handle_end = PyObject_GetAttrString(target, "end");
  3094. if (ignore_attribute_error(self->handle_end)) {
  3095. return -1;
  3096. }
  3097. self->handle_comment = PyObject_GetAttrString(target, "comment");
  3098. if (ignore_attribute_error(self->handle_comment)) {
  3099. return -1;
  3100. }
  3101. self->handle_pi = PyObject_GetAttrString(target, "pi");
  3102. if (ignore_attribute_error(self->handle_pi)) {
  3103. return -1;
  3104. }
  3105. self->handle_close = PyObject_GetAttrString(target, "close");
  3106. if (ignore_attribute_error(self->handle_close)) {
  3107. return -1;
  3108. }
  3109. self->handle_doctype = PyObject_GetAttrString(target, "doctype");
  3110. if (ignore_attribute_error(self->handle_doctype)) {
  3111. return -1;
  3112. }
  3113. /* configure parser */
  3114. EXPAT(st, SetUserData)(self->parser, self);
  3115. if (self->handle_start_ns || self->handle_end_ns)
  3116. EXPAT(st, SetNamespaceDeclHandler)(
  3117. self->parser,
  3118. (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
  3119. (XML_EndNamespaceDeclHandler) expat_end_ns_handler
  3120. );
  3121. EXPAT(st, SetElementHandler)(
  3122. self->parser,
  3123. (XML_StartElementHandler) expat_start_handler,
  3124. (XML_EndElementHandler) expat_end_handler
  3125. );
  3126. EXPAT(st, SetDefaultHandlerExpand)(
  3127. self->parser,
  3128. (XML_DefaultHandler) expat_default_handler
  3129. );
  3130. EXPAT(st, SetCharacterDataHandler)(
  3131. self->parser,
  3132. (XML_CharacterDataHandler) expat_data_handler
  3133. );
  3134. if (self->handle_comment)
  3135. EXPAT(st, SetCommentHandler)(
  3136. self->parser,
  3137. (XML_CommentHandler) expat_comment_handler
  3138. );
  3139. if (self->handle_pi)
  3140. EXPAT(st, SetProcessingInstructionHandler)(
  3141. self->parser,
  3142. (XML_ProcessingInstructionHandler) expat_pi_handler
  3143. );
  3144. EXPAT(st, SetStartDoctypeDeclHandler)(
  3145. self->parser,
  3146. (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
  3147. );
  3148. EXPAT(st, SetUnknownEncodingHandler)(
  3149. self->parser,
  3150. EXPAT(st, DefaultUnknownEncodingHandler), NULL
  3151. );
  3152. return 0;
  3153. }
  3154. static int
  3155. xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
  3156. {
  3157. Py_VISIT(Py_TYPE(self));
  3158. Py_VISIT(self->handle_close);
  3159. Py_VISIT(self->handle_pi);
  3160. Py_VISIT(self->handle_comment);
  3161. Py_VISIT(self->handle_end);
  3162. Py_VISIT(self->handle_data);
  3163. Py_VISIT(self->handle_start);
  3164. Py_VISIT(self->handle_start_ns);
  3165. Py_VISIT(self->handle_end_ns);
  3166. Py_VISIT(self->handle_doctype);
  3167. Py_VISIT(self->target);
  3168. Py_VISIT(self->entity);
  3169. Py_VISIT(self->names);
  3170. return 0;
  3171. }
  3172. static int
  3173. xmlparser_gc_clear(XMLParserObject *self)
  3174. {
  3175. elementtreestate *st = self->state;
  3176. if (self->parser != NULL) {
  3177. XML_Parser parser = self->parser;
  3178. self->parser = NULL;
  3179. EXPAT(st, ParserFree)(parser);
  3180. }
  3181. Py_CLEAR(self->elementtree_module);
  3182. Py_CLEAR(self->handle_close);
  3183. Py_CLEAR(self->handle_pi);
  3184. Py_CLEAR(self->handle_comment);
  3185. Py_CLEAR(self->handle_end);
  3186. Py_CLEAR(self->handle_data);
  3187. Py_CLEAR(self->handle_start);
  3188. Py_CLEAR(self->handle_start_ns);
  3189. Py_CLEAR(self->handle_end_ns);
  3190. Py_CLEAR(self->handle_doctype);
  3191. Py_CLEAR(self->target);
  3192. Py_CLEAR(self->entity);
  3193. Py_CLEAR(self->names);
  3194. return 0;
  3195. }
  3196. static void
  3197. xmlparser_dealloc(XMLParserObject* self)
  3198. {
  3199. PyTypeObject *tp = Py_TYPE(self);
  3200. PyObject_GC_UnTrack(self);
  3201. xmlparser_gc_clear(self);
  3202. tp->tp_free(self);
  3203. Py_DECREF(tp);
  3204. }
  3205. Py_LOCAL_INLINE(int)
  3206. _check_xmlparser(XMLParserObject* self)
  3207. {
  3208. if (self->target == NULL) {
  3209. PyErr_SetString(PyExc_ValueError,
  3210. "XMLParser.__init__() wasn't called");
  3211. return 0;
  3212. }
  3213. return 1;
  3214. }
  3215. LOCAL(PyObject*)
  3216. expat_parse(elementtreestate *st, XMLParserObject *self, const char *data,
  3217. int data_len, int final)
  3218. {
  3219. int ok;
  3220. assert(!PyErr_Occurred());
  3221. ok = EXPAT(st, Parse)(self->parser, data, data_len, final);
  3222. if (PyErr_Occurred())
  3223. return NULL;
  3224. if (!ok) {
  3225. expat_set_error(
  3226. st,
  3227. EXPAT(st, GetErrorCode)(self->parser),
  3228. EXPAT(st, GetErrorLineNumber)(self->parser),
  3229. EXPAT(st, GetErrorColumnNumber)(self->parser),
  3230. NULL
  3231. );
  3232. return NULL;
  3233. }
  3234. Py_RETURN_NONE;
  3235. }
  3236. /*[clinic input]
  3237. _elementtree.XMLParser.close
  3238. [clinic start generated code]*/
  3239. static PyObject *
  3240. _elementtree_XMLParser_close_impl(XMLParserObject *self)
  3241. /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
  3242. {
  3243. /* end feeding data to parser */
  3244. PyObject* res;
  3245. if (!_check_xmlparser(self)) {
  3246. return NULL;
  3247. }
  3248. elementtreestate *st = self->state;
  3249. res = expat_parse(st, self, "", 0, 1);
  3250. if (!res)
  3251. return NULL;
  3252. if (TreeBuilder_CheckExact(st, self->target)) {
  3253. Py_DECREF(res);
  3254. return treebuilder_done((TreeBuilderObject*) self->target);
  3255. }
  3256. else if (self->handle_close) {
  3257. Py_DECREF(res);
  3258. return PyObject_CallNoArgs(self->handle_close);
  3259. }
  3260. else {
  3261. return res;
  3262. }
  3263. }
  3264. /*[clinic input]
  3265. _elementtree.XMLParser.flush
  3266. [clinic start generated code]*/
  3267. static PyObject *
  3268. _elementtree_XMLParser_flush_impl(XMLParserObject *self)
  3269. /*[clinic end generated code: output=42fdb8795ca24509 input=effbecdb28715949]*/
  3270. {
  3271. if (!_check_xmlparser(self)) {
  3272. return NULL;
  3273. }
  3274. elementtreestate *st = self->state;
  3275. if (EXPAT(st, SetReparseDeferralEnabled) == NULL) {
  3276. Py_RETURN_NONE;
  3277. }
  3278. // NOTE: The Expat parser in the C implementation of ElementTree is not
  3279. // exposed to the outside; as a result we known that reparse deferral
  3280. // is currently enabled, or we would not even have access to function
  3281. // XML_SetReparseDeferralEnabled in the first place (which we checked
  3282. // for, a few lines up).
  3283. EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_FALSE);
  3284. PyObject *res = expat_parse(st, self, "", 0, XML_FALSE);
  3285. EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_TRUE);
  3286. return res;
  3287. }
  3288. /*[clinic input]
  3289. _elementtree.XMLParser.feed
  3290. data: object
  3291. /
  3292. [clinic start generated code]*/
  3293. static PyObject *
  3294. _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
  3295. /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
  3296. {
  3297. /* feed data to parser */
  3298. if (!_check_xmlparser(self)) {
  3299. return NULL;
  3300. }
  3301. elementtreestate *st = self->state;
  3302. if (PyUnicode_Check(data)) {
  3303. Py_ssize_t data_len;
  3304. const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
  3305. if (data_ptr == NULL)
  3306. return NULL;
  3307. if (data_len > INT_MAX) {
  3308. PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
  3309. return NULL;
  3310. }
  3311. /* Explicitly set UTF-8 encoding. Return code ignored. */
  3312. (void)EXPAT(st, SetEncoding)(self->parser, "utf-8");
  3313. return expat_parse(st, self, data_ptr, (int)data_len, 0);
  3314. }
  3315. else {
  3316. Py_buffer view;
  3317. PyObject *res;
  3318. if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
  3319. return NULL;
  3320. if (view.len > INT_MAX) {
  3321. PyBuffer_Release(&view);
  3322. PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
  3323. return NULL;
  3324. }
  3325. res = expat_parse(st, self, view.buf, (int)view.len, 0);
  3326. PyBuffer_Release(&view);
  3327. return res;
  3328. }
  3329. }
  3330. /*[clinic input]
  3331. _elementtree.XMLParser._parse_whole
  3332. file: object
  3333. /
  3334. [clinic start generated code]*/
  3335. static PyObject *
  3336. _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
  3337. /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
  3338. {
  3339. /* (internal) parse the whole input, until end of stream */
  3340. PyObject* reader;
  3341. PyObject* buffer;
  3342. PyObject* temp;
  3343. PyObject* res;
  3344. if (!_check_xmlparser(self)) {
  3345. return NULL;
  3346. }
  3347. reader = PyObject_GetAttrString(file, "read");
  3348. if (!reader)
  3349. return NULL;
  3350. /* read from open file object */
  3351. elementtreestate *st = self->state;
  3352. for (;;) {
  3353. buffer = PyObject_CallFunction(reader, "i", 64*1024);
  3354. if (!buffer) {
  3355. /* read failed (e.g. due to KeyboardInterrupt) */
  3356. Py_DECREF(reader);
  3357. return NULL;
  3358. }
  3359. if (PyUnicode_CheckExact(buffer)) {
  3360. /* A unicode object is encoded into bytes using UTF-8 */
  3361. if (PyUnicode_GET_LENGTH(buffer) == 0) {
  3362. Py_DECREF(buffer);
  3363. break;
  3364. }
  3365. temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
  3366. Py_DECREF(buffer);
  3367. if (!temp) {
  3368. /* Propagate exception from PyUnicode_AsEncodedString */
  3369. Py_DECREF(reader);
  3370. return NULL;
  3371. }
  3372. buffer = temp;
  3373. }
  3374. else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
  3375. Py_DECREF(buffer);
  3376. break;
  3377. }
  3378. if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
  3379. Py_DECREF(buffer);
  3380. Py_DECREF(reader);
  3381. PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
  3382. return NULL;
  3383. }
  3384. res = expat_parse(
  3385. st, self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer),
  3386. 0);
  3387. Py_DECREF(buffer);
  3388. if (!res) {
  3389. Py_DECREF(reader);
  3390. return NULL;
  3391. }
  3392. Py_DECREF(res);
  3393. }
  3394. Py_DECREF(reader);
  3395. res = expat_parse(st, self, "", 0, 1);
  3396. if (res && TreeBuilder_CheckExact(st, self->target)) {
  3397. Py_DECREF(res);
  3398. return treebuilder_done((TreeBuilderObject*) self->target);
  3399. }
  3400. return res;
  3401. }
  3402. /*[clinic input]
  3403. _elementtree.XMLParser._setevents
  3404. events_queue: object
  3405. events_to_report: object = None
  3406. /
  3407. [clinic start generated code]*/
  3408. static PyObject *
  3409. _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
  3410. PyObject *events_queue,
  3411. PyObject *events_to_report)
  3412. /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
  3413. {
  3414. /* activate element event reporting */
  3415. Py_ssize_t i;
  3416. TreeBuilderObject *target;
  3417. PyObject *events_append, *events_seq;
  3418. if (!_check_xmlparser(self)) {
  3419. return NULL;
  3420. }
  3421. elementtreestate *st = self->state;
  3422. if (!TreeBuilder_CheckExact(st, self->target)) {
  3423. PyErr_SetString(
  3424. PyExc_TypeError,
  3425. "event handling only supported for ElementTree.TreeBuilder "
  3426. "targets"
  3427. );
  3428. return NULL;
  3429. }
  3430. target = (TreeBuilderObject*) self->target;
  3431. events_append = PyObject_GetAttrString(events_queue, "append");
  3432. if (events_append == NULL)
  3433. return NULL;
  3434. Py_XSETREF(target->events_append, events_append);
  3435. /* clear out existing events */
  3436. Py_CLEAR(target->start_event_obj);
  3437. Py_CLEAR(target->end_event_obj);
  3438. Py_CLEAR(target->start_ns_event_obj);
  3439. Py_CLEAR(target->end_ns_event_obj);
  3440. Py_CLEAR(target->comment_event_obj);
  3441. Py_CLEAR(target->pi_event_obj);
  3442. if (events_to_report == Py_None) {
  3443. /* default is "end" only */
  3444. target->end_event_obj = PyUnicode_FromString("end");
  3445. Py_RETURN_NONE;
  3446. }
  3447. if (!(events_seq = PySequence_Fast(events_to_report,
  3448. "events must be a sequence"))) {
  3449. return NULL;
  3450. }
  3451. for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
  3452. PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
  3453. const char *event_name = NULL;
  3454. if (PyUnicode_Check(event_name_obj)) {
  3455. event_name = PyUnicode_AsUTF8(event_name_obj);
  3456. } else if (PyBytes_Check(event_name_obj)) {
  3457. event_name = PyBytes_AS_STRING(event_name_obj);
  3458. }
  3459. if (event_name == NULL) {
  3460. Py_DECREF(events_seq);
  3461. PyErr_Format(PyExc_ValueError, "invalid events sequence");
  3462. return NULL;
  3463. }
  3464. if (strcmp(event_name, "start") == 0) {
  3465. Py_XSETREF(target->start_event_obj, Py_NewRef(event_name_obj));
  3466. } else if (strcmp(event_name, "end") == 0) {
  3467. Py_XSETREF(target->end_event_obj, Py_NewRef(event_name_obj));
  3468. } else if (strcmp(event_name, "start-ns") == 0) {
  3469. Py_XSETREF(target->start_ns_event_obj, Py_NewRef(event_name_obj));
  3470. EXPAT(st, SetNamespaceDeclHandler)(
  3471. self->parser,
  3472. (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
  3473. (XML_EndNamespaceDeclHandler) expat_end_ns_handler
  3474. );
  3475. } else if (strcmp(event_name, "end-ns") == 0) {
  3476. Py_XSETREF(target->end_ns_event_obj, Py_NewRef(event_name_obj));
  3477. EXPAT(st, SetNamespaceDeclHandler)(
  3478. self->parser,
  3479. (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
  3480. (XML_EndNamespaceDeclHandler) expat_end_ns_handler
  3481. );
  3482. } else if (strcmp(event_name, "comment") == 0) {
  3483. Py_XSETREF(target->comment_event_obj, Py_NewRef(event_name_obj));
  3484. EXPAT(st, SetCommentHandler)(
  3485. self->parser,
  3486. (XML_CommentHandler) expat_comment_handler
  3487. );
  3488. } else if (strcmp(event_name, "pi") == 0) {
  3489. Py_XSETREF(target->pi_event_obj, Py_NewRef(event_name_obj));
  3490. EXPAT(st, SetProcessingInstructionHandler)(
  3491. self->parser,
  3492. (XML_ProcessingInstructionHandler) expat_pi_handler
  3493. );
  3494. } else {
  3495. Py_DECREF(events_seq);
  3496. PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
  3497. return NULL;
  3498. }
  3499. }
  3500. Py_DECREF(events_seq);
  3501. Py_RETURN_NONE;
  3502. }
  3503. static PyMemberDef xmlparser_members[] = {
  3504. {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
  3505. {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
  3506. {NULL}
  3507. };
  3508. static PyObject*
  3509. xmlparser_version_getter(XMLParserObject *self, void *closure)
  3510. {
  3511. return PyUnicode_FromFormat(
  3512. "Expat %d.%d.%d", XML_MAJOR_VERSION,
  3513. XML_MINOR_VERSION, XML_MICRO_VERSION);
  3514. }
  3515. static PyGetSetDef xmlparser_getsetlist[] = {
  3516. {"version", (getter)xmlparser_version_getter, NULL, NULL},
  3517. {NULL},
  3518. };
  3519. #define clinic_state() (get_elementtree_state_by_type(Py_TYPE(self)))
  3520. #include "clinic/_elementtree.c.h"
  3521. #undef clinic_state
/* Method table for the Element type.  Every entry except the sentinel is
 * a *_METHODDEF macro; these are presumably supplied by the
 * clinic/_elementtree.c.h include just above (confirm against that header)
 * and each expands to one complete initializer including its trailing
 * comma, which is why no commas appear between the lines here. */
static PyMethodDef element_methods[] = {
    _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
    _ELEMENTTREE_ELEMENT_GET_METHODDEF
    _ELEMENTTREE_ELEMENT_SET_METHODDEF
    _ELEMENTTREE_ELEMENT_FIND_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
    _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
    _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
    _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
    _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
    _ELEMENTTREE_ELEMENT_ITER_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
    _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
    _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
    _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
    _ELEMENTTREE_ELEMENT___COPY___METHODDEF
    _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
    _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
    _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
    _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
    {NULL, NULL}  /* sentinel */
};
  3546. static struct PyMemberDef element_members[] = {
  3547. {"__weaklistoffset__", T_PYSSIZET, offsetof(ElementObject, weakreflist), READONLY},
  3548. {NULL},
  3549. };
  3550. static PyGetSetDef element_getsetlist[] = {
  3551. {"tag",
  3552. (getter)element_tag_getter,
  3553. (setter)element_tag_setter,
  3554. "A string identifying what kind of data this element represents"},
  3555. {"text",
  3556. (getter)element_text_getter,
  3557. (setter)element_text_setter,
  3558. "A string of text directly after the start tag, or None"},
  3559. {"tail",
  3560. (getter)element_tail_getter,
  3561. (setter)element_tail_setter,
  3562. "A string of text directly after the end tag, or None"},
  3563. {"attrib",
  3564. (getter)element_attrib_getter,
  3565. (setter)element_attrib_setter,
  3566. "A dictionary containing the element's attributes"},
  3567. {NULL},
  3568. };
  3569. static PyType_Slot element_slots[] = {
  3570. {Py_tp_dealloc, element_dealloc},
  3571. {Py_tp_repr, element_repr},
  3572. {Py_tp_getattro, PyObject_GenericGetAttr},
  3573. {Py_tp_traverse, element_gc_traverse},
  3574. {Py_tp_clear, element_gc_clear},
  3575. {Py_tp_methods, element_methods},
  3576. {Py_tp_members, element_members},
  3577. {Py_tp_getset, element_getsetlist},
  3578. {Py_tp_init, element_init},
  3579. {Py_tp_alloc, PyType_GenericAlloc},
  3580. {Py_tp_new, element_new},
  3581. {Py_sq_length, element_length},
  3582. {Py_sq_item, element_getitem},
  3583. {Py_sq_ass_item, element_setitem},
  3584. {Py_nb_bool, element_bool},
  3585. {Py_mp_length, element_length},
  3586. {Py_mp_subscript, element_subscr},
  3587. {Py_mp_ass_subscript, element_ass_subscr},
  3588. {0, NULL},
  3589. };
/* Type specification for xml.etree.ElementTree.Element; module_exec()
 * passes it to PyType_FromModuleAndSpec() via CREATE_TYPE.  The flags
 * request a GC-tracked type that may be subclassed (BASETYPE) and whose
 * type attributes cannot be reassigned (IMMUTABLETYPE). */
static PyType_Spec element_spec = {
    .name = "xml.etree.ElementTree.Element",
    .basicsize = sizeof(ElementObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = element_slots,
};
/* Method table for the TreeBuilder type.  Each *_METHODDEF macro expands
 * to one complete initializer including its trailing comma (presumably
 * supplied by the Argument Clinic generated header — confirm there). */
static PyMethodDef treebuilder_methods[] = {
    _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
    _ELEMENTTREE_TREEBUILDER_START_METHODDEF
    _ELEMENTTREE_TREEBUILDER_END_METHODDEF
    _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
    _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
    _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
  3606. static PyType_Slot treebuilder_slots[] = {
  3607. {Py_tp_dealloc, treebuilder_dealloc},
  3608. {Py_tp_traverse, treebuilder_gc_traverse},
  3609. {Py_tp_clear, treebuilder_gc_clear},
  3610. {Py_tp_methods, treebuilder_methods},
  3611. {Py_tp_init, _elementtree_TreeBuilder___init__},
  3612. {Py_tp_alloc, PyType_GenericAlloc},
  3613. {Py_tp_new, treebuilder_new},
  3614. {0, NULL},
  3615. };
  3616. static PyType_Spec treebuilder_spec = {
  3617. .name = "xml.etree.ElementTree.TreeBuilder",
  3618. .basicsize = sizeof(TreeBuilderObject),
  3619. .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_IMMUTABLETYPE,
  3620. .slots = treebuilder_slots,
  3621. };
/* Method table for the XMLParser type: feed, close, flush and the
 * internal _parse_whole / _setevents helpers.  Each *_METHODDEF macro
 * expands to one complete initializer including its trailing comma
 * (presumably supplied by the Argument Clinic generated header). */
static PyMethodDef xmlparser_methods[] = {
    _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
    _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
    _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF
    _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
    _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
    {NULL, NULL}  /* sentinel */
};
  3630. static PyType_Slot xmlparser_slots[] = {
  3631. {Py_tp_dealloc, xmlparser_dealloc},
  3632. {Py_tp_traverse, xmlparser_gc_traverse},
  3633. {Py_tp_clear, xmlparser_gc_clear},
  3634. {Py_tp_methods, xmlparser_methods},
  3635. {Py_tp_members, xmlparser_members},
  3636. {Py_tp_getset, xmlparser_getsetlist},
  3637. {Py_tp_init, _elementtree_XMLParser___init__},
  3638. {Py_tp_alloc, PyType_GenericAlloc},
  3639. {Py_tp_new, xmlparser_new},
  3640. {0, NULL},
  3641. };
/* Type specification for xml.etree.ElementTree.XMLParser; module_exec()
 * passes it to PyType_FromModuleAndSpec() via CREATE_TYPE.  Uses the same
 * flag set as the Element spec. */
static PyType_Spec xmlparser_spec = {
    .name = "xml.etree.ElementTree.XMLParser",
    .basicsize = sizeof(XMLParserObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = xmlparser_slots,
};
  3649. /* ==================================================================== */
  3650. /* python module interface */
  3651. static PyMethodDef _functions[] = {
  3652. {"SubElement", _PyCFunction_CAST(subelement), METH_VARARGS | METH_KEYWORDS},
  3653. _ELEMENTTREE__SET_FACTORIES_METHODDEF
  3654. {NULL, NULL}
  3655. };
  3656. #define CREATE_TYPE(module, type, spec) \
  3657. do { \
  3658. if (type != NULL) { \
  3659. break; \
  3660. } \
  3661. type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \
  3662. if (type == NULL) { \
  3663. goto error; \
  3664. } \
  3665. } while (0)
  3666. static int
  3667. module_exec(PyObject *m)
  3668. {
  3669. elementtreestate *st = get_elementtree_state(m);
  3670. /* Initialize object types */
  3671. CREATE_TYPE(m, st->ElementIter_Type, &elementiter_spec);
  3672. CREATE_TYPE(m, st->TreeBuilder_Type, &treebuilder_spec);
  3673. CREATE_TYPE(m, st->Element_Type, &element_spec);
  3674. CREATE_TYPE(m, st->XMLParser_Type, &xmlparser_spec);
  3675. st->deepcopy_obj = _PyImport_GetModuleAttrString("copy", "deepcopy");
  3676. if (st->deepcopy_obj == NULL) {
  3677. goto error;
  3678. }
  3679. assert(!PyErr_Occurred());
  3680. if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
  3681. goto error;
  3682. /* link against pyexpat */
  3683. if (!(st->expat_capsule = _PyImport_GetModuleAttrString("pyexpat", "expat_CAPI")))
  3684. goto error;
  3685. if (!(st->expat_capi = PyCapsule_GetPointer(st->expat_capsule, PyExpat_CAPSULE_NAME)))
  3686. goto error;
  3687. if (st->expat_capi) {
  3688. /* check that it's usable */
  3689. if (strcmp(st->expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
  3690. (size_t)st->expat_capi->size < sizeof(struct PyExpat_CAPI) ||
  3691. st->expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
  3692. st->expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
  3693. st->expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
  3694. PyErr_SetString(PyExc_ImportError,
  3695. "pyexpat version is incompatible");
  3696. goto error;
  3697. }
  3698. } else {
  3699. goto error;
  3700. }
  3701. st->str_append = PyUnicode_InternFromString("append");
  3702. if (st->str_append == NULL) {
  3703. goto error;
  3704. }
  3705. st->str_find = PyUnicode_InternFromString("find");
  3706. if (st->str_find == NULL) {
  3707. goto error;
  3708. }
  3709. st->str_findall = PyUnicode_InternFromString("findall");
  3710. if (st->str_findall == NULL) {
  3711. goto error;
  3712. }
  3713. st->str_findtext = PyUnicode_InternFromString("findtext");
  3714. if (st->str_findtext == NULL) {
  3715. goto error;
  3716. }
  3717. st->str_iterfind = PyUnicode_InternFromString("iterfind");
  3718. if (st->str_iterfind == NULL) {
  3719. goto error;
  3720. }
  3721. st->str_tail = PyUnicode_InternFromString("tail");
  3722. if (st->str_tail == NULL) {
  3723. goto error;
  3724. }
  3725. st->str_text = PyUnicode_InternFromString("text");
  3726. if (st->str_text == NULL) {
  3727. goto error;
  3728. }
  3729. st->str_doctype = PyUnicode_InternFromString("doctype");
  3730. if (st->str_doctype == NULL) {
  3731. goto error;
  3732. }
  3733. st->parseerror_obj = PyErr_NewException(
  3734. "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
  3735. );
  3736. if (PyModule_AddObjectRef(m, "ParseError", st->parseerror_obj) < 0) {
  3737. goto error;
  3738. }
  3739. PyTypeObject *types[] = {
  3740. st->Element_Type,
  3741. st->TreeBuilder_Type,
  3742. st->XMLParser_Type
  3743. };
  3744. for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
  3745. if (PyModule_AddType(m, types[i]) < 0) {
  3746. goto error;
  3747. }
  3748. }
  3749. return 0;
  3750. error:
  3751. return -1;
  3752. }
  3753. static struct PyModuleDef_Slot elementtree_slots[] = {
  3754. {Py_mod_exec, module_exec},
  3755. // XXX gh-103092: fix isolation.
  3756. {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED},
  3757. //{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
  3758. {0, NULL},
  3759. };
/* Module definition for _elementtree.  m_size reserves one
 * elementtreestate per module object; the traverse/clear/free hooks
 * manage the references held in that state. */
static struct PyModuleDef elementtreemodule = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_elementtree",
    .m_size = sizeof(elementtreestate),
    .m_methods = _functions,
    .m_slots = elementtree_slots,
    .m_traverse = elementtree_traverse,
    .m_clear = elementtree_clear,
    .m_free = elementtree_free,
};
  3770. PyMODINIT_FUNC
  3771. PyInit__elementtree(void)
  3772. {
  3773. return PyModuleDef_Init(&elementtreemodule);
  3774. }