unistr.h 182 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
7477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1998-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. *
  9. * File unistr.h
  10. *
  11. * Modification History:
  12. *
  13. * Date Name Description
  14. * 09/25/98 stephen Creation.
  15. * 11/11/98 stephen Changed per 11/9 code review.
  16. * 04/20/99 stephen Overhauled per 4/16 code review.
  17. * 11/18/99 aliu Made to inherit from Replaceable. Added method
  18. * handleReplaceBetween(); other methods unchanged.
  19. * 06/25/01 grhoten Remove dependency on iostream.
  20. ******************************************************************************
  21. */
  22. #ifndef UNISTR_H
  23. #define UNISTR_H
  24. /**
  25. * \file
  26. * \brief C++ API: Unicode String
  27. */
  28. #include "unicode/utypes.h"
  29. #if U_SHOW_CPLUSPLUS_API
  30. #include <cstddef>
  31. #include <string_view>
  32. #include "unicode/char16ptr.h"
  33. #include "unicode/rep.h"
  34. #include "unicode/std_string.h"
  35. #include "unicode/stringpiece.h"
  36. #include "unicode/bytestream.h"
  37. struct UConverter; // unicode/ucnv.h
  38. #ifndef USTRING_H
  39. /**
  * Local declaration of the C function u_strlen().
  * It is emitted only while USTRING_H is undefined, so the prototype is not
  * repeated when the header that defines USTRING_H was included first
  * (NOTE(review): presumably that is the include guard of unicode/ustring.h -- confirm).
  *
  40. * \ingroup ustring_ustrlen
  41. * @param s Pointer to sequence of UChars.
  42. * @return Length of sequence.
  43. */
  44. U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
  45. #endif
  46. U_NAMESPACE_BEGIN
  47. #if !UCONFIG_NO_BREAK_ITERATION
  48. class BreakIterator; // unicode/brkiter.h
  49. #endif
  50. class Edits;
  51. U_NAMESPACE_END
  52. // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
  53. /**
  54. * Internal string case mapping function type.
  55. * All error checking must be done.
  56. * src and dest must not overlap.
  *
  * This typedef names a function type (not a pointer-to-function type) that
  * uses the U_CALLCONV calling convention. It sits between a
  * U_NAMESPACE_END and the next U_NAMESPACE_BEGIN, which is why BreakIterator
  * and Edits are written with an explicit icu:: qualifier.
  *
  * @param caseLocale int32_t case-locale selector
  * (NOTE(review): the set of valid values is defined elsewhere -- confirm).
  * @param options uint32_t option bits
  * (NOTE(review): bit meanings are not visible in this header section).
  * @param iter Pointer to an icu::BreakIterator. This parameter is part of the
  * signature only when UCONFIG_NO_BREAK_ITERATION is 0; otherwise it is
  * compiled out and the remaining parameters shift up by one position.
  * @param dest Destination char16_t buffer; must not overlap src.
  * @param destCapacity int32_t capacity for dest
  * (presumably counted in char16_t units -- confirm).
  * @param src Source char16_t text (read-only); must not overlap dest.
  * @param srcLength int32_t length for src
  * (presumably counted in char16_t units -- confirm).
  * @param edits Pointer to an icu::Edits object
  * (NOTE(review): whether nullptr is accepted is not visible here).
  * @param errorCode UErrorCode passed by reference.
  * @return int32_t
  * (NOTE(review): presumably the length of the mapped result -- confirm against the implementations).
  57. * @internal
  58. */
  59. typedef int32_t U_CALLCONV
  60. UStringCaseMapper(int32_t caseLocale, uint32_t options,
  61. #if !UCONFIG_NO_BREAK_ITERATION
  62. icu::BreakIterator *iter,
  63. #endif
  64. char16_t *dest, int32_t destCapacity,
  65. const char16_t *src, int32_t srcLength,
  66. icu::Edits *edits,
  67. UErrorCode &errorCode);
  68. U_NAMESPACE_BEGIN
  69. class Locale; // unicode/locid.h
  70. class StringCharacterIterator;
  71. class UnicodeStringAppendable; // unicode/appendable.h
  72. /* The <iostream> include has been moved to unicode/ustream.h */
  73. /**
  74. * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
  75. * which constructs a Unicode string from an invariant-character char * string.
  76. * About invariant characters see utypes.h.
  77. * This constructor has no runtime dependency on conversion code and is
  78. * therefore recommended over ones taking a charset name string
  79. * (where the empty string "" indicates invariant-character conversion).
  80. *
  81. * @stable ICU 3.2
  82. */
  83. #define US_INV icu::UnicodeString::kInvariant
  84. /**
  85. * \def UNICODE_STRING
  86. * Obsolete macro approximating UnicodeString literals.
  87. *
  88. * Prior to the availability of C++11 and u"UTF-16 string literals",
  89. * this macro was provided for portability and efficiency when
  90. * initializing UnicodeStrings from literals.
  91. *
  92. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  93. * length determination:
  94. * \code
  95. * UnicodeString str(u"literal");
  96. * if (str == u"other literal") { ... }
  97. * \endcode
  98. *
  99. * The string parameter must be a C string literal.
  100. * The length of the string, not including the terminating
  101. * `NUL`, must be specified as a constant.
  102. * @stable ICU 2.0
  103. */
  104. #if !U_CHAR16_IS_TYPEDEF
  // Both variants paste a u prefix onto the literal (u ## cs), so "abc" becomes u"abc",
  // and expand to icu::UnicodeString(true, <literal>, _length).
  // NOTE(review): the meaning of the leading `true` argument is defined by the
  // constructor, which is not visible in this section -- confirm before changing it.
  // In this branch the u"..." literal is passed through unchanged.
  105. # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
  106. #else
  // When U_CHAR16_IS_TYPEDEF is nonzero, the pasted literal is explicitly cast to
  // const char16_t* before being passed on (NOTE(review): presumably because the
  // literal's element type differs from the char16_t typedef in that configuration -- confirm).
  107. # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
  108. #endif
  109. /**
  110. * Unicode String literals in C++.
  111. * Obsolete macro approximating UnicodeString literals.
  112. * See UNICODE_STRING.
  113. *
  114. * The string parameter must be a C string literal.
  115. * @stable ICU 2.0
  116. * @see UNICODE_STRING
  117. */
  118. #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
  119. /**
  120. * \def UNISTR_FROM_CHAR_EXPLICIT
  121. * This can be defined to be empty or "explicit".
  122. * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
  123. * constructors are marked as explicit, preventing their inadvertent use.
  124. * @stable ICU 49
  125. */
  126. #ifndef UNISTR_FROM_CHAR_EXPLICIT
  127. # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
  128. // Auto-"explicit" in ICU library code.
  129. # define UNISTR_FROM_CHAR_EXPLICIT explicit
  130. # else
  131. // Empty by default for source code compatibility.
  132. # define UNISTR_FROM_CHAR_EXPLICIT
  133. # endif
  134. #endif
  135. /**
  136. * \def UNISTR_FROM_STRING_EXPLICIT
  137. * This can be defined to be empty or "explicit".
  138. * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
  139. * constructors are marked as explicit, preventing their inadvertent use.
  140. *
  141. * In particular, this helps prevent accidentally depending on ICU conversion code
  142. * by passing a string literal into an API with a const UnicodeString & parameter.
  143. * @stable ICU 49
  144. */
  145. #ifndef UNISTR_FROM_STRING_EXPLICIT
  146. # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
  147. // Auto-"explicit" in ICU library code.
  148. # define UNISTR_FROM_STRING_EXPLICIT explicit
  149. # else
  150. // Empty by default for source code compatibility.
  151. # define UNISTR_FROM_STRING_EXPLICIT
  152. # endif
  153. #endif
  154. /**
  155. * \def UNISTR_OBJECT_SIZE
  156. * Desired sizeof(UnicodeString) in bytes.
  157. * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
  158. * The object size may want to be a multiple of 16 bytes,
  159. * which is a common granularity for heap allocation.
  160. *
  161. * Any space inside the object beyond sizeof(vtable pointer) + 2
  162. * is available for storing short strings inside the object.
  163. * The bigger the object, the longer a string that can be stored inside the object,
  164. * without additional heap allocation.
  165. *
  166. * Depending on a platform's pointer size, pointer alignment requirements,
  167. * and struct padding, the compiler will usually round up sizeof(UnicodeString)
  168. * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
  169. * to hold the fields for heap-allocated strings.
  170. * Such a minimum size also ensures that the object is easily large enough
  171. * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
  172. *
  173. * sizeof(UnicodeString) >= 48 should work for all known platforms.
  174. *
  175. * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
  176. * sizeof(UnicodeString) = 64 would leave space for
  177. * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
  178. * char16_ts stored inside the object.
  179. *
  180. * The minimum object size on a 64-bit machine would be
  181. * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
  182. * and the internal buffer would hold up to 11 char16_ts in that case.
  183. *
  184. * @see U16_MAX_LENGTH
  185. * @stable ICU 56
  186. */
  187. #ifndef UNISTR_OBJECT_SIZE
  188. # define UNISTR_OBJECT_SIZE 64
  189. #endif
  190. /**
  191. * UnicodeString is a string class that stores Unicode characters directly and provides
  192. * functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
  193. * It is a concrete implementation of the abstract class Replaceable (for transliteration).
  194. *
  195. * The UnicodeString equivalent of std::string’s clear() is remove().
  196. *
  197. * A UnicodeString may "alias" an external array of characters
  198. * (that is, point to it, rather than own the array)
  199. * whose lifetime must then at least match the lifetime of the aliasing object.
  200. * This aliasing may be preserved when returning a UnicodeString by value,
  201. * depending on the compiler and the function implementation,
  202. * via Return Value Optimization (RVO) or the move assignment operator.
  203. * (However, the copy assignment operator does not preserve aliasing.)
  204. * For details see the description of storage models at the end of the class API docs
  205. * and in the User Guide chapter linked from there.
  206. *
  207. * The UnicodeString class is not suitable for subclassing.
  208. *
  209. * For an overview of Unicode strings in C and C++ see the
  210. * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc).
  211. *
  212. * In ICU, a Unicode string consists of 16-bit Unicode *code units*.
  213. * A Unicode character may be stored with either one code unit
  214. * (the most common case) or with a matched pair of special code units
  215. * ("surrogates"). The data type for code units is char16_t.
  216. * For single-character handling, a Unicode character code *point* is a value
  217. * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
  218. *
  219. * Indexes and offsets into and lengths of strings always count code units, not code points.
  220. * This is the same as with multi-byte char* strings in traditional string handling.
  221. * Operations on partial strings typically do not test for code point boundaries.
  222. * If necessary, the user needs to take care of such boundaries by testing for the code unit
  223. * values or by using functions like
  224. * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
  225. * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
  226. *
  227. * UnicodeString methods are more lenient with regard to input parameter values
  228. * than other ICU APIs. In particular:
  229. * - If indexes are out of bounds for a UnicodeString object
  230. * (< 0 or > length()) then they are "pinned" to the nearest boundary.
  231. * - If the buffer passed to an insert/append/replace operation is owned by the
  232. * target object, e.g., calling str.append(str), an extra copy may take place
  233. * to ensure safety.
  234. * - If primitive string pointer values (e.g., const char16_t * or char *)
  235. * for input strings are nullptr, then those input string parameters are treated
  236. * as if they pointed to an empty string.
  237. * However, this is *not* the case for char * parameters for charset names
  238. * or other IDs.
  239. * - Most UnicodeString methods do not take a UErrorCode parameter because
  240. * there are usually very few opportunities for failure other than a shortage
  241. * of memory, error codes in low-level C++ string methods would be inconvenient,
  242. * and the error code as the last parameter (ICU convention) would prevent
  243. * the use of default parameter values.
  244. * Instead, such methods set the UnicodeString into a "bogus" state
  245. * (see isBogus()) if an error occurs.
  246. *
  247. * In string comparisons, two UnicodeString objects that are both "bogus"
  248. * compare equal (to be transitive and prevent endless loops in sorting),
  249. * and a "bogus" string compares less than any non-"bogus" one.
  250. *
  251. * Const UnicodeString methods are thread-safe. Multiple threads can use
  252. * const methods on the same UnicodeString object simultaneously,
  253. * but non-const methods must not be called concurrently (in multiple threads)
  254. * with any other (const or non-const) methods.
  255. *
  256. * Similarly, const UnicodeString & parameters are thread-safe.
  257. * One object may be passed in as such a parameter concurrently in multiple threads.
  258. * This includes the const UnicodeString & parameters for
  259. * copy construction, assignment, and cloning.
  260. *
  261. * UnicodeString uses several storage methods.
  262. * String contents can be stored inside the UnicodeString object itself,
  263. * in an allocated and shared buffer, or in an outside buffer that is "aliased".
  264. * Most of this is done transparently, but careful aliasing in particular provides
  265. * significant performance improvements.
  266. * Also, the internal buffer is accessible via special functions.
  267. * For details see the
  268. * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
  269. *
  270. * @see utf.h
  271. * @see CharacterIterator
  272. * @stable ICU 2.0
  273. */
  274. class U_COMMON_API UnicodeString : public Replaceable
  275. {
  276. public:
  277. /**
  278. * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
  279. * which constructs a Unicode string from an invariant-character char * string.
  280. * Use the macro US_INV instead of the full qualification for this value.
  281. *
  282. * @see US_INV
  283. * @stable ICU 3.2
  284. */
  enum EInvariant {
    /**
     * The single tag value of this enum; it is the constant passed as the
     * EInvariant argument of the invariant-character constructor.
     * @see EInvariant
     * @stable ICU 3.2
     */
    kInvariant
  };
  292. //========================================
  293. // Read-only operations
  294. //========================================
  295. /* Comparison - bitwise only - for international comparison use collation */
  296. /**
  297. * Equality operator. Performs only bitwise comparison.
  298. * @param text The UnicodeString to compare to this one.
  299. * @return true if `text` contains the same characters as this one,
  300. * false otherwise.
  301. * @stable ICU 2.0
  302. */
  303. inline bool operator== (const UnicodeString& text) const;
  304. #ifndef U_HIDE_DRAFT_API
  305. /**
  306. * Equality operator. Performs only bitwise comparison with `text`
  307. * which is, or which is implicitly convertible to,
  308. * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
  309. *
  310. * For performance, you can use UTF-16 string literals with compile-time
  311. * length determination:
  312. * \code
  313. * UnicodeString str = ...;
  314. * if (str == u"literal") { ... }
  315. * \endcode
  316. * @param text The string view to compare to this string.
  317. * @return true if `text` contains the same characters as this one, false otherwise.
  318. * @draft ICU 76
  319. */
  320. template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
  321. inline bool operator==(const S &text) const {
  322. std::u16string_view sv(internal::toU16StringView(text));
  323. uint32_t len; // unsigned to avoid a compiler warning
  324. return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
  325. }
  326. #endif // U_HIDE_DRAFT_API
  327. /**
  328. * Inequality operator. Performs only bitwise comparison.
  329. * @param text The UnicodeString to compare to this one.
  330. * @return false if `text` contains the same characters as this one,
  331. * true otherwise.
  332. * @stable ICU 2.0
  333. */
  334. inline bool operator!= (const UnicodeString& text) const;
  335. #ifndef U_HIDE_DRAFT_API
  336. /**
  337. * Inequality operator. Performs only bitwise comparison with `text`
  338. * which is, or which is implicitly convertible to,
  339. * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
  340. *
  341. * For performance, you can use std::u16string_view literals with compile-time
  342. * length determination:
  343. * \code
  344. * #include &lt;string_view&gt;
  345. * using namespace std::string_view_literals;
  346. * UnicodeString str = ...;
  347. * if (str != u"literal"sv) { ... }
  348. * \endcode
  349. * @param text The string view to compare to this string.
  350. * @return false if `text` contains the same characters as this one, true otherwise.
  351. * @draft ICU 76
  352. */
  353. template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
  354. inline bool operator!=(const S &text) const {
  355. return !operator==(text);
  356. }
  357. #endif // U_HIDE_DRAFT_API
  358. /**
  359. * Greater than operator. Performs only bitwise comparison.
  360. * @param text The UnicodeString to compare to this one.
  361. * @return true if the characters in this are bitwise
  362. * greater than the characters in `text`, false otherwise
  363. * @stable ICU 2.0
  364. */
  365. inline UBool operator> (const UnicodeString& text) const;
  366. /**
  367. * Less than operator. Performs only bitwise comparison.
  368. * @param text The UnicodeString to compare to this one.
  369. * @return true if the characters in this are bitwise
  370. * less than the characters in `text`, false otherwise
  371. * @stable ICU 2.0
  372. */
  373. inline UBool operator< (const UnicodeString& text) const;
  374. /**
  375. * Greater than or equal operator. Performs only bitwise comparison.
  376. * @param text The UnicodeString to compare to this one.
  377. * @return true if the characters in this are bitwise
  378. * greater than or equal to the characters in `text`, false otherwise
  379. * @stable ICU 2.0
  380. */
  381. inline UBool operator>= (const UnicodeString& text) const;
  382. /**
  383. * Less than or equal operator. Performs only bitwise comparison.
  384. * @param text The UnicodeString to compare to this one.
  385. * @return true if the characters in this are bitwise
  386. * less than or equal to the characters in `text`, false otherwise
  387. * @stable ICU 2.0
  388. */
  389. inline UBool operator<= (const UnicodeString& text) const;
  390. /**
  391. * Compare the characters bitwise in this UnicodeString to
  392. * the characters in `text`.
  393. * @param text The UnicodeString to compare to this one.
  394. * @return The result of bitwise character comparison: 0 if this
  395. * contains the same characters as `text`, -1 if the characters in
  396. * this are bitwise less than the characters in `text`, +1 if the
  397. * characters in this are bitwise greater than the characters
  398. * in `text`.
  399. * @stable ICU 2.0
  400. */
  401. inline int8_t compare(const UnicodeString& text) const;
  402. /**
  403. * Compare the characters bitwise in the range
  404. * [`start`, `start + length`) with the characters
  405. * in the **entire string** `text`.
  406. * (The parameters "start" and "length" are not applied to the other text "text".)
  407. * @param start the offset at which the compare operation begins
  408. * @param length the number of characters of text to compare.
  409. * @param text the other text to be compared against this string.
  410. * @return The result of bitwise character comparison: 0 if this
  411. * contains the same characters as `text`, -1 if the characters in
  412. * this are bitwise less than the characters in `text`, +1 if the
  413. * characters in this are bitwise greater than the characters
  414. * in `text`.
  415. * @stable ICU 2.0
  416. */
  417. inline int8_t compare(int32_t start,
  418. int32_t length,
  419. const UnicodeString& text) const;
  420. /**
  421. * Compare the characters bitwise in the range
  422. * [`start`, `start + length`) with the characters
  423. * in `srcText` in the range
  424. * [`srcStart`, `srcStart + srcLength`).
  425. * @param start the offset at which the compare operation begins
  426. * @param length the number of characters in this to compare.
  427. * @param srcText the text to be compared
  428. * @param srcStart the offset into `srcText` to start comparison
  429. * @param srcLength the number of characters in `srcText` to compare
  430. * @return The result of bitwise character comparison: 0 if this
  431. * contains the same characters as `srcText`, -1 if the characters in
  432. * this are bitwise less than the characters in `srcText`, +1 if the
  433. * characters in this are bitwise greater than the characters
  434. * in `srcText`.
  435. * @stable ICU 2.0
  436. */
  437. inline int8_t compare(int32_t start,
  438. int32_t length,
  439. const UnicodeString& srcText,
  440. int32_t srcStart,
  441. int32_t srcLength) const;
  442. /**
  443. * Compare the characters bitwise in this UnicodeString with the first
  444. * `srcLength` characters in `srcChars`.
  445. * @param srcChars The characters to compare to this UnicodeString.
  446. * @param srcLength the number of characters in `srcChars` to compare
  447. * @return The result of bitwise character comparison: 0 if this
  448. * contains the same characters as `srcChars`, -1 if the characters in
  449. * this are bitwise less than the characters in `srcChars`, +1 if the
  450. * characters in this are bitwise greater than the characters
  451. * in `srcChars`.
  452. * @stable ICU 2.0
  453. */
  454. inline int8_t compare(ConstChar16Ptr srcChars,
  455. int32_t srcLength) const;
  456. /**
  457. * Compare the characters bitwise in the range
  458. * [`start`, `start + length`) with the first
  459. * `length` characters in `srcChars`
  460. * @param start the offset at which the compare operation begins
  461. * @param length the number of characters to compare.
  462. * @param srcChars the characters to be compared
  463. * @return The result of bitwise character comparison: 0 if this
  464. * contains the same characters as `srcChars`, -1 if the characters in
  465. * this are bitwise less than the characters in `srcChars`, +1 if the
  466. * characters in this are bitwise greater than the characters
  467. * in `srcChars`.
  468. * @stable ICU 2.0
  469. */
  470. inline int8_t compare(int32_t start,
  471. int32_t length,
  472. const char16_t *srcChars) const;
  473. /**
  474. * Compare the characters bitwise in the range
  475. * [`start`, `start + length`) with the characters
  476. * in `srcChars` in the range
  477. * [`srcStart`, `srcStart + srcLength`).
  478. * @param start the offset at which the compare operation begins
  479. * @param length the number of characters in this to compare
  480. * @param srcChars the characters to be compared
  481. * @param srcStart the offset into `srcChars` to start comparison
  482. * @param srcLength the number of characters in `srcChars` to compare
  483. * @return The result of bitwise character comparison: 0 if this
  484. * contains the same characters as `srcChars`, -1 if the characters in
  485. * this are bitwise less than the characters in `srcChars`, +1 if the
  486. * characters in this are bitwise greater than the characters
  487. * in `srcChars`.
  488. * @stable ICU 2.0
  489. */
  490. inline int8_t compare(int32_t start,
  491. int32_t length,
  492. const char16_t *srcChars,
  493. int32_t srcStart,
  494. int32_t srcLength) const;
  495. /**
  496. * Compare the characters bitwise in the range
  497. * [`start`, `limit`) with the characters
  498. * in `srcText` in the range
  499. * [`srcStart`, `srcLimit`).
  500. * @param start the offset at which the compare operation begins
  501. * @param limit the offset immediately following the compare operation
  502. * @param srcText the text to be compared
  503. * @param srcStart the offset into `srcText` to start comparison
  504. * @param srcLimit the offset into `srcText` to limit comparison
  505. * @return The result of bitwise character comparison: 0 if this
  506. * contains the same characters as `srcText`, -1 if the characters in
  507. * this are bitwise less than the characters in `srcText`, +1 if the
  508. * characters in this are bitwise greater than the characters
  509. * in `srcText`.
  510. * @stable ICU 2.0
  511. */
  512. inline int8_t compareBetween(int32_t start,
  513. int32_t limit,
  514. const UnicodeString& srcText,
  515. int32_t srcStart,
  516. int32_t srcLimit) const;
  517. /**
  518. * Compare two Unicode strings in code point order.
  519. * The result may be different from the results of compare(), operator<, etc.
  520. * if supplementary characters are present:
  521. *
  522. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  523. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  524. * which means that they compare as less than some other BMP characters like U+feff.
  525. * This function compares Unicode strings in code point order.
  526. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  527. *
  528. * @param text Another string to compare this one to.
  529. * @return a negative/zero/positive integer corresponding to whether
  530. * this string is less than/equal to/greater than the second one
  531. * in code point order
  532. * @stable ICU 2.0
  533. */
  534. inline int8_t compareCodePointOrder(const UnicodeString& text) const;
  535. /**
  536. * Compare two Unicode strings in code point order.
  537. * The result may be different from the results of compare(), operator<, etc.
  538. * if supplementary characters are present:
  539. *
  540. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  541. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  542. * which means that they compare as less than some other BMP characters like U+feff.
  543. * This function compares Unicode strings in code point order.
  544. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  545. *
  546. * @param start The start offset in this string at which the compare operation begins.
  547. * @param length The number of code units from this string to compare.
  548. * @param srcText Another string to compare this one to.
  549. * @return a negative/zero/positive integer corresponding to whether
  550. * this string is less than/equal to/greater than the second one
  551. * in code point order
  552. * @stable ICU 2.0
  553. */
  554. inline int8_t compareCodePointOrder(int32_t start,
  555. int32_t length,
  556. const UnicodeString& srcText) const;
  557. /**
  558. * Compare two Unicode strings in code point order.
  559. * The result may be different from the results of compare(), operator<, etc.
  560. * if supplementary characters are present:
  561. *
  562. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  563. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  564. * which means that they compare as less than some other BMP characters like U+feff.
  565. * This function compares Unicode strings in code point order.
  566. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  567. *
  568. * @param start The start offset in this string at which the compare operation begins.
  569. * @param length The number of code units from this string to compare.
  570. * @param srcText Another string to compare this one to.
  571. * @param srcStart The start offset in that string at which the compare operation begins.
  572. * @param srcLength The number of code units from that string to compare.
  573. * @return a negative/zero/positive integer corresponding to whether
  574. * this string is less than/equal to/greater than the second one
  575. * in code point order
  576. * @stable ICU 2.0
  577. */
  578. inline int8_t compareCodePointOrder(int32_t start,
  579. int32_t length,
  580. const UnicodeString& srcText,
  581. int32_t srcStart,
  582. int32_t srcLength) const;
  583. /**
  584. * Compare two Unicode strings in code point order.
  585. * The result may be different from the results of compare(), operator<, etc.
  586. * if supplementary characters are present:
  587. *
  588. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  589. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  590. * which means that they compare as less than some other BMP characters like U+feff.
  591. * This function compares Unicode strings in code point order.
  592. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  593. *
  594. * @param srcChars A pointer to another string to compare this one to.
  595. * @param srcLength The number of code units from that string to compare.
  596. * @return a negative/zero/positive integer corresponding to whether
  597. * this string is less than/equal to/greater than the second one
  598. * in code point order
  599. * @stable ICU 2.0
  600. */
  601. inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
  602. int32_t srcLength) const;
  603. /**
  604. * Compare two Unicode strings in code point order.
  605. * The result may be different from the results of compare(), operator<, etc.
  606. * if supplementary characters are present:
  607. *
  608. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  609. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  610. * which means that they compare as less than some other BMP characters like U+feff.
  611. * This function compares Unicode strings in code point order.
  612. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  613. *
  614. * @param start The start offset in this string at which the compare operation begins.
  615. * @param length The number of code units from this string to compare.
  616. * @param srcChars A pointer to another string to compare this one to.
  617. * @return a negative/zero/positive integer corresponding to whether
  618. * this string is less than/equal to/greater than the second one
  619. * in code point order
  620. * @stable ICU 2.0
  621. */
  622. inline int8_t compareCodePointOrder(int32_t start,
  623. int32_t length,
  624. const char16_t *srcChars) const;
  625. /**
  626. * Compare two Unicode strings in code point order.
  627. * The result may be different from the results of compare(), operator<, etc.
  628. * if supplementary characters are present:
  629. *
  630. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  631. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  632. * which means that they compare as less than some other BMP characters like U+feff.
  633. * This function compares Unicode strings in code point order.
  634. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  635. *
  636. * @param start The start offset in this string at which the compare operation begins.
  637. * @param length The number of code units from this string to compare.
  638. * @param srcChars A pointer to another string to compare this one to.
  639. * @param srcStart The start offset in that string at which the compare operation begins.
  640. * @param srcLength The number of code units from that string to compare.
  641. * @return a negative/zero/positive integer corresponding to whether
  642. * this string is less than/equal to/greater than the second one
  643. * in code point order
  644. * @stable ICU 2.0
  645. */
  646. inline int8_t compareCodePointOrder(int32_t start,
  647. int32_t length,
  648. const char16_t *srcChars,
  649. int32_t srcStart,
  650. int32_t srcLength) const;
  651. /**
  652. * Compare two Unicode strings in code point order.
  653. * The result may be different from the results of compare(), operator<, etc.
  654. * if supplementary characters are present:
  655. *
  656. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  657. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  658. * which means that they compare as less than some other BMP characters like U+feff.
  659. * This function compares Unicode strings in code point order.
  660. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  661. *
  662. * @param start The start offset in this string at which the compare operation begins.
  663. * @param limit The offset after the last code unit from this string to compare.
  664. * @param srcText Another string to compare this one to.
  665. * @param srcStart The start offset in that string at which the compare operation begins.
  666. * @param srcLimit The offset after the last code unit from that string to compare.
  667. * @return a negative/zero/positive integer corresponding to whether
  668. * this string is less than/equal to/greater than the second one
  669. * in code point order
  670. * @stable ICU 2.0
  671. */
  672. inline int8_t compareCodePointOrderBetween(int32_t start,
  673. int32_t limit,
  674. const UnicodeString& srcText,
  675. int32_t srcStart,
  676. int32_t srcLimit) const;
  677. /**
  678. * Compare two strings case-insensitively using full case folding.
  679. * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
  680. *
  681. * @param text Another string to compare this one to.
  682. * @param options A bit set of options:
  683. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  684. * Comparison in code unit order with default case folding.
  685. *
  686. * - U_COMPARE_CODE_POINT_ORDER
  687. * Set to choose code point order instead of code unit order
  688. * (see u_strCompare for details).
  689. *
  690. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  691. *
  692. * @return A negative, zero, or positive integer indicating the comparison result.
  693. * @stable ICU 2.0
  694. */
  695. inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
  696. /**
  697. * Compare two strings case-insensitively using full case folding.
  698. * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
  699. *
  700. * @param start The start offset in this string at which the compare operation begins.
  701. * @param length The number of code units from this string to compare.
  702. * @param srcText Another string to compare this one to.
  703. * @param options A bit set of options:
  704. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  705. * Comparison in code unit order with default case folding.
  706. *
  707. * - U_COMPARE_CODE_POINT_ORDER
  708. * Set to choose code point order instead of code unit order
  709. * (see u_strCompare for details).
  710. *
  711. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  712. *
  713. * @return A negative, zero, or positive integer indicating the comparison result.
  714. * @stable ICU 2.0
  715. */
  716. inline int8_t caseCompare(int32_t start,
  717. int32_t length,
  718. const UnicodeString& srcText,
  719. uint32_t options) const;
  720. /**
  721. * Compare two strings case-insensitively using full case folding.
  722. * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
  723. *
  724. * @param start The start offset in this string at which the compare operation begins.
  725. * @param length The number of code units from this string to compare.
  726. * @param srcText Another string to compare this one to.
  727. * @param srcStart The start offset in that string at which the compare operation begins.
  728. * @param srcLength The number of code units from that string to compare.
  729. * @param options A bit set of options:
  730. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  731. * Comparison in code unit order with default case folding.
  732. *
  733. * - U_COMPARE_CODE_POINT_ORDER
  734. * Set to choose code point order instead of code unit order
  735. * (see u_strCompare for details).
  736. *
  737. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  738. *
  739. * @return A negative, zero, or positive integer indicating the comparison result.
  740. * @stable ICU 2.0
  741. */
  742. inline int8_t caseCompare(int32_t start,
  743. int32_t length,
  744. const UnicodeString& srcText,
  745. int32_t srcStart,
  746. int32_t srcLength,
  747. uint32_t options) const;
  748. /**
  749. * Compare two strings case-insensitively using full case folding.
  750. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  751. *
  752. * @param srcChars A pointer to another string to compare this one to.
  753. * @param srcLength The number of code units from that string to compare.
  754. * @param options A bit set of options:
  755. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  756. * Comparison in code unit order with default case folding.
  757. *
  758. * - U_COMPARE_CODE_POINT_ORDER
  759. * Set to choose code point order instead of code unit order
  760. * (see u_strCompare for details).
  761. *
  762. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  763. *
  764. * @return A negative, zero, or positive integer indicating the comparison result.
  765. * @stable ICU 2.0
  766. */
  767. inline int8_t caseCompare(ConstChar16Ptr srcChars,
  768. int32_t srcLength,
  769. uint32_t options) const;
  770. /**
  771. * Compare two strings case-insensitively using full case folding.
  772. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  773. *
  774. * @param start The start offset in this string at which the compare operation begins.
  775. * @param length The number of code units from this string to compare.
  776. * @param srcChars A pointer to another string to compare this one to.
  777. * @param options A bit set of options:
  778. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  779. * Comparison in code unit order with default case folding.
  780. *
  781. * - U_COMPARE_CODE_POINT_ORDER
  782. * Set to choose code point order instead of code unit order
  783. * (see u_strCompare for details).
  784. *
  785. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  786. *
  787. * @return A negative, zero, or positive integer indicating the comparison result.
  788. * @stable ICU 2.0
  789. */
  790. inline int8_t caseCompare(int32_t start,
  791. int32_t length,
  792. const char16_t *srcChars,
  793. uint32_t options) const;
  794. /**
  795. * Compare two strings case-insensitively using full case folding.
  796. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  797. *
  798. * @param start The start offset in this string at which the compare operation begins.
  799. * @param length The number of code units from this string to compare.
  800. * @param srcChars A pointer to another string to compare this one to.
  801. * @param srcStart The start offset in that string at which the compare operation begins.
  802. * @param srcLength The number of code units from that string to compare.
  803. * @param options A bit set of options:
  804. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  805. * Comparison in code unit order with default case folding.
  806. *
  807. * - U_COMPARE_CODE_POINT_ORDER
  808. * Set to choose code point order instead of code unit order
  809. * (see u_strCompare for details).
  810. *
  811. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  812. *
  813. * @return A negative, zero, or positive integer indicating the comparison result.
  814. * @stable ICU 2.0
  815. */
  816. inline int8_t caseCompare(int32_t start,
  817. int32_t length,
  818. const char16_t *srcChars,
  819. int32_t srcStart,
  820. int32_t srcLength,
  821. uint32_t options) const;
  822. /**
  823. * Compare two strings case-insensitively using full case folding.
  824. * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
  825. *
  826. * @param start The start offset in this string at which the compare operation begins.
  827. * @param limit The offset after the last code unit from this string to compare.
  828. * @param srcText Another string to compare this one to.
  829. * @param srcStart The start offset in that string at which the compare operation begins.
  830. * @param srcLimit The offset after the last code unit from that string to compare.
  831. * @param options A bit set of options:
  832. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  833. * Comparison in code unit order with default case folding.
  834. *
  835. * - U_COMPARE_CODE_POINT_ORDER
  836. * Set to choose code point order instead of code unit order
  837. * (see u_strCompare for details).
  838. *
  839. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  840. *
  841. * @return A negative, zero, or positive integer indicating the comparison result.
  842. * @stable ICU 2.0
  843. */
  844. inline int8_t caseCompareBetween(int32_t start,
  845. int32_t limit,
  846. const UnicodeString& srcText,
  847. int32_t srcStart,
  848. int32_t srcLimit,
  849. uint32_t options) const;
  850. /**
  851. * Determine if this starts with the characters in `text`
  852. * @param text The text to match.
  853. * @return true if this starts with the characters in `text`,
  854. * false otherwise
  855. * @stable ICU 2.0
  856. */
  857. inline UBool startsWith(const UnicodeString& text) const;
  858. /**
  859. * Determine if this starts with the characters in `srcText`
  860. * in the range [`srcStart`, `srcStart + srcLength`).
  861. * @param srcText The text to match.
  862. * @param srcStart the offset into `srcText` to start matching
  863. * @param srcLength the number of characters in `srcText` to match
  864. * @return true if this starts with the characters in `srcText`,
  865. * false otherwise
  866. * @stable ICU 2.0
  867. */
  868. inline UBool startsWith(const UnicodeString& srcText,
  869. int32_t srcStart,
  870. int32_t srcLength) const;
  871. /**
  872. * Determine if this starts with the characters in `srcChars`
  873. * @param srcChars The characters to match.
  874. * @param srcLength the number of characters in `srcChars`
  875. * @return true if this starts with the characters in `srcChars`,
  876. * false otherwise
  877. * @stable ICU 2.0
  878. */
  879. inline UBool startsWith(ConstChar16Ptr srcChars,
  880. int32_t srcLength) const;
  881. /**
  882. * Determine if this starts with the characters in `srcChars`
  883. * in the range [`srcStart`, `srcStart + srcLength`).
  884. * @param srcChars The characters to match.
  885. * @param srcStart the offset into `srcChars` to start matching
  886. * @param srcLength the number of characters in `srcChars` to match
  887. * @return true if this starts with the characters in `srcChars`, false otherwise
  888. * @stable ICU 2.0
  889. */
  890. inline UBool startsWith(const char16_t *srcChars,
  891. int32_t srcStart,
  892. int32_t srcLength) const;
  893. /**
  894. * Determine if this ends with the characters in `text`
  895. * @param text The text to match.
  896. * @return true if this ends with the characters in `text`,
  897. * false otherwise
  898. * @stable ICU 2.0
  899. */
  900. inline UBool endsWith(const UnicodeString& text) const;
  901. /**
  902. * Determine if this ends with the characters in `srcText`
  903. * in the range [`srcStart`, `srcStart + srcLength`).
  904. * @param srcText The text to match.
  905. * @param srcStart the offset into `srcText` to start matching
  906. * @param srcLength the number of characters in `srcText` to match
  907. * @return true if this ends with the characters in `srcText`,
  908. * false otherwise
  909. * @stable ICU 2.0
  910. */
  911. inline UBool endsWith(const UnicodeString& srcText,
  912. int32_t srcStart,
  913. int32_t srcLength) const;
  914. /**
  915. * Determine if this ends with the characters in `srcChars`
  916. * @param srcChars The characters to match.
  917. * @param srcLength the number of characters in `srcChars`
  918. * @return true if this ends with the characters in `srcChars`,
  919. * false otherwise
  920. * @stable ICU 2.0
  921. */
  922. inline UBool endsWith(ConstChar16Ptr srcChars,
  923. int32_t srcLength) const;
  924. /**
  925. * Determine if this ends with the characters in `srcChars`
  926. * in the range [`srcStart`, `srcStart + srcLength`).
  927. * @param srcChars The characters to match.
  928. * @param srcStart the offset into `srcChars` to start matching
  929. * @param srcLength the number of characters in `srcChars` to match
  930. * @return true if this ends with the characters in `srcChars`,
  931. * false otherwise
  932. * @stable ICU 2.0
  933. */
  934. inline UBool endsWith(const char16_t *srcChars,
  935. int32_t srcStart,
  936. int32_t srcLength) const;
  937. /* Searching - bitwise only */
  938. /**
  939. * Locate in this the first occurrence of the characters in `text`,
  940. * using bitwise comparison.
  941. * @param text The text to search for.
  942. * @return The offset into this of the start of `text`,
  943. * or -1 if not found.
  944. * @stable ICU 2.0
  945. */
  946. inline int32_t indexOf(const UnicodeString& text) const;
  947. /**
  948. * Locate in this the first occurrence of the characters in `text`
  949. * starting at offset `start`, using bitwise comparison.
  950. * @param text The text to search for.
  951. * @param start The offset at which searching will start.
  952. * @return The offset into this of the start of `text`,
  953. * or -1 if not found.
  954. * @stable ICU 2.0
  955. */
  956. inline int32_t indexOf(const UnicodeString& text,
  957. int32_t start) const;
  958. /**
  959. * Locate in this the first occurrence in the range
  960. * [`start`, `start + length`) of the characters
  961. * in `text`, using bitwise comparison.
  962. * @param text The text to search for.
  963. * @param start The offset at which searching will start.
  964. * @param length The number of characters to search
  965. * @return The offset into this of the start of `text`,
  966. * or -1 if not found.
  967. * @stable ICU 2.0
  968. */
  969. inline int32_t indexOf(const UnicodeString& text,
  970. int32_t start,
  971. int32_t length) const;
  972. /**
  973. * Locate in this the first occurrence in the range
  974. * [`start`, `start + length`) of the characters
  975. * in `srcText` in the range
  976. * [`srcStart`, `srcStart + srcLength`),
  977. * using bitwise comparison.
  978. * @param srcText The text to search for.
  979. * @param srcStart the offset into `srcText` at which
  980. * to start matching
  981. * @param srcLength the number of characters in `srcText` to match
  982. * @param start the offset into this at which to start matching
  983. * @param length the number of characters in this to search
  984. * @return The offset into this of the start of `srcText`,
  985. * or -1 if not found.
  986. * @stable ICU 2.0
  987. */
  988. inline int32_t indexOf(const UnicodeString& srcText,
  989. int32_t srcStart,
  990. int32_t srcLength,
  991. int32_t start,
  992. int32_t length) const;
  993. /**
  994. * Locate in this the first occurrence of the characters in
  995. * `srcChars`
  996. * starting at offset `start`, using bitwise comparison.
  997. * @param srcChars The text to search for.
  998. * @param srcLength the number of characters in `srcChars` to match
  999. * @param start the offset into this at which to start matching
  1000. * @return The offset into this of the start of `srcChars`,
  1001. * or -1 if not found.
  1002. * @stable ICU 2.0
  1003. */
  1004. inline int32_t indexOf(const char16_t *srcChars,
  1005. int32_t srcLength,
  1006. int32_t start) const;
  1007. /**
  1008. * Locate in this the first occurrence in the range
  1009. * [`start`, `start + length`) of the characters
  1010. * in `srcChars`, using bitwise comparison.
  1011. * @param srcChars The text to search for.
  1012. * @param srcLength the number of characters in `srcChars`
  1013. * @param start The offset at which searching will start.
  1014. * @param length The number of characters to search
  1015. * @return The offset into this of the start of `srcChars`,
  1016. * or -1 if not found.
  1017. * @stable ICU 2.0
  1018. */
  1019. inline int32_t indexOf(ConstChar16Ptr srcChars,
  1020. int32_t srcLength,
  1021. int32_t start,
  1022. int32_t length) const;
  1023. /**
  1024. * Locate in this the first occurrence in the range
  1025. * [`start`, `start + length`) of the characters
  1026. * in `srcChars` in the range
  1027. * [`srcStart`, `srcStart + srcLength`),
  1028. * using bitwise comparison.
  1029. * @param srcChars The text to search for.
  1030. * @param srcStart the offset into `srcChars` at which
  1031. * to start matching
  1032. * @param srcLength the number of characters in `srcChars` to match
  1033. * @param start the offset into this at which to start matching
  1034. * @param length the number of characters in this to search
  1035. * @return The offset into this of the start of `srcChars`,
  1036. * or -1 if not found.
  1037. * @stable ICU 2.0
  1038. */
  1039. int32_t indexOf(const char16_t *srcChars,
  1040. int32_t srcStart,
  1041. int32_t srcLength,
  1042. int32_t start,
  1043. int32_t length) const;
  1044. /**
  1045. * Locate in this the first occurrence of the BMP code point `c`,
  1046. * using bitwise comparison.
  1047. * @param c The code unit to search for.
  1048. * @return The offset into this of `c`, or -1 if not found.
  1049. * @stable ICU 2.0
  1050. */
  1051. inline int32_t indexOf(char16_t c) const;
  1052. /**
  1053. * Locate in this the first occurrence of the code point `c`,
  1054. * using bitwise comparison.
  1055. *
  1056. * @param c The code point to search for.
  1057. * @return The offset into this of `c`, or -1 if not found.
  1058. * @stable ICU 2.0
  1059. */
  1060. inline int32_t indexOf(UChar32 c) const;
  1061. /**
  1062. * Locate in this the first occurrence of the BMP code point `c`,
  1063. * starting at offset `start`, using bitwise comparison.
  1064. * @param c The code unit to search for.
  1065. * @param start The offset at which searching will start.
  1066. * @return The offset into this of `c`, or -1 if not found.
  1067. * @stable ICU 2.0
  1068. */
  1069. inline int32_t indexOf(char16_t c,
  1070. int32_t start) const;
  1071. /**
  1072. * Locate in this the first occurrence of the code point `c`
  1073. * starting at offset `start`, using bitwise comparison.
  1074. *
  1075. * @param c The code point to search for.
  1076. * @param start The offset at which searching will start.
  1077. * @return The offset into this of `c`, or -1 if not found.
  1078. * @stable ICU 2.0
  1079. */
  1080. inline int32_t indexOf(UChar32 c,
  1081. int32_t start) const;
  1082. /**
  1083. * Locate in this the first occurrence of the BMP code point `c`
  1084. * in the range [`start`, `start + length`),
  1085. * using bitwise comparison.
  1086. * @param c The code unit to search for.
  1087. * @param start the offset into this at which to start matching
  1088. * @param length the number of characters in this to search
  1089. * @return The offset into this of `c`, or -1 if not found.
  1090. * @stable ICU 2.0
  1091. */
  1092. inline int32_t indexOf(char16_t c,
  1093. int32_t start,
  1094. int32_t length) const;
  1095. /**
  1096. * Locate in this the first occurrence of the code point `c`
  1097. * in the range [`start`, `start + length`),
  1098. * using bitwise comparison.
  1099. *
  1100. * @param c The code point to search for.
  1101. * @param start the offset into this at which to start matching
  1102. * @param length the number of characters in this to search
  1103. * @return The offset into this of `c`, or -1 if not found.
  1104. * @stable ICU 2.0
  1105. */
  1106. inline int32_t indexOf(UChar32 c,
  1107. int32_t start,
  1108. int32_t length) const;
  1109. /**
  1110. * Locate in this the last occurrence of the characters in `text`,
  1111. * using bitwise comparison.
  1112. * @param text The text to search for.
  1113. * @return The offset into this of the start of `text`,
  1114. * or -1 if not found.
  1115. * @stable ICU 2.0
  1116. */
  1117. inline int32_t lastIndexOf(const UnicodeString& text) const;
  1118. /**
  1119. * Locate in this the last occurrence of the characters in `text`
  1120. * starting at offset `start`, using bitwise comparison.
  1121. * @param text The text to search for.
  1122. * @param start The offset at which searching will start.
  1123. * @return The offset into this of the start of `text`,
  1124. * or -1 if not found.
  1125. * @stable ICU 2.0
  1126. */
  1127. inline int32_t lastIndexOf(const UnicodeString& text,
  1128. int32_t start) const;
  1129. /**
  1130. * Locate in this the last occurrence in the range
  1131. * [`start`, `start + length`) of the characters
  1132. * in `text`, using bitwise comparison.
  1133. * @param text The text to search for.
  1134. * @param start The offset at which searching will start.
  1135. * @param length The number of characters to search
  1136. * @return The offset into this of the start of `text`,
  1137. * or -1 if not found.
  1138. * @stable ICU 2.0
  1139. */
  1140. inline int32_t lastIndexOf(const UnicodeString& text,
  1141. int32_t start,
  1142. int32_t length) const;
  1143. /**
  1144. * Locate in this the last occurrence in the range
  1145. * [`start`, `start + length`) of the characters
  1146. * in `srcText` in the range
  1147. * [`srcStart`, `srcStart + srcLength`),
  1148. * using bitwise comparison.
  1149. * @param srcText The text to search for.
  1150. * @param srcStart the offset into `srcText` at which
  1151. * to start matching
  1152. * @param srcLength the number of characters in `srcText` to match
  1153. * @param start the offset into this at which to start matching
  1154. * @param length the number of characters in this to search
  1155. * @return The offset into this of the start of `srcText`,
  1156. * or -1 if not found.
  1157. * @stable ICU 2.0
  1158. */
  1159. inline int32_t lastIndexOf(const UnicodeString& srcText,
  1160. int32_t srcStart,
  1161. int32_t srcLength,
  1162. int32_t start,
  1163. int32_t length) const;
  1164. /**
  1165. * Locate in this the last occurrence of the characters in `srcChars`
  1166. * starting at offset `start`, using bitwise comparison.
  1167. * @param srcChars The text to search for.
  1168. * @param srcLength the number of characters in `srcChars` to match
  1169. * @param start the offset into this at which to start matching
  1170. * @return The offset into this of the start of `srcChars`,
  1171. * or -1 if not found.
  1172. * @stable ICU 2.0
  1173. */
  1174. inline int32_t lastIndexOf(const char16_t *srcChars,
  1175. int32_t srcLength,
  1176. int32_t start) const;
  1177. /**
  1178. * Locate in this the last occurrence in the range
  1179. * [`start`, `start + length`) of the characters
  1180. * in `srcChars`, using bitwise comparison.
  1181. * @param srcChars The text to search for.
  1182. * @param srcLength the number of characters in `srcChars`
  1183. * @param start The offset at which searching will start.
  1184. * @param length The number of characters to search
  1185. * @return The offset into this of the start of `srcChars`,
  1186. * or -1 if not found.
  1187. * @stable ICU 2.0
  1188. */
  1189. inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
  1190. int32_t srcLength,
  1191. int32_t start,
  1192. int32_t length) const;
  1193. /**
  1194. * Locate in this the last occurrence in the range
  1195. * [`start`, `start + length`) of the characters
  1196. * in `srcChars` in the range
  1197. * [`srcStart`, `srcStart + srcLength`),
  1198. * using bitwise comparison.
  1199. * @param srcChars The text to search for.
  1200. * @param srcStart the offset into `srcChars` at which
  1201. * to start matching
  1202. * @param srcLength the number of characters in `srcChars` to match
  1203. * @param start the offset into this at which to start matching
  1204. * @param length the number of characters in this to search
  1205. * @return The offset into this of the start of `srcChars`,
  1206. * or -1 if not found.
  1207. * @stable ICU 2.0
  1208. */
  1209. int32_t lastIndexOf(const char16_t *srcChars,
  1210. int32_t srcStart,
  1211. int32_t srcLength,
  1212. int32_t start,
  1213. int32_t length) const;
  1214. /**
  1215. * Locate in this the last occurrence of the BMP code point `c`,
  1216. * using bitwise comparison.
  1217. * @param c The code unit to search for.
  1218. * @return The offset into this of `c`, or -1 if not found.
  1219. * @stable ICU 2.0
  1220. */
  1221. inline int32_t lastIndexOf(char16_t c) const;
  1222. /**
  1223. * Locate in this the last occurrence of the code point `c`,
  1224. * using bitwise comparison.
  1225. *
  1226. * @param c The code point to search for.
  1227. * @return The offset into this of `c`, or -1 if not found.
  1228. * @stable ICU 2.0
  1229. */
  1230. inline int32_t lastIndexOf(UChar32 c) const;
  1231. /**
  1232. * Locate in this the last occurrence of the BMP code point `c`
  1233. * starting at offset `start`, using bitwise comparison.
  1234. * @param c The code unit to search for.
  1235. * @param start The offset at which searching will start.
  1236. * @return The offset into this of `c`, or -1 if not found.
  1237. * @stable ICU 2.0
  1238. */
  1239. inline int32_t lastIndexOf(char16_t c,
  1240. int32_t start) const;
  1241. /**
  1242. * Locate in this the last occurrence of the code point `c`
  1243. * starting at offset `start`, using bitwise comparison.
  1244. *
  1245. * @param c The code point to search for.
  1246. * @param start The offset at which searching will start.
  1247. * @return The offset into this of `c`, or -1 if not found.
  1248. * @stable ICU 2.0
  1249. */
  1250. inline int32_t lastIndexOf(UChar32 c,
  1251. int32_t start) const;
  1252. /**
  1253. * Locate in this the last occurrence of the BMP code point `c`
  1254. * in the range [`start`, `start + length`),
  1255. * using bitwise comparison.
  1256. * @param c The code unit to search for.
  1257. * @param start the offset into this at which to start matching
  1258. * @param length the number of characters in this to search
  1259. * @return The offset into this of `c`, or -1 if not found.
  1260. * @stable ICU 2.0
  1261. */
  1262. inline int32_t lastIndexOf(char16_t c,
  1263. int32_t start,
  1264. int32_t length) const;
  1265. /**
  1266. * Locate in this the last occurrence of the code point `c`
  1267. * in the range [`start`, `start + length`),
  1268. * using bitwise comparison.
  1269. *
  1270. * @param c The code point to search for.
  1271. * @param start the offset into this at which to start matching
  1272. * @param length the number of characters in this to search
  1273. * @return The offset into this of `c`, or -1 if not found.
  1274. * @stable ICU 2.0
  1275. */
  1276. inline int32_t lastIndexOf(UChar32 c,
  1277. int32_t start,
  1278. int32_t length) const;
  1279. /* Character access */
  1280. /**
  1281. * Return the code unit at offset `offset`.
  1282. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1283. * @param offset a valid offset into the text
  1284. * @return the code unit at offset `offset`
  1285. * or 0xffff if the offset is not valid for this string
  1286. * @stable ICU 2.0
  1287. */
  1288. inline char16_t charAt(int32_t offset) const;
  1289. /**
  1290. * Return the code unit at offset `offset`.
  1291. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1292. * @param offset a valid offset into the text
  1293. * @return the code unit at offset `offset`
  1294. * @stable ICU 2.0
  1295. */
  1296. inline char16_t operator[] (int32_t offset) const;
  1297. /**
  1298. * Return the code point that contains the code unit
  1299. * at offset `offset`.
  1300. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1301. * @param offset a valid offset into the text
  1302. * that indicates the text offset of any of the code units
  1303. * that will be assembled into a code point (21-bit value) and returned
  1304. * @return the code point of text at `offset`
  1305. * or 0xffff if the offset is not valid for this string
  1306. * @stable ICU 2.0
  1307. */
  1308. UChar32 char32At(int32_t offset) const;
  1309. /**
  1310. * Adjust a random-access offset so that
  1311. * it points to the beginning of a Unicode character.
  1312. * The offset that is passed in points to
  1313. * any code unit of a code point,
  1314. * while the returned offset will point to the first code unit
  1315. * of the same code point.
  1316. * In UTF-16, if the input offset points to a second surrogate
  1317. * of a surrogate pair, then the returned offset will point
  1318. * to the first surrogate.
  1319. * @param offset a valid offset into one code point of the text
  1320. * @return offset of the first code unit of the same code point
  1321. * @see U16_SET_CP_START
  1322. * @stable ICU 2.0
  1323. */
  1324. int32_t getChar32Start(int32_t offset) const;
  1325. /**
  1326. * Adjust a random-access offset so that
  1327. * it points behind a Unicode character.
  1328. * The offset that is passed in points behind
  1329. * any code unit of a code point,
  1330. * while the returned offset will point behind the last code unit
  1331. * of the same code point.
  1332. * In UTF-16, if the input offset points behind the first surrogate
  1333. * (i.e., to the second surrogate)
  1334. * of a surrogate pair, then the returned offset will point
  1335. * behind the second surrogate (i.e., to the first code unit after the pair).
  1336. * @param offset a valid offset after any code unit of a code point of the text
  1337. * @return offset of the first code unit after the same code point
  1338. * @see U16_SET_CP_LIMIT
  1339. * @stable ICU 2.0
  1340. */
  1341. int32_t getChar32Limit(int32_t offset) const;
  1342. /**
  1343. * Move the code unit index along the string by delta code points.
  1344. * Interpret the input index as a code unit-based offset into the string,
  1345. * move the index forward or backward by delta code points, and
  1346. * return the resulting index.
  1347. * The input index should point to the first code unit of a code point,
  1348. * if there is more than one.
  1349. *
  1350. * Both input and output indexes are code unit-based as for all
  1351. * string indexes/offsets in ICU (and other libraries, like MBCS char*).
  1352. * If delta<0 then the index is moved backward (toward the start of the string).
  1353. * If delta>0 then the index is moved forward (toward the end of the string).
  1354. *
  1355. * This behaves like CharacterIterator::move32(delta, kCurrent).
  1356. *
  1357. * Behavior for out-of-bounds indexes:
  1358. * `moveIndex32` pins the input index to 0..length(), i.e.,
  1359. * if the input index<0 then it is pinned to 0;
  1360. * if it is index>length() then it is pinned to length().
  1361. * Afterwards, the index is moved by `delta` code points
  1362. * forward or backward,
  1363. * but no further backward than to 0 and no further forward than to length().
  1364. * The resulting index return value will be in between 0 and length(), inclusively.
  1365. *
  1366. * Examples:
  1367. * \code
  1368. * // s has code points 'a' U+10000 'b' U+10ffff U+2029
  1369. * UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
  1370. *
  1371. * // initial index: position of U+10000
  1372. * int32_t index=1;
  1373. *
  1374. * // the following examples will all result in index==4, position of U+10ffff
  1375. *
  1376. * // skip 2 code points from some position in the string
  1377. * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
  1378. *
  1379. * // go to the 3rd code point from the start of s (0-based)
  1380. * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
  1381. *
  1382. * // go to the next-to-last code point of s
  1383. * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
  1384. * \endcode
  1385. *
  1386. * @param index input code unit index
  1387. * @param delta (signed) code point count to move the index forward or backward
  1388. * in the string
  1389. * @return the resulting code unit index
  1390. * @stable ICU 2.0
  1391. */
  1392. int32_t moveIndex32(int32_t index, int32_t delta) const;
  1393. /* Substring extraction */
  1394. /**
  1395. * Copy the characters in the range
  1396. * [`start`, `start + length`) into the array `dst`,
  1397. * beginning at `dstStart`.
  1398. * If the string aliases to `dst` itself as an external buffer,
  1399. * then extract() will not copy the contents.
  1400. *
  1401. * @param start offset of first character which will be copied into the array
  1402. * @param length the number of characters to extract
  1403. * @param dst array in which to copy characters. The length of `dst`
  1404. * must be at least (`dstStart + length`).
  1405. * @param dstStart the offset in `dst` where the first character
  1406. * will be extracted
  1407. * @stable ICU 2.0
  1408. */
  1409. inline void extract(int32_t start,
  1410. int32_t length,
  1411. Char16Ptr dst,
  1412. int32_t dstStart = 0) const;
  1413. /**
  1414. * Copy the contents of the string into dest.
  1415. * This is a convenience function that
  1416. * checks if there is enough space in dest,
  1417. * extracts the entire string if possible,
  1418. * and NUL-terminates dest if possible.
  1419. *
  1420. * If the string fits into dest but cannot be NUL-terminated
  1421. * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
  1422. * If the string itself does not fit into dest
  1423. * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
  1424. *
  1425. * If the string aliases to `dest` itself as an external buffer,
  1426. * then extract() will not copy the contents.
  1427. *
  1428. * @param dest Destination string buffer.
  1429. * @param destCapacity Number of char16_ts available at dest.
  1430. * @param errorCode ICU error code.
  1431. * @return length()
  1432. * @stable ICU 2.0
  1433. */
  1434. int32_t
  1435. extract(Char16Ptr dest, int32_t destCapacity,
  1436. UErrorCode &errorCode) const;
  1437. /**
  1438. * Copy the characters in the range
  1439. * [`start`, `start + length`) into the UnicodeString
  1440. * `target`.
  1441. * @param start offset of first character which will be copied
  1442. * @param length the number of characters to extract
  1443. * @param target UnicodeString into which to copy characters.
  1444. * @stable ICU 2.0
  1445. */
  1446. inline void extract(int32_t start,
  1447. int32_t length,
  1448. UnicodeString& target) const;
  1449. /**
  1450. * Copy the characters in the range [`start`, `limit`)
  1451. * into the array `dst`, beginning at `dstStart`.
  1452. * @param start offset of first character which will be copied into the array
  1453. * @param limit offset immediately following the last character to be copied
  1454. * @param dst array in which to copy characters. The length of `dst`
  1455. * must be at least (`dstStart + (limit - start)`).
  1456. * @param dstStart the offset in `dst` where the first character
  1457. * will be extracted
  1458. * @stable ICU 2.0
  1459. */
  1460. inline void extractBetween(int32_t start,
  1461. int32_t limit,
  1462. char16_t *dst,
  1463. int32_t dstStart = 0) const;
  1464. /**
  1465. * Copy the characters in the range [`start`, `limit`)
  1466. * into the UnicodeString `target`. Replaceable API.
  1467. * @param start offset of first character which will be copied
  1468. * @param limit offset immediately following the last character to be copied
  1469. * @param target UnicodeString into which to copy characters.
  1470. * @stable ICU 2.0
  1471. */
  1472. virtual void extractBetween(int32_t start,
  1473. int32_t limit,
  1474. UnicodeString& target) const override;
  1475. /**
  1476. * Copy the characters in the range
  1477. * [`start`, `start + startLength`) into an array of characters.
  1478. * All characters must be invariant (see utypes.h).
  1479. * Use US_INV as the last, signature-distinguishing parameter.
  1480. *
  1481. * This function does not write any more than `targetCapacity`
  1482. * characters but returns the length of the entire output string
  1483. * so that one can allocate a larger buffer and call the function again
  1484. * if necessary.
  1485. * The output string is NUL-terminated if possible.
  1486. *
  1487. * @param start offset of first character which will be copied
  1488. * @param startLength the number of characters to extract
  1489. * @param target the target buffer for extraction, can be nullptr
  1490. * if targetCapacity is 0
  1491. * @param targetCapacity the length of the target buffer
  1492. * @param inv Signature-distinguishing parameter, use US_INV.
  1493. * @return the output string length, not including the terminating NUL
  1494. * @stable ICU 3.2
  1495. */
  1496. int32_t extract(int32_t start,
  1497. int32_t startLength,
  1498. char *target,
  1499. int32_t targetCapacity,
  1500. enum EInvariant inv) const;
  1501. #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  1502. /**
  1503. * Copy the characters in the range
  1504. * [`start`, `start + startLength`) into an array of characters
  1505. * in the platform's default codepage.
  1506. * This function does not write any more than `targetLength`
  1507. * characters but returns the length of the entire output string
  1508. * so that one can allocate a larger buffer and call the function again
  1509. * if necessary.
  1510. * The output string is NUL-terminated if possible.
  1511. *
  1512. * @param start offset of first character which will be copied
  1513. * @param startLength the number of characters to extract
  1514. * @param target the target buffer for extraction
  1515. * @param targetLength the length of the target buffer
  1516. * If `target` is nullptr, then the number of bytes required for
  1517. * `target` is returned.
  1518. * @return the output string length, not including the terminating NUL
  1519. * @stable ICU 2.0
  1520. */
  1521. int32_t extract(int32_t start,
  1522. int32_t startLength,
  1523. char *target,
  1524. uint32_t targetLength) const;
  1525. #endif
  1526. #if !UCONFIG_NO_CONVERSION
  1527. /**
  1528. * Copy the characters in the range
  1529. * [`start`, `start + startLength`) into an array of characters
  1530. * in a specified codepage.
  1531. * The output string is NUL-terminated.
  1532. *
  1533. * Recommendation: For invariant-character strings use
  1534. * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1535. * because it avoids object code dependencies of UnicodeString on
  1536. * the conversion code.
  1537. *
  1538. * @param start offset of first character which will be copied
  1539. * @param startLength the number of characters to extract
  1540. * @param target the target buffer for extraction
  1541. * @param codepage the desired codepage for the characters. 0 has
  1542. * the special meaning of the default codepage
  1543. * If `codepage` is an empty string (`""`),
  1544. * then a simple conversion is performed on the codepage-invariant
  1545. * subset ("invariant characters") of the platform encoding. See utypes.h.
  1546. * If `target` is nullptr, then the number of bytes required for
  1547. * `target` is returned. It is assumed that the target is big enough
  1548. * to fit all of the characters.
  1549. * @return the output string length, not including the terminating NUL
  1550. * @stable ICU 2.0
  1551. */
  1552. inline int32_t extract(int32_t start,
  1553. int32_t startLength,
  1554. char* target,
  1555. const char* codepage = nullptr) const;
  1556. /**
  1557. * Copy the characters in the range
  1558. * [`start`, `start + startLength`) into an array of characters
  1559. * in a specified codepage.
  1560. * This function does not write any more than `targetLength`
  1561. * characters but returns the length of the entire output string
  1562. * so that one can allocate a larger buffer and call the function again
  1563. * if necessary.
  1564. * The output string is NUL-terminated if possible.
  1565. *
  1566. * Recommendation: For invariant-character strings use
  1567. * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1568. * because it avoids object code dependencies of UnicodeString on
  1569. * the conversion code.
  1570. *
  1571. * @param start offset of first character which will be copied
  1572. * @param startLength the number of characters to extract
  1573. * @param target the target buffer for extraction
  1574. * @param targetLength the length of the target buffer
  1575. * @param codepage the desired codepage for the characters. 0 has
  1576. * the special meaning of the default codepage
  1577. * If `codepage` is an empty string (`""`),
  1578. * then a simple conversion is performed on the codepage-invariant
  1579. * subset ("invariant characters") of the platform encoding. See utypes.h.
  1580. * If `target` is nullptr, then the number of bytes required for
  1581. * `target` is returned.
  1582. * @return the output string length, not including the terminating NUL
  1583. * @stable ICU 2.0
  1584. */
  1585. int32_t extract(int32_t start,
  1586. int32_t startLength,
  1587. char *target,
  1588. uint32_t targetLength,
  1589. const char *codepage) const;
  1590. /**
  1591. * Convert the UnicodeString into a codepage string using an existing UConverter.
  1592. * The output string is NUL-terminated if possible.
  1593. *
  1594. * This function avoids the overhead of opening and closing a converter if
  1595. * multiple strings are extracted.
  1596. *
  1597. * @param dest destination string buffer, can be nullptr if destCapacity==0
  1598. * @param destCapacity the number of chars available at dest
  1599. * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
  1600. * or nullptr for the default converter
  1601. * @param errorCode normal ICU error code
  1602. * @return the length of the output string, not counting the terminating NUL;
  1603. * if the length is greater than destCapacity, then the string will not fit
  1604. * and a buffer of the indicated length would need to be passed in
  1605. * @stable ICU 2.0
  1606. */
  1607. int32_t extract(char *dest, int32_t destCapacity,
  1608. UConverter *cnv,
  1609. UErrorCode &errorCode) const;
  1610. #endif
  1611. /**
  1612. * Create a temporary substring for the specified range.
  1613. * Unlike the substring constructor and setTo() functions,
  1614. * the object returned here will be a read-only alias (using getBuffer())
  1615. * rather than copying the text.
  1616. * As a result, this substring operation is much faster but requires
  1617. * that the original string not be modified or deleted during the lifetime
  1618. * of the returned substring object.
  1619. * @param start offset of the first character visible in the substring
  1620. * @param length length of the substring
  1621. * @return a read-only alias UnicodeString object for the substring
  1622. * @stable ICU 4.4
  1623. */
  1624. UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
  1625. /**
  1626. * Create a temporary substring for the specified range.
  1627. * Same as tempSubString(start, length) except that the substring range
  1628. * is specified as a (start, limit) pair (with an exclusive limit index)
  1629. * rather than a (start, length) pair.
  1630. * @param start offset of the first character visible in the substring
  1631. * @param limit offset immediately following the last character visible in the substring
  1632. * @return a read-only alias UnicodeString object for the substring
  1633. * @stable ICU 4.4
  1634. */
  1635. inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
  1636. /**
  1637. * Convert the UnicodeString to UTF-8 and write the result
  1638. * to a ByteSink. This is called by toUTF8String().
  1639. * Unpaired surrogates are replaced with U+FFFD.
  1640. * Calls u_strToUTF8WithSub().
  1641. *
  1642. * @param sink A ByteSink to which the UTF-8 version of the string is written.
  1643. * sink.Flush() is called at the end.
  1644. * @stable ICU 4.2
  1645. * @see toUTF8String
  1646. */
  1647. void toUTF8(ByteSink &sink) const;
  1648. /**
  1649. * Appends the UTF-8 form of this UnicodeString to `result`
  1650. * and returns `result`; the text is appended, not assigned.
  1651. * Unpaired surrogates are replaced with U+FFFD.
  1652. * Wraps `result` in a StringByteSink and calls toUTF8() on that sink.
  1653. *
  1654. * @param result A standard string (or a compatible object)
  1655. * that receives the appended UTF-8 version of the string.
  1656. * @return A reference to `result`.
  1657. * @stable ICU 4.2
  1658. * @see toUTF8
  1659. */
  1660. template<typename StringClass>
  1661. StringClass &toUTF8String(StringClass &result) const {
  1662. StringByteSink<StringClass> utf8Sink(&result, this->length());
  1663. this->toUTF8(utf8Sink);
  1664. return result;
  1665. }
  1666. /**
  1667. * Convert the UnicodeString to UTF-32.
  1668. * Unpaired surrogates are replaced with U+FFFD.
  1669. * Calls u_strToUTF32WithSub().
  1670. *
  1671. * @param utf32 destination string buffer, can be nullptr if capacity==0
  1672. * @param capacity the number of UChar32s available at utf32
  1673. * @param errorCode Standard ICU error code. Its input value must
  1674. * pass the U_SUCCESS() test, or else the function returns
  1675. * immediately. Check for U_FAILURE() on output or use with
  1676. * function chaining. (See User Guide for details.)
  1677. * @return The length of the UTF-32 string.
  1678. * @see fromUTF32
  1679. * @stable ICU 4.2
  1680. */
  1681. int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
  1682. /* Length operations */
  1683. /**
  1684. * Return the length of the UnicodeString object.
  1685. * The length is the number of char16_t code units that are in the UnicodeString.
  1686. * If you want the number of code points, please use countChar32().
  1687. * @return the length of the UnicodeString object
  1688. * @see countChar32
  1689. * @stable ICU 2.0
  1690. */
  1691. inline int32_t length() const;
  1692. /**
  1693. * Count Unicode code points in the length char16_t code units of the string.
  1694. * A code point may occupy either one or two char16_t code units.
  1695. * Counting code points involves reading all code units.
  1696. *
  1697. * This function is basically the inverse of moveIndex32().
  1698. *
  1699. * @param start the index of the first code unit to check
  1700. * @param length the number of char16_t code units to check
  1701. * @return the number of code points in the specified code units
  1702. * @see length
  1703. * @stable ICU 2.0
  1704. */
  1705. int32_t
  1706. countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
  1707. /**
  1708. * Check if the length char16_t code units of the string
  1709. * contain more Unicode code points than a certain number.
  1710. * This is more efficient than counting all code points in this part of the string
  1711. * and comparing that number with a threshold.
  1712. * This function may not need to scan the string at all if the length
  1713. * falls within a certain range, and
  1714. * never needs to count more than 'number+1' code points.
  1715. * Logically equivalent to (countChar32(start, length)>number).
  1716. * A Unicode code point may occupy either one or two char16_t code units.
  1717. *
  1718. * @param start the index of the first code unit to check (0 for the entire string)
  1719. * @param length the number of char16_t code units to check
  1720. * (use INT32_MAX for the entire string; remember that start/length
  1721. * values are pinned)
  1722. * @param number The number of code points in the (sub)string is compared against
  1723. * the 'number' parameter.
  1724. * @return Boolean value for whether the string contains more Unicode code points
  1725. * than 'number'. Same as (u_countChar32(s, length)>number).
  1726. * @see countChar32
  1727. * @see u_strHasMoreChar32Than
  1728. * @stable ICU 2.4
  1729. */
  1730. UBool
  1731. hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
  1732. /**
  1733. * Determine if this string is empty.
  1734. * @return true if this string contains 0 characters, false otherwise.
  1735. * @stable ICU 2.0
  1736. */
  1737. inline UBool isEmpty() const;
  1738. /**
  1739. * Return the capacity of the internal buffer of the UnicodeString object.
  1740. * This is useful together with the getBuffer functions.
  1741. * See there for details.
  1742. *
  1743. * @return the number of char16_ts available in the internal buffer
  1744. * @see getBuffer
  1745. * @stable ICU 2.0
  1746. */
  1747. inline int32_t getCapacity() const;
  1748. /* Other operations */
  1749. /**
  1750. * Generate a hash code for this object.
  1751. * @return The hash code of this UnicodeString.
  1752. * @stable ICU 2.0
  1753. */
  1754. inline int32_t hashCode() const;
  1755. /**
  1756. * Determine if this object contains a valid string.
  1757. * A bogus string has no value. It is different from an empty string,
  1758. * although in both cases isEmpty() returns true and length() returns 0.
  1759. * setToBogus() and isBogus() can be used to indicate that no string value is available.
  1760. * For a bogus string, getBuffer() and getTerminatedBuffer() return nullptr, and
  1761. * length() returns 0.
  1762. *
  1763. * @return true if the string is bogus/invalid, false otherwise
  1764. * @see setToBogus()
  1765. * @stable ICU 2.0
  1766. */
  1767. inline UBool isBogus() const;
  1768. //========================================
  1769. // Write operations
  1770. //========================================
  1771. /* Assignment operations */
  1772. /**
  1773. * Assignment operator. Replace the characters in this UnicodeString
  1774. * with the characters from `srcText`.
  1775. *
  1776. * Starting with ICU 2.4, the assignment operator and the copy constructor
  1777. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1778. * By contrast, the fastCopyFrom() function implements the old,
  1779. * more efficient but less safe behavior
  1780. * of making this string also a readonly alias to the same buffer.
  1781. *
  1782. * If the source object has an "open" buffer from getBuffer(minCapacity),
  1783. * then the copy is an empty string.
  1784. *
  1785. * @param srcText The text containing the characters to replace
  1786. * @return a reference to this
  1787. * @stable ICU 2.0
  1788. * @see fastCopyFrom
  1789. */
  1790. UnicodeString &operator=(const UnicodeString &srcText);
  1791. /**
  1792. * Almost the same as the assignment operator.
  1793. * Replace the characters in this UnicodeString
  1794. * with the characters from `srcText`.
  1795. *
  1796. * This function works the same as the assignment operator
  1797. * for all strings except for ones that are readonly aliases.
  1798. *
  1799. * Starting with ICU 2.4, the assignment operator and the copy constructor
  1800. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1801. * This function implements the old, more efficient but less safe behavior
  1802. * of making this string also a readonly alias to the same buffer.
  1803. *
  1804. * The fastCopyFrom function must be used only if it is known that the lifetime of
  1805. * this UnicodeString does not exceed the lifetime of the aliased buffer
  1806. * including its contents, for example for strings from resource bundles
  1807. * or aliases to string constants.
  1808. *
  1809. * If the source object has an "open" buffer from getBuffer(minCapacity),
  1810. * then the copy is an empty string.
  1811. *
  1812. * @param src The text containing the characters to replace.
  1813. * @return a reference to this
  1814. * @stable ICU 2.4
  1815. */
  1816. UnicodeString &fastCopyFrom(const UnicodeString &src);
  1817. #ifndef U_HIDE_DRAFT_API
  1818. /**
  1819. * Assignment operator taking a string view. Calls unBogus(), then replaces
  1820. * the whole current content [0, length()) with a copy of the characters
  1821. * from `src`, which is, or which is implicitly convertible to,
  1822. * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
  1823. *
  1824. * @param src The string view containing the characters to copy.
  1825. * @return a reference to this
  1826. * @draft ICU 76
  1827. */
  1828. template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
  1829. inline UnicodeString &operator=(const S &src) {
  1830. this->unBogus();
  1831. return this->doReplace(0, this->length(), internal::toU16StringView(src));
  1832. }
  1833. #endif // U_HIDE_DRAFT_API
  1834. /**
  1835. * Move assignment operator; might leave src in bogus state.
  1836. * This string will have the same contents and state that the source string had.
  1837. * The behavior is undefined if *this and src are the same object.
  1838. * @param src source string
  1839. * @return *this
  1840. * @stable ICU 56
  1841. */
  1842. UnicodeString &operator=(UnicodeString &&src) noexcept;
  1843. /**
  1844. * Swap strings.
  1845. * @param other other string
  1846. * @stable ICU 56
  1847. */
  1848. void swap(UnicodeString &other) noexcept;
  1849. /**
  1850. * Non-member UnicodeString swap function; forwards to s1.swap(s2).
  1851. * @param s1 will get s2's contents and state
  1852. * @param s2 will get s1's contents and state
  1853. * @stable ICU 56
  1854. */
  1855. friend inline void U_EXPORT2
  1856. swap(UnicodeString &s1, UnicodeString &s2) noexcept {
  1857. s1.swap(s2);
  1858. }
  1859. /**
  1860. * Assignment operator. Replace the characters in this UnicodeString
  1861. * with the code unit `ch`.
  1862. * @param ch the code unit to replace
  1863. * @return a reference to this
  1864. * @stable ICU 2.0
  1865. */
  1866. inline UnicodeString& operator= (char16_t ch);
  1867. /**
  1868. * Assignment operator. Replace the characters in this UnicodeString
  1869. * with the code point `ch`.
  1870. * @param ch the code point to replace
  1871. * @return a reference to this
  1872. * @stable ICU 2.0
  1873. */
  1874. inline UnicodeString& operator= (UChar32 ch);
  1875. /**
  1876. * Set the text in the UnicodeString object to the characters
  1877. * in `srcText` in the range
  1878. * [`srcStart`, `srcText.length()`).
  1879. * `srcText` is not modified.
  1880. * @param srcText the source for the new characters
  1881. * @param srcStart the offset into `srcText` where new characters
  1882. * will be obtained
  1883. * @return a reference to this
  1884. * @stable ICU 2.2
  1885. */
  1886. inline UnicodeString& setTo(const UnicodeString& srcText,
  1887. int32_t srcStart);
  1888. /**
  1889. * Set the text in the UnicodeString object to the characters
  1890. * in `srcText` in the range
  1891. * [`srcStart`, `srcStart + srcLength`).
  1892. * `srcText` is not modified.
  1893. * @param srcText the source for the new characters
  1894. * @param srcStart the offset into `srcText` where new characters
  1895. * will be obtained
  1896. * @param srcLength the number of characters in `srcText` in the
  1897. * replace string.
  1898. * @return a reference to this
  1899. * @stable ICU 2.0
  1900. */
  1901. inline UnicodeString& setTo(const UnicodeString& srcText,
  1902. int32_t srcStart,
  1903. int32_t srcLength);
  1904. /**
  1905. * Set the text in the UnicodeString object to the characters in
  1906. * `srcText`.
  1907. * `srcText` is not modified.
  1908. * @param srcText the source for the new characters
  1909. * @return a reference to this
  1910. * @stable ICU 2.0
  1911. */
  1912. inline UnicodeString& setTo(const UnicodeString& srcText);
  1913. /**
  1914. * Set the characters in the UnicodeString object to the characters
  1915. * in `srcChars`. `srcChars` is not modified.
  1916. * @param srcChars the source for the new characters
  1917. * @param srcLength the number of Unicode characters in srcChars.
  1918. * @return a reference to this
  1919. * @stable ICU 2.0
  1920. */
  1921. inline UnicodeString& setTo(const char16_t *srcChars,
  1922. int32_t srcLength);
  1923. /**
  1924. * Set the characters in the UnicodeString object to the code unit
  1925. * `srcChar`.
  1926. * @param srcChar the code unit which becomes the UnicodeString's character
  1927. * content
  1928. * @return a reference to this
  1929. * @stable ICU 2.0
  1930. */
  1931. inline UnicodeString& setTo(char16_t srcChar);
  1932. /**
  1933. * Set the characters in the UnicodeString object to the code point
  1934. * `srcChar`.
  1935. * @param srcChar the code point which becomes the UnicodeString's character
  1936. * content
  1937. * @return a reference to this
  1938. * @stable ICU 2.0
  1939. */
  1940. inline UnicodeString& setTo(UChar32 srcChar);
  1941. /**
  1942. * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
  1943. * The text will be used for the UnicodeString object, but
  1944. * it will not be released when the UnicodeString is destroyed.
  1945. * This has copy-on-write semantics:
  1946. * When the string is modified, then the buffer is first copied into
  1947. * newly allocated memory.
  1948. * The aliased buffer is never modified.
  1949. *
  1950. * In an assignment to another UnicodeString, when using the copy constructor
  1951. * or the assignment operator, the text will be copied.
  1952. * When using fastCopyFrom(), the text will be aliased again,
  1953. * so that both strings then alias the same readonly-text.
  1954. *
  1955. * @param isTerminated specifies if `text` is `NUL`-terminated.
  1956. * This must be true if `textLength==-1`.
  1957. * @param text The characters to alias for the UnicodeString.
  1958. * @param textLength The number of Unicode characters in `text` to alias.
  1959. * If -1, then this function will determine the length
  1960. * by calling `u_strlen()`.
  1961. * @return a reference to this
  1962. * @stable ICU 2.0
  1963. */
  1964. UnicodeString &setTo(UBool isTerminated,
  1965. ConstChar16Ptr text,
  1966. int32_t textLength);
  1967. /**
  1968. * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
  1969. * The text will be used for the UnicodeString object, but
  1970. * it will not be released when the UnicodeString is destroyed.
  1971. * This has write-through semantics:
  1972. * For as long as the capacity of the buffer is sufficient, write operations
  1973. * will directly affect the buffer. When more capacity is necessary, then
  1974. * a new buffer will be allocated and the contents copied as with regularly
  1975. * constructed strings.
  1976. * In an assignment to another UnicodeString, the buffer will be copied.
  1977. * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
  1978. * as the string buffer itself and will in this case not copy the contents.
  1979. *
  1980. * @param buffer The characters to alias for the UnicodeString.
  1981. * @param buffLength The number of Unicode characters in `buffer` to alias.
  1982. * @param buffCapacity The size of `buffer` in char16_ts.
  1983. * @return a reference to this
  1984. * @stable ICU 2.0
  1985. */
  1986. UnicodeString &setTo(char16_t *buffer,
  1987. int32_t buffLength,
  1988. int32_t buffCapacity);
  1989. /**
  1990. * Make this UnicodeString object invalid.
  1991. * The string will test true with isBogus().
  1992. *
  1993. * A bogus string has no value. It is different from an empty string.
  1994. * It can be used to indicate that no string value is available.
  1995. * getBuffer() and getTerminatedBuffer() return nullptr, and
  1996. * length() returns 0.
  1997. *
  1998. * This utility function is used throughout the UnicodeString
  1999. * implementation to indicate that a UnicodeString operation failed,
  2000. * and may be used in other functions,
  2001. * especially but not exclusively when such functions do not
  2002. * take a UErrorCode for simplicity.
  2003. *
  2004. * The following methods, and no others, will clear a string object's bogus flag:
  2005. * - remove()
  2006. * - remove(0, INT32_MAX)
  2007. * - truncate(0)
  2008. * - operator=() (assignment operator)
  2009. * - setTo(...)
  2010. *
  2011. * The simplest way to turn a bogus string into an empty one
  2012. * is to use the remove() function.
  2013. * Examples for other functions that are equivalent to "set to empty string":
  2014. * \code
  2015. * if(s.isBogus()) {
  2016. * s.remove(); // set to an empty string (remove all), or
  2017. * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
  2018. * s.truncate(0); // set to an empty string (complete truncation), or
  2019. * s=UnicodeString(); // assign an empty string, or
  2020. * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
  2021. * s.setTo(u"", 0); // set to an empty C Unicode string
  2022. * }
  2023. * \endcode
  2024. *
  2025. * @see isBogus()
  2026. * @stable ICU 2.0
  2027. */
  2028. void setToBogus();
  2029. /**
  2030. * Set the character at the specified offset to the specified character.
  2031. * @param offset A valid offset into the text of the character to set
  2032. * @param ch The new character
  2033. * @return A reference to this
  2034. * @stable ICU 2.0
  2035. */
  2036. UnicodeString& setCharAt(int32_t offset,
  2037. char16_t ch);
  2038. /* Append operations */
  2039. /**
  2040. * Append operator. Append the code unit `ch` to the UnicodeString
  2041. * object.
  2042. * @param ch the code unit to be appended
  2043. * @return a reference to this
  2044. * @stable ICU 2.0
  2045. */
  2046. inline UnicodeString& operator+= (char16_t ch);
  2047. /**
  2048. * Append operator. Append the code point `ch` to the UnicodeString
  2049. * object.
  2050. * @param ch the code point to be appended
  2051. * @return a reference to this
  2052. * @stable ICU 2.0
  2053. */
  2054. inline UnicodeString& operator+= (UChar32 ch);
  2055. /**
  2056. * Append operator. Append the characters in `srcText` to the
  2057. * UnicodeString object. `srcText` is not modified.
  2058. * @param srcText the source for the new characters
  2059. * @return a reference to this
  2060. * @stable ICU 2.0
  2061. */
  2062. inline UnicodeString& operator+= (const UnicodeString& srcText);
  2063. #ifndef U_HIDE_DRAFT_API
  2064. /**
  2065. * Append operator. Appends the characters in `src`
  2066. * which is, or which is implicitly convertible to,
  2067. * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
  2068. * to the UnicodeString object.
  2069. * Passes internal::toU16StringView(src) to doAppend(), the same as append(src).
  2070. * @param src the source for the new characters
  2071. * @return a reference to this
  2072. * @draft ICU 76
  2073. */
  2074. template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
  2075. inline UnicodeString& operator+=(const S &src) {
  2076. return doAppend(internal::toU16StringView(src));
  2077. }
  2078. #endif // U_HIDE_DRAFT_API
  2079. /**
  2080. * Append the characters
  2081. * in `srcText` in the range
  2082. * [`srcStart`, `srcStart + srcLength`) to the
  2083. * UnicodeString object. `srcText`
  2084. * is not modified.
  2085. * @param srcText the source for the new characters
  2086. * @param srcStart the offset into `srcText` where new characters
  2087. * will be obtained
  2088. * @param srcLength the number of characters in `srcText` in
  2089. * the append string
  2090. * @return a reference to this
  2091. * @stable ICU 2.0
  2092. */
  2093. inline UnicodeString& append(const UnicodeString& srcText,
  2094. int32_t srcStart,
  2095. int32_t srcLength);
  2096. /**
  2097. * Append the characters in `srcText` to the UnicodeString object.
  2098. * `srcText` is not modified.
  2099. * @param srcText the source for the new characters
  2100. * @return a reference to this
  2101. * @stable ICU 2.0
  2102. */
  2103. inline UnicodeString& append(const UnicodeString& srcText);
  2104. /**
  2105. * Append the characters in `srcChars` in the range
  2106. * [`srcStart`, `srcStart + srcLength`) to the UnicodeString
  2107. * object.
  2108. * `srcChars` is not modified.
  2109. * @param srcChars the source for the new characters
  2110. * @param srcStart the offset into `srcChars` where new characters
  2111. * will be obtained
  2112. * @param srcLength the number of characters in `srcChars` in
  2113. * the append string; can be -1 if `srcChars` is NUL-terminated
  2114. * @return a reference to this
  2115. * @stable ICU 2.0
  2116. */
  2117. inline UnicodeString& append(const char16_t *srcChars,
  2118. int32_t srcStart,
  2119. int32_t srcLength);
  2120. /**
  2121. * Append the characters in `srcChars` to the UnicodeString object.
  2122. * `srcChars` is not modified.
  2123. * @param srcChars the source for the new characters
  2124. * @param srcLength the number of Unicode characters in `srcChars`;
  2125. * can be -1 if `srcChars` is NUL-terminated
  2126. * @return a reference to this
  2127. * @stable ICU 2.0
  2128. */
  2129. inline UnicodeString& append(ConstChar16Ptr srcChars,
  2130. int32_t srcLength);
  2131. #ifndef U_HIDE_DRAFT_API
  2132. /**
  2133. * Appends the characters in `src`
  2134. * which is, or which is implicitly convertible to,
  2135. * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
  2136. * to the UnicodeString object.
  2137. * Passes internal::toU16StringView(src) to doAppend(), the same as operator+=(src).
  2138. * @param src the source for the new characters
  2139. * @return a reference to this
  2140. * @draft ICU 76
  2141. */
  2142. template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
  2143. inline UnicodeString& append(const S &src) {
  2144. return doAppend(internal::toU16StringView(src));
  2145. }
  2146. #endif // U_HIDE_DRAFT_API
  2147. /**
  2148. * Append the code unit `srcChar` to the UnicodeString object.
  2149. * @param srcChar the code unit to append
  2150. * @return a reference to this
  2151. * @stable ICU 2.0
  2152. */
  2153. inline UnicodeString& append(char16_t srcChar);
  2154. /**
  2155. * Append the code point `srcChar` to the UnicodeString object.
  2156. * @param srcChar the code point to append
  2157. * @return a reference to this
  2158. * @stable ICU 2.0
  2159. */
  2160. UnicodeString& append(UChar32 srcChar);
  2161. /* Insert operations */
  2162. /**
  2163. * Insert the characters in `srcText` in the range
  2164. * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
  2165. * object at offset `start`. `srcText` is not modified.
  2166. * @param start the offset where the insertion begins
  2167. * @param srcText the source for the new characters
  2168. * @param srcStart the offset into `srcText` where new characters
  2169. * will be obtained
  2170. * @param srcLength the number of characters in `srcText` in
  2171. * the insert string
  2172. * @return a reference to this
  2173. * @stable ICU 2.0
  2174. */
  2175. inline UnicodeString& insert(int32_t start,
  2176. const UnicodeString& srcText,
  2177. int32_t srcStart,
  2178. int32_t srcLength);
  2179. /**
  2180. * Insert the characters in `srcText` into the UnicodeString object
  2181. * at offset `start`. `srcText` is not modified.
  2182. * @param start the offset where the insertion begins
  2183. * @param srcText the source for the new characters
  2184. * @return a reference to this
  2185. * @stable ICU 2.0
  2186. */
  2187. inline UnicodeString& insert(int32_t start,
  2188. const UnicodeString& srcText);
  2189. /**
  2190. * Insert the characters in `srcChars` in the range
  2191. * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
  2192. * object at offset `start`. `srcChars` is not modified.
  2193. * @param start the offset at which the insertion begins
  2194. * @param srcChars the source for the new characters
  2195. * @param srcStart the offset into `srcChars` where new characters
  2196. * will be obtained
  2197. * @param srcLength the number of characters in `srcChars`
  2198. * in the insert string
  2199. * @return a reference to this
  2200. * @stable ICU 2.0
  2201. */
  2202. inline UnicodeString& insert(int32_t start,
  2203. const char16_t *srcChars,
  2204. int32_t srcStart,
  2205. int32_t srcLength);
  2206. /**
  2207. * Insert the characters in `srcChars` into the UnicodeString object
  2208. * at offset `start`. `srcChars` is not modified.
  2209. * @param start the offset where the insertion begins
  2210. * @param srcChars the source for the new characters
  2211. * @param srcLength the number of Unicode characters in srcChars.
  2212. * @return a reference to this
  2213. * @stable ICU 2.0
  2214. */
  2215. inline UnicodeString& insert(int32_t start,
  2216. ConstChar16Ptr srcChars,
  2217. int32_t srcLength);
  2218. /**
  2219. * Insert the code unit `srcChar` into the UnicodeString object at
  2220. * offset `start`.
  2221. * @param start the offset at which the insertion occurs
  2222. * @param srcChar the code unit to insert
  2223. * @return a reference to this
  2224. * @stable ICU 2.0
  2225. */
  2226. inline UnicodeString& insert(int32_t start,
  2227. char16_t srcChar);
  2228. /**
  2229. * Insert the code point `srcChar` into the UnicodeString object at
  2230. * offset `start`.
  2231. * @param start the offset at which the insertion occurs
  2232. * @param srcChar the code point to insert
  2233. * @return a reference to this
  2234. * @stable ICU 2.0
  2235. */
  2236. inline UnicodeString& insert(int32_t start,
  2237. UChar32 srcChar);
  2238. /* Replace operations */
  2239. /**
  2240. * Replace the characters in the range
  2241. * [`start`, `start + length`) with the characters in
  2242. * `srcText` in the range
  2243. * [`srcStart`, `srcStart + srcLength`).
  2244. * `srcText` is not modified.
  2245. * @param start the offset at which the replace operation begins
  2246. * @param length the number of characters to replace. The character at
  2247. * `start + length` is not modified.
  2248. * @param srcText the source for the new characters
  2249. * @param srcStart the offset into `srcText` where new characters
  2250. * will be obtained
  2251. * @param srcLength the number of characters in `srcText` in
  2252. * the replace string
  2253. * @return a reference to this
  2254. * @stable ICU 2.0
  2255. */
  2256. inline UnicodeString& replace(int32_t start,
  2257. int32_t length,
  2258. const UnicodeString& srcText,
  2259. int32_t srcStart,
  2260. int32_t srcLength);
  2261. /**
  2262. * Replace the characters in the range
  2263. * [`start`, `start + length`)
  2264. * with the characters in `srcText`. `srcText` is
  2265. * not modified.
  2266. * @param start the offset at which the replace operation begins
  2267. * @param length the number of characters to replace. The character at
  2268. * `start + length` is not modified.
  2269. * @param srcText the source for the new characters
  2270. * @return a reference to this
  2271. * @stable ICU 2.0
  2272. */
  2273. inline UnicodeString& replace(int32_t start,
  2274. int32_t length,
  2275. const UnicodeString& srcText);
  2276. /**
  2277. * Replace the characters in the range
  2278. * [`start`, `start + length`) with the characters in
  2279. * `srcChars` in the range
  2280. * [`srcStart`, `srcStart + srcLength`). `srcChars`
  2281. * is not modified.
  2282. * @param start the offset at which the replace operation begins
  2283. * @param length the number of characters to replace. The character at
  2284. * `start + length` is not modified.
  2285. * @param srcChars the source for the new characters
  2286. * @param srcStart the offset into `srcChars` where new characters
  2287. * will be obtained
  2288. * @param srcLength the number of characters in `srcChars`
  2289. * in the replace string
  2290. * @return a reference to this
  2291. * @stable ICU 2.0
  2292. */
  2293. inline UnicodeString& replace(int32_t start,
  2294. int32_t length,
  2295. const char16_t *srcChars,
  2296. int32_t srcStart,
  2297. int32_t srcLength);
  2298. /**
  2299. * Replace the characters in the range
  2300. * [`start`, `start + length`) with the characters in
  2301. * `srcChars`. `srcChars` is not modified.
  2302. * @param start the offset at which the replace operation begins
  2303. * @param length number of characters to replace. The character at
  2304. * `start + length` is not modified.
  2305. * @param srcChars the source for the new characters
  2306. * @param srcLength the number of Unicode characters in srcChars
  2307. * @return a reference to this
  2308. * @stable ICU 2.0
  2309. */
  2310. inline UnicodeString& replace(int32_t start,
  2311. int32_t length,
  2312. ConstChar16Ptr srcChars,
  2313. int32_t srcLength);
  2314. /**
  2315. * Replace the characters in the range
  2316. * [`start`, `start + length`) with the code unit
  2317. * `srcChar`.
  2318. * @param start the offset at which the replace operation begins
  2319. * @param length the number of characters to replace. The character at
  2320. * `start + length` is not modified.
  2321. * @param srcChar the new code unit
  2322. * @return a reference to this
  2323. * @stable ICU 2.0
  2324. */
  2325. inline UnicodeString& replace(int32_t start,
  2326. int32_t length,
  2327. char16_t srcChar);
  2328. /**
  2329. * Replace the characters in the range
  2330. * [`start`, `start + length`) with the code point
  2331. * `srcChar`.
  2332. * @param start the offset at which the replace operation begins
  2333. * @param length the number of characters to replace. The character at
  2334. * `start + length` is not modified.
  2335. * @param srcChar the new code point
  2336. * @return a reference to this
  2337. * @stable ICU 2.0
  2338. */
  2339. UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
  2340. /**
  2341. * Replace the characters in the range [`start`, `limit`)
  2342. * with the characters in `srcText`. `srcText` is not modified.
  2343. * @param start the offset at which the replace operation begins
  2344. * @param limit the offset immediately following the replace range
  2345. * @param srcText the source for the new characters
  2346. * @return a reference to this
  2347. * @stable ICU 2.0
  2348. */
  2349. inline UnicodeString& replaceBetween(int32_t start,
  2350. int32_t limit,
  2351. const UnicodeString& srcText);
  2352. /**
  2353. * Replace the characters in the range [`start`, `limit`)
  2354. * with the characters in `srcText` in the range
  2355. * [`srcStart`, `srcLimit`). `srcText` is not modified.
  2356. * @param start the offset at which the replace operation begins
  2357. * @param limit the offset immediately following the replace range
  2358. * @param srcText the source for the new characters
  2359. * @param srcStart the offset into `srcText` where new characters
  2360. * will be obtained
  2361. * @param srcLimit the offset immediately following the range to copy
  2362. * in `srcText`
  2363. * @return a reference to this
  2364. * @stable ICU 2.0
  2365. */
  2366. inline UnicodeString& replaceBetween(int32_t start,
  2367. int32_t limit,
  2368. const UnicodeString& srcText,
  2369. int32_t srcStart,
  2370. int32_t srcLimit);
  2371. /**
  2372. * Replace a substring of this object with the given text.
  2373. * @param start the beginning index, inclusive; `0 <= start <= limit`.
  2374. * @param limit the ending index, exclusive; `start <= limit <= length()`.
  2375. * @param text the text to replace characters `start` to `limit - 1`
  2376. * @stable ICU 2.0
  2377. */
  2378. virtual void handleReplaceBetween(int32_t start,
  2379. int32_t limit,
  2380. const UnicodeString& text) override;
  2381. /**
  2382. * Replaceable API
  2383. * @return true if it has MetaData
  2384. * @stable ICU 2.4
  2385. */
  2386. virtual UBool hasMetaData() const override;
  2387. /**
  2388. * Copy a substring of this object, retaining attribute (out-of-band)
  2389. * information. This method is used to duplicate or reorder substrings.
  2390. * The destination index must not overlap the source range.
  2391. *
  2392. * @param start the beginning index, inclusive; `0 <= start <= limit`.
  2393. * @param limit the ending index, exclusive; `start <= limit <= length()`.
  2394. * @param dest the destination index. The characters from
  2395. * `start..limit-1` will be copied to `dest`.
  2396. * Implementations of this method may assume that `dest <= start ||
  2397. * dest >= limit`.
  2398. * @stable ICU 2.0
  2399. */
  2400. virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
  2401. /* Search and replace operations */
  2402. /**
  2403. * Replace all occurrences of characters in oldText with the characters
  2404. * in newText
  2405. * @param oldText the text containing the search text
  2406. * @param newText the text containing the replacement text
  2407. * @return a reference to this
  2408. * @stable ICU 2.0
  2409. */
  2410. inline UnicodeString& findAndReplace(const UnicodeString& oldText,
  2411. const UnicodeString& newText);
  2412. /**
  2413. * Replace all occurrences of characters in oldText with characters
  2414. * in newText
  2415. * in the range [`start`, `start + length`).
  2416. * @param start the start of the range in which replace will be performed
  2417. * @param length the length of the range in which replace will be performed
  2418. * @param oldText the text containing the search text
  2419. * @param newText the text containing the replacement text
  2420. * @return a reference to this
  2421. * @stable ICU 2.0
  2422. */
  2423. inline UnicodeString& findAndReplace(int32_t start,
  2424. int32_t length,
  2425. const UnicodeString& oldText,
  2426. const UnicodeString& newText);
  2427. /**
  2428. * Replace all occurrences of characters in oldText in the range
  2429. * [`oldStart`, `oldStart + oldLength`) with the characters
  2430. * in newText in the range
  2431. * [`newStart`, `newStart + newLength`)
  2432. * in the range [`start`, `start + length`).
  2433. * @param start the start of the range in which replace will be performed
  2434. * @param length the length of the range in which replace will be performed
  2435. * @param oldText the text containing the search text
  2436. * @param oldStart the start of the search range in `oldText`
  2437. * @param oldLength the length of the search range in `oldText`
  2438. * @param newText the text containing the replacement text
  2439. * @param newStart the start of the replacement range in `newText`
  2440. * @param newLength the length of the replacement range in `newText`
  2441. * @return a reference to this
  2442. * @stable ICU 2.0
  2443. */
  2444. UnicodeString& findAndReplace(int32_t start,
  2445. int32_t length,
  2446. const UnicodeString& oldText,
  2447. int32_t oldStart,
  2448. int32_t oldLength,
  2449. const UnicodeString& newText,
  2450. int32_t newStart,
  2451. int32_t newLength);
  2452. /* Remove operations */
  2453. /**
  2454. * Removes all characters from the UnicodeString object and clears the bogus flag.
  2455. * This is the UnicodeString equivalent of std::string’s clear().
  2456. *
  2457. * @return a reference to this
  2458. * @see setToBogus
  2459. * @stable ICU 2.0
  2460. */
  2461. inline UnicodeString& remove();
  2462. /**
  2463. * Remove the characters in the range
  2464. * [`start`, `start + length`) from the UnicodeString object.
  2465. * @param start the offset of the first character to remove
  2466. * @param length the number of characters to remove
  2467. * @return a reference to this
  2468. * @stable ICU 2.0
  2469. */
  2470. inline UnicodeString& remove(int32_t start,
  2471. int32_t length = static_cast<int32_t>(INT32_MAX));
  2472. /**
  2473. * Remove the characters in the range
  2474. * [`start`, `limit`) from the UnicodeString object.
  2475. * @param start the offset of the first character to remove
  2476. * @param limit the offset immediately following the range to remove
  2477. * @return a reference to this
  2478. * @stable ICU 2.0
  2479. */
  2480. inline UnicodeString& removeBetween(int32_t start,
  2481. int32_t limit = static_cast<int32_t>(INT32_MAX));
  2482. /**
  2483. * Retain only the characters in the range
  2484. * [`start`, `limit`) from the UnicodeString object.
  2485. * Removes characters before `start` and at and after `limit`.
  2486. * @param start the offset of the first character to retain
  2487. * @param limit the offset immediately following the range to retain
  2488. * @return a reference to this
  2489. * @stable ICU 4.4
  2490. */
  2491. inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
  2492. /* Length operations */
  2493. /**
  2494. * Pad the start of this UnicodeString with the character `padChar`.
  2495. * If the length of this UnicodeString is less than targetLength,
  2496. * targetLength - length() copies of padChar will be added to the
  2497. * beginning of this UnicodeString.
  2498. * @param targetLength the desired length of the string
  2499. * @param padChar the character to use for padding. Defaults to
  2500. * space (U+0020)
  2501. * @return true if the text was padded, false otherwise.
  2502. * @stable ICU 2.0
  2503. */
  2504. UBool padLeading(int32_t targetLength,
  2505. char16_t padChar = 0x0020);
  2506. /**
  2507. * Pad the end of this UnicodeString with the character `padChar`.
  2508. * If the length of this UnicodeString is less than targetLength,
  2509. * targetLength - length() copies of padChar will be added to the
  2510. * end of this UnicodeString.
  2511. * @param targetLength the desired length of the string
  2512. * @param padChar the character to use for padding. Defaults to
  2513. * space (U+0020)
  2514. * @return true if the text was padded, false otherwise.
  2515. * @stable ICU 2.0
  2516. */
  2517. UBool padTrailing(int32_t targetLength,
  2518. char16_t padChar = 0x0020);
  2519. /**
  2520. * Truncate this UnicodeString to the `targetLength`.
  2521. * @param targetLength the desired length of this UnicodeString.
  2522. * @return true if the text was truncated, false otherwise
  2523. * @stable ICU 2.0
  2524. */
  2525. inline UBool truncate(int32_t targetLength);
  2526. /**
  2527. * Trims leading and trailing whitespace from this UnicodeString.
  2528. * @return a reference to this
  2529. * @stable ICU 2.0
  2530. */
  2531. UnicodeString& trim();
  2532. /* Miscellaneous operations */
  2533. /**
  2534. * Reverse this UnicodeString in place.
  2535. * @return a reference to this
  2536. * @stable ICU 2.0
  2537. */
  2538. inline UnicodeString& reverse();
  2539. /**
  2540. * Reverse the range [`start`, `start + length`) in
  2541. * this UnicodeString.
  2542. * @param start the start of the range to reverse
  2543. * @param length the number of characters to reverse
  2544. * @return a reference to this
  2545. * @stable ICU 2.0
  2546. */
  2547. inline UnicodeString& reverse(int32_t start,
  2548. int32_t length);
  2549. /**
  2550. * Convert the characters in this to UPPER CASE following the conventions of
  2551. * the default locale.
  2552. * @return A reference to this.
  2553. * @stable ICU 2.0
  2554. */
  2555. UnicodeString& toUpper();
  2556. /**
  2557. * Convert the characters in this to UPPER CASE following the conventions of
  2558. * a specific locale.
  2559. * @param locale The locale containing the conventions to use.
  2560. * @return A reference to this.
  2561. * @stable ICU 2.0
  2562. */
  2563. UnicodeString& toUpper(const Locale& locale);
  2564. /**
  2565. * Convert the characters in this to lower case following the conventions of
  2566. * the default locale.
  2567. * @return A reference to this.
  2568. * @stable ICU 2.0
  2569. */
  2570. UnicodeString& toLower();
  2571. /**
  2572. * Convert the characters in this to lower case following the conventions of
  2573. * a specific locale.
  2574. * @param locale The locale containing the conventions to use.
  2575. * @return A reference to this.
  2576. * @stable ICU 2.0
  2577. */
  2578. UnicodeString& toLower(const Locale& locale);
  2579. #if !UCONFIG_NO_BREAK_ITERATION
  2580. /**
  2581. * Titlecase this string, convenience function using the default locale.
  2582. *
  2583. * Casing is locale-dependent and context-sensitive.
  2584. * Titlecasing uses a break iterator to find the first characters of words
  2585. * that are to be titlecased. It titlecases those characters and lowercases
  2586. * all others.
  2587. *
  2588. * The titlecase break iterator can be provided to customize for arbitrary
  2589. * styles, using rules and dictionaries beyond the standard iterators.
  2590. * It may be more efficient to always provide an iterator to avoid
  2591. * opening and closing one for each string.
  2592. * If the break iterator passed in is null, the default Unicode algorithm
  2593. * will be used to determine the titlecase positions.
  2594. *
  2595. * This function uses only the setText(), first() and next() methods of the
  2596. * provided break iterator.
  2597. *
  2598. * @param titleIter A break iterator to find the first characters of words
  2599. * that are to be titlecased.
  2600. * If none is provided (0), then a standard titlecase
  2601. * break iterator is opened.
  2602. * Otherwise the provided iterator is set to the string's text.
  2603. * @return A reference to this.
  2604. * @stable ICU 2.1
  2605. */
  2606. UnicodeString &toTitle(BreakIterator *titleIter);
  2607. /**
  2608. * Titlecase this string.
  2609. *
  2610. * Casing is locale-dependent and context-sensitive.
  2611. * Titlecasing uses a break iterator to find the first characters of words
  2612. * that are to be titlecased. It titlecases those characters and lowercases
  2613. * all others.
  2614. *
  2615. * The titlecase break iterator can be provided to customize for arbitrary
  2616. * styles, using rules and dictionaries beyond the standard iterators.
  2617. * It may be more efficient to always provide an iterator to avoid
  2618. * opening and closing one for each string.
  2619. * If the break iterator passed in is null, the default Unicode algorithm
  2620. * will be used to determine the titlecase positions.
  2621. *
  2622. * This function uses only the setText(), first() and next() methods of the
  2623. * provided break iterator.
  2624. *
  2625. * @param titleIter A break iterator to find the first characters of words
  2626. * that are to be titlecased.
  2627. * If none is provided (0), then a standard titlecase
  2628. * break iterator is opened.
  2629. * Otherwise the provided iterator is set to the string's text.
  2630. * @param locale The locale to consider.
  2631. * @return A reference to this.
  2632. * @stable ICU 2.1
  2633. */
  2634. UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
  2635. /**
  2636. * Titlecase this string, with options.
  2637. *
  2638. * Casing is locale-dependent and context-sensitive.
  2639. * Titlecasing uses a break iterator to find the first characters of words
  2640. * that are to be titlecased. It titlecases those characters and lowercases
  2641. * all others. (This can be modified with options.)
  2642. *
  2643. * The titlecase break iterator can be provided to customize for arbitrary
  2644. * styles, using rules and dictionaries beyond the standard iterators.
  2645. * It may be more efficient to always provide an iterator to avoid
  2646. * opening and closing one for each string.
  2647. * If the break iterator passed in is null, the default Unicode algorithm
  2648. * will be used to determine the titlecase positions.
  2649. *
  2650. * This function uses only the setText(), first() and next() methods of the
  2651. * provided break iterator.
  2652. *
  2653. * @param titleIter A break iterator to find the first characters of words
  2654. * that are to be titlecased.
  2655. * If none is provided (0), then a standard titlecase
  2656. * break iterator is opened.
  2657. * Otherwise the provided iterator is set to the string's text.
  2658. * @param locale The locale to consider.
  2659. * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
  2660. * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
  2661. * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
  2662. * @return A reference to this.
  2663. * @stable ICU 3.8
  2664. */
  2665. UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
  2666. #endif
  2667. /**
  2668. * Case-folds the characters in this string.
  2669. *
  2670. * Case-folding is locale-independent and not context-sensitive,
  2671. * but there is an option for whether to include or exclude mappings for dotted I
  2672. * and dotless i that are marked with 'T' in CaseFolding.txt.
  2673. *
  2674. * The result may be longer or shorter than the original.
  2675. *
  2676. * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
  2677. * @return A reference to this.
  2678. * @stable ICU 2.0
  2679. */
  2680. UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
  2681. //========================================
  2682. // Access to the internal buffer
  2683. //========================================
  2684. /**
  2685. * Get a read/write pointer to the internal buffer.
  2686. * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
  2687. * writable, and is still owned by the UnicodeString object.
  2688. * Calls to getBuffer(minCapacity) must not be nested, and
  2689. * must be matched with calls to releaseBuffer(newLength).
  2690. * If the string buffer was read-only or shared,
  2691. * then it will be reallocated and copied.
  2692. *
  2693. * An attempted nested call will return 0, and will not further modify the
  2694. * state of the UnicodeString object.
  2695. * It also returns 0 if the string is bogus.
  2696. *
  2697. * The actual capacity of the string buffer may be larger than minCapacity.
  2698. * getCapacity() returns the actual capacity.
  2699. * For many operations, the full capacity should be used to avoid reallocations.
  2700. *
  2701. * While the buffer is "open" between getBuffer(minCapacity)
  2702. * and releaseBuffer(newLength), the following applies:
  2703. * - The string length is set to 0.
  2704. * - Any read API call on the UnicodeString object will behave like on a 0-length string.
  2705. * - Any write API call on the UnicodeString object is disallowed and will have no effect.
  2706. * - You can read from and write to the returned buffer.
  2707. * - The previous string contents will still be in the buffer;
  2708. * if you want to use it, then you need to call length() before getBuffer(minCapacity).
  2709. * If the length() was greater than minCapacity, then any contents after minCapacity
  2710. * may be lost.
  2711. * The buffer contents is not NUL-terminated by getBuffer().
  2712. * If length() < getCapacity() then you can terminate it by writing a NUL
  2713. * at index length().
  2714. * - You must call releaseBuffer(newLength) before and in order to
  2715. * return to normal UnicodeString operation.
  2716. *
  2717. * @param minCapacity the minimum number of char16_ts that are to be available
  2718. * in the buffer, starting at the returned pointer;
  2719. * default to the current string capacity if minCapacity==-1
  2720. * @return a writable pointer to the internal string buffer,
  2721. * or nullptr if an error occurs (nested calls, out of memory)
  2722. *
  2723. * @see releaseBuffer
  2724. * @see getTerminatedBuffer()
  2725. * @stable ICU 2.0
  2726. */
  2727. char16_t *getBuffer(int32_t minCapacity);
  2728. /**
  2729. * Release a read/write buffer on a UnicodeString object with an
  2730. * "open" getBuffer(minCapacity).
  2731. * This function must be called in a matched pair with getBuffer(minCapacity).
  2732. * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
  2733. *
  2734. * It will set the string length to newLength, at most to the current capacity.
  2735. * If newLength==-1 then it will set the length according to the
  2736. * first NUL in the buffer, or to the capacity if there is no NUL.
  2737. *
  2738. * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
  2739. *
  2740. * @param newLength the new length of the UnicodeString object;
  2741. * defaults to the current capacity if newLength is greater than that;
  2742. * if newLength==-1, it defaults to u_strlen(buffer) but not more than
  2743. * the current capacity of the string
  2744. *
  2745. * @see getBuffer(int32_t minCapacity)
  2746. * @stable ICU 2.0
  2747. */
  2748. void releaseBuffer(int32_t newLength=-1);
  2749. /**
  2750. * Get a read-only pointer to the internal buffer.
  2751. * This can be called at any time on a valid UnicodeString.
  2752. *
  2753. * It returns 0 if the string is bogus, or
  2754. * during an "open" getBuffer(minCapacity).
  2755. *
  2756. * It can be called as many times as desired.
  2757. * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2758. * at which time the pointer is semantically invalidated and must not be used any more.
  2759. *
  2760. * The capacity of the buffer can be determined with getCapacity().
  2761. * The part after length() may or may not be initialized and valid,
  2762. * depending on the history of the UnicodeString object.
  2763. *
  2764. * The buffer contents is (probably) not NUL-terminated.
  2765. * You can check if it is with
  2766. * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
  2767. * (See getTerminatedBuffer().)
  2768. *
  2769. * The buffer may reside in read-only memory. Its contents must not
  2770. * be modified.
  2771. *
  2772. * @return a read-only pointer to the internal string buffer,
  2773. * or nullptr if the string is empty or bogus
  2774. *
  2775. * @see getBuffer(int32_t minCapacity)
  2776. * @see getTerminatedBuffer()
  2777. * @stable ICU 2.0
  2778. */
  2779. inline const char16_t *getBuffer() const;
  2780. /**
  2781. * Get a read-only pointer to the internal buffer,
  2782. * making sure that it is NUL-terminated.
  2783. * This can be called at any time on a valid UnicodeString.
  2784. *
  2785. * It returns 0 if the string is bogus, or
  2786. * during an "open" getBuffer(minCapacity), or if the buffer cannot
  2787. * be NUL-terminated (because memory allocation failed).
  2788. *
  2789. * It can be called as many times as desired.
  2790. * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2791. * at which time the pointer is semantically invalidated and must not be used any more.
  2792. *
  2793. * The capacity of the buffer can be determined with getCapacity().
  2794. * The part after length()+1 may or may not be initialized and valid,
  2795. * depending on the history of the UnicodeString object.
  2796. *
  2797. * The buffer contents is guaranteed to be NUL-terminated.
  2798. * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
  2799. * is written.
  2800. * For this reason, this function is not const, unlike getBuffer().
  2801. * Note that a UnicodeString may also contain NUL characters as part of its contents.
  2802. *
  2803. * The buffer may reside in read-only memory. Its contents must not
  2804. * be modified.
  2805. *
  2806. * @return a read-only pointer to the internal string buffer,
  2807. * or 0 if the string is empty or bogus
  2808. *
  2809. * @see getBuffer(int32_t minCapacity)
  2810. * @see getBuffer()
  2811. * @stable ICU 2.2
  2812. */
  2813. const char16_t *getTerminatedBuffer();
  2814. #ifndef U_HIDE_DRAFT_API
  2815. /**
  2816. * Converts to a std::u16string_view.
  2817. *
  2818. * @return a string view of the contents of this string
  2819. * @draft ICU 76
  2820. */
  2821. inline operator std::u16string_view() const {
  2822. return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
  2823. }
  2824. #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
  2825. /**
  2826. * Converts to a std::wstring_view.
  2827. *
  2828. * Note: This should remain draft until C++ standard plans
  2829. * about char16_t vs. wchar_t become clearer.
  2830. *
  2831. * @return a string view of the contents of this string
  2832. * @draft ICU 76
  2833. */
  2834. inline operator std::wstring_view() const {
  2835. const char16_t *p = getBuffer();
  2836. #ifdef U_ALIASING_BARRIER
  2837. U_ALIASING_BARRIER(p);
  2838. #endif
  2839. return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() };
  2840. }
  2841. #endif // U_SIZEOF_WCHAR_T
  2842. #endif // U_HIDE_DRAFT_API
  2843. //========================================
  2844. // Constructors
  2845. //========================================
  2846. /** Construct an empty UnicodeString.
  2847. * @stable ICU 2.0
  2848. */
  2849. inline UnicodeString();
  2850. /**
  2851. * Construct a UnicodeString with capacity to hold `capacity` char16_ts
  2852. * @param capacity the number of char16_ts this UnicodeString should hold
  2853. * before a resize is necessary; if count is greater than 0 and count
  2854. * code points c take up more space than capacity, then capacity is adjusted
  2855. * accordingly.
  2856. * @param c is used to initially fill the string
  2857. * @param count specifies how many code points c are to be written in the
  2858. * string
  2859. * @stable ICU 2.0
  2860. */
  2861. UnicodeString(int32_t capacity, UChar32 c, int32_t count);
  2862. /**
  2863. * Single char16_t (code unit) constructor.
  2864. *
  2865. * It is recommended to mark this constructor "explicit" by
  2866. * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
  2867. * on the compiler command line or similar.
  2868. * @param ch the character to place in the UnicodeString
  2869. * @stable ICU 2.0
  2870. */
  2871. UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
  2872. /**
  2873. * Single UChar32 (code point) constructor.
  2874. *
  2875. * It is recommended to mark this constructor "explicit" by
  2876. * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
  2877. * on the compiler command line or similar.
  2878. * @param ch the character to place in the UnicodeString
  2879. * @stable ICU 2.0
  2880. */
  2881. UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
  2882. #ifdef U_HIDE_DRAFT_API
  2883. /**
  2884. * char16_t* constructor.
  2885. *
  2886. * It is recommended to mark this constructor "explicit" by
  2887. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  2888. * on the compiler command line or similar.
  2889. *
  2890. * Note, for string literals:
  2891. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  2892. * length determination:
  2893. * \code
  2894. * UnicodeString str(u"literal");
  2895. * if (str == u"other literal") { ... }
  2896. * \endcode
  2897. *
  2898. * @param text The characters to place in the UnicodeString. `text`
  2899. * must be NUL (U+0000) terminated.
  2900. * @stable ICU 2.0
  2901. */
  2902. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
  2903. UnicodeString(text, -1) {}
  2904. #endif // U_HIDE_DRAFT_API
  2905. #if !U_CHAR16_IS_TYPEDEF && \
  2906. (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
  2907. /**
  2908. * uint16_t * constructor.
  2909. * Delegates to UnicodeString(const char16_t *).
  2910. *
  2911. * It is recommended to mark this constructor "explicit" by
  2912. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  2913. * on the compiler command line or similar.
  2914. *
  2915. * Note, for string literals:
  2916. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  2917. * length determination:
  2918. * \code
  2919. * UnicodeString str(u"literal");
  2920. * if (str == u"other literal") { ... }
  2921. * \endcode
  2922. *
  2923. * @param text NUL-terminated UTF-16 string
  2924. * @stable ICU 59
  2925. */
  2926. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
  2927. UnicodeString(ConstChar16Ptr(text), -1) {}
  2928. #endif
  2929. #if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
  2930. /**
  2931. * wchar_t * constructor.
  2932. * (Only defined if U_SIZEOF_WCHAR_T==2.)
  2933. * Delegates to UnicodeString(const char16_t *).
  2934. *
  2935. * It is recommended to mark this constructor "explicit" by
  2936. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  2937. * on the compiler command line or similar.
  2938. *
  2939. * Note, for string literals:
  2940. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  2941. * length determination:
  2942. * \code
  2943. * UnicodeString str(u"literal");
  2944. * if (str == u"other literal") { ... }
  2945. * \endcode
  2946. *
  2947. * @param text NUL-terminated UTF-16 string
  2948. * @stable ICU 59
  2949. */
  2950. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
  2951. UnicodeString(ConstChar16Ptr(text), -1) {}
  2952. #endif
  2953. /**
  2954. * nullptr_t constructor.
  2955. * Effectively the same as the default constructor, makes an empty string object.
  2956. *
  2957. * It is recommended to mark this constructor "explicit" by
  2958. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  2959. * on the compiler command line or similar.
  2960. * @param text nullptr
  2961. * @stable ICU 59
  2962. */
  2963. UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
  2964. /**
  2965. * char16_t* constructor.
  2966. *
  2967. * Note, for string literals:
  2968. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  2969. * length determination:
  2970. * \code
  2971. * UnicodeString str(u"literal");
  2972. * if (str == u"other literal") { ... }
  2973. * \endcode
  2974. *
  2975. * @param text The characters to place in the UnicodeString.
  2976. * @param textLength The number of Unicode characters in `text`
  2977. * to copy.
  2978. * @stable ICU 2.0
  2979. */
  2980. UnicodeString(const char16_t *text,
  2981. int32_t textLength);
  2982. #if !U_CHAR16_IS_TYPEDEF
  2983. /**
  2984. * uint16_t * constructor.
  2985. * Delegates to UnicodeString(const char16_t *, int32_t).
  2986. *
  2987. * Note, for string literals:
  2988. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  2989. * length determination:
  2990. * \code
  2991. * UnicodeString str(u"literal");
  2992. * if (str == u"other literal") { ... }
  2993. * \endcode
  2994. *
  2995. * @param text UTF-16 string
  2996. * @param textLength string length
  2997. * @stable ICU 59
  2998. */
  2999. UnicodeString(const uint16_t *text, int32_t textLength) :
  3000. UnicodeString(ConstChar16Ptr(text), textLength) {}
  3001. #endif
  3002. #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
  3003. /**
  3004. * wchar_t * constructor.
  3005. * (Only defined if U_SIZEOF_WCHAR_T==2.)
  3006. * Delegates to UnicodeString(const char16_t *, int32_t).
  3007. *
  3008. * Note, for string literals:
  3009. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  3010. * length determination:
  3011. * \code
  3012. * UnicodeString str(u"literal");
  3013. * if (str == u"other literal") { ... }
  3014. * \endcode
  3015. *
  3016. * @param text UTF-16 string
  3017. * @param textLength string length
  3018. * @stable ICU 59
  3019. */
  3020. UnicodeString(const wchar_t *text, int32_t textLength) :
  3021. UnicodeString(ConstChar16Ptr(text), textLength) {}
  3022. #endif
  3023. /**
  3024. * nullptr_t constructor.
  3025. * Effectively the same as the default constructor, makes an empty string object.
  3026. * @param text nullptr
  3027. * @param textLength ignored
  3028. * @stable ICU 59
  3029. */
  3030. inline UnicodeString(const std::nullptr_t text, int32_t textLength);
  3031. #ifndef U_HIDE_DRAFT_API
  3032. /**
  3033. * Constructor from `text`
  3034. * which is, or which is implicitly convertible to,
  3035. * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
  3036. * The string is bogus if the string view is too long.
  3037. *
  3038. * If you need a UnicodeString but need not copy the string view contents,
  3039. * then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.
  3040. *
  3041. * @param text UTF-16 string
  3042. * @draft ICU 76
  3043. */
  3044. template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
  3045. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
  3046. fUnion.fFields.fLengthAndFlags = kShortString;
  3047. doAppend(internal::toU16StringViewNullable(text));
  3048. }
  3049. #endif // U_HIDE_DRAFT_API
  3050. /**
  3051. * Readonly-aliasing char16_t* constructor.
  3052. * The text will be used for the UnicodeString object, but
  3053. * it will not be released when the UnicodeString is destroyed.
  3054. * This has copy-on-write semantics:
  3055. * When the string is modified, then the buffer is first copied into
  3056. * newly allocated memory.
  3057. * The aliased buffer is never modified.
  3058. *
  3059. * In an assignment to another UnicodeString, when using the copy constructor
  3060. * or the assignment operator, the text will be copied.
  3061. * When using fastCopyFrom(), the text will be aliased again,
  3062. * so that both strings then alias the same readonly-text.
  3063. *
  3064. * Note, for string literals:
  3065. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  3066. * length determination:
  3067. * \code
  3068. * UnicodeString alias = UnicodeString::readOnlyAlias(u"literal");
  3069. * if (str == u"other literal") { ... }
  3070. * \endcode
  3071. *
  3072. * @param isTerminated specifies if `text` is `NUL`-terminated.
  3073. * This must be true if `textLength==-1`.
  3074. * @param text The characters to alias for the UnicodeString.
  3075. * @param textLength The number of Unicode characters in `text` to alias.
  3076. * If -1, then this constructor will determine the length
  3077. * by calling `u_strlen()`.
  3078. * @stable ICU 2.0
  3079. */
  3080. UnicodeString(UBool isTerminated,
  3081. ConstChar16Ptr text,
  3082. int32_t textLength);
  3083. /**
  3084. * Writable-aliasing char16_t* constructor.
  3085. * The text will be used for the UnicodeString object, but
  3086. * it will not be released when the UnicodeString is destroyed.
  3087. * This has write-through semantics:
  3088. * For as long as the capacity of the buffer is sufficient, write operations
  3089. * will directly affect the buffer. When more capacity is necessary, then
  3090. * a new buffer will be allocated and the contents copied as with regularly
  3091. * constructed strings.
  3092. * In an assignment to another UnicodeString, the buffer will be copied.
  3093. * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
  3094. * as the string buffer itself and will in this case not copy the contents.
  3095. *
  3096. * @param buffer The characters to alias for the UnicodeString.
  3097. * @param buffLength The number of Unicode characters in `buffer` to alias.
  3098. * @param buffCapacity The size of `buffer` in char16_ts.
  3099. * @stable ICU 2.0
  3100. */
  3101. UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
  3102. #if !U_CHAR16_IS_TYPEDEF
  3103. /**
  3104. * Writable-aliasing uint16_t * constructor.
  3105. * Delegates to UnicodeString(const char16_t *, int32_t, int32_t).
  3106. * @param buffer writable buffer of/for UTF-16 text
  3107. * @param buffLength length of the current buffer contents
  3108. * @param buffCapacity buffer capacity
  3109. * @stable ICU 59
  3110. */
  // Wraps the raw uint16_t pointer in a Char16Ptr and forwards all three
  // arguments unchanged to the delegated UnicodeString constructor; this
  // constructor has no body of its own.
  3111. UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
  3112. UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
  3113. #endif
  3114. #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
  3115. /**
  3116. * Writable-aliasing wchar_t * constructor.
  3117. * (Only defined if U_SIZEOF_WCHAR_T==2.)
  3118. * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
  3119. * @param buffer writable buffer of/for UTF-16 text
  3120. * @param buffLength length of the current buffer contents
  3121. * @param buffCapacity buffer capacity
  3122. * @stable ICU 59
  3123. */
  // Wraps the raw wchar_t pointer in a Char16Ptr and forwards all three
  // arguments unchanged to the delegated UnicodeString constructor; this
  // constructor has no body of its own.
  3124. UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
  3125. UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
  3126. #endif
  3127. /**
  3128. * Writable-aliasing nullptr_t constructor.
  3129. * Effectively the same as the default constructor, makes an empty string object.
  3130. * @param buffer nullptr
  3131. * @param buffLength ignored
  3132. * @param buffCapacity ignored
  3133. * @stable ICU 59
  3134. */
  3135. inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
  3136. #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  3137. /**
  3138. * char* constructor.
  3139. * Uses the default converter (and thus depends on the ICU conversion code)
  3140. * unless U_CHARSET_IS_UTF8 is set to 1.
  3141. *
  3142. * For ASCII (really "invariant character") strings it is more efficient to use
  3143. * the constructor that takes a US_INV (for its enum EInvariant).
  3144. *
  3145. * Note, for string literals:
  3146. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  3147. * length determination:
  3148. * \code
  3149. * UnicodeString str(u"literal");
  3150. * if (str == u"other literal") { ... }
  3151. * \endcode
  3152. *
  3153. * It is recommended to mark this constructor "explicit" by
  3154. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  3155. * on the compiler command line or similar.
  3156. * @param codepageData an array of bytes, null-terminated,
  3157. * in the platform's default codepage.
  3158. * @stable ICU 2.0
  3159. */
  3160. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
  3161. /**
  3162. * char* constructor.
  3163. * Uses the default converter (and thus depends on the ICU conversion code)
  3164. * unless U_CHARSET_IS_UTF8 is set to 1.
  3165. * @param codepageData an array of bytes in the platform's default codepage.
  3166. * @param dataLength The number of bytes in `codepageData`.
  3167. * @stable ICU 2.0
  3168. */
  3169. UnicodeString(const char *codepageData, int32_t dataLength);
  3170. #endif
  3171. #if !UCONFIG_NO_CONVERSION
  3172. /**
  3173. * char* constructor.
  3174. * @param codepageData an array of bytes, null-terminated
  3175. * @param codepage the encoding of `codepageData`. The special
  3176. * value 0 for `codepage` indicates that the text is in the
  3177. * platform's default codepage.
  3178. *
  3179. * If `codepage` is an empty string (`""`),
  3180. * then a simple conversion is performed on the codepage-invariant
  3181. * subset ("invariant characters") of the platform encoding. See utypes.h.
  3182. * Recommendation: For invariant-character strings use the constructor
  3183. * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  3184. * because it avoids object code dependencies of UnicodeString on
  3185. * the conversion code.
  3186. *
  3187. * @stable ICU 2.0
  3188. */
  3189. UnicodeString(const char *codepageData, const char *codepage);
  3190. /**
  3191. * char* constructor.
  3192. * @param codepageData an array of bytes.
  3193. * @param dataLength The number of bytes in `codepageData`.
  3194. * @param codepage the encoding of `codepageData`. The special
  3195. * value 0 for `codepage` indicates that the text is in the
  3196. * platform's default codepage.
  3197. * If `codepage` is an empty string (`""`),
  3198. * then a simple conversion is performed on the codepage-invariant
  3199. * subset ("invariant characters") of the platform encoding. See utypes.h.
  3200. * Recommendation: For invariant-character strings use the constructor
  3201. * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  3202. * because it avoids object code dependencies of UnicodeString on
  3203. * the conversion code.
  3204. *
  3205. * @stable ICU 2.0
  3206. */
  3207. UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
  3208. /**
  3209. * char * / UConverter constructor.
  3210. * This constructor uses an existing UConverter object to
  3211. * convert the codepage string to Unicode and construct a UnicodeString
  3212. * from that.
  3213. *
  3214. * The converter is reset at first.
  3215. * If the error code indicates a failure before this constructor is called,
  3216. * or if an error occurs during conversion or construction,
  3217. * then the string will be bogus.
  3218. *
  3219. * This function avoids the overhead of opening and closing a converter if
  3220. * multiple strings are constructed.
  3221. *
  3222. * @param src input codepage string
  3223. * @param srcLength length of the input string, can be -1 for NUL-terminated strings
  3224. * @param cnv converter object (ucnv_resetToUnicode() will be called),
  3225. * can be nullptr for the default converter
  3226. * @param errorCode normal ICU error code
  3227. * @stable ICU 2.0
  3228. */
  3229. UnicodeString(
  3230. const char *src, int32_t srcLength,
  3231. UConverter *cnv,
  3232. UErrorCode &errorCode);
  3233. #endif
  3234. /**
  3235. * Constructs a Unicode string from an invariant-character char * string.
  3236. * About invariant characters see utypes.h.
  3237. * This constructor has no runtime dependency on conversion code and is
  3238. * therefore recommended over ones taking a charset name string
  3239. * (where the empty string "" indicates invariant-character conversion).
  3240. *
  3241. * Use the macro US_INV as the third, signature-distinguishing parameter.
  3242. *
  3243. * For example:
  3244. * \code
  3245. * void fn(const char *s) {
  3246. * UnicodeString ustr(s, -1, US_INV);
  3247. * // use ustr ...
  3248. * }
  3249. * \endcode
  3250. *
  3251. * Note, for string literals:
  3252. * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
  3253. * length determination:
  3254. * \code
  3255. * UnicodeString str(u"literal");
  3256. * if (str == u"other literal") { ... }
  3257. * \endcode
  3258. *
  3259. * @param src String using only invariant characters.
  3260. * @param textLength Length of src, or -1 if NUL-terminated.
  3261. * @param inv Signature-distinguishing parameter, use US_INV.
  3262. *
  3263. * @see US_INV
  3264. * @stable ICU 3.2
  3265. */
  3266. UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
  3267. /**
  3268. * Copy constructor.
  3269. *
  3270. * Starting with ICU 2.4, the assignment operator and the copy constructor
  3271. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  3272. * By contrast, the fastCopyFrom() function implements the old,
  3273. * more efficient but less safe behavior
  3274. * of making this string also a readonly alias to the same buffer.
  3275. *
  3276. * If the source object has an "open" buffer from getBuffer(minCapacity),
  3277. * then the copy is an empty string.
  3278. *
  3279. * @param that The UnicodeString object to copy.
  3280. * @stable ICU 2.0
  3281. * @see fastCopyFrom
  3282. */
  3283. UnicodeString(const UnicodeString& that);
  3284. /**
  3285. * Move constructor; might leave src in bogus state.
  3286. * This string will have the same contents and state that the source string had.
  3287. * @param src source string
  3288. * @stable ICU 56
  3289. */
  3290. UnicodeString(UnicodeString &&src) noexcept;
  3291. /**
  3292. * 'Substring' constructor from tail of source string.
  3293. * @param src The UnicodeString object to copy.
  3294. * @param srcStart The offset into `src` at which to start copying.
  3295. * @stable ICU 2.2
  3296. */
  3297. UnicodeString(const UnicodeString& src, int32_t srcStart);
  3298. /**
  3299. * 'Substring' constructor from subrange of source string.
  3300. * @param src The UnicodeString object to copy.
  3301. * @param srcStart The offset into `src` at which to start copying.
  3302. * @param srcLength The number of characters from `src` to copy.
  3303. * @stable ICU 2.2
  3304. */
  3305. UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  3306. /**
  3307. * Clone this object, an instance of a subclass of Replaceable.
  3308. * Clones can be used concurrently in multiple threads.
  3309. * If a subclass does not implement clone(), or if an error occurs,
  3310. * then nullptr is returned.
  3311. * The caller must delete the clone.
  3312. *
  3313. * @return a clone of this object
  3314. *
  3315. * @see Replaceable::clone
  3316. * @see getDynamicClassID
  3317. * @stable ICU 2.6
  3318. */
  3319. virtual UnicodeString *clone() const override;
  3320. /** Destructor.
  3321. * @stable ICU 2.0
  3322. */
  3323. virtual ~UnicodeString();
  3324. #ifndef U_HIDE_DRAFT_API
  3325. /**
  3326. * Readonly-aliasing factory method.
  3327. * Aliases the same buffer as the input `text`
  3328. * which is, or which is implicitly convertible to,
  3329. * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
  3330. * The string is bogus if the string view is too long.
  3331. *
  3332. * The text will be used for the UnicodeString object, but
  3333. * it will not be released when the UnicodeString is destroyed.
  3334. * This has copy-on-write semantics:
  3335. * When the string is modified, then the buffer is first copied into
  3336. * newly allocated memory.
  3337. * The aliased buffer is never modified.
  3338. *
  3339. * In an assignment to another UnicodeString, when using the copy constructor
  3340. * or the assignment operator, the text will be copied.
  3341. * When using fastCopyFrom(), the text will be aliased again,
  3342. * so that both strings then alias the same readonly-text.
  3343. *
  3344. * @param text The string view to alias for the UnicodeString.
  3345. * @draft ICU 76
  3346. */
  3347. template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
  3348. static inline UnicodeString readOnlyAlias(const S &text) {
  3349. return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
  3350. }
  3351. /**
  3352. * Readonly-aliasing factory method.
  3353. * Aliases the same buffer as the input `text`.
  3354. *
  3355. * The text will be used for the UnicodeString object, but
  3356. * it will not be released when the UnicodeString is destroyed.
  3357. * This has copy-on-write semantics:
  3358. * When the string is modified, then the buffer is first copied into
  3359. * newly allocated memory.
  3360. * The aliased buffer is never modified.
  3361. *
  3362. * In an assignment to another UnicodeString, when using the copy constructor
  3363. * or the assignment operator, the text will be copied.
  3364. * When using fastCopyFrom(), the text will be aliased again,
  3365. * so that both strings then alias the same readonly-text.
  3366. *
  3367. * @param text The UnicodeString to alias.
  3368. * @draft ICU 76
  3369. */
  // Public entry point: returns exactly what the private
  // readOnlyAliasFromUnicodeString() helper produces for `text`.
  3370. static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
  3371. return readOnlyAliasFromUnicodeString(text);
  3372. }
  3373. #endif // U_HIDE_DRAFT_API
  3374. /**
  3375. * Create a UnicodeString from a UTF-8 string.
  3376. * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  3377. * Calls u_strFromUTF8WithSub().
  3378. *
  3379. * @param utf8 UTF-8 input string.
  3380. * Note that a StringPiece can be implicitly constructed
  3381. * from a std::string or a NUL-terminated const char * string.
  3382. * @return A UnicodeString with equivalent UTF-16 contents.
  3383. * @see toUTF8
  3384. * @see toUTF8String
  3385. * @stable ICU 4.2
  3386. */
  3387. static UnicodeString fromUTF8(StringPiece utf8);
  3388. /**
  3389. * Create a UnicodeString from a UTF-32 string.
  3390. * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  3391. * Calls u_strFromUTF32WithSub().
  3392. *
  3393. * @param utf32 UTF-32 input string. Must not be nullptr.
  3394. * @param length Length of the input string, or -1 if NUL-terminated.
  3395. * @return A UnicodeString with equivalent UTF-16 contents.
  3396. * @see toUTF32
  3397. * @stable ICU 4.2
  3398. */
  3399. static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
  3400. /* Miscellaneous operations */
  3401. /**
  3402. * Unescape a string of characters and return a string containing
  3403. * the result. The following escape sequences are recognized:
  3404. *
  3405. * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
  3406. * \\Uhhhhhhhh 8 hex digits
  3407. * \\xhh 1-2 hex digits
  3408. * \\ooo 1-3 octal digits; o in [0-7]
  3409. * \\cX control-X; X is masked with 0x1F
  3410. *
  3411. * as well as the standard ANSI C escapes:
  3412. *
  3413. * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
  3414. * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
  3415. * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
  3416. *
  3417. * Anything else following a backslash is generically escaped. For
  3418. * example, "[a\\-z]" returns "[a-z]".
  3419. *
  3420. * If an escape sequence is ill-formed, this method returns an empty
  3421. * string. An example of an ill-formed sequence is "\\u" followed by
  3422. * fewer than 4 hex digits.
  3423. *
  3424. * This function is similar to u_unescape() but not identical to it.
  3425. * The latter takes a source char*, so it does escape recognition
  3426. * and also invariant conversion.
  3427. *
  3428. * @return a string with backslash escapes interpreted, or an
  3429. * empty string on error.
  3430. * @see UnicodeString#unescapeAt()
  3431. * @see u_unescape()
  3432. * @see u_unescapeAt()
  3433. * @stable ICU 2.0
  3434. */
  3435. UnicodeString unescape() const;
  3436. /**
  3437. * Unescape a single escape sequence and return the represented
  3438. * character. See unescape() for a listing of the recognized escape
  3439. * sequences. The character at offset-1 is assumed (without
  3440. * checking) to be a backslash. If the escape sequence is
  3441. * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
  3442. * returned.
  3443. *
  3444. * @param offset an input output parameter. On input, it is the
  3445. * offset into this string where the escape sequence is located,
  3446. * after the initial backslash. On output, it is advanced after the
  3447. * last character parsed. On error, it is not advanced at all.
  3448. * @return the character represented by the escape sequence at
  3449. * offset, or U_SENTINEL=-1 on error.
  3450. * @see UnicodeString#unescape()
  3451. * @see u_unescape()
  3452. * @see u_unescapeAt()
  3453. * @stable ICU 2.0
  3454. */
  3455. UChar32 unescapeAt(int32_t &offset) const;
  3456. /**
  3457. * ICU "poor man's RTTI", returns a UClassID for this class.
  3458. *
  3459. * @stable ICU 2.2
  3460. */
  3461. static UClassID U_EXPORT2 getStaticClassID();
  3462. /**
  3463. * ICU "poor man's RTTI", returns a UClassID for the actual class.
  3464. *
  3465. * @stable ICU 2.2
  3466. */
  3467. virtual UClassID getDynamicClassID() const override;
  3468. //========================================
  3469. // Implementation methods
  3470. //========================================
  3471. protected:
  3472. /**
  3473. * Implement Replaceable::getLength() (see jitterbug 1027).
  3474. * @stable ICU 2.4
  3475. */
  3476. virtual int32_t getLength() const override;
  3477. /**
  3478. * The change in Replaceable to use virtual getCharAt() allows
  3479. * UnicodeString::charAt() to be inline again (see jitterbug 709).
  3480. * @stable ICU 2.4
  3481. */
  3482. virtual char16_t getCharAt(int32_t offset) const override;
  3483. /**
  3484. * The change in Replaceable to use virtual getChar32At() allows
  3485. * UnicodeString::char32At() to be inline again (see jitterbug 709).
  3486. * @stable ICU 2.4
  3487. */
  3488. virtual UChar32 getChar32At(int32_t offset) const override;
  3489. private:
  3490. static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
  3491. static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text);
  3492. // For char* constructors. Could be made public.
  3493. UnicodeString &setToUTF8(StringPiece utf8);
  3494. // For extract(char*).
  3495. // We could make a toUTF8(target, capacity, errorCode) public but not
  3496. // this version: New API will be cleaner if we make callers create substrings
  3497. // rather than having start+length on every method,
  3498. // and it should take a UErrorCode&.
  3499. int32_t
  3500. toUTF8(int32_t start, int32_t len,
  3501. char *target, int32_t capacity) const;
  3502. /**
  3503. * Internal string contents comparison, called by operator==.
  3504. * Requires: this & text not bogus and have same lengths.
  3505. */
  3506. inline UBool doEquals(const UnicodeString &text, int32_t len) const {
  3507. return doEquals(text.getArrayStart(), len);
  3508. }
  3509. UBool doEquals(const char16_t *text, int32_t len) const;
  3510. inline UBool
  3511. doEqualsSubstring(int32_t start,
  3512. int32_t length,
  3513. const UnicodeString& srcText,
  3514. int32_t srcStart,
  3515. int32_t srcLength) const;
  3516. UBool doEqualsSubstring(int32_t start,
  3517. int32_t length,
  3518. const char16_t *srcChars,
  3519. int32_t srcStart,
  3520. int32_t srcLength) const;
  3521. inline int8_t
  3522. doCompare(int32_t start,
  3523. int32_t length,
  3524. const UnicodeString& srcText,
  3525. int32_t srcStart,
  3526. int32_t srcLength) const;
  3527. int8_t doCompare(int32_t start,
  3528. int32_t length,
  3529. const char16_t *srcChars,
  3530. int32_t srcStart,
  3531. int32_t srcLength) const;
  3532. inline int8_t
  3533. doCompareCodePointOrder(int32_t start,
  3534. int32_t length,
  3535. const UnicodeString& srcText,
  3536. int32_t srcStart,
  3537. int32_t srcLength) const;
  3538. int8_t doCompareCodePointOrder(int32_t start,
  3539. int32_t length,
  3540. const char16_t *srcChars,
  3541. int32_t srcStart,
  3542. int32_t srcLength) const;
  3543. inline int8_t
  3544. doCaseCompare(int32_t start,
  3545. int32_t length,
  3546. const UnicodeString &srcText,
  3547. int32_t srcStart,
  3548. int32_t srcLength,
  3549. uint32_t options) const;
  3550. int8_t
  3551. doCaseCompare(int32_t start,
  3552. int32_t length,
  3553. const char16_t *srcChars,
  3554. int32_t srcStart,
  3555. int32_t srcLength,
  3556. uint32_t options) const;
  3557. int32_t doIndexOf(char16_t c,
  3558. int32_t start,
  3559. int32_t length) const;
  3560. int32_t doIndexOf(UChar32 c,
  3561. int32_t start,
  3562. int32_t length) const;
  3563. int32_t doLastIndexOf(char16_t c,
  3564. int32_t start,
  3565. int32_t length) const;
  3566. int32_t doLastIndexOf(UChar32 c,
  3567. int32_t start,
  3568. int32_t length) const;
  3569. void doExtract(int32_t start,
  3570. int32_t length,
  3571. char16_t *dst,
  3572. int32_t dstStart) const;
  3573. inline void doExtract(int32_t start,
  3574. int32_t length,
  3575. UnicodeString& target) const;
  3576. inline char16_t doCharAt(int32_t offset) const;
  3577. UnicodeString& doReplace(int32_t start,
  3578. int32_t length,
  3579. const UnicodeString& srcText,
  3580. int32_t srcStart,
  3581. int32_t srcLength);
  3582. UnicodeString& doReplace(int32_t start,
  3583. int32_t length,
  3584. const char16_t *srcChars,
  3585. int32_t srcStart,
  3586. int32_t srcLength);
  3587. UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
  3588. UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  3589. UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
  3590. UnicodeString& doAppend(std::u16string_view src);
  3591. UnicodeString& doReverse(int32_t start,
  3592. int32_t length);
  3593. // calculate hash code
  3594. int32_t doHashCode() const;
  3595. // get pointer to start of array
  3596. // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
  3597. inline char16_t* getArrayStart();
  3598. inline const char16_t* getArrayStart() const;
  3599. inline UBool hasShortLength() const;
  3600. inline int32_t getShortLength() const;
  3601. // A UnicodeString object (not necessarily its current buffer)
  3602. // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
  3603. inline UBool isWritable() const;
  3604. // Is the current buffer writable?
  3605. inline UBool isBufferWritable() const;
  3606. // None of the following does releaseArray().
  3607. inline void setZeroLength();
  3608. inline void setShortLength(int32_t len);
  3609. inline void setLength(int32_t len);
  3610. inline void setToEmpty();
  3611. inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
  3612. // allocate the array; result may be the stack buffer
  3613. // sets refCount to 1 if appropriate
  3614. // sets fArray, fCapacity, and flags
  3615. // sets length to 0
  3616. // returns boolean for success or failure
  3617. UBool allocate(int32_t capacity);
  3618. // release the array if owned
  3619. void releaseArray();
  3620. // turn a bogus string into an empty one
  3621. void unBogus();
  3622. // implements assignment operator, copy constructor, and fastCopyFrom()
  3623. UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
  3624. // Copies just the fields without memory management.
  3625. void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
  3626. // Pin start and limit to acceptable values.
  3627. inline void pinIndex(int32_t& start) const;
  3628. inline void pinIndices(int32_t& start,
  3629. int32_t& length) const;
  3630. #if !UCONFIG_NO_CONVERSION
  3631. /* Internal extract() using UConverter. */
  3632. int32_t doExtract(int32_t start, int32_t length,
  3633. char *dest, int32_t destCapacity,
  3634. UConverter *cnv,
  3635. UErrorCode &errorCode) const;
  3636. /*
  3637. * Real constructor for converting from codepage data.
  3638. * It assumes that it is called on a string whose buffer is not reference-counted (kRefCounted is not set).
  3639. *
  3640. * If `codepage==0`, then the default converter
  3641. * is used for the platform encoding.
  3642. * If `codepage` is an empty string (`""`),
  3643. * then a simple conversion is performed on the codepage-invariant
  3644. * subset ("invariant characters") of the platform encoding. See utypes.h.
  3645. */
  3646. void doCodepageCreate(const char *codepageData,
  3647. int32_t dataLength,
  3648. const char *codepage);
  3649. /*
  3650. * Worker function for creating a UnicodeString from
  3651. * a codepage string using a UConverter.
  3652. */
  3653. void
  3654. doCodepageCreate(const char *codepageData,
  3655. int32_t dataLength,
  3656. UConverter *converter,
  3657. UErrorCode &status);
  3658. #endif
  3659. /*
  3660. * This function is called when write access to the array
  3661. * is necessary.
  3662. *
  3663. * We need to make a copy of the array if
  3664. * the buffer is read-only, or
  3665. * the buffer is refCounted (shared), and refCount>1, or
  3666. * the buffer is too small.
  3667. *
  3668. * Return false if memory could not be allocated.
  3669. */
  3670. UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
  3671. int32_t growCapacity = -1,
  3672. UBool doCopyArray = true,
  3673. int32_t** pBufferToDelete = nullptr,
  3674. UBool forceClone = false);
  3675. /**
  3676. * Common function for UnicodeString case mappings.
  3677. * The stringCaseMapper has the same type UStringCaseMapper
  3678. * as in ustr_imp.h for ustrcase_map().
  3679. */
  3680. UnicodeString &
  3681. caseMap(int32_t caseLocale, uint32_t options,
  3682. #if !UCONFIG_NO_BREAK_ITERATION
  3683. BreakIterator *iter,
  3684. #endif
  3685. UStringCaseMapper *stringCaseMapper);
  3686. // ref counting
  3687. void addRef();
  3688. int32_t removeRef();
  3689. int32_t refCount() const;
  3690. // constants
  // Internal constants: stack buffer size, sentinel values, and the bit layout
  // of fLengthAndFlags (storage flags in the low bits, short length above them).
  3691. enum {
  3692. /**
  3693. * Size of stack buffer for short strings.
  3694. * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
  3695. * Computed as the object size minus one pointer and minus 2 bytes,
  3696. * divided by U_SIZEOF_UCHAR. @see UNISTR_OBJECT_SIZE
  3696. */
  3697. US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
  3698. kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
  3699. kInvalidHashCode=0, // invalid hash code
  3700. kEmptyHashCode=1, // hash code for empty string
  3701. // bit flag values for fLengthAndFlags
  3702. kIsBogus=1, // this string is bogus, i.e., not valid or nullptr
  3703. kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
  3704. kRefCounted=4, // there is a refCount field before the characters in fArray
  3705. kBufferIsReadonly=8,// do not write to this buffer
  3706. kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
  3707. // and releaseBuffer(newLength) must be called
  3708. kAllStorageFlags=0x1f, // mask of the five flag bits above (1|2|4|8|16)
  3709. kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
  3710. kLength1=1<<kLengthShift, // lowest bit of the short-length field, i.e. a short length of 1
  3711. kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
  3712. kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
  3713. // combined values for convenience
  3714. kShortString=kUsingStackBuffer,
  3715. kLongString=kRefCounted,
  3716. kReadonlyAlias=kBufferIsReadonly,
  3717. kWritableAlias=0
  3718. };
  3719. friend class UnicodeStringAppendable;
  3720. union StackBufferOrFields; // forward declaration necessary before friend declaration
  3721. friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
  3722. /*
  3723. * The following are all the class fields that are stored
  3724. * in each UnicodeString object.
  3725. * Note that UnicodeString has virtual functions,
  3726. * therefore there is an implicit vtable pointer
  3727. * as the first real field.
  3728. * The fields should be aligned such that no padding is necessary.
  3729. * On 32-bit machines, the size should be 32 bytes,
  3730. * on 64-bit machines (8-byte pointers), it should be 40 bytes.
  3731. *
  3732. * We use a hack to achieve this.
  3733. *
  3734. * With at least some compilers, each of the following is forced to
  3735. * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
  3736. * rounded up with additional padding if the fields do not already fit that requirement:
  3737. * - sizeof(class UnicodeString)
  3738. * - offsetof(UnicodeString, fUnion)
  3739. * - sizeof(fUnion)
  3740. * - sizeof(fStackFields)
  3741. *
  3742. * We optimize for the longest possible internal buffer for short strings.
  3743. * fUnion.fStackFields begins with 2 bytes for storage flags
  3744. * and the length of relatively short strings,
  3745. * followed by the buffer for short string contents.
  3746. * There is no padding inside fStackFields.
  3747. *
  3748. * Heap-allocated and aliased strings use fUnion.fFields.
  3749. * Both fStackFields and fFields must begin with the same fields for flags and short length,
  3750. * that is, those must have the same memory offsets inside the object,
  3751. * because the flags must be inspected in order to decide which half of fUnion is being used.
  3752. * We assume that the compiler does not reorder the fields.
  3753. *
  3754. * (Padding at the end of fFields is ok:
  3755. * As long as it is no larger than fStackFields, it is not wasted space.)
  3756. *
  3757. * For some of the history of the UnicodeString class fields layout, see
  3758. * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
  3759. * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
  3760. * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
  3761. */
  3762. // (implicit) *vtable;
  // Storage for the string: either an in-object buffer (fStackFields) or
  // length/capacity/pointer fields describing an external array (fFields).
  3763. union StackBufferOrFields {
  3764. // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
  3765. // Each struct of the union must begin with fLengthAndFlags.
  3766. struct {
  3767. int16_t fLengthAndFlags; // bit fields: see constants above
  3768. char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
  3769. } fStackFields;
  3770. struct {
  3771. int16_t fLengthAndFlags; // bit fields: see constants above
  3772. int32_t fLength; // number of characters in fArray; read by length() only when fLengthAndFlags is negative (length does not fit the short-length bits, see kMaxShortLength); else undefined
  3773. int32_t fCapacity; // capacity of fArray (in char16_ts)
  3774. // array pointer last to minimize padding for machines with P128 data model
  3775. // or pointer sizes that are not a power of 2
  3776. char16_t *fArray; // the Unicode data
  3777. } fFields;
  3778. } fUnion;
  3779. };
  3780. /**
  3781. * Creates a new UnicodeString from the concatenation of two others.
  3782. *
  3783. * @param s1 The first string to be copied to the new one.
  3784. * @param s2 The second string to be copied to the new one, after s1.
  3785. * @return UnicodeString(s1).append(s2)
  3786. * @stable ICU 2.8
  3787. */
  3788. U_COMMON_API UnicodeString U_EXPORT2
  3789. operator+ (const UnicodeString &s1, const UnicodeString &s2);
  3790. #ifndef U_HIDE_DRAFT_API
  3791. /**
  3792. * Creates a new UnicodeString from the concatenation of a UnicodeString and `s2`
  3793. * which is, or which is implicitly convertible to,
  3794. * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
  3795. *
  3796. * @param s1 The string to be copied to the new one.
  3797. * @param s2 The string view to be copied to the new string, after s1.
  3798. * @return UnicodeString(s1).append(s2)
  3799. * @draft ICU 76
  3800. */
  // Converts s2 with internal::toU16StringView() and passes both operands to
  // unistr_internalConcat(), which is only declared after this template.
  // NOTE(review): the call presumably resolves at instantiation time because
  // its second argument depends on S -- keep that argument dependent; confirm
  // before restructuring.
  3801. template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
  3802. inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
  3803. return unistr_internalConcat(s1, internal::toU16StringView(s2));
  3804. }
  3805. #endif // U_HIDE_DRAFT_API
  3806. #ifndef U_FORCE_HIDE_INTERNAL_API
  3807. /** @internal */
  3808. U_COMMON_API UnicodeString U_EXPORT2
  3809. unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
  3810. #endif
  3811. //========================================
  3812. // Inline members
  3813. //========================================
  3814. //========================================
  3815. // Privates
  3816. //========================================
  3817. inline void
  3818. UnicodeString::pinIndex(int32_t& start) const
  3819. {
  3820. // pin index
  3821. if(start < 0) {
  3822. start = 0;
  3823. } else if(start > length()) {
  3824. start = length();
  3825. }
  3826. }
  3827. inline void
  3828. UnicodeString::pinIndices(int32_t& start,
  3829. int32_t& _length) const
  3830. {
  3831. // pin indices
  3832. int32_t len = length();
  3833. if(start < 0) {
  3834. start = 0;
  3835. } else if(start > len) {
  3836. start = len;
  3837. }
  3838. if(_length < 0) {
  3839. _length = 0;
  3840. } else if(_length > (len - start)) {
  3841. _length = (len - start);
  3842. }
  3843. }
  3844. inline char16_t*
  3845. UnicodeString::getArrayStart() {
  3846. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3847. fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
  3848. }
  3849. inline const char16_t*
  3850. UnicodeString::getArrayStart() const {
  3851. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3852. fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
  3853. }
  3854. //========================================
  3855. // Default constructor
  3856. //========================================
  3857. inline
  3858. UnicodeString::UnicodeString() {
  3859. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3860. }
  3861. inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
  3862. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3863. }
  3864. inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
  3865. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3866. }
  3867. inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
  3868. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3869. }
  3870. //========================================
  3871. // Read-only implementation methods
  3872. //========================================
  3873. inline UBool
  3874. UnicodeString::hasShortLength() const {
  3875. return fUnion.fFields.fLengthAndFlags>=0;
  3876. }
  3877. inline int32_t
  3878. UnicodeString::getShortLength() const {
  3879. // fLengthAndFlags must be non-negative -> short length >= 0
  3880. // and arithmetic or logical shift does not matter.
  3881. return fUnion.fFields.fLengthAndFlags>>kLengthShift;
  3882. }
  3883. inline int32_t
  3884. UnicodeString::length() const {
  3885. return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
  3886. }
  3887. inline int32_t
  3888. UnicodeString::getCapacity() const {
  3889. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3890. US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
  3891. }
  3892. inline int32_t
  3893. UnicodeString::hashCode() const
  3894. { return doHashCode(); }
  3895. inline UBool
  3896. UnicodeString::isBogus() const
  3897. { return fUnion.fFields.fLengthAndFlags & kIsBogus; }
  3898. inline UBool
  3899. UnicodeString::isWritable() const
  3900. { return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
  3901. inline UBool
  3902. UnicodeString::isBufferWritable() const
  3903. {
  3904. return
  3905. !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
  3906. (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
  3907. }
  3908. inline const char16_t *
  3909. UnicodeString::getBuffer() const {
  3910. if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
  3911. return nullptr;
  3912. } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
  3913. return fUnion.fStackFields.fBuffer;
  3914. } else {
  3915. return fUnion.fFields.fArray;
  3916. }
  3917. }
  3918. //========================================
  3919. // Read-only alias methods
  3920. //========================================
  3921. inline int8_t
  3922. UnicodeString::doCompare(int32_t start,
  3923. int32_t thisLength,
  3924. const UnicodeString& srcText,
  3925. int32_t srcStart,
  3926. int32_t srcLength) const
  3927. {
  3928. if(srcText.isBogus()) {
  3929. return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
  3930. } else {
  3931. srcText.pinIndices(srcStart, srcLength);
  3932. return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3933. }
  3934. }
  3935. inline UBool
  3936. UnicodeString::doEqualsSubstring(int32_t start,
  3937. int32_t thisLength,
  3938. const UnicodeString& srcText,
  3939. int32_t srcStart,
  3940. int32_t srcLength) const
  3941. {
  3942. if(srcText.isBogus()) {
  3943. return isBogus();
  3944. } else {
  3945. srcText.pinIndices(srcStart, srcLength);
  3946. return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3947. }
  3948. }
  3949. inline bool
  3950. UnicodeString::operator== (const UnicodeString& text) const
  3951. {
  3952. if(isBogus()) {
  3953. return text.isBogus();
  3954. } else {
  3955. int32_t len = length(), textLength = text.length();
  3956. return !text.isBogus() && len == textLength && doEquals(text, len);
  3957. }
  3958. }
  3959. inline bool
  3960. UnicodeString::operator!= (const UnicodeString& text) const
  3961. { return (! operator==(text)); }
  3962. inline UBool
  3963. UnicodeString::operator> (const UnicodeString& text) const
  3964. { return doCompare(0, length(), text, 0, text.length()) == 1; }
  3965. inline UBool
  3966. UnicodeString::operator< (const UnicodeString& text) const
  3967. { return doCompare(0, length(), text, 0, text.length()) == -1; }
  3968. inline UBool
  3969. UnicodeString::operator>= (const UnicodeString& text) const
  3970. { return doCompare(0, length(), text, 0, text.length()) != -1; }
  3971. inline UBool
  3972. UnicodeString::operator<= (const UnicodeString& text) const
  3973. { return doCompare(0, length(), text, 0, text.length()) != 1; }
  3974. inline int8_t
  3975. UnicodeString::compare(const UnicodeString& text) const
  3976. { return doCompare(0, length(), text, 0, text.length()); }
  3977. inline int8_t
  3978. UnicodeString::compare(int32_t start,
  3979. int32_t _length,
  3980. const UnicodeString& srcText) const
  3981. { return doCompare(start, _length, srcText, 0, srcText.length()); }
  3982. inline int8_t
  3983. UnicodeString::compare(ConstChar16Ptr srcChars,
  3984. int32_t srcLength) const
  3985. { return doCompare(0, length(), srcChars, 0, srcLength); }
  3986. inline int8_t
  3987. UnicodeString::compare(int32_t start,
  3988. int32_t _length,
  3989. const UnicodeString& srcText,
  3990. int32_t srcStart,
  3991. int32_t srcLength) const
  3992. { return doCompare(start, _length, srcText, srcStart, srcLength); }
  3993. inline int8_t
  3994. UnicodeString::compare(int32_t start,
  3995. int32_t _length,
  3996. const char16_t *srcChars) const
  3997. { return doCompare(start, _length, srcChars, 0, _length); }
  3998. inline int8_t
  3999. UnicodeString::compare(int32_t start,
  4000. int32_t _length,
  4001. const char16_t *srcChars,
  4002. int32_t srcStart,
  4003. int32_t srcLength) const
  4004. { return doCompare(start, _length, srcChars, srcStart, srcLength); }
  4005. inline int8_t
  4006. UnicodeString::compareBetween(int32_t start,
  4007. int32_t limit,
  4008. const UnicodeString& srcText,
  4009. int32_t srcStart,
  4010. int32_t srcLimit) const
  4011. { return doCompare(start, limit - start,
  4012. srcText, srcStart, srcLimit - srcStart); }
  4013. inline int8_t
  4014. UnicodeString::doCompareCodePointOrder(int32_t start,
  4015. int32_t thisLength,
  4016. const UnicodeString& srcText,
  4017. int32_t srcStart,
  4018. int32_t srcLength) const
  4019. {
  4020. if(srcText.isBogus()) {
  4021. return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
  4022. } else {
  4023. srcText.pinIndices(srcStart, srcLength);
  4024. return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  4025. }
  4026. }
  4027. inline int8_t
  4028. UnicodeString::compareCodePointOrder(const UnicodeString& text) const
  4029. { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
  4030. inline int8_t
  4031. UnicodeString::compareCodePointOrder(int32_t start,
  4032. int32_t _length,
  4033. const UnicodeString& srcText) const
  4034. { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
  4035. inline int8_t
  4036. UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
  4037. int32_t srcLength) const
  4038. { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
  4039. inline int8_t
  4040. UnicodeString::compareCodePointOrder(int32_t start,
  4041. int32_t _length,
  4042. const UnicodeString& srcText,
  4043. int32_t srcStart,
  4044. int32_t srcLength) const
  4045. { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
  4046. inline int8_t
  4047. UnicodeString::compareCodePointOrder(int32_t start,
  4048. int32_t _length,
  4049. const char16_t *srcChars) const
  4050. { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
  4051. inline int8_t
  4052. UnicodeString::compareCodePointOrder(int32_t start,
  4053. int32_t _length,
  4054. const char16_t *srcChars,
  4055. int32_t srcStart,
  4056. int32_t srcLength) const
  4057. { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
  4058. inline int8_t
  4059. UnicodeString::compareCodePointOrderBetween(int32_t start,
  4060. int32_t limit,
  4061. const UnicodeString& srcText,
  4062. int32_t srcStart,
  4063. int32_t srcLimit) const
  4064. { return doCompareCodePointOrder(start, limit - start,
  4065. srcText, srcStart, srcLimit - srcStart); }
  4066. inline int8_t
  4067. UnicodeString::doCaseCompare(int32_t start,
  4068. int32_t thisLength,
  4069. const UnicodeString &srcText,
  4070. int32_t srcStart,
  4071. int32_t srcLength,
  4072. uint32_t options) const
  4073. {
  4074. if(srcText.isBogus()) {
  4075. return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
  4076. } else {
  4077. srcText.pinIndices(srcStart, srcLength);
  4078. return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
  4079. }
  4080. }
  4081. inline int8_t
  4082. UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
  4083. return doCaseCompare(0, length(), text, 0, text.length(), options);
  4084. }
  4085. inline int8_t
  4086. UnicodeString::caseCompare(int32_t start,
  4087. int32_t _length,
  4088. const UnicodeString &srcText,
  4089. uint32_t options) const {
  4090. return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
  4091. }
  4092. inline int8_t
  4093. UnicodeString::caseCompare(ConstChar16Ptr srcChars,
  4094. int32_t srcLength,
  4095. uint32_t options) const {
  4096. return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
  4097. }
  4098. inline int8_t
  4099. UnicodeString::caseCompare(int32_t start,
  4100. int32_t _length,
  4101. const UnicodeString &srcText,
  4102. int32_t srcStart,
  4103. int32_t srcLength,
  4104. uint32_t options) const {
  4105. return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
  4106. }
  4107. inline int8_t
  4108. UnicodeString::caseCompare(int32_t start,
  4109. int32_t _length,
  4110. const char16_t *srcChars,
  4111. uint32_t options) const {
  4112. return doCaseCompare(start, _length, srcChars, 0, _length, options);
  4113. }
  4114. inline int8_t
  4115. UnicodeString::caseCompare(int32_t start,
  4116. int32_t _length,
  4117. const char16_t *srcChars,
  4118. int32_t srcStart,
  4119. int32_t srcLength,
  4120. uint32_t options) const {
  4121. return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
  4122. }
  4123. inline int8_t
  4124. UnicodeString::caseCompareBetween(int32_t start,
  4125. int32_t limit,
  4126. const UnicodeString &srcText,
  4127. int32_t srcStart,
  4128. int32_t srcLimit,
  4129. uint32_t options) const {
  4130. return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
  4131. }
  4132. inline int32_t
  4133. UnicodeString::indexOf(const UnicodeString& srcText,
  4134. int32_t srcStart,
  4135. int32_t srcLength,
  4136. int32_t start,
  4137. int32_t _length) const
  4138. {
  4139. if(!srcText.isBogus()) {
  4140. srcText.pinIndices(srcStart, srcLength);
  4141. if(srcLength > 0) {
  4142. return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  4143. }
  4144. }
  4145. return -1;
  4146. }
  4147. inline int32_t
  4148. UnicodeString::indexOf(const UnicodeString& text) const
  4149. { return indexOf(text, 0, text.length(), 0, length()); }
  4150. inline int32_t
  4151. UnicodeString::indexOf(const UnicodeString& text,
  4152. int32_t start) const {
  4153. pinIndex(start);
  4154. return indexOf(text, 0, text.length(), start, length() - start);
  4155. }
  4156. inline int32_t
  4157. UnicodeString::indexOf(const UnicodeString& text,
  4158. int32_t start,
  4159. int32_t _length) const
  4160. { return indexOf(text, 0, text.length(), start, _length); }
  4161. inline int32_t
  4162. UnicodeString::indexOf(const char16_t *srcChars,
  4163. int32_t srcLength,
  4164. int32_t start) const {
  4165. pinIndex(start);
  4166. return indexOf(srcChars, 0, srcLength, start, length() - start);
  4167. }
  4168. inline int32_t
  4169. UnicodeString::indexOf(ConstChar16Ptr srcChars,
  4170. int32_t srcLength,
  4171. int32_t start,
  4172. int32_t _length) const
  4173. { return indexOf(srcChars, 0, srcLength, start, _length); }
  4174. inline int32_t
  4175. UnicodeString::indexOf(char16_t c,
  4176. int32_t start,
  4177. int32_t _length) const
  4178. { return doIndexOf(c, start, _length); }
  4179. inline int32_t
  4180. UnicodeString::indexOf(UChar32 c,
  4181. int32_t start,
  4182. int32_t _length) const
  4183. { return doIndexOf(c, start, _length); }
  4184. inline int32_t
  4185. UnicodeString::indexOf(char16_t c) const
  4186. { return doIndexOf(c, 0, length()); }
  4187. inline int32_t
  4188. UnicodeString::indexOf(UChar32 c) const
  4189. { return indexOf(c, 0, length()); }
  4190. inline int32_t
  4191. UnicodeString::indexOf(char16_t c,
  4192. int32_t start) const {
  4193. pinIndex(start);
  4194. return doIndexOf(c, start, length() - start);
  4195. }
  4196. inline int32_t
  4197. UnicodeString::indexOf(UChar32 c,
  4198. int32_t start) const {
  4199. pinIndex(start);
  4200. return indexOf(c, start, length() - start);
  4201. }
  4202. inline int32_t
  4203. UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
  4204. int32_t srcLength,
  4205. int32_t start,
  4206. int32_t _length) const
  4207. { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
  4208. inline int32_t
  4209. UnicodeString::lastIndexOf(const char16_t *srcChars,
  4210. int32_t srcLength,
  4211. int32_t start) const {
  4212. pinIndex(start);
  4213. return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
  4214. }
  4215. inline int32_t
  4216. UnicodeString::lastIndexOf(const UnicodeString& srcText,
  4217. int32_t srcStart,
  4218. int32_t srcLength,
  4219. int32_t start,
  4220. int32_t _length) const
  4221. {
  4222. if(!srcText.isBogus()) {
  4223. srcText.pinIndices(srcStart, srcLength);
  4224. if(srcLength > 0) {
  4225. return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  4226. }
  4227. }
  4228. return -1;
  4229. }
  4230. inline int32_t
  4231. UnicodeString::lastIndexOf(const UnicodeString& text,
  4232. int32_t start,
  4233. int32_t _length) const
  4234. { return lastIndexOf(text, 0, text.length(), start, _length); }
  4235. inline int32_t
  4236. UnicodeString::lastIndexOf(const UnicodeString& text,
  4237. int32_t start) const {
  4238. pinIndex(start);
  4239. return lastIndexOf(text, 0, text.length(), start, length() - start);
  4240. }
  4241. inline int32_t
  4242. UnicodeString::lastIndexOf(const UnicodeString& text) const
  4243. { return lastIndexOf(text, 0, text.length(), 0, length()); }
  4244. inline int32_t
  4245. UnicodeString::lastIndexOf(char16_t c,
  4246. int32_t start,
  4247. int32_t _length) const
  4248. { return doLastIndexOf(c, start, _length); }
  4249. inline int32_t
  4250. UnicodeString::lastIndexOf(UChar32 c,
  4251. int32_t start,
  4252. int32_t _length) const {
  4253. return doLastIndexOf(c, start, _length);
  4254. }
  4255. inline int32_t
  4256. UnicodeString::lastIndexOf(char16_t c) const
  4257. { return doLastIndexOf(c, 0, length()); }
  4258. inline int32_t
  4259. UnicodeString::lastIndexOf(UChar32 c) const {
  4260. return lastIndexOf(c, 0, length());
  4261. }
  4262. inline int32_t
  4263. UnicodeString::lastIndexOf(char16_t c,
  4264. int32_t start) const {
  4265. pinIndex(start);
  4266. return doLastIndexOf(c, start, length() - start);
  4267. }
  4268. inline int32_t
  4269. UnicodeString::lastIndexOf(UChar32 c,
  4270. int32_t start) const {
  4271. pinIndex(start);
  4272. return lastIndexOf(c, start, length() - start);
  4273. }
  4274. inline UBool
  4275. UnicodeString::startsWith(const UnicodeString& text) const
  4276. { return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
  4277. inline UBool
  4278. UnicodeString::startsWith(const UnicodeString& srcText,
  4279. int32_t srcStart,
  4280. int32_t srcLength) const
  4281. { return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
  4282. inline UBool
  4283. UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
  4284. if(srcLength < 0) {
  4285. srcLength = u_strlen(toUCharPtr(srcChars));
  4286. }
  4287. return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
  4288. }
  4289. inline UBool
  4290. UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
  4291. if(srcLength < 0) {
  4292. srcLength = u_strlen(toUCharPtr(srcChars));
  4293. }
  4294. return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
  4295. }
  4296. inline UBool
  4297. UnicodeString::endsWith(const UnicodeString& text) const
  4298. { return doEqualsSubstring(length() - text.length(), text.length(),
  4299. text, 0, text.length()); }
  4300. inline UBool
  4301. UnicodeString::endsWith(const UnicodeString& srcText,
  4302. int32_t srcStart,
  4303. int32_t srcLength) const {
  4304. srcText.pinIndices(srcStart, srcLength);
  4305. return doEqualsSubstring(length() - srcLength, srcLength,
  4306. srcText, srcStart, srcLength);
  4307. }
  4308. inline UBool
  4309. UnicodeString::endsWith(ConstChar16Ptr srcChars,
  4310. int32_t srcLength) const {
  4311. if(srcLength < 0) {
  4312. srcLength = u_strlen(toUCharPtr(srcChars));
  4313. }
  4314. return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
  4315. }
  4316. inline UBool
  4317. UnicodeString::endsWith(const char16_t *srcChars,
  4318. int32_t srcStart,
  4319. int32_t srcLength) const {
  4320. if(srcLength < 0) {
  4321. srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
  4322. }
  4323. return doEqualsSubstring(length() - srcLength, srcLength,
  4324. srcChars, srcStart, srcLength);
  4325. }
  4326. //========================================
  4327. // replace
  4328. //========================================
  4329. inline UnicodeString&
  4330. UnicodeString::replace(int32_t start,
  4331. int32_t _length,
  4332. const UnicodeString& srcText)
  4333. { return doReplace(start, _length, srcText, 0, srcText.length()); }
  4334. inline UnicodeString&
  4335. UnicodeString::replace(int32_t start,
  4336. int32_t _length,
  4337. const UnicodeString& srcText,
  4338. int32_t srcStart,
  4339. int32_t srcLength)
  4340. { return doReplace(start, _length, srcText, srcStart, srcLength); }
  4341. inline UnicodeString&
  4342. UnicodeString::replace(int32_t start,
  4343. int32_t _length,
  4344. ConstChar16Ptr srcChars,
  4345. int32_t srcLength)
  4346. { return doReplace(start, _length, srcChars, 0, srcLength); }
  4347. inline UnicodeString&
  4348. UnicodeString::replace(int32_t start,
  4349. int32_t _length,
  4350. const char16_t *srcChars,
  4351. int32_t srcStart,
  4352. int32_t srcLength)
  4353. { return doReplace(start, _length, srcChars, srcStart, srcLength); }
  4354. inline UnicodeString&
  4355. UnicodeString::replace(int32_t start,
  4356. int32_t _length,
  4357. char16_t srcChar)
  4358. { return doReplace(start, _length, &srcChar, 0, 1); }
  4359. inline UnicodeString&
  4360. UnicodeString::replaceBetween(int32_t start,
  4361. int32_t limit,
  4362. const UnicodeString& srcText)
  4363. { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
  4364. inline UnicodeString&
  4365. UnicodeString::replaceBetween(int32_t start,
  4366. int32_t limit,
  4367. const UnicodeString& srcText,
  4368. int32_t srcStart,
  4369. int32_t srcLimit)
  4370. { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
  4371. inline UnicodeString&
  4372. UnicodeString::findAndReplace(const UnicodeString& oldText,
  4373. const UnicodeString& newText)
  4374. { return findAndReplace(0, length(), oldText, 0, oldText.length(),
  4375. newText, 0, newText.length()); }
  4376. inline UnicodeString&
  4377. UnicodeString::findAndReplace(int32_t start,
  4378. int32_t _length,
  4379. const UnicodeString& oldText,
  4380. const UnicodeString& newText)
  4381. { return findAndReplace(start, _length, oldText, 0, oldText.length(),
  4382. newText, 0, newText.length()); }
  4383. // ============================
  4384. // extract
  4385. // ============================
  4386. inline void
  4387. UnicodeString::doExtract(int32_t start,
  4388. int32_t _length,
  4389. UnicodeString& target) const
  4390. { target.replace(0, target.length(), *this, start, _length); }
  4391. inline void
  4392. UnicodeString::extract(int32_t start,
  4393. int32_t _length,
  4394. Char16Ptr target,
  4395. int32_t targetStart) const
  4396. { doExtract(start, _length, target, targetStart); }
  4397. inline void
  4398. UnicodeString::extract(int32_t start,
  4399. int32_t _length,
  4400. UnicodeString& target) const
  4401. { doExtract(start, _length, target); }
  4402. #if !UCONFIG_NO_CONVERSION
  4403. inline int32_t
  4404. UnicodeString::extract(int32_t start,
  4405. int32_t _length,
  4406. char *dst,
  4407. const char *codepage) const
  4408. {
  4409. // This dstSize value will be checked explicitly
  4410. return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
  4411. }
  4412. #endif
  4413. inline void
  4414. UnicodeString::extractBetween(int32_t start,
  4415. int32_t limit,
  4416. char16_t *dst,
  4417. int32_t dstStart) const {
  4418. pinIndex(start);
  4419. pinIndex(limit);
  4420. doExtract(start, limit - start, dst, dstStart);
  4421. }
  4422. inline UnicodeString
  4423. UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
  4424. return tempSubString(start, limit - start);
  4425. }
  4426. inline char16_t
  4427. UnicodeString::doCharAt(int32_t offset) const
  4428. {
  4429. if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
  4430. return getArrayStart()[offset];
  4431. } else {
  4432. return kInvalidUChar;
  4433. }
  4434. }
  4435. inline char16_t
  4436. UnicodeString::charAt(int32_t offset) const
  4437. { return doCharAt(offset); }
  4438. inline char16_t
  4439. UnicodeString::operator[] (int32_t offset) const
  4440. { return doCharAt(offset); }
  4441. inline UBool
  4442. UnicodeString::isEmpty() const {
  4443. // Arithmetic or logical right shift does not matter: only testing for 0.
  4444. return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
  4445. }
  4446. //========================================
  4447. // Write implementation methods
  4448. //========================================
  4449. inline void
  4450. UnicodeString::setZeroLength() {
  4451. fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
  4452. }
  4453. inline void
  4454. UnicodeString::setShortLength(int32_t len) {
  4455. // requires 0 <= len <= kMaxShortLength
  4456. fUnion.fFields.fLengthAndFlags =
  4457. static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
  4458. }
  4459. inline void
  4460. UnicodeString::setLength(int32_t len) {
  4461. if(len <= kMaxShortLength) {
  4462. setShortLength(len);
  4463. } else {
  4464. fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
  4465. fUnion.fFields.fLength = len;
  4466. }
  4467. }
  4468. inline void
  4469. UnicodeString::setToEmpty() {
  4470. fUnion.fFields.fLengthAndFlags = kShortString;
  4471. }
  4472. inline void
  4473. UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
  4474. setLength(len);
  4475. fUnion.fFields.fArray = array;
  4476. fUnion.fFields.fCapacity = capacity;
  4477. }
  4478. inline UnicodeString&
  4479. UnicodeString::operator= (char16_t ch)
  4480. { return doReplace(0, length(), &ch, 0, 1); }
  4481. inline UnicodeString&
  4482. UnicodeString::operator= (UChar32 ch)
  4483. { return replace(0, length(), ch); }
  4484. inline UnicodeString&
  4485. UnicodeString::setTo(const UnicodeString& srcText,
  4486. int32_t srcStart,
  4487. int32_t srcLength)
  4488. {
  4489. unBogus();
  4490. return doReplace(0, length(), srcText, srcStart, srcLength);
  4491. }
  4492. inline UnicodeString&
  4493. UnicodeString::setTo(const UnicodeString& srcText,
  4494. int32_t srcStart)
  4495. {
  4496. unBogus();
  4497. srcText.pinIndex(srcStart);
  4498. return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
  4499. }
  4500. inline UnicodeString&
  4501. UnicodeString::setTo(const UnicodeString& srcText)
  4502. {
  4503. return copyFrom(srcText);
  4504. }
  4505. inline UnicodeString&
  4506. UnicodeString::setTo(const char16_t *srcChars,
  4507. int32_t srcLength)
  4508. {
  4509. unBogus();
  4510. return doReplace(0, length(), srcChars, 0, srcLength);
  4511. }
  4512. inline UnicodeString&
  4513. UnicodeString::setTo(char16_t srcChar)
  4514. {
  4515. unBogus();
  4516. return doReplace(0, length(), &srcChar, 0, 1);
  4517. }
  4518. inline UnicodeString&
  4519. UnicodeString::setTo(UChar32 srcChar)
  4520. {
  4521. unBogus();
  4522. return replace(0, length(), srcChar);
  4523. }
  4524. inline UnicodeString&
  4525. UnicodeString::append(const UnicodeString& srcText,
  4526. int32_t srcStart,
  4527. int32_t srcLength)
  4528. { return doAppend(srcText, srcStart, srcLength); }
  4529. inline UnicodeString&
  4530. UnicodeString::append(const UnicodeString& srcText)
  4531. { return doAppend(srcText, 0, srcText.length()); }
  4532. inline UnicodeString&
  4533. UnicodeString::append(const char16_t *srcChars,
  4534. int32_t srcStart,
  4535. int32_t srcLength)
  4536. { return doAppend(srcChars, srcStart, srcLength); }
  4537. inline UnicodeString&
  4538. UnicodeString::append(ConstChar16Ptr srcChars,
  4539. int32_t srcLength)
  4540. { return doAppend(srcChars, 0, srcLength); }
  4541. inline UnicodeString&
  4542. UnicodeString::append(char16_t srcChar)
  4543. { return doAppend(&srcChar, 0, 1); }
  4544. inline UnicodeString&
  4545. UnicodeString::operator+= (char16_t ch)
  4546. { return doAppend(&ch, 0, 1); }
  4547. inline UnicodeString&
  4548. UnicodeString::operator+= (UChar32 ch) {
  4549. return append(ch);
  4550. }
  4551. inline UnicodeString&
  4552. UnicodeString::operator+= (const UnicodeString& srcText)
  4553. { return doAppend(srcText, 0, srcText.length()); }
  4554. inline UnicodeString&
  4555. UnicodeString::insert(int32_t start,
  4556. const UnicodeString& srcText,
  4557. int32_t srcStart,
  4558. int32_t srcLength)
  4559. { return doReplace(start, 0, srcText, srcStart, srcLength); }
  4560. inline UnicodeString&
  4561. UnicodeString::insert(int32_t start,
  4562. const UnicodeString& srcText)
  4563. { return doReplace(start, 0, srcText, 0, srcText.length()); }
  4564. inline UnicodeString&
  4565. UnicodeString::insert(int32_t start,
  4566. const char16_t *srcChars,
  4567. int32_t srcStart,
  4568. int32_t srcLength)
  4569. { return doReplace(start, 0, srcChars, srcStart, srcLength); }
  4570. inline UnicodeString&
  4571. UnicodeString::insert(int32_t start,
  4572. ConstChar16Ptr srcChars,
  4573. int32_t srcLength)
  4574. { return doReplace(start, 0, srcChars, 0, srcLength); }
  4575. inline UnicodeString&
  4576. UnicodeString::insert(int32_t start,
  4577. char16_t srcChar)
  4578. { return doReplace(start, 0, &srcChar, 0, 1); }
  4579. inline UnicodeString&
  4580. UnicodeString::insert(int32_t start,
  4581. UChar32 srcChar)
  4582. { return replace(start, 0, srcChar); }
  4583. inline UnicodeString&
  4584. UnicodeString::remove()
  4585. {
  4586. // remove() of a bogus string makes the string empty and non-bogus
  4587. if(isBogus()) {
  4588. setToEmpty();
  4589. } else {
  4590. setZeroLength();
  4591. }
  4592. return *this;
  4593. }
  4594. inline UnicodeString&
  4595. UnicodeString::remove(int32_t start,
  4596. int32_t _length)
  4597. {
  4598. if(start <= 0 && _length == INT32_MAX) {
  4599. // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
  4600. return remove();
  4601. }
  4602. return doReplace(start, _length, nullptr, 0, 0);
  4603. }
  4604. inline UnicodeString&
  4605. UnicodeString::removeBetween(int32_t start,
  4606. int32_t limit)
  4607. { return doReplace(start, limit - start, nullptr, 0, 0); }
  4608. inline UnicodeString &
  4609. UnicodeString::retainBetween(int32_t start, int32_t limit) {
  4610. truncate(limit);
  4611. return doReplace(0, start, nullptr, 0, 0);
  4612. }
  4613. inline UBool
  4614. UnicodeString::truncate(int32_t targetLength)
  4615. {
  4616. if(isBogus() && targetLength == 0) {
  4617. // truncate(0) of a bogus string makes the string empty and non-bogus
  4618. unBogus();
  4619. return false;
  4620. } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
  4621. setLength(targetLength);
  4622. return true;
  4623. } else {
  4624. return false;
  4625. }
  4626. }
  4627. inline UnicodeString&
  4628. UnicodeString::reverse()
  4629. { return doReverse(0, length()); }
  4630. inline UnicodeString&
  4631. UnicodeString::reverse(int32_t start,
  4632. int32_t _length)
  4633. { return doReverse(start, _length); }
  4634. U_NAMESPACE_END
  4635. #endif /* U_SHOW_CPLUSPLUS_API */
  4636. #endif