Twine.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_ADT_TWINE_H
  14. #define LLVM_ADT_TWINE_H
  15. #include "llvm/ADT/SmallVector.h"
  16. #include "llvm/ADT/StringRef.h"
  17. #include "llvm/Support/ErrorHandling.h"
  18. #include <cassert>
  19. #include <cstdint>
  20. #include <string>
  21. namespace llvm {
  22. class formatv_object_base;
  23. class raw_ostream;
  24. /// Twine - A lightweight data structure for efficiently representing the
  25. /// concatenation of temporary values as strings.
  26. ///
  27. /// A Twine is a kind of rope, it represents a concatenated string using a
  28. /// binary-tree, where the string is the preorder of the nodes. Since the
  29. /// Twine can be efficiently rendered into a buffer when its result is used,
  30. /// it avoids the cost of generating temporary values for intermediate string
  31. /// results -- particularly in cases when the Twine result is never
  32. /// required. By explicitly tracking the type of leaf nodes, we can also avoid
  33. /// the creation of temporary strings for conversions operations (such as
  34. /// appending an integer to a string).
  35. ///
  36. /// A Twine is not intended for use directly and should not be stored, its
  37. /// implementation relies on the ability to store pointers to temporary stack
  38. /// objects which may be deallocated at the end of a statement. Twines should
  39. /// only be used accepted as const references in arguments, when an API wishes
  40. /// to accept possibly-concatenated strings.
  41. ///
  42. /// Twines support a special 'null' value, which always concatenates to form
  43. /// itself, and renders as an empty string. This can be returned from APIs to
  44. /// effectively nullify any concatenations performed on the result.
  45. ///
  46. /// \b Implementation
  47. ///
  48. /// Given the nature of a Twine, it is not possible for the Twine's
  49. /// concatenation method to construct interior nodes; the result must be
  50. /// represented inside the returned value. For this reason a Twine object
  51. /// actually holds two values, the left- and right-hand sides of a
  52. /// concatenation. We also have nullary Twine objects, which are effectively
  53. /// sentinel values that represent empty strings.
  54. ///
  55. /// Thus, a Twine can effectively have zero, one, or two children. The \see
  56. /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
  57. /// testing the number of children.
  58. ///
  59. /// We maintain a number of invariants on Twine objects (FIXME: Why):
  60. /// - Nullary twines are always represented with their Kind on the left-hand
  61. /// side, and the Empty kind on the right-hand side.
  62. /// - Unary twines are always represented with the value on the left-hand
  63. /// side, and the Empty kind on the right-hand side.
  64. /// - If a Twine has another Twine as a child, that child should always be
  65. /// binary (otherwise it could have been folded into the parent).
  66. ///
  67. /// These invariants are check by \see isValid().
  68. ///
  69. /// \b Efficiency Considerations
  70. ///
  71. /// The Twine is designed to yield efficient and small code for common
  72. /// situations. For this reason, the concat() method is inlined so that
  73. /// concatenations of leaf nodes can be optimized into stores directly into a
  74. /// single stack allocated object.
  75. ///
  76. /// In practice, not all compilers can be trusted to optimize concat() fully,
  77. /// so we provide two additional methods (and accompanying operator+
  78. /// overloads) to guarantee that particularly important cases (cstring plus
  79. /// StringRef) codegen as desired.
  80. class Twine {
  81. /// NodeKind - Represent the type of an argument.
  82. enum NodeKind : unsigned char {
  83. /// An empty string; the result of concatenating anything with it is also
  84. /// empty.
  85. NullKind,
  86. /// The empty string.
  87. EmptyKind,
  88. /// A pointer to a Twine instance.
  89. TwineKind,
  90. /// A pointer to a C string instance.
  91. CStringKind,
  92. /// A pointer to an std::string instance.
  93. StdStringKind,
  94. /// A pointer to a StringRef instance.
  95. StringRefKind,
  96. /// A pointer to a SmallString instance.
  97. SmallStringKind,
  98. /// A pointer to a formatv_object_base instance.
  99. FormatvObjectKind,
  100. /// A char value, to render as a character.
  101. CharKind,
  102. /// An unsigned int value, to render as an unsigned decimal integer.
  103. DecUIKind,
  104. /// An int value, to render as a signed decimal integer.
  105. DecIKind,
  106. /// A pointer to an unsigned long value, to render as an unsigned decimal
  107. /// integer.
  108. DecULKind,
  109. /// A pointer to a long value, to render as a signed decimal integer.
  110. DecLKind,
  111. /// A pointer to an unsigned long long value, to render as an unsigned
  112. /// decimal integer.
  113. DecULLKind,
  114. /// A pointer to a long long value, to render as a signed decimal integer.
  115. DecLLKind,
  116. /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
  117. /// integer.
  118. UHexKind
  119. };
  120. union Child
  121. {
  122. const Twine *twine;
  123. const char *cString;
  124. const std::string *stdString;
  125. const StringRef *stringRef;
  126. const SmallVectorImpl<char> *smallString;
  127. const formatv_object_base *formatvObject;
  128. char character;
  129. unsigned int decUI;
  130. int decI;
  131. const unsigned long *decUL;
  132. const long *decL;
  133. const unsigned long long *decULL;
  134. const long long *decLL;
  135. const uint64_t *uHex;
  136. };
  137. /// LHS - The prefix in the concatenation, which may be uninitialized for
  138. /// Null or Empty kinds.
  139. Child LHS;
  140. /// RHS - The suffix in the concatenation, which may be uninitialized for
  141. /// Null or Empty kinds.
  142. Child RHS;
  143. /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
  144. NodeKind LHSKind = EmptyKind;
  145. /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
  146. NodeKind RHSKind = EmptyKind;
  147. /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
  148. explicit Twine(NodeKind Kind) : LHSKind(Kind) {
  149. assert(isNullary() && "Invalid kind!");
  150. }
  151. /// Construct a binary twine.
  152. explicit Twine(const Twine &LHS, const Twine &RHS)
  153. : LHSKind(TwineKind), RHSKind(TwineKind) {
  154. this->LHS.twine = &LHS;
  155. this->RHS.twine = &RHS;
  156. assert(isValid() && "Invalid twine!");
  157. }
  158. /// Construct a twine from explicit values.
  159. explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind)
  160. : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) {
  161. assert(isValid() && "Invalid twine!");
  162. }
  163. /// Check for the null twine.
  164. bool isNull() const {
  165. return getLHSKind() == NullKind;
  166. }
  167. /// Check for the empty twine.
  168. bool isEmpty() const {
  169. return getLHSKind() == EmptyKind;
  170. }
  171. /// Check if this is a nullary twine (null or empty).
  172. bool isNullary() const {
  173. return isNull() || isEmpty();
  174. }
  175. /// Check if this is a unary twine.
  176. bool isUnary() const {
  177. return getRHSKind() == EmptyKind && !isNullary();
  178. }
  179. /// Check if this is a binary twine.
  180. bool isBinary() const {
  181. return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
  182. }
  183. /// Check if this is a valid twine (satisfying the invariants on
  184. /// order and number of arguments).
  185. bool isValid() const {
  186. // Nullary twines always have Empty on the RHS.
  187. if (isNullary() && getRHSKind() != EmptyKind)
  188. return false;
  189. // Null should never appear on the RHS.
  190. if (getRHSKind() == NullKind)
  191. return false;
  192. // The RHS cannot be non-empty if the LHS is empty.
  193. if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
  194. return false;
  195. // A twine child should always be binary.
  196. if (getLHSKind() == TwineKind &&
  197. !LHS.twine->isBinary())
  198. return false;
  199. if (getRHSKind() == TwineKind &&
  200. !RHS.twine->isBinary())
  201. return false;
  202. return true;
  203. }
  204. /// Get the NodeKind of the left-hand side.
  205. NodeKind getLHSKind() const { return LHSKind; }
  206. /// Get the NodeKind of the right-hand side.
  207. NodeKind getRHSKind() const { return RHSKind; }
  208. /// Print one child from a twine.
  209. void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const;
  210. /// Print the representation of one child from a twine.
  211. void printOneChildRepr(raw_ostream &OS, Child Ptr,
  212. NodeKind Kind) const;
  213. public:
  214. /// @name Constructors
  215. /// @{
  216. /// Construct from an empty string.
  217. /*implicit*/ Twine() {
  218. assert(isValid() && "Invalid twine!");
  219. }
  220. Twine(const Twine &) = default;
  221. /// Construct from a C string.
  222. ///
  223. /// We take care here to optimize "" into the empty twine -- this will be
  224. /// optimized out for string constants. This allows Twine arguments have
  225. /// default "" values, without introducing unnecessary string constants.
  226. /*implicit*/ Twine(const char *Str) {
  227. if (Str[0] != '\0') {
  228. LHS.cString = Str;
  229. LHSKind = CStringKind;
  230. } else
  231. LHSKind = EmptyKind;
  232. assert(isValid() && "Invalid twine!");
  233. }
  234. /// Delete the implicit conversion from nullptr as Twine(const char *)
  235. /// cannot take nullptr.
  236. /*implicit*/ Twine(std::nullptr_t) = delete;
  237. /// Construct from an std::string.
  238. /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) {
  239. LHS.stdString = &Str;
  240. assert(isValid() && "Invalid twine!");
  241. }
  242. /// Construct from a StringRef.
  243. /*implicit*/ Twine(const StringRef &Str) : LHSKind(StringRefKind) {
  244. LHS.stringRef = &Str;
  245. assert(isValid() && "Invalid twine!");
  246. }
  247. /// Construct from a SmallString.
  248. /*implicit*/ Twine(const SmallVectorImpl<char> &Str)
  249. : LHSKind(SmallStringKind) {
  250. LHS.smallString = &Str;
  251. assert(isValid() && "Invalid twine!");
  252. }
  253. /// Construct from a formatv_object_base.
  254. /*implicit*/ Twine(const formatv_object_base &Fmt)
  255. : LHSKind(FormatvObjectKind) {
  256. LHS.formatvObject = &Fmt;
  257. assert(isValid() && "Invalid twine!");
  258. }
  259. /// Construct from a char.
  260. explicit Twine(char Val) : LHSKind(CharKind) {
  261. LHS.character = Val;
  262. }
  263. /// Construct from a signed char.
  264. explicit Twine(signed char Val) : LHSKind(CharKind) {
  265. LHS.character = static_cast<char>(Val);
  266. }
  267. /// Construct from an unsigned char.
  268. explicit Twine(unsigned char Val) : LHSKind(CharKind) {
  269. LHS.character = static_cast<char>(Val);
  270. }
  271. /// Construct a twine to print \p Val as an unsigned decimal integer.
  272. explicit Twine(unsigned Val) : LHSKind(DecUIKind) {
  273. LHS.decUI = Val;
  274. }
  275. /// Construct a twine to print \p Val as a signed decimal integer.
  276. explicit Twine(int Val) : LHSKind(DecIKind) {
  277. LHS.decI = Val;
  278. }
  279. /// Construct a twine to print \p Val as an unsigned decimal integer.
  280. explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) {
  281. LHS.decUL = &Val;
  282. }
  283. /// Construct a twine to print \p Val as a signed decimal integer.
  284. explicit Twine(const long &Val) : LHSKind(DecLKind) {
  285. LHS.decL = &Val;
  286. }
  287. /// Construct a twine to print \p Val as an unsigned decimal integer.
  288. explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) {
  289. LHS.decULL = &Val;
  290. }
  291. /// Construct a twine to print \p Val as a signed decimal integer.
  292. explicit Twine(const long long &Val) : LHSKind(DecLLKind) {
  293. LHS.decLL = &Val;
  294. }
  295. // FIXME: Unfortunately, to make sure this is as efficient as possible we
  296. // need extra binary constructors from particular types. We can't rely on
  297. // the compiler to be smart enough to fold operator+()/concat() down to the
  298. // right thing. Yet.
  299. /// Construct as the concatenation of a C string and a StringRef.
  300. /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
  301. : LHSKind(CStringKind), RHSKind(StringRefKind) {
  302. this->LHS.cString = LHS;
  303. this->RHS.stringRef = &RHS;
  304. assert(isValid() && "Invalid twine!");
  305. }
  306. /// Construct as the concatenation of a StringRef and a C string.
  307. /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
  308. : LHSKind(StringRefKind), RHSKind(CStringKind) {
  309. this->LHS.stringRef = &LHS;
  310. this->RHS.cString = RHS;
  311. assert(isValid() && "Invalid twine!");
  312. }
  313. /// Since the intended use of twines is as temporary objects, assignments
  314. /// when concatenating might cause undefined behavior or stack corruptions
  315. Twine &operator=(const Twine &) = delete;
  316. /// Create a 'null' string, which is an empty string that always
  317. /// concatenates to form another empty string.
  318. static Twine createNull() {
  319. return Twine(NullKind);
  320. }
  321. /// @}
  322. /// @name Numeric Conversions
  323. /// @{
  324. // Construct a twine to print \p Val as an unsigned hexadecimal integer.
  325. static Twine utohexstr(const uint64_t &Val) {
  326. Child LHS, RHS;
  327. LHS.uHex = &Val;
  328. RHS.twine = nullptr;
  329. return Twine(LHS, UHexKind, RHS, EmptyKind);
  330. }
  331. /// @}
  332. /// @name Predicate Operations
  333. /// @{
  334. /// Check if this twine is trivially empty; a false return value does not
  335. /// necessarily mean the twine is empty.
  336. bool isTriviallyEmpty() const {
  337. return isNullary();
  338. }
  339. /// Return true if this twine can be dynamically accessed as a single
  340. /// StringRef value with getSingleStringRef().
  341. bool isSingleStringRef() const {
  342. if (getRHSKind() != EmptyKind) return false;
  343. switch (getLHSKind()) {
  344. case EmptyKind:
  345. case CStringKind:
  346. case StdStringKind:
  347. case StringRefKind:
  348. case SmallStringKind:
  349. return true;
  350. default:
  351. return false;
  352. }
  353. }
  354. /// @}
  355. /// @name String Operations
  356. /// @{
  357. Twine concat(const Twine &Suffix) const;
  358. /// @}
  359. /// @name Output & Conversion.
  360. /// @{
  361. /// Return the twine contents as a std::string.
  362. std::string str() const;
  363. /// Append the concatenated string into the given SmallString or SmallVector.
  364. void toVector(SmallVectorImpl<char> &Out) const;
  365. /// This returns the twine as a single StringRef. This method is only valid
  366. /// if isSingleStringRef() is true.
  367. StringRef getSingleStringRef() const {
  368. assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
  369. switch (getLHSKind()) {
  370. default: llvm_unreachable("Out of sync with isSingleStringRef");
  371. case EmptyKind: return StringRef();
  372. case CStringKind: return StringRef(LHS.cString);
  373. case StdStringKind: return StringRef(*LHS.stdString);
  374. case StringRefKind: return *LHS.stringRef;
  375. case SmallStringKind:
  376. return StringRef(LHS.smallString->data(), LHS.smallString->size());
  377. }
  378. }
  379. /// This returns the twine as a single StringRef if it can be
  380. /// represented as such. Otherwise the twine is written into the given
  381. /// SmallVector and a StringRef to the SmallVector's data is returned.
  382. StringRef toStringRef(SmallVectorImpl<char> &Out) const {
  383. if (isSingleStringRef())
  384. return getSingleStringRef();
  385. toVector(Out);
  386. return StringRef(Out.data(), Out.size());
  387. }
  388. /// This returns the twine as a single null terminated StringRef if it
  389. /// can be represented as such. Otherwise the twine is written into the
  390. /// given SmallVector and a StringRef to the SmallVector's data is returned.
  391. ///
  392. /// The returned StringRef's size does not include the null terminator.
  393. StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
  394. /// Write the concatenated string represented by this twine to the
  395. /// stream \p OS.
  396. void print(raw_ostream &OS) const;
  397. /// Dump the concatenated string represented by this twine to stderr.
  398. void dump() const;
  399. /// Write the representation of this twine to the stream \p OS.
  400. void printRepr(raw_ostream &OS) const;
  401. /// Dump the representation of this twine to stderr.
  402. void dumpRepr() const;
  403. /// @}
  404. };
  405. /// @name Twine Inline Implementations
  406. /// @{
  407. inline Twine Twine::concat(const Twine &Suffix) const {
  408. // Concatenation with null is null.
  409. if (isNull() || Suffix.isNull())
  410. return Twine(NullKind);
  411. // Concatenation with empty yields the other side.
  412. if (isEmpty())
  413. return Suffix;
  414. if (Suffix.isEmpty())
  415. return *this;
  416. // Otherwise we need to create a new node, taking care to fold in unary
  417. // twines.
  418. Child NewLHS, NewRHS;
  419. NewLHS.twine = this;
  420. NewRHS.twine = &Suffix;
  421. NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
  422. if (isUnary()) {
  423. NewLHS = LHS;
  424. NewLHSKind = getLHSKind();
  425. }
  426. if (Suffix.isUnary()) {
  427. NewRHS = Suffix.LHS;
  428. NewRHSKind = Suffix.getLHSKind();
  429. }
  430. return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
  431. }
  432. inline Twine operator+(const Twine &LHS, const Twine &RHS) {
  433. return LHS.concat(RHS);
  434. }
  435. /// Additional overload to guarantee simplified codegen; this is equivalent to
  436. /// concat().
  437. inline Twine operator+(const char *LHS, const StringRef &RHS) {
  438. return Twine(LHS, RHS);
  439. }
  440. /// Additional overload to guarantee simplified codegen; this is equivalent to
  441. /// concat().
  442. inline Twine operator+(const StringRef &LHS, const char *RHS) {
  443. return Twine(LHS, RHS);
  444. }
  445. inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
  446. RHS.print(OS);
  447. return OS;
  448. }
  449. /// @}
  450. } // end namespace llvm
  451. #endif // LLVM_ADT_TWINE_H
  452. #ifdef __GNUC__
  453. #pragma GCC diagnostic pop
  454. #endif