StringRef.h 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_ADT_STRINGREF_H
  14. #define LLVM_ADT_STRINGREF_H
  15. #include "llvm/ADT/DenseMapInfo.h"
  16. #include "llvm/ADT/STLFunctionalExtras.h"
  17. #include "llvm/ADT/iterator_range.h"
  18. #include "llvm/Support/Compiler.h"
  19. #include <algorithm>
  20. #include <cassert>
  21. #include <cstddef>
  22. #include <cstring>
  23. #include <limits>
  24. #include <string>
  25. #if __cplusplus > 201402L
  26. #include <string_view>
  27. #endif
  28. #include <type_traits>
  29. #include <utility>
  30. // Declare the __builtin_strlen intrinsic for MSVC so it can be used in
  31. // constexpr context.
  32. #if defined(_MSC_VER)
  33. extern "C" size_t __builtin_strlen(const char *);
  34. #endif
  35. namespace llvm {
  36. class APInt;
  37. class hash_code;
  38. template <typename T> class SmallVectorImpl;
  39. class StringRef;
  40. /// Helper functions for StringRef::getAsInteger.
  41. bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
  42. unsigned long long &Result);
  43. bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
  44. bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
  45. unsigned long long &Result);
  46. bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
  47. /// StringRef - Represent a constant reference to a string, i.e. a character
  48. /// array and a length, which need not be null terminated.
  49. ///
  50. /// This class does not own the string data, it is expected to be used in
  51. /// situations where the character data resides in some other buffer, whose
  52. /// lifetime extends past that of the StringRef. For this reason, it is not in
  53. /// general safe to store a StringRef.
  54. class LLVM_GSL_POINTER StringRef {
  55. public:
  56. static constexpr size_t npos = ~size_t(0);
  57. using iterator = const char *;
  58. using const_iterator = const char *;
  59. using size_type = size_t;
  60. private:
  61. /// The start of the string, in an external buffer.
  62. const char *Data = nullptr;
  63. /// The length of the string.
  64. size_t Length = 0;
  65. // Workaround memcmp issue with null pointers (undefined behavior)
  66. // by providing a specialized version
  static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
    // An empty range always compares equal. memcmp is deliberately not called
    // in that case because either pointer may be null, which memcmp does not
    // tolerate.
    return Length != 0 ? ::memcmp(Lhs, Rhs, Length) : 0;
  }
  71. // Constexpr version of std::strlen.
  static constexpr size_t strLen(const char *Str) {
    // Three implementations, selected at preprocessing time:
    //  * after C++14, std::char_traits<char>::length;
    //  * otherwise the __builtin_strlen intrinsic where the toolchain has it;
    //  * otherwise a plain counting loop.
#if __cplusplus > 201402L
    return std::char_traits<char>::length(Str);
#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) ||                   \
    (defined(_MSC_VER) && _MSC_VER >= 1916)
    return __builtin_strlen(Str);
#else
    // Count characters up to, but not including, the terminating null.
    size_t Len = 0;
    while (Str[Len] != '\0')
      ++Len;
    return Len;
#endif
  }
  85. public:
  86. /// @name Constructors
  87. /// @{
  88. /// Construct an empty string ref.
  89. /*implicit*/ StringRef() = default;
  90. /// Disable conversion from nullptr. This prevents things like
  91. /// if (S == nullptr)
  92. StringRef(std::nullptr_t) = delete;
  93. /// Construct a string ref from a cstring.
  94. /*implicit*/ constexpr StringRef(const char *Str)
  95. : Data(Str), Length(Str ? strLen(Str) : 0) {}
  96. /// Construct a string ref from a pointer and length.
  97. /*implicit*/ constexpr StringRef(const char *data, size_t length)
  98. : Data(data), Length(length) {}
  99. /// Construct a string ref from an std::string.
  100. /*implicit*/ StringRef(const std::string &Str)
  101. : Data(Str.data()), Length(Str.length()) {}
  102. #if __cplusplus > 201402L
  103. /// Construct a string ref from an std::string_view.
  104. /*implicit*/ constexpr StringRef(std::string_view Str)
  105. : Data(Str.data()), Length(Str.size()) {}
  106. #endif
  107. /// @}
  108. /// @name Iterators
  109. /// @{
  110. iterator begin() const { return Data; }
  111. iterator end() const { return Data + Length; }
  112. const unsigned char *bytes_begin() const {
  113. return reinterpret_cast<const unsigned char *>(begin());
  114. }
  115. const unsigned char *bytes_end() const {
  116. return reinterpret_cast<const unsigned char *>(end());
  117. }
  118. iterator_range<const unsigned char *> bytes() const {
  119. return make_range(bytes_begin(), bytes_end());
  120. }
  121. /// @}
  122. /// @name String Operations
  123. /// @{
  124. /// data - Get a pointer to the start of the string (which may not be null
  125. /// terminated).
  126. LLVM_NODISCARD
  127. const char *data() const { return Data; }
  128. /// empty - Check if the string is empty.
  129. LLVM_NODISCARD
  130. constexpr bool empty() const { return Length == 0; }
  131. /// size - Get the string size.
  132. LLVM_NODISCARD
  133. constexpr size_t size() const { return Length; }
  134. /// front - Get the first character in the string.
  135. LLVM_NODISCARD
  136. char front() const {
  137. assert(!empty());
  138. return Data[0];
  139. }
  140. /// back - Get the last character in the string.
  141. LLVM_NODISCARD
  142. char back() const {
  143. assert(!empty());
  144. return Data[Length-1];
  145. }
  146. /// copy - Allocate copy in Allocator and return StringRef to it.
  147. template <typename Allocator>
  148. LLVM_NODISCARD StringRef copy(Allocator &A) const {
  149. // Don't request a length 0 copy from the allocator.
  150. if (empty())
  151. return StringRef();
  152. char *S = A.template Allocate<char>(Length);
  153. std::copy(begin(), end(), S);
  154. return StringRef(S, Length);
  155. }
  156. /// equals - Check for string equality, this is more efficient than
  157. /// compare() when the relative ordering of unequal strings isn't needed.
  158. LLVM_NODISCARD
  159. bool equals(StringRef RHS) const {
  160. return (Length == RHS.Length &&
  161. compareMemory(Data, RHS.Data, RHS.Length) == 0);
  162. }
  163. /// Check for string equality, ignoring case.
  164. LLVM_NODISCARD
  165. bool equals_insensitive(StringRef RHS) const {
  166. return Length == RHS.Length && compare_insensitive(RHS) == 0;
  167. }
  168. /// compare - Compare two strings; the result is -1, 0, or 1 if this string
  169. /// is lexicographically less than, equal to, or greater than the \p RHS.
  170. LLVM_NODISCARD
  171. int compare(StringRef RHS) const {
  172. // Check the prefix for a mismatch.
  173. if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
  174. return Res < 0 ? -1 : 1;
  175. // Otherwise the prefixes match, so we only need to check the lengths.
  176. if (Length == RHS.Length)
  177. return 0;
  178. return Length < RHS.Length ? -1 : 1;
  179. }
  180. /// Compare two strings, ignoring case.
  181. LLVM_NODISCARD
  182. int compare_insensitive(StringRef RHS) const;
  183. /// compare_numeric - Compare two strings, treating sequences of digits as
  184. /// numbers.
  185. LLVM_NODISCARD
  186. int compare_numeric(StringRef RHS) const;
  187. /// Determine the edit distance between this string and another
  188. /// string.
  189. ///
  190. /// \param Other the string to compare this string against.
  191. ///
  192. /// \param AllowReplacements whether to allow character
  193. /// replacements (change one character into another) as a single
  194. /// operation, rather than as two operations (an insertion and a
  195. /// removal).
  196. ///
  197. /// \param MaxEditDistance If non-zero, the maximum edit distance that
  198. /// this routine is allowed to compute. If the edit distance will exceed
  199. /// that maximum, returns \c MaxEditDistance+1.
  200. ///
  201. /// \returns the minimum number of character insertions, removals,
  202. /// or (if \p AllowReplacements is \c true) replacements needed to
  203. /// transform one of the given strings into the other. If zero,
  204. /// the strings are identical.
  205. LLVM_NODISCARD
  206. unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
  207. unsigned MaxEditDistance = 0) const;
  208. /// str - Get the contents as an std::string.
  209. LLVM_NODISCARD
  210. std::string str() const {
  211. if (!Data) return std::string();
  212. return std::string(Data, Length);
  213. }
  214. /// @}
  215. /// @name Operator Overloads
  216. /// @{
  217. LLVM_NODISCARD
  218. char operator[](size_t Index) const {
  219. assert(Index < Length && "Invalid index!");
  220. return Data[Index];
  221. }
  222. /// Disallow accidental assignment from a temporary std::string.
  223. ///
  224. /// The declaration here is extra complicated so that `stringRef = {}`
  225. /// and `stringRef = "abc"` continue to select the move assignment operator.
  226. template <typename T>
  227. std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
  228. operator=(T &&Str) = delete;
  229. /// @}
  230. /// @name Type Conversions
  231. /// @{
  232. explicit operator std::string() const { return str(); }
  233. #if __cplusplus > 201402L
  234. operator std::string_view() const {
  235. return std::string_view(data(), size());
  236. }
  237. #endif
  238. /// @}
  239. /// @name String Predicates
  240. /// @{
  241. /// Check if this string starts with the given \p Prefix.
  242. LLVM_NODISCARD
  243. bool startswith(StringRef Prefix) const {
  244. return Length >= Prefix.Length &&
  245. compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
  246. }
  247. /// Check if this string starts with the given \p Prefix, ignoring case.
  248. LLVM_NODISCARD
  249. bool startswith_insensitive(StringRef Prefix) const;
  250. /// Check if this string ends with the given \p Suffix.
  251. LLVM_NODISCARD
  252. bool endswith(StringRef Suffix) const {
  253. return Length >= Suffix.Length &&
  254. compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
  255. }
  256. /// Check if this string ends with the given \p Suffix, ignoring case.
  257. LLVM_NODISCARD
  258. bool endswith_insensitive(StringRef Suffix) const;
  259. /// @}
  260. /// @name String Searching
  261. /// @{
  262. /// Search for the first character \p C in the string.
  263. ///
  264. /// \returns The index of the first occurrence of \p C, or npos if not
  265. /// found.
  266. LLVM_NODISCARD
  267. size_t find(char C, size_t From = 0) const {
  268. size_t FindBegin = std::min(From, Length);
  269. if (FindBegin < Length) { // Avoid calling memchr with nullptr.
  270. // Just forward to memchr, which is faster than a hand-rolled loop.
  271. if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
  272. return static_cast<const char *>(P) - Data;
  273. }
  274. return npos;
  275. }
  276. /// Search for the first character \p C in the string, ignoring case.
  277. ///
  278. /// \returns The index of the first occurrence of \p C, or npos if not
  279. /// found.
  280. LLVM_NODISCARD
  281. size_t find_insensitive(char C, size_t From = 0) const;
  282. /// Search for the first character satisfying the predicate \p F
  283. ///
  284. /// \returns The index of the first character satisfying \p F starting from
  285. /// \p From, or npos if not found.
  286. LLVM_NODISCARD
  287. size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
  288. StringRef S = drop_front(From);
  289. while (!S.empty()) {
  290. if (F(S.front()))
  291. return size() - S.size();
  292. S = S.drop_front();
  293. }
  294. return npos;
  295. }
  296. /// Search for the first character not satisfying the predicate \p F
  297. ///
  298. /// \returns The index of the first character not satisfying \p F starting
  299. /// from \p From, or npos if not found.
  300. LLVM_NODISCARD
  301. size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
  302. return find_if([F](char c) { return !F(c); }, From);
  303. }
  304. /// Search for the first string \p Str in the string.
  305. ///
  306. /// \returns The index of the first occurrence of \p Str, or npos if not
  307. /// found.
  308. LLVM_NODISCARD
  309. size_t find(StringRef Str, size_t From = 0) const;
  310. /// Search for the first string \p Str in the string, ignoring case.
  311. ///
  312. /// \returns The index of the first occurrence of \p Str, or npos if not
  313. /// found.
  314. LLVM_NODISCARD
  315. size_t find_insensitive(StringRef Str, size_t From = 0) const;
  316. /// Search for the last character \p C in the string.
  317. ///
  318. /// \returns The index of the last occurrence of \p C, or npos if not
  319. /// found.
  320. LLVM_NODISCARD
  321. size_t rfind(char C, size_t From = npos) const {
  322. From = std::min(From, Length);
  323. size_t i = From;
  324. while (i != 0) {
  325. --i;
  326. if (Data[i] == C)
  327. return i;
  328. }
  329. return npos;
  330. }
  331. /// Search for the last character \p C in the string, ignoring case.
  332. ///
  333. /// \returns The index of the last occurrence of \p C, or npos if not
  334. /// found.
  335. LLVM_NODISCARD
  336. size_t rfind_insensitive(char C, size_t From = npos) const;
  337. /// Search for the last string \p Str in the string.
  338. ///
  339. /// \returns The index of the last occurrence of \p Str, or npos if not
  340. /// found.
  341. LLVM_NODISCARD
  342. size_t rfind(StringRef Str) const;
  343. /// Search for the last string \p Str in the string, ignoring case.
  344. ///
  345. /// \returns The index of the last occurrence of \p Str, or npos if not
  346. /// found.
  347. LLVM_NODISCARD
  348. size_t rfind_insensitive(StringRef Str) const;
  349. /// Find the first character in the string that is \p C, or npos if not
  350. /// found. Same as find.
  351. LLVM_NODISCARD
  352. size_t find_first_of(char C, size_t From = 0) const {
  353. return find(C, From);
  354. }
  355. /// Find the first character in the string that is in \p Chars, or npos if
  356. /// not found.
  357. ///
  358. /// Complexity: O(size() + Chars.size())
  359. LLVM_NODISCARD
  360. size_t find_first_of(StringRef Chars, size_t From = 0) const;
  361. /// Find the first character in the string that is not \p C or npos if not
  362. /// found.
  363. LLVM_NODISCARD
  364. size_t find_first_not_of(char C, size_t From = 0) const;
  365. /// Find the first character in the string that is not in the string
  366. /// \p Chars, or npos if not found.
  367. ///
  368. /// Complexity: O(size() + Chars.size())
  369. LLVM_NODISCARD
  370. size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
  371. /// Find the last character in the string that is \p C, or npos if not
  372. /// found.
  373. LLVM_NODISCARD
  374. size_t find_last_of(char C, size_t From = npos) const {
  375. return rfind(C, From);
  376. }
  377. /// Find the last character in the string that is in \p Chars, or npos if not
  378. /// found.
  379. ///
  380. /// Complexity: O(size() + Chars.size())
  381. LLVM_NODISCARD
  382. size_t find_last_of(StringRef Chars, size_t From = npos) const;
  383. /// Find the last character in the string that is not \p C, or npos if not
  384. /// found.
  385. LLVM_NODISCARD
  386. size_t find_last_not_of(char C, size_t From = npos) const;
  387. /// Find the last character in the string that is not in \p Chars, or
  388. /// npos if not found.
  389. ///
  390. /// Complexity: O(size() + Chars.size())
  391. LLVM_NODISCARD
  392. size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
  393. /// Return true if the given string is a substring of *this, and false
  394. /// otherwise.
  395. LLVM_NODISCARD
  396. bool contains(StringRef Other) const { return find(Other) != npos; }
  397. /// Return true if the given character is contained in *this, and false
  398. /// otherwise.
  399. LLVM_NODISCARD
  400. bool contains(char C) const { return find_first_of(C) != npos; }
  401. /// Return true if the given string is a substring of *this, ignoring case,
  402. /// and false otherwise.
  403. LLVM_NODISCARD
  404. bool contains_insensitive(StringRef Other) const {
  405. return find_insensitive(Other) != npos;
  406. }
  407. /// Return true if the given character is contained in *this, ignoring case,
  408. /// and false otherwise.
  409. LLVM_NODISCARD
  410. bool contains_insensitive(char C) const {
  411. return find_insensitive(C) != npos;
  412. }
  413. /// @}
  414. /// @name Helpful Algorithms
  415. /// @{
  416. /// Return the number of occurrences of \p C in the string.
  417. LLVM_NODISCARD
  418. size_t count(char C) const {
  419. size_t Count = 0;
  420. for (size_t i = 0, e = Length; i != e; ++i)
  421. if (Data[i] == C)
  422. ++Count;
  423. return Count;
  424. }
  425. /// Return the number of non-overlapped occurrences of \p Str in
  426. /// the string.
  427. size_t count(StringRef Str) const;
  428. /// Parse the current string as an integer of the specified radix. If
  429. /// \p Radix is specified as zero, this does radix autosensing using
  430. /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
  431. ///
  432. /// If the string is invalid or if only a subset of the string is valid,
  433. /// this returns true to signify the error. The string is considered
  434. /// erroneous if empty or if it overflows T.
  435. template <typename T>
  436. std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
  437. getAsInteger(unsigned Radix, T &Result) const {
  438. long long LLVal;
  439. if (getAsSignedInteger(*this, Radix, LLVal) ||
  440. static_cast<T>(LLVal) != LLVal)
  441. return true;
  442. Result = LLVal;
  443. return false;
  444. }
  445. template <typename T>
  446. std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
  447. getAsInteger(unsigned Radix, T &Result) const {
  448. unsigned long long ULLVal;
  449. // The additional cast to unsigned long long is required to avoid the
  450. // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
  451. // 'unsigned __int64' when instantiating getAsInteger with T = bool.
  452. if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
  453. static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
  454. return true;
  455. Result = ULLVal;
  456. return false;
  457. }
  458. /// Parse the current string as an integer of the specified radix. If
  459. /// \p Radix is specified as zero, this does radix autosensing using
  460. /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
  461. ///
  462. /// If the string does not begin with a number of the specified radix,
  463. /// this returns true to signify the error. The string is considered
  464. /// erroneous if empty or if it overflows T.
  465. /// The portion of the string representing the discovered numeric value
  466. /// is removed from the beginning of the string.
  467. template <typename T>
  468. std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
  469. consumeInteger(unsigned Radix, T &Result) {
  470. long long LLVal;
  471. if (consumeSignedInteger(*this, Radix, LLVal) ||
  472. static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
  473. return true;
  474. Result = LLVal;
  475. return false;
  476. }
  477. template <typename T>
  478. std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
  479. consumeInteger(unsigned Radix, T &Result) {
  480. unsigned long long ULLVal;
  481. if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
  482. static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
  483. return true;
  484. Result = ULLVal;
  485. return false;
  486. }
  487. /// Parse the current string as an integer of the specified \p Radix, or of
  488. /// an autosensed radix if the \p Radix given is 0. The current value in
  489. /// \p Result is discarded, and the storage is changed to be wide enough to
  490. /// store the parsed integer.
  491. ///
  492. /// \returns true if the string does not solely consist of a valid
  493. /// non-empty number in the appropriate base.
  494. ///
  495. /// APInt::fromString is superficially similar but assumes the
  496. /// string is well-formed in the given radix.
  497. bool getAsInteger(unsigned Radix, APInt &Result) const;
  498. /// Parse the current string as an IEEE double-precision floating
  499. /// point value. The string must be a well-formed double.
  500. ///
  501. /// If \p AllowInexact is false, the function will fail if the string
  502. /// cannot be represented exactly. Otherwise, the function only fails
  503. /// in case of an overflow or underflow, or an invalid floating point
  504. /// representation.
  505. bool getAsDouble(double &Result, bool AllowInexact = true) const;
  506. /// @}
  507. /// @name String Operations
  508. /// @{
  509. /// Convert the given ASCII string to lowercase.
  510. LLVM_NODISCARD
  511. std::string lower() const;
  512. /// Convert the given ASCII string to uppercase.
  513. LLVM_NODISCARD
  514. std::string upper() const;
  515. /// @}
  516. /// @name Substring Operations
  517. /// @{
  518. /// Return a reference to the substring from [Start, Start + N).
  519. ///
  520. /// \param Start The index of the starting character in the substring; if
  521. /// the index is npos or greater than the length of the string then the
  522. /// empty substring will be returned.
  523. ///
  524. /// \param N The number of characters to include in the substring. If N
  525. /// exceeds the number of characters remaining in the string, the string
  526. /// suffix (starting with \p Start) will be returned.
  527. LLVM_NODISCARD
  528. StringRef substr(size_t Start, size_t N = npos) const {
  529. Start = std::min(Start, Length);
  530. return StringRef(Data + Start, std::min(N, Length - Start));
  531. }
  532. /// Return a StringRef equal to 'this' but with only the first \p N
  533. /// elements remaining. If \p N is greater than the length of the
  534. /// string, the entire string is returned.
  535. LLVM_NODISCARD
  536. StringRef take_front(size_t N = 1) const {
  537. if (N >= size())
  538. return *this;
  539. return drop_back(size() - N);
  540. }
  541. /// Return a StringRef equal to 'this' but with only the last \p N
  542. /// elements remaining. If \p N is greater than the length of the
  543. /// string, the entire string is returned.
  544. LLVM_NODISCARD
  545. StringRef take_back(size_t N = 1) const {
  546. if (N >= size())
  547. return *this;
  548. return drop_front(size() - N);
  549. }
  550. /// Return the longest prefix of 'this' such that every character
  551. /// in the prefix satisfies the given predicate.
  552. LLVM_NODISCARD
  553. StringRef take_while(function_ref<bool(char)> F) const {
  554. return substr(0, find_if_not(F));
  555. }
  556. /// Return the longest prefix of 'this' such that no character in
  557. /// the prefix satisfies the given predicate.
  558. LLVM_NODISCARD
  559. StringRef take_until(function_ref<bool(char)> F) const {
  560. return substr(0, find_if(F));
  561. }
  562. /// Return a StringRef equal to 'this' but with the first \p N elements
  563. /// dropped.
  564. LLVM_NODISCARD
  565. StringRef drop_front(size_t N = 1) const {
  566. assert(size() >= N && "Dropping more elements than exist");
  567. return substr(N);
  568. }
  569. /// Return a StringRef equal to 'this' but with the last \p N elements
  570. /// dropped.
  571. LLVM_NODISCARD
  572. StringRef drop_back(size_t N = 1) const {
  573. assert(size() >= N && "Dropping more elements than exist");
  574. return substr(0, size()-N);
  575. }
  576. /// Return a StringRef equal to 'this', but with all characters satisfying
  577. /// the given predicate dropped from the beginning of the string.
  578. LLVM_NODISCARD
  579. StringRef drop_while(function_ref<bool(char)> F) const {
  580. return substr(find_if_not(F));
  581. }
  582. /// Return a StringRef equal to 'this', but with all characters not
  583. /// satisfying the given predicate dropped from the beginning of the string.
  584. LLVM_NODISCARD
  585. StringRef drop_until(function_ref<bool(char)> F) const {
  586. return substr(find_if(F));
  587. }
  588. /// Returns true if this StringRef has the given prefix and removes that
  589. /// prefix.
  590. bool consume_front(StringRef Prefix) {
  591. if (!startswith(Prefix))
  592. return false;
  593. *this = drop_front(Prefix.size());
  594. return true;
  595. }
  596. /// Returns true if this StringRef has the given prefix, ignoring case,
  597. /// and removes that prefix.
  598. bool consume_front_insensitive(StringRef Prefix) {
  599. if (!startswith_insensitive(Prefix))
  600. return false;
  601. *this = drop_front(Prefix.size());
  602. return true;
  603. }
  604. /// Returns true if this StringRef has the given suffix and removes that
  605. /// suffix.
  606. bool consume_back(StringRef Suffix) {
  607. if (!endswith(Suffix))
  608. return false;
  609. *this = drop_back(Suffix.size());
  610. return true;
  611. }
  612. /// Returns true if this StringRef has the given suffix, ignoring case,
  613. /// and removes that suffix.
  614. bool consume_back_insensitive(StringRef Suffix) {
  615. if (!endswith_insensitive(Suffix))
  616. return false;
  617. *this = drop_back(Suffix.size());
  618. return true;
  619. }
  620. /// Return a reference to the substring from [Start, End).
  621. ///
  622. /// \param Start The index of the starting character in the substring; if
  623. /// the index is npos or greater than the length of the string then the
  624. /// empty substring will be returned.
  625. ///
  626. /// \param End The index following the last character to include in the
  627. /// substring. If this is npos or exceeds the number of characters
  628. /// remaining in the string, the string suffix (starting with \p Start)
  629. /// will be returned. If this is less than \p Start, an empty string will
  630. /// be returned.
  631. LLVM_NODISCARD
  632. StringRef slice(size_t Start, size_t End) const {
  633. Start = std::min(Start, Length);
  634. End = std::min(std::max(Start, End), Length);
  635. return StringRef(Data + Start, End - Start);
  636. }
  637. /// Split into two substrings around the first occurrence of a separator
  638. /// character.
  639. ///
  640. /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
  641. /// such that (*this == LHS + Separator + RHS) is true and RHS is
  642. /// maximal. If \p Separator is not in the string, then the result is a
  643. /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
  644. ///
  645. /// \param Separator The character to split on.
  646. /// \returns The split substrings.
  647. LLVM_NODISCARD
  648. std::pair<StringRef, StringRef> split(char Separator) const {
  649. return split(StringRef(&Separator, 1));
  650. }
  651. /// Split into two substrings around the first occurrence of a separator
  652. /// string.
  653. ///
  654. /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
  655. /// such that (*this == LHS + Separator + RHS) is true and RHS is
  656. /// maximal. If \p Separator is not in the string, then the result is a
  657. /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
  658. ///
  659. /// \param Separator - The string to split on.
  660. /// \return - The split substrings.
  661. LLVM_NODISCARD
  662. std::pair<StringRef, StringRef> split(StringRef Separator) const {
  663. size_t Idx = find(Separator);
  664. if (Idx == npos)
  665. return std::make_pair(*this, StringRef());
  666. return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
  667. }
  668. /// Split into two substrings around the last occurrence of a separator
  669. /// string.
  670. ///
  671. /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
  672. /// such that (*this == LHS + Separator + RHS) is true and RHS is
  673. /// minimal. If \p Separator is not in the string, then the result is a
  674. /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
  675. ///
  676. /// \param Separator - The string to split on.
  677. /// \return - The split substrings.
  678. LLVM_NODISCARD
  679. std::pair<StringRef, StringRef> rsplit(StringRef Separator) const {
  680. size_t Idx = rfind(Separator);
  681. if (Idx == npos)
  682. return std::make_pair(*this, StringRef());
  683. return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
  684. }
  /// Split into substrings around the occurrences of a separator string.
  ///
  /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
  /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
  /// elements are added to \p A.
  /// If \p KeepEmpty is false, empty strings are not added to \p A. They
  /// still count when considering \p MaxSplit.
  /// A useful invariant is that
  /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true.
  ///
  /// \param A - Where to put the substrings.
  /// \param Separator - The string to split on.
  /// \param MaxSplit - The maximum number of times the string is split
  ///        (defaults to -1; per the rule above only values >= 0 impose a
  ///        limit).
  /// \param KeepEmpty - True if empty substrings should be added (the
  ///        default).
  void split(SmallVectorImpl<StringRef> &A,
             StringRef Separator, int MaxSplit = -1,
             bool KeepEmpty = true) const;
  /// Split into substrings around the occurrences of a separator character.
  ///
  /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
  /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
  /// elements are added to \p A.
  /// If \p KeepEmpty is false, empty strings are not added to \p A. They
  /// still count when considering \p MaxSplit.
  /// A useful invariant is that joining the elements of \p A with
  /// \p Separator gives back *this if MaxSplit == -1 and KeepEmpty == true.
  ///
  /// \param A - Where to put the substrings.
  /// \param Separator - The character to split on.
  /// \param MaxSplit - The maximum number of times the string is split
  ///        (defaults to -1; per the rule above only values >= 0 impose a
  ///        limit).
  /// \param KeepEmpty - True if empty substrings should be added (the
  ///        default).
  void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
             bool KeepEmpty = true) const;
  718. /// Split into two substrings around the last occurrence of a separator
  719. /// character.
  720. ///
  721. /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
  722. /// such that (*this == LHS + Separator + RHS) is true and RHS is
  723. /// minimal. If \p Separator is not in the string, then the result is a
  724. /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
  725. ///
  726. /// \param Separator - The character to split on.
  727. /// \return - The split substrings.
  728. LLVM_NODISCARD
  729. std::pair<StringRef, StringRef> rsplit(char Separator) const {
  730. return rsplit(StringRef(&Separator, 1));
  731. }
  732. /// Return string with consecutive \p Char characters starting from the
  733. /// the left removed.
  734. LLVM_NODISCARD
  735. StringRef ltrim(char Char) const {
  736. return drop_front(std::min(Length, find_first_not_of(Char)));
  737. }
  738. /// Return string with consecutive characters in \p Chars starting from
  739. /// the left removed.
  740. LLVM_NODISCARD
  741. StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
  742. return drop_front(std::min(Length, find_first_not_of(Chars)));
  743. }
  744. /// Return string with consecutive \p Char characters starting from the
  745. /// right removed.
  746. LLVM_NODISCARD
  747. StringRef rtrim(char Char) const {
  748. return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
  749. }
  750. /// Return string with consecutive characters in \p Chars starting from
  751. /// the right removed.
  752. LLVM_NODISCARD
  753. StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
  754. return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
  755. }
  756. /// Return string with consecutive \p Char characters starting from the
  757. /// left and right removed.
  758. LLVM_NODISCARD
  759. StringRef trim(char Char) const {
  760. return ltrim(Char).rtrim(Char);
  761. }
  762. /// Return string with consecutive characters in \p Chars starting from
  763. /// the left and right removed.
  764. LLVM_NODISCARD
  765. StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
  766. return ltrim(Chars).rtrim(Chars);
  767. }
  768. /// Detect the line ending style of the string.
  769. ///
  770. /// If the string contains a line ending, return the line ending character
  771. /// sequence that is detected. Otherwise return '\n' for unix line endings.
  772. ///
  773. /// \return - The line ending character sequence.
  774. LLVM_NODISCARD
  775. StringRef detectEOL() const {
  776. size_t Pos = find('\r');
  777. if (Pos == npos) {
  778. // If there is no carriage return, assume unix
  779. return "\n";
  780. }
  781. if (Pos + 1 < Length && Data[Pos + 1] == '\n')
  782. return "\r\n"; // Windows
  783. if (Pos > 0 && Data[Pos - 1] == '\n')
  784. return "\n\r"; // You monster!
  785. return "\r"; // Classic Mac
  786. }
  787. /// @}
  788. };
  /// A wrapper around a string literal that serves as a proxy for constructing
  /// global tables of StringRefs with the length computed at compile time.
  /// In order to avoid the invocation of a global constructor, StringLiteral
  /// should *only* be used in a constexpr context, as such:
  ///
  ///   constexpr StringLiteral S("test");
  ///
  class StringLiteral : public StringRef {
  private:
    // Build a literal from an explicit pointer and length without any check
    // on the contents; used by withInnerNUL() below.
    constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
    }

  public:
    /// Construct from a character array of \p N elements. The resulting
    /// length is N - 1, i.e. the final array element is not counted.
    ///
    /// When building with clang and the enable_if attribute is available, the
    /// constructor is additionally constrained by
    /// __builtin_strlen(Str) == N - 1 ("invalid string literal" otherwise),
    /// which rejects arrays whose first NUL is not the final element. The
    /// -Wgcc-compat diagnostic is silenced around the attribute.
    template <size_t N>
    constexpr StringLiteral(const char (&Str)[N])
#if defined(__clang__) && __has_attribute(enable_if)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgcc-compat"
        __attribute((enable_if(__builtin_strlen(Str) == N - 1,
                               "invalid string literal")))
#pragma clang diagnostic pop
#endif
        : StringRef(Str, N - 1) {
    }

    // Explicit construction for strings like "foo\0bar".
    // Goes through the private (pointer, length) constructor, so the strlen
    // constraint on the public constructor does not apply; the length is
    // N - 1 regardless of embedded NULs.
    template <size_t N>
    static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
      return StringLiteral(Str, N - 1);
    }
  };
  818. /// @name StringRef Comparison Operators
  819. /// @{
  820. inline bool operator==(StringRef LHS, StringRef RHS) {
  821. return LHS.equals(RHS);
  822. }
  823. inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
  824. inline bool operator<(StringRef LHS, StringRef RHS) {
  825. return LHS.compare(RHS) == -1;
  826. }
  827. inline bool operator<=(StringRef LHS, StringRef RHS) {
  828. return LHS.compare(RHS) != 1;
  829. }
  830. inline bool operator>(StringRef LHS, StringRef RHS) {
  831. return LHS.compare(RHS) == 1;
  832. }
  833. inline bool operator>=(StringRef LHS, StringRef RHS) {
  834. return LHS.compare(RHS) != -1;
  835. }
  836. inline std::string &operator+=(std::string &buffer, StringRef string) {
  837. return buffer.append(string.data(), string.size());
  838. }
  839. /// @}
  /// Compute a hash_code for a StringRef.
  ///
  /// This is a declaration only; the result is marked LLVM_NODISCARD.
  ///
  /// \param S - The string to hash.
  /// \return - The hash_code computed for \p S.
  LLVM_NODISCARD
  hash_code hash_value(StringRef S);
  843. // Provide DenseMapInfo for StringRefs.
  844. template <> struct DenseMapInfo<StringRef, void> {
  845. static inline StringRef getEmptyKey() {
  846. return StringRef(
  847. reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
  848. }
  849. static inline StringRef getTombstoneKey() {
  850. return StringRef(
  851. reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0);
  852. }
  853. static unsigned getHashValue(StringRef Val);
  854. static bool isEqual(StringRef LHS, StringRef RHS) {
  855. if (RHS.data() == getEmptyKey().data())
  856. return LHS.data() == getEmptyKey().data();
  857. if (RHS.data() == getTombstoneKey().data())
  858. return LHS.data() == getTombstoneKey().data();
  859. return LHS == RHS;
  860. }
  861. };
  862. } // end namespace llvm
  863. #endif // LLVM_ADT_STRINGREF_H
  864. #ifdef __GNUC__
  865. #pragma GCC diagnostic pop
  866. #endif