string_utils.cpp 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. //===-- string_utils.cpp ----------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "string_utils.h"
  9. #include "common.h"
  10. #include <stdarg.h>
  11. #include <string.h>
  12. namespace scudo {
  // Stores |C| at the write cursor |*Buffer| and advances the cursor, unless the
  // cursor has already reached |BufferEnd|, in which case nothing is written.
  // Always returns 1, so summing the results over a whole format operation
  // yields the length the untruncated output would have had.
  static int appendChar(char **Buffer, const char *BufferEnd, char C) {
    char *&Cursor = *Buffer;
    if (Cursor < BufferEnd)
      *Cursor++ = C;
    return 1;
  }
  20. // Appends number in a given Base to buffer. If its length is less than
  21. // |MinNumberLength|, it is padded with leading zeroes or spaces, depending
  22. // on the value of |PadWithZero|.
  23. static int appendNumber(char **Buffer, const char *BufferEnd, u64 AbsoluteValue,
  24. u8 Base, u8 MinNumberLength, bool PadWithZero,
  25. bool Negative, bool Upper) {
  26. constexpr uptr MaxLen = 30;
  27. RAW_CHECK(Base == 10 || Base == 16);
  28. RAW_CHECK(Base == 10 || !Negative);
  29. RAW_CHECK(AbsoluteValue || !Negative);
  30. RAW_CHECK(MinNumberLength < MaxLen);
  31. int Res = 0;
  32. if (Negative && MinNumberLength)
  33. --MinNumberLength;
  34. if (Negative && PadWithZero)
  35. Res += appendChar(Buffer, BufferEnd, '-');
  36. uptr NumBuffer[MaxLen];
  37. int Pos = 0;
  38. do {
  39. RAW_CHECK_MSG(static_cast<uptr>(Pos) < MaxLen,
  40. "appendNumber buffer overflow");
  41. NumBuffer[Pos++] = static_cast<uptr>(AbsoluteValue % Base);
  42. AbsoluteValue /= Base;
  43. } while (AbsoluteValue > 0);
  44. if (Pos < MinNumberLength) {
  45. memset(&NumBuffer[Pos], 0,
  46. sizeof(NumBuffer[0]) * static_cast<uptr>(MinNumberLength - Pos));
  47. Pos = MinNumberLength;
  48. }
  49. RAW_CHECK(Pos > 0);
  50. Pos--;
  51. for (; Pos >= 0 && NumBuffer[Pos] == 0; Pos--) {
  52. char c = (PadWithZero || Pos == 0) ? '0' : ' ';
  53. Res += appendChar(Buffer, BufferEnd, c);
  54. }
  55. if (Negative && !PadWithZero)
  56. Res += appendChar(Buffer, BufferEnd, '-');
  57. for (; Pos >= 0; Pos--) {
  58. char Digit = static_cast<char>(NumBuffer[Pos]);
  59. Digit = static_cast<char>((Digit < 10) ? '0' + Digit
  60. : (Upper ? 'A' : 'a') + Digit - 10);
  61. Res += appendChar(Buffer, BufferEnd, Digit);
  62. }
  63. return Res;
  64. }
  65. static int appendUnsigned(char **Buffer, const char *BufferEnd, u64 Num,
  66. u8 Base, u8 MinNumberLength, bool PadWithZero,
  67. bool Upper) {
  68. return appendNumber(Buffer, BufferEnd, Num, Base, MinNumberLength,
  69. PadWithZero, /*Negative=*/false, Upper);
  70. }
  71. static int appendSignedDecimal(char **Buffer, const char *BufferEnd, s64 Num,
  72. u8 MinNumberLength, bool PadWithZero) {
  73. const bool Negative = (Num < 0);
  74. const u64 UnsignedNum = (Num == INT64_MIN)
  75. ? static_cast<u64>(INT64_MAX) + 1
  76. : static_cast<u64>(Negative ? -Num : Num);
  77. return appendNumber(Buffer, BufferEnd, UnsignedNum, 10, MinNumberLength,
  78. PadWithZero, Negative, /*Upper=*/false);
  79. }
  80. // Use the fact that explicitly requesting 0 Width (%0s) results in UB and
  81. // interpret Width == 0 as "no Width requested":
  82. // Width == 0 - no Width requested
  83. // Width < 0 - left-justify S within and pad it to -Width chars, if necessary
  84. // Width > 0 - right-justify S, not implemented yet
  85. static int appendString(char **Buffer, const char *BufferEnd, int Width,
  86. int MaxChars, const char *S) {
  87. if (!S)
  88. S = "<null>";
  89. int Res = 0;
  90. for (; *S; S++) {
  91. if (MaxChars >= 0 && Res >= MaxChars)
  92. break;
  93. Res += appendChar(Buffer, BufferEnd, *S);
  94. }
  95. // Only the left justified strings are supported.
  96. while (Width < -Res)
  97. Res += appendChar(Buffer, BufferEnd, ' ');
  98. return Res;
  99. }
  100. static int appendPointer(char **Buffer, const char *BufferEnd, u64 ptr_value) {
  101. int Res = 0;
  102. Res += appendString(Buffer, BufferEnd, 0, -1, "0x");
  103. Res += appendUnsigned(Buffer, BufferEnd, ptr_value, 16,
  104. SCUDO_POINTER_FORMAT_LENGTH, /*PadWithZero=*/true,
  105. /*Upper=*/false);
  106. return Res;
  107. }
  108. static int formatString(char *Buffer, uptr BufferLength, const char *Format,
  109. va_list Args) {
  110. static const char *PrintfFormatsHelp =
  111. "Supported formatString formats: %([0-9]*)?(z|ll)?{d,u,x,X}; %p; "
  112. "%[-]([0-9]*)?(\\.\\*)?s; %c\n";
  113. RAW_CHECK(Format);
  114. RAW_CHECK(BufferLength > 0);
  115. const char *BufferEnd = &Buffer[BufferLength - 1];
  116. const char *Cur = Format;
  117. int Res = 0;
  118. for (; *Cur; Cur++) {
  119. if (*Cur != '%') {
  120. Res += appendChar(&Buffer, BufferEnd, *Cur);
  121. continue;
  122. }
  123. Cur++;
  124. const bool LeftJustified = *Cur == '-';
  125. if (LeftJustified)
  126. Cur++;
  127. bool HaveWidth = (*Cur >= '0' && *Cur <= '9');
  128. const bool PadWithZero = (*Cur == '0');
  129. u8 Width = 0;
  130. if (HaveWidth) {
  131. while (*Cur >= '0' && *Cur <= '9')
  132. Width = static_cast<u8>(Width * 10 + *Cur++ - '0');
  133. }
  134. const bool HavePrecision = (Cur[0] == '.' && Cur[1] == '*');
  135. int Precision = -1;
  136. if (HavePrecision) {
  137. Cur += 2;
  138. Precision = va_arg(Args, int);
  139. }
  140. const bool HaveZ = (*Cur == 'z');
  141. Cur += HaveZ;
  142. const bool HaveLL = !HaveZ && (Cur[0] == 'l' && Cur[1] == 'l');
  143. Cur += HaveLL * 2;
  144. s64 DVal;
  145. u64 UVal;
  146. const bool HaveLength = HaveZ || HaveLL;
  147. const bool HaveFlags = HaveWidth || HaveLength;
  148. // At the moment only %s supports precision and left-justification.
  149. CHECK(!((Precision >= 0 || LeftJustified) && *Cur != 's'));
  150. switch (*Cur) {
  151. case 'd': {
  152. DVal = HaveLL ? va_arg(Args, s64)
  153. : HaveZ ? va_arg(Args, sptr)
  154. : va_arg(Args, int);
  155. Res += appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero);
  156. break;
  157. }
  158. case 'u':
  159. case 'x':
  160. case 'X': {
  161. UVal = HaveLL ? va_arg(Args, u64)
  162. : HaveZ ? va_arg(Args, uptr)
  163. : va_arg(Args, unsigned);
  164. const bool Upper = (*Cur == 'X');
  165. Res += appendUnsigned(&Buffer, BufferEnd, UVal, (*Cur == 'u') ? 10 : 16,
  166. Width, PadWithZero, Upper);
  167. break;
  168. }
  169. case 'p': {
  170. RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp);
  171. Res += appendPointer(&Buffer, BufferEnd, va_arg(Args, uptr));
  172. break;
  173. }
  174. case 's': {
  175. RAW_CHECK_MSG(!HaveLength, PrintfFormatsHelp);
  176. // Only left-justified Width is supported.
  177. CHECK(!HaveWidth || LeftJustified);
  178. Res += appendString(&Buffer, BufferEnd, LeftJustified ? -Width : Width,
  179. Precision, va_arg(Args, char *));
  180. break;
  181. }
  182. case 'c': {
  183. RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp);
  184. Res +=
  185. appendChar(&Buffer, BufferEnd, static_cast<char>(va_arg(Args, int)));
  186. break;
  187. }
  188. // In Scudo, `s64`/`u64` are supposed to use `lld` and `llu` respectively.
  189. // However, `-Wformat` doesn't know we have a different parser for those
  190. // placeholders and it keeps complaining the type mismatch on 64-bit
  191. // platform which uses `ld`/`lu` for `s64`/`u64`. Therefore, in order to
  192. // silence the warning, we turn to use `PRId64`/`PRIu64` for printing
  193. // `s64`/`u64` and handle the `ld`/`lu` here.
  194. case 'l': {
  195. ++Cur;
  196. RAW_CHECK(*Cur == 'd' || *Cur == 'u');
  197. if (*Cur == 'd') {
  198. DVal = va_arg(Args, s64);
  199. Res +=
  200. appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero);
  201. } else {
  202. UVal = va_arg(Args, u64);
  203. Res += appendUnsigned(&Buffer, BufferEnd, UVal, 10, Width, PadWithZero,
  204. false);
  205. }
  206. break;
  207. }
  208. case '%': {
  209. RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp);
  210. Res += appendChar(&Buffer, BufferEnd, '%');
  211. break;
  212. }
  213. default: {
  214. RAW_CHECK_MSG(false, PrintfFormatsHelp);
  215. }
  216. }
  217. }
  218. RAW_CHECK(Buffer <= BufferEnd);
  219. appendChar(&Buffer, BufferEnd + 1, '\0');
  220. return Res;
  221. }
  222. int formatString(char *Buffer, uptr BufferLength, const char *Format, ...) {
  223. va_list Args;
  224. va_start(Args, Format);
  225. int Res = formatString(Buffer, BufferLength, Format, Args);
  226. va_end(Args);
  227. return Res;
  228. }
  229. void ScopedString::vappend(const char *Format, va_list Args) {
  230. va_list ArgsCopy;
  231. va_copy(ArgsCopy, Args);
  232. // formatString doesn't currently support a null buffer or zero buffer length,
  233. // so in order to get the resulting formatted string length, we use a one-char
  234. // buffer.
  235. char C[1];
  236. const uptr AdditionalLength =
  237. static_cast<uptr>(formatString(C, sizeof(C), Format, Args)) + 1;
  238. const uptr Length = length();
  239. String.resize(Length + AdditionalLength);
  240. const uptr FormattedLength = static_cast<uptr>(formatString(
  241. String.data() + Length, String.size() - Length, Format, ArgsCopy));
  242. RAW_CHECK(data()[length()] == '\0');
  243. RAW_CHECK(FormattedLength + 1 == AdditionalLength);
  244. va_end(ArgsCopy);
  245. }
  246. void ScopedString::append(const char *Format, ...) {
  247. va_list Args;
  248. va_start(Args, Format);
  249. vappend(Format, Args);
  250. va_end(Args);
  251. }
  252. void Printf(const char *Format, ...) {
  253. va_list Args;
  254. va_start(Args, Format);
  255. ScopedString Msg;
  256. Msg.vappend(Format, Args);
  257. outputRaw(Msg.data());
  258. va_end(Args);
  259. }
  260. } // namespace scudo