numbers.cc 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371
  1. // Copyright 2017 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // This file contains string processing functions related to
  15. // numeric values.
  16. #include "absl/strings/numbers.h"
  17. #include <algorithm>
  18. #include <cassert>
  19. #include <cfloat> // for DBL_DIG and FLT_DIG
  20. #include <climits>
  21. #include <cmath> // for HUGE_VAL
  22. #include <cstddef>
  23. #include <cstdint>
  24. #include <cstdio>
  25. #include <cstdlib>
  26. #include <cstring>
  27. #include <iterator>
  28. #include <limits>
  29. #include <system_error> // NOLINT(build/c++11)
  30. #include <type_traits>
  31. #include <utility>
  32. #include "absl/base/attributes.h"
  33. #include "absl/base/config.h"
  34. #include "absl/base/internal/endian.h"
  35. #include "absl/base/internal/raw_logging.h"
  36. #include "absl/base/nullability.h"
  37. #include "absl/base/optimization.h"
  38. #include "absl/numeric/bits.h"
  39. #include "absl/numeric/int128.h"
  40. #include "absl/strings/ascii.h"
  41. #include "absl/strings/charconv.h"
  42. #include "absl/strings/match.h"
  43. #include "absl/strings/string_view.h"
  44. namespace absl {
  45. ABSL_NAMESPACE_BEGIN
  46. bool SimpleAtof(absl::string_view str, absl::Nonnull<float*> out) {
  47. *out = 0.0;
  48. str = StripAsciiWhitespace(str);
  49. // std::from_chars doesn't accept an initial +, but SimpleAtof does, so if one
  50. // is present, skip it, while avoiding accepting "+-0" as valid.
  51. if (!str.empty() && str[0] == '+') {
  52. str.remove_prefix(1);
  53. if (!str.empty() && str[0] == '-') {
  54. return false;
  55. }
  56. }
  57. auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
  58. if (result.ec == std::errc::invalid_argument) {
  59. return false;
  60. }
  61. if (result.ptr != str.data() + str.size()) {
  62. // not all non-whitespace characters consumed
  63. return false;
  64. }
  65. // from_chars() with DR 3081's current wording will return max() on
  66. // overflow. SimpleAtof returns infinity instead.
  67. if (result.ec == std::errc::result_out_of_range) {
  68. if (*out > 1.0) {
  69. *out = std::numeric_limits<float>::infinity();
  70. } else if (*out < -1.0) {
  71. *out = -std::numeric_limits<float>::infinity();
  72. }
  73. }
  74. return true;
  75. }
  76. bool SimpleAtod(absl::string_view str, absl::Nonnull<double*> out) {
  77. *out = 0.0;
  78. str = StripAsciiWhitespace(str);
  79. // std::from_chars doesn't accept an initial +, but SimpleAtod does, so if one
  80. // is present, skip it, while avoiding accepting "+-0" as valid.
  81. if (!str.empty() && str[0] == '+') {
  82. str.remove_prefix(1);
  83. if (!str.empty() && str[0] == '-') {
  84. return false;
  85. }
  86. }
  87. auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
  88. if (result.ec == std::errc::invalid_argument) {
  89. return false;
  90. }
  91. if (result.ptr != str.data() + str.size()) {
  92. // not all non-whitespace characters consumed
  93. return false;
  94. }
  95. // from_chars() with DR 3081's current wording will return max() on
  96. // overflow. SimpleAtod returns infinity instead.
  97. if (result.ec == std::errc::result_out_of_range) {
  98. if (*out > 1.0) {
  99. *out = std::numeric_limits<double>::infinity();
  100. } else if (*out < -1.0) {
  101. *out = -std::numeric_limits<double>::infinity();
  102. }
  103. }
  104. return true;
  105. }
  106. bool SimpleAtob(absl::string_view str, absl::Nonnull<bool*> out) {
  107. ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
  108. if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
  109. EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
  110. EqualsIgnoreCase(str, "1")) {
  111. *out = true;
  112. return true;
  113. }
  114. if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
  115. EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
  116. EqualsIgnoreCase(str, "0")) {
  117. *out = false;
  118. return true;
  119. }
  120. return false;
  121. }
  122. // ----------------------------------------------------------------------
  123. // FastIntToBuffer() overloads
  124. //
  125. // Like the Fast*ToBuffer() functions above, these are intended for speed.
  126. // Unlike the Fast*ToBuffer() functions, however, these functions write
  127. // their output to the beginning of the buffer. The caller is responsible
  128. // for ensuring that the buffer has enough space to hold the output.
  129. //
  130. // Returns a pointer to the end of the string (i.e. the null character
  131. // terminating the string).
  132. // ----------------------------------------------------------------------
  133. namespace {
  134. // Various routines to encode integers to strings.
  135. // We split data encodings into a group of 2 digits, 4 digits, 8 digits as
  136. // it's easier to combine powers of two into scalar arithmetic.
  137. // Previous implementation used a lookup table of 200 bytes for every 2 bytes
  138. // and it was memory bound, any L1 cache miss would result in a much slower
  139. // result. When benchmarking with a cache eviction rate of several percent,
  140. // this implementation proved to be better.
  // The ASCII strings "00", "0000" and "00000000" packed into integers, i.e.
  // the byte '0' replicated 2, 4 and 8 times. Since '0' + d is the character
  // for digit d (0 <= d <= 9), adding one of these to a word whose bytes each
  // hold a value 0..9 turns every byte into its ASCII digit.
  constexpr uint32_t kTwoZeroBytes = static_cast<uint32_t>('0') * 0x0101u;
  constexpr uint64_t kFourZeroBytes = static_cast<uint64_t>('0') * 0x01010101u;
  constexpr uint64_t kEightZeroBytes =
      static_cast<uint64_t>('0') * 0x0101010101010101ull;
  // Returns `base` raised to the power `n` (1 when n == 0), using
  // exponentiation by squaring. Usable in constant expressions.
  template <typename T>
  constexpr T Pow(T base, uint32_t n) {
    // The lowest exponent bit decides whether one extra `base` is multiplied in.
    const T odd_factor = (n % 2 != 0) ? base : static_cast<T>(1);
    if (n <= 1) {
      return odd_factor;
    }
    // base^n == (base^2)^(n / 2) * (base if n is odd).
    return static_cast<T>(Pow(base * base, n / 2) * odd_factor);
  }
  // Returns the constant C for which, for every 0 <= x < Pow(100, n):
  //   x / Pow(10, n) == x * C / Pow(2, n * 10)
  // That is, a division by 10^n can be replaced by one multiplication and a
  // right shift, provided the dividend is smaller than the square of that
  // power of ten.
  //
  // C is Pow(2, 9 * n) / Pow(5, n), bumped up by one when the remainder is at
  // least half the divisor (integer halves). Calls abort() for n > 4.
  template <typename T>
  constexpr T ComputePowerOf100DivisionCoefficient(uint32_t n) {
    if (n > 4) {
      // Larger powers of 100 overflow the arithmetic below.
      abort();
    }
    T raw_denominator = 16 - 1;
    T raw_numerator = (raw_denominator + 1) - 10;
    T common_divisor = 3;  // gcd of the raw numerator and denominator

    // Reduced by the gcd these are 5 and 2, giving 5^n and 2^(9n).
    const T divisor = Pow(raw_denominator / common_divisor, n);
    const T dividend = Pow(raw_numerator / common_divisor, 9 * n);

    T coefficient = dividend / divisor;
    if (dividend % divisor >= divisor / 2) {
      // Remainder reaches half the divisor: round the quotient up.
      ++coefficient;
    }
    return coefficient;
  }
  176. // * kDivisionBy10Mul / kDivisionBy10Div is a division by 10 for values from 0
  177. // to 99. It's also a division of a structure [k takes 2 bytes][m takes 2
  178. // bytes], then * kDivisionBy10Mul / kDivisionBy10Div will be [k / 10][m / 10].
  179. // It allows parallel division.
  180. constexpr uint64_t kDivisionBy10Mul =
  181. ComputePowerOf100DivisionCoefficient<uint64_t>(1);
  182. static_assert(kDivisionBy10Mul == 103,
  183. "division coefficient for 10 is incorrect");
  184. constexpr uint64_t kDivisionBy10Div = 1 << 10;
  185. // * kDivisionBy100Mul / kDivisionBy100Div is a division by 100 for values from
  186. // 0 to 9999.
  187. constexpr uint64_t kDivisionBy100Mul =
  188. ComputePowerOf100DivisionCoefficient<uint64_t>(2);
  189. static_assert(kDivisionBy100Mul == 10486,
  190. "division coefficient for 100 is incorrect");
  191. constexpr uint64_t kDivisionBy100Div = 1 << 20;
  192. static_assert(ComputePowerOf100DivisionCoefficient<uint64_t>(3) == 1073742,
  193. "division coefficient for 1000 is incorrect");
  194. // Same as `PrepareEightDigits`, but produces 2 digits for integers < 100.
  195. inline uint32_t PrepareTwoDigitsImpl(uint32_t i, bool reversed) {
  196. assert(i < 100);
  197. uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
  198. uint32_t mod10 = i - 10u * div10;
  199. return (div10 << (reversed ? 8 : 0)) + (mod10 << (reversed ? 0 : 8));
  200. }
  201. inline uint32_t PrepareTwoDigits(uint32_t i) {
  202. return PrepareTwoDigitsImpl(i, false);
  203. }
  204. // Same as `PrepareEightDigits`, but produces 4 digits for integers < 10000.
  205. inline uint32_t PrepareFourDigitsImpl(uint32_t n, bool reversed) {
  206. // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive
  207. // blocks. 123 -> [\0\1][\0\23]. We divide by 10 both blocks
  208. // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in
  209. // parallel". Then we combine both results to have both ASCII digits,
  210. // strip trailing zeros, add ASCII '0000' and return.
  211. uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div;
  212. uint32_t mod100 = n - 100ull * div100;
  213. uint32_t hundreds =
  214. (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0));
  215. uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
  216. tens &= (0xFull << 16) | 0xFull;
  217. tens = (tens << (reversed ? 8 : 0)) +
  218. static_cast<uint32_t>((hundreds - 10ull * tens) << (reversed ? 0 : 8));
  219. return tens;
  220. }
  221. inline uint32_t PrepareFourDigits(uint32_t n) {
  222. return PrepareFourDigitsImpl(n, false);
  223. }
  224. inline uint32_t PrepareFourDigitsReversed(uint32_t n) {
  225. return PrepareFourDigitsImpl(n, true);
  226. }
  227. // Helper function to produce an ASCII representation of `i`.
  228. //
  229. // Function returns an 8-byte integer which when summed with `kEightZeroBytes`,
  230. // can be treated as a printable buffer with ascii representation of `i`,
  231. // possibly with leading zeros.
  232. //
  233. // Example:
  234. //
  235. // uint64_t buffer = PrepareEightDigits(102030) + kEightZeroBytes;
  236. // char* ascii = reinterpret_cast<char*>(&buffer);
  237. // // Note two leading zeros:
  238. // EXPECT_EQ(absl::string_view(ascii, 8), "00102030");
  239. //
  240. // If `Reversed` is set to true, the result becomes reversed to "03020100".
  241. //
  242. // Pre-condition: `i` must be less than 100000000.
  243. inline uint64_t PrepareEightDigitsImpl(uint32_t i, bool reversed) {
  244. ABSL_ASSUME(i < 10000'0000);
  245. // Prepare 2 blocks of 4 digits "in parallel".
  246. uint32_t hi = i / 10000;
  247. uint32_t lo = i % 10000;
  248. uint64_t merged = (uint64_t{hi} << (reversed ? 32 : 0)) |
  249. (uint64_t{lo} << (reversed ? 0 : 32));
  250. uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) &
  251. ((0x7Full << 32) | 0x7Full);
  252. uint64_t mod100 = merged - 100ull * div100;
  253. uint64_t hundreds =
  254. (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0));
  255. uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
  256. tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull;
  257. tens = (tens << (reversed ? 8 : 0)) +
  258. ((hundreds - 10ull * tens) << (reversed ? 0 : 8));
  259. return tens;
  260. }
  261. inline uint64_t PrepareEightDigits(uint32_t i) {
  262. return PrepareEightDigitsImpl(i, false);
  263. }
  264. inline uint64_t PrepareEightDigitsReversed(uint32_t i) {
  265. return PrepareEightDigitsImpl(i, true);
  266. }
  267. template <typename T, typename BackwardIt>
  268. class FastUIntToStringConverter {
  269. static_assert(
  270. std::is_same<T, decltype(+std::declval<T>())>::value,
  271. "to avoid code bloat, only instantiate this for int and larger types");
  272. static_assert(std::is_unsigned<T>::value,
  273. "this class is only for unsigned types");
  274. public:
  275. // Outputs the given number backward (like with std::copy_backward),
  276. // starting from the end of the string.
  277. // The number of digits in the number must have been already measured and
  278. // passed *exactly*, otherwise the behavior is undefined.
  279. // (This is an optimization, as calculating the number of digits again would
  280. // slow down the hot path.)
  281. // Returns an iterator to the start of the suffix that was appended.
  282. static BackwardIt FastIntToBufferBackward(T v, BackwardIt end) {
  283. // THIS IS A HOT FUNCTION with a very deliberate structure to exploit branch
  284. // prediction and shorten the critical path for smaller numbers.
  285. // Do not move around the if/else blocks or attempt to simplify it
  286. // without benchmarking any changes.
  287. if (v < 10) {
  288. goto AT_LEAST_1 /* NOTE: mandatory for the 0 case */;
  289. }
  290. if (v < 1000) {
  291. goto AT_LEAST_10;
  292. }
  293. if (v < 10000000) {
  294. goto AT_LEAST_1000;
  295. }
  296. if (v >= 100000000 / 10) {
  297. if (v >= 10000000000000000 / 10) {
  298. DoFastIntToBufferBackward<8>(v, end);
  299. }
  300. DoFastIntToBufferBackward<8>(v, end);
  301. }
  302. if (v >= 10000 / 10) {
  303. AT_LEAST_1000:
  304. DoFastIntToBufferBackward<4>(v, end);
  305. }
  306. if (v >= 100 / 10) {
  307. AT_LEAST_10:
  308. DoFastIntToBufferBackward<2>(v, end);
  309. }
  310. if (v >= 10 / 10) {
  311. AT_LEAST_1:
  312. end = DoFastIntToBufferBackward(v, end, std::integral_constant<int, 1>());
  313. }
  314. return end;
  315. }
  316. private:
  317. // Only assume pointers are contiguous for now. String and vector iterators
  318. // could be special-cased as well, but there's no need for them here.
  319. // With C++20 we can probably switch to std::contiguous_iterator_tag.
  320. static constexpr bool kIsContiguousIterator =
  321. std::is_pointer<BackwardIt>::value;
  322. template <int Exponent>
  323. static void DoFastIntToBufferBackward(T& v, BackwardIt& end) {
  324. constexpr T kModulus = Pow<T>(10, Exponent);
  325. T remainder = static_cast<T>(v % kModulus);
  326. v = static_cast<T>(v / kModulus);
  327. end = DoFastIntToBufferBackward(remainder, end,
  328. std::integral_constant<int, Exponent>());
  329. }
  330. static BackwardIt DoFastIntToBufferBackward(const T&, BackwardIt end,
  331. std::integral_constant<int, 0>) {
  332. return end;
  333. }
  334. static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
  335. std::integral_constant<int, 1>) {
  336. *--end = static_cast<char>('0' + v);
  337. return DoFastIntToBufferBackward(v, end, std::integral_constant<int, 0>());
  338. }
  339. static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
  340. std::integral_constant<int, 4>) {
  341. if (kIsContiguousIterator) {
  342. const uint32_t digits =
  343. PrepareFourDigits(static_cast<uint32_t>(v)) + kFourZeroBytes;
  344. end -= sizeof(digits);
  345. little_endian::Store32(&*end, digits);
  346. } else {
  347. uint32_t digits =
  348. PrepareFourDigitsReversed(static_cast<uint32_t>(v)) + kFourZeroBytes;
  349. for (size_t i = 0; i < sizeof(digits); ++i) {
  350. *--end = static_cast<char>(digits);
  351. digits >>= CHAR_BIT;
  352. }
  353. }
  354. return end;
  355. }
  356. static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
  357. std::integral_constant<int, 8>) {
  358. if (kIsContiguousIterator) {
  359. const uint64_t digits =
  360. PrepareEightDigits(static_cast<uint32_t>(v)) + kEightZeroBytes;
  361. end -= sizeof(digits);
  362. little_endian::Store64(&*end, digits);
  363. } else {
  364. uint64_t digits = PrepareEightDigitsReversed(static_cast<uint32_t>(v)) +
  365. kEightZeroBytes;
  366. for (size_t i = 0; i < sizeof(digits); ++i) {
  367. *--end = static_cast<char>(digits);
  368. digits >>= CHAR_BIT;
  369. }
  370. }
  371. return end;
  372. }
  373. template <int Digits>
  374. static BackwardIt DoFastIntToBufferBackward(
  375. T v, BackwardIt end, std::integral_constant<int, Digits>) {
  376. constexpr int kLogModulus = Digits - Digits / 2;
  377. constexpr T kModulus = Pow(static_cast<T>(10), kLogModulus);
  378. bool is_safe_to_use_division_trick = Digits <= 8;
  379. T quotient, remainder;
  380. if (is_safe_to_use_division_trick) {
  381. constexpr uint64_t kCoefficient =
  382. ComputePowerOf100DivisionCoefficient<uint64_t>(kLogModulus);
  383. quotient = (v * kCoefficient) >> (10 * kLogModulus);
  384. remainder = v - quotient * kModulus;
  385. } else {
  386. quotient = v / kModulus;
  387. remainder = v % kModulus;
  388. }
  389. end = DoFastIntToBufferBackward(remainder, end,
  390. std::integral_constant<int, kLogModulus>());
  391. return DoFastIntToBufferBackward(
  392. quotient, end, std::integral_constant<int, Digits - kLogModulus>());
  393. }
  394. };
  395. // Returns an iterator to the start of the suffix that was appended
  396. template <typename T, typename BackwardIt>
  397. std::enable_if_t<std::is_unsigned<T>::value, BackwardIt>
  398. DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) {
  399. using PromotedT = std::decay_t<decltype(+v)>;
  400. using Converter = FastUIntToStringConverter<PromotedT, BackwardIt>;
  401. (void)digits;
  402. return Converter().FastIntToBufferBackward(v, end);
  403. }
  404. template <typename T, typename BackwardIt>
  405. std::enable_if_t<std::is_signed<T>::value, BackwardIt>
  406. DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) {
  407. if (absl::numbers_internal::IsNegative(v)) {
  408. // Store the minus sign *before* we produce the number itself, not after.
  409. // This gets us a tail call.
  410. end[-static_cast<ptrdiff_t>(digits) - 1] = '-';
  411. }
  412. return DoFastIntToBufferBackward(
  413. absl::numbers_internal::UnsignedAbsoluteValue(v), end, digits);
  414. }
  415. template <class T>
  416. std::enable_if_t<std::is_integral<T>::value, int>
  417. GetNumDigitsOrNegativeIfNegativeImpl(T v) {
  418. const auto /* either bool or std::false_type */ is_negative =
  419. absl::numbers_internal::IsNegative(v);
  420. const int digits = static_cast<int>(absl::numbers_internal::Base10Digits(
  421. absl::numbers_internal::UnsignedAbsoluteValue(v)));
  422. return is_negative ? ~digits : digits;
  423. }
  424. } // namespace
  425. void numbers_internal::PutTwoDigits(uint32_t i, absl::Nonnull<char*> buf) {
  426. little_endian::Store16(
  427. buf, static_cast<uint16_t>(PrepareTwoDigits(i) + kTwoZeroBytes));
  428. }
  429. absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  430. uint32_t i, absl::Nonnull<char*> buffer) {
  431. const uint32_t digits = absl::numbers_internal::Base10Digits(i);
  432. buffer += digits;
  433. *buffer = '\0'; // We're going backward, so store this first
  434. FastIntToBufferBackward(i, buffer, digits);
  435. return buffer;
  436. }
  437. absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  438. int32_t i, absl::Nonnull<char*> buffer) {
  439. buffer += static_cast<int>(i < 0);
  440. uint32_t digits = absl::numbers_internal::Base10Digits(
  441. absl::numbers_internal::UnsignedAbsoluteValue(i));
  442. buffer += digits;
  443. *buffer = '\0'; // We're going backward, so store this first
  444. FastIntToBufferBackward(i, buffer, digits);
  445. return buffer;
  446. }
  447. absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  448. uint64_t i, absl::Nonnull<char*> buffer) {
  449. uint32_t digits = absl::numbers_internal::Base10Digits(i);
  450. buffer += digits;
  451. *buffer = '\0'; // We're going backward, so store this first
  452. FastIntToBufferBackward(i, buffer, digits);
  453. return buffer;
  454. }
  455. absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  456. int64_t i, absl::Nonnull<char*> buffer) {
  457. buffer += static_cast<int>(i < 0);
  458. uint32_t digits = absl::numbers_internal::Base10Digits(
  459. absl::numbers_internal::UnsignedAbsoluteValue(i));
  460. buffer += digits;
  461. *buffer = '\0'; // We're going backward, so store this first
  462. FastIntToBufferBackward(i, buffer, digits);
  463. return buffer;
  464. }
  465. absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
  466. uint32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
  467. return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
  468. }
  469. absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
  470. int32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
  471. return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
  472. }
  473. absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
  474. uint64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
  475. return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
  476. }
  477. absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
  478. int64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
  479. return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
  480. }
  481. int numbers_internal::GetNumDigitsOrNegativeIfNegative(signed char v) {
  482. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  483. }
  484. int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned char v) {
  485. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  486. }
  487. int numbers_internal::GetNumDigitsOrNegativeIfNegative(short v) { // NOLINT
  488. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  489. }
  490. int numbers_internal::GetNumDigitsOrNegativeIfNegative(
  491. unsigned short v) { // NOLINT
  492. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  493. }
  494. int numbers_internal::GetNumDigitsOrNegativeIfNegative(int v) {
  495. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  496. }
  497. int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned int v) {
  498. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  499. }
  500. int numbers_internal::GetNumDigitsOrNegativeIfNegative(long v) { // NOLINT
  501. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  502. }
  503. int numbers_internal::GetNumDigitsOrNegativeIfNegative(
  504. unsigned long v) { // NOLINT
  505. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  506. }
  507. int numbers_internal::GetNumDigitsOrNegativeIfNegative(long long v) { // NOLINT
  508. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  509. }
  510. int numbers_internal::GetNumDigitsOrNegativeIfNegative(
  511. unsigned long long v) { // NOLINT
  512. return GetNumDigitsOrNegativeIfNegativeImpl(v);
  513. }
  514. // Given a 128-bit number expressed as a pair of uint64_t, high half first,
  515. // return that number multiplied by the given 32-bit value. If the result is
  516. // too large to fit in a 128-bit number, divide it by 2 until it fits.
  517. static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
  518. uint32_t mul) {
  519. uint64_t bits0_31 = num.second & 0xFFFFFFFF;
  520. uint64_t bits32_63 = num.second >> 32;
  521. uint64_t bits64_95 = num.first & 0xFFFFFFFF;
  522. uint64_t bits96_127 = num.first >> 32;
  523. // The picture so far: each of these 64-bit values has only the lower 32 bits
  524. // filled in.
  525. // bits96_127: [ 00000000 xxxxxxxx ]
  526. // bits64_95: [ 00000000 xxxxxxxx ]
  527. // bits32_63: [ 00000000 xxxxxxxx ]
  528. // bits0_31: [ 00000000 xxxxxxxx ]
  529. bits0_31 *= mul;
  530. bits32_63 *= mul;
  531. bits64_95 *= mul;
  532. bits96_127 *= mul;
  533. // Now the top halves may also have value, though all 64 of their bits will
  534. // never be set at the same time, since they are a result of a 32x32 bit
  535. // multiply. This makes the carry calculation slightly easier.
  536. // bits96_127: [ mmmmmmmm | mmmmmmmm ]
  537. // bits64_95: [ | mmmmmmmm mmmmmmmm | ]
  538. // bits32_63: | [ mmmmmmmm | mmmmmmmm ]
  539. // bits0_31: | [ | mmmmmmmm mmmmmmmm ]
  540. // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ]
  541. uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
  542. uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
  543. (bits0_63 < bits0_31);
  544. uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
  545. if (bits128_up == 0) return {bits64_127, bits0_63};
  546. auto shift = static_cast<unsigned>(bit_width(bits128_up));
  547. uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
  548. uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
  549. return {hi, lo};
  550. }
  551. // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
  552. // where the first bit is always a one. So PowFive(1, 0) starts 0b100000,
  553. // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
  554. static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
  555. std::pair<uint64_t, uint64_t> result = {num, 0};
  556. while (expfive >= 13) {
  557. // 5^13 is the highest power of five that will fit in a 32-bit integer.
  558. result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
  559. expfive -= 13;
  560. }
  561. constexpr uint32_t powers_of_five[13] = {
  562. 1,
  563. 5,
  564. 5 * 5,
  565. 5 * 5 * 5,
  566. 5 * 5 * 5 * 5,
  567. 5 * 5 * 5 * 5 * 5,
  568. 5 * 5 * 5 * 5 * 5 * 5,
  569. 5 * 5 * 5 * 5 * 5 * 5 * 5,
  570. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  571. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  572. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  573. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  574. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
  575. result = Mul32(result, powers_of_five[expfive & 15]);
  576. int shift = countl_zero(result.first);
  577. if (shift != 0) {
  578. result.first = (result.first << shift) + (result.second >> (64 - shift));
  579. result.second = (result.second << shift);
  580. }
  581. return result;
  582. }
  // Result type of SplitToSix(): a base-10 exponent together with six ASCII
  // digits.
  struct ExpDigits {
    // Base-10 exponent.
    int32_t exponent;
    // Exactly six ASCII digit characters; the array has no room for a
    // terminating NUL.
    char digits[6];
  };
  // SplitToSix converts value, a positive double-precision floating-point number,
  // into a base-10 exponent and 6 ASCII digits, where the first digit is never
  // zero.  For example, SplitToSix(1) returns an exponent of zero and a digits
  // array of {'1', '0', '0', '0', '0', '0'}.  If value is exactly halfway between
  // two possible representations, e.g. value = 100000.5, then "round to even" is
  // performed.
  static ExpDigits SplitToSix(const double value) {
    ExpDigits exp_dig;
    int exp = 5;
    double d = value;
    // First step: calculate a close approximation of the output, where the
    // value d will be between 100,000 and 999,999, representing the digits
    // in the output ASCII array, and exp is the base-10 exponent.  It would be
    // faster to use a table here, and to look up the base-2 exponent of value,
    // however value is an IEEE-754 64-bit number, so the table would have 2,000
    // entries, which is not cache-friendly.
    //
    // Each branch below is a binary-search style cascade: every step scales d
    // by a power of ten and adjusts exp by the matching amount.
    if (d >= 999999.5) {
      if (d >= 1e+261) exp += 256, d *= 1e-256;
      if (d >= 1e+133) exp += 128, d *= 1e-128;
      if (d >= 1e+69) exp += 64, d *= 1e-64;
      if (d >= 1e+37) exp += 32, d *= 1e-32;
      if (d >= 1e+21) exp += 16, d *= 1e-16;
      if (d >= 1e+13) exp += 8, d *= 1e-8;
      if (d >= 1e+9) exp += 4, d *= 1e-4;
      if (d >= 1e+7) exp += 2, d *= 1e-2;
      if (d >= 1e+6) exp += 1, d *= 1e-1;
    } else {
      if (d < 1e-250) exp -= 256, d *= 1e256;
      if (d < 1e-122) exp -= 128, d *= 1e128;
      if (d < 1e-58) exp -= 64, d *= 1e64;
      if (d < 1e-26) exp -= 32, d *= 1e32;
      if (d < 1e-10) exp -= 16, d *= 1e16;
      if (d < 1e-2) exp -= 8, d *= 1e8;
      if (d < 1e+2) exp -= 4, d *= 1e4;
      if (d < 1e+4) exp -= 2, d *= 1e2;
      if (d < 1e+5) exp -= 1, d *= 1e1;
    }
    // At this point, d is in the range [99999.5..999999.5) and exp is in the
    // range [-324..308]. Since we need to round d up, we want to add a half
    // and truncate.
    // However, the technique above may have lost some precision, due to its
    // repeated multiplication by constants that each may be off by half a bit
    // of precision.  This only matters if we're close to the edge though.
    // Since we'd like to know if the fractional part of d is close to a half,
    // we multiply it by 65536 and see if the fractional part is close to 32768.
    // (The number doesn't have to be a power of two, but powers of two are
    // faster.)
    uint64_t d64k = d * 65536;
    uint32_t dddddd;  // A 6-digit decimal integer.
    if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
      // OK, it's fairly likely that precision was lost above, which is
      // not a surprise given only 52 mantissa bits are available.  Therefore
      // redo the calculation using 128-bit numbers.  (64 bits are not enough).

      // Start out with digits rounded down; maybe add one below.
      dddddd = static_cast<uint32_t>(d64k / 65536);

      // mantissa is a 64-bit integer representing M.mmm... * 2^63.  The actual
      // value we're representing, of course, is M.mmm... * 2^exp2.
      int exp2;
      double m = std::frexp(value, &exp2);
      uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
      // std::frexp returns an m value in the range [0.5, 1.0), however we
      // can't multiply it by 2^64 and convert to an integer because some FPUs
      // throw an exception when converting a number higher than 2^63 into an
      // integer - even an unsigned 64-bit integer!  Fortunately it doesn't matter
      // since m only has 52 significant bits anyway.
      mantissa <<= 1;
      exp2 -= 64;  // not needed, but nice for debugging

      // OK, we are here to compare:
      //     (dddddd + 0.5) * 10^(exp-5)  vs.  mantissa * 2^exp2
      // so we can round up dddddd if appropriate.  Those values span the full
      // range of 600 orders of magnitude of IEEE 64-bit floating-point.
      // Fortunately, we already know they are very close, so we don't need to
      // track the base-2 exponent of both sides.  This greatly simplifies the
      // math since the 2^exp2 calculation is unnecessary and the power-of-10
      // calculation can become a power-of-5 instead.

      // Both operands are 128-bit values held as (high word, low word), so the
      // std::pair comparisons below compare the high words first.
      std::pair<uint64_t, uint64_t> edge, val;
      if (exp >= 6) {
        // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
        // Since we're tossing powers of two, 2 * dddddd + 1 is the
        // same as dddddd + 0.5
        edge = PowFive(2 * dddddd + 1, exp - 5);

        val.first = mantissa;
        val.second = 0;
      } else {
        // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
        // above because (exp - 5) is negative.  So we compare (dddddd + 0.5) to
        // mantissa * 5 ^ (5 - exp)
        edge = PowFive(2 * dddddd + 1, 0);

        val = PowFive(mantissa, 5 - exp);
      }
      // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
      //        val.second, edge.first, edge.second);
      if (val > edge) {
        // Strictly above the halfway point: round up.
        dddddd++;
      } else if (val == edge) {
        // Exactly halfway: round to even (adds one only when dddddd is odd).
        dddddd += (dddddd & 1);
      }
    } else {
      // Here, we are not close to the edge.
      dddddd = static_cast<uint32_t>((d64k + 32768) / 65536);
    }
    // Rounding up may have produced seven digits; renormalize to six digits
    // and bump the exponent.
    if (dddddd == 1000000) {
      dddddd = 100000;
      exp += 1;
    }
    exp_dig.exponent = exp;

    // Peel the six digits off two at a time (most significant pair first) and
    // hand each pair to PutTwoDigits for storage in the digits array.
    uint32_t two_digits = dddddd / 10000;
    dddddd -= two_digits * 10000;
    numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]);

    two_digits = dddddd / 100;
    dddddd -= two_digits * 100;
    numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]);

    numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]);
    return exp_dig;
  }
  701. // Helper function for fast formatting of floating-point.
  702. // The result is the same as "%g", a.k.a. "%.6g".
  703. size_t numbers_internal::SixDigitsToBuffer(double d,
  704. absl::Nonnull<char*> const buffer) {
  705. static_assert(std::numeric_limits<float>::is_iec559,
  706. "IEEE-754/IEC-559 support only");
  707. char* out = buffer; // we write data to out, incrementing as we go, but
  708. // FloatToBuffer always returns the address of the buffer
  709. // passed in.
  710. if (std::isnan(d)) {
  711. strcpy(out, "nan"); // NOLINT(runtime/printf)
  712. return 3;
  713. }
  714. if (d == 0) { // +0 and -0 are handled here
  715. if (std::signbit(d)) *out++ = '-';
  716. *out++ = '0';
  717. *out = 0;
  718. return static_cast<size_t>(out - buffer);
  719. }
  720. if (d < 0) {
  721. *out++ = '-';
  722. d = -d;
  723. }
  724. if (d > std::numeric_limits<double>::max()) {
  725. strcpy(out, "inf"); // NOLINT(runtime/printf)
  726. return static_cast<size_t>(out + 3 - buffer);
  727. }
  728. auto exp_dig = SplitToSix(d);
  729. int exp = exp_dig.exponent;
  730. const char* digits = exp_dig.digits;
  731. out[0] = '0';
  732. out[1] = '.';
  733. switch (exp) {
  734. case 5:
  735. memcpy(out, &digits[0], 6), out += 6;
  736. *out = 0;
  737. return static_cast<size_t>(out - buffer);
  738. case 4:
  739. memcpy(out, &digits[0], 5), out += 5;
  740. if (digits[5] != '0') {
  741. *out++ = '.';
  742. *out++ = digits[5];
  743. }
  744. *out = 0;
  745. return static_cast<size_t>(out - buffer);
  746. case 3:
  747. memcpy(out, &digits[0], 4), out += 4;
  748. if ((digits[5] | digits[4]) != '0') {
  749. *out++ = '.';
  750. *out++ = digits[4];
  751. if (digits[5] != '0') *out++ = digits[5];
  752. }
  753. *out = 0;
  754. return static_cast<size_t>(out - buffer);
  755. case 2:
  756. memcpy(out, &digits[0], 3), out += 3;
  757. *out++ = '.';
  758. memcpy(out, &digits[3], 3);
  759. out += 3;
  760. while (out[-1] == '0') --out;
  761. if (out[-1] == '.') --out;
  762. *out = 0;
  763. return static_cast<size_t>(out - buffer);
  764. case 1:
  765. memcpy(out, &digits[0], 2), out += 2;
  766. *out++ = '.';
  767. memcpy(out, &digits[2], 4);
  768. out += 4;
  769. while (out[-1] == '0') --out;
  770. if (out[-1] == '.') --out;
  771. *out = 0;
  772. return static_cast<size_t>(out - buffer);
  773. case 0:
  774. memcpy(out, &digits[0], 1), out += 1;
  775. *out++ = '.';
  776. memcpy(out, &digits[1], 5);
  777. out += 5;
  778. while (out[-1] == '0') --out;
  779. if (out[-1] == '.') --out;
  780. *out = 0;
  781. return static_cast<size_t>(out - buffer);
  782. case -4:
  783. out[2] = '0';
  784. ++out;
  785. ABSL_FALLTHROUGH_INTENDED;
  786. case -3:
  787. out[2] = '0';
  788. ++out;
  789. ABSL_FALLTHROUGH_INTENDED;
  790. case -2:
  791. out[2] = '0';
  792. ++out;
  793. ABSL_FALLTHROUGH_INTENDED;
  794. case -1:
  795. out += 2;
  796. memcpy(out, &digits[0], 6);
  797. out += 6;
  798. while (out[-1] == '0') --out;
  799. *out = 0;
  800. return static_cast<size_t>(out - buffer);
  801. }
  802. assert(exp < -4 || exp >= 6);
  803. out[0] = digits[0];
  804. assert(out[1] == '.');
  805. out += 2;
  806. memcpy(out, &digits[1], 5), out += 5;
  807. while (out[-1] == '0') --out;
  808. if (out[-1] == '.') --out;
  809. *out++ = 'e';
  810. if (exp > 0) {
  811. *out++ = '+';
  812. } else {
  813. *out++ = '-';
  814. exp = -exp;
  815. }
  816. if (exp > 99) {
  817. int dig1 = exp / 100;
  818. exp -= dig1 * 100;
  819. *out++ = '0' + static_cast<char>(dig1);
  820. }
  821. PutTwoDigits(static_cast<uint32_t>(exp), out);
  822. out += 2;
  823. *out = 0;
  824. return static_cast<size_t>(out - buffer);
  825. }
  826. namespace {
  // Maps every byte value to the digit it denotes: '0'-'9' give 0-9, and both
  // 'A'-'Z' and 'a'-'z' give 10-35.  Any other byte maps to 36, which marks an
  // invalid character since we support bases up to 36.
  //
  // The table is laid out 16 entries per row, one row per 0x10 code points.
  static const int8_t kAsciiToInt[256] = {
      // 0x00 - 0x2F: control characters, space and punctuation.
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      // 0x30 - 0x3F: '0'-'9' followed by six punctuation characters.
      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 36, 36, 36, 36, 36, 36,
      // 0x40 - 0x5F: '@', 'A'-'Z', then five punctuation characters.
      36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
      25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36,
      // 0x60 - 0x7F: '`', 'a'-'z', then four punctuation characters and DEL.
      36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
      25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36,
      // 0x80 - 0xFF: non-ASCII bytes, never valid digits.
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
  845. // Parse the sign and optional hex or oct prefix in text.
  846. inline bool safe_parse_sign_and_base(
  847. absl::Nonnull<absl::string_view*> text /*inout*/,
  848. absl::Nonnull<int*> base_ptr /*inout*/,
  849. absl::Nonnull<bool*> negative_ptr /*output*/) {
  850. if (text->data() == nullptr) {
  851. return false;
  852. }
  853. const char* start = text->data();
  854. const char* end = start + text->size();
  855. int base = *base_ptr;
  856. // Consume whitespace.
  857. while (start < end &&
  858. absl::ascii_isspace(static_cast<unsigned char>(start[0]))) {
  859. ++start;
  860. }
  861. while (start < end &&
  862. absl::ascii_isspace(static_cast<unsigned char>(end[-1]))) {
  863. --end;
  864. }
  865. if (start >= end) {
  866. return false;
  867. }
  868. // Consume sign.
  869. *negative_ptr = (start[0] == '-');
  870. if (*negative_ptr || start[0] == '+') {
  871. ++start;
  872. if (start >= end) {
  873. return false;
  874. }
  875. }
  876. // Consume base-dependent prefix.
  877. // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
  878. // base 16: "0x" -> base 16
  879. // Also validate the base.
  880. if (base == 0) {
  881. if (end - start >= 2 && start[0] == '0' &&
  882. (start[1] == 'x' || start[1] == 'X')) {
  883. base = 16;
  884. start += 2;
  885. if (start >= end) {
  886. // "0x" with no digits after is invalid.
  887. return false;
  888. }
  889. } else if (end - start >= 1 && start[0] == '0') {
  890. base = 8;
  891. start += 1;
  892. } else {
  893. base = 10;
  894. }
  895. } else if (base == 16) {
  896. if (end - start >= 2 && start[0] == '0' &&
  897. (start[1] == 'x' || start[1] == 'X')) {
  898. start += 2;
  899. if (start >= end) {
  900. // "0x" with no digits after is invalid.
  901. return false;
  902. }
  903. }
  904. } else if (base >= 2 && base <= 36) {
  905. // okay
  906. } else {
  907. return false;
  908. }
  909. *text = absl::string_view(start, static_cast<size_t>(end - start));
  910. *base_ptr = base;
  911. return true;
  912. }
  913. // Consume digits.
  914. //
  915. // The classic loop:
  916. //
  917. // for each digit
  918. // value = value * base + digit
  919. // value *= sign
  920. //
  921. // The classic loop needs overflow checking. It also fails on the most
  922. // negative integer, -2147483648 in 32-bit two's complement representation.
  923. //
  924. // My improved loop:
  925. //
  926. // if (!negative)
  927. // for each digit
  928. // value = value * base
  929. // value = value + digit
  930. // else
  931. // for each digit
  932. // value = value * base
  933. // value = value - digit
  934. //
  935. // Overflow checking becomes simple.
  // Lookup tables per IntType:
  // vmax/base and vmin/base are precomputed because division costs at least 8ns.
  // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
  // struct of arrays) would probably be better in terms of d-cache for the most
  // commonly used bases.
  //
  // Both arrays are indexed directly by the base.  The parsing loops compare the
  // running value against these limits before multiplying by the base, so that
  // the multiplication itself cannot overflow.
  template <typename IntType>
  struct LookupTables {
    // kVmaxOverBase[b] holds std::numeric_limits<IntType>::max() / b.
    ABSL_CONST_INIT static const IntType kVmaxOverBase[];
    // kVminOverBase[b] holds std::numeric_limits<IntType>::min() / b.
    ABSL_CONST_INIT static const IntType kVminOverBase[];
  };
  // An array initializer macro for X/base where base in [0, 36].
  // However, note that lookups for base in [0, 1] should never happen because
  // base has been validated to be in [2, 36] by safe_parse_sign_and_base().
  //
  // The expansion is a braced list of 37 values: two placeholder zeros for
  // bases 0 and 1, followed by X / 2 through X / 36.  X is parenthesized at
  // every use so that a compound argument (e.g. `a + b`) is divided as a whole
  // rather than being torn apart by operator precedence.
  #define X_OVER_BASE_INITIALIZER(X)                                          \
    {                                                                         \
      0, 0, (X) / 2, (X) / 3, (X) / 4, (X) / 5, (X) / 6, (X) / 7, (X) / 8,    \
          (X) / 9, (X) / 10, (X) / 11, (X) / 12, (X) / 13, (X) / 14,          \
          (X) / 15, (X) / 16, (X) / 17, (X) / 18, (X) / 19, (X) / 20,         \
          (X) / 21, (X) / 22, (X) / 23, (X) / 24, (X) / 25, (X) / 26,         \
          (X) / 27, (X) / 28, (X) / 29, (X) / 30, (X) / 31, (X) / 32,         \
          (X) / 33, (X) / 34, (X) / 35, (X) / 36,                             \
    }
  // This kVmaxOverBase is generated with
  //  for (int base = 2; base < 37; ++base) {
  //    absl::uint128 max = std::numeric_limits<absl::uint128>::max();
  //    auto result = max / base;
  //    std::cout << "    MakeUint128(" << absl::Uint128High64(result) << "u, "
  //              << absl::Uint128Low64(result) << "u),\n";
  //  }
  // See https://godbolt.org/z/aneYsb
  //
  // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting
  // array to avoid a static initializer.
  //
  // The array is indexed by base.  The two leading zeros are placeholders for
  // bases 0 and 1; the remaining 35 entries cover bases 2 through 36, each
  // written as (high 64 bits, low 64 bits).
  template <>
  ABSL_CONST_INIT const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
      0,
      0,
      MakeUint128(9223372036854775807u, 18446744073709551615u),
      MakeUint128(6148914691236517205u, 6148914691236517205u),
      MakeUint128(4611686018427387903u, 18446744073709551615u),
      MakeUint128(3689348814741910323u, 3689348814741910323u),
      MakeUint128(3074457345618258602u, 12297829382473034410u),
      MakeUint128(2635249153387078802u, 5270498306774157604u),
      MakeUint128(2305843009213693951u, 18446744073709551615u),
      MakeUint128(2049638230412172401u, 14347467612885206812u),
      MakeUint128(1844674407370955161u, 11068046444225730969u),
      MakeUint128(1676976733973595601u, 8384883669867978007u),
      MakeUint128(1537228672809129301u, 6148914691236517205u),
      MakeUint128(1418980313362273201u, 4256940940086819603u),
      MakeUint128(1317624576693539401u, 2635249153387078802u),
      MakeUint128(1229782938247303441u, 1229782938247303441u),
      MakeUint128(1152921504606846975u, 18446744073709551615u),
      MakeUint128(1085102592571150095u, 1085102592571150095u),
      MakeUint128(1024819115206086200u, 16397105843297379214u),
      MakeUint128(970881267037344821u, 16504981539634861972u),
      MakeUint128(922337203685477580u, 14757395258967641292u),
      MakeUint128(878416384462359600u, 14054662151397753612u),
      MakeUint128(838488366986797800u, 13415813871788764811u),
      MakeUint128(802032351030850070u, 4812194106185100421u),
      MakeUint128(768614336404564650u, 12297829382473034410u),
      MakeUint128(737869762948382064u, 11805916207174113034u),
      MakeUint128(709490156681136600u, 11351842506898185609u),
      MakeUint128(683212743470724133u, 17080318586768103348u),
      MakeUint128(658812288346769700u, 10540996613548315209u),
      MakeUint128(636094623231363848u, 15266270957552732371u),
      MakeUint128(614891469123651720u, 9838263505978427528u),
      MakeUint128(595056260442243600u, 9520900167075897608u),
      MakeUint128(576460752303423487u, 18446744073709551615u),
      MakeUint128(558992244657865200u, 8943875914525843207u),
      MakeUint128(542551296285575047u, 9765923333140350855u),
      MakeUint128(527049830677415760u, 8432797290838652167u),
      MakeUint128(512409557603043100u, 8198552921648689607u),
  };
  // This kVmaxOverBase is generated with
  //  for (int base = 2; base < 37; ++base) {
  //    absl::int128 max = std::numeric_limits<absl::int128>::max();
  //    auto result = max / base;
  //    std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
  //              << absl::Int128Low64(result) << "u),\n";
  //  }
  // See https://godbolt.org/z/7djYWz
  //
  // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
  // to avoid a static initializer.
  //
  // The array is indexed by base.  The two leading zeros are placeholders for
  // bases 0 and 1; the remaining 35 entries cover bases 2 through 36, each
  // written as (high 64 bits, low 64 bits).
  template <>
  ABSL_CONST_INIT const int128 LookupTables<int128>::kVmaxOverBase[] = {
      0,
      0,
      MakeInt128(4611686018427387903, 18446744073709551615u),
      MakeInt128(3074457345618258602, 12297829382473034410u),
      MakeInt128(2305843009213693951, 18446744073709551615u),
      MakeInt128(1844674407370955161, 11068046444225730969u),
      MakeInt128(1537228672809129301, 6148914691236517205u),
      MakeInt128(1317624576693539401, 2635249153387078802u),
      MakeInt128(1152921504606846975, 18446744073709551615u),
      MakeInt128(1024819115206086200, 16397105843297379214u),
      MakeInt128(922337203685477580, 14757395258967641292u),
      MakeInt128(838488366986797800, 13415813871788764811u),
      MakeInt128(768614336404564650, 12297829382473034410u),
      MakeInt128(709490156681136600, 11351842506898185609u),
      MakeInt128(658812288346769700, 10540996613548315209u),
      MakeInt128(614891469123651720, 9838263505978427528u),
      MakeInt128(576460752303423487, 18446744073709551615u),
      MakeInt128(542551296285575047, 9765923333140350855u),
      MakeInt128(512409557603043100, 8198552921648689607u),
      MakeInt128(485440633518672410, 17475862806672206794u),
      MakeInt128(461168601842738790, 7378697629483820646u),
      MakeInt128(439208192231179800, 7027331075698876806u),
      MakeInt128(419244183493398900, 6707906935894382405u),
      MakeInt128(401016175515425035, 2406097053092550210u),
      MakeInt128(384307168202282325, 6148914691236517205u),
      MakeInt128(368934881474191032, 5902958103587056517u),
      MakeInt128(354745078340568300, 5675921253449092804u),
      MakeInt128(341606371735362066, 17763531330238827482u),
      MakeInt128(329406144173384850, 5270498306774157604u),
      MakeInt128(318047311615681924, 7633135478776366185u),
      MakeInt128(307445734561825860, 4919131752989213764u),
      MakeInt128(297528130221121800, 4760450083537948804u),
      MakeInt128(288230376151711743, 18446744073709551615u),
      MakeInt128(279496122328932600, 4471937957262921603u),
      MakeInt128(271275648142787523, 14106333703424951235u),
      MakeInt128(263524915338707880, 4216398645419326083u),
      MakeInt128(256204778801521550, 4099276460824344803u),
  };
  // This kVminOverBase is generated with
  //  for (int base = 2; base < 37; ++base) {
  //    absl::int128 min = std::numeric_limits<absl::int128>::min();
  //    auto result = min / base;
  //    std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
  //              << absl::Int128Low64(result) << "u),\n";
  //  }
  //
  // See https://godbolt.org/z/7djYWz
  //
  // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
  // to avoid a static initializer.
  //
  // The array is indexed by base.  The two leading zeros are placeholders for
  // bases 0 and 1; the remaining 35 entries cover bases 2 through 36, each
  // written as (signed high 64 bits, unsigned low 64 bits).
  template <>
  ABSL_CONST_INIT const int128 LookupTables<int128>::kVminOverBase[] = {
      0,
      0,
      MakeInt128(-4611686018427387904, 0u),
      MakeInt128(-3074457345618258603, 6148914691236517206u),
      MakeInt128(-2305843009213693952, 0u),
      MakeInt128(-1844674407370955162, 7378697629483820647u),
      MakeInt128(-1537228672809129302, 12297829382473034411u),
      MakeInt128(-1317624576693539402, 15811494920322472814u),
      MakeInt128(-1152921504606846976, 0u),
      MakeInt128(-1024819115206086201, 2049638230412172402u),
      MakeInt128(-922337203685477581, 3689348814741910324u),
      MakeInt128(-838488366986797801, 5030930201920786805u),
      MakeInt128(-768614336404564651, 6148914691236517206u),
      MakeInt128(-709490156681136601, 7094901566811366007u),
      MakeInt128(-658812288346769701, 7905747460161236407u),
      MakeInt128(-614891469123651721, 8608480567731124088u),
      MakeInt128(-576460752303423488, 0u),
      MakeInt128(-542551296285575048, 8680820740569200761u),
      MakeInt128(-512409557603043101, 10248191152060862009u),
      MakeInt128(-485440633518672411, 970881267037344822u),
      MakeInt128(-461168601842738791, 11068046444225730970u),
      MakeInt128(-439208192231179801, 11419412998010674810u),
      MakeInt128(-419244183493398901, 11738837137815169211u),
      MakeInt128(-401016175515425036, 16040647020617001406u),
      MakeInt128(-384307168202282326, 12297829382473034411u),
      MakeInt128(-368934881474191033, 12543785970122495099u),
      MakeInt128(-354745078340568301, 12770822820260458812u),
      MakeInt128(-341606371735362067, 683212743470724134u),
      MakeInt128(-329406144173384851, 13176245766935394012u),
      MakeInt128(-318047311615681925, 10813608594933185431u),
      MakeInt128(-307445734561825861, 13527612320720337852u),
      MakeInt128(-297528130221121801, 13686293990171602812u),
      MakeInt128(-288230376151711744, 0u),
      MakeInt128(-279496122328932601, 13974806116446630013u),
      MakeInt128(-271275648142787524, 4340410370284600381u),
      MakeInt128(-263524915338707881, 14230345428290225533u),
      MakeInt128(-256204778801521551, 14347467612885206813u),
  };
  // Primary-template definitions of the two lookup tables.  For these the
  // quotients are spelled out by X_OVER_BASE_INITIALIZER and computed by the
  // compiler from std::numeric_limits<IntType>::max() / min().
  template <typename IntType>
  ABSL_CONST_INIT const IntType LookupTables<IntType>::kVmaxOverBase[] =
      X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());

  template <typename IntType>
  ABSL_CONST_INIT const IntType LookupTables<IntType>::kVminOverBase[] =
      X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());

  // The initializer macro is only needed for the two definitions above.
  #undef X_OVER_BASE_INITIALIZER
  1118. template <typename IntType>
  1119. inline bool safe_parse_positive_int(absl::string_view text, int base,
  1120. absl::Nonnull<IntType*> value_p) {
  1121. IntType value = 0;
  1122. const IntType vmax = std::numeric_limits<IntType>::max();
  1123. assert(vmax > 0);
  1124. assert(base >= 0);
  1125. const IntType base_inttype = static_cast<IntType>(base);
  1126. assert(vmax >= base_inttype);
  1127. const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
  1128. assert(base < 2 ||
  1129. std::numeric_limits<IntType>::max() / base_inttype == vmax_over_base);
  1130. const char* start = text.data();
  1131. const char* end = start + text.size();
  1132. // loop over digits
  1133. for (; start < end; ++start) {
  1134. unsigned char c = static_cast<unsigned char>(start[0]);
  1135. IntType digit = static_cast<IntType>(kAsciiToInt[c]);
  1136. if (digit >= base_inttype) {
  1137. *value_p = value;
  1138. return false;
  1139. }
  1140. if (value > vmax_over_base) {
  1141. *value_p = vmax;
  1142. return false;
  1143. }
  1144. value *= base_inttype;
  1145. if (value > vmax - digit) {
  1146. *value_p = vmax;
  1147. return false;
  1148. }
  1149. value += digit;
  1150. }
  1151. *value_p = value;
  1152. return true;
  1153. }
  1154. template <typename IntType>
  1155. inline bool safe_parse_negative_int(absl::string_view text, int base,
  1156. absl::Nonnull<IntType*> value_p) {
  1157. IntType value = 0;
  1158. const IntType vmin = std::numeric_limits<IntType>::min();
  1159. assert(vmin < 0);
  1160. assert(vmin <= 0 - base);
  1161. IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
  1162. assert(base < 2 ||
  1163. std::numeric_limits<IntType>::min() / base == vmin_over_base);
  1164. // 2003 c++ standard [expr.mul]
  1165. // "... the sign of the remainder is implementation-defined."
  1166. // Although (vmin/base)*base + vmin%base is always vmin.
  1167. // 2011 c++ standard tightens the spec but we cannot rely on it.
  1168. // TODO(junyer): Handle this in the lookup table generation.
  1169. if (vmin % base > 0) {
  1170. vmin_over_base += 1;
  1171. }
  1172. const char* start = text.data();
  1173. const char* end = start + text.size();
  1174. // loop over digits
  1175. for (; start < end; ++start) {
  1176. unsigned char c = static_cast<unsigned char>(start[0]);
  1177. int digit = kAsciiToInt[c];
  1178. if (digit >= base) {
  1179. *value_p = value;
  1180. return false;
  1181. }
  1182. if (value < vmin_over_base) {
  1183. *value_p = vmin;
  1184. return false;
  1185. }
  1186. value *= base;
  1187. if (value < vmin + digit) {
  1188. *value_p = vmin;
  1189. return false;
  1190. }
  1191. value -= digit;
  1192. }
  1193. *value_p = value;
  1194. return true;
  1195. }
  1196. // Input format based on POSIX.1-2008 strtol
  1197. // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
  1198. template <typename IntType>
  1199. inline bool safe_int_internal(absl::string_view text,
  1200. absl::Nonnull<IntType*> value_p, int base) {
  1201. *value_p = 0;
  1202. bool negative;
  1203. if (!safe_parse_sign_and_base(&text, &base, &negative)) {
  1204. return false;
  1205. }
  1206. if (!negative) {
  1207. return safe_parse_positive_int(text, base, value_p);
  1208. } else {
  1209. return safe_parse_negative_int(text, base, value_p);
  1210. }
  1211. }
  1212. template <typename IntType>
  1213. inline bool safe_uint_internal(absl::string_view text,
  1214. absl::Nonnull<IntType*> value_p, int base) {
  1215. *value_p = 0;
  1216. bool negative;
  1217. if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
  1218. return false;
  1219. }
  1220. return safe_parse_positive_int(text, base, value_p);
  1221. }
  1222. } // anonymous namespace
  namespace numbers_internal {

  // Digit conversion.
  // kHexChar[n] is the lowercase hexadecimal digit for n in [0, 15].
  ABSL_CONST_INIT ABSL_DLL const char kHexChar[] =
      "0123456789abcdef";

  // Two lowercase hexadecimal characters for every byte value: the pair for
  // byte b starts at offset 2 * b.  256 pairs plus the terminating NUL give the
  // 513-byte array size.
  ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
      "000102030405060708090a0b0c0d0e0f"
      "101112131415161718191a1b1c1d1e1f"
      "202122232425262728292a2b2c2d2e2f"
      "303132333435363738393a3b3c3d3e3f"
      "404142434445464748494a4b4c4d4e4f"
      "505152535455565758595a5b5c5d5e5f"
      "606162636465666768696a6b6c6d6e6f"
      "707172737475767778797a7b7c7d7e7f"
      "808182838485868788898a8b8c8d8e8f"
      "909192939495969798999a9b9c9d9e9f"
      "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
      "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
      "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
      "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
      "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
      "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";

  // The functions below are thin, non-template entry points that forward to
  // safe_int_internal() / safe_uint_internal() for one fixed integer type each.
  // Each returns true only if all of `text` was parsed successfully.  A `base`
  // of 0 requests prefix-based detection; otherwise it must be in [2, 36].

  // Parses `text` as a signed 32-bit integer.
  bool safe_strto32_base(absl::string_view text, absl::Nonnull<int32_t*> value,
                         int base) {
    return safe_int_internal<int32_t>(text, value, base);
  }

  // Parses `text` as a signed 64-bit integer.
  bool safe_strto64_base(absl::string_view text, absl::Nonnull<int64_t*> value,
                         int base) {
    return safe_int_internal<int64_t>(text, value, base);
  }

  // Parses `text` as a signed 128-bit integer.
  bool safe_strto128_base(absl::string_view text, absl::Nonnull<int128*> value,
                          int base) {
    return safe_int_internal<absl::int128>(text, value, base);
  }

  // Parses `text` as an unsigned 32-bit integer; a '-' sign is rejected.
  bool safe_strtou32_base(absl::string_view text, absl::Nonnull<uint32_t*> value,
                          int base) {
    return safe_uint_internal<uint32_t>(text, value, base);
  }

  // Parses `text` as an unsigned 64-bit integer; a '-' sign is rejected.
  bool safe_strtou64_base(absl::string_view text, absl::Nonnull<uint64_t*> value,
                          int base) {
    return safe_uint_internal<uint64_t>(text, value, base);
  }

  // Parses `text` as an unsigned 128-bit integer; a '-' sign is rejected.
  bool safe_strtou128_base(absl::string_view text, absl::Nonnull<uint128*> value,
                           int base) {
    return safe_uint_internal<absl::uint128>(text, value, base);
  }

  }  // namespace numbers_internal
  1269. ABSL_NAMESPACE_END
  1270. } // namespace absl