numbers.cc 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146
  1. // Copyright 2017 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // This file contains string processing functions related to
  15. // numeric values.
  16. #include "absl/strings/numbers.h"
  17. #include <algorithm>
  18. #include <array>
  19. #include <cassert>
  20. #include <cfloat> // for DBL_DIG and FLT_DIG
  21. #include <cmath> // for HUGE_VAL
  22. #include <cstdint>
  23. #include <cstdio>
  24. #include <cstdlib>
  25. #include <cstring>
  26. #include <iterator>
  27. #include <limits>
  28. #include <system_error> // NOLINT(build/c++11)
  29. #include <utility>
  30. #include "absl/base/attributes.h"
  31. #include "absl/base/config.h"
  32. #include "absl/base/internal/endian.h"
  33. #include "absl/base/internal/raw_logging.h"
  34. #include "absl/base/nullability.h"
  35. #include "absl/base/optimization.h"
  36. #include "absl/numeric/bits.h"
  37. #include "absl/numeric/int128.h"
  38. #include "absl/strings/ascii.h"
  39. #include "absl/strings/charconv.h"
  40. #include "absl/strings/match.h"
  41. #include "absl/strings/string_view.h"
  42. namespace absl {
  43. ABSL_NAMESPACE_BEGIN
  44. bool SimpleAtof(absl::string_view str, absl::Nonnull<float*> out) {
  45. *out = 0.0;
  46. str = StripAsciiWhitespace(str);
  47. // std::from_chars doesn't accept an initial +, but SimpleAtof does, so if one
  48. // is present, skip it, while avoiding accepting "+-0" as valid.
  49. if (!str.empty() && str[0] == '+') {
  50. str.remove_prefix(1);
  51. if (!str.empty() && str[0] == '-') {
  52. return false;
  53. }
  54. }
  55. auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
  56. if (result.ec == std::errc::invalid_argument) {
  57. return false;
  58. }
  59. if (result.ptr != str.data() + str.size()) {
  60. // not all non-whitespace characters consumed
  61. return false;
  62. }
  63. // from_chars() with DR 3081's current wording will return max() on
  64. // overflow. SimpleAtof returns infinity instead.
  65. if (result.ec == std::errc::result_out_of_range) {
  66. if (*out > 1.0) {
  67. *out = std::numeric_limits<float>::infinity();
  68. } else if (*out < -1.0) {
  69. *out = -std::numeric_limits<float>::infinity();
  70. }
  71. }
  72. return true;
  73. }
  74. bool SimpleAtod(absl::string_view str, absl::Nonnull<double*> out) {
  75. *out = 0.0;
  76. str = StripAsciiWhitespace(str);
  77. // std::from_chars doesn't accept an initial +, but SimpleAtod does, so if one
  78. // is present, skip it, while avoiding accepting "+-0" as valid.
  79. if (!str.empty() && str[0] == '+') {
  80. str.remove_prefix(1);
  81. if (!str.empty() && str[0] == '-') {
  82. return false;
  83. }
  84. }
  85. auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
  86. if (result.ec == std::errc::invalid_argument) {
  87. return false;
  88. }
  89. if (result.ptr != str.data() + str.size()) {
  90. // not all non-whitespace characters consumed
  91. return false;
  92. }
  93. // from_chars() with DR 3081's current wording will return max() on
  94. // overflow. SimpleAtod returns infinity instead.
  95. if (result.ec == std::errc::result_out_of_range) {
  96. if (*out > 1.0) {
  97. *out = std::numeric_limits<double>::infinity();
  98. } else if (*out < -1.0) {
  99. *out = -std::numeric_limits<double>::infinity();
  100. }
  101. }
  102. return true;
  103. }
  104. bool SimpleAtob(absl::string_view str, absl::Nonnull<bool*> out) {
  105. ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
  106. if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
  107. EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
  108. EqualsIgnoreCase(str, "1")) {
  109. *out = true;
  110. return true;
  111. }
  112. if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
  113. EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
  114. EqualsIgnoreCase(str, "0")) {
  115. *out = false;
  116. return true;
  117. }
  118. return false;
  119. }
  120. // ----------------------------------------------------------------------
  121. // FastIntToBuffer() overloads
  122. //
  123. // Like the Fast*ToBuffer() functions above, these are intended for speed.
  124. // Unlike the Fast*ToBuffer() functions, however, these functions write
  125. // their output to the beginning of the buffer. The caller is responsible
  126. // for ensuring that the buffer has enough space to hold the output.
  127. //
  128. // Returns a pointer to the end of the string (i.e. the null character
  129. // terminating the string).
  130. // ----------------------------------------------------------------------
  131. namespace {
// Various routines to encode integers to strings.
// We split data encodings into groups of 2 digits, 4 digits and 8 digits, as
// it's easier to combine powers of two into scalar arithmetic.
// The previous implementation used a lookup table of 200 bytes for every 2
// bytes and it was memory bound: any L1 cache miss would result in a much
// slower result. When benchmarking with a cache eviction rate of several
// percent, this implementation proved to be better.
// These constants represent '00', '0000' and '00000000' as ASCII strings in
// integers. Adding one of them to a value whose bytes each hold a digit from
// 0 to 9 turns every byte into the matching ASCII character, because
// 'i' = '0' + i for 0 <= i <= 9.
constexpr uint32_t kTwoZeroBytes = 0x0101 * '0';
constexpr uint64_t kFourZeroBytes = 0x01010101 * '0';
constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0';
// * 103 / 1024 is a division by 10 for values from 0 to 99. It's also a
// division of a structure [k takes 2 bytes][m takes 2 bytes], then * 103 / 1024
// will be [k / 10][m / 10]. It allows parallel division.
// (Users mask the product afterwards to discard bits that spill from one
// 2-byte lane into the next.)
constexpr uint64_t kDivisionBy10Mul = 103u;
constexpr uint64_t kDivisionBy10Div = 1 << 10;
// * 10486 / 1048576 is a division by 100 for values from 0 to 9999.
constexpr uint64_t kDivisionBy100Mul = 10486u;
constexpr uint64_t kDivisionBy100Div = 1 << 20;
  153. // Encode functions write the ASCII output of input `n` to `out_str`.
  154. inline char* EncodeHundred(uint32_t n, absl::Nonnull<char*> out_str) {
  155. int num_digits = static_cast<int>(n - 10) >> 8;
  156. uint32_t div10 = (n * kDivisionBy10Mul) / kDivisionBy10Div;
  157. uint32_t mod10 = n - 10u * div10;
  158. uint32_t base = kTwoZeroBytes + div10 + (mod10 << 8);
  159. base >>= num_digits & 8;
  160. little_endian::Store16(out_str, static_cast<uint16_t>(base));
  161. return out_str + 2 + num_digits;
  162. }
// Writes the decimal ASCII digits of `n` to `out_str` without leading zeros
// and returns a pointer just past the last digit. No terminating NUL is
// appended, and a full 32-bit store is always issued at `out_str`.
//
// NOTE(review): the * 10486 / 1048576 division is only documented for values
// up to 9999 and the ABSL_ASSUME below requires a nonzero digit, so callers
// presumably must pass 1 <= n <= 9999 - confirm against the call sites.
inline char* EncodeTenThousand(uint32_t n, absl::Nonnull<char*> out_str) {
  // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive
  // blocks. 123 -> [\0\1][\0\23]. We divide by 10 both blocks
  // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in
  // parallel". Then we combine both results to have both ASCII digits,
  // strip the zero bytes of leading zero digits, add ASCII '0000' and return.
  uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div;
  uint32_t mod100 = n - 100ull * div100;
  // Upper two digits go in the low 16 bits so that they are stored first.
  uint32_t hundreds = (mod100 << 16) + div100;
  uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
  // Keep only the per-lane quotient; the mask drops bits spilled across lanes.
  tens &= (0xFull << 16) | 0xFull;
  // Place each lane's remainder (hundreds - 10 * tens) in the byte above its
  // quotient, giving one decimal digit per byte.
  tens += (hundreds - 10ull * tens) << 8;
  ABSL_ASSUME(tens != 0);
  // The result can contain low-order zero bytes, which correspond to leading
  // zero digits; we need to strip them up to the first significant byte in
  // the final representation. For example, for n = 123, tens has the
  // representation \0\1\2\3. We do `& -8` to round the bit count down to a
  // multiple of 8 so that whole zero bytes are stripped, not all zero bits.
  // countr_zero to help.
  // 0 minus 8 to make MSVC happy.
  uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(tens)) & (0 - 8u);
  tens += kFourZeroBytes;
  tens >>= zeroes;
  little_endian::Store32(out_str, tens);
  // zeroes / 8 is the number of leading digits that were dropped.
  return out_str + sizeof(tens) - zeroes / 8;
}
// Helper function to produce an ASCII representation of `i`.
//
// Function returns an 8-byte integer which when summed with `kEightZeroBytes`,
// can be treated as a printable buffer with ascii representation of `i`,
// possibly with leading zeros.
//
// Example:
//
// uint64_t buffer = PrepareEightDigits(102030) + kEightZeroBytes;
// char* ascii = reinterpret_cast<char*>(&buffer);
// // Note two leading zeros:
// EXPECT_EQ(absl::string_view(ascii, 8), "00102030");
//
// Pre-condition: `i` must be less than 100000000.
inline uint64_t PrepareEightDigits(uint32_t i) {
  ABSL_ASSUME(i < 10000'0000);
  // Prepare 2 blocks of 4 digits "in parallel".
  uint32_t hi = i / 10000;
  uint32_t lo = i % 10000;
  // The upper four digits live in the low 32 bits, the lower four digits in
  // the high 32 bits.
  uint64_t merged = hi | (uint64_t{lo} << 32);
  // Divide both 32-bit halves by 100 at once; the mask keeps each quotient
  // (at most 99, so 7 bits) and drops bits that crossed between the halves.
  uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) &
                    ((0x7Full << 32) | 0x7Full);
  uint64_t mod100 = merged - 100ull * div100;
  // Four 16-bit lanes, each holding a two-digit value.
  uint64_t hundreds = (mod100 << 16) + div100;
  // Divide all four lanes by 10 at once and keep the per-lane quotient.
  uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
  tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull;
  // Put each lane's remainder in the byte above its quotient: one digit per
  // byte, most significant digit in the lowest byte.
  tens += (hundreds - 10ull * tens) << 8;
  return tens;
}
  217. inline ABSL_ATTRIBUTE_ALWAYS_INLINE absl::Nonnull<char*> EncodeFullU32(
  218. uint32_t n, absl::Nonnull<char*> out_str) {
  219. if (n < 10) {
  220. *out_str = static_cast<char>('0' + n);
  221. return out_str + 1;
  222. }
  223. if (n < 100'000'000) {
  224. uint64_t bottom = PrepareEightDigits(n);
  225. ABSL_ASSUME(bottom != 0);
  226. // 0 minus 8 to make MSVC happy.
  227. uint32_t zeroes =
  228. static_cast<uint32_t>(absl::countr_zero(bottom)) & (0 - 8u);
  229. little_endian::Store64(out_str, (bottom + kEightZeroBytes) >> zeroes);
  230. return out_str + sizeof(bottom) - zeroes / 8;
  231. }
  232. uint32_t div08 = n / 100'000'000;
  233. uint32_t mod08 = n % 100'000'000;
  234. uint64_t bottom = PrepareEightDigits(mod08) + kEightZeroBytes;
  235. out_str = EncodeHundred(div08, out_str);
  236. little_endian::Store64(out_str, bottom);
  237. return out_str + sizeof(bottom);
  238. }
  239. inline ABSL_ATTRIBUTE_ALWAYS_INLINE char* EncodeFullU64(uint64_t i,
  240. char* buffer) {
  241. if (i <= std::numeric_limits<uint32_t>::max()) {
  242. return EncodeFullU32(static_cast<uint32_t>(i), buffer);
  243. }
  244. uint32_t mod08;
  245. if (i < 1'0000'0000'0000'0000ull) {
  246. uint32_t div08 = static_cast<uint32_t>(i / 100'000'000ull);
  247. mod08 = static_cast<uint32_t>(i % 100'000'000ull);
  248. buffer = EncodeFullU32(div08, buffer);
  249. } else {
  250. uint64_t div08 = i / 100'000'000ull;
  251. mod08 = static_cast<uint32_t>(i % 100'000'000ull);
  252. uint32_t div016 = static_cast<uint32_t>(div08 / 100'000'000ull);
  253. uint32_t div08mod08 = static_cast<uint32_t>(div08 % 100'000'000ull);
  254. uint64_t mid_result = PrepareEightDigits(div08mod08) + kEightZeroBytes;
  255. buffer = EncodeTenThousand(div016, buffer);
  256. little_endian::Store64(buffer, mid_result);
  257. buffer += sizeof(mid_result);
  258. }
  259. uint64_t mod_result = PrepareEightDigits(mod08) + kEightZeroBytes;
  260. little_endian::Store64(buffer, mod_result);
  261. return buffer + sizeof(mod_result);
  262. }
  263. } // namespace
  264. void numbers_internal::PutTwoDigits(uint32_t i, absl::Nonnull<char*> buf) {
  265. assert(i < 100);
  266. uint32_t base = kTwoZeroBytes;
  267. uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
  268. uint32_t mod10 = i - 10u * div10;
  269. base += div10 + (mod10 << 8);
  270. little_endian::Store16(buf, static_cast<uint16_t>(base));
  271. }
  272. absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  273. uint32_t n, absl::Nonnull<char*> out_str) {
  274. out_str = EncodeFullU32(n, out_str);
  275. *out_str = '\0';
  276. return out_str;
  277. }
  278. absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  279. int32_t i, absl::Nonnull<char*> buffer) {
  280. uint32_t u = static_cast<uint32_t>(i);
  281. if (i < 0) {
  282. *buffer++ = '-';
  283. // We need to do the negation in modular (i.e., "unsigned")
  284. // arithmetic; MSVC++ apparently warns for plain "-u", so
  285. // we write the equivalent expression "0 - u" instead.
  286. u = 0 - u;
  287. }
  288. buffer = EncodeFullU32(u, buffer);
  289. *buffer = '\0';
  290. return buffer;
  291. }
  292. absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  293. uint64_t i, absl::Nonnull<char*> buffer) {
  294. buffer = EncodeFullU64(i, buffer);
  295. *buffer = '\0';
  296. return buffer;
  297. }
  298. absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  299. int64_t i, absl::Nonnull<char*> buffer) {
  300. uint64_t u = static_cast<uint64_t>(i);
  301. if (i < 0) {
  302. *buffer++ = '-';
  303. // We need to do the negation in modular (i.e., "unsigned")
  304. // arithmetic; MSVC++ apparently warns for plain "-u", so
  305. // we write the equivalent expression "0 - u" instead.
  306. u = 0 - u;
  307. }
  308. buffer = EncodeFullU64(u, buffer);
  309. *buffer = '\0';
  310. return buffer;
  311. }
// Given a 128-bit number expressed as a pair of uint64_t, high half first,
// return that number multiplied by the given 32-bit value. If the result is
// too large to fit in a 128-bit number, divide it by 2 until it fits.
//
// `num` is {high 64 bits, low 64 bits}; the result uses the same layout.
// When the product overflows 128 bits the low-order bits shifted out are
// discarded, and the amount of shifting is not reported to the caller.
static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
                                           uint32_t mul) {
  uint64_t bits0_31 = num.second & 0xFFFFFFFF;
  uint64_t bits32_63 = num.second >> 32;
  uint64_t bits64_95 = num.first & 0xFFFFFFFF;
  uint64_t bits96_127 = num.first >> 32;
  // The picture so far: each of these 64-bit values has only the lower 32 bits
  // filled in.
  // bits96_127: [ 00000000 xxxxxxxx ]
  // bits64_95:  [ 00000000 xxxxxxxx ]
  // bits32_63:  [ 00000000 xxxxxxxx ]
  // bits0_31:   [ 00000000 xxxxxxxx ]
  bits0_31 *= mul;
  bits32_63 *= mul;
  bits64_95 *= mul;
  bits96_127 *= mul;
  // Now the top halves may also have value, though all 64 of their bits will
  // never be set at the same time, since they are a result of a 32x32 bit
  // multiply. This makes the carry calculation slightly easier.
  // bits96_127: [ mmmmmmmm | mmmmmmmm ]
  // bits64_95:  [ | mmmmmmmm mmmmmmmm | ]
  // bits32_63:  | [ mmmmmmmm | mmmmmmmm ]
  // bits0_31:   | [ | mmmmmmmm mmmmmmmm ]
  // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ]
  uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
  // (bits0_63 < bits0_31) is the carry out of the low 64-bit addition.
  uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
                        (bits0_63 < bits0_31);
  // Likewise (bits64_127 < bits64_95) is the carry out of the middle sum.
  uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
  // Fast path: the product still fits in 128 bits.
  if (bits128_up == 0) return {bits64_127, bits0_63};
  // Otherwise shift right by exactly the number of significant bits in the
  // overflow word, so that its top bit lands in bit 127 of the result.
  auto shift = static_cast<unsigned>(bit_width(bits128_up));
  uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
  uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
  return {hi, lo};
}
  349. // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
  350. // where the first bit is always a one. So PowFive(1, 0) starts 0b100000,
  351. // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
  352. static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
  353. std::pair<uint64_t, uint64_t> result = {num, 0};
  354. while (expfive >= 13) {
  355. // 5^13 is the highest power of five that will fit in a 32-bit integer.
  356. result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
  357. expfive -= 13;
  358. }
  359. constexpr uint32_t powers_of_five[13] = {
  360. 1,
  361. 5,
  362. 5 * 5,
  363. 5 * 5 * 5,
  364. 5 * 5 * 5 * 5,
  365. 5 * 5 * 5 * 5 * 5,
  366. 5 * 5 * 5 * 5 * 5 * 5,
  367. 5 * 5 * 5 * 5 * 5 * 5 * 5,
  368. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  369. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  370. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  371. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  372. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
  373. result = Mul32(result, powers_of_five[expfive & 15]);
  374. int shift = countl_zero(result.first);
  375. if (shift != 0) {
  376. result.first = (result.first << shift) + (result.second >> (64 - shift));
  377. result.second = (result.second << shift);
  378. }
  379. return result;
  380. }
// Result of SplitToSix(): a value broken into six significant decimal digits
// and the base-10 exponent of the first of those digits.
struct ExpDigits {
  // Base-10 exponent of digits[0]; e.g. 0 for the value 1.
  int32_t exponent;
  // Six ASCII digits, most significant first. Not NUL-terminated.
  char digits[6];
};
// SplitToSix converts value, a positive double-precision floating-point number,
// into a base-10 exponent and 6 ASCII digits, where the first digit is never
// zero. For example, SplitToSix(1) returns an exponent of zero and a digits
// array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between
// two possible representations, e.g. value = 100000.5, then "round to even" is
// performed.
//
// SixDigitsToBuffer, the caller in this file, filters out NaN, zero, negative
// values and infinity before calling this function.
static ExpDigits SplitToSix(const double value) {
  ExpDigits exp_dig;
  int exp = 5;
  double d = value;
  // First step: calculate a close approximation of the output, where the
  // value d will be between 100,000 and 999,999, representing the digits
  // in the output ASCII array, and exp is the base-10 exponent. It would be
  // faster to use a table here, and to look up the base-2 exponent of value,
  // however value is an IEEE-754 64-bit number, so the table would have 2,000
  // entries, which is not cache-friendly.
  // Each line below scales d by a power of ten whose exponent is a power of
  // two, adjusting exp to match, so at most nine steps are needed.
  if (d >= 999999.5) {
    if (d >= 1e+261) exp += 256, d *= 1e-256;
    if (d >= 1e+133) exp += 128, d *= 1e-128;
    if (d >= 1e+69) exp += 64, d *= 1e-64;
    if (d >= 1e+37) exp += 32, d *= 1e-32;
    if (d >= 1e+21) exp += 16, d *= 1e-16;
    if (d >= 1e+13) exp += 8, d *= 1e-8;
    if (d >= 1e+9) exp += 4, d *= 1e-4;
    if (d >= 1e+7) exp += 2, d *= 1e-2;
    if (d >= 1e+6) exp += 1, d *= 1e-1;
  } else {
    if (d < 1e-250) exp -= 256, d *= 1e256;
    if (d < 1e-122) exp -= 128, d *= 1e128;
    if (d < 1e-58) exp -= 64, d *= 1e64;
    if (d < 1e-26) exp -= 32, d *= 1e32;
    if (d < 1e-10) exp -= 16, d *= 1e16;
    if (d < 1e-2) exp -= 8, d *= 1e8;
    if (d < 1e+2) exp -= 4, d *= 1e4;
    if (d < 1e+4) exp -= 2, d *= 1e2;
    if (d < 1e+5) exp -= 1, d *= 1e1;
  }
  // At this point, d is in the range [99999.5..999999.5) and exp is in the
  // range [-324..308]. Since we need to round d up, we want to add a half
  // and truncate.
  // However, the technique above may have lost some precision, due to its
  // repeated multiplication by constants that each may be off by half a bit
  // of precision. This only matters if we're close to the edge though.
  // Since we'd like to know if the fractional part of d is close to a half,
  // we multiply it by 65536 and see if the fractional part is close to 32768.
  // (The number doesn't have to be a power of two, but powers of two are
  // faster.)
  uint64_t d64k = d * 65536;
  uint32_t dddddd;  // A 6-digit decimal integer.
  if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
    // OK, it's fairly likely that precision was lost above, which is
    // not a surprise given only 52 mantissa bits are available. Therefore
    // redo the calculation using 128-bit numbers. (64 bits are not enough).
    // Start out with digits rounded down; maybe add one below.
    dddddd = static_cast<uint32_t>(d64k / 65536);
    // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual
    // value we're representing, of course, is M.mmm... * 2^exp2.
    int exp2;
    double m = std::frexp(value, &exp2);
    uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
    // std::frexp returns an m value in the range [0.5, 1.0), however we
    // can't multiply it by 2^64 and convert to an integer because some FPUs
    // throw an exception when converting a number higher than 2^63 into an
    // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter
    // since m only has 52 significant bits anyway.
    // So we multiply by 2^63 above and supply the last factor of two here.
    mantissa <<= 1;
    exp2 -= 64;  // not needed, but nice for debugging
    // OK, we are here to compare:
    // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2
    // so we can round up dddddd if appropriate. Those values span the full
    // range of 600 orders of magnitude of IEEE 64-bit floating-point.
    // Fortunately, we already know they are very close, so we don't need to
    // track the base-2 exponent of both sides. This greatly simplifies the
    // math since the 2^exp2 calculation is unnecessary and the power-of-10
    // calculation can become a power-of-5 instead.
    std::pair<uint64_t, uint64_t> edge, val;
    if (exp >= 6) {
      // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
      // Since we're tossing powers of two, 2 * dddddd + 1 is the
      // same as dddddd + 0.5
      edge = PowFive(2 * dddddd + 1, exp - 5);
      val.first = mantissa;
      val.second = 0;
    } else {
      // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
      // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to
      // mantissa * 5 ^ (5 - exp)
      edge = PowFive(2 * dddddd + 1, 0);
      val = PowFive(mantissa, 5 - exp);
    }
    // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
    // val.second, edge.first, edge.second);
    if (val > edge) {
      // Strictly above the halfway point: round up.
      dddddd++;
    } else if (val == edge) {
      // Exactly halfway: round to even, i.e. bump only an odd last digit.
      dddddd += (dddddd & 1);
    }
  } else {
    // Here, we are not close to the edge.
    dddddd = static_cast<uint32_t>((d64k + 32768) / 65536);
  }
  // Rounding may have carried into a seventh digit (999999.x -> 1000000);
  // renormalize to six digits and bump the exponent.
  if (dddddd == 1000000) {
    dddddd = 100000;
    exp += 1;
  }
  exp_dig.exponent = exp;
  // Emit the six digits two at a time, most significant pair first.
  uint32_t two_digits = dddddd / 10000;
  dddddd -= two_digits * 10000;
  numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]);
  two_digits = dddddd / 100;
  dddddd -= two_digits * 100;
  numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]);
  numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]);
  return exp_dig;
}
  499. // Helper function for fast formatting of floating-point.
  500. // The result is the same as "%g", a.k.a. "%.6g".
  501. size_t numbers_internal::SixDigitsToBuffer(double d,
  502. absl::Nonnull<char*> const buffer) {
  503. static_assert(std::numeric_limits<float>::is_iec559,
  504. "IEEE-754/IEC-559 support only");
  505. char* out = buffer; // we write data to out, incrementing as we go, but
  506. // FloatToBuffer always returns the address of the buffer
  507. // passed in.
  508. if (std::isnan(d)) {
  509. strcpy(out, "nan"); // NOLINT(runtime/printf)
  510. return 3;
  511. }
  512. if (d == 0) { // +0 and -0 are handled here
  513. if (std::signbit(d)) *out++ = '-';
  514. *out++ = '0';
  515. *out = 0;
  516. return static_cast<size_t>(out - buffer);
  517. }
  518. if (d < 0) {
  519. *out++ = '-';
  520. d = -d;
  521. }
  522. if (d > std::numeric_limits<double>::max()) {
  523. strcpy(out, "inf"); // NOLINT(runtime/printf)
  524. return static_cast<size_t>(out + 3 - buffer);
  525. }
  526. auto exp_dig = SplitToSix(d);
  527. int exp = exp_dig.exponent;
  528. const char* digits = exp_dig.digits;
  529. out[0] = '0';
  530. out[1] = '.';
  531. switch (exp) {
  532. case 5:
  533. memcpy(out, &digits[0], 6), out += 6;
  534. *out = 0;
  535. return static_cast<size_t>(out - buffer);
  536. case 4:
  537. memcpy(out, &digits[0], 5), out += 5;
  538. if (digits[5] != '0') {
  539. *out++ = '.';
  540. *out++ = digits[5];
  541. }
  542. *out = 0;
  543. return static_cast<size_t>(out - buffer);
  544. case 3:
  545. memcpy(out, &digits[0], 4), out += 4;
  546. if ((digits[5] | digits[4]) != '0') {
  547. *out++ = '.';
  548. *out++ = digits[4];
  549. if (digits[5] != '0') *out++ = digits[5];
  550. }
  551. *out = 0;
  552. return static_cast<size_t>(out - buffer);
  553. case 2:
  554. memcpy(out, &digits[0], 3), out += 3;
  555. *out++ = '.';
  556. memcpy(out, &digits[3], 3);
  557. out += 3;
  558. while (out[-1] == '0') --out;
  559. if (out[-1] == '.') --out;
  560. *out = 0;
  561. return static_cast<size_t>(out - buffer);
  562. case 1:
  563. memcpy(out, &digits[0], 2), out += 2;
  564. *out++ = '.';
  565. memcpy(out, &digits[2], 4);
  566. out += 4;
  567. while (out[-1] == '0') --out;
  568. if (out[-1] == '.') --out;
  569. *out = 0;
  570. return static_cast<size_t>(out - buffer);
  571. case 0:
  572. memcpy(out, &digits[0], 1), out += 1;
  573. *out++ = '.';
  574. memcpy(out, &digits[1], 5);
  575. out += 5;
  576. while (out[-1] == '0') --out;
  577. if (out[-1] == '.') --out;
  578. *out = 0;
  579. return static_cast<size_t>(out - buffer);
  580. case -4:
  581. out[2] = '0';
  582. ++out;
  583. ABSL_FALLTHROUGH_INTENDED;
  584. case -3:
  585. out[2] = '0';
  586. ++out;
  587. ABSL_FALLTHROUGH_INTENDED;
  588. case -2:
  589. out[2] = '0';
  590. ++out;
  591. ABSL_FALLTHROUGH_INTENDED;
  592. case -1:
  593. out += 2;
  594. memcpy(out, &digits[0], 6);
  595. out += 6;
  596. while (out[-1] == '0') --out;
  597. *out = 0;
  598. return static_cast<size_t>(out - buffer);
  599. }
  600. assert(exp < -4 || exp >= 6);
  601. out[0] = digits[0];
  602. assert(out[1] == '.');
  603. out += 2;
  604. memcpy(out, &digits[1], 5), out += 5;
  605. while (out[-1] == '0') --out;
  606. if (out[-1] == '.') --out;
  607. *out++ = 'e';
  608. if (exp > 0) {
  609. *out++ = '+';
  610. } else {
  611. *out++ = '-';
  612. exp = -exp;
  613. }
  614. if (exp > 99) {
  615. int dig1 = exp / 100;
  616. exp -= dig1 * 100;
  617. *out++ = '0' + static_cast<char>(dig1);
  618. }
  619. PutTwoDigits(static_cast<uint32_t>(exp), out);
  620. out += 2;
  621. *out = 0;
  622. return static_cast<size_t>(out - buffer);
  623. }
  624. namespace {
// Represents integer values of digits.
// Uses 36 to indicate an invalid character since we support
// bases up to 36.
//
// The table is indexed by the unsigned value of a character: '0'-'9' map to
// 0-9, and 'A'-'Z' as well as 'a'-'z' map to 10-35. Every other entry is 36.
static constexpr std::array<int8_t, 256> kAsciiToInt = {
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5,
    6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
    18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
    36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
    36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
  643. // Parse the sign and optional hex or oct prefix in text.
  644. inline bool safe_parse_sign_and_base(
  645. absl::Nonnull<absl::string_view*> text /*inout*/,
  646. absl::Nonnull<int*> base_ptr /*inout*/,
  647. absl::Nonnull<bool*> negative_ptr /*output*/) {
  648. if (text->data() == nullptr) {
  649. return false;
  650. }
  651. const char* start = text->data();
  652. const char* end = start + text->size();
  653. int base = *base_ptr;
  654. // Consume whitespace.
  655. while (start < end &&
  656. absl::ascii_isspace(static_cast<unsigned char>(start[0]))) {
  657. ++start;
  658. }
  659. while (start < end &&
  660. absl::ascii_isspace(static_cast<unsigned char>(end[-1]))) {
  661. --end;
  662. }
  663. if (start >= end) {
  664. return false;
  665. }
  666. // Consume sign.
  667. *negative_ptr = (start[0] == '-');
  668. if (*negative_ptr || start[0] == '+') {
  669. ++start;
  670. if (start >= end) {
  671. return false;
  672. }
  673. }
  674. // Consume base-dependent prefix.
  675. // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
  676. // base 16: "0x" -> base 16
  677. // Also validate the base.
  678. if (base == 0) {
  679. if (end - start >= 2 && start[0] == '0' &&
  680. (start[1] == 'x' || start[1] == 'X')) {
  681. base = 16;
  682. start += 2;
  683. if (start >= end) {
  684. // "0x" with no digits after is invalid.
  685. return false;
  686. }
  687. } else if (end - start >= 1 && start[0] == '0') {
  688. base = 8;
  689. start += 1;
  690. } else {
  691. base = 10;
  692. }
  693. } else if (base == 16) {
  694. if (end - start >= 2 && start[0] == '0' &&
  695. (start[1] == 'x' || start[1] == 'X')) {
  696. start += 2;
  697. if (start >= end) {
  698. // "0x" with no digits after is invalid.
  699. return false;
  700. }
  701. }
  702. } else if (base >= 2 && base <= 36) {
  703. // okay
  704. } else {
  705. return false;
  706. }
  707. *text = absl::string_view(start, static_cast<size_t>(end - start));
  708. *base_ptr = base;
  709. return true;
  710. }
  711. // Consume digits.
  712. //
  713. // The classic loop:
  714. //
  715. // for each digit
  716. // value = value * base + digit
  717. // value *= sign
  718. //
  719. // The classic loop needs overflow checking. It also fails on the most
  720. // negative integer, -2147483648 in 32-bit two's complement representation.
  721. //
  722. // My improved loop:
  723. //
  724. // if (!negative)
  725. // for each digit
  726. // value = value * base
  727. // value = value + digit
  728. // else
  729. // for each digit
  730. // value = value * base
  731. // value = value - digit
  732. //
  733. // Overflow checking becomes simple.
  734. // Lookup tables per IntType:
  735. // vmax/base and vmin/base are precomputed because division costs at least 8ns.
  736. // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
  737. // struct of arrays) would probably be better in terms of d-cache for the most
  738. // commonly used bases.
  739. template <typename IntType>
  740. struct LookupTables {
  741. ABSL_CONST_INIT static const IntType kVmaxOverBase[];
  742. ABSL_CONST_INIT static const IntType kVminOverBase[];
  743. };
  // An array initializer macro for X/base where base in [0, 36].
  // Expands to a brace-enclosed list of 37 entries; entry `base` is (X) / base
  // and entries 0 and 1 are zero placeholders.
  // However, note that lookups for base in [0, 1] should never happen because
  // base has been validated to be in [2, 36] by safe_parse_sign_and_base().
  //
  // `X` is parenthesized at every use so that an argument containing
  // lower-precedence operators (for example `a + b`) is divided as a whole
  // rather than expanding to `a + b / 2`.
  #define X_OVER_BASE_INITIALIZER(X) \
    { \
      0, 0, (X) / 2, (X) / 3, (X) / 4, (X) / 5, (X) / 6, (X) / 7, \
          (X) / 8, (X) / 9, (X) / 10, (X) / 11, (X) / 12, (X) / 13, \
          (X) / 14, (X) / 15, (X) / 16, (X) / 17, (X) / 18, (X) / 19, \
          (X) / 20, (X) / 21, (X) / 22, (X) / 23, (X) / 24, (X) / 25, \
          (X) / 26, (X) / 27, (X) / 28, (X) / 29, (X) / 30, (X) / 31, \
          (X) / 32, (X) / 33, (X) / 34, (X) / 35, (X) / 36, \
    }
  // Explicit specialization of kVmaxOverBase for uint128: entry `base` is the
  // largest uint128 value divided by `base`. Entries 0 and 1 are zero
  // placeholders so that the array can be indexed directly by base.
  //
  755. // This kVmaxOverBase is generated with
  756. // for (int base = 2; base < 37; ++base) {
  757. // absl::uint128 max = std::numeric_limits<absl::uint128>::max();
  758. // auto result = max / base;
  759. // std::cout << " MakeUint128(" << absl::Uint128High64(result) << "u, "
  760. // << absl::Uint128Low64(result) << "u),\n";
  761. // }
  762. // See https://godbolt.org/z/aneYsb
  763. //
  764. // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting
  765. // array to avoid a static initializer.
  766. template <>
  767. ABSL_CONST_INIT const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
  768. 0,
  769. 0,
  770. MakeUint128(9223372036854775807u, 18446744073709551615u),
  771. MakeUint128(6148914691236517205u, 6148914691236517205u),
  772. MakeUint128(4611686018427387903u, 18446744073709551615u),
  773. MakeUint128(3689348814741910323u, 3689348814741910323u),
  774. MakeUint128(3074457345618258602u, 12297829382473034410u),
  775. MakeUint128(2635249153387078802u, 5270498306774157604u),
  776. MakeUint128(2305843009213693951u, 18446744073709551615u),
  777. MakeUint128(2049638230412172401u, 14347467612885206812u),
  778. MakeUint128(1844674407370955161u, 11068046444225730969u),
  779. MakeUint128(1676976733973595601u, 8384883669867978007u),
  780. MakeUint128(1537228672809129301u, 6148914691236517205u),
  781. MakeUint128(1418980313362273201u, 4256940940086819603u),
  782. MakeUint128(1317624576693539401u, 2635249153387078802u),
  783. MakeUint128(1229782938247303441u, 1229782938247303441u),
  784. MakeUint128(1152921504606846975u, 18446744073709551615u),
  785. MakeUint128(1085102592571150095u, 1085102592571150095u),
  786. MakeUint128(1024819115206086200u, 16397105843297379214u),
  787. MakeUint128(970881267037344821u, 16504981539634861972u),
  788. MakeUint128(922337203685477580u, 14757395258967641292u),
  789. MakeUint128(878416384462359600u, 14054662151397753612u),
  790. MakeUint128(838488366986797800u, 13415813871788764811u),
  791. MakeUint128(802032351030850070u, 4812194106185100421u),
  792. MakeUint128(768614336404564650u, 12297829382473034410u),
  793. MakeUint128(737869762948382064u, 11805916207174113034u),
  794. MakeUint128(709490156681136600u, 11351842506898185609u),
  795. MakeUint128(683212743470724133u, 17080318586768103348u),
  796. MakeUint128(658812288346769700u, 10540996613548315209u),
  797. MakeUint128(636094623231363848u, 15266270957552732371u),
  798. MakeUint128(614891469123651720u, 9838263505978427528u),
  799. MakeUint128(595056260442243600u, 9520900167075897608u),
  800. MakeUint128(576460752303423487u, 18446744073709551615u),
  801. MakeUint128(558992244657865200u, 8943875914525843207u),
  802. MakeUint128(542551296285575047u, 9765923333140350855u),
  803. MakeUint128(527049830677415760u, 8432797290838652167u),
  804. MakeUint128(512409557603043100u, 8198552921648689607u),
  805. };
  // Explicit specialization of kVmaxOverBase for int128: entry `base` is the
  // largest int128 value divided by `base`. Entries 0 and 1 are zero
  // placeholders so that the array can be indexed directly by base.
  //
  806. // This kVmaxOverBase is generated with
  807. // for (int base = 2; base < 37; ++base) {
  808. // absl::int128 max = std::numeric_limits<absl::int128>::max();
  809. // auto result = max / base;
  810. // std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
  811. // << absl::Int128Low64(result) << "u),\n";
  812. // }
  813. // See https://godbolt.org/z/7djYWz
  814. //
  815. // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
  816. // to avoid a static initializer.
  817. template <>
  818. ABSL_CONST_INIT const int128 LookupTables<int128>::kVmaxOverBase[] = {
  819. 0,
  820. 0,
  821. MakeInt128(4611686018427387903, 18446744073709551615u),
  822. MakeInt128(3074457345618258602, 12297829382473034410u),
  823. MakeInt128(2305843009213693951, 18446744073709551615u),
  824. MakeInt128(1844674407370955161, 11068046444225730969u),
  825. MakeInt128(1537228672809129301, 6148914691236517205u),
  826. MakeInt128(1317624576693539401, 2635249153387078802u),
  827. MakeInt128(1152921504606846975, 18446744073709551615u),
  828. MakeInt128(1024819115206086200, 16397105843297379214u),
  829. MakeInt128(922337203685477580, 14757395258967641292u),
  830. MakeInt128(838488366986797800, 13415813871788764811u),
  831. MakeInt128(768614336404564650, 12297829382473034410u),
  832. MakeInt128(709490156681136600, 11351842506898185609u),
  833. MakeInt128(658812288346769700, 10540996613548315209u),
  834. MakeInt128(614891469123651720, 9838263505978427528u),
  835. MakeInt128(576460752303423487, 18446744073709551615u),
  836. MakeInt128(542551296285575047, 9765923333140350855u),
  837. MakeInt128(512409557603043100, 8198552921648689607u),
  838. MakeInt128(485440633518672410, 17475862806672206794u),
  839. MakeInt128(461168601842738790, 7378697629483820646u),
  840. MakeInt128(439208192231179800, 7027331075698876806u),
  841. MakeInt128(419244183493398900, 6707906935894382405u),
  842. MakeInt128(401016175515425035, 2406097053092550210u),
  843. MakeInt128(384307168202282325, 6148914691236517205u),
  844. MakeInt128(368934881474191032, 5902958103587056517u),
  845. MakeInt128(354745078340568300, 5675921253449092804u),
  846. MakeInt128(341606371735362066, 17763531330238827482u),
  847. MakeInt128(329406144173384850, 5270498306774157604u),
  848. MakeInt128(318047311615681924, 7633135478776366185u),
  849. MakeInt128(307445734561825860, 4919131752989213764u),
  850. MakeInt128(297528130221121800, 4760450083537948804u),
  851. MakeInt128(288230376151711743, 18446744073709551615u),
  852. MakeInt128(279496122328932600, 4471937957262921603u),
  853. MakeInt128(271275648142787523, 14106333703424951235u),
  854. MakeInt128(263524915338707880, 4216398645419326083u),
  855. MakeInt128(256204778801521550, 4099276460824344803u),
  856. };
  // Explicit specialization of kVminOverBase for int128: entry `base` is the
  // smallest (most negative) int128 value divided by `base`. Entries 0 and 1
  // are zero placeholders so that the array can be indexed directly by base.
  //
  857. // This kVminOverBase is generated with
  858. // for (int base = 2; base < 37; ++base) {
  859. // absl::int128 min = std::numeric_limits<absl::int128>::min();
  860. // auto result = min / base;
  861. // std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
  862. // << absl::Int128Low64(result) << "u),\n";
  863. // }
  864. //
  865. // See https://godbolt.org/z/7djYWz
  866. //
  867. // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
  868. // to avoid a static initializer.
  869. template <>
  870. ABSL_CONST_INIT const int128 LookupTables<int128>::kVminOverBase[] = {
  871. 0,
  872. 0,
  873. MakeInt128(-4611686018427387904, 0u),
  874. MakeInt128(-3074457345618258603, 6148914691236517206u),
  875. MakeInt128(-2305843009213693952, 0u),
  876. MakeInt128(-1844674407370955162, 7378697629483820647u),
  877. MakeInt128(-1537228672809129302, 12297829382473034411u),
  878. MakeInt128(-1317624576693539402, 15811494920322472814u),
  879. MakeInt128(-1152921504606846976, 0u),
  880. MakeInt128(-1024819115206086201, 2049638230412172402u),
  881. MakeInt128(-922337203685477581, 3689348814741910324u),
  882. MakeInt128(-838488366986797801, 5030930201920786805u),
  883. MakeInt128(-768614336404564651, 6148914691236517206u),
  884. MakeInt128(-709490156681136601, 7094901566811366007u),
  885. MakeInt128(-658812288346769701, 7905747460161236407u),
  886. MakeInt128(-614891469123651721, 8608480567731124088u),
  887. MakeInt128(-576460752303423488, 0u),
  888. MakeInt128(-542551296285575048, 8680820740569200761u),
  889. MakeInt128(-512409557603043101, 10248191152060862009u),
  890. MakeInt128(-485440633518672411, 970881267037344822u),
  891. MakeInt128(-461168601842738791, 11068046444225730970u),
  892. MakeInt128(-439208192231179801, 11419412998010674810u),
  893. MakeInt128(-419244183493398901, 11738837137815169211u),
  894. MakeInt128(-401016175515425036, 16040647020617001406u),
  895. MakeInt128(-384307168202282326, 12297829382473034411u),
  896. MakeInt128(-368934881474191033, 12543785970122495099u),
  897. MakeInt128(-354745078340568301, 12770822820260458812u),
  898. MakeInt128(-341606371735362067, 683212743470724134u),
  899. MakeInt128(-329406144173384851, 13176245766935394012u),
  900. MakeInt128(-318047311615681925, 10813608594933185431u),
  901. MakeInt128(-307445734561825861, 13527612320720337852u),
  902. MakeInt128(-297528130221121801, 13686293990171602812u),
  903. MakeInt128(-288230376151711744, 0u),
  904. MakeInt128(-279496122328932601, 13974806116446630013u),
  905. MakeInt128(-271275648142787524, 4340410370284600381u),
  906. MakeInt128(-263524915338707881, 14230345428290225533u),
  907. MakeInt128(-256204778801521551, 14347467612885206813u),
  908. };
  909. template <typename IntType>
  910. ABSL_CONST_INIT const IntType LookupTables<IntType>::kVmaxOverBase[] =
  911. X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
  912. template <typename IntType>
  913. ABSL_CONST_INIT const IntType LookupTables<IntType>::kVminOverBase[] =
  914. X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
  915. #undef X_OVER_BASE_INITIALIZER
  916. template <typename IntType>
  917. inline bool safe_parse_positive_int(absl::string_view text, int base,
  918. absl::Nonnull<IntType*> value_p) {
  919. IntType value = 0;
  920. const IntType vmax = std::numeric_limits<IntType>::max();
  921. assert(vmax > 0);
  922. assert(base >= 0);
  923. const IntType base_inttype = static_cast<IntType>(base);
  924. assert(vmax >= base_inttype);
  925. const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
  926. assert(base < 2 ||
  927. std::numeric_limits<IntType>::max() / base_inttype == vmax_over_base);
  928. const char* start = text.data();
  929. const char* end = start + text.size();
  930. // loop over digits
  931. for (; start < end; ++start) {
  932. unsigned char c = static_cast<unsigned char>(start[0]);
  933. IntType digit = static_cast<IntType>(kAsciiToInt[c]);
  934. if (digit >= base_inttype) {
  935. *value_p = value;
  936. return false;
  937. }
  938. if (value > vmax_over_base) {
  939. *value_p = vmax;
  940. return false;
  941. }
  942. value *= base_inttype;
  943. if (value > vmax - digit) {
  944. *value_p = vmax;
  945. return false;
  946. }
  947. value += digit;
  948. }
  949. *value_p = value;
  950. return true;
  951. }
  952. template <typename IntType>
  953. inline bool safe_parse_negative_int(absl::string_view text, int base,
  954. absl::Nonnull<IntType*> value_p) {
  955. IntType value = 0;
  956. const IntType vmin = std::numeric_limits<IntType>::min();
  957. assert(vmin < 0);
  958. assert(vmin <= 0 - base);
  959. IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
  960. assert(base < 2 ||
  961. std::numeric_limits<IntType>::min() / base == vmin_over_base);
  962. // 2003 c++ standard [expr.mul]
  963. // "... the sign of the remainder is implementation-defined."
  964. // Although (vmin/base)*base + vmin%base is always vmin.
  965. // 2011 c++ standard tightens the spec but we cannot rely on it.
  966. // TODO(junyer): Handle this in the lookup table generation.
  967. if (vmin % base > 0) {
  968. vmin_over_base += 1;
  969. }
  970. const char* start = text.data();
  971. const char* end = start + text.size();
  972. // loop over digits
  973. for (; start < end; ++start) {
  974. unsigned char c = static_cast<unsigned char>(start[0]);
  975. int digit = kAsciiToInt[c];
  976. if (digit >= base) {
  977. *value_p = value;
  978. return false;
  979. }
  980. if (value < vmin_over_base) {
  981. *value_p = vmin;
  982. return false;
  983. }
  984. value *= base;
  985. if (value < vmin + digit) {
  986. *value_p = vmin;
  987. return false;
  988. }
  989. value -= digit;
  990. }
  991. *value_p = value;
  992. return true;
  993. }
  994. // Input format based on POSIX.1-2008 strtol
  995. // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
  996. template <typename IntType>
  997. inline bool safe_int_internal(absl::string_view text,
  998. absl::Nonnull<IntType*> value_p, int base) {
  999. *value_p = 0;
  1000. bool negative;
  1001. if (!safe_parse_sign_and_base(&text, &base, &negative)) {
  1002. return false;
  1003. }
  1004. if (!negative) {
  1005. return safe_parse_positive_int(text, base, value_p);
  1006. } else {
  1007. return safe_parse_negative_int(text, base, value_p);
  1008. }
  1009. }
  1010. template <typename IntType>
  1011. inline bool safe_uint_internal(absl::string_view text,
  1012. absl::Nonnull<IntType*> value_p, int base) {
  1013. *value_p = 0;
  1014. bool negative;
  1015. if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
  1016. return false;
  1017. }
  1018. return safe_parse_positive_int(text, base, value_p);
  1019. }
  1020. } // anonymous namespace
  1021. namespace numbers_internal {
  1022. // Digit conversion.
  // kHexChar[v] is the lowercase hexadecimal digit character for a value v in
  // [0, 15]; the array also contains the literal's terminating NUL.
  1023. ABSL_CONST_INIT ABSL_DLL const char kHexChar[] =
  1024. "0123456789abcdef";
  // Two-character lowercase hexadecimal spelling of every byte value: the
  // characters for byte b are kHexTable[2 * b] and kHexTable[2 * b + 1].
  // The size is 256 * 2 characters plus the terminating NUL (513). Each source
  // row below covers the 16 byte values that share one high nibble.
  1025. ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
  1026. "000102030405060708090a0b0c0d0e0f"
  1027. "101112131415161718191a1b1c1d1e1f"
  1028. "202122232425262728292a2b2c2d2e2f"
  1029. "303132333435363738393a3b3c3d3e3f"
  1030. "404142434445464748494a4b4c4d4e4f"
  1031. "505152535455565758595a5b5c5d5e5f"
  1032. "606162636465666768696a6b6c6d6e6f"
  1033. "707172737475767778797a7b7c7d7e7f"
  1034. "808182838485868788898a8b8c8d8e8f"
  1035. "909192939495969798999a9b9c9d9e9f"
  1036. "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
  1037. "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
  1038. "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
  1039. "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
  1040. "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
  1041. "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
  1042. bool safe_strto32_base(absl::string_view text, absl::Nonnull<int32_t*> value,
  1043. int base) {
  1044. return safe_int_internal<int32_t>(text, value, base);
  1045. }
  1046. bool safe_strto64_base(absl::string_view text, absl::Nonnull<int64_t*> value,
  1047. int base) {
  1048. return safe_int_internal<int64_t>(text, value, base);
  1049. }
  1050. bool safe_strto128_base(absl::string_view text, absl::Nonnull<int128*> value,
  1051. int base) {
  1052. return safe_int_internal<absl::int128>(text, value, base);
  1053. }
  1054. bool safe_strtou32_base(absl::string_view text, absl::Nonnull<uint32_t*> value,
  1055. int base) {
  1056. return safe_uint_internal<uint32_t>(text, value, base);
  1057. }
  1058. bool safe_strtou64_base(absl::string_view text, absl::Nonnull<uint64_t*> value,
  1059. int base) {
  1060. return safe_uint_internal<uint64_t>(text, value, base);
  1061. }
  1062. bool safe_strtou128_base(absl::string_view text, absl::Nonnull<uint128*> value,
  1063. int base) {
  1064. return safe_uint_internal<absl::uint128>(text, value, base);
  1065. }
  1066. } // namespace numbers_internal
  1067. ABSL_NAMESPACE_END
  1068. } // namespace absl