numbers.cc 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145
  1. // Copyright 2017 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // This file contains string processing functions related to
  15. // numeric values.
  16. #include "y_absl/strings/numbers.h"
  17. #include <algorithm>
  18. #include <cassert>
  19. #include <cfloat> // for DBL_DIG and FLT_DIG
  20. #include <cmath> // for HUGE_VAL
  21. #include <cstdint>
  22. #include <cstdio>
  23. #include <cstdlib>
  24. #include <cstring>
  25. #include <iterator>
  26. #include <limits>
  27. #include <system_error> // NOLINT(build/c++11)
  28. #include <utility>
  29. #include "y_absl/base/attributes.h"
  30. #include "y_absl/base/config.h"
  31. #include "y_absl/base/internal/endian.h"
  32. #include "y_absl/base/internal/raw_logging.h"
  33. #include "y_absl/base/nullability.h"
  34. #include "y_absl/base/optimization.h"
  35. #include "y_absl/numeric/bits.h"
  36. #include "y_absl/numeric/int128.h"
  37. #include "y_absl/strings/ascii.h"
  38. #include "y_absl/strings/charconv.h"
  39. #include "y_absl/strings/match.h"
  40. #include "y_absl/strings/string_view.h"
  41. namespace y_absl {
  42. Y_ABSL_NAMESPACE_BEGIN
  43. bool SimpleAtof(y_absl::string_view str, y_absl::Nonnull<float*> out) {
  44. *out = 0.0;
  45. str = StripAsciiWhitespace(str);
  46. // std::from_chars doesn't accept an initial +, but SimpleAtof does, so if one
  47. // is present, skip it, while avoiding accepting "+-0" as valid.
  48. if (!str.empty() && str[0] == '+') {
  49. str.remove_prefix(1);
  50. if (!str.empty() && str[0] == '-') {
  51. return false;
  52. }
  53. }
  54. auto result = y_absl::from_chars(str.data(), str.data() + str.size(), *out);
  55. if (result.ec == std::errc::invalid_argument) {
  56. return false;
  57. }
  58. if (result.ptr != str.data() + str.size()) {
  59. // not all non-whitespace characters consumed
  60. return false;
  61. }
  62. // from_chars() with DR 3081's current wording will return max() on
  63. // overflow. SimpleAtof returns infinity instead.
  64. if (result.ec == std::errc::result_out_of_range) {
  65. if (*out > 1.0) {
  66. *out = std::numeric_limits<float>::infinity();
  67. } else if (*out < -1.0) {
  68. *out = -std::numeric_limits<float>::infinity();
  69. }
  70. }
  71. return true;
  72. }
  73. bool SimpleAtod(y_absl::string_view str, y_absl::Nonnull<double*> out) {
  74. *out = 0.0;
  75. str = StripAsciiWhitespace(str);
  76. // std::from_chars doesn't accept an initial +, but SimpleAtod does, so if one
  77. // is present, skip it, while avoiding accepting "+-0" as valid.
  78. if (!str.empty() && str[0] == '+') {
  79. str.remove_prefix(1);
  80. if (!str.empty() && str[0] == '-') {
  81. return false;
  82. }
  83. }
  84. auto result = y_absl::from_chars(str.data(), str.data() + str.size(), *out);
  85. if (result.ec == std::errc::invalid_argument) {
  86. return false;
  87. }
  88. if (result.ptr != str.data() + str.size()) {
  89. // not all non-whitespace characters consumed
  90. return false;
  91. }
  92. // from_chars() with DR 3081's current wording will return max() on
  93. // overflow. SimpleAtod returns infinity instead.
  94. if (result.ec == std::errc::result_out_of_range) {
  95. if (*out > 1.0) {
  96. *out = std::numeric_limits<double>::infinity();
  97. } else if (*out < -1.0) {
  98. *out = -std::numeric_limits<double>::infinity();
  99. }
  100. }
  101. return true;
  102. }
  103. bool SimpleAtob(y_absl::string_view str, y_absl::Nonnull<bool*> out) {
  104. Y_ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
  105. if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
  106. EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
  107. EqualsIgnoreCase(str, "1")) {
  108. *out = true;
  109. return true;
  110. }
  111. if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
  112. EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
  113. EqualsIgnoreCase(str, "0")) {
  114. *out = false;
  115. return true;
  116. }
  117. return false;
  118. }
  119. // ----------------------------------------------------------------------
  120. // FastIntToBuffer() overloads
  121. //
  122. // Like the Fast*ToBuffer() functions above, these are intended for speed.
  123. // Unlike the Fast*ToBuffer() functions, however, these functions write
  124. // their output to the beginning of the buffer. The caller is responsible
  125. // for ensuring that the buffer has enough space to hold the output.
  126. //
  127. // Returns a pointer to the end of the string (i.e. the null character
  128. // terminating the string).
  129. // ----------------------------------------------------------------------
  130. namespace {
  131. // Various routines to encode integers to strings.
  132. // We split data encodings into a group of 2 digits, 4 digits, 8 digits as
  133. // it's easier to combine powers of two into scalar arithmetic.
  134. // Previous implementation used a lookup table of 200 bytes for every 2 bytes
  135. // and it was memory bound, any L1 cache miss would result in a much slower
  136. // result. When benchmarking with a cache eviction rate of several percent,
  137. // this implementation proved to be better.
  138. // These constants represent '00', '0000' and '00000000' as ascii strings in
  139. // integers. We can add these numbers if we encode to bytes from 0 to 9. as
  140. // 'i' = '0' + i for 0 <= i <= 9.
  141. constexpr uint32_t kTwoZeroBytes = 0x0101 * '0';
  142. constexpr uint64_t kFourZeroBytes = 0x01010101 * '0';
  143. constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0';
  144. // * 103 / 1024 is a division by 10 for values from 0 to 99. It's also a
  145. // division of a structure [k takes 2 bytes][m takes 2 bytes], then * 103 / 1024
  146. // will be [k / 10][m / 10]. It allows parallel division.
  147. constexpr uint64_t kDivisionBy10Mul = 103u;
  148. constexpr uint64_t kDivisionBy10Div = 1 << 10;
  149. // * 10486 / 1048576 is a division by 100 for values from 0 to 9999.
  150. constexpr uint64_t kDivisionBy100Mul = 10486u;
  151. constexpr uint64_t kDivisionBy100Div = 1 << 20;
  152. // Encode functions write the ASCII output of input `n` to `out_str`.
  153. inline char* EncodeHundred(uint32_t n, y_absl::Nonnull<char*> out_str) {
  154. int num_digits = static_cast<int>(n - 10) >> 8;
  155. uint32_t div10 = (n * kDivisionBy10Mul) / kDivisionBy10Div;
  156. uint32_t mod10 = n - 10u * div10;
  157. uint32_t base = kTwoZeroBytes + div10 + (mod10 << 8);
  158. base >>= num_digits & 8;
  159. little_endian::Store16(out_str, static_cast<uint16_t>(base));
  160. return out_str + 2 + num_digits;
  161. }
  162. inline char* EncodeTenThousand(uint32_t n, y_absl::Nonnull<char*> out_str) {
  163. // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive
  164. // blocks. 123 -> [\0\1][\0\23]. We divide by 10 both blocks
  165. // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in
  166. // parallel". Then we combine both results to have both ASCII digits,
  167. // strip trailing zeros, add ASCII '0000' and return.
  168. uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div;
  169. uint32_t mod100 = n - 100ull * div100;
  170. uint32_t hundreds = (mod100 << 16) + div100;
  171. uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
  172. tens &= (0xFull << 16) | 0xFull;
  173. tens += (hundreds - 10ull * tens) << 8;
  174. Y_ABSL_ASSUME(tens != 0);
  175. // The result can contain trailing zero bits, we need to strip them to a first
  176. // significant byte in a final representation. For example, for n = 123, we
  177. // have tens to have representation \0\1\2\3. We do `& -8` to round
  178. // to a multiple to 8 to strip zero bytes, not all zero bits.
  179. // countr_zero to help.
  180. // 0 minus 8 to make MSVC happy.
  181. uint32_t zeroes = static_cast<uint32_t>(y_absl::countr_zero(tens)) & (0 - 8u);
  182. tens += kFourZeroBytes;
  183. tens >>= zeroes;
  184. little_endian::Store32(out_str, tens);
  185. return out_str + sizeof(tens) - zeroes / 8;
  186. }
  187. // Helper function to produce an ASCII representation of `i`.
  188. //
  189. // Function returns an 8-byte integer which when summed with `kEightZeroBytes`,
  190. // can be treated as a printable buffer with ascii representation of `i`,
  191. // possibly with leading zeros.
  192. //
  193. // Example:
  194. //
  195. // uint64_t buffer = PrepareEightDigits(102030) + kEightZeroBytes;
  196. // char* ascii = reinterpret_cast<char*>(&buffer);
  197. // // Note two leading zeros:
  198. // EXPECT_EQ(y_absl::string_view(ascii, 8), "00102030");
  199. //
  200. // Pre-condition: `i` must be less than 100000000.
  201. inline uint64_t PrepareEightDigits(uint32_t i) {
  202. Y_ABSL_ASSUME(i < 10000'0000);
  203. // Prepare 2 blocks of 4 digits "in parallel".
  204. uint32_t hi = i / 10000;
  205. uint32_t lo = i % 10000;
  206. uint64_t merged = hi | (uint64_t{lo} << 32);
  207. uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) &
  208. ((0x7Full << 32) | 0x7Full);
  209. uint64_t mod100 = merged - 100ull * div100;
  210. uint64_t hundreds = (mod100 << 16) + div100;
  211. uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
  212. tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull;
  213. tens += (hundreds - 10ull * tens) << 8;
  214. return tens;
  215. }
  216. inline Y_ABSL_ATTRIBUTE_ALWAYS_INLINE y_absl::Nonnull<char*> EncodeFullU32(
  217. uint32_t n, y_absl::Nonnull<char*> out_str) {
  218. if (n < 10) {
  219. *out_str = static_cast<char>('0' + n);
  220. return out_str + 1;
  221. }
  222. if (n < 100'000'000) {
  223. uint64_t bottom = PrepareEightDigits(n);
  224. Y_ABSL_ASSUME(bottom != 0);
  225. // 0 minus 8 to make MSVC happy.
  226. uint32_t zeroes =
  227. static_cast<uint32_t>(y_absl::countr_zero(bottom)) & (0 - 8u);
  228. little_endian::Store64(out_str, (bottom + kEightZeroBytes) >> zeroes);
  229. return out_str + sizeof(bottom) - zeroes / 8;
  230. }
  231. uint32_t div08 = n / 100'000'000;
  232. uint32_t mod08 = n % 100'000'000;
  233. uint64_t bottom = PrepareEightDigits(mod08) + kEightZeroBytes;
  234. out_str = EncodeHundred(div08, out_str);
  235. little_endian::Store64(out_str, bottom);
  236. return out_str + sizeof(bottom);
  237. }
  238. inline Y_ABSL_ATTRIBUTE_ALWAYS_INLINE char* EncodeFullU64(uint64_t i,
  239. char* buffer) {
  240. if (i <= std::numeric_limits<uint32_t>::max()) {
  241. return EncodeFullU32(static_cast<uint32_t>(i), buffer);
  242. }
  243. uint32_t mod08;
  244. if (i < 1'0000'0000'0000'0000ull) {
  245. uint32_t div08 = static_cast<uint32_t>(i / 100'000'000ull);
  246. mod08 = static_cast<uint32_t>(i % 100'000'000ull);
  247. buffer = EncodeFullU32(div08, buffer);
  248. } else {
  249. uint64_t div08 = i / 100'000'000ull;
  250. mod08 = static_cast<uint32_t>(i % 100'000'000ull);
  251. uint32_t div016 = static_cast<uint32_t>(div08 / 100'000'000ull);
  252. uint32_t div08mod08 = static_cast<uint32_t>(div08 % 100'000'000ull);
  253. uint64_t mid_result = PrepareEightDigits(div08mod08) + kEightZeroBytes;
  254. buffer = EncodeTenThousand(div016, buffer);
  255. little_endian::Store64(buffer, mid_result);
  256. buffer += sizeof(mid_result);
  257. }
  258. uint64_t mod_result = PrepareEightDigits(mod08) + kEightZeroBytes;
  259. little_endian::Store64(buffer, mod_result);
  260. return buffer + sizeof(mod_result);
  261. }
  262. } // namespace
  263. void numbers_internal::PutTwoDigits(uint32_t i, y_absl::Nonnull<char*> buf) {
  264. assert(i < 100);
  265. uint32_t base = kTwoZeroBytes;
  266. uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
  267. uint32_t mod10 = i - 10u * div10;
  268. base += div10 + (mod10 << 8);
  269. little_endian::Store16(buf, static_cast<uint16_t>(base));
  270. }
  271. y_absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  272. uint32_t n, y_absl::Nonnull<char*> out_str) {
  273. out_str = EncodeFullU32(n, out_str);
  274. *out_str = '\0';
  275. return out_str;
  276. }
  277. y_absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  278. int32_t i, y_absl::Nonnull<char*> buffer) {
  279. uint32_t u = static_cast<uint32_t>(i);
  280. if (i < 0) {
  281. *buffer++ = '-';
  282. // We need to do the negation in modular (i.e., "unsigned")
  283. // arithmetic; MSVC++ apparently warns for plain "-u", so
  284. // we write the equivalent expression "0 - u" instead.
  285. u = 0 - u;
  286. }
  287. buffer = EncodeFullU32(u, buffer);
  288. *buffer = '\0';
  289. return buffer;
  290. }
  291. y_absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  292. uint64_t i, y_absl::Nonnull<char*> buffer) {
  293. buffer = EncodeFullU64(i, buffer);
  294. *buffer = '\0';
  295. return buffer;
  296. }
  297. y_absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
  298. int64_t i, y_absl::Nonnull<char*> buffer) {
  299. uint64_t u = static_cast<uint64_t>(i);
  300. if (i < 0) {
  301. *buffer++ = '-';
  302. // We need to do the negation in modular (i.e., "unsigned")
  303. // arithmetic; MSVC++ apparently warns for plain "-u", so
  304. // we write the equivalent expression "0 - u" instead.
  305. u = 0 - u;
  306. }
  307. buffer = EncodeFullU64(u, buffer);
  308. *buffer = '\0';
  309. return buffer;
  310. }
  311. // Given a 128-bit number expressed as a pair of uint64_t, high half first,
  312. // return that number multiplied by the given 32-bit value. If the result is
  313. // too large to fit in a 128-bit number, divide it by 2 until it fits.
  314. static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
  315. uint32_t mul) {
  316. uint64_t bits0_31 = num.second & 0xFFFFFFFF;
  317. uint64_t bits32_63 = num.second >> 32;
  318. uint64_t bits64_95 = num.first & 0xFFFFFFFF;
  319. uint64_t bits96_127 = num.first >> 32;
  320. // The picture so far: each of these 64-bit values has only the lower 32 bits
  321. // filled in.
  322. // bits96_127: [ 00000000 xxxxxxxx ]
  323. // bits64_95: [ 00000000 xxxxxxxx ]
  324. // bits32_63: [ 00000000 xxxxxxxx ]
  325. // bits0_31: [ 00000000 xxxxxxxx ]
  326. bits0_31 *= mul;
  327. bits32_63 *= mul;
  328. bits64_95 *= mul;
  329. bits96_127 *= mul;
  330. // Now the top halves may also have value, though all 64 of their bits will
  331. // never be set at the same time, since they are a result of a 32x32 bit
  332. // multiply. This makes the carry calculation slightly easier.
  333. // bits96_127: [ mmmmmmmm | mmmmmmmm ]
  334. // bits64_95: [ | mmmmmmmm mmmmmmmm | ]
  335. // bits32_63: | [ mmmmmmmm | mmmmmmmm ]
  336. // bits0_31: | [ | mmmmmmmm mmmmmmmm ]
  337. // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ]
  338. uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
  339. uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
  340. (bits0_63 < bits0_31);
  341. uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
  342. if (bits128_up == 0) return {bits64_127, bits0_63};
  343. auto shift = static_cast<unsigned>(bit_width(bits128_up));
  344. uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
  345. uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
  346. return {hi, lo};
  347. }
  348. // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
  349. // where the first bit is always a one. So PowFive(1, 0) starts 0b100000,
  350. // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
  351. static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
  352. std::pair<uint64_t, uint64_t> result = {num, 0};
  353. while (expfive >= 13) {
  354. // 5^13 is the highest power of five that will fit in a 32-bit integer.
  355. result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
  356. expfive -= 13;
  357. }
  358. constexpr uint32_t powers_of_five[13] = {
  359. 1,
  360. 5,
  361. 5 * 5,
  362. 5 * 5 * 5,
  363. 5 * 5 * 5 * 5,
  364. 5 * 5 * 5 * 5 * 5,
  365. 5 * 5 * 5 * 5 * 5 * 5,
  366. 5 * 5 * 5 * 5 * 5 * 5 * 5,
  367. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  368. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  369. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  370. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
  371. 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
  372. result = Mul32(result, powers_of_five[expfive & 15]);
  373. int shift = countl_zero(result.first);
  374. if (shift != 0) {
  375. result.first = (result.first << shift) + (result.second >> (64 - shift));
  376. result.second = (result.second << shift);
  377. }
  378. return result;
  379. }
  380. struct ExpDigits {
  381. int32_t exponent;
  382. char digits[6];
  383. };
  384. // SplitToSix converts value, a positive double-precision floating-point number,
  385. // into a base-10 exponent and 6 ASCII digits, where the first digit is never
  386. // zero. For example, SplitToSix(1) returns an exponent of zero and a digits
  387. // array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between
  388. // two possible representations, e.g. value = 100000.5, then "round to even" is
  389. // performed.
  390. static ExpDigits SplitToSix(const double value) {
  391. ExpDigits exp_dig;
  392. int exp = 5;
  393. double d = value;
  394. // First step: calculate a close approximation of the output, where the
  395. // value d will be between 100,000 and 999,999, representing the digits
  396. // in the output ASCII array, and exp is the base-10 exponent. It would be
  397. // faster to use a table here, and to look up the base-2 exponent of value,
  398. // however value is an IEEE-754 64-bit number, so the table would have 2,000
  399. // entries, which is not cache-friendly.
  400. if (d >= 999999.5) {
  401. if (d >= 1e+261) exp += 256, d *= 1e-256;
  402. if (d >= 1e+133) exp += 128, d *= 1e-128;
  403. if (d >= 1e+69) exp += 64, d *= 1e-64;
  404. if (d >= 1e+37) exp += 32, d *= 1e-32;
  405. if (d >= 1e+21) exp += 16, d *= 1e-16;
  406. if (d >= 1e+13) exp += 8, d *= 1e-8;
  407. if (d >= 1e+9) exp += 4, d *= 1e-4;
  408. if (d >= 1e+7) exp += 2, d *= 1e-2;
  409. if (d >= 1e+6) exp += 1, d *= 1e-1;
  410. } else {
  411. if (d < 1e-250) exp -= 256, d *= 1e256;
  412. if (d < 1e-122) exp -= 128, d *= 1e128;
  413. if (d < 1e-58) exp -= 64, d *= 1e64;
  414. if (d < 1e-26) exp -= 32, d *= 1e32;
  415. if (d < 1e-10) exp -= 16, d *= 1e16;
  416. if (d < 1e-2) exp -= 8, d *= 1e8;
  417. if (d < 1e+2) exp -= 4, d *= 1e4;
  418. if (d < 1e+4) exp -= 2, d *= 1e2;
  419. if (d < 1e+5) exp -= 1, d *= 1e1;
  420. }
  421. // At this point, d is in the range [99999.5..999999.5) and exp is in the
  422. // range [-324..308]. Since we need to round d up, we want to add a half
  423. // and truncate.
  424. // However, the technique above may have lost some precision, due to its
  425. // repeated multiplication by constants that each may be off by half a bit
  426. // of precision. This only matters if we're close to the edge though.
  427. // Since we'd like to know if the fractional part of d is close to a half,
  428. // we multiply it by 65536 and see if the fractional part is close to 32768.
  429. // (The number doesn't have to be a power of two,but powers of two are faster)
  430. uint64_t d64k = d * 65536;
  431. uint32_t dddddd; // A 6-digit decimal integer.
  432. if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
  433. // OK, it's fairly likely that precision was lost above, which is
  434. // not a surprise given only 52 mantissa bits are available. Therefore
  435. // redo the calculation using 128-bit numbers. (64 bits are not enough).
  436. // Start out with digits rounded down; maybe add one below.
  437. dddddd = static_cast<uint32_t>(d64k / 65536);
  438. // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual
  439. // value we're representing, of course, is M.mmm... * 2^exp2.
  440. int exp2;
  441. double m = std::frexp(value, &exp2);
  442. uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
  443. // std::frexp returns an m value in the range [0.5, 1.0), however we
  444. // can't multiply it by 2^64 and convert to an integer because some FPUs
  445. // throw an exception when converting an number higher than 2^63 into an
  446. // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter
  447. // since m only has 52 significant bits anyway.
  448. mantissa <<= 1;
  449. exp2 -= 64; // not needed, but nice for debugging
  450. // OK, we are here to compare:
  451. // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2
  452. // so we can round up dddddd if appropriate. Those values span the full
  453. // range of 600 orders of magnitude of IEE 64-bit floating-point.
  454. // Fortunately, we already know they are very close, so we don't need to
  455. // track the base-2 exponent of both sides. This greatly simplifies the
  456. // the math since the 2^exp2 calculation is unnecessary and the power-of-10
  457. // calculation can become a power-of-5 instead.
  458. std::pair<uint64_t, uint64_t> edge, val;
  459. if (exp >= 6) {
  460. // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
  461. // Since we're tossing powers of two, 2 * dddddd + 1 is the
  462. // same as dddddd + 0.5
  463. edge = PowFive(2 * dddddd + 1, exp - 5);
  464. val.first = mantissa;
  465. val.second = 0;
  466. } else {
  467. // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
  468. // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to
  469. // mantissa * 5 ^ (5 - exp)
  470. edge = PowFive(2 * dddddd + 1, 0);
  471. val = PowFive(mantissa, 5 - exp);
  472. }
  473. // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
  474. // val.second, edge.first, edge.second);
  475. if (val > edge) {
  476. dddddd++;
  477. } else if (val == edge) {
  478. dddddd += (dddddd & 1);
  479. }
  480. } else {
  481. // Here, we are not close to the edge.
  482. dddddd = static_cast<uint32_t>((d64k + 32768) / 65536);
  483. }
  484. if (dddddd == 1000000) {
  485. dddddd = 100000;
  486. exp += 1;
  487. }
  488. exp_dig.exponent = exp;
  489. uint32_t two_digits = dddddd / 10000;
  490. dddddd -= two_digits * 10000;
  491. numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]);
  492. two_digits = dddddd / 100;
  493. dddddd -= two_digits * 100;
  494. numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]);
  495. numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]);
  496. return exp_dig;
  497. }
  498. // Helper function for fast formatting of floating-point.
  499. // The result is the same as "%g", a.k.a. "%.6g".
  500. size_t numbers_internal::SixDigitsToBuffer(double d,
  501. y_absl::Nonnull<char*> const buffer) {
  502. static_assert(std::numeric_limits<float>::is_iec559,
  503. "IEEE-754/IEC-559 support only");
  504. char* out = buffer; // we write data to out, incrementing as we go, but
  505. // FloatToBuffer always returns the address of the buffer
  506. // passed in.
  507. if (std::isnan(d)) {
  508. strcpy(out, "nan"); // NOLINT(runtime/printf)
  509. return 3;
  510. }
  511. if (d == 0) { // +0 and -0 are handled here
  512. if (std::signbit(d)) *out++ = '-';
  513. *out++ = '0';
  514. *out = 0;
  515. return static_cast<size_t>(out - buffer);
  516. }
  517. if (d < 0) {
  518. *out++ = '-';
  519. d = -d;
  520. }
  521. if (d > std::numeric_limits<double>::max()) {
  522. strcpy(out, "inf"); // NOLINT(runtime/printf)
  523. return static_cast<size_t>(out + 3 - buffer);
  524. }
  525. auto exp_dig = SplitToSix(d);
  526. int exp = exp_dig.exponent;
  527. const char* digits = exp_dig.digits;
  528. out[0] = '0';
  529. out[1] = '.';
  530. switch (exp) {
  531. case 5:
  532. memcpy(out, &digits[0], 6), out += 6;
  533. *out = 0;
  534. return static_cast<size_t>(out - buffer);
  535. case 4:
  536. memcpy(out, &digits[0], 5), out += 5;
  537. if (digits[5] != '0') {
  538. *out++ = '.';
  539. *out++ = digits[5];
  540. }
  541. *out = 0;
  542. return static_cast<size_t>(out - buffer);
  543. case 3:
  544. memcpy(out, &digits[0], 4), out += 4;
  545. if ((digits[5] | digits[4]) != '0') {
  546. *out++ = '.';
  547. *out++ = digits[4];
  548. if (digits[5] != '0') *out++ = digits[5];
  549. }
  550. *out = 0;
  551. return static_cast<size_t>(out - buffer);
  552. case 2:
  553. memcpy(out, &digits[0], 3), out += 3;
  554. *out++ = '.';
  555. memcpy(out, &digits[3], 3);
  556. out += 3;
  557. while (out[-1] == '0') --out;
  558. if (out[-1] == '.') --out;
  559. *out = 0;
  560. return static_cast<size_t>(out - buffer);
  561. case 1:
  562. memcpy(out, &digits[0], 2), out += 2;
  563. *out++ = '.';
  564. memcpy(out, &digits[2], 4);
  565. out += 4;
  566. while (out[-1] == '0') --out;
  567. if (out[-1] == '.') --out;
  568. *out = 0;
  569. return static_cast<size_t>(out - buffer);
  570. case 0:
  571. memcpy(out, &digits[0], 1), out += 1;
  572. *out++ = '.';
  573. memcpy(out, &digits[1], 5);
  574. out += 5;
  575. while (out[-1] == '0') --out;
  576. if (out[-1] == '.') --out;
  577. *out = 0;
  578. return static_cast<size_t>(out - buffer);
  579. case -4:
  580. out[2] = '0';
  581. ++out;
  582. Y_ABSL_FALLTHROUGH_INTENDED;
  583. case -3:
  584. out[2] = '0';
  585. ++out;
  586. Y_ABSL_FALLTHROUGH_INTENDED;
  587. case -2:
  588. out[2] = '0';
  589. ++out;
  590. Y_ABSL_FALLTHROUGH_INTENDED;
  591. case -1:
  592. out += 2;
  593. memcpy(out, &digits[0], 6);
  594. out += 6;
  595. while (out[-1] == '0') --out;
  596. *out = 0;
  597. return static_cast<size_t>(out - buffer);
  598. }
  599. assert(exp < -4 || exp >= 6);
  600. out[0] = digits[0];
  601. assert(out[1] == '.');
  602. out += 2;
  603. memcpy(out, &digits[1], 5), out += 5;
  604. while (out[-1] == '0') --out;
  605. if (out[-1] == '.') --out;
  606. *out++ = 'e';
  607. if (exp > 0) {
  608. *out++ = '+';
  609. } else {
  610. *out++ = '-';
  611. exp = -exp;
  612. }
  613. if (exp > 99) {
  614. int dig1 = exp / 100;
  615. exp -= dig1 * 100;
  616. *out++ = '0' + static_cast<char>(dig1);
  617. }
  618. PutTwoDigits(static_cast<uint32_t>(exp), out);
  619. out += 2;
  620. *out = 0;
  621. return static_cast<size_t>(out - buffer);
  622. }
  623. namespace {
  624. // Represents integer values of digits.
  625. // Uses 36 to indicate an invalid character since we support
  626. // bases up to 36.
  627. static const int8_t kAsciiToInt[256] = {
  628. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
  629. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  630. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5,
  631. 6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
  632. 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
  633. 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
  634. 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
  635. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  636. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  637. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  638. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  639. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  640. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  641. 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
  642. // Parse the sign and optional hex or oct prefix in text.
  643. inline bool safe_parse_sign_and_base(
  644. y_absl::Nonnull<y_absl::string_view*> text /*inout*/,
  645. y_absl::Nonnull<int*> base_ptr /*inout*/,
  646. y_absl::Nonnull<bool*> negative_ptr /*output*/) {
  647. if (text->data() == nullptr) {
  648. return false;
  649. }
  650. const char* start = text->data();
  651. const char* end = start + text->size();
  652. int base = *base_ptr;
  653. // Consume whitespace.
  654. while (start < end &&
  655. y_absl::ascii_isspace(static_cast<unsigned char>(start[0]))) {
  656. ++start;
  657. }
  658. while (start < end &&
  659. y_absl::ascii_isspace(static_cast<unsigned char>(end[-1]))) {
  660. --end;
  661. }
  662. if (start >= end) {
  663. return false;
  664. }
  665. // Consume sign.
  666. *negative_ptr = (start[0] == '-');
  667. if (*negative_ptr || start[0] == '+') {
  668. ++start;
  669. if (start >= end) {
  670. return false;
  671. }
  672. }
  673. // Consume base-dependent prefix.
  674. // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
  675. // base 16: "0x" -> base 16
  676. // Also validate the base.
  677. if (base == 0) {
  678. if (end - start >= 2 && start[0] == '0' &&
  679. (start[1] == 'x' || start[1] == 'X')) {
  680. base = 16;
  681. start += 2;
  682. if (start >= end) {
  683. // "0x" with no digits after is invalid.
  684. return false;
  685. }
  686. } else if (end - start >= 1 && start[0] == '0') {
  687. base = 8;
  688. start += 1;
  689. } else {
  690. base = 10;
  691. }
  692. } else if (base == 16) {
  693. if (end - start >= 2 && start[0] == '0' &&
  694. (start[1] == 'x' || start[1] == 'X')) {
  695. start += 2;
  696. if (start >= end) {
  697. // "0x" with no digits after is invalid.
  698. return false;
  699. }
  700. }
  701. } else if (base >= 2 && base <= 36) {
  702. // okay
  703. } else {
  704. return false;
  705. }
  706. *text = y_absl::string_view(start, static_cast<size_t>(end - start));
  707. *base_ptr = base;
  708. return true;
  709. }
  // Consume digits.
  //
  // The classic loop:
  //
  //   for each digit
  //     value = value * base + digit
  //   value *= sign
  //
  // The classic loop needs overflow checking. It also fails on the most
  // negative integer, -2147483648 in 32-bit two's complement representation.
  //
  // The improved loop used here:
  //
  //   if (!negative)
  //     for each digit
  //       value = value * base
  //       value = value + digit
  //   else
  //     for each digit
  //       value = value * base
  //       value = value - digit
  //
  // Overflow checking becomes simple.
  733. // Lookup tables per IntType:
  734. // vmax/base and vmin/base are precomputed because division costs at least 8ns.
  735. // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
  736. // struct of arrays) would probably be better in terms of d-cache for the most
  737. // commonly used bases.
  738. template <typename IntType>
  739. struct LookupTables {
  740. Y_ABSL_CONST_INIT static const IntType kVmaxOverBase[];
  741. Y_ABSL_CONST_INIT static const IntType kVminOverBase[];
  742. };
  // An array initializer macro producing 37 elements, where element `base`
  // is X / base for base in [2, 36]. Elements 0 and 1 are zero placeholders:
  // lookups for base in [0, 1] should never happen because base has been
  // validated to be in [2, 36] by safe_parse_sign_and_base().
  //
  // The argument is parenthesized at every use so that a compound expression
  // such as `a + b` is divided as a whole instead of expanding to `a + b / 2`.
  #define X_OVER_BASE_INITIALIZER(X) \
    { \
      0, 0, (X) / 2, (X) / 3, (X) / 4, (X) / 5, (X) / 6, (X) / 7, (X) / 8, \
      (X) / 9, (X) / 10, (X) / 11, (X) / 12, (X) / 13, (X) / 14, (X) / 15, \
      (X) / 16, (X) / 17, (X) / 18, (X) / 19, (X) / 20, (X) / 21, (X) / 22, \
      (X) / 23, (X) / 24, (X) / 25, (X) / 26, (X) / 27, (X) / 28, (X) / 29, \
      (X) / 30, (X) / 31, (X) / 32, (X) / 33, (X) / 34, (X) / 35, (X) / 36, \
    }
  754. // This kVmaxOverBase is generated with
  755. // for (int base = 2; base < 37; ++base) {
  756. // y_absl::uint128 max = std::numeric_limits<y_absl::uint128>::max();
  757. // auto result = max / base;
  758. // std::cout << " MakeUint128(" << y_absl::Uint128High64(result) << "u, "
  759. // << y_absl::Uint128Low64(result) << "u),\n";
  760. // }
  761. // See https://godbolt.org/z/aneYsb
  762. //
  763. // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting
  764. // array to avoid a static initializer.
  765. template <>
  766. Y_ABSL_CONST_INIT const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
  767. 0,
  768. 0,
  769. MakeUint128(9223372036854775807u, 18446744073709551615u),
  770. MakeUint128(6148914691236517205u, 6148914691236517205u),
  771. MakeUint128(4611686018427387903u, 18446744073709551615u),
  772. MakeUint128(3689348814741910323u, 3689348814741910323u),
  773. MakeUint128(3074457345618258602u, 12297829382473034410u),
  774. MakeUint128(2635249153387078802u, 5270498306774157604u),
  775. MakeUint128(2305843009213693951u, 18446744073709551615u),
  776. MakeUint128(2049638230412172401u, 14347467612885206812u),
  777. MakeUint128(1844674407370955161u, 11068046444225730969u),
  778. MakeUint128(1676976733973595601u, 8384883669867978007u),
  779. MakeUint128(1537228672809129301u, 6148914691236517205u),
  780. MakeUint128(1418980313362273201u, 4256940940086819603u),
  781. MakeUint128(1317624576693539401u, 2635249153387078802u),
  782. MakeUint128(1229782938247303441u, 1229782938247303441u),
  783. MakeUint128(1152921504606846975u, 18446744073709551615u),
  784. MakeUint128(1085102592571150095u, 1085102592571150095u),
  785. MakeUint128(1024819115206086200u, 16397105843297379214u),
  786. MakeUint128(970881267037344821u, 16504981539634861972u),
  787. MakeUint128(922337203685477580u, 14757395258967641292u),
  788. MakeUint128(878416384462359600u, 14054662151397753612u),
  789. MakeUint128(838488366986797800u, 13415813871788764811u),
  790. MakeUint128(802032351030850070u, 4812194106185100421u),
  791. MakeUint128(768614336404564650u, 12297829382473034410u),
  792. MakeUint128(737869762948382064u, 11805916207174113034u),
  793. MakeUint128(709490156681136600u, 11351842506898185609u),
  794. MakeUint128(683212743470724133u, 17080318586768103348u),
  795. MakeUint128(658812288346769700u, 10540996613548315209u),
  796. MakeUint128(636094623231363848u, 15266270957552732371u),
  797. MakeUint128(614891469123651720u, 9838263505978427528u),
  798. MakeUint128(595056260442243600u, 9520900167075897608u),
  799. MakeUint128(576460752303423487u, 18446744073709551615u),
  800. MakeUint128(558992244657865200u, 8943875914525843207u),
  801. MakeUint128(542551296285575047u, 9765923333140350855u),
  802. MakeUint128(527049830677415760u, 8432797290838652167u),
  803. MakeUint128(512409557603043100u, 8198552921648689607u),
  804. };
  805. // This kVmaxOverBase generated with
  806. // for (int base = 2; base < 37; ++base) {
  807. // y_absl::int128 max = std::numeric_limits<y_absl::int128>::max();
  808. // auto result = max / base;
  809. // std::cout << "\tMakeInt128(" << y_absl::Int128High64(result) << ", "
  810. // << y_absl::Int128Low64(result) << "u),\n";
  811. // }
  812. // See https://godbolt.org/z/7djYWz
  813. //
  814. // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
  815. // to avoid a static initializer.
  816. template <>
  817. Y_ABSL_CONST_INIT const int128 LookupTables<int128>::kVmaxOverBase[] = {
  818. 0,
  819. 0,
  820. MakeInt128(4611686018427387903, 18446744073709551615u),
  821. MakeInt128(3074457345618258602, 12297829382473034410u),
  822. MakeInt128(2305843009213693951, 18446744073709551615u),
  823. MakeInt128(1844674407370955161, 11068046444225730969u),
  824. MakeInt128(1537228672809129301, 6148914691236517205u),
  825. MakeInt128(1317624576693539401, 2635249153387078802u),
  826. MakeInt128(1152921504606846975, 18446744073709551615u),
  827. MakeInt128(1024819115206086200, 16397105843297379214u),
  828. MakeInt128(922337203685477580, 14757395258967641292u),
  829. MakeInt128(838488366986797800, 13415813871788764811u),
  830. MakeInt128(768614336404564650, 12297829382473034410u),
  831. MakeInt128(709490156681136600, 11351842506898185609u),
  832. MakeInt128(658812288346769700, 10540996613548315209u),
  833. MakeInt128(614891469123651720, 9838263505978427528u),
  834. MakeInt128(576460752303423487, 18446744073709551615u),
  835. MakeInt128(542551296285575047, 9765923333140350855u),
  836. MakeInt128(512409557603043100, 8198552921648689607u),
  837. MakeInt128(485440633518672410, 17475862806672206794u),
  838. MakeInt128(461168601842738790, 7378697629483820646u),
  839. MakeInt128(439208192231179800, 7027331075698876806u),
  840. MakeInt128(419244183493398900, 6707906935894382405u),
  841. MakeInt128(401016175515425035, 2406097053092550210u),
  842. MakeInt128(384307168202282325, 6148914691236517205u),
  843. MakeInt128(368934881474191032, 5902958103587056517u),
  844. MakeInt128(354745078340568300, 5675921253449092804u),
  845. MakeInt128(341606371735362066, 17763531330238827482u),
  846. MakeInt128(329406144173384850, 5270498306774157604u),
  847. MakeInt128(318047311615681924, 7633135478776366185u),
  848. MakeInt128(307445734561825860, 4919131752989213764u),
  849. MakeInt128(297528130221121800, 4760450083537948804u),
  850. MakeInt128(288230376151711743, 18446744073709551615u),
  851. MakeInt128(279496122328932600, 4471937957262921603u),
  852. MakeInt128(271275648142787523, 14106333703424951235u),
  853. MakeInt128(263524915338707880, 4216398645419326083u),
  854. MakeInt128(256204778801521550, 4099276460824344803u),
  855. };
  856. // This kVminOverBase generated with
  857. // for (int base = 2; base < 37; ++base) {
  858. // y_absl::int128 min = std::numeric_limits<y_absl::int128>::min();
  859. // auto result = min / base;
  860. // std::cout << "\tMakeInt128(" << y_absl::Int128High64(result) << ", "
  861. // << y_absl::Int128Low64(result) << "u),\n";
  862. // }
  863. //
  864. // See https://godbolt.org/z/7djYWz
  865. //
  866. // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
  867. // to avoid a static initializer.
  868. template <>
  869. Y_ABSL_CONST_INIT const int128 LookupTables<int128>::kVminOverBase[] = {
  870. 0,
  871. 0,
  872. MakeInt128(-4611686018427387904, 0u),
  873. MakeInt128(-3074457345618258603, 6148914691236517206u),
  874. MakeInt128(-2305843009213693952, 0u),
  875. MakeInt128(-1844674407370955162, 7378697629483820647u),
  876. MakeInt128(-1537228672809129302, 12297829382473034411u),
  877. MakeInt128(-1317624576693539402, 15811494920322472814u),
  878. MakeInt128(-1152921504606846976, 0u),
  879. MakeInt128(-1024819115206086201, 2049638230412172402u),
  880. MakeInt128(-922337203685477581, 3689348814741910324u),
  881. MakeInt128(-838488366986797801, 5030930201920786805u),
  882. MakeInt128(-768614336404564651, 6148914691236517206u),
  883. MakeInt128(-709490156681136601, 7094901566811366007u),
  884. MakeInt128(-658812288346769701, 7905747460161236407u),
  885. MakeInt128(-614891469123651721, 8608480567731124088u),
  886. MakeInt128(-576460752303423488, 0u),
  887. MakeInt128(-542551296285575048, 8680820740569200761u),
  888. MakeInt128(-512409557603043101, 10248191152060862009u),
  889. MakeInt128(-485440633518672411, 970881267037344822u),
  890. MakeInt128(-461168601842738791, 11068046444225730970u),
  891. MakeInt128(-439208192231179801, 11419412998010674810u),
  892. MakeInt128(-419244183493398901, 11738837137815169211u),
  893. MakeInt128(-401016175515425036, 16040647020617001406u),
  894. MakeInt128(-384307168202282326, 12297829382473034411u),
  895. MakeInt128(-368934881474191033, 12543785970122495099u),
  896. MakeInt128(-354745078340568301, 12770822820260458812u),
  897. MakeInt128(-341606371735362067, 683212743470724134u),
  898. MakeInt128(-329406144173384851, 13176245766935394012u),
  899. MakeInt128(-318047311615681925, 10813608594933185431u),
  900. MakeInt128(-307445734561825861, 13527612320720337852u),
  901. MakeInt128(-297528130221121801, 13686293990171602812u),
  902. MakeInt128(-288230376151711744, 0u),
  903. MakeInt128(-279496122328932601, 13974806116446630013u),
  904. MakeInt128(-271275648142787524, 4340410370284600381u),
  905. MakeInt128(-263524915338707881, 14230345428290225533u),
  906. MakeInt128(-256204778801521551, 14347467612885206813u),
  907. };
  908. template <typename IntType>
  909. Y_ABSL_CONST_INIT const IntType LookupTables<IntType>::kVmaxOverBase[] =
  910. X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
  911. template <typename IntType>
  912. Y_ABSL_CONST_INIT const IntType LookupTables<IntType>::kVminOverBase[] =
  913. X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
  914. #undef X_OVER_BASE_INITIALIZER
  915. template <typename IntType>
  916. inline bool safe_parse_positive_int(y_absl::string_view text, int base,
  917. y_absl::Nonnull<IntType*> value_p) {
  918. IntType value = 0;
  919. const IntType vmax = std::numeric_limits<IntType>::max();
  920. assert(vmax > 0);
  921. assert(base >= 0);
  922. const IntType base_inttype = static_cast<IntType>(base);
  923. assert(vmax >= base_inttype);
  924. const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
  925. assert(base < 2 ||
  926. std::numeric_limits<IntType>::max() / base_inttype == vmax_over_base);
  927. const char* start = text.data();
  928. const char* end = start + text.size();
  929. // loop over digits
  930. for (; start < end; ++start) {
  931. unsigned char c = static_cast<unsigned char>(start[0]);
  932. IntType digit = static_cast<IntType>(kAsciiToInt[c]);
  933. if (digit >= base_inttype) {
  934. *value_p = value;
  935. return false;
  936. }
  937. if (value > vmax_over_base) {
  938. *value_p = vmax;
  939. return false;
  940. }
  941. value *= base_inttype;
  942. if (value > vmax - digit) {
  943. *value_p = vmax;
  944. return false;
  945. }
  946. value += digit;
  947. }
  948. *value_p = value;
  949. return true;
  950. }
  951. template <typename IntType>
  952. inline bool safe_parse_negative_int(y_absl::string_view text, int base,
  953. y_absl::Nonnull<IntType*> value_p) {
  954. IntType value = 0;
  955. const IntType vmin = std::numeric_limits<IntType>::min();
  956. assert(vmin < 0);
  957. assert(vmin <= 0 - base);
  958. IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
  959. assert(base < 2 ||
  960. std::numeric_limits<IntType>::min() / base == vmin_over_base);
  961. // 2003 c++ standard [expr.mul]
  962. // "... the sign of the remainder is implementation-defined."
  963. // Although (vmin/base)*base + vmin%base is always vmin.
  964. // 2011 c++ standard tightens the spec but we cannot rely on it.
  965. // TODO(junyer): Handle this in the lookup table generation.
  966. if (vmin % base > 0) {
  967. vmin_over_base += 1;
  968. }
  969. const char* start = text.data();
  970. const char* end = start + text.size();
  971. // loop over digits
  972. for (; start < end; ++start) {
  973. unsigned char c = static_cast<unsigned char>(start[0]);
  974. int digit = kAsciiToInt[c];
  975. if (digit >= base) {
  976. *value_p = value;
  977. return false;
  978. }
  979. if (value < vmin_over_base) {
  980. *value_p = vmin;
  981. return false;
  982. }
  983. value *= base;
  984. if (value < vmin + digit) {
  985. *value_p = vmin;
  986. return false;
  987. }
  988. value -= digit;
  989. }
  990. *value_p = value;
  991. return true;
  992. }
  993. // Input format based on POSIX.1-2008 strtol
  994. // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
  995. template <typename IntType>
  996. inline bool safe_int_internal(y_absl::string_view text,
  997. y_absl::Nonnull<IntType*> value_p, int base) {
  998. *value_p = 0;
  999. bool negative;
  1000. if (!safe_parse_sign_and_base(&text, &base, &negative)) {
  1001. return false;
  1002. }
  1003. if (!negative) {
  1004. return safe_parse_positive_int(text, base, value_p);
  1005. } else {
  1006. return safe_parse_negative_int(text, base, value_p);
  1007. }
  1008. }
  1009. template <typename IntType>
  1010. inline bool safe_uint_internal(y_absl::string_view text,
  1011. y_absl::Nonnull<IntType*> value_p, int base) {
  1012. *value_p = 0;
  1013. bool negative;
  1014. if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
  1015. return false;
  1016. }
  1017. return safe_parse_positive_int(text, base, value_p);
  1018. }
  1019. } // anonymous namespace
  1020. namespace numbers_internal {
  1021. // Digit conversion.
  1022. Y_ABSL_CONST_INIT Y_ABSL_DLL const char kHexChar[] =
  1023. "0123456789abcdef";
  1024. Y_ABSL_CONST_INIT Y_ABSL_DLL const char kHexTable[513] =
  1025. "000102030405060708090a0b0c0d0e0f"
  1026. "101112131415161718191a1b1c1d1e1f"
  1027. "202122232425262728292a2b2c2d2e2f"
  1028. "303132333435363738393a3b3c3d3e3f"
  1029. "404142434445464748494a4b4c4d4e4f"
  1030. "505152535455565758595a5b5c5d5e5f"
  1031. "606162636465666768696a6b6c6d6e6f"
  1032. "707172737475767778797a7b7c7d7e7f"
  1033. "808182838485868788898a8b8c8d8e8f"
  1034. "909192939495969798999a9b9c9d9e9f"
  1035. "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
  1036. "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
  1037. "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
  1038. "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
  1039. "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
  1040. "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
  1041. bool safe_strto32_base(y_absl::string_view text, y_absl::Nonnull<int32_t*> value,
  1042. int base) {
  1043. return safe_int_internal<int32_t>(text, value, base);
  1044. }
  1045. bool safe_strto64_base(y_absl::string_view text, y_absl::Nonnull<int64_t*> value,
  1046. int base) {
  1047. return safe_int_internal<int64_t>(text, value, base);
  1048. }
  1049. bool safe_strto128_base(y_absl::string_view text, y_absl::Nonnull<int128*> value,
  1050. int base) {
  1051. return safe_int_internal<y_absl::int128>(text, value, base);
  1052. }
  1053. bool safe_strtou32_base(y_absl::string_view text, y_absl::Nonnull<uint32_t*> value,
  1054. int base) {
  1055. return safe_uint_internal<uint32_t>(text, value, base);
  1056. }
  1057. bool safe_strtou64_base(y_absl::string_view text, y_absl::Nonnull<uint64_t*> value,
  1058. int base) {
  1059. return safe_uint_internal<uint64_t>(text, value, base);
  1060. }
  1061. bool safe_strtou128_base(y_absl::string_view text, y_absl::Nonnull<uint128*> value,
  1062. int base) {
  1063. return safe_uint_internal<y_absl::uint128>(text, value, base);
  1064. }
  1065. } // namespace numbers_internal
  1066. Y_ABSL_NAMESPACE_END
  1067. } // namespace y_absl