#pragma once #include "byte_reader.h" #include "byte_writer.h" #include "traits.h" #include "zigzag.h" #include #include #include namespace NYsonPull { namespace NDetail { namespace NVarInt { namespace NImpl { template constexpr inline size_t max_size() { return (8 * sizeof(T) - 1) / 7 + 1; } template inline size_t write(ui64 value, T&& consume) { auto stop = false; auto nwritten = size_t{0}; while (!stop) { ++nwritten; auto byte = static_cast(value | 0x80); value >>= 7; if (value == 0) { stop = true; byte &= 0x7F; } consume(byte); } return nwritten; } template inline bool read_fast(byte_reader& reader, ui64* value) { auto& buf = reader.stream().buffer(); auto* ptr = buf.pos(); ui32 b; // Splitting into 32-bit pieces gives better performance on 32-bit // processors. ui32 part0 = 0, part1 = 0, part2 = 0; b = *(ptr++); part0 = (b & 0x7F); if (!(b & 0x80)) goto done; b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; b = *(ptr++); part1 = (b & 0x7F); if (!(b & 0x80)) goto done; b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; b = *(ptr++); part2 = (b & 0x7F); if (!(b & 0x80)) goto done; b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; // We have overrun the maximum size of a Varint (10 bytes). The data // must be corrupt. return false; done: reader.advance(ptr - buf.pos()); *value = (static_cast(part0)) | (static_cast(part1) << 28) | (static_cast(part2) << 56); return true; } template inline bool read_fast(byte_reader& reader, ui32* value) { // Fast path: We have enough bytes left in the buffer to guarantee that // this read won't cross the end, so we can skip the checks. auto& buf = reader.stream().buffer(); auto* ptr = buf.pos(); ui32 b; ui32 result; b = *(ptr++); result = (b & 0x7F); if (!(b & 0x80)) goto done; b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done; // FIXME // If the input is larger than 32 bits, we still need to read it all // and discard the high-order bits. for (size_t i = 0; i < max_size() - max_size(); i++) { b = *(ptr++); if (!(b & 0x80)) goto done; } // We have overrun the maximum size of a Varint (10 bytes). Assume // the data is corrupt. return false; done: reader.advance(ptr - buf.pos()); *value = result; return true; } template inline bool read_slow(byte_reader& reader, ui64* value) { // Slow path: This read might cross the end of the buffer, so we // need to check and refresh the buffer if and when it does. auto& buf = reader.stream().buffer(); ui64 result = 0; int count = 0; ui32 b; do { if (count == max_size()) { return false; } reader.fill_buffer(); if (reader.stream().at_end()) { return false; } b = *buf.pos(); result |= static_cast(b & 0x7F) << (7 * count); reader.advance(1); ++count; } while (b & 0x80); *value = result; return true; } template inline bool read_slow(byte_reader& reader, ui32* value) { ui64 result; // fallback to 64-bit reading if (read_slow(reader, &result) && result <= std::numeric_limits::max()) { *value = static_cast(result); return true; } return false; } // Following functions is an adaptation // of Protobuf code from coded_stream.cc template inline bool read_dispatch(byte_reader& reader, T* value) { auto& buf = reader.stream().buffer(); // NOTE: checking for 64-bit max_size(), since 32-bit // read_fast() might fallback to 64-bit reading if (buf.available() >= max_size() || // Optimization: If the Varint ends at exactly the end of the buffer, // we can detect that and still use the fast path. (!buf.is_empty() && !(buf.end()[-1] & 0x80))) { return read_fast(reader, value); } else { // Really slow case: we will incur the cost of an extra function call here, // but moving this out of line reduces the size of this function, which // improves the common case. In micro benchmarks, this is worth about 10-15% return read_slow(reader, value); } } } // Various functions to read/write varints. // Returns the number of bytes written. template inline NTraits::if_unsigned write(ui8* data, T value) { return NImpl::write( static_cast(value), [&](ui8 byte) { *data++ = byte; }); } template inline NTraits::if_signed write(ui8* data, T value) { return NImpl::write( static_cast(NZigZag::encode(value)), [&](ui8 byte) { *data++ = byte; }); } template inline void write(byte_writer& stream, T value) { ui8 data[NImpl::max_size()]; auto size = write(data, value); stream.write(data, size); } template inline NTraits::if_unsigned read(byte_reader& reader) { auto value = T{}; auto& buf = reader.stream().buffer(); if (!buf.is_empty() && *buf.pos() < 0x80) { value = *buf.pos(); reader.advance(1); return value; } if (Y_UNLIKELY(!NImpl::read_dispatch(reader, &value))) { reader.fail("Error parsing varint value"); } return value; } template inline NTraits::if_signed read(byte_reader& reader) { return NZigZag::decode( read>(reader)); } } } // namespace NDetail }