proto.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. // Copyright 2020 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // -----------------------------------------------------------------------------
  15. // File: internal/proto.h
  16. // -----------------------------------------------------------------------------
  17. //
  18. // Declares functions for serializing and deserializing data to and from memory
  19. // buffers in protocol buffer wire format. This library takes no steps to
  20. // ensure that the encoded data matches with any message specification.
  21. #ifndef ABSL_LOG_INTERNAL_PROTO_H_
  22. #define ABSL_LOG_INTERNAL_PROTO_H_
  23. #include <cstddef>
  24. #include <cstdint>
  25. #include <limits>
  26. #include "absl/base/attributes.h"
  27. #include "absl/base/casts.h"
  28. #include "absl/base/config.h"
  29. #include "absl/strings/string_view.h"
  30. #include "absl/types/span.h"
  31. namespace absl {
  32. ABSL_NAMESPACE_BEGIN
  33. namespace log_internal {
  34. // absl::Span<char> represents a view into the available space in a mutable
  35. // buffer during encoding. Encoding functions shrink the span as they go so
  36. // that the same view can be passed to a series of Encode functions. If the
  37. // data do not fit, nothing is encoded, the view is set to size zero (so that
  38. // all subsequent encode calls fail), and false is returned. Otherwise true is
  39. // returned.
  40. // In particular, attempting to encode a series of data into an insufficient
  41. // buffer has consistent and efficient behavior without any caller-side error
  42. // checking. Individual values will be encoded in their entirety or not at all
  43. // (unless one of the `Truncate` functions is used). Once a value is omitted
  44. // because it does not fit, no subsequent values will be encoded to preserve
  45. // ordering; the decoded sequence will be a prefix of the original sequence.
  46. // There are two ways to encode a message-typed field:
  47. //
  48. // * Construct its contents in a separate buffer and use `EncodeBytes` to copy
  49. // it into the primary buffer with type, tag, and length.
  50. // * Use `EncodeMessageStart` to write type and tag fields and reserve space for
  51. // the length field, then encode the contents directly into the buffer, then
  52. // use `EncodeMessageLength` to write the actual length into the reserved
  53. // bytes. This works fine if the actual length takes fewer bytes to encode
  54. // than were reserved, although you don't get your extra bytes back.
  55. // This approach will always produce a valid encoding, but your protocol may
  56. // require that the whole message field be omitted if the buffer is too small
  57. // to contain all desired subfields. In this case, operate on a copy of the
  58. // buffer view and assign back only if everything fit, i.e. if the last
  59. // `Encode` call returned true.
  60. // Encodes the specified integer as a varint field and returns true if it fits.
  61. // Used for int32_t, int64_t, uint32_t, uint64_t, bool, and enum field types.
  62. // Consumes up to kMaxVarintSize * 2 bytes (20).
  63. bool EncodeVarint(uint64_t tag, uint64_t value, absl::Span<char> *buf);
  64. inline bool EncodeVarint(uint64_t tag, int64_t value, absl::Span<char> *buf) {
  65. return EncodeVarint(tag, static_cast<uint64_t>(value), buf);
  66. }
  67. inline bool EncodeVarint(uint64_t tag, uint32_t value, absl::Span<char> *buf) {
  68. return EncodeVarint(tag, static_cast<uint64_t>(value), buf);
  69. }
  70. inline bool EncodeVarint(uint64_t tag, int32_t value, absl::Span<char> *buf) {
  71. return EncodeVarint(tag, static_cast<uint64_t>(value), buf);
  72. }
  73. // Encodes the specified integer as a varint field using ZigZag encoding and
  74. // returns true if it fits.
  75. // Used for sint32 and sint64 field types.
  76. // Consumes up to kMaxVarintSize * 2 bytes (20).
  77. inline bool EncodeVarintZigZag(uint64_t tag, int64_t value,
  78. absl::Span<char> *buf) {
  79. if (value < 0)
  80. return EncodeVarint(tag, 2 * static_cast<uint64_t>(-(value + 1)) + 1, buf);
  81. return EncodeVarint(tag, 2 * static_cast<uint64_t>(value), buf);
  82. }
  83. // Encodes the specified integer as a 64-bit field and returns true if it fits.
  84. // Used for fixed64 and sfixed64 field types.
  85. // Consumes up to kMaxVarintSize + 8 bytes (18).
  86. bool Encode64Bit(uint64_t tag, uint64_t value, absl::Span<char> *buf);
  87. inline bool Encode64Bit(uint64_t tag, int64_t value, absl::Span<char> *buf) {
  88. return Encode64Bit(tag, static_cast<uint64_t>(value), buf);
  89. }
  90. inline bool Encode64Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf) {
  91. return Encode64Bit(tag, static_cast<uint64_t>(value), buf);
  92. }
  93. inline bool Encode64Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) {
  94. return Encode64Bit(tag, static_cast<uint64_t>(value), buf);
  95. }
  96. // Encodes the specified double as a 64-bit field and returns true if it fits.
  97. // Used for double field type.
  98. // Consumes up to kMaxVarintSize + 8 bytes (18).
  99. inline bool EncodeDouble(uint64_t tag, double value, absl::Span<char> *buf) {
  100. return Encode64Bit(tag, absl::bit_cast<uint64_t>(value), buf);
  101. }
  102. // Encodes the specified integer as a 32-bit field and returns true if it fits.
  103. // Used for fixed32 and sfixed32 field types.
  104. // Consumes up to kMaxVarintSize + 4 bytes (14).
  105. bool Encode32Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf);
  106. inline bool Encode32Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) {
  107. return Encode32Bit(tag, static_cast<uint32_t>(value), buf);
  108. }
  109. // Encodes the specified float as a 32-bit field and returns true if it fits.
  110. // Used for float field type.
  111. // Consumes up to kMaxVarintSize + 4 bytes (14).
  112. inline bool EncodeFloat(uint64_t tag, float value, absl::Span<char> *buf) {
  113. return Encode32Bit(tag, absl::bit_cast<uint32_t>(value), buf);
  114. }
  115. // Encodes the specified bytes as a length-delimited field and returns true if
  116. // they fit.
  117. // Used for string, bytes, message, and packed-repeated field type.
  118. // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
  119. bool EncodeBytes(uint64_t tag, absl::Span<const char> value,
  120. absl::Span<char> *buf);
  121. // Encodes as many of the specified bytes as will fit as a length-delimited
  122. // field and returns true as long as the field header (`tag_type` and `length`)
  123. // fits.
  124. // Used for string, bytes, message, and packed-repeated field type.
  125. // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
  126. bool EncodeBytesTruncate(uint64_t tag, absl::Span<const char> value,
  127. absl::Span<char> *buf);
  128. // Encodes the specified string as a length-delimited field and returns true if
  129. // it fits.
  130. // Used for string, bytes, message, and packed-repeated field type.
  131. // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
  132. inline bool EncodeString(uint64_t tag, absl::string_view value,
  133. absl::Span<char> *buf) {
  134. return EncodeBytes(tag, value, buf);
  135. }
  136. // Encodes as much of the specified string as will fit as a length-delimited
  137. // field and returns true as long as the field header (`tag_type` and `length`)
  138. // fits.
  139. // Used for string, bytes, message, and packed-repeated field type.
  140. // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
  141. inline bool EncodeStringTruncate(uint64_t tag, absl::string_view value,
  142. absl::Span<char> *buf) {
  143. return EncodeBytesTruncate(tag, value, buf);
  144. }
  145. // Encodes the header for a length-delimited field containing up to `max_size`
  146. // bytes or the number remaining in the buffer, whichever is less. If the
  147. // header fits, a non-nullptr `Span` is returned; this must be passed to
  148. // `EncodeMessageLength` after all contents are encoded to finalize the length
  149. // field. If the header does not fit, a nullptr `Span` is returned which is
  150. // safe to pass to `EncodeMessageLength` but need not be.
  151. // Used for string, bytes, message, and packed-repeated field type.
  152. // Consumes up to kMaxVarintSize * 2 bytes (20).
  153. ABSL_MUST_USE_RESULT absl::Span<char> EncodeMessageStart(uint64_t tag,
  154. uint64_t max_size,
  155. absl::Span<char> *buf);
  156. // Finalizes the length field in `msg` so that it encompasses all data encoded
  157. // since the call to `EncodeMessageStart` which returned `msg`. Does nothing if
  158. // `msg` is a `nullptr` `Span`.
  159. void EncodeMessageLength(absl::Span<char> msg, const absl::Span<char> *buf);
  // Wire-format type identifiers handled by this library.  The numeric values
  // are fixed; note that 3 and 4 are intentionally not defined here.
  enum class WireType : uint64_t {
    kVarint = 0,           // Variable-length integer.
    k64Bit = 1,            // Fixed 8-byte value.
    kLengthDelimited = 2,  // Length prefix followed by that many bytes.
    k32Bit = 5,            // Fixed 4-byte value.
  };
  // Returns the number of bytes needed to encode `value` as a varint: one byte
  // per 7 bits of payload, with a minimum of one byte (so zero takes 1 byte).
  constexpr size_t VarintSize(uint64_t value) {
    // Anything at or above 2^7 needs another byte for the remaining high bits.
    return value >= 0x80 ? VarintSize(value >> 7) + 1 : 1;
  }
  169. constexpr size_t MinVarintSize() {
  170. return VarintSize((std::numeric_limits<uint64_t>::min)());
  171. }
  172. constexpr size_t MaxVarintSize() {
  173. return VarintSize((std::numeric_limits<uint64_t>::max)());
  174. }
  // Returns the largest value whose varint encoding occupies at most `size`
  // bytes.  Each byte carries 7 payload bits, so the answer is 2^(7*size) - 1;
  // from 10 bytes upward every uint64_t fits, and the shift would no longer be
  // valid, so the uint64_t maximum is returned instead.
  constexpr uint64_t MaxVarintForSize(size_t size) {
    return size < 10 ? (static_cast<uint64_t>(1) << (7 * size)) - 1
                     : (std::numeric_limits<uint64_t>::max)();
  }
  179. // `BufferSizeFor` returns a number of bytes guaranteed to be sufficient to
  180. // store encoded fields of the specified WireTypes regardless of tag numbers and
  181. // data values. This only makes sense for `WireType::kLengthDelimited` if you
  182. // add in the length of the contents yourself, e.g. for string and bytes fields
  183. // by adding the lengths of any encoded strings to the return value or for
  184. // submessage fields by enumerating the fields you may encode into their
  185. // contents.
  186. constexpr size_t BufferSizeFor() { return 0; }
  187. template <typename... T>
  188. constexpr size_t BufferSizeFor(WireType type, T... tail) {
  189. // tag_type + data + ...
  190. return MaxVarintSize() +
  191. (type == WireType::kVarint ? MaxVarintSize() : //
  192. type == WireType::k64Bit ? 8 : //
  193. type == WireType::k32Bit ? 4 : MaxVarintSize()) + //
  194. BufferSizeFor(tail...);
  195. }
  196. // absl::Span<const char> represents a view into the un-processed space in a
  197. // buffer during decoding. Decoding functions shrink the span as they go so
  198. // that the same view can be decoded iteratively until all data are processed.
  199. // In general, if the buffer is exhausted but additional bytes are expected by
  200. // the decoder, it will return values as if the additional bytes were zeros.
  201. // Length-delimited fields are an exception - if the encoded length field
  202. // indicates more data bytes than are available in the buffer, the `bytes_value`
  203. // and `string_value` accessors will return truncated views.
  204. class ProtoField final {
  205. public:
  206. // Consumes bytes from `data` and returns true if there were any bytes to
  207. // decode.
  208. bool DecodeFrom(absl::Span<const char> *data);
  209. uint64_t tag() const { return tag_; }
  210. WireType type() const { return type_; }
  211. // These value accessors will return nonsense if the data were not encoded in
  212. // the corresponding wiretype from the corresponding C++ (or other language)
  213. // type.
  214. double double_value() const { return absl::bit_cast<double>(value_); }
  215. float float_value() const {
  216. return absl::bit_cast<float>(static_cast<uint32_t>(value_));
  217. }
  218. int32_t int32_value() const { return static_cast<int32_t>(value_); }
  219. int64_t int64_value() const { return static_cast<int64_t>(value_); }
  220. int32_t sint32_value() const {
  221. if (value_ % 2) return static_cast<int32_t>(0 - ((value_ - 1) / 2) - 1);
  222. return static_cast<int32_t>(value_ / 2);
  223. }
  224. int64_t sint64_value() const {
  225. if (value_ % 2) return 0 - ((value_ - 1) / 2) - 1;
  226. return value_ / 2;
  227. }
  228. uint32_t uint32_value() const { return static_cast<uint32_t>(value_); }
  229. uint64_t uint64_value() const { return value_; }
  230. bool bool_value() const { return value_ != 0; }
  231. // To decode an enum, call int32_value() and cast to the appropriate type.
  232. // Note that the official C++ proto compiler treats enum fields with values
  233. // that do not correspond to a defined enumerator as unknown fields.
  234. // To decode fields within a submessage field, call
  235. // `DecodeNextField(field.BytesValue())`.
  236. absl::Span<const char> bytes_value() const { return data_; }
  237. absl::string_view string_value() const {
  238. const auto data = bytes_value();
  239. return absl::string_view(data.data(), data.size());
  240. }
  241. // Returns the encoded length of a length-delimited field. This equals
  242. // `bytes_value().size()` except when the latter has been truncated due to
  243. // buffer underrun.
  244. uint64_t encoded_length() const { return value_; }
  245. private:
  246. uint64_t tag_;
  247. WireType type_;
  248. // For `kTypeVarint`, `kType64Bit`, and `kType32Bit`, holds the decoded value.
  249. // For `kTypeLengthDelimited`, holds the decoded length.
  250. uint64_t value_;
  251. absl::Span<const char> data_;
  252. };
  253. } // namespace log_internal
  254. ABSL_NAMESPACE_END
  255. } // namespace absl
  256. #endif // ABSL_LOG_INTERNAL_PROTO_H_