protozero  1.6.4
Minimalistic protocol buffer decoder and encoder in C++.
pbf_reader.hpp
Go to the documentation of this file.
1 #ifndef PROTOZERO_PBF_READER_HPP
2 #define PROTOZERO_PBF_READER_HPP
3 
4 /*****************************************************************************
5 
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10 
11 *****************************************************************************/
12 
19 #include <protozero/config.hpp>
20 #include <protozero/data_view.hpp>
21 #include <protozero/exception.hpp>
22 #include <protozero/iterators.hpp>
23 #include <protozero/types.hpp>
24 #include <protozero/varint.hpp>
25 
26 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
27 # include <protozero/byteswap.hpp>
28 #endif
29 
30 #include <cstddef>
31 #include <cstdint>
32 #include <cstring>
33 #include <string>
34 #include <utility>
35 
36 namespace protozero {
37 
62 class pbf_reader {
63 
64  // A pointer to the next unread data.
65  const char* m_data = nullptr;
66 
67  // A pointer to one past the end of data.
68  const char* m_end = nullptr;
69 
70  // The wire type of the current field.
71  pbf_wire_type m_wire_type = pbf_wire_type::unknown;
72 
73  // The tag of the current field.
74  pbf_tag_type m_tag = 0;
75 
76  template <typename T>
77  T get_fixed() {
78  T result;
79  const char* data = m_data;
80  skip_bytes(sizeof(T));
81  std::memcpy(&result, data, sizeof(T));
82 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
83  byteswap_inplace(&result);
84 #endif
85  return result;
86  }
87 
88  template <typename T>
90  protozero_assert(tag() != 0 && "call next() before accessing field value");
91  const auto len = get_len_and_skip();
92  if (len % sizeof(T) != 0) {
94  }
95  return {const_fixed_iterator<T>(m_data - len),
96  const_fixed_iterator<T>(m_data)};
97  }
98 
99  template <typename T>
100  T get_varint() {
101  const auto val = static_cast<T>(decode_varint(&m_data, m_end));
102  assert(m_data <= m_end);
103  return val;
104  }
105 
106  template <typename T>
107  T get_svarint() {
108  protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint");
109  return static_cast<T>(decode_zigzag64(decode_varint(&m_data, m_end)));
110  }
111 
112  pbf_length_type get_length() {
113  return get_varint<pbf_length_type>();
114  }
115 
116  void skip_bytes(pbf_length_type len) {
117  if (m_data + len > m_end) {
118  throw end_of_buffer_exception{};
119  }
120  m_data += len;
121 
122 #ifndef NDEBUG
123  // In debug builds reset the tag to zero so that we can detect (some)
124  // wrong code.
125  m_tag = 0;
126 #endif
127  }
128 
129  pbf_length_type get_len_and_skip() {
130  const auto len = get_length();
131  skip_bytes(len);
132  return len;
133  }
134 
135  template <typename T>
136  iterator_range<T> get_packed() {
137  protozero_assert(tag() != 0 && "call next() before accessing field value");
138  const auto len = get_len_and_skip();
139  return {T{m_data - len, m_data},
140  T{m_data, m_data}};
141  }
142 
143 public:
144 
155  explicit pbf_reader(const data_view& view) noexcept
156  : m_data(view.data()),
157  m_end(view.data() + view.size()) {
158  }
159 
170  pbf_reader(const char* data, std::size_t size) noexcept
171  : m_data(data),
172  m_end(data + size) {
173  }
174 
175 #ifndef PROTOZERO_STRICT_API
176 
187  explicit pbf_reader(const std::pair<const char*, std::size_t>& data) noexcept
188  : m_data(data.first),
189  m_end(data.first + data.second) {
190  }
191 #endif
192 
203  explicit pbf_reader(const std::string& data) noexcept
204  : m_data(data.data()),
205  m_end(data.data() + data.size()) {
206  }
207 
/// Default constructor. Members keep their in-class initializers (both
/// data pointers are nullptr), so the reader has no data to read.
212  pbf_reader() noexcept = default;
213 
/// Copy constructor (compiler-generated member-wise copy).
215  pbf_reader(const pbf_reader&) noexcept = default;
216 
/// Move constructor (compiler-generated).
218  pbf_reader(pbf_reader&&) noexcept = default;
219 
/// Copy assignment (compiler-generated member-wise copy).
221  pbf_reader& operator=(const pbf_reader& other) noexcept = default;
222 
/// Move assignment (compiler-generated).
224  pbf_reader& operator=(pbf_reader&& other) noexcept = default;
225 
/// Destructor (compiler-generated; the class only holds raw pointers,
/// a wire type and a tag).
226  ~pbf_reader() = default;
227 
233  void swap(pbf_reader& other) noexcept {
234  using std::swap;
235  swap(m_data, other.m_data);
236  swap(m_end, other.m_end);
237  swap(m_wire_type, other.m_wire_type);
238  swap(m_tag, other.m_tag);
239  }
240 
246  operator bool() const noexcept { // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
247  return m_data < m_end;
248  }
249 
253  data_view data() const noexcept {
254  return {m_data, static_cast<std::size_t>(m_end - m_data)};
255  }
256 
266  std::size_t length() const noexcept {
267  return std::size_t(m_end - m_data);
268  }
269 
285  bool next() {
286  if (m_data == m_end) {
287  return false;
288  }
289 
290  const auto value = get_varint<uint32_t>();
291  m_tag = pbf_tag_type(value >> 3u);
292 
293  // tags 0 and 19000 to 19999 are not allowed as per
294  // https://developers.google.com/protocol-buffers/docs/proto#assigning-tags
295  if (m_tag == 0 || (m_tag >= 19000 && m_tag <= 19999)) {
296  throw invalid_tag_exception{};
297  }
298 
299  m_wire_type = pbf_wire_type(value & 0x07u);
300  switch (m_wire_type) {
301  case pbf_wire_type::varint:
302  case pbf_wire_type::fixed64:
303  case pbf_wire_type::length_delimited:
304  case pbf_wire_type::fixed32:
305  break;
306  default:
308  }
309 
310  return true;
311  }
312 
341  bool next(pbf_tag_type next_tag) {
342  while (next()) {
343  if (m_tag == next_tag) {
344  return true;
345  }
346  skip();
347  }
348  return false;
349  }
350 
379  bool next(pbf_tag_type next_tag, pbf_wire_type type) {
380  while (next()) {
381  if (m_tag == next_tag && m_wire_type == type) {
382  return true;
383  }
384  skip();
385  }
386  return false;
387  }
388 
/// Tag of the current field as stored by the last call to next().
/// In debug builds skip_bytes() resets it to 0 once field data has
/// been consumed.
398  pbf_tag_type tag() const noexcept {
399  return m_tag;
400  }
401 
/// Wire type of the current field as stored by the last call to
/// next(); pbf_wire_type::unknown before the first call.
417  pbf_wire_type wire_type() const noexcept {
418  return m_wire_type;
419  }
420 
443  uint32_t tag_and_type() const noexcept {
445  }
446 
453  bool has_wire_type(pbf_wire_type type) const noexcept {
454  return wire_type() == type;
455  }
456 
463  void skip() {
464  protozero_assert(tag() != 0 && "call next() before calling skip()");
465  switch (wire_type()) {
466  case pbf_wire_type::varint:
467  skip_varint(&m_data, m_end);
468  break;
469  case pbf_wire_type::fixed64:
470  skip_bytes(8);
471  break;
472  case pbf_wire_type::length_delimited:
473  skip_bytes(get_length());
474  break;
475  case pbf_wire_type::fixed32:
476  skip_bytes(4);
477  break;
478  default:
479  break;
480  }
481  assert(m_data <= m_end);
482  }
483 
485 
496  bool get_bool() {
497  protozero_assert(tag() != 0 && "call next() before accessing field value");
498  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
499  const auto data = m_data;
500  skip_varint(&m_data, m_end);
501  return data[0] != 0;
502  }
503 
511  int32_t get_enum() {
512  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
513  return get_varint<int32_t>();
514  }
515 
523  int32_t get_int32() {
524  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
525  return get_varint<int32_t>();
526  }
527 
535  int32_t get_sint32() {
536  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
537  return get_svarint<int32_t>();
538  }
539 
547  uint32_t get_uint32() {
548  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
549  return get_varint<uint32_t>();
550  }
551 
559  int64_t get_int64() {
560  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
561  return get_varint<int64_t>();
562  }
563 
571  int64_t get_sint64() {
572  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
573  return get_svarint<int64_t>();
574  }
575 
583  uint64_t get_uint64() {
584  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
585  return get_varint<uint64_t>();
586  }
587 
595  uint32_t get_fixed32() {
596  protozero_assert(tag() != 0 && "call next() before accessing field value");
597  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
598  return get_fixed<uint32_t>();
599  }
600 
608  int32_t get_sfixed32() {
609  protozero_assert(tag() != 0 && "call next() before accessing field value");
610  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
611  return get_fixed<int32_t>();
612  }
613 
621  uint64_t get_fixed64() {
622  protozero_assert(tag() != 0 && "call next() before accessing field value");
623  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
624  return get_fixed<uint64_t>();
625  }
626 
634  int64_t get_sfixed64() {
635  protozero_assert(tag() != 0 && "call next() before accessing field value");
636  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
637  return get_fixed<int64_t>();
638  }
639 
647  float get_float() {
648  protozero_assert(tag() != 0 && "call next() before accessing field value");
649  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
650  return get_fixed<float>();
651  }
652 
660  double get_double() {
661  protozero_assert(tag() != 0 && "call next() before accessing field value");
662  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
663  return get_fixed<double>();
664  }
665 
676  protozero_assert(tag() != 0 && "call next() before accessing field value");
677  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
678  const auto len = get_len_and_skip();
679  return {m_data - len, len};
680  }
681 
682 #ifndef PROTOZERO_STRICT_API
683 
691  std::pair<const char*, pbf_length_type> get_data() {
692  protozero_assert(tag() != 0 && "call next() before accessing field value");
693  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
694  const auto len = get_len_and_skip();
695  return {m_data - len, len};
696  }
697 #endif
698 
706  std::string get_bytes() {
707  return std::string(get_view());
708  }
709 
717  std::string get_string() {
718  return std::string(get_view());
719  }
720 
729  return pbf_reader{get_view()};
730  }
731 
733 
736 
739 
742 
745 
748 
751 
754 
757 
760 
763 
766 
769 
772 
775 
777 
791  return get_packed<pbf_reader::const_bool_iterator>();
792  }
793 
804  return get_packed<pbf_reader::const_enum_iterator>();
805  }
806 
817  return get_packed<pbf_reader::const_int32_iterator>();
818  }
819 
830  return get_packed<pbf_reader::const_sint32_iterator>();
831  }
832 
843  return get_packed<pbf_reader::const_uint32_iterator>();
844  }
845 
856  return get_packed<pbf_reader::const_int64_iterator>();
857  }
858 
869  return get_packed<pbf_reader::const_sint64_iterator>();
870  }
871 
882  return get_packed<pbf_reader::const_uint64_iterator>();
883  }
884 
895  return packed_fixed<uint32_t>();
896  }
897 
908  return packed_fixed<int32_t>();
909  }
910 
921  return packed_fixed<uint64_t>();
922  }
923 
934  return packed_fixed<int64_t>();
935  }
936 
947  return packed_fixed<float>();
948  }
949 
960  return packed_fixed<double>();
961  }
962 
964 
965 }; // class pbf_reader
966 
973 inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept {
974  lhs.swap(rhs);
975 }
976 
977 } // end namespace protozero
978 
979 #endif // PROTOZERO_PBF_READER_HPP
pbf_reader(const data_view &view) noexcept
Definition: pbf_reader.hpp:155
int64_t get_sfixed64()
Definition: pbf_reader.hpp:634
uint32_t get_uint32()
Definition: pbf_reader.hpp:547
uint64_t get_fixed64()
Definition: pbf_reader.hpp:621
int32_t get_sfixed32()
Definition: pbf_reader.hpp:608
Definition: exception.hpp:52
uint64_t get_uint64()
Definition: pbf_reader.hpp:583
constexpr int64_t decode_zigzag64(uint64_t value) noexcept
Definition: varint.hpp:199
Definition: iterators.hpp:160
int32_t get_int32()
Definition: pbf_reader.hpp:523
Definition: exception.hpp:92
pbf_reader(const std::string &data) noexcept
Definition: pbf_reader.hpp:203
iterator_range< pbf_reader::const_sint64_iterator > get_packed_sint64()
Definition: pbf_reader.hpp:868
constexpr uint32_t tag_and_type(T tag, pbf_wire_type wire_type) noexcept
Definition: types.hpp:55
void swap(pbf_reader &lhs, pbf_reader &rhs) noexcept
Definition: pbf_reader.hpp:973
Contains macro checks for different configurations.
iterator_range< pbf_reader::const_float_iterator > get_packed_float()
Definition: pbf_reader.hpp:946
Contains the declaration of low-level types used in the pbf format.
bool has_wire_type(pbf_wire_type type) const noexcept
Definition: pbf_reader.hpp:453
std::size_t length() const noexcept
Definition: pbf_reader.hpp:266
void skip()
Definition: pbf_reader.hpp:463
iterator_range< pbf_reader::const_int32_iterator > get_packed_int32()
Definition: pbf_reader.hpp:816
pbf_reader get_message()
Definition: pbf_reader.hpp:728
pbf_reader() noexcept=default
iterator_range< pbf_reader::const_int64_iterator > get_packed_int64()
Definition: pbf_reader.hpp:855
void skip_varint(const char **data, const char *end)
Definition: varint.hpp:112
Contains the iterators for access to packed repeated fields.
constexpr std::size_t size() const noexcept
Return length of data in bytes.
Definition: data_view.hpp:99
pbf_wire_type
Definition: types.hpp:40
iterator_range< pbf_reader::const_uint64_iterator > get_packed_uint64()
Definition: pbf_reader.hpp:881
iterator_range< pbf_reader::const_sint32_iterator > get_packed_sint32()
Definition: pbf_reader.hpp:829
iterator_range< pbf_reader::const_sfixed32_iterator > get_packed_sfixed32()
Definition: pbf_reader.hpp:907
pbf_wire_type wire_type() const noexcept
Definition: pbf_reader.hpp:417
Contains the implementation of the data_view class.
Definition: exception.hpp:80
void swap(pbf_reader &other) noexcept
Definition: pbf_reader.hpp:233
data_view data() const noexcept
Definition: pbf_reader.hpp:253
int64_t get_sint64()
Definition: pbf_reader.hpp:571
bool next(pbf_tag_type next_tag)
Definition: pbf_reader.hpp:341
uint32_t tag_and_type() const noexcept
Definition: pbf_reader.hpp:443
std::pair< const char *, pbf_length_type > get_data()
Definition: pbf_reader.hpp:691
iterator_range< pbf_reader::const_bool_iterator > get_packed_bool()
Definition: pbf_reader.hpp:790
Contains functions to swap bytes in values (for different endianness).
int32_t get_sint32()
Definition: pbf_reader.hpp:535
std::string get_bytes()
Definition: pbf_reader.hpp:706
iterator_range< pbf_reader::const_double_iterator > get_packed_double()
Definition: pbf_reader.hpp:959
double get_double()
Definition: pbf_reader.hpp:660
pbf_reader & operator=(const pbf_reader &other) noexcept=default
pbf_reader messages can be copied trivially.
bool get_bool()
Definition: pbf_reader.hpp:496
std::string get_string()
Definition: pbf_reader.hpp:717
uint32_t pbf_length_type
Definition: types.hpp:62
Contains the exceptions used in the protozero library.
pbf_reader(const std::pair< const char *, std::size_t > &data) noexcept
Definition: pbf_reader.hpp:187
data_view get_view()
Definition: pbf_reader.hpp:675
iterator_range< pbf_reader::const_fixed32_iterator > get_packed_fixed32()
Definition: pbf_reader.hpp:894
pbf_reader(const char *data, std::size_t size) noexcept
Definition: pbf_reader.hpp:170
uint32_t pbf_tag_type
Definition: types.hpp:33
uint32_t get_fixed32()
Definition: pbf_reader.hpp:595
iterator_range< pbf_reader::const_enum_iterator > get_packed_enum()
Definition: pbf_reader.hpp:803
Definition: data_view.hpp:39
pbf_tag_type tag() const noexcept
Definition: pbf_reader.hpp:398
Definition: iterators.hpp:374
iterator_range< pbf_reader::const_sfixed64_iterator > get_packed_sfixed64()
Definition: pbf_reader.hpp:933
void byteswap_inplace(uint32_t *ptr) noexcept
byteswap the data pointed to by ptr in-place.
Definition: byteswap.hpp:55
Definition: iterators.hpp:289
int32_t get_enum()
Definition: pbf_reader.hpp:511
Definition: pbf_reader.hpp:62
bool next(pbf_tag_type next_tag, pbf_wire_type type)
Definition: pbf_reader.hpp:379
constexpr const char * data() const noexcept
Return pointer to data.
Definition: data_view.hpp:94
Definition: iterators.hpp:39
float get_float()
Definition: pbf_reader.hpp:647
Definition: exception.hpp:67
Contains low-level varint and zigzag encoding and decoding functions.
uint64_t decode_varint(const char **data, const char *end)
Definition: varint.hpp:89
bool next()
Definition: pbf_reader.hpp:285
iterator_range< pbf_reader::const_fixed64_iterator > get_packed_fixed64()
Definition: pbf_reader.hpp:920
int64_t get_int64()
Definition: pbf_reader.hpp:559
iterator_range< pbf_reader::const_uint32_iterator > get_packed_uint32()
Definition: pbf_reader.hpp:842
All parts of the protozero header-only library are in this namespace.
Definition: byteswap.hpp:23