diff options
Diffstat (limited to 'include/google/protobuf/io')
-rw-r--r-- | include/google/protobuf/io/coded_stream.h | 1799 | ||||
-rw-r--r-- | include/google/protobuf/io/gzip_stream.h | 205 | ||||
-rw-r--r-- | include/google/protobuf/io/io_win32.h | 141 | ||||
-rw-r--r-- | include/google/protobuf/io/printer.h | 387 | ||||
-rw-r--r-- | include/google/protobuf/io/strtod.h | 55 | ||||
-rw-r--r-- | include/google/protobuf/io/tokenizer.h | 442 | ||||
-rw-r--r-- | include/google/protobuf/io/zero_copy_stream.h | 260 | ||||
-rw-r--r-- | include/google/protobuf/io/zero_copy_stream_impl.h | 336 | ||||
-rw-r--r-- | include/google/protobuf/io/zero_copy_stream_impl_lite.h | 413 |
9 files changed, 4038 insertions, 0 deletions
diff --git a/include/google/protobuf/io/coded_stream.h b/include/google/protobuf/io/coded_stream.h new file mode 100644 index 0000000000..c8fc994f91 --- /dev/null +++ b/include/google/protobuf/io/coded_stream.h @@ -0,0 +1,1799 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains the CodedInputStream and CodedOutputStream classes, +// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively, +// and allow you to read or write individual pieces of data in various +// formats. In particular, these implement the varint encoding for +// integers, a simple variable-length encoding in which smaller numbers +// take fewer bytes. +// +// Typically these classes will only be used internally by the protocol +// buffer library in order to encode and decode protocol buffers. Clients +// of the library only need to know about this class if they wish to write +// custom message parsing or serialization procedures. +// +// CodedOutputStream example: +// // Write some data to "myfile". First we write a 4-byte "magic number" +// // to identify the file type, then write a length-delimited string. The +// // string is composed of a varint giving the length followed by the raw +// // bytes. +// int fd = open("myfile", O_CREAT | O_WRONLY); +// ZeroCopyOutputStream* raw_output = new FileOutputStream(fd); +// CodedOutputStream* coded_output = new CodedOutputStream(raw_output); +// +// int magic_number = 1234; +// char text[] = "Hello world!"; +// coded_output->WriteLittleEndian32(magic_number); +// coded_output->WriteVarint32(strlen(text)); +// coded_output->WriteRaw(text, strlen(text)); +// +// delete coded_output; +// delete raw_output; +// close(fd); +// +// CodedInputStream example: +// // Read a file created by the above code. +// int fd = open("myfile", O_RDONLY); +// ZeroCopyInputStream* raw_input = new FileInputStream(fd); +// CodedInputStream* coded_input = new CodedInputStream(raw_input); +// +// coded_input->ReadLittleEndian32(&magic_number); +// if (magic_number != 1234) { +// cerr << "File not in expected format." << endl; +// return; +// } +// +// uint32_t size; +// coded_input->ReadVarint32(&size); +// +// char* text = new char[size + 1]; +// coded_input->ReadRaw(buffer, size); +// text[size] = '\0'; +// +// delete coded_input; +// delete raw_input; +// close(fd); +// +// cout << "Text is: " << text << endl; +// delete [] text; +// +// For those who are interested, varint encoding is defined as follows: +// +// The encoding operates on unsigned integers of up to 64 bits in length. +// Each byte of the encoded value has the format: +// * bits 0-6: Seven bits of the number being encoded. +// * bit 7: Zero if this is the last byte in the encoding (in which +// case all remaining bits of the number are zero) or 1 if +// more bytes follow. +// The first byte contains the least-significant 7 bits of the number, the +// second byte (if present) contains the next-least-significant 7 bits, +// and so on. So, the binary number 1011000101011 would be encoded in two +// bytes as "10101011 00101100". +// +// In theory, varint could be used to encode integers of any length. +// However, for practicality we set a limit at 64 bits. The maximum encoded +// length of a number is thus 10 bytes. + +#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ +#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ + + +#include <assert.h> + +#include <atomic> +#include <climits> +#include <cstddef> +#include <cstring> +#include <limits> +#include <string> +#include <type_traits> +#include <utility> + +#if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER) +// If MSVC has "/RTCc" set, it will complain about truncating casts at +// runtime. This file contains some intentional truncating casts. +#pragma runtime_checks("c", off) +#endif + + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/logging.h> +#include <google/protobuf/stubs/strutil.h> +#include <google/protobuf/port.h> +#include <google/protobuf/stubs/port.h> + + +// Must be included last. +#include <google/protobuf/port_def.inc> + +namespace google { +namespace protobuf { + +class DescriptorPool; +class MessageFactory; +class ZeroCopyCodedInputStream; + +namespace internal { +void MapTestForceDeterministic(); +class EpsCopyByteStream; +} // namespace internal + +namespace io { + +// Defined in this file. +class CodedInputStream; +class CodedOutputStream; + +// Defined in other files. +class ZeroCopyInputStream; // zero_copy_stream.h +class ZeroCopyOutputStream; // zero_copy_stream.h + +// Class which reads and decodes binary data which is composed of varint- +// encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream. +// Most users will not need to deal with CodedInputStream. +// +// Most methods of CodedInputStream that return a bool return false if an +// underlying I/O error occurs or if the data is malformed. Once such a +// failure occurs, the CodedInputStream is broken and is no longer useful. +// After a failure, callers also should assume writes to "out" args may have +// occurred, though nothing useful can be determined from those writes. +class PROTOBUF_EXPORT CodedInputStream { + public: + // Create a CodedInputStream that reads from the given ZeroCopyInputStream. + explicit CodedInputStream(ZeroCopyInputStream* input); + + // Create a CodedInputStream that reads from the given flat array. This is + // faster than using an ArrayInputStream. PushLimit(size) is implied by + // this constructor. + explicit CodedInputStream(const uint8_t* buffer, int size); + + // Destroy the CodedInputStream and position the underlying + // ZeroCopyInputStream at the first unread byte. If an error occurred while + // reading (causing a method to return false), then the exact position of + // the input stream may be anywhere between the last value that was read + // successfully and the stream's byte limit. + ~CodedInputStream(); + + // Return true if this CodedInputStream reads from a flat array instead of + // a ZeroCopyInputStream. + inline bool IsFlat() const; + + // Skips a number of bytes. Returns false if an underlying read error + // occurs. + inline bool Skip(int count); + + // Sets *data to point directly at the unread part of the CodedInputStream's + // underlying buffer, and *size to the size of that buffer, but does not + // advance the stream's current position. This will always either produce + // a non-empty buffer or return false. If the caller consumes any of + // this data, it should then call Skip() to skip over the consumed bytes. + // This may be useful for implementing external fast parsing routines for + // types of data not covered by the CodedInputStream interface. + bool GetDirectBufferPointer(const void** data, int* size); + + // Like GetDirectBufferPointer, but this method is inlined, and does not + // attempt to Refresh() if the buffer is currently empty. + PROTOBUF_ALWAYS_INLINE + void GetDirectBufferPointerInline(const void** data, int* size); + + // Read raw bytes, copying them into the given buffer. + bool ReadRaw(void* buffer, int size); + + // Like ReadRaw, but reads into a string. + bool ReadString(std::string* buffer, int size); + + + // Read a 32-bit little-endian integer. + bool ReadLittleEndian32(uint32_t* value); + // Read a 64-bit little-endian integer. + bool ReadLittleEndian64(uint64_t* value); + + // These methods read from an externally provided buffer. The caller is + // responsible for ensuring that the buffer has sufficient space. + // Read a 32-bit little-endian integer. + static const uint8_t* ReadLittleEndian32FromArray(const uint8_t* buffer, + uint32_t* value); + // Read a 64-bit little-endian integer. + static const uint8_t* ReadLittleEndian64FromArray(const uint8_t* buffer, + uint64_t* value); + + // Read an unsigned integer with Varint encoding, truncating to 32 bits. + // Reading a 32-bit value is equivalent to reading a 64-bit one and casting + // it to uint32_t, but may be more efficient. + bool ReadVarint32(uint32_t* value); + // Read an unsigned integer with Varint encoding. + bool ReadVarint64(uint64_t* value); + + // Reads a varint off the wire into an "int". This should be used for reading + // sizes off the wire (sizes of strings, submessages, bytes fields, etc). + // + // The value from the wire is interpreted as unsigned. If its value exceeds + // the representable value of an integer on this platform, instead of + // truncating we return false. Truncating (as performed by ReadVarint32() + // above) is an acceptable approach for fields representing an integer, but + // when we are parsing a size from the wire, truncating the value would result + // in us misparsing the payload. + bool ReadVarintSizeAsInt(int* value); + + // Read a tag. This calls ReadVarint32() and returns the result, or returns + // zero (which is not a valid tag) if ReadVarint32() fails. Also, ReadTag + // (but not ReadTagNoLastTag) updates the last tag value, which can be checked + // with LastTagWas(). + // + // Always inline because this is only called in one place per parse loop + // but it is called for every iteration of said loop, so it should be fast. + // GCC doesn't want to inline this by default. + PROTOBUF_ALWAYS_INLINE uint32_t ReadTag() { + return last_tag_ = ReadTagNoLastTag(); + } + + PROTOBUF_ALWAYS_INLINE uint32_t ReadTagNoLastTag(); + + // This usually a faster alternative to ReadTag() when cutoff is a manifest + // constant. It does particularly well for cutoff >= 127. The first part + // of the return value is the tag that was read, though it can also be 0 in + // the cases where ReadTag() would return 0. If the second part is true + // then the tag is known to be in [0, cutoff]. If not, the tag either is + // above cutoff or is 0. (There's intentional wiggle room when tag is 0, + // because that can arise in several ways, and for best performance we want + // to avoid an extra "is tag == 0?" check here.) + PROTOBUF_ALWAYS_INLINE + std::pair<uint32_t, bool> ReadTagWithCutoff(uint32_t cutoff) { + std::pair<uint32_t, bool> result = ReadTagWithCutoffNoLastTag(cutoff); + last_tag_ = result.first; + return result; + } + + PROTOBUF_ALWAYS_INLINE + std::pair<uint32_t, bool> ReadTagWithCutoffNoLastTag(uint32_t cutoff); + + // Usually returns true if calling ReadVarint32() now would produce the given + // value. Will always return false if ReadVarint32() would not return the + // given value. If ExpectTag() returns true, it also advances past + // the varint. For best performance, use a compile-time constant as the + // parameter. + // Always inline because this collapses to a small number of instructions + // when given a constant parameter, but GCC doesn't want to inline by default. + PROTOBUF_ALWAYS_INLINE bool ExpectTag(uint32_t expected); + + // Like above, except this reads from the specified buffer. The caller is + // responsible for ensuring that the buffer is large enough to read a varint + // of the expected size. For best performance, use a compile-time constant as + // the expected tag parameter. + // + // Returns a pointer beyond the expected tag if it was found, or NULL if it + // was not. + PROTOBUF_ALWAYS_INLINE + static const uint8_t* ExpectTagFromArray(const uint8_t* buffer, + uint32_t expected); + + // Usually returns true if no more bytes can be read. Always returns false + // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent + // call to LastTagWas() will act as if ReadTag() had been called and returned + // zero, and ConsumedEntireMessage() will return true. + bool ExpectAtEnd(); + + // If the last call to ReadTag() or ReadTagWithCutoff() returned the given + // value, returns true. Otherwise, returns false. + // ReadTagNoLastTag/ReadTagWithCutoffNoLastTag do not preserve the last + // returned value. + // + // This is needed because parsers for some types of embedded messages + // (with field type TYPE_GROUP) don't actually know that they've reached the + // end of a message until they see an ENDGROUP tag, which was actually part + // of the enclosing message. The enclosing message would like to check that + // tag to make sure it had the right number, so it calls LastTagWas() on + // return from the embedded parser to check. + bool LastTagWas(uint32_t expected); + void SetLastTag(uint32_t tag) { last_tag_ = tag; } + + // When parsing message (but NOT a group), this method must be called + // immediately after MergeFromCodedStream() returns (if it returns true) + // to further verify that the message ended in a legitimate way. For + // example, this verifies that parsing did not end on an end-group tag. + // It also checks for some cases where, due to optimizations, + // MergeFromCodedStream() can incorrectly return true. + bool ConsumedEntireMessage(); + void SetConsumed() { legitimate_message_end_ = true; } + + // Limits ---------------------------------------------------------- + // Limits are used when parsing length-delimited embedded messages. + // After the message's length is read, PushLimit() is used to prevent + // the CodedInputStream from reading beyond that length. Once the + // embedded message has been parsed, PopLimit() is called to undo the + // limit. + + // Opaque type used with PushLimit() and PopLimit(). Do not modify + // values of this type yourself. The only reason that this isn't a + // struct with private internals is for efficiency. + typedef int Limit; + + // Places a limit on the number of bytes that the stream may read, + // starting from the current position. Once the stream hits this limit, + // it will act like the end of the input has been reached until PopLimit() + // is called. + // + // As the names imply, the stream conceptually has a stack of limits. The + // shortest limit on the stack is always enforced, even if it is not the + // top limit. + // + // The value returned by PushLimit() is opaque to the caller, and must + // be passed unchanged to the corresponding call to PopLimit(). + Limit PushLimit(int byte_limit); + + // Pops the last limit pushed by PushLimit(). The input must be the value + // returned by that call to PushLimit(). + void PopLimit(Limit limit); + + // Returns the number of bytes left until the nearest limit on the + // stack is hit, or -1 if no limits are in place. + int BytesUntilLimit() const; + + // Returns current position relative to the beginning of the input stream. + int CurrentPosition() const; + + // Total Bytes Limit ----------------------------------------------- + // To prevent malicious users from sending excessively large messages + // and causing memory exhaustion, CodedInputStream imposes a hard limit on + // the total number of bytes it will read. + + // Sets the maximum number of bytes that this CodedInputStream will read + // before refusing to continue. To prevent servers from allocating enormous + // amounts of memory to hold parsed messages, the maximum message length + // should be limited to the shortest length that will not harm usability. + // The default limit is INT_MAX (~2GB) and apps should set shorter limits + // if possible. An error will always be printed to stderr if the limit is + // reached. + // + // Note: setting a limit less than the current read position is interpreted + // as a limit on the current position. + // + // This is unrelated to PushLimit()/PopLimit(). + void SetTotalBytesLimit(int total_bytes_limit); + + // The Total Bytes Limit minus the Current Position, or -1 if the total bytes + // limit is INT_MAX. + int BytesUntilTotalBytesLimit() const; + + // Recursion Limit ------------------------------------------------- + // To prevent corrupt or malicious messages from causing stack overflows, + // we must keep track of the depth of recursion when parsing embedded + // messages and groups. CodedInputStream keeps track of this because it + // is the only object that is passed down the stack during parsing. + + // Sets the maximum recursion depth. The default is 100. + void SetRecursionLimit(int limit); + int RecursionBudget() { return recursion_budget_; } + + static int GetDefaultRecursionLimit() { return default_recursion_limit_; } + + // Increments the current recursion depth. Returns true if the depth is + // under the limit, false if it has gone over. + bool IncrementRecursionDepth(); + + // Decrements the recursion depth if possible. + void DecrementRecursionDepth(); + + // Decrements the recursion depth blindly. This is faster than + // DecrementRecursionDepth(). It should be used only if all previous + // increments to recursion depth were successful. + void UnsafeDecrementRecursionDepth(); + + // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_). + // Using this can reduce code size and complexity in some cases. The caller + // is expected to check that the second part of the result is non-negative (to + // bail out if the depth of recursion is too high) and, if all is well, to + // later pass the first part of the result to PopLimit() or similar. + std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit( + int byte_limit); + + // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0). + Limit ReadLengthAndPushLimit(); + + // Helper that is equivalent to: { + // bool result = ConsumedEntireMessage(); + // PopLimit(limit); + // UnsafeDecrementRecursionDepth(); + // return result; } + // Using this can reduce code size and complexity in some cases. + // Do not use unless the current recursion depth is greater than zero. + bool DecrementRecursionDepthAndPopLimit(Limit limit); + + // Helper that is equivalent to: { + // bool result = ConsumedEntireMessage(); + // PopLimit(limit); + // return result; } + // Using this can reduce code size and complexity in some cases. + bool CheckEntireMessageConsumedAndPopLimit(Limit limit); + + // Extension Registry ---------------------------------------------- + // ADVANCED USAGE: 99.9% of people can ignore this section. + // + // By default, when parsing extensions, the parser looks for extension + // definitions in the pool which owns the outer message's Descriptor. + // However, you may call SetExtensionRegistry() to provide an alternative + // pool instead. This makes it possible, for example, to parse a message + // using a generated class, but represent some extensions using + // DynamicMessage. + + // Set the pool used to look up extensions. Most users do not need to call + // this as the correct pool will be chosen automatically. + // + // WARNING: It is very easy to misuse this. Carefully read the requirements + // below. Do not use this unless you are sure you need it. Almost no one + // does. + // + // Let's say you are parsing a message into message object m, and you want + // to take advantage of SetExtensionRegistry(). You must follow these + // requirements: + // + // The given DescriptorPool must contain m->GetDescriptor(). It is not + // sufficient for it to simply contain a descriptor that has the same name + // and content -- it must be the *exact object*. In other words: + // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) == + // m->GetDescriptor()); + // There are two ways to satisfy this requirement: + // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless + // because this is the pool that would be used anyway if you didn't call + // SetExtensionRegistry() at all. + // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an + // "underlay". Read the documentation for DescriptorPool for more + // information about underlays. + // + // You must also provide a MessageFactory. This factory will be used to + // construct Message objects representing extensions. The factory's + // GetPrototype() MUST return non-NULL for any Descriptor which can be found + // through the provided pool. + // + // If the provided factory might return instances of protocol-compiler- + // generated (i.e. compiled-in) types, or if the outer message object m is + // a generated type, then the given factory MUST have this property: If + // GetPrototype() is given a Descriptor which resides in + // DescriptorPool::generated_pool(), the factory MUST return the same + // prototype which MessageFactory::generated_factory() would return. That + // is, given a descriptor for a generated type, the factory must return an + // instance of the generated class (NOT DynamicMessage). However, when + // given a descriptor for a type that is NOT in generated_pool, the factory + // is free to return any implementation. + // + // The reason for this requirement is that generated sub-objects may be + // accessed via the standard (non-reflection) extension accessor methods, + // and these methods will down-cast the object to the generated class type. + // If the object is not actually of that type, the results would be undefined. + // On the other hand, if an extension is not compiled in, then there is no + // way the code could end up accessing it via the standard accessors -- the + // only way to access the extension is via reflection. When using reflection, + // DynamicMessage and generated messages are indistinguishable, so it's fine + // if these objects are represented using DynamicMessage. + // + // Using DynamicMessageFactory on which you have called + // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the + // above requirement. + // + // If either pool or factory is NULL, both must be NULL. + // + // Note that this feature is ignored when parsing "lite" messages as they do + // not have descriptors. + void SetExtensionRegistry(const DescriptorPool* pool, + MessageFactory* factory); + + // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool + // has been provided. + const DescriptorPool* GetExtensionPool(); + + // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no + // factory has been provided. + MessageFactory* GetExtensionFactory(); + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream); + + const uint8_t* buffer_; + const uint8_t* buffer_end_; // pointer to the end of the buffer. + ZeroCopyInputStream* input_; + int total_bytes_read_; // total bytes read from input_, including + // the current buffer + + // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here + // so that we can BackUp() on destruction. + int overflow_bytes_; + + // LastTagWas() stuff. + uint32_t last_tag_; // result of last ReadTag() or ReadTagWithCutoff(). + + // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly + // at EOF, or by ExpectAtEnd() when it returns true. This happens when we + // reach the end of a message and attempt to read another tag. + bool legitimate_message_end_; + + // See EnableAliasing(). + bool aliasing_enabled_; + + // Limits + Limit current_limit_; // if position = -1, no limit is applied + + // For simplicity, if the current buffer crosses a limit (either a normal + // limit created by PushLimit() or the total bytes limit), buffer_size_ + // only tracks the number of bytes before that limit. This field + // contains the number of bytes after it. Note that this implies that if + // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've + // hit a limit. However, if both are zero, it doesn't necessarily mean + // we aren't at a limit -- the buffer may have ended exactly at the limit. + int buffer_size_after_limit_; + + // Maximum number of bytes to read, period. This is unrelated to + // current_limit_. Set using SetTotalBytesLimit(). + int total_bytes_limit_; + + // Current recursion budget, controlled by IncrementRecursionDepth() and + // similar. Starts at recursion_limit_ and goes down: if this reaches + // -1 we are over budget. + int recursion_budget_; + // Recursion depth limit, set by SetRecursionLimit(). + int recursion_limit_; + + // See SetExtensionRegistry(). + const DescriptorPool* extension_pool_; + MessageFactory* extension_factory_; + + // Private member functions. + + // Fallback when Skip() goes past the end of the current buffer. + bool SkipFallback(int count, int original_buffer_size); + + // Advance the buffer by a given number of bytes. + void Advance(int amount); + + // Back up input_ to the current buffer position. + void BackUpInputToCurrentPosition(); + + // Recomputes the value of buffer_size_after_limit_. Must be called after + // current_limit_ or total_bytes_limit_ changes. + void RecomputeBufferLimits(); + + // Writes an error message saying that we hit total_bytes_limit_. + void PrintTotalBytesLimitError(); + + // Called when the buffer runs out to request more data. Implies an + // Advance(BufferSize()). + bool Refresh(); + + // When parsing varints, we optimize for the common case of small values, and + // then optimize for the case when the varint fits within the current buffer + // piece. The Fallback method is used when we can't use the one-byte + // optimization. The Slow method is yet another fallback when the buffer is + // not large enough. Making the slow path out-of-line speeds up the common + // case by 10-15%. The slow path is fairly uncommon: it only triggers when a + // message crosses multiple buffers. Note: ReadVarint32Fallback() and + // ReadVarint64Fallback() are called frequently and generally not inlined, so + // they have been optimized to avoid "out" parameters. The former returns -1 + // if it fails and the uint32_t it read otherwise. The latter has a bool + // indicating success or failure as part of its return type. + int64_t ReadVarint32Fallback(uint32_t first_byte_or_zero); + int ReadVarintSizeAsIntFallback(); + std::pair<uint64_t, bool> ReadVarint64Fallback(); + bool ReadVarint32Slow(uint32_t* value); + bool ReadVarint64Slow(uint64_t* value); + int ReadVarintSizeAsIntSlow(); + bool ReadLittleEndian32Fallback(uint32_t* value); + bool ReadLittleEndian64Fallback(uint64_t* value); + + // Fallback/slow methods for reading tags. These do not update last_tag_, + // but will set legitimate_message_end_ if we are at the end of the input + // stream. + uint32_t ReadTagFallback(uint32_t first_byte_or_zero); + uint32_t ReadTagSlow(); + bool ReadStringFallback(std::string* buffer, int size); + + // Return the size of the buffer. + int BufferSize() const; + + static const int kDefaultTotalBytesLimit = INT_MAX; + + static int default_recursion_limit_; // 100 by default. + + friend class google::protobuf::ZeroCopyCodedInputStream; + friend class google::protobuf::internal::EpsCopyByteStream; +}; + +// EpsCopyOutputStream wraps a ZeroCopyOutputStream and exposes a new stream, +// which has the property you can write kSlopBytes (16 bytes) from the current +// position without bounds checks. The cursor into the stream is managed by +// the user of the class and is an explicit parameter in the methods. Careful +// use of this class, ie. keep ptr a local variable, eliminates the need to +// for the compiler to sync the ptr value between register and memory. +class PROTOBUF_EXPORT EpsCopyOutputStream { + public: + enum { kSlopBytes = 16 }; + + // Initialize from a stream. + EpsCopyOutputStream(ZeroCopyOutputStream* stream, bool deterministic, + uint8_t** pp) + : end_(buffer_), + stream_(stream), + is_serialization_deterministic_(deterministic) { + *pp = buffer_; + } + + // Only for array serialization. No overflow protection, end_ will be the + // pointed to the end of the array. When using this the total size is already + // known, so no need to maintain the slop region. + EpsCopyOutputStream(void* data, int size, bool deterministic) + : end_(static_cast<uint8_t*>(data) + size), + buffer_end_(nullptr), + stream_(nullptr), + is_serialization_deterministic_(deterministic) {} + + // Initialize from stream but with the first buffer already given (eager). + EpsCopyOutputStream(void* data, int size, ZeroCopyOutputStream* stream, + bool deterministic, uint8_t** pp) + : stream_(stream), is_serialization_deterministic_(deterministic) { + *pp = SetInitialBuffer(data, size); + } + + // Flush everything that's written into the underlying ZeroCopyOutputStream + // and trims the underlying stream to the location of ptr. + uint8_t* Trim(uint8_t* ptr); + + // After this it's guaranteed you can safely write kSlopBytes to ptr. This + // will never fail! The underlying stream can produce an error. Use HadError + // to check for errors. + PROTOBUF_NODISCARD uint8_t* EnsureSpace(uint8_t* ptr) { + if (PROTOBUF_PREDICT_FALSE(ptr >= end_)) { + return EnsureSpaceFallback(ptr); + } + return ptr; + } + + uint8_t* WriteRaw(const void* data, int size, uint8_t* ptr) { + if (PROTOBUF_PREDICT_FALSE(end_ - ptr < size)) { + return WriteRawFallback(data, size, ptr); + } + std::memcpy(ptr, data, size); + return ptr + size; + } + // Writes the buffer specified by data, size to the stream. Possibly by + // aliasing the buffer (ie. not copying the data). The caller is responsible + // to make sure the buffer is alive for the duration of the + // ZeroCopyOutputStream. +#ifndef NDEBUG + PROTOBUF_NOINLINE +#endif + uint8_t* WriteRawMaybeAliased(const void* data, int size, uint8_t* ptr) { + if (aliasing_enabled_) { + return WriteAliasedRaw(data, size, ptr); + } else { + return WriteRaw(data, size, ptr); + } + } + + +#ifndef NDEBUG + PROTOBUF_NOINLINE +#endif + uint8_t* WriteStringMaybeAliased(uint32_t num, const std::string& s, + uint8_t* ptr) { + std::ptrdiff_t size = s.size(); + if (PROTOBUF_PREDICT_FALSE( + size >= 128 || end_ - ptr + 16 - TagSize(num << 3) - 1 < size)) { + return WriteStringMaybeAliasedOutline(num, s, ptr); + } + ptr = UnsafeVarint((num << 3) | 2, ptr); + *ptr++ = static_cast<uint8_t>(size); + std::memcpy(ptr, s.data(), size); + return ptr + size; + } + uint8_t* WriteBytesMaybeAliased(uint32_t num, const std::string& s, + uint8_t* ptr) { + return WriteStringMaybeAliased(num, s, ptr); + } + + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteString(uint32_t num, const T& s, + uint8_t* ptr) { + std::ptrdiff_t size = s.size(); + if (PROTOBUF_PREDICT_FALSE( + size >= 128 || end_ - ptr + 16 - TagSize(num << 3) - 1 < size)) { + return WriteStringOutline(num, s, ptr); + } + ptr = UnsafeVarint((num << 3) | 2, ptr); + *ptr++ = static_cast<uint8_t>(size); + std::memcpy(ptr, s.data(), size); + return ptr + size; + } + template <typename T> +#ifndef NDEBUG + PROTOBUF_NOINLINE +#endif + uint8_t* WriteBytes(uint32_t num, const T& s, uint8_t* ptr) { + return WriteString(num, s, ptr); + } + + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteInt32Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteUInt32Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode32); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteSInt32Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, ZigZagEncode32); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteInt64Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteUInt64Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteSInt64Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, ZigZagEncode64); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteEnumPacked(int num, const T& r, int size, + uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteFixedPacked(int num, const T& r, + uint8_t* ptr) { + ptr = EnsureSpace(ptr); + constexpr auto element_size = sizeof(typename T::value_type); + auto size = r.size() * element_size; + ptr = WriteLengthDelim(num, size, ptr); + return WriteRawLittleEndian<element_size>(r.data(), static_cast<int>(size), + ptr); + } + + // Returns true if there was an underlying I/O error since this object was + // created. + bool HadError() const { return had_error_; } + + // Instructs the EpsCopyOutputStream to allow the underlying + // ZeroCopyOutputStream to hold pointers to the original structure instead of + // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the + // underlying stream does not support aliasing, then enabling it has no + // affect. For now, this only affects the behavior of + // WriteRawMaybeAliased(). + // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + void EnableAliasing(bool enabled); + + // See documentation on CodedOutputStream::SetSerializationDeterministic. + void SetSerializationDeterministic(bool value) { + is_serialization_deterministic_ = value; + } + + // See documentation on CodedOutputStream::IsSerializationDeterministic. + bool IsSerializationDeterministic() const { + return is_serialization_deterministic_; + } + + // The number of bytes written to the stream at position ptr, relative to the + // stream's overall position. + int64_t ByteCount(uint8_t* ptr) const; + + + private: + uint8_t* end_; + uint8_t* buffer_end_ = buffer_; + uint8_t buffer_[2 * kSlopBytes]; + ZeroCopyOutputStream* stream_; + bool had_error_ = false; + bool aliasing_enabled_ = false; // See EnableAliasing(). + bool is_serialization_deterministic_; + bool skip_check_consistency = false; + + uint8_t* EnsureSpaceFallback(uint8_t* ptr); + inline uint8_t* Next(); + int Flush(uint8_t* ptr); + std::ptrdiff_t GetSize(uint8_t* ptr) const { + GOOGLE_DCHECK(ptr <= end_ + kSlopBytes); // NOLINT + return end_ + kSlopBytes - ptr; + } + + uint8_t* Error() { + had_error_ = true; + // We use the patch buffer to always guarantee space to write to. + end_ = buffer_ + kSlopBytes; + return buffer_; + } + + static constexpr int TagSize(uint32_t tag) { + return (tag < (1 << 7)) ? 1 + : (tag < (1 << 14)) ? 2 + : (tag < (1 << 21)) ? 3 + : (tag < (1 << 28)) ? 4 + : 5; + } + + PROTOBUF_ALWAYS_INLINE uint8_t* WriteTag(uint32_t num, uint32_t wt, + uint8_t* ptr) { + GOOGLE_DCHECK(ptr < end_); // NOLINT + return UnsafeVarint((num << 3) | wt, ptr); + } + + PROTOBUF_ALWAYS_INLINE uint8_t* WriteLengthDelim(int num, uint32_t size, + uint8_t* ptr) { + ptr = WriteTag(num, 2, ptr); + return UnsafeWriteSize(size, ptr); + } + + uint8_t* WriteRawFallback(const void* data, int size, uint8_t* ptr); + + uint8_t* WriteAliasedRaw(const void* data, int size, uint8_t* ptr); + + uint8_t* WriteStringMaybeAliasedOutline(uint32_t num, const std::string& s, + uint8_t* ptr); + uint8_t* WriteStringOutline(uint32_t num, const std::string& s, uint8_t* ptr); + + template <typename T, typename E> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteVarintPacked(int num, const T& r, + int size, uint8_t* ptr, + const E& encode) { + ptr = EnsureSpace(ptr); + ptr = WriteLengthDelim(num, size, ptr); + auto it = r.data(); + auto end = it + r.size(); + do { + ptr = EnsureSpace(ptr); + ptr = UnsafeVarint(encode(*it++), ptr); + } while (it < end); + return ptr; + } + + static uint32_t Encode32(uint32_t v) { return v; } + static uint64_t Encode64(uint64_t v) { return v; } + static uint32_t ZigZagEncode32(int32_t v) { + return (static_cast<uint32_t>(v) << 1) ^ static_cast<uint32_t>(v >> 31); + } + static uint64_t ZigZagEncode64(int64_t v) { + return (static_cast<uint64_t>(v) << 1) ^ static_cast<uint64_t>(v >> 63); + } + + template <typename T> + PROTOBUF_ALWAYS_INLINE static uint8_t* UnsafeVarint(T value, uint8_t* ptr) { + static_assert(std::is_unsigned<T>::value, + "Varint serialization must be unsigned"); + ptr[0] = static_cast<uint8_t>(value); + if (value < 0x80) { + return ptr + 1; + } + // Turn on continuation bit in the byte we just wrote. + ptr[0] |= static_cast<uint8_t>(0x80); + value >>= 7; + ptr[1] = static_cast<uint8_t>(value); + if (value < 0x80) { + return ptr + 2; + } + ptr += 2; + do { + // Turn on continuation bit in the byte we just wrote. + ptr[-1] |= static_cast<uint8_t>(0x80); + value >>= 7; + *ptr = static_cast<uint8_t>(value); + ++ptr; + } while (value >= 0x80); + return ptr; + } + + PROTOBUF_ALWAYS_INLINE static uint8_t* UnsafeWriteSize(uint32_t value, + uint8_t* ptr) { + while (PROTOBUF_PREDICT_FALSE(value >= 0x80)) { + *ptr = static_cast<uint8_t>(value | 0x80); + value >>= 7; + ++ptr; + } + *ptr++ = static_cast<uint8_t>(value); + return ptr; + } + + template <int S> + uint8_t* WriteRawLittleEndian(const void* data, int size, uint8_t* ptr); +#if !defined(PROTOBUF_LITTLE_ENDIAN) || \ + defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) + uint8_t* WriteRawLittleEndian32(const void* data, int size, uint8_t* ptr); + uint8_t* WriteRawLittleEndian64(const void* data, int size, uint8_t* ptr); +#endif + + // These methods are for CodedOutputStream. Ideally they should be private + // but to match current behavior of CodedOutputStream as close as possible + // we allow it some functionality. + public: + uint8_t* SetInitialBuffer(void* data, int size) { + auto ptr = static_cast<uint8_t*>(data); + if (size > kSlopBytes) { + end_ = ptr + size - kSlopBytes; + buffer_end_ = nullptr; + return ptr; + } else { + end_ = buffer_ + size; + buffer_end_ = ptr; + return buffer_; + } + } + + private: + // Needed by CodedOutputStream HadError. HadError needs to flush the patch + // buffers to ensure there is no error as of yet. + uint8_t* FlushAndResetBuffer(uint8_t*); + + // The following functions mimic the old CodedOutputStream behavior as close + // as possible. They flush the current state to the stream, behave as + // the old CodedOutputStream and then return to normal operation. + bool Skip(int count, uint8_t** pp); + bool GetDirectBufferPointer(void** data, int* size, uint8_t** pp); + uint8_t* GetDirectBufferForNBytesAndAdvance(int size, uint8_t** pp); + + friend class CodedOutputStream; +}; + +template <> +inline uint8_t* EpsCopyOutputStream::WriteRawLittleEndian<1>(const void* data, + int size, + uint8_t* ptr) { + return WriteRaw(data, size, ptr); +} +template <> +inline uint8_t* EpsCopyOutputStream::WriteRawLittleEndian<4>(const void* data, + int size, + uint8_t* ptr) { +#if defined(PROTOBUF_LITTLE_ENDIAN) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) + return WriteRaw(data, size, ptr); +#else + return WriteRawLittleEndian32(data, size, ptr); +#endif +} +template <> +inline uint8_t* EpsCopyOutputStream::WriteRawLittleEndian<8>(const void* data, + int size, + uint8_t* ptr) { +#if defined(PROTOBUF_LITTLE_ENDIAN) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) + return WriteRaw(data, size, ptr); +#else + return WriteRawLittleEndian64(data, size, ptr); +#endif +} + +// Class which encodes and writes binary data which is composed of varint- +// encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream. +// Most users will not need to deal with CodedOutputStream. +// +// Most methods of CodedOutputStream which return a bool return false if an +// underlying I/O error occurs. Once such a failure occurs, the +// CodedOutputStream is broken and is no longer useful. The Write* methods do +// not return the stream status, but will invalidate the stream if an error +// occurs. The client can probe HadError() to determine the status. +// +// Note that every method of CodedOutputStream which writes some data has +// a corresponding static "ToArray" version. These versions write directly +// to the provided buffer, returning a pointer past the last written byte. +// They require that the buffer has sufficient capacity for the encoded data. +// This allows an optimization where we check if an output stream has enough +// space for an entire message before we start writing and, if there is, we +// call only the ToArray methods to avoid doing bound checks for each +// individual value. +// i.e., in the example above: +// +// CodedOutputStream* coded_output = new CodedOutputStream(raw_output); +// int magic_number = 1234; +// char text[] = "Hello world!"; +// +// int coded_size = sizeof(magic_number) + +// CodedOutputStream::VarintSize32(strlen(text)) + +// strlen(text); +// +// uint8_t* buffer = +// coded_output->GetDirectBufferForNBytesAndAdvance(coded_size); +// if (buffer != nullptr) { +// // The output stream has enough space in the buffer: write directly to +// // the array. +// buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number, +// buffer); +// buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer); +// buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer); +// } else { +// // Make bound-checked writes, which will ask the underlying stream for +// // more space as needed. +// coded_output->WriteLittleEndian32(magic_number); +// coded_output->WriteVarint32(strlen(text)); +// coded_output->WriteRaw(text, strlen(text)); +// } +// +// delete coded_output; +class PROTOBUF_EXPORT CodedOutputStream { + public: + // Creates a CodedOutputStream that writes to the given `stream`. + // The provided stream must publicly derive from `ZeroCopyOutputStream`. + template <class Stream, class = typename std::enable_if<std::is_base_of< + ZeroCopyOutputStream, Stream>::value>::type> + explicit CodedOutputStream(Stream* stream); + + // Creates a CodedOutputStream that writes to the given `stream`, and does + // an 'eager initialization' of the internal state if `eager_init` is true. + // The provided stream must publicly derive from `ZeroCopyOutputStream`. + template <class Stream, class = typename std::enable_if<std::is_base_of< + ZeroCopyOutputStream, Stream>::value>::type> + CodedOutputStream(Stream* stream, bool eager_init); + + // Destroy the CodedOutputStream and position the underlying + // ZeroCopyOutputStream immediately after the last byte written. + ~CodedOutputStream(); + + // Returns true if there was an underlying I/O error since this object was + // created. On should call Trim before this function in order to catch all + // errors. + bool HadError() { + cur_ = impl_.FlushAndResetBuffer(cur_); + GOOGLE_DCHECK(cur_); + return impl_.HadError(); + } + + // Trims any unused space in the underlying buffer so that its size matches + // the number of bytes written by this stream. The underlying buffer will + // automatically be trimmed when this stream is destroyed; this call is only + // necessary if the underlying buffer is accessed *before* the stream is + // destroyed. + void Trim() { cur_ = impl_.Trim(cur_); } + + // Skips a number of bytes, leaving the bytes unmodified in the underlying + // buffer. Returns false if an underlying write error occurs. This is + // mainly useful with GetDirectBufferPointer(). + // Note of caution, the skipped bytes may contain uninitialized data. The + // caller must make sure that the skipped bytes are properly initialized, + // otherwise you might leak bytes from your heap. + bool Skip(int count) { return impl_.Skip(count, &cur_); } + + // Sets *data to point directly at the unwritten part of the + // CodedOutputStream's underlying buffer, and *size to the size of that + // buffer, but does not advance the stream's current position. This will + // always either produce a non-empty buffer or return false. If the caller + // writes any data to this buffer, it should then call Skip() to skip over + // the consumed bytes. This may be useful for implementing external fast + // serialization routines for types of data not covered by the + // CodedOutputStream interface. + bool GetDirectBufferPointer(void** data, int* size) { + return impl_.GetDirectBufferPointer(data, size, &cur_); + } + + // If there are at least "size" bytes available in the current buffer, + // returns a pointer directly into the buffer and advances over these bytes. + // The caller may then write directly into this buffer (e.g. using the + // *ToArray static methods) rather than go through CodedOutputStream. If + // there are not enough bytes available, returns NULL. The return pointer is + // invalidated as soon as any other non-const method of CodedOutputStream + // is called. + inline uint8_t* GetDirectBufferForNBytesAndAdvance(int size) { + return impl_.GetDirectBufferForNBytesAndAdvance(size, &cur_); + } + + // Write raw bytes, copying them from the given buffer. + void WriteRaw(const void* buffer, int size) { + cur_ = impl_.WriteRaw(buffer, size, cur_); + } + // Like WriteRaw() but will try to write aliased data if aliasing is + // turned on. + void WriteRawMaybeAliased(const void* data, int size); + // Like WriteRaw() but writing directly to the target array. + // This is _not_ inlined, as the compiler often optimizes memcpy into inline + // copy loops. Since this gets called by every field with string or bytes + // type, inlining may lead to a significant amount of code bloat, with only a + // minor performance gain. + static uint8_t* WriteRawToArray(const void* buffer, int size, + uint8_t* target); + + // Equivalent to WriteRaw(str.data(), str.size()). + void WriteString(const std::string& str); + // Like WriteString() but writing directly to the target array. + static uint8_t* WriteStringToArray(const std::string& str, uint8_t* target); + // Write the varint-encoded size of str followed by str. + static uint8_t* WriteStringWithSizeToArray(const std::string& str, + uint8_t* target); + + + // Write a 32-bit little-endian integer. + void WriteLittleEndian32(uint32_t value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteLittleEndian32ToArray(value, Cur())); + } + // Like WriteLittleEndian32() but writing directly to the target array. + static uint8_t* WriteLittleEndian32ToArray(uint32_t value, uint8_t* target); + // Write a 64-bit little-endian integer. + void WriteLittleEndian64(uint64_t value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteLittleEndian64ToArray(value, Cur())); + } + // Like WriteLittleEndian64() but writing directly to the target array. + static uint8_t* WriteLittleEndian64ToArray(uint64_t value, uint8_t* target); + + // Write an unsigned integer with Varint encoding. Writing a 32-bit value + // is equivalent to casting it to uint64_t and writing it as a 64-bit value, + // but may be more efficient. + void WriteVarint32(uint32_t value); + // Like WriteVarint32() but writing directly to the target array. + static uint8_t* WriteVarint32ToArray(uint32_t value, uint8_t* target); + // Like WriteVarint32() but writing directly to the target array, and with + // the less common-case paths being out of line rather than inlined. + static uint8_t* WriteVarint32ToArrayOutOfLine(uint32_t value, + uint8_t* target); + // Write an unsigned integer with Varint encoding. + void WriteVarint64(uint64_t value); + // Like WriteVarint64() but writing directly to the target array. + static uint8_t* WriteVarint64ToArray(uint64_t value, uint8_t* target); + + // Equivalent to WriteVarint32() except when the value is negative, + // in which case it must be sign-extended to a full 10 bytes. + void WriteVarint32SignExtended(int32_t value); + // Like WriteVarint32SignExtended() but writing directly to the target array. + static uint8_t* WriteVarint32SignExtendedToArray(int32_t value, + uint8_t* target); + + // This is identical to WriteVarint32(), but optimized for writing tags. + // In particular, if the input is a compile-time constant, this method + // compiles down to a couple instructions. + // Always inline because otherwise the aforementioned optimization can't work, + // but GCC by default doesn't want to inline this. + void WriteTag(uint32_t value); + // Like WriteTag() but writing directly to the target array. + PROTOBUF_ALWAYS_INLINE + static uint8_t* WriteTagToArray(uint32_t value, uint8_t* target); + + // Returns the number of bytes needed to encode the given value as a varint. + static size_t VarintSize32(uint32_t value); + // Returns the number of bytes needed to encode the given value as a varint. + static size_t VarintSize64(uint64_t value); + + // If negative, 10 bytes. Otherwise, same as VarintSize32(). + static size_t VarintSize32SignExtended(int32_t value); + + // Same as above, plus one. The additional one comes at no compute cost. + static size_t VarintSize32PlusOne(uint32_t value); + static size_t VarintSize64PlusOne(uint64_t value); + static size_t VarintSize32SignExtendedPlusOne(int32_t value); + + // Compile-time equivalent of VarintSize32(). + template <uint32_t Value> + struct StaticVarintSize32 { + static const size_t value = (Value < (1 << 7)) ? 1 + : (Value < (1 << 14)) ? 2 + : (Value < (1 << 21)) ? 3 + : (Value < (1 << 28)) ? 4 + : 5; + }; + + // Returns the total number of bytes written since this object was created. + int ByteCount() const { + return static_cast<int>(impl_.ByteCount(cur_) - start_count_); + } + + // Instructs the CodedOutputStream to allow the underlying + // ZeroCopyOutputStream to hold pointers to the original structure instead of + // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the + // underlying stream does not support aliasing, then enabling it has no + // affect. For now, this only affects the behavior of + // WriteRawMaybeAliased(). + // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + void EnableAliasing(bool enabled) { impl_.EnableAliasing(enabled); } + + // Indicate to the serializer whether the user wants deterministic + // serialization. The default when this is not called comes from the global + // default, controlled by SetDefaultSerializationDeterministic. + // + // What deterministic serialization means is entirely up to the driver of the + // serialization process (i.e. the caller of methods like WriteVarint32). In + // the case of serializing a proto buffer message using one of the methods of + // MessageLite, this means that for a given binary equal messages will always + // be serialized to the same bytes. This implies: + // + // * Repeated serialization of a message will return the same bytes. + // + // * Different processes running the same binary (including on different + // machines) will serialize equal messages to the same bytes. + // + // Note that this is *not* canonical across languages. It is also unstable + // across different builds with intervening message definition changes, due to + // unknown fields. Users who need canonical serialization (e.g. persistent + // storage in a canonical form, fingerprinting) should define their own + // canonicalization specification and implement the serializer using + // reflection APIs rather than relying on this API. + void SetSerializationDeterministic(bool value) { + impl_.SetSerializationDeterministic(value); + } + + // Return whether the user wants deterministic serialization. See above. + bool IsSerializationDeterministic() const { + return impl_.IsSerializationDeterministic(); + } + + static bool IsDefaultSerializationDeterministic() { + return default_serialization_deterministic_.load( + std::memory_order_relaxed) != 0; + } + + template <typename Func> + void Serialize(const Func& func); + + uint8_t* Cur() const { return cur_; } + void SetCur(uint8_t* ptr) { cur_ = ptr; } + EpsCopyOutputStream* EpsCopy() { return &impl_; } + + private: + template <class Stream> + void InitEagerly(Stream* stream); + + EpsCopyOutputStream impl_; + uint8_t* cur_; + int64_t start_count_; + static std::atomic<bool> default_serialization_deterministic_; + + // See above. Other projects may use "friend" to allow them to call this. + // After SetDefaultSerializationDeterministic() completes, all protocol + // buffer serializations will be deterministic by default. Thread safe. + // However, the meaning of "after" is subtle here: to be safe, each thread + // that wants deterministic serialization by default needs to call + // SetDefaultSerializationDeterministic() or ensure on its own that another + // thread has done so. + friend void internal::MapTestForceDeterministic(); + static void SetDefaultSerializationDeterministic() { + default_serialization_deterministic_.store(true, std::memory_order_relaxed); + } + // REQUIRES: value >= 0x80, and that (value & 7f) has been written to *target. + static uint8_t* WriteVarint32ToArrayOutOfLineHelper(uint32_t value, + uint8_t* target); + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream); +}; + +// inline methods ==================================================== +// The vast majority of varints are only one byte. These inline +// methods optimize for that case. + +inline bool CodedInputStream::ReadVarint32(uint32_t* value) { + uint32_t v = 0; + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { + v = *buffer_; + if (v < 0x80) { + *value = v; + Advance(1); + return true; + } + } + int64_t result = ReadVarint32Fallback(v); + *value = static_cast<uint32_t>(result); + return result >= 0; +} + +inline bool CodedInputStream::ReadVarint64(uint64_t* value) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { + *value = *buffer_; + Advance(1); + return true; + } + std::pair<uint64_t, bool> p = ReadVarint64Fallback(); + *value = p.first; + return p.second; +} + +inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { + int v = *buffer_; + if (v < 0x80) { + *value = v; + Advance(1); + return true; + } + } + *value = ReadVarintSizeAsIntFallback(); + return *value >= 0; +} + +// static +inline const uint8_t* CodedInputStream::ReadLittleEndian32FromArray( + const uint8_t* buffer, uint32_t* value) { +#if defined(PROTOBUF_LITTLE_ENDIAN) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) + memcpy(value, buffer, sizeof(*value)); + return buffer + sizeof(*value); +#else + *value = (static_cast<uint32_t>(buffer[0])) | + (static_cast<uint32_t>(buffer[1]) << 8) | + (static_cast<uint32_t>(buffer[2]) << 16) | + (static_cast<uint32_t>(buffer[3]) << 24); + return buffer + sizeof(*value); +#endif +} +// static +inline const uint8_t* CodedInputStream::ReadLittleEndian64FromArray( + const uint8_t* buffer, uint64_t* value) { +#if defined(PROTOBUF_LITTLE_ENDIAN) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) + memcpy(value, buffer, sizeof(*value)); + return buffer + sizeof(*value); +#else + uint32_t part0 = (static_cast<uint32_t>(buffer[0])) | + (static_cast<uint32_t>(buffer[1]) << 8) | + (static_cast<uint32_t>(buffer[2]) << 16) | + (static_cast<uint32_t>(buffer[3]) << 24); + uint32_t part1 = (static_cast<uint32_t>(buffer[4])) | + (static_cast<uint32_t>(buffer[5]) << 8) | + (static_cast<uint32_t>(buffer[6]) << 16) | + (static_cast<uint32_t>(buffer[7]) << 24); + *value = static_cast<uint64_t>(part0) | (static_cast<uint64_t>(part1) << 32); + return buffer + sizeof(*value); +#endif +} + +inline bool CodedInputStream::ReadLittleEndian32(uint32_t* value) { +#if defined(PROTOBUF_LITTLE_ENDIAN) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) + if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) { + buffer_ = ReadLittleEndian32FromArray(buffer_, value); + return true; + } else { + return ReadLittleEndian32Fallback(value); + } +#else + return ReadLittleEndian32Fallback(value); +#endif +} + +inline bool CodedInputStream::ReadLittleEndian64(uint64_t* value) { +#if defined(PROTOBUF_LITTLE_ENDIAN) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) + if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) { + buffer_ = ReadLittleEndian64FromArray(buffer_, value); + return true; + } else { + return ReadLittleEndian64Fallback(value); + } +#else + return ReadLittleEndian64Fallback(value); +#endif +} + +inline uint32_t CodedInputStream::ReadTagNoLastTag() { + uint32_t v = 0; + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { + v = *buffer_; + if (v < 0x80) { + Advance(1); + return v; + } + } + v = ReadTagFallback(v); + return v; +} + +inline std::pair<uint32_t, bool> CodedInputStream::ReadTagWithCutoffNoLastTag( + uint32_t cutoff) { + // In performance-sensitive code we can expect cutoff to be a compile-time + // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at + // compile time. + uint32_t first_byte_or_zero = 0; + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { + // Hot case: buffer_ non_empty, buffer_[0] in [1, 128). + // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields + // is large enough then is it better to check for the two-byte case first? + first_byte_or_zero = buffer_[0]; + if (static_cast<int8_t>(buffer_[0]) > 0) { + const uint32_t kMax1ByteVarint = 0x7f; + uint32_t tag = buffer_[0]; + Advance(1); + return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff); + } + // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available, + // and tag is two bytes. The latter is tested by bitwise-and-not of the + // first byte and the second byte. + if (cutoff >= 0x80 && PROTOBUF_PREDICT_TRUE(buffer_ + 1 < buffer_end_) && + PROTOBUF_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) { + const uint32_t kMax2ByteVarint = (0x7f << 7) + 0x7f; + uint32_t tag = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80); + Advance(2); + // It might make sense to test for tag == 0 now, but it is so rare that + // that we don't bother. A varint-encoded 0 should be one byte unless + // the encoder lost its mind. The second part of the return value of + // this function is allowed to be either true or false if the tag is 0, + // so we don't have to check for tag == 0. We may need to check whether + // it exceeds cutoff. + bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff; + return std::make_pair(tag, at_or_below_cutoff); + } + } + // Slow path + const uint32_t tag = ReadTagFallback(first_byte_or_zero); + return std::make_pair(tag, static_cast<uint32_t>(tag - 1) < cutoff); +} + +inline bool CodedInputStream::LastTagWas(uint32_t expected) { + return last_tag_ == expected; +} + +inline bool CodedInputStream::ConsumedEntireMessage() { + return legitimate_message_end_; +} + +inline bool CodedInputStream::ExpectTag(uint32_t expected) { + if (expected < (1 << 7)) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) && + buffer_[0] == expected) { + Advance(1); + return true; + } else { + return false; + } + } else if (expected < (1 << 14)) { + if (PROTOBUF_PREDICT_TRUE(BufferSize() >= 2) && + buffer_[0] == static_cast<uint8_t>(expected | 0x80) && + buffer_[1] == static_cast<uint8_t>(expected >> 7)) { + Advance(2); + return true; + } else { + return false; + } + } else { + // Don't bother optimizing for larger values. + return false; + } +} + +inline const uint8_t* CodedInputStream::ExpectTagFromArray( + const uint8_t* buffer, uint32_t expected) { + if (expected < (1 << 7)) { + if (buffer[0] == expected) { + return buffer + 1; + } + } else if (expected < (1 << 14)) { + if (buffer[0] == static_cast<uint8_t>(expected | 0x80) && + buffer[1] == static_cast<uint8_t>(expected >> 7)) { + return buffer + 2; + } + } + return nullptr; +} + +inline void CodedInputStream::GetDirectBufferPointerInline(const void** data, + int* size) { + *data = buffer_; + *size = static_cast<int>(buffer_end_ - buffer_); +} + +inline bool CodedInputStream::ExpectAtEnd() { + // If we are at a limit we know no more bytes can be read. Otherwise, it's + // hard to say without calling Refresh(), and we'd rather not do that. + + if (buffer_ == buffer_end_ && ((buffer_size_after_limit_ != 0) || + (total_bytes_read_ == current_limit_))) { + last_tag_ = 0; // Pretend we called ReadTag()... + legitimate_message_end_ = true; // ... and it hit EOF. + return true; + } else { + return false; + } +} + +inline int CodedInputStream::CurrentPosition() const { + return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_); +} + +inline void CodedInputStream::Advance(int amount) { buffer_ += amount; } + +inline void CodedInputStream::SetRecursionLimit(int limit) { + recursion_budget_ += limit - recursion_limit_; + recursion_limit_ = limit; +} + +inline bool CodedInputStream::IncrementRecursionDepth() { + --recursion_budget_; + return recursion_budget_ >= 0; +} + +inline void CodedInputStream::DecrementRecursionDepth() { + if (recursion_budget_ < recursion_limit_) ++recursion_budget_; +} + +inline void CodedInputStream::UnsafeDecrementRecursionDepth() { + assert(recursion_budget_ < recursion_limit_); + ++recursion_budget_; +} + +inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool, + MessageFactory* factory) { + extension_pool_ = pool; + extension_factory_ = factory; +} + +inline const DescriptorPool* CodedInputStream::GetExtensionPool() { + return extension_pool_; +} + +inline MessageFactory* CodedInputStream::GetExtensionFactory() { + return extension_factory_; +} + +inline int CodedInputStream::BufferSize() const { + return static_cast<int>(buffer_end_ - buffer_); +} + +inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) + : buffer_(nullptr), + buffer_end_(nullptr), + input_(input), + total_bytes_read_(0), + overflow_bytes_(0), + last_tag_(0), + legitimate_message_end_(false), + aliasing_enabled_(false), + current_limit_(std::numeric_limits<int32_t>::max()), + buffer_size_after_limit_(0), + total_bytes_limit_(kDefaultTotalBytesLimit), + recursion_budget_(default_recursion_limit_), + recursion_limit_(default_recursion_limit_), + extension_pool_(nullptr), + extension_factory_(nullptr) { + // Eagerly Refresh() so buffer space is immediately available. + Refresh(); +} + +inline CodedInputStream::CodedInputStream(const uint8_t* buffer, int size) + : buffer_(buffer), + buffer_end_(buffer + size), + input_(nullptr), + total_bytes_read_(size), + overflow_bytes_(0), + last_tag_(0), + legitimate_message_end_(false), + aliasing_enabled_(false), + current_limit_(size), + buffer_size_after_limit_(0), + total_bytes_limit_(kDefaultTotalBytesLimit), + recursion_budget_(default_recursion_limit_), + recursion_limit_(default_recursion_limit_), + extension_pool_(nullptr), + extension_factory_(nullptr) { + // Note that setting current_limit_ == size is important to prevent some + // code paths from trying to access input_ and segfaulting. +} + +inline bool CodedInputStream::IsFlat() const { return input_ == nullptr; } + +inline bool CodedInputStream::Skip(int count) { + if (count < 0) return false; // security: count is often user-supplied + + const int original_buffer_size = BufferSize(); + + if (count <= original_buffer_size) { + // Just skipping within the current buffer. Easy. + Advance(count); + return true; + } + + return SkipFallback(count, original_buffer_size); +} + +template <class Stream, class> +inline CodedOutputStream::CodedOutputStream(Stream* stream) + : impl_(stream, IsDefaultSerializationDeterministic(), &cur_), + start_count_(stream->ByteCount()) { + InitEagerly(stream); +} + +template <class Stream, class> +inline CodedOutputStream::CodedOutputStream(Stream* stream, bool eager_init) + : impl_(stream, IsDefaultSerializationDeterministic(), &cur_), + start_count_(stream->ByteCount()) { + if (eager_init) { + InitEagerly(stream); + } +} + +template <class Stream> +inline void CodedOutputStream::InitEagerly(Stream* stream) { + void* data; + int size; + if (PROTOBUF_PREDICT_TRUE(stream->Next(&data, &size) && size > 0)) { + cur_ = impl_.SetInitialBuffer(data, size); + } +} + +inline uint8_t* CodedOutputStream::WriteVarint32ToArray(uint32_t value, + uint8_t* target) { + return EpsCopyOutputStream::UnsafeVarint(value, target); +} + +inline uint8_t* CodedOutputStream::WriteVarint32ToArrayOutOfLine( + uint32_t value, uint8_t* target) { + target[0] = static_cast<uint8_t>(value); + if (value < 0x80) { + return target + 1; + } else { + return WriteVarint32ToArrayOutOfLineHelper(value, target); + } +} + +inline uint8_t* CodedOutputStream::WriteVarint64ToArray(uint64_t value, + uint8_t* target) { + return EpsCopyOutputStream::UnsafeVarint(value, target); +} + +inline void CodedOutputStream::WriteVarint32SignExtended(int32_t value) { + WriteVarint64(static_cast<uint64_t>(value)); +} + +inline uint8_t* CodedOutputStream::WriteVarint32SignExtendedToArray( + int32_t value, uint8_t* target) { + return WriteVarint64ToArray(static_cast<uint64_t>(value), target); +} + +inline uint8_t* CodedOutputStream::WriteLittleEndian32ToArray(uint32_t value, + uint8_t* target) { +#if defined(PROTOBUF_LITTLE_ENDIAN) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) + memcpy(target, &value, sizeof(value)); +#else + target[0] = static_cast<uint8_t>(value); + target[1] = static_cast<uint8_t>(value >> 8); + target[2] = static_cast<uint8_t>(value >> 16); + target[3] = static_cast<uint8_t>(value >> 24); +#endif + return target + sizeof(value); +} + +inline uint8_t* CodedOutputStream::WriteLittleEndian64ToArray(uint64_t value, + uint8_t* target) { +#if defined(PROTOBUF_LITTLE_ENDIAN) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) + memcpy(target, &value, sizeof(value)); +#else + uint32_t part0 = static_cast<uint32_t>(value); + uint32_t part1 = static_cast<uint32_t>(value >> 32); + + target[0] = static_cast<uint8_t>(part0); + target[1] = static_cast<uint8_t>(part0 >> 8); + target[2] = static_cast<uint8_t>(part0 >> 16); + target[3] = static_cast<uint8_t>(part0 >> 24); + target[4] = static_cast<uint8_t>(part1); + target[5] = static_cast<uint8_t>(part1 >> 8); + target[6] = static_cast<uint8_t>(part1 >> 16); + target[7] = static_cast<uint8_t>(part1 >> 24); +#endif + return target + sizeof(value); +} + +inline void CodedOutputStream::WriteVarint32(uint32_t value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteVarint32ToArray(value, Cur())); +} + +inline void CodedOutputStream::WriteVarint64(uint64_t value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteVarint64ToArray(value, Cur())); +} + +inline void CodedOutputStream::WriteTag(uint32_t value) { + WriteVarint32(value); +} + +inline uint8_t* CodedOutputStream::WriteTagToArray(uint32_t value, + uint8_t* target) { + return WriteVarint32ToArray(value, target); +} + +inline size_t CodedOutputStream::VarintSize32(uint32_t value) { + // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1 + // Use an explicit multiplication to implement the divide of + // a number in the 1..31 range. + // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is + // undefined. + uint32_t log2value = Bits::Log2FloorNonZero(value | 0x1); + return static_cast<size_t>((log2value * 9 + 73) / 64); +} + +inline size_t CodedOutputStream::VarintSize32PlusOne(uint32_t value) { + // Same as above, but one more. + uint32_t log2value = Bits::Log2FloorNonZero(value | 0x1); + return static_cast<size_t>((log2value * 9 + 73 + 64) / 64); +} + +inline size_t CodedOutputStream::VarintSize64(uint64_t value) { + // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1 + // Use an explicit multiplication to implement the divide of + // a number in the 1..63 range. + // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is + // undefined. + uint32_t log2value = Bits::Log2FloorNonZero64(value | 0x1); + return static_cast<size_t>((log2value * 9 + 73) / 64); +} + +inline size_t CodedOutputStream::VarintSize64PlusOne(uint64_t value) { + // Same as above, but one more. + uint32_t log2value = Bits::Log2FloorNonZero64(value | 0x1); + return static_cast<size_t>((log2value * 9 + 73 + 64) / 64); +} + +inline size_t CodedOutputStream::VarintSize32SignExtended(int32_t value) { + return VarintSize64(static_cast<uint64_t>(int64_t{value})); +} + +inline size_t CodedOutputStream::VarintSize32SignExtendedPlusOne( + int32_t value) { + return VarintSize64PlusOne(static_cast<uint64_t>(int64_t{value})); +} + +inline void CodedOutputStream::WriteString(const std::string& str) { + WriteRaw(str.data(), static_cast<int>(str.size())); +} + +inline void CodedOutputStream::WriteRawMaybeAliased(const void* data, + int size) { + cur_ = impl_.WriteRawMaybeAliased(data, size, cur_); +} + +inline uint8_t* CodedOutputStream::WriteRawToArray(const void* data, int size, + uint8_t* target) { + memcpy(target, data, size); + return target + size; +} + +inline uint8_t* CodedOutputStream::WriteStringToArray(const std::string& str, + uint8_t* target) { + return WriteRawToArray(str.data(), static_cast<int>(str.size()), target); +} + +} // namespace io +} // namespace protobuf +} // namespace google + +#if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER) +#pragma runtime_checks("c", restore) +#endif // _MSC_VER && !defined(__INTEL_COMPILER) + +#include <google/protobuf/port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ diff --git a/include/google/protobuf/io/gzip_stream.h b/include/google/protobuf/io/gzip_stream.h new file mode 100644 index 0000000000..4cf71b6c32 --- /dev/null +++ b/include/google/protobuf/io/gzip_stream.h @@ -0,0 +1,205 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: brianolson@google.com (Brian Olson) +// +// This file contains the definition for classes GzipInputStream and +// GzipOutputStream. +// +// GzipInputStream decompresses data from an underlying +// ZeroCopyInputStream and provides the decompressed data as a +// ZeroCopyInputStream. +// +// GzipOutputStream is an ZeroCopyOutputStream that compresses data to +// an underlying ZeroCopyOutputStream. + +#ifndef GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__ +#define GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__ + + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/io/zero_copy_stream.h> +#include <google/protobuf/port.h> +#include "zlib.h" + +// Must be included last. +#include <google/protobuf/port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +// A ZeroCopyInputStream that reads compressed data through zlib +class PROTOBUF_EXPORT GzipInputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyInputStream { + public: + // Format key for constructor + enum Format { + // zlib will autodetect gzip header or deflate stream + AUTO = 0, + + // GZIP streams have some extra header data for file attributes. + GZIP = 1, + + // Simpler zlib stream format. + ZLIB = 2, + }; + + // buffer_size and format may be -1 for default of 64kB and GZIP format + explicit GzipInputStream(ZeroCopyInputStream* sub_stream, + Format format = AUTO, int buffer_size = -1); + virtual ~GzipInputStream(); + + // Return last error message or NULL if no error. + inline const char* ZlibErrorMessage() const { return zcontext_.msg; } + inline int ZlibErrorCode() const { return zerror_; } + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + Format format_; + + ZeroCopyInputStream* sub_stream_; + + z_stream zcontext_; + int zerror_; + + void* output_buffer_; + void* output_position_; + size_t output_buffer_length_; + int64_t byte_count_; + + int Inflate(int flush); + void DoNextOutput(const void** data, int* size); + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipInputStream); +}; + +class PROTOBUF_EXPORT GzipOutputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyOutputStream { + public: + // Format key for constructor + enum Format { + // GZIP streams have some extra header data for file attributes. + GZIP = 1, + + // Simpler zlib stream format. + ZLIB = 2, + }; + + struct PROTOBUF_EXPORT Options { + // Defaults to GZIP. + Format format; + + // What size buffer to use internally. Defaults to 64kB. + int buffer_size; + + // A number between 0 and 9, where 0 is no compression and 9 is best + // compression. Defaults to Z_DEFAULT_COMPRESSION (see zlib.h). + int compression_level; + + // Defaults to Z_DEFAULT_STRATEGY. Can also be set to Z_FILTERED, + // Z_HUFFMAN_ONLY, or Z_RLE. See the documentation for deflateInit2 in + // zlib.h for definitions of these constants. + int compression_strategy; + + Options(); // Initializes with default values. + }; + + // Create a GzipOutputStream with default options. + explicit GzipOutputStream(ZeroCopyOutputStream* sub_stream); + + // Create a GzipOutputStream with the given options. + GzipOutputStream(ZeroCopyOutputStream* sub_stream, const Options& options); + + virtual ~GzipOutputStream(); + + // Return last error message or NULL if no error. + inline const char* ZlibErrorMessage() const { return zcontext_.msg; } + inline int ZlibErrorCode() const { return zerror_; } + + // Flushes data written so far to zipped data in the underlying stream. + // It is the caller's responsibility to flush the underlying stream if + // necessary. + // Compression may be less efficient stopping and starting around flushes. + // Returns true if no error. + // + // Please ensure that block size is > 6. Here is an excerpt from the zlib + // doc that explains why: + // + // In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out + // is greater than six to avoid repeated flush markers due to + // avail_out == 0 on return. + bool Flush(); + + // Writes out all data and closes the gzip stream. + // It is the caller's responsibility to close the underlying stream if + // necessary. + // Returns true if no error. + bool Close(); + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + + private: + ZeroCopyOutputStream* sub_stream_; + // Result from calling Next() on sub_stream_ + void* sub_data_; + int sub_data_size_; + + z_stream zcontext_; + int zerror_; + void* input_buffer_; + size_t input_buffer_length_; + + // Shared constructor code. + void Init(ZeroCopyOutputStream* sub_stream, const Options& options); + + // Do some compression. + // Takes zlib flush mode. + // Returns zlib error code. + int Deflate(int flush); + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipOutputStream); +}; + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <google/protobuf/port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__ diff --git a/include/google/protobuf/io/io_win32.h b/include/google/protobuf/io/io_win32.h new file mode 100644 index 0000000000..a72b4ea3cf --- /dev/null +++ b/include/google/protobuf/io/io_win32.h @@ -0,0 +1,141 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: laszlocsomor@google.com (Laszlo Csomor) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +// This file contains the declarations for Windows implementations of +// commonly used POSIX functions such as open(2) and access(2), as well +// as macro definitions for flags of these functions. +// +// By including this file you'll redefine open/access/etc. to +// ::google::protobuf::io::win32::{open/access/etc.}. +// Make sure you don't include a header that attempts to redeclare or +// redefine these functions, that'll lead to confusing compilation +// errors. It's best to #include this file as the last one to ensure that. +// +// This file is only used on Windows, it's empty on other platforms. + +#ifndef GOOGLE_PROTOBUF_IO_IO_WIN32_H__ +#define GOOGLE_PROTOBUF_IO_IO_WIN32_H__ + +#if defined(_WIN32) + +#include <functional> +#include <string> + +#include <google/protobuf/port.h> + +// Must be included last. +#include <google/protobuf/port_def.inc> + +// Compilers on Windows other than MSVC (e.g. Cygwin, MinGW32) define the +// following functions already, except for mkdir. +namespace google { +namespace protobuf { +namespace io { +namespace win32 { + +PROTOBUF_EXPORT FILE* fopen(const char* path, const char* mode); +PROTOBUF_EXPORT int access(const char* path, int mode); +PROTOBUF_EXPORT int chdir(const char* path); +PROTOBUF_EXPORT int close(int fd); +PROTOBUF_EXPORT int dup(int fd); +PROTOBUF_EXPORT int dup2(int fd1, int fd2); +PROTOBUF_EXPORT int mkdir(const char* path, int _mode); +PROTOBUF_EXPORT int open(const char* path, int flags, int mode = 0); +PROTOBUF_EXPORT int read(int fd, void* buffer, size_t size); +PROTOBUF_EXPORT int setmode(int fd, int mode); +PROTOBUF_EXPORT int stat(const char* path, struct _stat* buffer); +PROTOBUF_EXPORT int write(int fd, const void* buffer, size_t size); +PROTOBUF_EXPORT std::wstring testonly_utf8_to_winpath(const char* path); + +enum class ExpandWildcardsResult { + kSuccess = 0, + kErrorNoMatchingFile = 1, + kErrorInputPathConversion = 2, + kErrorOutputPathConversion = 3, +}; + +// Expand wildcards in a path pattern, feed the result to a consumer function. +// +// `path` must be a valid, Windows-style path. It may be absolute, or relative +// to the current working directory, and it may contain wildcards ("*" and "?") +// in the last path segment. This function passes all matching file names to +// `consume`. The resulting paths may not be absolute nor normalized. +// +// The function returns a value from `ExpandWildcardsResult`. +PROTOBUF_EXPORT ExpandWildcardsResult ExpandWildcards( + const std::string& path, std::function<void(const std::string&)> consume); + +namespace strings { + +// Convert from UTF-16 to Active-Code-Page-encoded or to UTF-8-encoded text. +PROTOBUF_EXPORT bool wcs_to_mbs(const wchar_t* s, std::string* out, + bool outUtf8); + +// Convert from Active-Code-Page-encoded or UTF-8-encoded text to UTF-16. +PROTOBUF_EXPORT bool mbs_to_wcs(const char* s, std::wstring* out, bool inUtf8); + +// Convert from UTF-8-encoded text to UTF-16. +PROTOBUF_EXPORT bool utf8_to_wcs(const char* input, std::wstring* out); + +// Convert from UTF-16-encoded text to UTF-8. +PROTOBUF_EXPORT bool wcs_to_utf8(const wchar_t* input, std::string* out); + +} // namespace strings + +} // namespace win32 +} // namespace io +} // namespace protobuf +} // namespace google + +#ifndef W_OK +#define W_OK 02 // not defined by MSVC for whatever reason +#endif + +#ifndef F_OK +#define F_OK 00 // not defined by MSVC for whatever reason +#endif + +#ifndef STDIN_FILENO +#define STDIN_FILENO 0 +#endif + +#ifndef STDOUT_FILENO +#define STDOUT_FILENO 1 +#endif + +#include <google/protobuf/port_undef.inc> + +#endif // defined(_WIN32) + +#endif // GOOGLE_PROTOBUF_IO_IO_WIN32_H__ diff --git a/include/google/protobuf/io/printer.h b/include/google/protobuf/io/printer.h new file mode 100644 index 0000000000..92a4321c04 --- /dev/null +++ b/include/google/protobuf/io/printer.h @@ -0,0 +1,387 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Utility class for writing text to a ZeroCopyOutputStream. + +#ifndef GOOGLE_PROTOBUF_IO_PRINTER_H__ +#define GOOGLE_PROTOBUF_IO_PRINTER_H__ + + +#include <map> +#include <string> +#include <vector> + +#include <google/protobuf/stubs/common.h> + +// Must be included last. +#include <google/protobuf/port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +class ZeroCopyOutputStream; // zero_copy_stream.h + +// Records annotations about a Printer's output. +class PROTOBUF_EXPORT AnnotationCollector { + public: + // Annotation is a offset range and a payload pair. + typedef std::pair<std::pair<size_t, size_t>, std::string> Annotation; + + // Records that the bytes in file_path beginning with begin_offset and ending + // before end_offset are associated with the SourceCodeInfo-style path. + virtual void AddAnnotation(size_t begin_offset, size_t end_offset, + const std::string& file_path, + const std::vector<int>& path) = 0; + + // TODO(gerbens) I don't see why we need virtuals here. Just a vector of + // range, payload pairs stored in a context should suffice. + virtual void AddAnnotationNew(Annotation& /* a */) {} + + virtual ~AnnotationCollector() {} +}; + +// Records annotations about a Printer's output to the given protocol buffer, +// assuming that the buffer has an ::Annotation message exposing path, +// source_file, begin and end fields. +template <typename AnnotationProto> +class AnnotationProtoCollector : public AnnotationCollector { + public: + // annotation_proto is the protocol buffer to which new Annotations should be + // added. It is not owned by the AnnotationProtoCollector. + explicit AnnotationProtoCollector(AnnotationProto* annotation_proto) + : annotation_proto_(annotation_proto) {} + + // Override for AnnotationCollector::AddAnnotation. + void AddAnnotation(size_t begin_offset, size_t end_offset, + const std::string& file_path, + const std::vector<int>& path) override { + typename AnnotationProto::Annotation* annotation = + annotation_proto_->add_annotation(); + for (int i = 0; i < path.size(); ++i) { + annotation->add_path(path[i]); + } + annotation->set_source_file(file_path); + annotation->set_begin(begin_offset); + annotation->set_end(end_offset); + } + // Override for AnnotationCollector::AddAnnotation. + void AddAnnotationNew(Annotation& a) override { + auto* annotation = annotation_proto_->add_annotation(); + annotation->ParseFromString(a.second); + annotation->set_begin(a.first.first); + annotation->set_end(a.first.second); + } + + private: + // The protocol buffer to which new annotations should be added. + AnnotationProto* const annotation_proto_; +}; + +// This simple utility class assists in code generation. It basically +// allows the caller to define a set of variables and then output some +// text with variable substitutions. Example usage: +// +// Printer printer(output, '$'); +// map<string, string> vars; +// vars["name"] = "Bob"; +// printer.Print(vars, "My name is $name$."); +// +// The above writes "My name is Bob." to the output stream. +// +// Printer aggressively enforces correct usage, crashing (with assert failures) +// in the case of undefined variables in debug builds. This helps greatly in +// debugging code which uses it. +// +// If a Printer is constructed with an AnnotationCollector, it will provide it +// with annotations that connect the Printer's output to paths that can identify +// various descriptors. In the above example, if person_ is a descriptor that +// identifies Bob, we can associate the output string "My name is Bob." with +// a source path pointing to that descriptor with: +// +// printer.Annotate("name", person_); +// +// The AnnotationCollector will be sent an annotation linking the output range +// covering "Bob" to the logical path provided by person_. Tools may use +// this association to (for example) link "Bob" in the output back to the +// source file that defined the person_ descriptor identifying Bob. +// +// Annotate can only examine variables substituted during the last call to +// Print. It is invalid to refer to a variable that was used multiple times +// in a single Print call. +// +// In full generality, one may specify a range of output text using a beginning +// substitution variable and an ending variable. The resulting annotation will +// span from the first character of the substituted value for the beginning +// variable to the last character of the substituted value for the ending +// variable. For example, the Annotate call above is equivalent to this one: +// +// printer.Annotate("name", "name", person_); +// +// This is useful if multiple variables combine to form a single span of output +// that should be annotated with the same source path. For example: +// +// Printer printer(output, '$'); +// map<string, string> vars; +// vars["first"] = "Alice"; +// vars["last"] = "Smith"; +// printer.Print(vars, "My name is $first$ $last$."); +// printer.Annotate("first", "last", person_); +// +// This code would associate the span covering "Alice Smith" in the output with +// the person_ descriptor. +// +// Note that the beginning variable must come before (or overlap with, in the +// case of zero-sized substitution values) the ending variable. +// +// It is also sometimes useful to use variables with zero-sized values as +// markers. This avoids issues with multiple references to the same variable +// and also allows annotation ranges to span literal text from the Print +// templates: +// +// Printer printer(output, '$'); +// map<string, string> vars; +// vars["foo"] = "bar"; +// vars["function"] = "call"; +// vars["mark"] = ""; +// printer.Print(vars, "$function$($foo$,$foo$)$mark$"); +// printer.Annotate("function", "mark", call_); +// +// This code associates the span covering "call(bar,bar)" in the output with the +// call_ descriptor. + +class PROTOBUF_EXPORT Printer { + public: + // Create a printer that writes text to the given output stream. Use the + // given character as the delimiter for variables. + Printer(ZeroCopyOutputStream* output, char variable_delimiter); + + // Create a printer that writes text to the given output stream. Use the + // given character as the delimiter for variables. If annotation_collector + // is not null, Printer will provide it with annotations about code written + // to the stream. annotation_collector is not owned by Printer. + Printer(ZeroCopyOutputStream* output, char variable_delimiter, + AnnotationCollector* annotation_collector); + + ~Printer(); + + // Link a substitution variable emitted by the last call to Print to the + // object described by descriptor. + template <typename SomeDescriptor> + void Annotate(const char* varname, const SomeDescriptor* descriptor) { + Annotate(varname, varname, descriptor); + } + + // Link the output range defined by the substitution variables as emitted by + // the last call to Print to the object described by descriptor. The range + // begins at begin_varname's value and ends after the last character of the + // value substituted for end_varname. + template <typename SomeDescriptor> + void Annotate(const char* begin_varname, const char* end_varname, + const SomeDescriptor* descriptor) { + if (annotation_collector_ == NULL) { + // Annotations aren't turned on for this Printer, so don't pay the cost + // of building the location path. + return; + } + std::vector<int> path; + descriptor->GetLocationPath(&path); + Annotate(begin_varname, end_varname, descriptor->file()->name(), path); + } + + // Link a substitution variable emitted by the last call to Print to the file + // with path file_name. + void Annotate(const char* varname, const std::string& file_name) { + Annotate(varname, varname, file_name); + } + + // Link the output range defined by the substitution variables as emitted by + // the last call to Print to the file with path file_name. The range begins + // at begin_varname's value and ends after the last character of the value + // substituted for end_varname. + void Annotate(const char* begin_varname, const char* end_varname, + const std::string& file_name) { + if (annotation_collector_ == NULL) { + // Annotations aren't turned on for this Printer. + return; + } + std::vector<int> empty_path; + Annotate(begin_varname, end_varname, file_name, empty_path); + } + + // Print some text after applying variable substitutions. If a particular + // variable in the text is not defined, this will crash. Variables to be + // substituted are identified by their names surrounded by delimiter + // characters (as given to the constructor). The variable bindings are + // defined by the given map. + void Print(const std::map<std::string, std::string>& variables, + const char* text); + + // Like the first Print(), except the substitutions are given as parameters. + template <typename... Args> + void Print(const char* text, const Args&... args) { + std::map<std::string, std::string> vars; + PrintInternal(&vars, text, args...); + } + + // Indent text by two spaces. After calling Indent(), two spaces will be + // inserted at the beginning of each line of text. Indent() may be called + // multiple times to produce deeper indents. + void Indent(); + + // Reduces the current indent level by two spaces, or crashes if the indent + // level is zero. + void Outdent(); + + // Write a string to the output buffer. + // This method does not look for newlines to add indentation. + void PrintRaw(const std::string& data); + + // Write a zero-delimited string to output buffer. + // This method does not look for newlines to add indentation. + void PrintRaw(const char* data); + + // Write some bytes to the output buffer. + // This method does not look for newlines to add indentation. + void WriteRaw(const char* data, int size); + + // FormatInternal is a helper function not meant to use directly, use + // compiler::cpp::Formatter instead. This function is meant to support + // formatting text using named variables (eq. "$foo$) from a lookup map (vars) + // and variables directly supplied by arguments (eq "$1$" meaning first + // argument which is the zero index element of args). + void FormatInternal(const std::vector<std::string>& args, + const std::map<std::string, std::string>& vars, + const char* format); + + // True if any write to the underlying stream failed. (We don't just + // crash in this case because this is an I/O failure, not a programming + // error.) + bool failed() const { return failed_; } + + private: + // Link the output range defined by the substitution variables as emitted by + // the last call to Print to the object found at the SourceCodeInfo-style path + // in a file with path file_path. The range begins at the start of + // begin_varname's value and ends after the last character of the value + // substituted for end_varname. Note that begin_varname and end_varname + // may refer to the same variable. + void Annotate(const char* begin_varname, const char* end_varname, + const std::string& file_path, const std::vector<int>& path); + + // Base case + void PrintInternal(std::map<std::string, std::string>* vars, + const char* text) { + Print(*vars, text); + } + + template <typename... Args> + void PrintInternal(std::map<std::string, std::string>* vars, const char* text, + const char* key, const std::string& value, + const Args&... args) { + (*vars)[key] = value; + PrintInternal(vars, text, args...); + } + + // Copy size worth of bytes from data to buffer_. + void CopyToBuffer(const char* data, int size); + + void push_back(char c) { + if (failed_) return; + if (buffer_size_ == 0) { + if (!Next()) return; + } + *buffer_++ = c; + buffer_size_--; + offset_++; + } + + bool Next(); + + inline void IndentIfAtStart(); + const char* WriteVariable( + const std::vector<std::string>& args, + const std::map<std::string, std::string>& vars, const char* format, + int* arg_index, + std::vector<AnnotationCollector::Annotation>* annotations); + + const char variable_delimiter_; + + ZeroCopyOutputStream* const output_; + char* buffer_; + int buffer_size_; + // The current position, in bytes, in the output stream. This is equivalent + // to the total number of bytes that have been written so far. This value is + // used to calculate annotation ranges in the substitutions_ map below. + size_t offset_; + + std::string indent_; + bool at_start_of_line_; + bool failed_; + + // A map from variable name to [start, end) offsets in the output buffer. + // These refer to the offsets used for a variable after the last call to + // Print. If a variable was used more than once, the entry used in + // this map is set to a negative-length span. For singly-used variables, the + // start offset is the beginning of the substitution; the end offset is the + // last byte of the substitution plus one (such that (end - start) is the + // length of the substituted string). + std::map<std::string, std::pair<size_t, size_t> > substitutions_; + + // Keeps track of the keys in substitutions_ that need to be updated when + // indents are inserted. These are keys that refer to the beginning of the + // current line. + std::vector<std::string> line_start_variables_; + + // Returns true and sets range to the substitution range in the output for + // varname if varname was used once in the last call to Print. If varname + // was not used, or if it was used multiple times, returns false (and + // fails a debug assertion). + bool GetSubstitutionRange(const char* varname, + std::pair<size_t, size_t>* range); + + // If non-null, annotation_collector_ is used to store annotations about + // generated code. + AnnotationCollector* const annotation_collector_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Printer); +}; + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <google/protobuf/port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_PRINTER_H__ diff --git a/include/google/protobuf/io/strtod.h b/include/google/protobuf/io/strtod.h new file mode 100644 index 0000000000..38f544af34 --- /dev/null +++ b/include/google/protobuf/io/strtod.h @@ -0,0 +1,55 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A locale-independent version of strtod(), used to parse floating +// point default values in .proto files, where the decimal separator +// is always a dot. + +#ifndef GOOGLE_PROTOBUF_IO_STRTOD_H__ +#define GOOGLE_PROTOBUF_IO_STRTOD_H__ + +namespace google { +namespace protobuf { +namespace io { + +// A locale-independent version of the standard strtod(), which always +// uses a dot as the decimal separator. +double NoLocaleStrtod(const char* str, char** endptr); + +// Casts a double value to a float value. If the value is outside of the +// representable range of float, it will be converted to positive or negative +// infinity. +float SafeDoubleToFloat(double value); + +} // namespace io +} // namespace protobuf +} // namespace google + +#endif // GOOGLE_PROTOBUF_IO_STRTOD_H__ diff --git a/include/google/protobuf/io/tokenizer.h b/include/google/protobuf/io/tokenizer.h new file mode 100644 index 0000000000..4abab7e30c --- /dev/null +++ b/include/google/protobuf/io/tokenizer.h @@ -0,0 +1,442 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Class for parsing tokenized text from a ZeroCopyInputStream. + +#ifndef GOOGLE_PROTOBUF_IO_TOKENIZER_H__ +#define GOOGLE_PROTOBUF_IO_TOKENIZER_H__ + + +#include <string> +#include <vector> + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/logging.h> + +// Must be included last. +#include <google/protobuf/port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +class ZeroCopyInputStream; // zero_copy_stream.h + +// Defined in this file. +class ErrorCollector; +class Tokenizer; + +// By "column number", the proto compiler refers to a count of the number +// of bytes before a given byte, except that a tab character advances to +// the next multiple of 8 bytes. Note in particular that column numbers +// are zero-based, while many user interfaces use one-based column numbers. +typedef int ColumnNumber; + +// Abstract interface for an object which collects the errors that occur +// during parsing. A typical implementation might simply print the errors +// to stdout. +class PROTOBUF_EXPORT ErrorCollector { + public: + inline ErrorCollector() {} + virtual ~ErrorCollector(); + + // Indicates that there was an error in the input at the given line and + // column numbers. The numbers are zero-based, so you may want to add + // 1 to each before printing them. + virtual void AddError(int line, ColumnNumber column, + const std::string& message) = 0; + + // Indicates that there was a warning in the input at the given line and + // column numbers. The numbers are zero-based, so you may want to add + // 1 to each before printing them. + virtual void AddWarning(int /* line */, ColumnNumber /* column */, + const std::string& /* message */) {} + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector); +}; + +// This class converts a stream of raw text into a stream of tokens for +// the protocol definition parser to parse. The tokens recognized are +// similar to those that make up the C language; see the TokenType enum for +// precise descriptions. Whitespace and comments are skipped. By default, +// C- and C++-style comments are recognized, but other styles can be used by +// calling set_comment_style(). +class PROTOBUF_EXPORT Tokenizer { + public: + // Construct a Tokenizer that reads and tokenizes text from the given + // input stream and writes errors to the given error_collector. + // The caller keeps ownership of input and error_collector. + Tokenizer(ZeroCopyInputStream* input, ErrorCollector* error_collector); + ~Tokenizer(); + + enum TokenType { + TYPE_START, // Next() has not yet been called. + TYPE_END, // End of input reached. "text" is empty. + + TYPE_IDENTIFIER, // A sequence of letters, digits, and underscores, not + // starting with a digit. It is an error for a number + // to be followed by an identifier with no space in + // between. + TYPE_INTEGER, // A sequence of digits representing an integer. Normally + // the digits are decimal, but a prefix of "0x" indicates + // a hex number and a leading zero indicates octal, just + // like with C numeric literals. A leading negative sign + // is NOT included in the token; it's up to the parser to + // interpret the unary minus operator on its own. + TYPE_FLOAT, // A floating point literal, with a fractional part and/or + // an exponent. Always in decimal. Again, never + // negative. + TYPE_STRING, // A quoted sequence of escaped characters. Either single + // or double quotes can be used, but they must match. + // A string literal cannot cross a line break. + TYPE_SYMBOL, // Any other printable character, like '!' or '+'. + // Symbols are always a single character, so "!+$%" is + // four tokens. + TYPE_WHITESPACE, // A sequence of whitespace. This token type is only + // produced if report_whitespace() is true. It is not + // reported for whitespace within comments or strings. + TYPE_NEWLINE, // A newline (\n). This token type is only + // produced if report_whitespace() is true and + // report_newlines() is true. It is not reported for + // newlines in comments or strings. + }; + + // Structure representing a token read from the token stream. + struct Token { + TokenType type; + std::string text; // The exact text of the token as it appeared in + // the input. e.g. tokens of TYPE_STRING will still + // be escaped and in quotes. + + // "line" and "column" specify the position of the first character of + // the token within the input stream. They are zero-based. + int line; + ColumnNumber column; + ColumnNumber end_column; + }; + + // Get the current token. This is updated when Next() is called. Before + // the first call to Next(), current() has type TYPE_START and no contents. + const Token& current(); + + // Return the previous token -- i.e. what current() returned before the + // previous call to Next(). + const Token& previous(); + + // Advance to the next token. Returns false if the end of the input is + // reached. + bool Next(); + + // Like Next(), but also collects comments which appear between the previous + // and next tokens. + // + // Comments which appear to be attached to the previous token are stored + // in *prev_tailing_comments. Comments which appear to be attached to the + // next token are stored in *next_leading_comments. Comments appearing in + // between which do not appear to be attached to either will be added to + // detached_comments. Any of these parameters can be NULL to simply discard + // the comments. + // + // A series of line comments appearing on consecutive lines, with no other + // tokens appearing on those lines, will be treated as a single comment. + // + // Only the comment content is returned; comment markers (e.g. //) are + // stripped out. For block comments, leading whitespace and an asterisk will + // be stripped from the beginning of each line other than the first. Newlines + // are included in the output. + // + // Examples: + // + // optional int32 foo = 1; // Comment attached to foo. + // // Comment attached to bar. + // optional int32 bar = 2; + // + // optional string baz = 3; + // // Comment attached to baz. + // // Another line attached to baz. + // + // // Comment attached to qux. + // // + // // Another line attached to qux. + // optional double qux = 4; + // + // // Detached comment. This is not attached to qux or corge + // // because there are blank lines separating it from both. + // + // optional string corge = 5; + // /* Block comment attached + // * to corge. Leading asterisks + // * will be removed. */ + // /* Block comment attached to + // * grault. */ + // optional int32 grault = 6; + bool NextWithComments(std::string* prev_trailing_comments, + std::vector<std::string>* detached_comments, + std::string* next_leading_comments); + + // Parse helpers --------------------------------------------------- + + // Parses a TYPE_FLOAT token. This never fails, so long as the text actually + // comes from a TYPE_FLOAT token parsed by Tokenizer. If it doesn't, the + // result is undefined (possibly an assert failure). + static double ParseFloat(const std::string& text); + + // Parses a TYPE_STRING token. This never fails, so long as the text actually + // comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the + // result is undefined (possibly an assert failure). + static void ParseString(const std::string& text, std::string* output); + + // Identical to ParseString, but appends to output. + static void ParseStringAppend(const std::string& text, std::string* output); + + // Parses a TYPE_INTEGER token. Returns false if the result would be + // greater than max_value. Otherwise, returns true and sets *output to the + // result. If the text is not from a Token of type TYPE_INTEGER originally + // parsed by a Tokenizer, the result is undefined (possibly an assert + // failure). + static bool ParseInteger(const std::string& text, uint64_t max_value, + uint64_t* output); + + // Options --------------------------------------------------------- + + // Set true to allow floats to be suffixed with the letter 'f'. Tokens + // which would otherwise be integers but which have the 'f' suffix will be + // forced to be interpreted as floats. For all other purposes, the 'f' is + // ignored. + void set_allow_f_after_float(bool value) { allow_f_after_float_ = value; } + + // Valid values for set_comment_style(). + enum CommentStyle { + // Line comments begin with "//", block comments are delimited by "/*" and + // "*/". + CPP_COMMENT_STYLE, + // Line comments begin with "#". No way to write block comments. + SH_COMMENT_STYLE + }; + + // Sets the comment style. + void set_comment_style(CommentStyle style) { comment_style_ = style; } + + // Whether to require whitespace between a number and a field name. + // Default is true. Do not use this; for Google-internal cleanup only. + void set_require_space_after_number(bool require) { + require_space_after_number_ = require; + } + + // Whether to allow string literals to span multiple lines. Default is false. + // Do not use this; for Google-internal cleanup only. + void set_allow_multiline_strings(bool allow) { + allow_multiline_strings_ = allow; + } + + // If true, whitespace tokens are reported by Next(). + // Note: `set_report_whitespace(false)` implies `set_report_newlines(false)`. + bool report_whitespace() const; + void set_report_whitespace(bool report); + + // If true, newline tokens are reported by Next(). + // Note: `set_report_newlines(true)` implies `set_report_whitespace(true)`. + bool report_newlines() const; + void set_report_newlines(bool report); + + // External helper: validate an identifier. + static bool IsIdentifier(const std::string& text); + + // ----------------------------------------------------------------- + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer); + + Token current_; // Returned by current(). + Token previous_; // Returned by previous(). + + ZeroCopyInputStream* input_; + ErrorCollector* error_collector_; + + char current_char_; // == buffer_[buffer_pos_], updated by NextChar(). + const char* buffer_; // Current buffer returned from input_. + int buffer_size_; // Size of buffer_. + int buffer_pos_; // Current position within the buffer. + bool read_error_; // Did we previously encounter a read error? + + // Line and column number of current_char_ within the whole input stream. + int line_; + ColumnNumber column_; + + // String to which text should be appended as we advance through it. + // Call RecordTo(&str) to start recording and StopRecording() to stop. + // E.g. StartToken() calls RecordTo(¤t_.text). record_start_ is the + // position within the current buffer where recording started. + std::string* record_target_; + int record_start_; + + // Options. + bool allow_f_after_float_; + CommentStyle comment_style_; + bool require_space_after_number_; + bool allow_multiline_strings_; + bool report_whitespace_ = false; + bool report_newlines_ = false; + + // Since we count columns we need to interpret tabs somehow. We'll take + // the standard 8-character definition for lack of any way to do better. + // This must match the documentation of ColumnNumber. + static const int kTabWidth = 8; + + // ----------------------------------------------------------------- + // Helper methods. + + // Consume this character and advance to the next one. + void NextChar(); + + // Read a new buffer from the input. + void Refresh(); + + inline void RecordTo(std::string* target); + inline void StopRecording(); + + // Called when the current character is the first character of a new + // token (not including whitespace or comments). + inline void StartToken(); + // Called when the current character is the first character after the + // end of the last token. After this returns, current_.text will + // contain all text consumed since StartToken() was called. + inline void EndToken(); + + // Convenience method to add an error at the current line and column. + void AddError(const std::string& message) { + error_collector_->AddError(line_, column_, message); + } + + // ----------------------------------------------------------------- + // The following four methods are used to consume tokens of specific + // types. They are actually used to consume all characters *after* + // the first, since the calling function consumes the first character + // in order to decide what kind of token is being read. + + // Read and consume a string, ending when the given delimiter is + // consumed. + void ConsumeString(char delimiter); + + // Read and consume a number, returning TYPE_FLOAT or TYPE_INTEGER + // depending on what was read. This needs to know if the first + // character was a zero in order to correctly recognize hex and octal + // numbers. + // It also needs to know if the first character was a . to parse floating + // point correctly. + TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot); + + // Consume the rest of a line. + void ConsumeLineComment(std::string* content); + // Consume until "*/". + void ConsumeBlockComment(std::string* content); + + enum NextCommentStatus { + // Started a line comment. + LINE_COMMENT, + + // Started a block comment. + BLOCK_COMMENT, + + // Consumed a slash, then realized it wasn't a comment. current_ has + // been filled in with a slash token. The caller should return it. + SLASH_NOT_COMMENT, + + // We do not appear to be starting a comment here. + NO_COMMENT + }; + + // If we're at the start of a new comment, consume it and return what kind + // of comment it is. + NextCommentStatus TryConsumeCommentStart(); + + // If we're looking at a TYPE_WHITESPACE token and `report_whitespace_` is + // true, consume it and return true. + bool TryConsumeWhitespace(); + + // If we're looking at a TYPE_NEWLINE token and `report_newlines_` is true, + // consume it and return true. + bool TryConsumeNewline(); + + // ----------------------------------------------------------------- + // These helper methods make the parsing code more readable. The + // "character classes" referred to are defined at the top of the .cc file. + // Basically it is a C++ class with one method: + // static bool InClass(char c); + // The method returns true if c is a member of this "class", like "Letter" + // or "Digit". + + // Returns true if the current character is of the given character + // class, but does not consume anything. + template <typename CharacterClass> + inline bool LookingAt(); + + // If the current character is in the given class, consume it and return + // true. Otherwise return false. + // e.g. TryConsumeOne<Letter>() + template <typename CharacterClass> + inline bool TryConsumeOne(); + + // Like above, but try to consume the specific character indicated. + inline bool TryConsume(char c); + + // Consume zero or more of the given character class. + template <typename CharacterClass> + inline void ConsumeZeroOrMore(); + + // Consume one or more of the given character class or log the given + // error message. + // e.g. ConsumeOneOrMore<Digit>("Expected digits."); + template <typename CharacterClass> + inline void ConsumeOneOrMore(const char* error); +}; + +// inline methods ==================================================== +inline const Tokenizer::Token& Tokenizer::current() { return current_; } + +inline const Tokenizer::Token& Tokenizer::previous() { return previous_; } + +inline void Tokenizer::ParseString(const std::string& text, + std::string* output) { + output->clear(); + ParseStringAppend(text, output); +} + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <google/protobuf/port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_TOKENIZER_H__ diff --git a/include/google/protobuf/io/zero_copy_stream.h b/include/google/protobuf/io/zero_copy_stream.h new file mode 100644 index 0000000000..2041cbf648 --- /dev/null +++ b/include/google/protobuf/io/zero_copy_stream.h @@ -0,0 +1,260 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains the ZeroCopyInputStream and ZeroCopyOutputStream +// interfaces, which represent abstract I/O streams to and from which +// protocol buffers can be read and written. For a few simple +// implementations of these interfaces, see zero_copy_stream_impl.h. +// +// These interfaces are different from classic I/O streams in that they +// try to minimize the amount of data copying that needs to be done. +// To accomplish this, responsibility for allocating buffers is moved to +// the stream object, rather than being the responsibility of the caller. +// So, the stream can return a buffer which actually points directly into +// the final data structure where the bytes are to be stored, and the caller +// can interact directly with that buffer, eliminating an intermediate copy +// operation. +// +// As an example, consider the common case in which you are reading bytes +// from an array that is already in memory (or perhaps an mmap()ed file). +// With classic I/O streams, you would do something like: +// char buffer[BUFFER_SIZE]; +// input->Read(buffer, BUFFER_SIZE); +// DoSomething(buffer, BUFFER_SIZE); +// Then, the stream basically just calls memcpy() to copy the data from +// the array into your buffer. With a ZeroCopyInputStream, you would do +// this instead: +// const void* buffer; +// int size; +// input->Next(&buffer, &size); +// DoSomething(buffer, size); +// Here, no copy is performed. The input stream returns a pointer directly +// into the backing array, and the caller ends up reading directly from it. +// +// If you want to be able to read the old-fashion way, you can create +// a CodedInputStream or CodedOutputStream wrapping these objects and use +// their ReadRaw()/WriteRaw() methods. These will, of course, add a copy +// step, but Coded*Stream will handle buffering so at least it will be +// reasonably efficient. +// +// ZeroCopyInputStream example: +// // Read in a file and print its contents to stdout. +// int fd = open("myfile", O_RDONLY); +// ZeroCopyInputStream* input = new FileInputStream(fd); +// +// const void* buffer; +// int size; +// while (input->Next(&buffer, &size)) { +// cout.write(buffer, size); +// } +// +// delete input; +// close(fd); +// +// ZeroCopyOutputStream example: +// // Copy the contents of "infile" to "outfile", using plain read() for +// // "infile" but a ZeroCopyOutputStream for "outfile". +// int infd = open("infile", O_RDONLY); +// int outfd = open("outfile", O_WRONLY); +// ZeroCopyOutputStream* output = new FileOutputStream(outfd); +// +// void* buffer; +// int size; +// while (output->Next(&buffer, &size)) { +// int bytes = read(infd, buffer, size); +// if (bytes < size) { +// // Reached EOF. +// output->BackUp(size - bytes); +// break; +// } +// } +// +// delete output; +// close(infd); +// close(outfd); + +#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ +#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ + + +#include <google/protobuf/stubs/common.h> + + +// Must be included last. +#include <google/protobuf/port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +// Defined in this file. +class ZeroCopyInputStream; +class ZeroCopyOutputStream; + +// Abstract interface similar to an input stream but designed to minimize +// copying. +class PROTOBUF_EXPORT ZeroCopyInputStream { + public: + ZeroCopyInputStream() {} + virtual ~ZeroCopyInputStream() {} + + // Obtains a chunk of data from the stream. + // + // Preconditions: + // * "size" and "data" are not NULL. + // + // Postconditions: + // * If the returned value is false, there is no more data to return or + // an error occurred. All errors are permanent. + // * Otherwise, "size" points to the actual number of bytes read and "data" + // points to a pointer to a buffer containing these bytes. + // * Ownership of this buffer remains with the stream, and the buffer + // remains valid only until some other method of the stream is called + // or the stream is destroyed. + // * It is legal for the returned buffer to have zero size, as long + // as repeatedly calling Next() eventually yields a buffer with non-zero + // size. + virtual bool Next(const void** data, int* size) = 0; + + // Backs up a number of bytes, so that the next call to Next() returns + // data again that was already returned by the last call to Next(). This + // is useful when writing procedures that are only supposed to read up + // to a certain point in the input, then return. If Next() returns a + // buffer that goes beyond what you wanted to read, you can use BackUp() + // to return to the point where you intended to finish. + // + // This method can be called with `count = 0` to finalize (flush) any + // previously returned buffer. For example, a file output stream can + // flush buffers returned from a previous call to Next() upon such + // BackUp(0) invocations. ZeroCopyOutputStream callers should always + // invoke BackUp() after a final Next() call, even if there is no + // excess buffer data to be backed up to indicate a flush point. + // + // Preconditions: + // * The last method called must have been Next(). + // * count must be less than or equal to the size of the last buffer + // returned by Next(). + // + // Postconditions: + // * The last "count" bytes of the last buffer returned by Next() will be + // pushed back into the stream. Subsequent calls to Next() will return + // the same data again before producing new data. + virtual void BackUp(int count) = 0; + + // Skips a number of bytes. Returns false if the end of the stream is + // reached or some input error occurred. In the end-of-stream case, the + // stream is advanced to the end of the stream (so ByteCount() will return + // the total size of the stream). + virtual bool Skip(int count) = 0; + + // Returns the total number of bytes read since this object was created. + virtual int64_t ByteCount() const = 0; + + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyInputStream); +}; + +// Abstract interface similar to an output stream but designed to minimize +// copying. +class PROTOBUF_EXPORT ZeroCopyOutputStream { + public: + ZeroCopyOutputStream() {} + virtual ~ZeroCopyOutputStream() {} + + // Obtains a buffer into which data can be written. Any data written + // into this buffer will eventually (maybe instantly, maybe later on) + // be written to the output. + // + // Preconditions: + // * "size" and "data" are not NULL. + // + // Postconditions: + // * If the returned value is false, an error occurred. All errors are + // permanent. + // * Otherwise, "size" points to the actual number of bytes in the buffer + // and "data" points to the buffer. + // * Ownership of this buffer remains with the stream, and the buffer + // remains valid only until some other method of the stream is called + // or the stream is destroyed. + // * Any data which the caller stores in this buffer will eventually be + // written to the output (unless BackUp() is called). + // * It is legal for the returned buffer to have zero size, as long + // as repeatedly calling Next() eventually yields a buffer with non-zero + // size. + virtual bool Next(void** data, int* size) = 0; + + // Backs up a number of bytes, so that the end of the last buffer returned + // by Next() is not actually written. This is needed when you finish + // writing all the data you want to write, but the last buffer was bigger + // than you needed. You don't want to write a bunch of garbage after the + // end of your data, so you use BackUp() to back up. + // + // Preconditions: + // * The last method called must have been Next(). + // * count must be less than or equal to the size of the last buffer + // returned by Next(). + // * The caller must not have written anything to the last "count" bytes + // of that buffer. + // + // Postconditions: + // * The last "count" bytes of the last buffer returned by Next() will be + // ignored. + virtual void BackUp(int count) = 0; + + // Returns the total number of bytes written since this object was created. + virtual int64_t ByteCount() const = 0; + + // Write a given chunk of data to the output. Some output streams may + // implement this in a way that avoids copying. Check AllowsAliasing() before + // calling WriteAliasedRaw(). It will GOOGLE_CHECK fail if WriteAliasedRaw() is + // called on a stream that does not allow aliasing. + // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + virtual bool WriteAliasedRaw(const void* data, int size); + virtual bool AllowsAliasing() const { return false; } + + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream); +}; + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <google/protobuf/port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ diff --git a/include/google/protobuf/io/zero_copy_stream_impl.h b/include/google/protobuf/io/zero_copy_stream_impl.h new file mode 100644 index 0000000000..a385992f20 --- /dev/null +++ b/include/google/protobuf/io/zero_copy_stream_impl.h @@ -0,0 +1,336 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains common implementations of the interfaces defined in +// zero_copy_stream.h which are only included in the full (non-lite) +// protobuf library. These implementations include Unix file descriptors +// and C++ iostreams. See also: zero_copy_stream_impl_lite.h + +#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ +#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ + + +#include <iosfwd> +#include <string> + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/io/zero_copy_stream.h> +#include <google/protobuf/io/zero_copy_stream_impl_lite.h> + +// Must be included last. +#include <google/protobuf/port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +// =================================================================== + +// A ZeroCopyInputStream which reads from a file descriptor. +// +// FileInputStream is preferred over using an ifstream with IstreamInputStream. +// The latter will introduce an extra layer of buffering, harming performance. +// Also, it's conceivable that FileInputStream could someday be enhanced +// to use zero-copy file descriptors on OSs which support them. +class PROTOBUF_EXPORT FileInputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given Unix file descriptor. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. + explicit FileInputStream(int file_descriptor, int block_size = -1); + + // Flushes any buffers and closes the underlying file. Returns false if + // an error occurs during the process; use GetErrno() to examine the error. + // Even if an error occurs, the file descriptor is closed when this returns. + bool Close(); + + // By default, the file descriptor is not closed when the stream is + // destroyed. Call SetCloseOnDelete(true) to change that. WARNING: + // This leaves no way for the caller to detect if close() fails. If + // detecting close() errors is important to you, you should arrange + // to close the descriptor yourself. + void SetCloseOnDelete(bool value) { copying_input_.SetCloseOnDelete(value); } + + // If an I/O error has occurred on this file descriptor, this is the + // errno from that error. Otherwise, this is zero. Once an error + // occurs, the stream is broken and all subsequent operations will + // fail. + int GetErrno() const { return copying_input_.GetErrno(); } + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + class PROTOBUF_EXPORT CopyingFileInputStream PROTOBUF_FUTURE_FINAL + : public CopyingInputStream { + public: + CopyingFileInputStream(int file_descriptor); + ~CopyingFileInputStream() override; + + bool Close(); + void SetCloseOnDelete(bool value) { close_on_delete_ = value; } + int GetErrno() const { return errno_; } + + // implements CopyingInputStream --------------------------------- + int Read(void* buffer, int size) override; + int Skip(int count) override; + + private: + // The file descriptor. + const int file_; + bool close_on_delete_; + bool is_closed_; + + // The errno of the I/O error, if one has occurred. Otherwise, zero. + int errno_; + + // Did we try to seek once and fail? If so, we assume this file descriptor + // doesn't support seeking and won't try again. + bool previous_seek_failed_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileInputStream); + }; + + CopyingFileInputStream copying_input_; + CopyingInputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which writes to a file descriptor. +// +// FileOutputStream is preferred over using an ofstream with +// OstreamOutputStream. The latter will introduce an extra layer of buffering, +// harming performance. Also, it's conceivable that FileOutputStream could +// someday be enhanced to use zero-copy file descriptors on OSs which +// support them. +class PROTOBUF_EXPORT FileOutputStream PROTOBUF_FUTURE_FINAL + : public CopyingOutputStreamAdaptor { + public: + // Creates a stream that writes to the given Unix file descriptor. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit FileOutputStream(int file_descriptor, int block_size = -1); + + ~FileOutputStream() override; + + // Flushes any buffers and closes the underlying file. Returns false if + // an error occurs during the process; use GetErrno() to examine the error. + // Even if an error occurs, the file descriptor is closed when this returns. + bool Close(); + + // By default, the file descriptor is not closed when the stream is + // destroyed. Call SetCloseOnDelete(true) to change that. WARNING: + // This leaves no way for the caller to detect if close() fails. If + // detecting close() errors is important to you, you should arrange + // to close the descriptor yourself. + void SetCloseOnDelete(bool value) { copying_output_.SetCloseOnDelete(value); } + + // If an I/O error has occurred on this file descriptor, this is the + // errno from that error. Otherwise, this is zero. Once an error + // occurs, the stream is broken and all subsequent operations will + // fail. + int GetErrno() const { return copying_output_.GetErrno(); } + + private: + class PROTOBUF_EXPORT CopyingFileOutputStream PROTOBUF_FUTURE_FINAL + : public CopyingOutputStream { + public: + CopyingFileOutputStream(int file_descriptor); + ~CopyingFileOutputStream() override; + + bool Close(); + void SetCloseOnDelete(bool value) { close_on_delete_ = value; } + int GetErrno() const { return errno_; } + + // implements CopyingOutputStream -------------------------------- + bool Write(const void* buffer, int size) override; + + private: + // The file descriptor. + const int file_; + bool close_on_delete_; + bool is_closed_; + + // The errno of the I/O error, if one has occurred. Otherwise, zero. + int errno_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileOutputStream); + }; + + CopyingFileOutputStream copying_output_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileOutputStream); +}; + +// =================================================================== + +// A ZeroCopyInputStream which reads from a C++ istream. +// +// Note that for reading files (or anything represented by a file descriptor), +// FileInputStream is more efficient. +class PROTOBUF_EXPORT IstreamInputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given C++ istream. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. + explicit IstreamInputStream(std::istream* stream, int block_size = -1); + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + class PROTOBUF_EXPORT CopyingIstreamInputStream PROTOBUF_FUTURE_FINAL + : public CopyingInputStream { + public: + CopyingIstreamInputStream(std::istream* input); + ~CopyingIstreamInputStream() override; + + // implements CopyingInputStream --------------------------------- + int Read(void* buffer, int size) override; + // (We use the default implementation of Skip().) + + private: + // The stream. + std::istream* input_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingIstreamInputStream); + }; + + CopyingIstreamInputStream copying_input_; + CopyingInputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(IstreamInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which writes to a C++ ostream. +// +// Note that for writing files (or anything represented by a file descriptor), +// FileOutputStream is more efficient. +class PROTOBUF_EXPORT OstreamOutputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyOutputStream { + public: + // Creates a stream that writes to the given C++ ostream. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit OstreamOutputStream(std::ostream* stream, int block_size = -1); + ~OstreamOutputStream() override; + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + + private: + class PROTOBUF_EXPORT CopyingOstreamOutputStream PROTOBUF_FUTURE_FINAL + : public CopyingOutputStream { + public: + CopyingOstreamOutputStream(std::ostream* output); + ~CopyingOstreamOutputStream() override; + + // implements CopyingOutputStream -------------------------------- + bool Write(const void* buffer, int size) override; + + private: + // The stream. + std::ostream* output_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOstreamOutputStream); + }; + + CopyingOstreamOutputStream copying_output_; + CopyingOutputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OstreamOutputStream); +}; + +// =================================================================== + +// A ZeroCopyInputStream which reads from several other streams in sequence. +// ConcatenatingInputStream is unable to distinguish between end-of-stream +// and read errors in the underlying streams, so it assumes any errors mean +// end-of-stream. So, if the underlying streams fail for any other reason, +// ConcatenatingInputStream may do odd things. It is suggested that you do +// not use ConcatenatingInputStream on streams that might produce read errors +// other than end-of-stream. +class PROTOBUF_EXPORT ConcatenatingInputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyInputStream { + public: + // All streams passed in as well as the array itself must remain valid + // until the ConcatenatingInputStream is destroyed. + ConcatenatingInputStream(ZeroCopyInputStream* const streams[], int count); + ~ConcatenatingInputStream() override = default; + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + + private: + // As streams are retired, streams_ is incremented and count_ is + // decremented. + ZeroCopyInputStream* const* streams_; + int stream_count_; + int64_t bytes_retired_; // Bytes read from previous streams. + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ConcatenatingInputStream); +}; + +// =================================================================== + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <google/protobuf/port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ diff --git a/include/google/protobuf/io/zero_copy_stream_impl_lite.h b/include/google/protobuf/io/zero_copy_stream_impl_lite.h new file mode 100644 index 0000000000..cbda32871e --- /dev/null +++ b/include/google/protobuf/io/zero_copy_stream_impl_lite.h @@ -0,0 +1,413 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains common implementations of the interfaces defined in +// zero_copy_stream.h which are included in the "lite" protobuf library. +// These implementations cover I/O on raw arrays and strings, as well as +// adaptors which make it easy to implement streams based on traditional +// streams. Of course, many users will probably want to write their own +// implementations of these interfaces specific to the particular I/O +// abstractions they prefer to use, but these should cover the most common +// cases. + +#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__ +#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__ + + +#include <iosfwd> +#include <memory> +#include <string> + +#include <google/protobuf/stubs/callback.h> +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/io/zero_copy_stream.h> +#include <google/protobuf/stubs/stl_util.h> + + +// Must be included last. +#include <google/protobuf/port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +// =================================================================== + +// A ZeroCopyInputStream backed by an in-memory array of bytes. +class PROTOBUF_EXPORT ArrayInputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyInputStream { + public: + // Create an InputStream that returns the bytes pointed to by "data". + // "data" remains the property of the caller but must remain valid until + // the stream is destroyed. If a block_size is given, calls to Next() + // will return data blocks no larger than the given size. Otherwise, the + // first call to Next() returns the entire array. block_size is mainly + // useful for testing; in production you would probably never want to set + // it. + ArrayInputStream(const void* data, int size, int block_size = -1); + ~ArrayInputStream() override = default; + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + + private: + const uint8_t* const data_; // The byte array. + const int size_; // Total size of the array. + const int block_size_; // How many bytes to return at a time. + + int position_; + int last_returned_size_; // How many bytes we returned last time Next() + // was called (used for error checking only). + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream backed by an in-memory array of bytes. +class PROTOBUF_EXPORT ArrayOutputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyOutputStream { + public: + // Create an OutputStream that writes to the bytes pointed to by "data". + // "data" remains the property of the caller but must remain valid until + // the stream is destroyed. If a block_size is given, calls to Next() + // will return data blocks no larger than the given size. Otherwise, the + // first call to Next() returns the entire array. block_size is mainly + // useful for testing; in production you would probably never want to set + // it. + ArrayOutputStream(void* data, int size, int block_size = -1); + ~ArrayOutputStream() override = default; + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + + private: + uint8_t* const data_; // The byte array. + const int size_; // Total size of the array. + const int block_size_; // How many bytes to return at a time. + + int position_; + int last_returned_size_; // How many bytes we returned last time Next() + // was called (used for error checking only). + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayOutputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which appends bytes to a string. +class PROTOBUF_EXPORT StringOutputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyOutputStream { + public: + // Create a StringOutputStream which appends bytes to the given string. + // The string remains property of the caller, but it is mutated in arbitrary + // ways and MUST NOT be accessed in any way until you're done with the + // stream. Either be sure there's no further usage, or (safest) destroy the + // stream before using the contents. + // + // Hint: If you call target->reserve(n) before creating the stream, + // the first call to Next() will return at least n bytes of buffer + // space. + explicit StringOutputStream(std::string* target); + ~StringOutputStream() override = default; + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + + private: + static constexpr size_t kMinimumSize = 16; + + std::string* target_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StringOutputStream); +}; + +// Note: There is no StringInputStream. Instead, just create an +// ArrayInputStream as follows: +// ArrayInputStream input(str.data(), str.size()); + +// =================================================================== + +// A generic traditional input stream interface. +// +// Lots of traditional input streams (e.g. file descriptors, C stdio +// streams, and C++ iostreams) expose an interface where every read +// involves copying bytes into a buffer. If you want to take such an +// interface and make a ZeroCopyInputStream based on it, simply implement +// CopyingInputStream and then use CopyingInputStreamAdaptor. +// +// CopyingInputStream implementations should avoid buffering if possible. +// CopyingInputStreamAdaptor does its own buffering and will read data +// in large blocks. +class PROTOBUF_EXPORT CopyingInputStream { + public: + virtual ~CopyingInputStream() {} + + // Reads up to "size" bytes into the given buffer. Returns the number of + // bytes read. Read() waits until at least one byte is available, or + // returns zero if no bytes will ever become available (EOF), or -1 if a + // permanent read error occurred. + virtual int Read(void* buffer, int size) = 0; + + // Skips the next "count" bytes of input. Returns the number of bytes + // actually skipped. This will always be exactly equal to "count" unless + // EOF was reached or a permanent read error occurred. + // + // The default implementation just repeatedly calls Read() into a scratch + // buffer. + virtual int Skip(int count); +}; + +// A ZeroCopyInputStream which reads from a CopyingInputStream. This is +// useful for implementing ZeroCopyInputStreams that read from traditional +// streams. Note that this class is not really zero-copy. +// +// If you want to read from file descriptors or C++ istreams, this is +// already implemented for you: use FileInputStream or IstreamInputStream +// respectively. +class PROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given CopyingInputStream. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. The caller retains ownership of + // copying_stream unless SetOwnsCopyingStream(true) is called. + explicit CopyingInputStreamAdaptor(CopyingInputStream* copying_stream, + int block_size = -1); + ~CopyingInputStreamAdaptor() override; + + // Call SetOwnsCopyingStream(true) to tell the CopyingInputStreamAdaptor to + // delete the underlying CopyingInputStream when it is destroyed. + void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; } + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + // Insures that buffer_ is not NULL. + void AllocateBufferIfNeeded(); + // Frees the buffer and resets buffer_used_. + void FreeBuffer(); + + // The underlying copying stream. + CopyingInputStream* copying_stream_; + bool owns_copying_stream_; + + // True if we have seen a permanent error from the underlying stream. + bool failed_; + + // The current position of copying_stream_, relative to the point where + // we started reading. + int64_t position_; + + // Data is read into this buffer. It may be NULL if no buffer is currently + // in use. Otherwise, it points to an array of size buffer_size_. + std::unique_ptr<uint8_t[]> buffer_; + const int buffer_size_; + + // Number of valid bytes currently in the buffer (i.e. the size last + // returned by Next()). 0 <= buffer_used_ <= buffer_size_. + int buffer_used_; + + // Number of bytes in the buffer which were backed up over by a call to + // BackUp(). These need to be returned again. + // 0 <= backup_bytes_ <= buffer_used_ + int backup_bytes_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingInputStreamAdaptor); +}; + +// =================================================================== + +// A generic traditional output stream interface. +// +// Lots of traditional output streams (e.g. file descriptors, C stdio +// streams, and C++ iostreams) expose an interface where every write +// involves copying bytes from a buffer. If you want to take such an +// interface and make a ZeroCopyOutputStream based on it, simply implement +// CopyingOutputStream and then use CopyingOutputStreamAdaptor. +// +// CopyingOutputStream implementations should avoid buffering if possible. +// CopyingOutputStreamAdaptor does its own buffering and will write data +// in large blocks. +class PROTOBUF_EXPORT CopyingOutputStream { + public: + virtual ~CopyingOutputStream() {} + + // Writes "size" bytes from the given buffer to the output. Returns true + // if successful, false on a write error. + virtual bool Write(const void* buffer, int size) = 0; +}; + +// A ZeroCopyOutputStream which writes to a CopyingOutputStream. This is +// useful for implementing ZeroCopyOutputStreams that write to traditional +// streams. Note that this class is not really zero-copy. +// +// If you want to write to file descriptors or C++ ostreams, this is +// already implemented for you: use FileOutputStream or OstreamOutputStream +// respectively. +class PROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStream { + public: + // Creates a stream that writes to the given Unix file descriptor. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit CopyingOutputStreamAdaptor(CopyingOutputStream* copying_stream, + int block_size = -1); + ~CopyingOutputStreamAdaptor() override; + + // Writes all pending data to the underlying stream. Returns false if a + // write error occurred on the underlying stream. (The underlying + // stream itself is not necessarily flushed.) + bool Flush(); + + // Call SetOwnsCopyingStream(true) to tell the CopyingOutputStreamAdaptor to + // delete the underlying CopyingOutputStream when it is destroyed. + void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; } + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + bool WriteAliasedRaw(const void* data, int size) override; + bool AllowsAliasing() const override { return true; } + + private: + // Write the current buffer, if it is present. + bool WriteBuffer(); + // Insures that buffer_ is not NULL. + void AllocateBufferIfNeeded(); + // Frees the buffer. + void FreeBuffer(); + + // The underlying copying stream. + CopyingOutputStream* copying_stream_; + bool owns_copying_stream_; + + // True if we have seen a permanent error from the underlying stream. + bool failed_; + + // The current position of copying_stream_, relative to the point where + // we started writing. + int64_t position_; + + // Data is written from this buffer. It may be NULL if no buffer is + // currently in use. Otherwise, it points to an array of size buffer_size_. + std::unique_ptr<uint8_t[]> buffer_; + const int buffer_size_; + + // Number of valid bytes currently in the buffer (i.e. the size last + // returned by Next()). When BackUp() is called, we just reduce this. + // 0 <= buffer_used_ <= buffer_size_. + int buffer_used_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOutputStreamAdaptor); +}; + +// =================================================================== + +// A ZeroCopyInputStream which wraps some other stream and limits it to +// a particular byte count. +class PROTOBUF_EXPORT LimitingInputStream PROTOBUF_FUTURE_FINAL + : public ZeroCopyInputStream { + public: + LimitingInputStream(ZeroCopyInputStream* input, int64_t limit); + ~LimitingInputStream() override; + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + + private: + ZeroCopyInputStream* input_; + int64_t limit_; // Decreases as we go, becomes negative if we overshoot. + int64_t prior_bytes_read_; // Bytes read on underlying stream at construction + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream); +}; + + +// =================================================================== + +// mutable_string_data() and as_string_data() are workarounds to improve +// the performance of writing new data to an existing string. Unfortunately +// the methods provided by the string class are suboptimal, and using memcpy() +// is mildly annoying because it requires its pointer args to be non-NULL even +// if we ask it to copy 0 bytes. Furthermore, string_as_array() has the +// property that it always returns NULL if its arg is the empty string, exactly +// what we want to avoid if we're using it in conjunction with memcpy()! +// With C++11, the desired memcpy() boils down to memcpy(..., &(*s)[0], size), +// where s is a string*. Without C++11, &(*s)[0] is not guaranteed to be safe, +// so we use string_as_array(), and live with the extra logic that tests whether +// *s is empty. + +// Return a pointer to mutable characters underlying the given string. The +// return value is valid until the next time the string is resized. We +// trust the caller to treat the return value as an array of length s->size(). +inline char* mutable_string_data(std::string* s) { + // This should be simpler & faster than string_as_array() because the latter + // is guaranteed to return NULL when *s is empty, so it has to check for that. + return &(*s)[0]; +} + +// as_string_data(s) is equivalent to +// ({ char* p = mutable_string_data(s); make_pair(p, p != NULL); }) +// Sometimes it's faster: in some scenarios p cannot be NULL, and then the +// code can avoid that check. +inline std::pair<char*, bool> as_string_data(std::string* s) { + char* p = mutable_string_data(s); + return std::make_pair(p, true); +} + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <google/protobuf/port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__ |