diff options
Diffstat (limited to 'include/google/protobuf/compiler/parser.h')
-rw-r--r-- | include/google/protobuf/compiler/parser.h | 602 |
1 files changed, 0 insertions, 602 deletions
diff --git a/include/google/protobuf/compiler/parser.h b/include/google/protobuf/compiler/parser.h deleted file mode 100644 index d4eb76302c..0000000000 --- a/include/google/protobuf/compiler/parser.h +++ /dev/null @@ -1,602 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Author: kenton@google.com (Kenton Varda) -// Based on original Protocol Buffers design by -// Sanjay Ghemawat, Jeff Dean, and others. -// -// Implements parsing of .proto files to FileDescriptorProtos. - -#ifndef GOOGLE_PROTOBUF_COMPILER_PARSER_H__ -#define GOOGLE_PROTOBUF_COMPILER_PARSER_H__ - -#include <cstdint> -#include <map> -#include <string> -#include <utility> - -#include <google/protobuf/descriptor.h> -#include <google/protobuf/descriptor.pb.h> -#include <google/protobuf/io/tokenizer.h> -#include <google/protobuf/repeated_field.h> - -// Must be included last. -#include <google/protobuf/port_def.inc> - -namespace google { -namespace protobuf { - -class Message; - -namespace compiler { - -// Defined in this file. -class Parser; -class SourceLocationTable; - -// Implements parsing of protocol definitions (such as .proto files). -// -// Note that most users will be more interested in the Importer class. -// Parser is a lower-level class which simply converts a single .proto file -// to a FileDescriptorProto. It does not resolve import directives or perform -// many other kinds of validation needed to construct a complete -// FileDescriptor. -class PROTOBUF_EXPORT Parser { - public: - Parser(); - ~Parser(); - - // Parse the entire input and construct a FileDescriptorProto representing - // it. Returns true if no errors occurred, false otherwise. - bool Parse(io::Tokenizer* input, FileDescriptorProto* file); - - // Optional features: - - // DEPRECATED: New code should use the SourceCodeInfo embedded in the - // FileDescriptorProto. - // - // Requests that locations of certain definitions be recorded to the given - // SourceLocationTable while parsing. This can be used to look up exact line - // and column numbers for errors reported by DescriptorPool during validation. - // Set to NULL (the default) to discard source location information. - void RecordSourceLocationsTo(SourceLocationTable* location_table) { - source_location_table_ = location_table; - } - - // Requests that errors be recorded to the given ErrorCollector while - // parsing. Set to NULL (the default) to discard error messages. - void RecordErrorsTo(io::ErrorCollector* error_collector) { - error_collector_ = error_collector; - } - - // Returns the identifier used in the "syntax = " declaration, if one was - // seen during the last call to Parse(), or the empty string otherwise. - const std::string& GetSyntaxIdentifier() { return syntax_identifier_; } - - // If set true, input files will be required to begin with a syntax - // identifier. Otherwise, files may omit this. If a syntax identifier - // is provided, it must be 'syntax = "proto2";' and must appear at the - // top of this file regardless of whether or not it was required. - void SetRequireSyntaxIdentifier(bool value) { - require_syntax_identifier_ = value; - } - - // Call SetStopAfterSyntaxIdentifier(true) to tell the parser to stop - // parsing as soon as it has seen the syntax identifier, or lack thereof. - // This is useful for quickly identifying the syntax of the file without - // parsing the whole thing. If this is enabled, no error will be recorded - // if the syntax identifier is something other than "proto2" (since - // presumably the caller intends to deal with that), but other kinds of - // errors (e.g. parse errors) will still be reported. When this is enabled, - // you may pass a NULL FileDescriptorProto to Parse(). - void SetStopAfterSyntaxIdentifier(bool value) { - stop_after_syntax_identifier_ = value; - } - - private: - class LocationRecorder; - struct MapField; - - // ================================================================= - // Error recovery helpers - - // Consume the rest of the current statement. This consumes tokens - // until it sees one of: - // ';' Consumes the token and returns. - // '{' Consumes the brace then calls SkipRestOfBlock(). - // '}' Returns without consuming. - // EOF Returns (can't consume). - // The Parser often calls SkipStatement() after encountering a syntax - // error. This allows it to go on parsing the following lines, allowing - // it to report more than just one error in the file. - void SkipStatement(); - - // Consume the rest of the current block, including nested blocks, - // ending after the closing '}' is encountered and consumed, or at EOF. - void SkipRestOfBlock(); - - // ----------------------------------------------------------------- - // Single-token consuming helpers - // - // These make parsing code more readable. - - // True if the current token is TYPE_END. - inline bool AtEnd(); - - // True if the next token matches the given text. - inline bool LookingAt(const char* text); - // True if the next token is of the given type. - inline bool LookingAtType(io::Tokenizer::TokenType token_type); - - // If the next token exactly matches the text given, consume it and return - // true. Otherwise, return false without logging an error. - bool TryConsume(const char* text); - - // These attempt to read some kind of token from the input. If successful, - // they return true. Otherwise they return false and add the given error - // to the error list. - - // Consume a token with the exact text given. - bool Consume(const char* text, const char* error); - // Same as above, but automatically generates the error "Expected \"text\".", - // where "text" is the expected token text. - bool Consume(const char* text); - // Consume a token of type IDENTIFIER and store its text in "output". - bool ConsumeIdentifier(std::string* output, const char* error); - // Consume an integer and store its value in "output". - bool ConsumeInteger(int* output, const char* error); - // Consume a signed integer and store its value in "output". - bool ConsumeSignedInteger(int* output, const char* error); - // Consume a 64-bit integer and store its value in "output". If the value - // is greater than max_value, an error will be reported. - bool ConsumeInteger64(uint64_t max_value, uint64_t* output, - const char* error); - // Consume a number and store its value in "output". This will accept - // tokens of either INTEGER or FLOAT type. - bool ConsumeNumber(double* output, const char* error); - // Consume a string literal and store its (unescaped) value in "output". - bool ConsumeString(std::string* output, const char* error); - - // Consume a token representing the end of the statement. Comments between - // this token and the next will be harvested for documentation. The given - // LocationRecorder should refer to the declaration that was just parsed; - // it will be populated with these comments. - // - // TODO(kenton): The LocationRecorder is const because historically locations - // have been passed around by const reference, for no particularly good - // reason. We should probably go through and change them all to mutable - // pointer to make this more intuitive. - bool TryConsumeEndOfDeclaration(const char* text, - const LocationRecorder* location); - bool TryConsumeEndOfDeclarationFinishScope(const char* text, - const LocationRecorder* location); - - bool ConsumeEndOfDeclaration(const char* text, - const LocationRecorder* location); - - // ----------------------------------------------------------------- - // Error logging helpers - - // Invokes error_collector_->AddError(), if error_collector_ is not NULL. - void AddError(int line, int column, const std::string& error); - - // Invokes error_collector_->AddError() with the line and column number - // of the current token. - void AddError(const std::string& error); - - // Invokes error_collector_->AddWarning() with the line and column number - // of the current token. - void AddWarning(const std::string& warning); - - // Records a location in the SourceCodeInfo.location table (see - // descriptor.proto). We use RAII to ensure that the start and end locations - // are recorded -- the constructor records the start location and the - // destructor records the end location. Since the parser is - // recursive-descent, this works out beautifully. - class PROTOBUF_EXPORT LocationRecorder { - public: - // Construct the file's "root" location. - LocationRecorder(Parser* parser); - - // Construct a location that represents a declaration nested within the - // given parent. E.g. a field's location is nested within the location - // for a message type. The parent's path will be copied, so you should - // call AddPath() only to add the path components leading from the parent - // to the child (as opposed to leading from the root to the child). - LocationRecorder(const LocationRecorder& parent); - - // Convenience constructors that call AddPath() one or two times. - LocationRecorder(const LocationRecorder& parent, int path1); - LocationRecorder(const LocationRecorder& parent, int path1, int path2); - - // Creates a recorder that generates locations into given source code info. - LocationRecorder(const LocationRecorder& parent, int path1, - SourceCodeInfo* source_code_info); - - ~LocationRecorder(); - - // Add a path component. See SourceCodeInfo.Location.path in - // descriptor.proto. - void AddPath(int path_component); - - // By default the location is considered to start at the current token at - // the time the LocationRecorder is created. StartAt() sets the start - // location to the given token instead. - void StartAt(const io::Tokenizer::Token& token); - - // Start at the same location as some other LocationRecorder. - void StartAt(const LocationRecorder& other); - - // By default the location is considered to end at the previous token at - // the time the LocationRecorder is destroyed. EndAt() sets the end - // location to the given token instead. - void EndAt(const io::Tokenizer::Token& token); - - // Records the start point of this location to the SourceLocationTable that - // was passed to RecordSourceLocationsTo(), if any. SourceLocationTable - // is an older way of keeping track of source locations which is still - // used in some places. - void RecordLegacyLocation( - const Message* descriptor, - DescriptorPool::ErrorCollector::ErrorLocation location); - void RecordLegacyImportLocation(const Message* descriptor, - const std::string& name); - - // Returns the number of path components in the recorder's current location. - int CurrentPathSize() const; - - // Attaches leading and trailing comments to the location. The two strings - // will be swapped into place, so after this is called *leading and - // *trailing will be empty. - // - // TODO(kenton): See comment on TryConsumeEndOfDeclaration(), above, for - // why this is const. - void AttachComments(std::string* leading, std::string* trailing, - std::vector<std::string>* detached_comments) const; - - private: - Parser* parser_; - SourceCodeInfo* source_code_info_; - SourceCodeInfo::Location* location_; - - void Init(const LocationRecorder& parent, SourceCodeInfo* source_code_info); - }; - - // ================================================================= - // Parsers for various language constructs - - // Parses the "syntax = \"proto2\";" line at the top of the file. Returns - // false if it failed to parse or if the syntax identifier was not - // recognized. - bool ParseSyntaxIdentifier(const LocationRecorder& parent); - - // These methods parse various individual bits of code. They return - // false if they completely fail to parse the construct. In this case, - // it is probably necessary to skip the rest of the statement to recover. - // However, if these methods return true, it does NOT mean that there - // were no errors; only that there were no *syntax* errors. For instance, - // if a service method is defined using proper syntax but uses a primitive - // type as its input or output, ParseMethodField() still returns true - // and only reports the error by calling AddError(). In practice, this - // makes logic much simpler for the caller. - - // Parse a top-level message, enum, service, etc. - bool ParseTopLevelStatement(FileDescriptorProto* file, - const LocationRecorder& root_location); - - // Parse various language high-level language construrcts. - bool ParseMessageDefinition(DescriptorProto* message, - const LocationRecorder& message_location, - const FileDescriptorProto* containing_file); - bool ParseEnumDefinition(EnumDescriptorProto* enum_type, - const LocationRecorder& enum_location, - const FileDescriptorProto* containing_file); - bool ParseServiceDefinition(ServiceDescriptorProto* service, - const LocationRecorder& service_location, - const FileDescriptorProto* containing_file); - bool ParsePackage(FileDescriptorProto* file, - const LocationRecorder& root_location, - const FileDescriptorProto* containing_file); - bool ParseImport(RepeatedPtrField<std::string>* dependency, - RepeatedField<int32_t>* public_dependency, - RepeatedField<int32_t>* weak_dependency, - const LocationRecorder& root_location, - const FileDescriptorProto* containing_file); - - // These methods parse the contents of a message, enum, or service type and - // add them to the given object. They consume the entire block including - // the beginning and ending brace. - bool ParseMessageBlock(DescriptorProto* message, - const LocationRecorder& message_location, - const FileDescriptorProto* containing_file); - bool ParseEnumBlock(EnumDescriptorProto* enum_type, - const LocationRecorder& enum_location, - const FileDescriptorProto* containing_file); - bool ParseServiceBlock(ServiceDescriptorProto* service, - const LocationRecorder& service_location, - const FileDescriptorProto* containing_file); - - // Parse one statement within a message, enum, or service block, including - // final semicolon. - bool ParseMessageStatement(DescriptorProto* message, - const LocationRecorder& message_location, - const FileDescriptorProto* containing_file); - bool ParseEnumStatement(EnumDescriptorProto* message, - const LocationRecorder& enum_location, - const FileDescriptorProto* containing_file); - bool ParseServiceStatement(ServiceDescriptorProto* message, - const LocationRecorder& service_location, - const FileDescriptorProto* containing_file); - - // Parse a field of a message. If the field is a group, its type will be - // added to "messages". - // - // parent_location and location_field_number_for_nested_type are needed when - // parsing groups -- we need to generate a nested message type within the - // parent and record its location accordingly. Since the parent could be - // either a FileDescriptorProto or a DescriptorProto, we must pass in the - // correct field number to use. - bool ParseMessageField(FieldDescriptorProto* field, - RepeatedPtrField<DescriptorProto>* messages, - const LocationRecorder& parent_location, - int location_field_number_for_nested_type, - const LocationRecorder& field_location, - const FileDescriptorProto* containing_file); - - // Like ParseMessageField() but expects the label has already been filled in - // by the caller. - bool ParseMessageFieldNoLabel(FieldDescriptorProto* field, - RepeatedPtrField<DescriptorProto>* messages, - const LocationRecorder& parent_location, - int location_field_number_for_nested_type, - const LocationRecorder& field_location, - const FileDescriptorProto* containing_file); - - bool ParseMapType(MapField* map_field, FieldDescriptorProto* field, - LocationRecorder& type_name_location); - - // Parse an "extensions" declaration. - bool ParseExtensions(DescriptorProto* message, - const LocationRecorder& extensions_location, - const FileDescriptorProto* containing_file); - - // Parse a "reserved" declaration. - bool ParseReserved(DescriptorProto* message, - const LocationRecorder& message_location); - bool ParseReservedNames(DescriptorProto* message, - const LocationRecorder& parent_location); - bool ParseReservedNumbers(DescriptorProto* message, - const LocationRecorder& parent_location); - bool ParseReserved(EnumDescriptorProto* message, - const LocationRecorder& message_location); - bool ParseReservedNames(EnumDescriptorProto* message, - const LocationRecorder& parent_location); - bool ParseReservedNumbers(EnumDescriptorProto* message, - const LocationRecorder& parent_location); - - // Parse an "extend" declaration. (See also comments for - // ParseMessageField().) - bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions, - RepeatedPtrField<DescriptorProto>* messages, - const LocationRecorder& parent_location, - int location_field_number_for_nested_type, - const LocationRecorder& extend_location, - const FileDescriptorProto* containing_file); - - // Parse a "oneof" declaration. The caller is responsible for setting - // oneof_decl->label() since it will have had to parse the label before it - // knew it was parsing a oneof. - bool ParseOneof(OneofDescriptorProto* oneof_decl, - DescriptorProto* containing_type, int oneof_index, - const LocationRecorder& oneof_location, - const LocationRecorder& containing_type_location, - const FileDescriptorProto* containing_file); - - // Parse a single enum value within an enum block. - bool ParseEnumConstant(EnumValueDescriptorProto* enum_value, - const LocationRecorder& enum_value_location, - const FileDescriptorProto* containing_file); - - // Parse enum constant options, i.e. the list in square brackets at the end - // of the enum constant value definition. - bool ParseEnumConstantOptions(EnumValueDescriptorProto* value, - const LocationRecorder& enum_value_location, - const FileDescriptorProto* containing_file); - - // Parse a single method within a service definition. - bool ParseServiceMethod(MethodDescriptorProto* method, - const LocationRecorder& method_location, - const FileDescriptorProto* containing_file); - - // Parse options of a single method or stream. - bool ParseMethodOptions(const LocationRecorder& parent_location, - const FileDescriptorProto* containing_file, - const int optionsFieldNumber, - Message* mutable_options); - - // Parse "required", "optional", or "repeated" and fill in "label" - // with the value. Returns true if such a label is consumed. - bool ParseLabel(FieldDescriptorProto::Label* label, - const LocationRecorder& field_location); - - // Parse a type name and fill in "type" (if it is a primitive) or - // "type_name" (if it is not) with the type parsed. - bool ParseType(FieldDescriptorProto::Type* type, std::string* type_name); - // Parse a user-defined type and fill in "type_name" with the name. - // If a primitive type is named, it is treated as an error. - bool ParseUserDefinedType(std::string* type_name); - - // Parses field options, i.e. the stuff in square brackets at the end - // of a field definition. Also parses default value. - bool ParseFieldOptions(FieldDescriptorProto* field, - const LocationRecorder& field_location, - const FileDescriptorProto* containing_file); - - // Parse the "default" option. This needs special handling because its - // type is the field's type. - bool ParseDefaultAssignment(FieldDescriptorProto* field, - const LocationRecorder& field_location, - const FileDescriptorProto* containing_file); - - bool ParseJsonName(FieldDescriptorProto* field, - const LocationRecorder& field_location, - const FileDescriptorProto* containing_file); - - enum OptionStyle { - OPTION_ASSIGNMENT, // just "name = value" - OPTION_STATEMENT // "option name = value;" - }; - - // Parse a single option name/value pair, e.g. "ctype = CORD". The name - // identifies a field of the given Message, and the value of that field - // is set to the parsed value. - bool ParseOption(Message* options, const LocationRecorder& options_location, - const FileDescriptorProto* containing_file, - OptionStyle style); - - // Parses a single part of a multipart option name. A multipart name consists - // of names separated by dots. Each name is either an identifier or a series - // of identifiers separated by dots and enclosed in parentheses. E.g., - // "foo.(bar.baz).moo". - bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option, - const LocationRecorder& part_location, - const FileDescriptorProto* containing_file); - - // Parses a string surrounded by balanced braces. Strips off the outer - // braces and stores the enclosed string in *value. - // E.g., - // { foo } *value gets 'foo' - // { foo { bar: box } } *value gets 'foo { bar: box }' - // {} *value gets '' - // - // REQUIRES: LookingAt("{") - // When finished successfully, we are looking at the first token past - // the ending brace. - bool ParseUninterpretedBlock(std::string* value); - - struct MapField { - // Whether the field is a map field. - bool is_map_field; - // The types of the key and value if they are primitive types. - FieldDescriptorProto::Type key_type; - FieldDescriptorProto::Type value_type; - // Or the type names string if the types are customized types. - std::string key_type_name; - std::string value_type_name; - - MapField() : is_map_field(false) {} - }; - // Desugar the map syntax to generate a nested map entry message. - void GenerateMapEntry(const MapField& map_field, FieldDescriptorProto* field, - RepeatedPtrField<DescriptorProto>* messages); - - // Whether fields without label default to optional fields. - bool DefaultToOptionalFields() const { - return syntax_identifier_ == "proto3"; - } - - bool ValidateEnum(const EnumDescriptorProto* proto); - - // ================================================================= - - io::Tokenizer* input_; - io::ErrorCollector* error_collector_; - SourceCodeInfo* source_code_info_; - SourceLocationTable* source_location_table_; // legacy - bool had_errors_; - bool require_syntax_identifier_; - bool stop_after_syntax_identifier_; - std::string syntax_identifier_; - - // Leading doc comments for the next declaration. These are not complete - // yet; use ConsumeEndOfDeclaration() to get the complete comments. - std::string upcoming_doc_comments_; - - // Detached comments are not connected to any syntax entities. Elements in - // this vector are paragraphs of comments separated by empty lines. The - // detached comments will be put into the leading_detached_comments field for - // the next element (See SourceCodeInfo.Location in descriptor.proto), when - // ConsumeEndOfDeclaration() is called. - std::vector<std::string> upcoming_detached_comments_; - - GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser); -}; - -// A table mapping (descriptor, ErrorLocation) pairs -- as reported by -// DescriptorPool when validating descriptors -- to line and column numbers -// within the original source code. -// -// This is semi-obsolete: FileDescriptorProto.source_code_info now contains -// far more complete information about source locations. However, as of this -// writing you still need to use SourceLocationTable when integrating with -// DescriptorPool. -class PROTOBUF_EXPORT SourceLocationTable { - public: - SourceLocationTable(); - ~SourceLocationTable(); - - // Finds the precise location of the given error and fills in *line and - // *column with the line and column numbers. If not found, sets *line to - // -1 and *column to 0 (since line = -1 is used to mean "error has no exact - // location" in the ErrorCollector interface). Returns true if found, false - // otherwise. - bool Find(const Message* descriptor, - DescriptorPool::ErrorCollector::ErrorLocation location, int* line, - int* column) const; - bool FindImport(const Message* descriptor, const std::string& name, int* line, - int* column) const; - - // Adds a location to the table. - void Add(const Message* descriptor, - DescriptorPool::ErrorCollector::ErrorLocation location, int line, - int column); - void AddImport(const Message* descriptor, const std::string& name, int line, - int column); - - // Clears the contents of the table. - void Clear(); - - private: - typedef std::map< - std::pair<const Message*, DescriptorPool::ErrorCollector::ErrorLocation>, - std::pair<int, int> > - LocationMap; - LocationMap location_map_; - std::map<std::pair<const Message*, std::string>, std::pair<int, int> > - import_location_map_; -}; - -} // namespace compiler -} // namespace protobuf -} // namespace google - -#include <google/protobuf/port_undef.inc> - -#endif // GOOGLE_PROTOBUF_COMPILER_PARSER_H__ |