summaryrefslogtreecommitdiff
path: root/libs/litehtml/src/gumbo/include
diff options
context:
space:
mode:
authorGeorge Hazan <george.hazan@gmail.com>2024-03-18 12:13:54 +0300
committerGeorge Hazan <george.hazan@gmail.com>2024-03-18 12:13:54 +0300
commit705c4d24c9c61edffc82864bf9c24384dc29a8d7 (patch)
tree4d21f87671db36b99402da3221d45b64c257c1fe /libs/litehtml/src/gumbo/include
parent5784fc3a62b9136c6690ed45ec7b505f35512e08 (diff)
litehtml - lightweight html renderer
Diffstat (limited to 'libs/litehtml/src/gumbo/include')
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo.h675
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/attribute.h37
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/char_ref.h60
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/error.h227
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/insertion_mode.h57
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/parser.h57
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/string_buffer.h84
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/string_piece.h38
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/tag_enum.h153
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/tag_gperf.h105
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/tag_sizes.h4
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/tag_strings.h153
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/token_type.h41
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/tokenizer.h123
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/tokenizer_states.h103
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/utf8.h132
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/util.h62
-rw-r--r--libs/litehtml/src/gumbo/include/gumbo/vector.h67
18 files changed, 2178 insertions, 0 deletions
diff --git a/libs/litehtml/src/gumbo/include/gumbo.h b/libs/litehtml/src/gumbo/include/gumbo.h
new file mode 100644
index 0000000000..27e6c6c575
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo.h
@@ -0,0 +1,675 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// We use Gumbo as a prefix for types, gumbo_ as a prefix for functions, and
+// GUMBO_ as a prefix for enum constants (static constants get the Google-style
+// kGumbo prefix).
+
+/**
+ * @file
+ * @mainpage Gumbo HTML Parser
+ *
+ * This provides a conformant, no-dependencies implementation of the HTML5
+ * parsing algorithm. It supports only UTF8; if you need to parse a different
+ * encoding, run a preprocessing step to convert to UTF8. It returns a parse
+ * tree made of the structs in this file.
+ *
+ * Example:
+ * @code
+ * GumboOutput* output = gumbo_parse(input);
+ * do_something_with_doctype(output->document);
+ * do_something_with_html_tree(output->root);
+ * gumbo_destroy_output(&kGumboDefaultOptions, output);
+ * @endcode
+ * HTML5 Spec:
+ *
+ * http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
+ */
+
+#ifndef GUMBO_GUMBO_H_
+#define GUMBO_GUMBO_H_
+
+#ifdef _MSC_VER
+#ifndef _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+#ifndef fileno
+#define fileno _fileno
+#endif
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * A struct representing a character position within the original text buffer.
+ * Line and column numbers are 1-based and offsets are 0-based, which matches
+ * how most editors and command-line tools work. Also, columns measure
+ * positions in terms of characters while offsets measure by bytes; this is
+ * because the offset field is often used to pull out a particular region of
+ * text (which in most languages that bind to C implies pointer arithmetic on a
+ * buffer of bytes), while the column field is often used to reference a
+ * particular column on a printable display, which nowadays is usually UTF-8.
+ */
+typedef struct {
+ unsigned int line;
+ unsigned int column;
+ unsigned int offset;
+} GumboSourcePosition;
+
+/**
+ * A SourcePosition used for elements that have no source position, i.e.
+ * parser-inserted elements.
+ */
+extern const GumboSourcePosition kGumboEmptySourcePosition;
+
+/**
+ * A struct representing a string or part of a string. Strings within the
+ * parser are represented by a char* and a length; the char* points into
+ * an existing data buffer owned by some other code (often the original input).
+ * GumboStringPieces are assumed (by convention) to be immutable, because they
+ * may share data. Use GumboStringBuffer if you need to construct a string.
+ * Clients should assume that it is not NUL-terminated, and should always use
+ * explicit lengths when manipulating them.
+ */
+typedef struct {
+ /** A pointer to the beginning of the string. NULL iff length == 0. */
+ const char* data;
+
+ /** The length of the string fragment, in bytes. May be zero. */
+ size_t length;
+} GumboStringPiece;
+
+/** A constant to represent a 0-length null string. */
+extern const GumboStringPiece kGumboEmptyString;
+
+/**
+ * Compares two GumboStringPieces, and returns true if they're equal or false
+ * otherwise.
+ */
+bool gumbo_string_equals(
+ const GumboStringPiece* str1, const GumboStringPiece* str2);
+
+/**
+ * Compares two GumboStringPieces ignoring case, and returns true if they're
+ * equal or false otherwise.
+ */
+bool gumbo_string_equals_ignore_case(
+ const GumboStringPiece* str1, const GumboStringPiece* str2);
+
+/**
+ * A simple vector implementation. This stores a pointer to a data array and a
+ * length. All elements are stored as void*; client code must cast to the
+ * appropriate type. Overflows upon addition result in reallocation of the data
+ * array, with the size doubling to maintain O(1) amortized cost. There is no
+ * removal function, as this isn't needed for any of the operations within this
+ * library. Iteration can be done through inspecting the structure directly in
+ * a for-loop.
+ */
+typedef struct {
+ /** Data elements. This points to a dynamically-allocated array of capacity
+ * elements, each a void* to the element itself.
+ */
+ void** data;
+
+ /** Number of elements currently in the vector. */
+ unsigned int length;
+
+ /** Current array capacity. */
+ unsigned int capacity;
+} GumboVector;
+
+/** An empty (0-length, 0-capacity) GumboVector. */
+extern const GumboVector kGumboEmptyVector;
+
+/**
+ * Returns the first index at which an element appears in this vector (testing
+ * by pointer equality), or -1 if it never does.
+ */
+int gumbo_vector_index_of(GumboVector* vector, const void* element);
+
+/**
+ * An enum for all the tags defined in the HTML5 standard. These correspond to
+ * the tag names themselves. Enum constants exist only for tags which appear in
+ * the spec itself (or for tags with special handling in the SVG and MathML
+ * namespaces); any other tags appear as GUMBO_TAG_UNKNOWN and the actual tag
+ * name can be obtained through original_tag.
+ *
+ * This is mostly for API convenience, so that clients of this library don't
+ * need to perform a strcasecmp to find the normalized tag name. It also has
+ * efficiency benefits, by letting the parser work with enums instead of
+ * strings.
+ */
+typedef enum {
+// Load all the tags from an external source, generated from tag.in.
+#include "gumbo/tag_enum.h"
+ // Used for all tags that don't have special handling in HTML. Add new tags
+ // to the end of tag.in so as to preserve backwards-compatibility.
+ GUMBO_TAG_UNKNOWN,
+ // A marker value to indicate the end of the enum, for iterating over it.
+ // Also used as the terminator for varargs functions that take tags.
+ GUMBO_TAG_LAST,
+} GumboTag;
+
+/**
+ * Returns the normalized (usually all-lowercased, except for foreign content)
+ * tag name for a GumboTag enum. Return value is static data owned by the
+ * library.
+ */
+const char* gumbo_normalized_tagname(GumboTag tag);
+
+/**
+ * Extracts the tag name from the original_text field of an element or token by
+ * stripping off </> characters and attributes and adjusting the passed-in
+ * GumboStringPiece appropriately. The tag name is in the original case and
+ * shares a buffer with the original text, to simplify memory management.
+ * Behavior is undefined if a string-piece that doesn't represent an HTML tag
+ * (<tagname> or </tagname>) is passed in. If the string piece is completely
+ * empty (NULL data pointer), then this function will exit successfully as a
+ * no-op.
+ */
+void gumbo_tag_from_original_text(GumboStringPiece* text);
+
+/**
+ * Fixes the case of SVG elements that are not all lowercase.
+ * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inforeign
+ * This is not done at parse time because there's no place to store a mutated
+ * tag name. GumboElement.tag is an enum (GUMBO_TAG_UNKNOWN for most SVG tags
+ * without special handling), while GumboElement.original_tag points into the
+ * original buffer. Instead, we provide this helper function that clients can
+ * use to rename SVG tags as appropriate.
+ * Returns the case-normalized SVG tagname if a replacement is found, or NULL if
+ * no normalization is called for. The return value is static data and owned by
+ * the library.
+ */
+const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
+
+/**
+ * Converts a tag name string (which may be in upper or mixed case) to a tag
+ * enum. gumbo_tag_enum() expects `tagname` to be NUL-terminated.
+ */
+GumboTag gumbo_tag_enum(const char* tagname);
+GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
+
+/**
+ * Attribute namespaces.
+ * HTML includes special handling for XLink, XML, and XMLNS namespaces on
+ * attributes. Everything else goes in the generic "NONE" namespace.
+ */
+typedef enum {
+ GUMBO_ATTR_NAMESPACE_NONE,
+ GUMBO_ATTR_NAMESPACE_XLINK,
+ GUMBO_ATTR_NAMESPACE_XML,
+ GUMBO_ATTR_NAMESPACE_XMLNS,
+} GumboAttributeNamespaceEnum;
+
+/**
+ * A struct representing a single attribute on an HTML tag. This is a
+ * name-value pair, but also includes information about source locations and
+ * original source text.
+ */
+typedef struct {
+ /**
+ * The namespace for the attribute. This will usually be
+ * GUMBO_ATTR_NAMESPACE_NONE, but some XLink/XMLNS/XML attributes take special
+ * values, per:
+ * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-foreign-attributes
+ */
+ GumboAttributeNamespaceEnum attr_namespace;
+
+ /**
+ * The name of the attribute. This is in a freshly-allocated buffer to deal
+ * with case-normalization, and is null-terminated.
+ */
+ const char* name;
+
+ /**
+ * The original text of the attribute name, as a pointer into the original
+ * source buffer.
+ */
+ GumboStringPiece original_name;
+
+ /**
+ * The value of the attribute. This is in a freshly-allocated buffer to deal
+ * with unescaping, and is null-terminated. It does not include any quotes
+ * that surround the attribute. If the attribute has no value (for example,
+ * 'selected' on a checkbox), this will be an empty string.
+ */
+ const char* value;
+
+ /**
+ * The original text of the value of the attribute. This points into the
+ * original source buffer. It includes any quotes that surround the
+ * attribute, and you can look at original_value.data[0] and
+ * original_value.data[original_value.length - 1] to determine what the quote
+ * characters were. If the attribute has no value, this will be a 0-length
+ * string.
+ */
+ GumboStringPiece original_value;
+
+ /** The starting position of the attribute name. */
+ GumboSourcePosition name_start;
+
+ /**
+ * The ending position of the attribute name. This is not always derivable
+ * from the starting position of the value because of the possibility of
+ * whitespace around the = sign.
+ */
+ GumboSourcePosition name_end;
+
+ /** The starting position of the attribute value. */
+ GumboSourcePosition value_start;
+
+ /** The ending position of the attribute value. */
+ GumboSourcePosition value_end;
+} GumboAttribute;
+
+/**
+ * Given a vector of GumboAttributes, look up the one with the specified name
+ * and return it, or NULL if no such attribute exists. This uses a
+ * case-insensitive match, as HTML is case-insensitive.
+ */
+GumboAttribute* gumbo_get_attribute(const GumboVector* attrs, const char* name);
+
+/**
+ * Enum denoting the type of node. This determines the type of the node.v
+ * union.
+ */
+typedef enum {
+ /** Document node. v will be a GumboDocument. */
+ GUMBO_NODE_DOCUMENT,
+ /** Element node. v will be a GumboElement. */
+ GUMBO_NODE_ELEMENT,
+ /** Text node. v will be a GumboText. */
+ GUMBO_NODE_TEXT,
+ /** CDATA node. v will be a GumboText. */
+ GUMBO_NODE_CDATA,
+ /** Comment node. v will be a GumboText, excluding comment delimiters. */
+ GUMBO_NODE_COMMENT,
+ /** Text node, where all contents is whitespace. v will be a GumboText. */
+ GUMBO_NODE_WHITESPACE,
+ /** Template node. This is separate from GUMBO_NODE_ELEMENT because many
+ * client libraries will want to ignore the contents of template nodes, as
+ * the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
+ * here, while clients that want to include template contents should also
+ * check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
+ GUMBO_NODE_TEMPLATE
+} GumboNodeType;
+
+/**
+ * Forward declaration of GumboNode so it can be used recursively in
+ * GumboNode.parent.
+ */
+typedef struct GumboInternalNode GumboNode;
+
+/**
+ * http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
+ */
+typedef enum {
+ GUMBO_DOCTYPE_NO_QUIRKS,
+ GUMBO_DOCTYPE_QUIRKS,
+ GUMBO_DOCTYPE_LIMITED_QUIRKS
+} GumboQuirksModeEnum;
+
+/**
+ * Namespaces.
+ * Unlike in X(HT)ML, namespaces in HTML5 are not denoted by a prefix. Rather,
+ * anything inside an <svg> tag is in the SVG namespace, anything inside the
+ * <math> tag is in the MathML namespace, and anything else is inside the HTML
+ * namespace. No other namespaces are supported, so this can be an enum only.
+ */
+typedef enum {
+ GUMBO_NAMESPACE_HTML,
+ GUMBO_NAMESPACE_SVG,
+ GUMBO_NAMESPACE_MATHML
+} GumboNamespaceEnum;
+
+/**
+ * Parse flags.
+ * We track the reasons for parser insertion of nodes and store them in a
+ * bitvector in the node itself. This lets client code optimize out nodes that
+ * are implied by the HTML structure of the document, or flag constructs that
+ * may not be allowed by a style guide, or track the prevalence of incorrect or
+ * tricky HTML code.
+ */
+typedef enum {
+ /**
+ * A normal node - both start and end tags appear in the source, nothing has
+ * been reparented.
+ */
+ GUMBO_INSERTION_NORMAL = 0,
+
+ /**
+ * A node inserted by the parser to fulfill some implicit insertion rule.
+ * This is usually set in addition to some other flag giving a more specific
+ * insertion reason; it's a generic catch-all term meaning "The start tag for
+ * this node did not appear in the document source".
+ */
+ GUMBO_INSERTION_BY_PARSER = 1 << 0,
+
+ /**
+ * A flag indicating that the end tag for this node did not appear in the
+ * document source. Note that in some cases, you can still have
+ * parser-inserted nodes with an explicit end tag: for example, "Text</html>"
+ * has GUMBO_INSERTION_BY_PARSER set on the <html> node, but
+ * GUMBO_INSERTION_IMPLICIT_END_TAG is unset, as the </html> tag actually
+ * exists. This flag will be set only if the end tag is completely missing;
+ * in some cases, the end tag may be misplaced (eg. a </body> tag with text
+ * afterwards), which will leave this flag unset and require clients to
+ * inspect the parse errors for that case.
+ */
+ GUMBO_INSERTION_IMPLICIT_END_TAG = 1 << 1,
+
+ // Value 1 << 2 was for a flag that has since been removed.
+
+ /**
+ * A flag for nodes that are inserted because their presence is implied by
+ * other tags, eg. <html>, <head>, <body>, <tbody>, etc.
+ */
+ GUMBO_INSERTION_IMPLIED = 1 << 3,
+
+ /**
+ * A flag for nodes that are converted from their end tag equivalents. For
+ * example, </p> when no paragraph is open implies that the parser should
+ * create a <p> tag and immediately close it, while </br> means the same thing
+ * as <br>.
+ */
+ GUMBO_INSERTION_CONVERTED_FROM_END_TAG = 1 << 4,
+
+ /** A flag for nodes that are converted from the parse of an <isindex> tag. */
+ GUMBO_INSERTION_FROM_ISINDEX = 1 << 5,
+
+ /** A flag for <image> tags that are rewritten as <img>. */
+ GUMBO_INSERTION_FROM_IMAGE = 1 << 6,
+
+ /**
+ * A flag for nodes that are cloned as a result of the reconstruction of
+ * active formatting elements. This is set only on the clone; the initial
+ * portion of the formatting run is a NORMAL node with an IMPLICIT_END_TAG.
+ */
+ GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT = 1 << 7,
+
+ /** A flag for nodes that are cloned by the adoption agency algorithm. */
+ GUMBO_INSERTION_ADOPTION_AGENCY_CLONED = 1 << 8,
+
+ /** A flag for nodes that are moved by the adoption agency algorithm. */
+ GUMBO_INSERTION_ADOPTION_AGENCY_MOVED = 1 << 9,
+
+ /**
+ * A flag for nodes that have been foster-parented out of a table (or
+ * should've been foster-parented, if verbatim mode is set).
+ */
+ GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
+} GumboParseFlags;
+
+/**
+ * Information specific to document nodes.
+ */
+typedef struct {
+ /**
+ * An array of GumboNodes, containing the children of this document. This will
+ * normally consist of the <html> element and any comment nodes found.
+ * Pointers are owned.
+ */
+ GumboVector /* GumboNode* */ children;
+
+ // True if there was an explicit doctype token as opposed to it being omitted.
+ bool has_doctype;
+
+ // Fields from the doctype token, copied verbatim.
+ const char* name;
+ const char* public_identifier;
+ const char* system_identifier;
+
+ /**
+ * Whether or not the document is in QuirksMode, as determined by the values
+ * in the GumboTokenDocType template.
+ */
+ GumboQuirksModeEnum doc_type_quirks_mode;
+} GumboDocument;
+
+/**
+ * The struct used to represent TEXT, CDATA, COMMENT, and WHITESPACE elements.
+ * This contains just a block of text and its position.
+ */
+typedef struct {
+ /**
+ * The text of this node, after entities have been parsed and decoded. For
+ * comment/cdata nodes, this does not include the comment delimiters.
+ */
+ const char* text;
+
+ /**
+ * The original text of this node, as a pointer into the original buffer. For
+ * comment/cdata nodes, this includes the comment delimiters.
+ */
+ GumboStringPiece original_text;
+
+ /**
+ * The starting position of this node. This corresponds to the position of
+ * original_text, before entities are decoded.
+ * */
+ GumboSourcePosition start_pos;
+} GumboText;
+
+/**
+ * The struct used to represent all HTML elements. This contains information
+ * about the tag, attributes, and child nodes.
+ */
+typedef struct {
+ /**
+ * An array of GumboNodes, containing the children of this element. Pointers
+ * are owned.
+ */
+ GumboVector /* GumboNode* */ children;
+
+ /** The GumboTag enum for this element. */
+ GumboTag tag;
+
+ /** The GumboNamespaceEnum for this element. */
+ GumboNamespaceEnum tag_namespace;
+
+ /**
+ * A GumboStringPiece pointing to the original tag text for this element,
+ * pointing directly into the source buffer. If the tag was inserted
+ * algorithmically (for example, <head> or <tbody> insertion), this will be a
+ * zero-length string.
+ */
+ GumboStringPiece original_tag;
+
+ /**
+ * A GumboStringPiece pointing to the original end tag text for this element.
+ * If the end tag was inserted algorithmically, (for example, closing a
+ * self-closing tag), this will be a zero-length string.
+ */
+ GumboStringPiece original_end_tag;
+
+ /** The source position for the start of the start tag. */
+ GumboSourcePosition start_pos;
+
+ /** The source position for the start of the end tag. */
+ GumboSourcePosition end_pos;
+
+ /**
+ * An array of GumboAttributes, containing the attributes for this tag in the
+ * order that they were parsed. Pointers are owned.
+ */
+ GumboVector /* GumboAttribute* */ attributes;
+} GumboElement;
+
+/**
+ * A supertype for GumboDocument, GumboElement and GumboText, so that we can
+ * include one generic type in lists of children and cast as necessary.
+ */
+struct GumboInternalNode {
+ /** The type of node that this is. */
+ GumboNodeType type;
+
+ /** Pointer back to parent node. Not owned. */
+ GumboNode* parent;
+
+ /** The index within the parent's children vector of this node. */
+ size_t index_within_parent;
+
+ /**
+ * A bitvector of flags containing information about why this element was
+ * inserted into the parse tree, including a variety of special parse
+ * situations.
+ */
+ GumboParseFlags parse_flags;
+
+ /** The actual node data. */
+ union {
+ GumboDocument document; // For GUMBO_NODE_DOCUMENT.
+ GumboElement element; // For GUMBO_NODE_ELEMENT and GUMBO_NODE_TEMPLATE.
+ GumboText text; // For TEXT, CDATA, COMMENT and WHITESPACE nodes.
+ } v;
+};
+
+/**
+ * The type for an allocator function. Takes the 'userdata' member of the
+ * GumboParser struct as its first argument. Semantics should be the same as
+ * malloc, i.e. return a block of `size` bytes on success or NULL on failure.
+ * Allocating a block of 0 bytes behaves as per malloc.
+ */
+// TODO(jdtang): Add checks throughout the codebase for out-of-memory condition.
+typedef void* (*GumboAllocatorFunction)(void* userdata, size_t size);
+
+/**
+ * The type for a deallocator function. Takes the 'userdata' member of the
+ * GumboParser struct as its first argument.
+ */
+typedef void (*GumboDeallocatorFunction)(void* userdata, void* ptr);
+
+/**
+ * Input struct containing configuration options for the parser.
+ * These let you specify alternate memory managers, provide different error
+ * handling, etc.
+ * Use kGumboDefaultOptions for sensible defaults, and only set what you need.
+ */
+typedef struct GumboInternalOptions {
+ /** A memory allocator function. Default: malloc. */
+ GumboAllocatorFunction allocator;
+
+ /** A memory deallocator function. Default: free. */
+ GumboDeallocatorFunction deallocator;
+
+ /**
+ * An opaque object that's passed in as the first argument to all callbacks
+ * used by this library. Default: NULL.
+ */
+ void* userdata;
+
+ /**
+ * The tab-stop size, for computing positions in source code that uses tabs.
+ * Default: 8.
+ */
+ int tab_stop;
+
+ /**
+ * Whether or not to stop parsing when the first error is encountered.
+ * Default: false.
+ */
+ bool stop_on_first_error;
+
+ /**
+ * The maximum number of errors before the parser stops recording them. This
+ * is provided so that if the page is totally borked, we don't completely fill
+ * up the errors vector and exhaust memory with useless redundant errors. Set
+ * to -1 to disable the limit.
+ * Default: -1
+ */
+ int max_errors;
+
+ /**
+ * The fragment context for parsing:
+ * https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
+ *
+ * If GUMBO_TAG_LAST is passed here, it is assumed to be "no fragment", i.e.
+ * the regular parsing algorithm. Otherwise, pass the tag enum for the
+ * intended parent of the parsed fragment. We use just the tag enum rather
+ * than a full node because that's enough to set all the parsing context we
+ * need, and it provides some additional flexibility for client code to act as
+ * if parsing a fragment even when a full HTML tree isn't available.
+ *
+ * Default: GUMBO_TAG_LAST
+ */
+ GumboTag fragment_context;
+
+ /**
+ * The namespace for the fragment context. This lets client code
+ * differentiate between, say, parsing a <title> tag in SVG vs. parsing it in
+ * HTML.
+ * Default: GUMBO_NAMESPACE_HTML
+ */
+ GumboNamespaceEnum fragment_namespace;
+} GumboOptions;
+
+/** Default options struct; use this with gumbo_parse_with_options. */
+extern const GumboOptions kGumboDefaultOptions;
+
+/** The output struct containing the results of the parse. */
+typedef struct GumboInternalOutput {
+ /**
+ * Pointer to the document node. This is a GumboNode of type
+ * GUMBO_NODE_DOCUMENT that contains the entire document as its child.
+ */
+ GumboNode* document;
+
+ /**
+ * Pointer to the root node. This is the <html> tag that forms the root of
+ * the document.
+ */
+ GumboNode* root;
+
+ /**
+ * A list of errors that occurred during the parse.
+ * NOTE: In version 1.0 of this library, the API for errors hasn't been fully
+ * fleshed out and may change in the future. For this reason, the GumboError
+ * header isn't part of the public API. Contact us if you need errors
+ * reported so we can work out something appropriate for your use-case.
+ */
+ GumboVector /* GumboError */ errors;
+} GumboOutput;
+
+/**
+ * Parses a buffer of UTF8 text into a GumboNode parse tree. The buffer must
+ * live at least as long as the parse tree, as some fields (eg. original_text)
+ * point directly into the original buffer.
+ *
+ * This doesn't support buffers longer than 4 gigabytes.
+ */
+GumboOutput* gumbo_parse(const char* buffer);
+
+/**
+ * Extended version of gumbo_parse that takes an explicit options structure,
+ * buffer, and length.
+ */
+GumboOutput* gumbo_parse_with_options(
+ const GumboOptions* options, const char* buffer, size_t buffer_length);
+
+/** Release the memory used for the parse tree & parse errors. */
+void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_GUMBO_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/attribute.h b/libs/litehtml/src/gumbo/include/gumbo/attribute.h
new file mode 100644
index 0000000000..f9b8aea576
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/attribute.h
@@ -0,0 +1,37 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#ifndef GUMBO_ATTRIBUTE_H_
+#define GUMBO_ATTRIBUTE_H_
+
+#include "gumbo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GumboInternalParser;
+
+// Release the memory used for a GumboAttribute, including the attribute
+// itself.
+void gumbo_destroy_attribute(
+ struct GumboInternalParser* parser, GumboAttribute* attribute);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_ATTRIBUTE_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/char_ref.h b/libs/litehtml/src/gumbo/include/gumbo/char_ref.h
new file mode 100644
index 0000000000..09d2598f45
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/char_ref.h
@@ -0,0 +1,60 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// Internal header for character reference handling; this should not be exposed
+// transitively by any public API header. This is why the functions aren't
+// namespaced.
+
+#ifndef GUMBO_CHAR_REF_H_
+#define GUMBO_CHAR_REF_H_
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GumboInternalParser;
+struct GumboInternalUtf8Iterator;
+
+// Value that indicates no character was produced.
+extern const int kGumboNoChar;
+
+// Certain named character references generate two codepoints, not one, and so
+// the consume_char_ref subroutine needs to return this instead of an int. The
+// first field will be kGumboNoChar if no character reference was found; the
+// second field will be kGumboNoChar if that is the case or if the character
+// reference returns only a single codepoint.
+typedef struct {
+ int first;
+ int second;
+} OneOrTwoCodepoints;
+
+// Implements the "consume a character reference" section of the spec.
+// This reads in characters from the input as necessary, and fills in a
+// OneOrTwoCodepoints struct containing the characters read. It may add parse
+// errors to the GumboParser's errors vector, if the spec calls for it. Pass a
+// space for the "additional allowed char" when the spec says "with no
+// additional allowed char". Returns false on parse error, true otherwise.
+bool consume_char_ref(struct GumboInternalParser* parser,
+ struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
+ bool is_in_attribute, OneOrTwoCodepoints* output);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_CHAR_REF_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/error.h b/libs/litehtml/src/gumbo/include/gumbo/error.h
new file mode 100644
index 0000000000..3aa54a6b27
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/error.h
@@ -0,0 +1,227 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// Error types, enums, and handling functions.
+
+#ifndef GUMBO_ERROR_H_
+#define GUMBO_ERROR_H_
+#ifdef _MSC_VER
+#ifndef _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+#endif
+#include <stdint.h>
+
+#include "gumbo.h"
+#include "insertion_mode.h"
+#include "string_buffer.h"
+#include "token_type.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GumboInternalParser;
+
+typedef enum {
+ GUMBO_ERR_UTF8_INVALID,
+ GUMBO_ERR_UTF8_TRUNCATED,
+ GUMBO_ERR_UTF8_NULL,
+ GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS,
+ GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON,
+ GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
+ GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON,
+ GUMBO_ERR_NAMED_CHAR_REF_INVALID,
+ GUMBO_ERR_TAG_STARTS_WITH_QUESTION,
+ GUMBO_ERR_TAG_EOF,
+ GUMBO_ERR_TAG_INVALID,
+ GUMBO_ERR_CLOSE_TAG_EMPTY,
+ GUMBO_ERR_CLOSE_TAG_EOF,
+ GUMBO_ERR_CLOSE_TAG_INVALID,
+ GUMBO_ERR_SCRIPT_EOF,
+ GUMBO_ERR_ATTR_NAME_EOF,
+ GUMBO_ERR_ATTR_NAME_INVALID,
+ GUMBO_ERR_ATTR_DOUBLE_QUOTE_EOF,
+ GUMBO_ERR_ATTR_SINGLE_QUOTE_EOF,
+ GUMBO_ERR_ATTR_UNQUOTED_EOF,
+ GUMBO_ERR_ATTR_UNQUOTED_RIGHT_BRACKET,
+ GUMBO_ERR_ATTR_UNQUOTED_EQUALS,
+ GUMBO_ERR_ATTR_AFTER_EOF,
+ GUMBO_ERR_ATTR_AFTER_INVALID,
+ GUMBO_ERR_DUPLICATE_ATTR,
+ GUMBO_ERR_SOLIDUS_EOF,
+ GUMBO_ERR_SOLIDUS_INVALID,
+ GUMBO_ERR_DASHES_OR_DOCTYPE,
+ GUMBO_ERR_COMMENT_EOF,
+ GUMBO_ERR_COMMENT_INVALID,
+ GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH,
+ GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH,
+ GUMBO_ERR_COMMENT_SPACE_AFTER_DOUBLE_DASH,
+ GUMBO_ERR_COMMENT_END_BANG_EOF,
+ GUMBO_ERR_DOCTYPE_EOF,
+ GUMBO_ERR_DOCTYPE_INVALID,
+ GUMBO_ERR_DOCTYPE_SPACE,
+ GUMBO_ERR_DOCTYPE_RIGHT_BRACKET,
+ GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET,
+ GUMBO_ERR_DOCTYPE_END,
+ GUMBO_ERR_PARSER,
+ GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG,
+} GumboErrorType;
+
+// Additional data for duplicated attributes.
+typedef struct GumboInternalDuplicateAttrError {
+ // The name of the attribute. Owned by this struct.
+ const char* name;
+
+ // The (0-based) index within the attributes vector of the original
+ // occurrence.
+ unsigned int original_index;
+
+ // The (0-based) index where the new occurrence would be.
+ unsigned int new_index;
+} GumboDuplicateAttrError;
+
+// A simplified representation of the tokenizer state, designed to be more
+// useful to clients of this library than the internal representation. This
+// condenses the actual states used in the tokenizer state machine into a few
+// values that will be familiar to users of HTML.
+typedef enum {
+ GUMBO_ERR_TOKENIZER_DATA,
+ GUMBO_ERR_TOKENIZER_CHAR_REF,
+ GUMBO_ERR_TOKENIZER_RCDATA,
+ GUMBO_ERR_TOKENIZER_RAWTEXT,
+ GUMBO_ERR_TOKENIZER_PLAINTEXT,
+ GUMBO_ERR_TOKENIZER_SCRIPT,
+ GUMBO_ERR_TOKENIZER_TAG,
+ GUMBO_ERR_TOKENIZER_SELF_CLOSING_TAG,
+ GUMBO_ERR_TOKENIZER_ATTR_NAME,
+ GUMBO_ERR_TOKENIZER_ATTR_VALUE,
+ GUMBO_ERR_TOKENIZER_MARKUP_DECLARATION,
+ GUMBO_ERR_TOKENIZER_COMMENT,
+ GUMBO_ERR_TOKENIZER_DOCTYPE,
+ GUMBO_ERR_TOKENIZER_CDATA,
+} GumboTokenizerErrorState;
+
+// Additional data for tokenizer errors.
+// This records the current state and codepoint encountered - this is usually
+// enough to reconstruct what went wrong and provide a friendly error message.
+typedef struct GumboInternalTokenizerError {
+ // The bad codepoint encountered.
+ int codepoint;
+
+ // The state that the tokenizer was in at the time.
+ GumboTokenizerErrorState state;
+} GumboTokenizerError;
+
+// Additional data for parse errors.
+typedef struct GumboInternalParserError {
+ // The type of input token that resulted in this error.
+ GumboTokenType input_type;
+
+ // The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
+ GumboTag input_tag;
+
+ // The insertion mode that the parser was in at the time.
+ GumboInsertionMode parser_state;
+
+ // The tag stack at the point of the error. Note that this is a GumboVector
+ // of GumboTags *stored by value* - cast the void* to a GumboTag directly to
+ // get at the tag.
+ GumboVector /* GumboTag */ tag_stack;
+} GumboParserError;
+
+// The overall error struct representing an error in decoding/tokenizing/parsing
+// the HTML. This contains an enumerated type flag, a source position, and then
+// a union of fields containing data specific to the error.
+typedef struct GumboInternalError {
+ // The type of error.
+ GumboErrorType type;
+
+ // The position within the source file where the error occurred.
+ GumboSourcePosition position;
+
+ // A pointer to the byte within the original source file text where the error
+ // occurred (note that this is not the same as position.offset, as that gives
+ // character-based instead of byte-based offsets).
+ const char* original_text;
+
+ // Type-specific error information.
+ union {
+ // The code point we encountered, for:
+ // * GUMBO_ERR_UTF8_INVALID
+ // * GUMBO_ERR_UTF8_TRUNCATED
+ // * GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON
+ // * GUMBO_ERR_NUMERIC_CHAR_REF_INVALID
+ uint64_t codepoint;
+
+ // Tokenizer errors.
+ GumboTokenizerError tokenizer;
+
+ // Short textual data, for:
+ // * GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON
+ // * GUMBO_ERR_NAMED_CHAR_REF_INVALID
+ GumboStringPiece text;
+
+ // Duplicate attribute data, for GUMBO_ERR_DUPLICATE_ATTR.
+ GumboDuplicateAttrError duplicate_attr;
+
+ // Parser state, for GUMBO_ERR_PARSER and
+ // GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG.
+ struct GumboInternalParserError parser;
+ } v;
+} GumboError;
+
+// Adds a new error to the parser's error list, and returns a pointer to it so
+// that clients can fill out the rest of its fields. May return NULL if we're
+// already over the max_errors field specified in GumboOptions.
+GumboError* gumbo_add_error(struct GumboInternalParser* parser);
+
+// Initializes the errors vector in the given parser.
+void gumbo_init_errors(struct GumboInternalParser* parser);
+
+// Frees all the errors in the 'errors_' field of the given parser.
+void gumbo_destroy_errors(struct GumboInternalParser* parser);
+
+// Frees the memory used for a single GumboError.
+void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
+
+// Prints an error to a string. This fills an empty GumboStringBuffer with a
+// freshly-allocated buffer containing the error message text. The caller is
+// responsible for deleting the buffer. (Note that the buffer is allocated with
+// the allocator specified in the GumboParser config and hence should be freed
+// by gumbo_parser_deallocate().)
+void gumbo_error_to_string(struct GumboInternalParser* parser,
+ const GumboError* error, GumboStringBuffer* output);
+
+// Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
+// with a freshly-allocated buffer containing the error message text. The
+// caller is responsible for deleting the buffer. (Note that the buffer is
+// allocated with the allocator specified in the GumboParser config and hence
+// should be freed by gumbo_parser_deallocate().)
+void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
+ const GumboError* error, const char* source_text,
+ GumboStringBuffer* output);
+
+// Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
+// of writing to a string.
+void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
+ const GumboError* error, const char* source_text);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_ERROR_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/insertion_mode.h b/libs/litehtml/src/gumbo/include/gumbo/insertion_mode.h
new file mode 100644
index 0000000000..45134c13b3
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/insertion_mode.h
@@ -0,0 +1,57 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#ifndef GUMBO_INSERTION_MODE_H_
+#define GUMBO_INSERTION_MODE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#insertion-mode
+// If new enum values are added, be sure to update the kTokenHandlers dispatch
+// table in parser.c.
+typedef enum {
+ GUMBO_INSERTION_MODE_INITIAL,
+ GUMBO_INSERTION_MODE_BEFORE_HTML,
+ GUMBO_INSERTION_MODE_BEFORE_HEAD,
+ GUMBO_INSERTION_MODE_IN_HEAD,
+ GUMBO_INSERTION_MODE_IN_HEAD_NOSCRIPT,
+ GUMBO_INSERTION_MODE_AFTER_HEAD,
+ GUMBO_INSERTION_MODE_IN_BODY,
+ GUMBO_INSERTION_MODE_TEXT,
+ GUMBO_INSERTION_MODE_IN_TABLE,
+ GUMBO_INSERTION_MODE_IN_TABLE_TEXT,
+ GUMBO_INSERTION_MODE_IN_CAPTION,
+ GUMBO_INSERTION_MODE_IN_COLUMN_GROUP,
+ GUMBO_INSERTION_MODE_IN_TABLE_BODY,
+ GUMBO_INSERTION_MODE_IN_ROW,
+ GUMBO_INSERTION_MODE_IN_CELL,
+ GUMBO_INSERTION_MODE_IN_SELECT,
+ GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE,
+ GUMBO_INSERTION_MODE_IN_TEMPLATE,
+ GUMBO_INSERTION_MODE_AFTER_BODY,
+ GUMBO_INSERTION_MODE_IN_FRAMESET,
+ GUMBO_INSERTION_MODE_AFTER_FRAMESET,
+ GUMBO_INSERTION_MODE_AFTER_AFTER_BODY,
+ GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET
+} GumboInsertionMode;
+
+#ifdef __cplusplus
+} // extern C
+#endif
+
+#endif // GUMBO_INSERTION_MODE_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/parser.h b/libs/litehtml/src/gumbo/include/gumbo/parser.h
new file mode 100644
index 0000000000..95019e3eca
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/parser.h
@@ -0,0 +1,57 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// Contains the definition of the top-level GumboParser structure that's
+// threaded through basically every internal function in the library.
+
+#ifndef GUMBO_PARSER_H_
+#define GUMBO_PARSER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GumboInternalParserState;
+struct GumboInternalOutput;
+struct GumboInternalOptions;
+struct GumboInternalTokenizerState;
+
+// An overarching struct that's threaded through (nearly) all functions in the
+// library, OOP-style. This gives each function access to the options and
+// output, along with any internal state needed for the parse.
+typedef struct GumboInternalParser {
+ // Settings for this parse run.
+ const struct GumboInternalOptions* _options;
+
+ // Output for the parse.
+ struct GumboInternalOutput* _output;
+
+ // The internal tokenizer state, defined as a pointer to avoid a cyclic
+ // dependency on tokenizer.h. The main parse routine is responsible for
+ // initializing this on parse start, and destroying it on parse end.
+ // End-users will never see a non-garbage value in this pointer.
+ struct GumboInternalTokenizerState* _tokenizer_state;
+
+ // The internal parser state. Initialized on parse start and destroyed on
+ // parse end; end-users will never see a non-garbage value in this pointer.
+ struct GumboInternalParserState* _parser_state;
+} GumboParser;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_PARSER_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/string_buffer.h b/libs/litehtml/src/gumbo/include/gumbo/string_buffer.h
new file mode 100644
index 0000000000..ee7956acc8
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/string_buffer.h
@@ -0,0 +1,84 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+#ifndef GUMBO_STRING_BUFFER_H_
+#define GUMBO_STRING_BUFFER_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "gumbo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GumboInternalParser;
+
+// A struct representing a mutable, growable string. This consists of a
+// heap-allocated buffer that may grow (by doubling) as necessary. When
+// converting to a string, this allocates a new buffer that is only as long as
+// it needs to be. Note that the internal buffer here is *not* nul-terminated,
+// so be sure not to use ordinary string manipulation functions on it.
+typedef struct {
+ // A pointer to the beginning of the string. NULL iff length == 0.
+ char* data;
+
+ // The length of the string fragment, in bytes. May be zero.
+ size_t length;
+
+ // The capacity of the buffer, in bytes.
+ size_t capacity;
+} GumboStringBuffer;
+
+// Initializes a new GumboStringBuffer.
+void gumbo_string_buffer_init(
+ struct GumboInternalParser* parser, GumboStringBuffer* output);
+
+// Ensures that the buffer contains at least a certain amount of space. Most
+// useful with snprintf and the other length-delimited string functions, which
+// may want to write directly into the buffer.
+void gumbo_string_buffer_reserve(struct GumboInternalParser* parser,
+ size_t min_capacity, GumboStringBuffer* output);
+
+// Appends a single Unicode codepoint onto the end of the GumboStringBuffer.
+// This is essentially a UTF-8 encoder, and may add 1-4 bytes depending on the
+// value of the codepoint.
+void gumbo_string_buffer_append_codepoint(
+ struct GumboInternalParser* parser, int c, GumboStringBuffer* output);
+
+// Appends a string onto the end of the GumboStringBuffer.
+void gumbo_string_buffer_append_string(struct GumboInternalParser* parser,
+ GumboStringPiece* str, GumboStringBuffer* output);
+
+// Converts this string buffer to a char*, allocating a new buffer for it.
+char* gumbo_string_buffer_to_string(
+ struct GumboInternalParser* parser, GumboStringBuffer* input);
+
+// Reinitialize this string buffer. This clears it by setting length=0. It
+// does not zero out the buffer itself.
+void gumbo_string_buffer_clear(
+ struct GumboInternalParser* parser, GumboStringBuffer* input);
+
+// Deallocates this GumboStringBuffer.
+void gumbo_string_buffer_destroy(
+ struct GumboInternalParser* parser, GumboStringBuffer* buffer);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_STRING_BUFFER_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/string_piece.h b/libs/litehtml/src/gumbo/include/gumbo/string_piece.h
new file mode 100644
index 0000000000..8c8188c500
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/string_piece.h
@@ -0,0 +1,38 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#ifndef GUMBO_STRING_PIECE_H_
+#define GUMBO_STRING_PIECE_H_
+
+#include "gumbo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GumboInternalParser;
+
+// Performs a deep-copy of a GumboStringPiece, allocating a fresh buffer in the
+// destination and copying over the characters from source. Dest should be
+// empty, with no buffer allocated; otherwise, this leaks it.
+void gumbo_string_copy(struct GumboInternalParser* parser,
+ GumboStringPiece* dest, const GumboStringPiece* source);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_STRING_PIECE_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/tag_enum.h b/libs/litehtml/src/gumbo/include/gumbo/tag_enum.h
new file mode 100644
index 0000000000..6d7aeb3d7d
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/tag_enum.h
@@ -0,0 +1,153 @@
+// Generated via `gentags.py src/tag.in`.
+// Do not edit; edit src/tag.in instead.
+// clang-format off
+GUMBO_TAG_HTML,
+GUMBO_TAG_HEAD,
+GUMBO_TAG_TITLE,
+GUMBO_TAG_BASE,
+GUMBO_TAG_LINK,
+GUMBO_TAG_META,
+GUMBO_TAG_STYLE,
+GUMBO_TAG_SCRIPT,
+GUMBO_TAG_NOSCRIPT,
+GUMBO_TAG_TEMPLATE,
+GUMBO_TAG_BODY,
+GUMBO_TAG_ARTICLE,
+GUMBO_TAG_SECTION,
+GUMBO_TAG_NAV,
+GUMBO_TAG_ASIDE,
+GUMBO_TAG_H1,
+GUMBO_TAG_H2,
+GUMBO_TAG_H3,
+GUMBO_TAG_H4,
+GUMBO_TAG_H5,
+GUMBO_TAG_H6,
+GUMBO_TAG_HGROUP,
+GUMBO_TAG_HEADER,
+GUMBO_TAG_FOOTER,
+GUMBO_TAG_ADDRESS,
+GUMBO_TAG_P,
+GUMBO_TAG_HR,
+GUMBO_TAG_PRE,
+GUMBO_TAG_BLOCKQUOTE,
+GUMBO_TAG_OL,
+GUMBO_TAG_UL,
+GUMBO_TAG_LI,
+GUMBO_TAG_DL,
+GUMBO_TAG_DT,
+GUMBO_TAG_DD,
+GUMBO_TAG_FIGURE,
+GUMBO_TAG_FIGCAPTION,
+GUMBO_TAG_MAIN,
+GUMBO_TAG_DIV,
+GUMBO_TAG_A,
+GUMBO_TAG_EM,
+GUMBO_TAG_STRONG,
+GUMBO_TAG_SMALL,
+GUMBO_TAG_S,
+GUMBO_TAG_CITE,
+GUMBO_TAG_Q,
+GUMBO_TAG_DFN,
+GUMBO_TAG_ABBR,
+GUMBO_TAG_DATA,
+GUMBO_TAG_TIME,
+GUMBO_TAG_CODE,
+GUMBO_TAG_VAR,
+GUMBO_TAG_SAMP,
+GUMBO_TAG_KBD,
+GUMBO_TAG_SUB,
+GUMBO_TAG_SUP,
+GUMBO_TAG_I,
+GUMBO_TAG_B,
+GUMBO_TAG_U,
+GUMBO_TAG_MARK,
+GUMBO_TAG_RUBY,
+GUMBO_TAG_RT,
+GUMBO_TAG_RP,
+GUMBO_TAG_BDI,
+GUMBO_TAG_BDO,
+GUMBO_TAG_SPAN,
+GUMBO_TAG_BR,
+GUMBO_TAG_WBR,
+GUMBO_TAG_INS,
+GUMBO_TAG_DEL,
+GUMBO_TAG_IMAGE,
+GUMBO_TAG_IMG,
+GUMBO_TAG_IFRAME,
+GUMBO_TAG_EMBED,
+GUMBO_TAG_OBJECT,
+GUMBO_TAG_PARAM,
+GUMBO_TAG_VIDEO,
+GUMBO_TAG_AUDIO,
+GUMBO_TAG_SOURCE,
+GUMBO_TAG_TRACK,
+GUMBO_TAG_CANVAS,
+GUMBO_TAG_MAP,
+GUMBO_TAG_AREA,
+GUMBO_TAG_MATH,
+GUMBO_TAG_MI,
+GUMBO_TAG_MO,
+GUMBO_TAG_MN,
+GUMBO_TAG_MS,
+GUMBO_TAG_MTEXT,
+GUMBO_TAG_MGLYPH,
+GUMBO_TAG_MALIGNMARK,
+GUMBO_TAG_ANNOTATION_XML,
+GUMBO_TAG_SVG,
+GUMBO_TAG_FOREIGNOBJECT,
+GUMBO_TAG_DESC,
+GUMBO_TAG_TABLE,
+GUMBO_TAG_CAPTION,
+GUMBO_TAG_COLGROUP,
+GUMBO_TAG_COL,
+GUMBO_TAG_TBODY,
+GUMBO_TAG_THEAD,
+GUMBO_TAG_TFOOT,
+GUMBO_TAG_TR,
+GUMBO_TAG_TD,
+GUMBO_TAG_TH,
+GUMBO_TAG_FORM,
+GUMBO_TAG_FIELDSET,
+GUMBO_TAG_LEGEND,
+GUMBO_TAG_LABEL,
+GUMBO_TAG_INPUT,
+GUMBO_TAG_BUTTON,
+GUMBO_TAG_SELECT,
+GUMBO_TAG_DATALIST,
+GUMBO_TAG_OPTGROUP,
+GUMBO_TAG_OPTION,
+GUMBO_TAG_TEXTAREA,
+GUMBO_TAG_KEYGEN,
+GUMBO_TAG_OUTPUT,
+GUMBO_TAG_PROGRESS,
+GUMBO_TAG_METER,
+GUMBO_TAG_DETAILS,
+GUMBO_TAG_SUMMARY,
+GUMBO_TAG_MENU,
+GUMBO_TAG_MENUITEM,
+GUMBO_TAG_APPLET,
+GUMBO_TAG_ACRONYM,
+GUMBO_TAG_BGSOUND,
+GUMBO_TAG_DIR,
+GUMBO_TAG_FRAME,
+GUMBO_TAG_FRAMESET,
+GUMBO_TAG_NOFRAMES,
+GUMBO_TAG_ISINDEX,
+GUMBO_TAG_LISTING,
+GUMBO_TAG_XMP,
+GUMBO_TAG_NEXTID,
+GUMBO_TAG_NOEMBED,
+GUMBO_TAG_PLAINTEXT,
+GUMBO_TAG_RB,
+GUMBO_TAG_STRIKE,
+GUMBO_TAG_BASEFONT,
+GUMBO_TAG_BIG,
+GUMBO_TAG_BLINK,
+GUMBO_TAG_CENTER,
+GUMBO_TAG_FONT,
+GUMBO_TAG_MARQUEE,
+GUMBO_TAG_MULTICOL,
+GUMBO_TAG_NOBR,
+GUMBO_TAG_SPACER,
+GUMBO_TAG_TT,
+GUMBO_TAG_RTC,
diff --git a/libs/litehtml/src/gumbo/include/gumbo/tag_gperf.h b/libs/litehtml/src/gumbo/include/gumbo/tag_gperf.h
new file mode 100644
index 0000000000..378eaf958c
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/tag_gperf.h
@@ -0,0 +1,105 @@
+static unsigned int tag_hash(  // Hash built from len and up to three bytes of str.
+ register const char *str, register unsigned int len) {
+ static unsigned short asso_values[] = {296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 6, 4, 3, 1, 1, 0,
+ 1, 0, 0, 296, 296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13, 59, 65, 2,
+ 69, 0, 134, 9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93, 124, 296,
+ 296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13, 59, 65, 2, 69, 0, 134,
+ 9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93, 124, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296};
+ register unsigned int hval = len;  // The length seeds the hash.
+ // NOTE(review): len == 0 is not guarded; callers presumably reject it - confirm.
+ switch (hval) {
+ default:  // Any len other than 1 also mixes in the second byte.
+ hval += asso_values[(unsigned char) str[1] + 3];  // Lookup is offset by 3.
+ /*FALLTHROUGH*/
+ case 1:
+ hval += asso_values[(unsigned char) str[0]];  // First byte.
+ break;
+ }
+ return hval + asso_values[(unsigned char) str[len - 1]];  // Last byte.
+}
+
+static const unsigned char kGumboTagMap[] = {GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_S, GUMBO_TAG_H6, GUMBO_TAG_H5, GUMBO_TAG_H4,
+ GUMBO_TAG_H3, GUMBO_TAG_SPACER, GUMBO_TAG_H2, GUMBO_TAG_HEADER,
+ GUMBO_TAG_H1, GUMBO_TAG_HEAD, GUMBO_TAG_LAST, GUMBO_TAG_DETAILS,
+ GUMBO_TAG_SELECT, GUMBO_TAG_DIR, GUMBO_TAG_LAST, GUMBO_TAG_DEL,
+ GUMBO_TAG_LAST, GUMBO_TAG_SOURCE, GUMBO_TAG_LEGEND, GUMBO_TAG_DATALIST,
+ GUMBO_TAG_METER, GUMBO_TAG_MGLYPH, GUMBO_TAG_LAST, GUMBO_TAG_MATH,
+ GUMBO_TAG_LABEL, GUMBO_TAG_TABLE, GUMBO_TAG_TEMPLATE, GUMBO_TAG_LAST,
+ GUMBO_TAG_RP, GUMBO_TAG_TIME, GUMBO_TAG_TITLE, GUMBO_TAG_DATA,
+ GUMBO_TAG_APPLET, GUMBO_TAG_HGROUP, GUMBO_TAG_SAMP, GUMBO_TAG_TEXTAREA,
+ GUMBO_TAG_ABBR, GUMBO_TAG_MARQUEE, GUMBO_TAG_LAST, GUMBO_TAG_MENUITEM,
+ GUMBO_TAG_SMALL, GUMBO_TAG_META, GUMBO_TAG_A, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_EMBED,
+ GUMBO_TAG_MAP, GUMBO_TAG_LAST, GUMBO_TAG_PARAM, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_NOBR, GUMBO_TAG_P, GUMBO_TAG_SPAN, GUMBO_TAG_EM,
+ GUMBO_TAG_LAST, GUMBO_TAG_NOFRAMES, GUMBO_TAG_SECTION, GUMBO_TAG_NOEMBED,
+ GUMBO_TAG_NEXTID, GUMBO_TAG_FOOTER, GUMBO_TAG_NOSCRIPT, GUMBO_TAG_HR,
+ GUMBO_TAG_LAST, GUMBO_TAG_FONT, GUMBO_TAG_DL, GUMBO_TAG_TR,
+ GUMBO_TAG_SCRIPT, GUMBO_TAG_MO, GUMBO_TAG_LAST, GUMBO_TAG_DD,
+ GUMBO_TAG_MAIN, GUMBO_TAG_TD, GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_FORM,
+ GUMBO_TAG_OBJECT, GUMBO_TAG_LAST, GUMBO_TAG_FIELDSET, GUMBO_TAG_LAST,
+ GUMBO_TAG_BGSOUND, GUMBO_TAG_MENU, GUMBO_TAG_TFOOT, GUMBO_TAG_FIGURE,
+ GUMBO_TAG_RB, GUMBO_TAG_LI, GUMBO_TAG_LISTING, GUMBO_TAG_BASEFONT,
+ GUMBO_TAG_OPTGROUP, GUMBO_TAG_LAST, GUMBO_TAG_BASE, GUMBO_TAG_ADDRESS,
+ GUMBO_TAG_MI, GUMBO_TAG_LAST, GUMBO_TAG_PLAINTEXT, GUMBO_TAG_LAST,
+ GUMBO_TAG_PROGRESS, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_ACRONYM, GUMBO_TAG_ARTICLE, GUMBO_TAG_LAST, GUMBO_TAG_PRE,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_AREA,
+ GUMBO_TAG_RT, GUMBO_TAG_LAST, GUMBO_TAG_OPTION, GUMBO_TAG_IMAGE,
+ GUMBO_TAG_DT, GUMBO_TAG_LAST, GUMBO_TAG_TT, GUMBO_TAG_HTML, GUMBO_TAG_WBR,
+ GUMBO_TAG_OL, GUMBO_TAG_LAST, GUMBO_TAG_STYLE, GUMBO_TAG_STRIKE,
+ GUMBO_TAG_SUP, GUMBO_TAG_MULTICOL, GUMBO_TAG_U, GUMBO_TAG_DFN, GUMBO_TAG_UL,
+ GUMBO_TAG_FIGCAPTION, GUMBO_TAG_MTEXT, GUMBO_TAG_LAST, GUMBO_TAG_VAR,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_FRAMESET, GUMBO_TAG_LAST,
+ GUMBO_TAG_BR, GUMBO_TAG_I, GUMBO_TAG_FRAME, GUMBO_TAG_LAST, GUMBO_TAG_DIV,
+ GUMBO_TAG_LAST, GUMBO_TAG_TH, GUMBO_TAG_MS, GUMBO_TAG_ANNOTATION_XML,
+ GUMBO_TAG_B, GUMBO_TAG_TBODY, GUMBO_TAG_THEAD, GUMBO_TAG_BIG,
+ GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_XMP, GUMBO_TAG_LAST, GUMBO_TAG_KBD,
+ GUMBO_TAG_LAST, GUMBO_TAG_LINK, GUMBO_TAG_IFRAME, GUMBO_TAG_MARK,
+ GUMBO_TAG_CENTER, GUMBO_TAG_OUTPUT, GUMBO_TAG_DESC, GUMBO_TAG_CANVAS,
+ GUMBO_TAG_COL, GUMBO_TAG_MALIGNMARK, GUMBO_TAG_IMG, GUMBO_TAG_ASIDE,
+ GUMBO_TAG_LAST, GUMBO_TAG_CODE, GUMBO_TAG_LAST, GUMBO_TAG_SUB, GUMBO_TAG_MN,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_INS, GUMBO_TAG_AUDIO,
+ GUMBO_TAG_STRONG, GUMBO_TAG_CITE, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_INPUT, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_NAV, GUMBO_TAG_LAST, GUMBO_TAG_COLGROUP,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_SVG, GUMBO_TAG_KEYGEN, GUMBO_TAG_VIDEO,
+ GUMBO_TAG_BDO, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_BODY, GUMBO_TAG_LAST, GUMBO_TAG_Q, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_TRACK,
+ GUMBO_TAG_LAST, GUMBO_TAG_BDI, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_CAPTION, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_RUBY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BUTTON,
+ GUMBO_TAG_SUMMARY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_RTC, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BLINK, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_ISINDEX};
diff --git a/libs/litehtml/src/gumbo/include/gumbo/tag_sizes.h b/libs/litehtml/src/gumbo/include/gumbo/tag_sizes.h
new file mode 100644
index 0000000000..7c92de073b
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/tag_sizes.h
@@ -0,0 +1,4 @@
+// Generated via `gentags.py src/tag.in`.
+// Do not edit; edit src/tag.in instead.
+// clang-format off
+4, 4, 5, 4, 4, 4, 5, 6, 8, 8, 4, 7, 7, 3, 5, 2, 2, 2, 2, 2, 2, 6, 6, 6, 7, 1, 2, 3, 10, 2, 2, 2, 2, 2, 2, 6, 10, 4, 3, 1, 2, 6, 5, 1, 4, 1, 3, 4, 4, 4, 4, 3, 4, 3, 3, 3, 1, 1, 1, 4, 4, 2, 2, 3, 3, 4, 2, 3, 3, 3, 5, 3, 6, 5, 6, 5, 5, 5, 6, 5, 6, 3, 4, 4, 2, 2, 2, 2, 5, 6, 10, 14, 3, 13, 4, 5, 7, 8, 3, 5, 5, 5, 2, 2, 2, 4, 8, 6, 5, 5, 6, 6, 8, 8, 6, 8, 6, 6, 8, 5, 7, 7, 4, 8, 6, 7, 7, 3, 5, 8, 8, 7, 7, 3, 6, 7, 9, 2, 6, 8, 3, 5, 6, 4, 7, 8, 4, 6, 2, 3, \ No newline at end of file
diff --git a/libs/litehtml/src/gumbo/include/gumbo/tag_strings.h b/libs/litehtml/src/gumbo/include/gumbo/tag_strings.h
new file mode 100644
index 0000000000..6540e2e6ba
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/tag_strings.h
@@ -0,0 +1,153 @@
+// Generated via `gentags.py src/tag.in`.
+// Do not edit; edit src/tag.in instead.
+// clang-format off
+"html",
+"head",
+"title",
+"base",
+"link",
+"meta",
+"style",
+"script",
+"noscript",
+"template",
+"body",
+"article",
+"section",
+"nav",
+"aside",
+"h1",
+"h2",
+"h3",
+"h4",
+"h5",
+"h6",
+"hgroup",
+"header",
+"footer",
+"address",
+"p",
+"hr",
+"pre",
+"blockquote",
+"ol",
+"ul",
+"li",
+"dl",
+"dt",
+"dd",
+"figure",
+"figcaption",
+"main",
+"div",
+"a",
+"em",
+"strong",
+"small",
+"s",
+"cite",
+"q",
+"dfn",
+"abbr",
+"data",
+"time",
+"code",
+"var",
+"samp",
+"kbd",
+"sub",
+"sup",
+"i",
+"b",
+"u",
+"mark",
+"ruby",
+"rt",
+"rp",
+"bdi",
+"bdo",
+"span",
+"br",
+"wbr",
+"ins",
+"del",
+"image",
+"img",
+"iframe",
+"embed",
+"object",
+"param",
+"video",
+"audio",
+"source",
+"track",
+"canvas",
+"map",
+"area",
+"math",
+"mi",
+"mo",
+"mn",
+"ms",
+"mtext",
+"mglyph",
+"malignmark",
+"annotation-xml",
+"svg",
+"foreignobject",
+"desc",
+"table",
+"caption",
+"colgroup",
+"col",
+"tbody",
+"thead",
+"tfoot",
+"tr",
+"td",
+"th",
+"form",
+"fieldset",
+"legend",
+"label",
+"input",
+"button",
+"select",
+"datalist",
+"optgroup",
+"option",
+"textarea",
+"keygen",
+"output",
+"progress",
+"meter",
+"details",
+"summary",
+"menu",
+"menuitem",
+"applet",
+"acronym",
+"bgsound",
+"dir",
+"frame",
+"frameset",
+"noframes",
+"isindex",
+"listing",
+"xmp",
+"nextid",
+"noembed",
+"plaintext",
+"rb",
+"strike",
+"basefont",
+"big",
+"blink",
+"center",
+"font",
+"marquee",
+"multicol",
+"nobr",
+"spacer",
+"tt",
+"rtc",
diff --git a/libs/litehtml/src/gumbo/include/gumbo/token_type.h b/libs/litehtml/src/gumbo/include/gumbo/token_type.h
new file mode 100644
index 0000000000..eeab507869
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/token_type.h
@@ -0,0 +1,41 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#ifndef GUMBO_TOKEN_TYPE_H_
+#define GUMBO_TOKEN_TYPE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// An enum representing the type of token.
+typedef enum {
+ GUMBO_TOKEN_DOCTYPE,
+ GUMBO_TOKEN_START_TAG,
+ GUMBO_TOKEN_END_TAG,
+ GUMBO_TOKEN_COMMENT,
+ GUMBO_TOKEN_WHITESPACE,
+ GUMBO_TOKEN_CHARACTER,
+ GUMBO_TOKEN_CDATA,
+ GUMBO_TOKEN_NULL,
+ GUMBO_TOKEN_EOF
+} GumboTokenType;
+
+#ifdef __cplusplus
+} // extern C
+#endif
+
+#endif // GUMBO_TOKEN_TYPE_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/tokenizer.h b/libs/litehtml/src/gumbo/include/gumbo/tokenizer.h
new file mode 100644
index 0000000000..1e2a2ca730
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/tokenizer.h
@@ -0,0 +1,123 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// This contains an implementation of a tokenizer for HTML5. It consumes a
+// buffer of UTF-8 characters, and then emits a stream of tokens.
+
+#ifndef GUMBO_TOKENIZER_H_
+#define GUMBO_TOKENIZER_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "gumbo.h"
+#include "token_type.h"
+#include "tokenizer_states.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GumboInternalParser;
+
+// Struct containing all information pertaining to doctype tokens.
+typedef struct GumboInternalTokenDocType {
+ const char* name;
+ const char* public_identifier;
+ const char* system_identifier;
+ bool force_quirks;
+ // There's no way to tell a 0-length public or system ID apart from the
+ // absence of a public or system ID, but they're handled differently by the
+ // spec, so we need bool flags for them.
+ bool has_public_identifier;
+ bool has_system_identifier;
+} GumboTokenDocType;
+
+// Struct containing all information pertaining to start tag tokens.
+typedef struct GumboInternalTokenStartTag {
+ GumboTag tag;
+ GumboVector /* GumboAttribute */ attributes;
+ bool is_self_closing; // NOTE(review): presumably the "/>" flag; confirm.
+} GumboTokenStartTag;
+
+// A data structure representing a single token in the input stream. This
+// contains an enum for the type, the source position, a GumboStringPiece
+// pointing to the original text, and then a union for any parsed data.
+typedef struct GumboInternalToken {
+ GumboTokenType type;
+ GumboSourcePosition position;
+ GumboStringPiece original_text;
+ union { // Which member is meaningful depends on 'type'.
+ GumboTokenDocType doc_type; // For doctype tokens.
+ GumboTokenStartTag start_tag; // For start tag tokens.
+ GumboTag end_tag; // For end tag tokens.
+ const char* text; // For comments.
+ int character; // For character, whitespace, null, and EOF tokens.
+ } v;
+} GumboToken;
+
+// Initializes the tokenizer state within the GumboParser object, setting up a
+// parse of the specified text.
+void gumbo_tokenizer_state_init(
+ struct GumboInternalParser* parser, const char* text, size_t text_length);
+
+// Destroys the tokenizer state within the GumboParser object, freeing any
+// dynamically-allocated structures within it.
+void gumbo_tokenizer_state_destroy(struct GumboInternalParser* parser);
+
+// Sets the tokenizer state to the specified value. This is needed by some
+// parser states, which alter the state of the tokenizer in response to tags
+// seen.
+void gumbo_tokenizer_set_state(
+ struct GumboInternalParser* parser, GumboTokenizerEnum state);
+
+// Flags whether the current node is a foreign content element. This is
+// necessary for the markup declaration open state, where the tokenizer must be
+// aware of the state of the parser to properly tokenize bad comment tags.
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state
+void gumbo_tokenizer_set_is_current_node_foreign(
+ struct GumboInternalParser* parser, bool is_foreign);
+
+// Lexes a single token from the specified buffer, filling the output with the
+// parsed GumboToken data structure. Returns true for a successful
+// tokenization, false if a parse error occurs.
+//
+// Example:
+// struct GumboInternalParser parser;
+// GumboToken output;
+// gumbo_tokenizer_state_init(&parser, text, strlen(text));
+// while (gumbo_lex(&parser, &output)) {
+// ...do stuff with output.
+// gumbo_token_destroy(&parser, &output);
+// }
+// gumbo_tokenizer_state_destroy(&parser);
+bool gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
+
+// Frees the internally-allocated pointers within a GumboToken. Note that this
+// doesn't free the token itself, since oftentimes it will be allocated on the
+// stack. A simple call to free() (or GumboParser->deallocator, if
+// appropriate) can handle that.
+//
+// Note that if you are handing over ownership of the internal strings to some
+// other data structure - for example, a parse tree - these do not need to be
+// freed.
+void gumbo_token_destroy(struct GumboInternalParser* parser, GumboToken* token);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_TOKENIZER_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/tokenizer_states.h b/libs/litehtml/src/gumbo/include/gumbo/tokenizer_states.h
new file mode 100644
index 0000000000..80659f5f1a
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/tokenizer_states.h
@@ -0,0 +1,103 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// This contains the list of states used in the tokenizer. Although at first
+// glance it seems like these could be kept internal to the tokenizer, several
+// of the actions in the parser require that it reach into the tokenizer and
+// reset the tokenizer state. For that to work, it needs to have the
+// definitions of individual states available.
+//
+// This may also be useful for providing more detailed error messages for parse
+// errors, as we can match up states and inputs in a table without having to
+// clutter the tokenizer code with lots of precise error messages.
+
+#ifndef GUMBO_TOKENIZER_STATES_H_
+#define GUMBO_TOKENIZER_STATES_H_
+
+// The ordering of this enum is also used to build the dispatch table for the
+// tokenizer state machine, so if it is changed, be sure to update that too.
+typedef enum {
+ GUMBO_LEX_DATA,
+ GUMBO_LEX_CHAR_REF_IN_DATA,
+ GUMBO_LEX_RCDATA,
+ GUMBO_LEX_CHAR_REF_IN_RCDATA,
+ GUMBO_LEX_RAWTEXT,
+ GUMBO_LEX_SCRIPT,
+ GUMBO_LEX_PLAINTEXT,
+ GUMBO_LEX_TAG_OPEN,
+ GUMBO_LEX_END_TAG_OPEN,
+ GUMBO_LEX_TAG_NAME,
+ GUMBO_LEX_RCDATA_LT,
+ GUMBO_LEX_RCDATA_END_TAG_OPEN,
+ GUMBO_LEX_RCDATA_END_TAG_NAME,
+ GUMBO_LEX_RAWTEXT_LT,
+ GUMBO_LEX_RAWTEXT_END_TAG_OPEN,
+ GUMBO_LEX_RAWTEXT_END_TAG_NAME,
+ GUMBO_LEX_SCRIPT_LT,
+ GUMBO_LEX_SCRIPT_END_TAG_OPEN,
+ GUMBO_LEX_SCRIPT_END_TAG_NAME,
+ GUMBO_LEX_SCRIPT_ESCAPED_START,
+ GUMBO_LEX_SCRIPT_ESCAPED_START_DASH,
+ GUMBO_LEX_SCRIPT_ESCAPED,
+ GUMBO_LEX_SCRIPT_ESCAPED_DASH,
+ GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH,
+ GUMBO_LEX_SCRIPT_ESCAPED_LT,
+ GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_OPEN,
+ GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME,
+ GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_START,
+ GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED,
+ GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH,
+ GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH_DASH,
+ GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_LT,
+ GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END,
+ GUMBO_LEX_BEFORE_ATTR_NAME,
+ GUMBO_LEX_ATTR_NAME,
+ GUMBO_LEX_AFTER_ATTR_NAME,
+ GUMBO_LEX_BEFORE_ATTR_VALUE,
+ GUMBO_LEX_ATTR_VALUE_DOUBLE_QUOTED,
+ GUMBO_LEX_ATTR_VALUE_SINGLE_QUOTED,
+ GUMBO_LEX_ATTR_VALUE_UNQUOTED,
+ GUMBO_LEX_CHAR_REF_IN_ATTR_VALUE,
+ GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED,
+ GUMBO_LEX_SELF_CLOSING_START_TAG,
+ GUMBO_LEX_BOGUS_COMMENT,
+ GUMBO_LEX_MARKUP_DECLARATION,
+ GUMBO_LEX_COMMENT_START,
+ GUMBO_LEX_COMMENT_START_DASH,
+ GUMBO_LEX_COMMENT,
+ GUMBO_LEX_COMMENT_END_DASH,
+ GUMBO_LEX_COMMENT_END,
+ GUMBO_LEX_COMMENT_END_BANG,
+ GUMBO_LEX_DOCTYPE,
+ GUMBO_LEX_BEFORE_DOCTYPE_NAME,
+ GUMBO_LEX_DOCTYPE_NAME,
+ GUMBO_LEX_AFTER_DOCTYPE_NAME,
+ GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD,
+ GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID,
+ GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED,
+ GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED,
+ GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID,
+ GUMBO_LEX_BETWEEN_DOCTYPE_PUBLIC_SYSTEM_ID,
+ GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD,
+ GUMBO_LEX_BEFORE_DOCTYPE_SYSTEM_ID,
+ GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED,
+ GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED,
+ GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID,
+ GUMBO_LEX_BOGUS_DOCTYPE,
+ GUMBO_LEX_CDATA
+} GumboTokenizerEnum; // Passed to gumbo_tokenizer_set_state().
+
+#endif // GUMBO_TOKENIZER_STATES_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/utf8.h b/libs/litehtml/src/gumbo/include/gumbo/utf8.h
new file mode 100644
index 0000000000..ee852abfba
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/utf8.h
@@ -0,0 +1,132 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// This contains an implementation of a UTF8 iterator and decoder suitable for
+// an HTML5 parser. This does a bit more than straight UTF-8 decoding. The
+// HTML5 spec specifies that:
+// 1. Decoding errors are parse errors.
+// 2. Certain other codepoints (eg. control characters) are parse errors.
+// 3. Carriage returns and CR/LF groups are converted to line feeds.
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#decoded-as-utf-8,-with-error-handling
+//
+// Also, we want to keep track of source positions for error handling. As a
+// result, we fold all that functionality into this decoder, and can't use an
+// off-the-shelf library.
+//
+// This header is internal-only, which is why we prefix functions with only
+// utf8_ or utf8iterator_ instead of gumbo_utf8_.
+
+#ifndef GUMBO_UTF8_H_
+#define GUMBO_UTF8_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "gumbo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GumboInternalError;
+struct GumboInternalParser;
+
+// Unicode replacement char.
+extern const int kUtf8ReplacementChar;
+
+typedef struct GumboInternalUtf8Iterator {
+ // Points at the start of the code point most recently read into '_current'.
+ const char* _start;
+
+ // Points at the mark. The mark is initially set to the beginning of the
+ // input.
+ const char* _mark;
+
+ // Points past the end of the iter, like a past-the-end iterator in the STL.
+ const char* _end;
+
+ // The code point under the cursor.
+ int _current;
+
+ // The width in bytes of the current code point.
+ ptrdiff_t _width;
+
+ // The SourcePosition for the current location.
+ GumboSourcePosition _pos;
+
+ // The SourcePosition for the mark.
+ GumboSourcePosition _mark_pos;
+
+ // Pointer back to the GumboParser instance, for configuration options and
+ // error recording.
+ struct GumboInternalParser* _parser;
+} Utf8Iterator;
+
+// Returns true if this Unicode code point is in the list of characters
+// forbidden by the HTML5 spec, such as NUL bytes and undefined control chars.
+bool utf8_is_invalid_code_point(int c);
+
+// Initializes a new Utf8Iterator from the given byte buffer. The source does
+// not have to be NUL-terminated, but the length must be passed in explicitly.
+void utf8iterator_init(struct GumboInternalParser* parser, const char* source,
+ size_t source_length, Utf8Iterator* iter);
+
+// Advances the current position by one code point.
+void utf8iterator_next(Utf8Iterator* iter);
+
+// Returns the current code point as an integer.
+int utf8iterator_current(const Utf8Iterator* iter);
+
+// Retrieves and fills the output parameter with the current source position.
+void utf8iterator_get_position(
+ const Utf8Iterator* iter, GumboSourcePosition* output);
+
+// Retrieves a character pointer to the start of the current character.
+const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter);
+
+// Retrieves a character pointer to 1 past the end of the buffer. This is
+// necessary for certain state machines and string comparisons that would like
+// to look directly for ASCII text in the buffer without going through the
+// decoder.
+const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter);
+
+// If the upcoming text in the buffer matches the specified prefix (which has
+// length 'length'), consume it and return true. Otherwise, return false with
+// no other effects. If the length of the string would overflow the buffer,
+// this returns false. Note that prefix should not contain null bytes because
+// of the use of strncmp/strncasecmp internally. All existing use-cases adhere
+// to this.
+bool utf8iterator_maybe_consume_match(
+ Utf8Iterator* iter, const char* prefix, size_t length, bool case_sensitive);
+
+// "Marks" a particular location of interest in the input stream, so that it can
+// later be reset() to. There's also the ability to record an error at the
+// point that was marked, as oftentimes that's more useful than the last
+// character before the error was detected.
+void utf8iterator_mark(Utf8Iterator* iter);
+
+// Returns the current input stream position to the mark.
+void utf8iterator_reset(Utf8Iterator* iter);
+
+// Sets the position and original text fields of an error to the value at the
+// mark.
+void utf8iterator_fill_error_at_mark(
+ Utf8Iterator* iter, struct GumboInternalError* error);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // GUMBO_UTF8_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/util.h b/libs/litehtml/src/gumbo/include/gumbo/util.h
new file mode 100644
index 0000000000..98a7d1c466
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/util.h
@@ -0,0 +1,62 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// This contains some utility functions that didn't fit into any of the other
+// headers.
+
+#ifndef GUMBO_UTIL_H_
+#define GUMBO_UTIL_H_
+#ifdef _MSC_VER
+#ifndef _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+#endif
+#include <stdbool.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Forward declaration since it's passed into some of the functions in this
+// header.
+struct GumboInternalParser;
+
+// Utility function for allocating & copying a null-terminated string into a
+// freshly-allocated buffer. This is necessary for proper memory management; we
+// have the convention that all const char* in parse tree structures are
+// freshly-allocated, so if we didn't copy, we'd try to delete a literal string
+// when the parse tree is destroyed.
+char* gumbo_copy_stringz(struct GumboInternalParser* parser, const char* str);
+
+// Allocate a chunk of memory, using the allocator specified in the Parser's
+// config options.
+void* gumbo_parser_allocate(
+ struct GumboInternalParser* parser, size_t num_bytes);
+
+// Deallocate a chunk of memory, using the deallocator specified in the Parser's
+// config options.
+void gumbo_parser_deallocate(struct GumboInternalParser* parser, void* ptr);
+
+// Debug wrapper for printf, to make it easier to turn off debugging info when
+// required.
+void gumbo_debug(const char* format, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_UTIL_H_
diff --git a/libs/litehtml/src/gumbo/include/gumbo/vector.h b/libs/litehtml/src/gumbo/include/gumbo/vector.h
new file mode 100644
index 0000000000..70fe6fa689
--- /dev/null
+++ b/libs/litehtml/src/gumbo/include/gumbo/vector.h
@@ -0,0 +1,67 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#ifndef GUMBO_VECTOR_H_
+#define GUMBO_VECTOR_H_
+
+#include "gumbo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Forward declaration since it's passed into some of the functions in this
+// header.
+struct GumboInternalParser;
+
+// Initializes a new GumboVector with the specified initial capacity.
+void gumbo_vector_init(struct GumboInternalParser* parser,
+ size_t initial_capacity, GumboVector* vector);
+
+// Frees the memory used by a GumboVector. Does not free the contained
+// pointers.
+void gumbo_vector_destroy(
+ struct GumboInternalParser* parser, GumboVector* vector);
+
+// Adds a new element to a GumboVector.
+void gumbo_vector_add(
+ struct GumboInternalParser* parser, void* element, GumboVector* vector);
+
+// Removes and returns the element most recently added to the GumboVector.
+// Ownership is transferred to caller. Capacity is unchanged. If the vector is
+// empty, NULL is returned.
+void* gumbo_vector_pop(struct GumboInternalParser* parser, GumboVector* vector);
+
+// Inserts an element at a specific index. This is potentially O(N) time, but
+// is necessary for some of the spec's behavior.
+void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
+ unsigned int index, GumboVector* vector);
+
+// Removes an element from the vector, or does nothing if the element is not in
+// the vector.
+void gumbo_vector_remove(
+ struct GumboInternalParser* parser, void* element, GumboVector* vector);
+
+// Removes and returns an element at a specific index. Note that this is
+// potentially O(N) time and should be used sparingly.
+void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
+ unsigned int index, GumboVector* vector);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_VECTOR_H_