diff options
Diffstat (limited to 'libs/litehtml/src/gumbo')
| -rw-r--r-- | libs/litehtml/src/gumbo/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | libs/litehtml/src/gumbo/char_ref.c | 2 | ||||
| -rw-r--r-- | libs/litehtml/src/gumbo/char_ref.rl | 2 | ||||
| -rw-r--r-- | libs/litehtml/src/gumbo/include/gumbo.h | 5 | ||||
| -rw-r--r-- | libs/litehtml/src/gumbo/include/gumbo/tag_enum.h | 1 | ||||
| -rw-r--r-- | libs/litehtml/src/gumbo/include/gumbo/tag_gperf.h | 352 | ||||
| -rw-r--r-- | libs/litehtml/src/gumbo/include/gumbo/tag_sizes.h | 2 | ||||
| -rw-r--r-- | libs/litehtml/src/gumbo/include/gumbo/tag_strings.h | 1 | ||||
| -rw-r--r-- | libs/litehtml/src/gumbo/parser.c | 218 |
9 files changed, 258 insertions, 327 deletions
diff --git a/libs/litehtml/src/gumbo/CMakeLists.txt b/libs/litehtml/src/gumbo/CMakeLists.txt index 7282604893..9c0dc5b072 100644 --- a/libs/litehtml/src/gumbo/CMakeLists.txt +++ b/libs/litehtml/src/gumbo/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.11) project(gumbo C) diff --git a/libs/litehtml/src/gumbo/char_ref.c b/libs/litehtml/src/gumbo/char_ref.c index a1d74fd5df..406afad032 100644 --- a/libs/litehtml/src/gumbo/char_ref.c +++ b/libs/litehtml/src/gumbo/char_ref.c @@ -136,7 +136,7 @@ static bool consume_numeric_ref( return false; } - int codepoint = 0; + unsigned int codepoint = 0; bool status = true; do { codepoint = (codepoint * (is_hex ? 16 : 10)) + digit; diff --git a/libs/litehtml/src/gumbo/char_ref.rl b/libs/litehtml/src/gumbo/char_ref.rl index 139a4bbd33..464e1690ba 100644 --- a/libs/litehtml/src/gumbo/char_ref.rl +++ b/libs/litehtml/src/gumbo/char_ref.rl @@ -162,7 +162,7 @@ static bool consume_numeric_ref( return false; } - int codepoint = 0; + unsigned int codepoint = 0; bool status = true; do { codepoint = (codepoint * (is_hex ? 16 : 10)) + digit; diff --git a/libs/litehtml/src/gumbo/include/gumbo.h b/libs/litehtml/src/gumbo/include/gumbo.h index f8137cf061..83cd22d5df 100644 --- a/libs/litehtml/src/gumbo/include/gumbo.h +++ b/libs/litehtml/src/gumbo/include/gumbo.h @@ -389,7 +389,10 @@ typedef enum { */ GUMBO_INSERTION_CONVERTED_FROM_END_TAG = 1 << 4, - /** A flag for nodes that are converted from the parse of an <isindex> tag. */ + /** + * Deprecated! Do not use! + * It's not used anymore since special handling for <isindex> is obsolete. + */ GUMBO_INSERTION_FROM_ISINDEX = 1 << 5, /** A flag for <image> tags that are rewritten as <img>. */ diff --git a/libs/litehtml/src/gumbo/include/gumbo/tag_enum.h b/libs/litehtml/src/gumbo/include/gumbo/tag_enum.h index 7a33d1e114..4237b1dec4 100644 --- a/libs/litehtml/src/gumbo/include/gumbo/tag_enum.h +++ b/libs/litehtml/src/gumbo/include/gumbo/tag_enum.h @@ -151,5 +151,6 @@ GUMBO_TAG_MULTICOL, GUMBO_TAG_NOBR, GUMBO_TAG_SPACER, GUMBO_TAG_DIALOG, +GUMBO_TAG_SEARCH, GUMBO_TAG_TT, GUMBO_TAG_RTC, diff --git a/libs/litehtml/src/gumbo/include/gumbo/tag_gperf.h b/libs/litehtml/src/gumbo/include/gumbo/tag_gperf.h index 525453946f..10691e473d 100644 --- a/libs/litehtml/src/gumbo/include/gumbo/tag_gperf.h +++ b/libs/litehtml/src/gumbo/include/gumbo/tag_gperf.h @@ -2,32 +2,32 @@ static unsigned int tag_hash(register const char *str, register size_t len) { static unsigned short asso_values[] = { - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 30, - 27, 27, 9, 6, 3, 6, 6, 3, 274, 274, - 274, 274, 274, 274, 274, 78, 3, 171, 12, 30, - 39, 129, 12, 105, 24, 156, 9, 51, 60, 87, - 12, 96, 3, 6, 18, 75, 99, 96, 36, 123, - 274, 274, 274, 274, 274, 274, 274, 78, 3, 171, - 12, 30, 39, 129, 12, 105, 24, 156, 9, 51, - 60, 87, 12, 96, 3, 6, 18, 75, 99, 96, - 36, 123, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, - 274, 274, 274, 274, 274, 274, 274, 274, 274 + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 9, + 5, 4, 2, 2, 1, 2, 1, 1, 284, 284, + 284, 284, 284, 284, 284, 56, 62, 156, 5, 14, + 47, 102, 3, 78, 32, 136, 10, 28, 36, 61, + 22, 108, 1, 2, 7, 55, 80, 123, 115, 76, + 284, 284, 284, 284, 284, 284, 284, 56, 62, 156, + 5, 14, 47, 102, 3, 78, 32, 136, 10, 28, + 36, 61, 22, 108, 1, 2, 7, 55, 80, 123, + 115, 76, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284 }; register unsigned int hval = len; @@ -49,239 +49,244 @@ static const unsigned char kGumboTagMap[] = { GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_B, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, GUMBO_TAG_S, GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, GUMBO_TAG_H6, - GUMBO_TAG_SPACER, - GUMBO_TAG_LAST, - GUMBO_TAG_RP, - GUMBO_TAG_LAST, - GUMBO_TAG_P, GUMBO_TAG_H5, - GUMBO_TAG_DIR, - GUMBO_TAG_LAST, GUMBO_TAG_H4, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, - GUMBO_TAG_SMALL, + GUMBO_TAG_H3, + GUMBO_TAG_SPACER, + GUMBO_TAG_H2, GUMBO_TAG_HEADER, - GUMBO_TAG_SAMP, - GUMBO_TAG_LABEL, - GUMBO_TAG_DEL, - GUMBO_TAG_DETAILS, - GUMBO_TAG_RB, - GUMBO_TAG_LEGEND, + GUMBO_TAG_SEARCH, GUMBO_TAG_HEAD, - GUMBO_TAG_BASEFONT, + GUMBO_TAG_H1, + GUMBO_TAG_DETAILS, GUMBO_TAG_SELECT, + GUMBO_TAG_DIR, + GUMBO_TAG_LAST, + GUMBO_TAG_DEL, GUMBO_TAG_LAST, - GUMBO_TAG_H3, GUMBO_TAG_SOURCE, - GUMBO_TAG_BGSOUND, - GUMBO_TAG_H2, - GUMBO_TAG_SUB, - GUMBO_TAG_BASE, + GUMBO_TAG_LEGEND, GUMBO_TAG_DATALIST, - GUMBO_TAG_FOOTER, GUMBO_TAG_LAST, - GUMBO_TAG_H1, - GUMBO_TAG_HGROUP, + GUMBO_TAG_RP, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_SUP, - GUMBO_TAG_PICTURE, - GUMBO_TAG_EMBED, + GUMBO_TAG_LABEL, + GUMBO_TAG_TABLE, + GUMBO_TAG_TEMPLATE, + GUMBO_TAG_SAMP, GUMBO_TAG_LAST, GUMBO_TAG_TIME, GUMBO_TAG_TITLE, - GUMBO_TAG_XMP, - GUMBO_TAG_FONT, - GUMBO_TAG_TABLE, + GUMBO_TAG_METER, GUMBO_TAG_LAST, + GUMBO_TAG_SMALL, + GUMBO_TAG_MATH, GUMBO_TAG_LAST, - GUMBO_TAG_TEMPLATE, - GUMBO_TAG_SCRIPT, GUMBO_TAG_NOBR, - GUMBO_TAG_METER, - GUMBO_TAG_LAST, - GUMBO_TAG_FOREIGNOBJECT, - GUMBO_TAG_FIELDSET, GUMBO_TAG_LAST, GUMBO_TAG_SPAN, + GUMBO_TAG_P, + GUMBO_TAG_EMBED, GUMBO_TAG_NOFRAMES, - GUMBO_TAG_MAP, - GUMBO_TAG_MATH, - GUMBO_TAG_PARAM, - GUMBO_TAG_LAST, - GUMBO_TAG_NOEMBED, - GUMBO_TAG_BR, - GUMBO_TAG_FIGURE, GUMBO_TAG_SECTION, - GUMBO_TAG_LAST, - GUMBO_TAG_LAST, + GUMBO_TAG_NOEMBED, + GUMBO_TAG_NEXTID, GUMBO_TAG_LAST, GUMBO_TAG_NOSCRIPT, - GUMBO_TAG_NEXTID, + GUMBO_TAG_PICTURE, + GUMBO_TAG_MARQUEE, + GUMBO_TAG_FOOTER, + GUMBO_TAG_LAST, GUMBO_TAG_LAST, + GUMBO_TAG_MAP, + GUMBO_TAG_FONT, + GUMBO_TAG_PARAM, GUMBO_TAG_HR, - GUMBO_TAG_MGLYPH, + GUMBO_TAG_SCRIPT, + GUMBO_TAG_HGROUP, GUMBO_TAG_LAST, + GUMBO_TAG_TR, GUMBO_TAG_EM, + GUMBO_TAG_MENUITEM, + GUMBO_TAG_FOREIGNOBJECT, + GUMBO_TAG_MGLYPH, + GUMBO_TAG_DATA, + GUMBO_TAG_APPLET, + GUMBO_TAG_FIELDSET, + GUMBO_TAG_MAIN, + GUMBO_TAG_TEXTAREA, + GUMBO_TAG_ABBR, GUMBO_TAG_LAST, + GUMBO_TAG_FIGURE, + GUMBO_TAG_DL, + GUMBO_TAG_RB, GUMBO_TAG_FORM, - GUMBO_TAG_TR, GUMBO_TAG_LAST, - GUMBO_TAG_MARQUEE, + GUMBO_TAG_BASEFONT, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_BASE, + GUMBO_TAG_LAST, GUMBO_TAG_PROGRESS, + GUMBO_TAG_OBJECT, + GUMBO_TAG_VAR, + GUMBO_TAG_MENU, + GUMBO_TAG_META, + GUMBO_TAG_MO, + GUMBO_TAG_OPTGROUP, + GUMBO_TAG_PRE, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_MULTICOL, - GUMBO_TAG_BUTTON, - GUMBO_TAG_DATA, + GUMBO_TAG_TFOOT, + GUMBO_TAG_DIV, + GUMBO_TAG_PLAINTEXT, + GUMBO_TAG_LI, GUMBO_TAG_LAST, - GUMBO_TAG_APPLET, + GUMBO_TAG_TBODY, + GUMBO_TAG_FIGCAPTION, GUMBO_TAG_LAST, - GUMBO_TAG_DL, + GUMBO_TAG_OPTION, + GUMBO_TAG_BGSOUND, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_OPTGROUP, GUMBO_TAG_LAST, - GUMBO_TAG_ABBR, - GUMBO_TAG_TEXTAREA, - GUMBO_TAG_VAR, - GUMBO_TAG_FIGCAPTION, + GUMBO_TAG_LAST, + GUMBO_TAG_U, + GUMBO_TAG_MS, + GUMBO_TAG_A, + GUMBO_TAG_DD, + GUMBO_TAG_LAST, + GUMBO_TAG_TD, + GUMBO_TAG_FRAMESET, + GUMBO_TAG_MI, + GUMBO_TAG_IMAGE, + GUMBO_TAG_BR, + GUMBO_TAG_FRAME, + GUMBO_TAG_DFN, + GUMBO_TAG_DIALOG, + GUMBO_TAG_NAV, + GUMBO_TAG_B, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_UL, + GUMBO_TAG_LISTING, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_ARTICLE, GUMBO_TAG_RT, - GUMBO_TAG_PRE, - GUMBO_TAG_HTML, - GUMBO_TAG_MENUITEM, - GUMBO_TAG_DIV, + GUMBO_TAG_OL, + GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_LI, - GUMBO_TAG_PLAINTEXT, - GUMBO_TAG_MAIN, GUMBO_TAG_DT, + GUMBO_TAG_ACRONYM, + GUMBO_TAG_TT, + GUMBO_TAG_HTML, + GUMBO_TAG_WBR, + GUMBO_TAG_SUP, + GUMBO_TAG_BODY, + GUMBO_TAG_STYLE, + GUMBO_TAG_STRIKE, GUMBO_TAG_LAST, GUMBO_TAG_BLOCKQUOTE, + GUMBO_TAG_TH, GUMBO_TAG_LAST, - GUMBO_TAG_WBR, - GUMBO_TAG_BODY, - GUMBO_TAG_TT, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_STYLE, - GUMBO_TAG_STRIKE, GUMBO_TAG_LAST, - GUMBO_TAG_FRAMESET, - GUMBO_TAG_OBJECT, - GUMBO_TAG_MENU, - GUMBO_TAG_MO, + GUMBO_TAG_THEAD, + GUMBO_TAG_LAST, + GUMBO_TAG_ASIDE, + GUMBO_TAG_VIDEO, + GUMBO_TAG_I, + GUMBO_TAG_KBD, + GUMBO_TAG_LAST, + GUMBO_TAG_LINK, + GUMBO_TAG_MULTICOL, + GUMBO_TAG_XMP, + GUMBO_TAG_MTEXT, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_CENTER, + GUMBO_TAG_ADDRESS, + GUMBO_TAG_DESC, + GUMBO_TAG_CANVAS, + GUMBO_TAG_COL, + GUMBO_TAG_AREA, + GUMBO_TAG_LAST, + GUMBO_TAG_MARK, + GUMBO_TAG_MN, + GUMBO_TAG_CODE, + GUMBO_TAG_IFRAME, GUMBO_TAG_BIG, - GUMBO_TAG_META, - GUMBO_TAG_TFOOT, - GUMBO_TAG_OUTPUT, GUMBO_TAG_LAST, - GUMBO_TAG_FRAME, + GUMBO_TAG_MALIGNMARK, GUMBO_TAG_LAST, - GUMBO_TAG_U, - GUMBO_TAG_IMAGE, + GUMBO_TAG_KEYGEN, + GUMBO_TAG_SUB, + GUMBO_TAG_SVG, + GUMBO_TAG_CITE, GUMBO_TAG_LAST, - GUMBO_TAG_LISTING, - GUMBO_TAG_DD, - GUMBO_TAG_DIALOG, - GUMBO_TAG_A, - GUMBO_TAG_MS, - GUMBO_TAG_OPTION, GUMBO_TAG_LAST, - GUMBO_TAG_TD, + GUMBO_TAG_COLGROUP, + GUMBO_TAG_ANNOTATION_XML, + GUMBO_TAG_OUTPUT, GUMBO_TAG_LAST, + GUMBO_TAG_INS, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_RUBY, - GUMBO_TAG_MI, GUMBO_TAG_LAST, + GUMBO_TAG_RUBY, GUMBO_TAG_LAST, - GUMBO_TAG_MTEXT, + GUMBO_TAG_INPUT, GUMBO_TAG_LAST, GUMBO_TAG_SUMMARY, - GUMBO_TAG_UL, - GUMBO_TAG_NAV, - GUMBO_TAG_ACRONYM, - GUMBO_TAG_TBODY, GUMBO_TAG_LAST, - GUMBO_TAG_LINK, GUMBO_TAG_LAST, - GUMBO_TAG_DFN, + GUMBO_TAG_TRACK, + GUMBO_TAG_CAPTION, + GUMBO_TAG_IMG, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_OL, - GUMBO_TAG_COL, GUMBO_TAG_LAST, - GUMBO_TAG_TH, GUMBO_TAG_LAST, - GUMBO_TAG_ARTICLE, - GUMBO_TAG_THEAD, - GUMBO_TAG_CENTER, - GUMBO_TAG_Q, - GUMBO_TAG_COLGROUP, - GUMBO_TAG_CANVAS, GUMBO_TAG_LAST, - GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_LAST, - GUMBO_TAG_DESC, - GUMBO_TAG_VIDEO, - GUMBO_TAG_KBD, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, + GUMBO_TAG_Q, GUMBO_TAG_LAST, - GUMBO_TAG_AUDIO, + GUMBO_TAG_BUTTON, GUMBO_TAG_LAST, - GUMBO_TAG_CODE, - GUMBO_TAG_MN, - GUMBO_TAG_INS, - GUMBO_TAG_I, - GUMBO_TAG_ASIDE, GUMBO_TAG_LAST, - GUMBO_TAG_CITE, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_ADDRESS, GUMBO_TAG_LAST, GUMBO_TAG_BDO, - GUMBO_TAG_MARK, - GUMBO_TAG_INPUT, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_MALIGNMARK, + GUMBO_TAG_STRONG, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_AUDIO, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_KEYGEN, - GUMBO_TAG_AREA, GUMBO_TAG_LAST, - GUMBO_TAG_STRONG, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BDI, @@ -290,22 +295,23 @@ static const unsigned char kGumboTagMap[] = { GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_IFRAME, - GUMBO_TAG_ISINDEX, GUMBO_TAG_LAST, - GUMBO_TAG_IMG, - GUMBO_TAG_CAPTION, - GUMBO_TAG_BLINK, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_TRACK, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, - GUMBO_TAG_SVG, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_BLINK, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, @@ -317,5 +323,9 @@ static const unsigned char kGumboTagMap[] = { GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, + GUMBO_TAG_LAST, + GUMBO_TAG_ISINDEX, + GUMBO_TAG_LAST, + GUMBO_TAG_LAST, GUMBO_TAG_RTC }; diff --git a/libs/litehtml/src/gumbo/include/gumbo/tag_sizes.h b/libs/litehtml/src/gumbo/include/gumbo/tag_sizes.h index 5b93c22fa2..9dc9529995 100644 --- a/libs/litehtml/src/gumbo/include/gumbo/tag_sizes.h +++ b/libs/litehtml/src/gumbo/include/gumbo/tag_sizes.h @@ -1,4 +1,4 @@ // Generated via `gentags.py src/tag.in`. // Do not edit; edit src/tag.in instead. // clang-format off -4, 4, 5, 4, 4, 4, 5, 6, 8, 8, 4, 7, 7, 3, 5, 2, 2, 2, 2, 2, 2, 6, 6, 6, 7, 1, 2, 3, 10, 2, 2, 2, 2, 2, 2, 6, 10, 4, 3, 1, 2, 6, 5, 1, 4, 1, 3, 4, 4, 4, 4, 3, 4, 3, 3, 3, 1, 1, 1, 4, 4, 2, 2, 3, 3, 4, 2, 3, 3, 3, 5, 3, 7, 6, 5, 6, 5, 5, 5, 6, 5, 6, 3, 4, 4, 2, 2, 2, 2, 5, 6, 10, 14, 3, 13, 4, 5, 7, 8, 3, 5, 5, 5, 2, 2, 2, 4, 8, 6, 5, 5, 6, 6, 8, 8, 6, 8, 6, 6, 8, 5, 7, 7, 4, 8, 6, 7, 7, 3, 5, 8, 8, 7, 7, 3, 6, 7, 9, 2, 6, 8, 3, 5, 6, 4, 7, 8, 4, 6, 6, 2, 3,
\ No newline at end of file +4, 4, 5, 4, 4, 4, 5, 6, 8, 8, 4, 7, 7, 3, 5, 2, 2, 2, 2, 2, 2, 6, 6, 6, 7, 1, 2, 3, 10, 2, 2, 2, 2, 2, 2, 6, 10, 4, 3, 1, 2, 6, 5, 1, 4, 1, 3, 4, 4, 4, 4, 3, 4, 3, 3, 3, 1, 1, 1, 4, 4, 2, 2, 3, 3, 4, 2, 3, 3, 3, 5, 3, 7, 6, 5, 6, 5, 5, 5, 6, 5, 6, 3, 4, 4, 2, 2, 2, 2, 5, 6, 10, 14, 3, 13, 4, 5, 7, 8, 3, 5, 5, 5, 2, 2, 2, 4, 8, 6, 5, 5, 6, 6, 8, 8, 6, 8, 6, 6, 8, 5, 7, 7, 4, 8, 6, 7, 7, 3, 5, 8, 8, 7, 7, 3, 6, 7, 9, 2, 6, 8, 3, 5, 6, 4, 7, 8, 4, 6, 6, 6, 2, 3,
\ No newline at end of file diff --git a/libs/litehtml/src/gumbo/include/gumbo/tag_strings.h b/libs/litehtml/src/gumbo/include/gumbo/tag_strings.h index 03d793c05d..188bcda00b 100644 --- a/libs/litehtml/src/gumbo/include/gumbo/tag_strings.h +++ b/libs/litehtml/src/gumbo/include/gumbo/tag_strings.h @@ -151,5 +151,6 @@ "nobr", "spacer", "dialog", +"search", "tt", "rtc", diff --git a/libs/litehtml/src/gumbo/parser.c b/libs/litehtml/src/gumbo/parser.c index 968fcc0f41..0ab3f92084 100644 --- a/libs/litehtml/src/gumbo/parser.c +++ b/libs/litehtml/src/gumbo/parser.c @@ -291,17 +291,16 @@ typedef struct _NamespacedAttributeReplacement { static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = { {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK}, - {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK}, - {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK}, - {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK}, - {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK}, - {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK}, - {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK}, - {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML}, - {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}, - {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML}, - {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS}, - {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS}, + {"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK}, + {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK}, + {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK}, + {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK}, + {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK}, + {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK}, + {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}, + {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML}, + {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS}, + {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS}, }; // The "scope marker" for the list of active formatting elements. We use a @@ -1564,12 +1563,12 @@ static bool is_special_node(const GumboNode* node) { (gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE), TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL), - TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR), + TAG(COLGROUP), TAG(DD), TAG(DETAILS), TAG(DIR), TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET), TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME), TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6), TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME), - TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING), + TAG(IMG), TAG(INPUT), TAG(LI), TAG(LINK), TAG(LISTING), TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED), TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P), TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION), @@ -2179,7 +2178,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) { return handle_in_body(parser, token); } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), - TAG(MENUITEM), TAG(LINK)})) { + TAG(LINK)})) { insert_element_from_token(parser, token); pop_current_node(parser); acknowledge_self_closing_tag(parser); @@ -2419,7 +2418,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) { return false; } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), - TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES), + TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) { return handle_in_head(parser, token); @@ -2514,13 +2513,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) { record_end_of_element(state->_current_token, &body->v.element); } return success; - } else if (tag_in(token, kStartTag, - (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE), - TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIALOG), - TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION), - TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), - TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P), - TAG(SECTION), TAG(SUMMARY), TAG(UL)})) { + } else if (tag_in(token, kStartTag, (gumbo_tagset){ + TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER), + TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), + TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), + TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P), TAG(SECTION), + TAG(SUMMARY), TAG(UL), TAG(SEARCH)})) + { bool result = maybe_implicitly_close_p_tag(parser, token); insert_element_from_token(parser, token); return result; @@ -2583,13 +2582,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) { insert_element_from_token(parser, token); state->_frameset_ok = false; return true; - } else if (tag_in(token, kEndTag, - (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE), - TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS), - TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), - TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER), - TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV), - TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) { + } else if (tag_in(token, kEndTag, (gumbo_tagset){ + TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON), + TAG(CENTER), TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL), + TAG(FIELDSET), TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER), + TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL), + TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL), TAG(SEARCH)})) + { GumboTag tag = token->v.end_tag; if (!has_an_element_in_scope(parser, tag)) { parser_add_parse_error(parser, token); @@ -2820,100 +2819,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) { set_frameset_not_ok(parser); return result; } else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) { - parser_add_parse_error(parser, token); - if (parser->_parser_state->_form_element != NULL && - !has_open_element(parser, GUMBO_TAG_TEMPLATE)) { - ignore_token(parser); - return false; - } - acknowledge_self_closing_tag(parser); - maybe_implicitly_close_p_tag(parser, token); - set_frameset_not_ok(parser); - - GumboVector* token_attrs = &token->v.start_tag.attributes; - GumboAttribute* prompt_attr = gumbo_get_attribute(token_attrs, "prompt"); - GumboAttribute* action_attr = gumbo_get_attribute(token_attrs, "action"); - GumboAttribute* name_attr = gumbo_get_attribute(token_attrs, "name"); - - GumboNode* form = insert_element_of_tag_type( - parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX); - if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) { - parser->_parser_state->_form_element = form; - } - if (action_attr) { - gumbo_vector_add(parser, action_attr, &form->v.element.attributes); - } - insert_element_of_tag_type( - parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX); - pop_current_node(parser); // <hr> - - insert_element_of_tag_type( - parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX); - TextNodeBufferState* text_state = &parser->_parser_state->_text_node; - text_state->_start_original_text = token->original_text.data; - text_state->_start_position = token->position; - text_state->_type = GUMBO_NODE_TEXT; - if (prompt_attr) { - int prompt_attr_length = strlen(prompt_attr->value); - gumbo_string_buffer_destroy(parser, &text_state->_buffer); - text_state->_buffer.data = gumbo_copy_stringz(parser, prompt_attr->value); - text_state->_buffer.length = prompt_attr_length; - text_state->_buffer.capacity = prompt_attr_length + 1; - gumbo_destroy_attribute(parser, prompt_attr); - } else { - GumboStringPiece prompt_text = - GUMBO_STRING("This is a searchable index. Enter search keywords: "); - gumbo_string_buffer_append_string( - parser, &prompt_text, &text_state->_buffer); - } - - GumboNode* input = insert_element_of_tag_type( - parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX); - for (unsigned int i = 0; i < token_attrs->length; ++i) { - GumboAttribute* attr = token_attrs->data[i]; - if (attr != prompt_attr && attr != action_attr && attr != name_attr) { - gumbo_vector_add(parser, attr, &input->v.element.attributes); - } - token_attrs->data[i] = NULL; - } - - // All attributes have been successfully transferred and nulled out at this - // point, so the call to ignore_token will free the memory for it without - // touching the attributes. - ignore_token(parser); - - // The name attribute, if present, should be destroyed since it's ignored - // when copying over. The action attribute should be kept since it's moved - // to the form. - if (name_attr) { - gumbo_destroy_attribute(parser, name_attr); - } - - GumboAttribute* name = - gumbo_parser_allocate(parser, sizeof(GumboAttribute)); - GumboStringPiece name_str = GUMBO_STRING("name"); - GumboStringPiece isindex_str = GUMBO_STRING("isindex"); - name->attr_namespace = GUMBO_ATTR_NAMESPACE_NONE; - name->name = gumbo_copy_stringz(parser, "name"); - name->value = gumbo_copy_stringz(parser, "isindex"); - name->original_name = name_str; - name->original_value = isindex_str; - name->name_start = kGumboEmptySourcePosition; - name->name_end = kGumboEmptySourcePosition; - name->value_start = kGumboEmptySourcePosition; - name->value_end = kGumboEmptySourcePosition; - gumbo_vector_add(parser, name, &input->v.element.attributes); - - pop_current_node(parser); // <input> - pop_current_node(parser); // <label> - insert_element_of_tag_type( - parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX); - pop_current_node(parser); // <hr> - pop_current_node(parser); // <form> - if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) { - parser->_parser_state->_form_element = NULL; + reconstruct_active_formatting_elements(parser); + insert_element_from_token(parser, token); + if (token->v.start_tag.is_self_closing) { + pop_current_node(parser); + acknowledge_self_closing_tag(parser); } - return false; + return true; } else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) { run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA); parser->_parser_state->_ignore_next_linefeed = true; @@ -3491,6 +3403,17 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) { } insert_element_from_token(parser, token); return true; + } else if (tag_is(token, kStartTag, GUMBO_TAG_HR)) { + if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) { + pop_current_node(parser); + } + if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) { + pop_current_node(parser); + } + insert_element_from_token(parser, token); + pop_current_node(parser); + acknowledge_self_closing_tag(parser); + return true; } else if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) { GumboVector* open_elements = &parser->_parser_state->_open_elements; if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) && @@ -3854,40 +3777,33 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) { // Fall through to the if-statements below. break; } - // Order matters for these clauses. - if (tag_in(token, kStartTag, - (gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR), - TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT), - TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), - TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI), - TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P), - TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG), - TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U), - TAG(UL), TAG(VAR)}) || - (tag_is(token, kStartTag, GUMBO_TAG_FONT) && - (token_has_attribute(token, "color") || - token_has_attribute(token, "face") || - token_has_attribute(token, "size")))) { + + if (tag_in(token, kStartTag, (gumbo_tagset){ + TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR), TAG(CENTER), + TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT), TAG(EM), TAG(EMBED), + TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6), TAG(HEAD), + TAG(HR), TAG(I), TAG(IMG), TAG(LI), TAG(LISTING), TAG(MENU), TAG(META), + TAG(NOBR), TAG(OL), TAG(P), TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), + TAG(SPAN), TAG(STRONG), TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), + TAG(TT), TAG(U), TAG(UL), TAG(VAR)}) + || tag_in(token, kEndTag, (gumbo_tagset){TAG(BR), TAG(P)}) + || (tag_is(token, kStartTag, GUMBO_TAG_FONT) + && (token_has_attribute(token, "color") + || token_has_attribute(token, "face") + || token_has_attribute(token, "size")))) + { /* Parse error */ parser_add_parse_error(parser, token); - /* - * Fragment case: If the parser was originally created for the HTML - * fragment parsing algorithm, then act as described in the "any other - * start tag" entry below. - */ - if (!is_fragment_parser(parser)) { - do { - pop_current_node(parser); - } while (!(is_mathml_integration_point(get_current_node(parser)) || - is_html_integration_point(get_current_node(parser)) || - get_current_node(parser)->v.element.tag_namespace == - GUMBO_NAMESPACE_HTML)); - parser->_parser_state->_reprocess_current_token = true; - return false; + while (!is_mathml_integration_point(get_current_node(parser)) + && !is_html_integration_point(get_current_node(parser)) + && get_current_node(parser)->v.element.tag_namespace != GUMBO_NAMESPACE_HTML) + { + pop_current_node(parser); } - assert(token->type == GUMBO_TOKEN_START_TAG); + handle_html_content(parser, token); + return false; } if (token->type == GUMBO_TOKEN_START_TAG) { |
