From 79b554fe05162550656da3c4e459fb717dc4dadf Mon Sep 17 00:00:00 2001 From: George Hazan Date: Sat, 22 Mar 2014 13:34:19 +0000 Subject: glib inlined into SameTime git-svn-id: http://svn.miranda-ng.org/main/trunk@8685 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c --- protocols/Sametime/src/glib/gmarkup.c | 2785 +++++++++++++++++++++++++++++++++ 1 file changed, 2785 insertions(+) create mode 100644 protocols/Sametime/src/glib/gmarkup.c (limited to 'protocols/Sametime/src/glib/gmarkup.c') diff --git a/protocols/Sametime/src/glib/gmarkup.c b/protocols/Sametime/src/glib/gmarkup.c new file mode 100644 index 0000000000..1ad1ba8b2f --- /dev/null +++ b/protocols/Sametime/src/glib/gmarkup.c @@ -0,0 +1,2785 @@ +/* gmarkup.c - Simple XML-like parser + * + * Copyright 2000, 2003 Red Hat, Inc. + * Copyright 2007, 2008 Ryan Lortie + * + * GLib is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * GLib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GLib; see the file COPYING.LIB. If not, + * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include "config.h" + +#include +#include +#include +#include +#include + +#include "gmarkup.h" + +#include "galloca.h" +#include "gstrfuncs.h" +#include "gstring.h" +#include "gtestutils.h" +#include "glibintl.h" + +GQuark +g_markup_error_quark (void) +{ + return g_quark_from_static_string ("g-markup-error-quark"); +} + +typedef enum +{ + STATE_START, + STATE_AFTER_OPEN_ANGLE, + STATE_AFTER_CLOSE_ANGLE, + STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */ + STATE_INSIDE_OPEN_TAG_NAME, + STATE_INSIDE_ATTRIBUTE_NAME, + STATE_AFTER_ATTRIBUTE_NAME, + STATE_BETWEEN_ATTRIBUTES, + STATE_AFTER_ATTRIBUTE_EQUALS_SIGN, + STATE_INSIDE_ATTRIBUTE_VALUE_SQ, + STATE_INSIDE_ATTRIBUTE_VALUE_DQ, + STATE_INSIDE_TEXT, + STATE_AFTER_CLOSE_TAG_SLASH, + STATE_INSIDE_CLOSE_TAG_NAME, + STATE_AFTER_CLOSE_TAG_NAME, + STATE_INSIDE_PASSTHROUGH, + STATE_ERROR +} GMarkupParseState; + +typedef struct +{ + const char *prev_element; + const GMarkupParser *prev_parser; + gpointer prev_user_data; +} GMarkupRecursionTracker; + +struct _GMarkupParseContext +{ + const GMarkupParser *parser; + + GMarkupParseFlags flags; + + gint line_number; + gint char_number; + + gpointer user_data; + GDestroyNotify dnotify; + + /* A piece of character data or an element that + * hasn't "ended" yet so we haven't yet called + * the callback for it. + */ + GString *partial_chunk; + GSList *spare_chunks; + + GMarkupParseState state; + GSList *tag_stack; + GSList *tag_stack_gstr; + GSList *spare_list_nodes; + + GString **attr_names; + GString **attr_values; + gint cur_attr; + gint alloc_attrs; + + const gchar *current_text; + gssize current_text_len; + const gchar *current_text_end; + + /* used to save the start of the last interesting thingy */ + const gchar *start; + + const gchar *iter; + + guint document_empty : 1; + guint parsing : 1; + guint awaiting_pop : 1; + gint balance; + + /* subparser support */ + GSList *subparser_stack; /* (GMarkupRecursionTracker *) */ + const char *subparser_element; + gpointer held_user_data; +}; + +/* + * Helpers to reduce our allocation overhead, we have + * a well defined allocation lifecycle. + */ +static GSList * +get_list_node (GMarkupParseContext *context, gpointer data) +{ + GSList *node; + if (context->spare_list_nodes != NULL) + { + node = context->spare_list_nodes; + context->spare_list_nodes = g_slist_remove_link (context->spare_list_nodes, node); + } + else + node = g_slist_alloc(); + node->data = data; + return node; +} + +static void +free_list_node (GMarkupParseContext *context, GSList *node) +{ + node->data = NULL; + context->spare_list_nodes = g_slist_concat (node, context->spare_list_nodes); +} + +static inline void +string_blank (GString *string) +{ + string->str[0] = '\0'; + string->len = 0; +} + +/** + * g_markup_parse_context_new: + * @parser: a #GMarkupParser + * @flags: one or more #GMarkupParseFlags + * @user_data: user data to pass to #GMarkupParser functions + * @user_data_dnotify: user data destroy notifier called when the parse context is freed + * + * Creates a new parse context. A parse context is used to parse + * marked-up documents. You can feed any number of documents into + * a context, as long as no errors occur; once an error occurs, + * the parse context can't continue to parse text (you have to free it + * and create a new parse context). + * + * Return value: a new #GMarkupParseContext + **/ +GMarkupParseContext * +g_markup_parse_context_new (const GMarkupParser *parser, + GMarkupParseFlags flags, + gpointer user_data, + GDestroyNotify user_data_dnotify) +{ + GMarkupParseContext *context; + + g_return_val_if_fail (parser != NULL, NULL); + + context = g_new (GMarkupParseContext, 1); + + context->parser = parser; + context->flags = flags; + context->user_data = user_data; + context->dnotify = user_data_dnotify; + + context->line_number = 1; + context->char_number = 1; + + context->partial_chunk = NULL; + context->spare_chunks = NULL; + context->spare_list_nodes = NULL; + + context->state = STATE_START; + context->tag_stack = NULL; + context->tag_stack_gstr = NULL; + context->attr_names = NULL; + context->attr_values = NULL; + context->cur_attr = -1; + context->alloc_attrs = 0; + + context->current_text = NULL; + context->current_text_len = -1; + context->current_text_end = NULL; + + context->start = NULL; + context->iter = NULL; + + context->document_empty = TRUE; + context->parsing = FALSE; + + context->awaiting_pop = FALSE; + context->subparser_stack = NULL; + context->subparser_element = NULL; + + /* this is only looked at if awaiting_pop = TRUE. initialise anyway. */ + context->held_user_data = NULL; + + context->balance = 0; + + return context; +} + +static void +string_full_free (gpointer ptr, gpointer user_data) +{ + g_string_free (ptr, TRUE); +} + +static void clear_attributes (GMarkupParseContext *context); + +/** + * g_markup_parse_context_free: + * @context: a #GMarkupParseContext + * + * Frees a #GMarkupParseContext. Can't be called from inside + * one of the #GMarkupParser functions. Can't be called while + * a subparser is pushed. + **/ +void +g_markup_parse_context_free (GMarkupParseContext *context) +{ + g_return_if_fail (context != NULL); + g_return_if_fail (!context->parsing); + g_return_if_fail (!context->subparser_stack); + g_return_if_fail (!context->awaiting_pop); + + if (context->dnotify) + (* context->dnotify) (context->user_data); + + clear_attributes (context); + g_free (context->attr_names); + g_free (context->attr_values); + + g_slist_foreach (context->tag_stack_gstr, string_full_free, NULL); + g_slist_free (context->tag_stack_gstr); + g_slist_free (context->tag_stack); + + g_slist_foreach (context->spare_chunks, string_full_free, NULL); + g_slist_free (context->spare_chunks); + g_slist_free (context->spare_list_nodes); + + if (context->partial_chunk) + g_string_free (context->partial_chunk, TRUE); + + g_free (context); +} + +static void pop_subparser_stack (GMarkupParseContext *context); + +static void +mark_error (GMarkupParseContext *context, + GError *error) +{ + context->state = STATE_ERROR; + + if (context->parser->error) + (*context->parser->error) (context, error, context->user_data); + + /* report the error all the way up to free all the user-data */ + while (context->subparser_stack) + { + pop_subparser_stack (context); + context->awaiting_pop = FALSE; /* already been freed */ + + if (context->parser->error) + (*context->parser->error) (context, error, context->user_data); + } +} + +static void set_error (GMarkupParseContext *context, + GError **error, + GMarkupError code, + const gchar *format, + ...) G_GNUC_PRINTF (4, 5); + +static void +set_error_literal (GMarkupParseContext *context, + GError **error, + GMarkupError code, + const gchar *message) +{ + GError *tmp_error; + + tmp_error = g_error_new_literal (G_MARKUP_ERROR, code, message); + + g_prefix_error (&tmp_error, + _("Error on line %d char %d: "), + context->line_number, + context->char_number); + + mark_error (context, tmp_error); + + g_propagate_error (error, tmp_error); +} + +static void +set_error (GMarkupParseContext *context, + GError **error, + GMarkupError code, + const gchar *format, + ...) +{ + gchar *s; + gchar *s_valid; + va_list args; + + va_start (args, format); + s = g_strdup_vprintf (format, args); + va_end (args); + + /* Make sure that the GError message is valid UTF-8 even if it is + * complaining about invalid UTF-8 in the markup: */ + s_valid = _g_utf8_make_valid (s); + set_error_literal (context, error, code, s); + + g_free (s); + g_free (s_valid); +} + +static void +propagate_error (GMarkupParseContext *context, + GError **dest, + GError *src) +{ + if (context->flags & G_MARKUP_PREFIX_ERROR_POSITION) + g_prefix_error (&src, + _("Error on line %d char %d: "), + context->line_number, + context->char_number); + + mark_error (context, src); + + g_propagate_error (dest, src); +} + +#define IS_COMMON_NAME_END_CHAR(c) \ + ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ') + +static gboolean +slow_name_validate (GMarkupParseContext *context, const char *name, GError **error) +{ + const char *p = name; + + if (!g_utf8_validate (name, strlen (name), NULL)) + { + set_error (context, error, G_MARKUP_ERROR_BAD_UTF8, + _("Invalid UTF-8 encoded text in name - not valid '%s'"), name); + return FALSE; + } + + if (!(g_ascii_isalpha (*p) || + (!IS_COMMON_NAME_END_CHAR (*p) && + (*p == '_' || + *p == ':' || + g_unichar_isalpha (g_utf8_get_char (p)))))) + { + set_error (context, error, G_MARKUP_ERROR_PARSE, + _("'%s' is not a valid name "), name); + return FALSE; + } + + for (p = g_utf8_next_char (name); *p != '\0'; p = g_utf8_next_char (p)) + { + /* is_name_char */ + if (!(g_ascii_isalnum (*p) || + (!IS_COMMON_NAME_END_CHAR (*p) && + (*p == '.' || + *p == '-' || + *p == '_' || + *p == ':' || + g_unichar_isalpha (g_utf8_get_char (p)))))) + { + set_error (context, error, G_MARKUP_ERROR_PARSE, + _("'%s' is not a valid name: '%c' "), name, *p); + return FALSE; + } + } + return TRUE; +} + +/* + * Use me for elements, attributes etc. + */ +static gboolean +name_validate (GMarkupParseContext *context, const char *name, GError **error) +{ + char mask; + const char *p; + + /* name start char */ + p = name; + if (G_UNLIKELY (IS_COMMON_NAME_END_CHAR (*p) || + !(g_ascii_isalpha (*p) || *p == '_' || *p == ':'))) + goto slow_validate; + + for (mask = *p++; *p != '\0'; p++) + { + mask |= *p; + + /* is_name_char */ + if (G_UNLIKELY (!(g_ascii_isalnum (*p) || + (!IS_COMMON_NAME_END_CHAR (*p) && + (*p == '.' || + *p == '-' || + *p == '_' || + *p == ':'))))) + goto slow_validate; + } + + if (mask & 0x80) /* un-common / non-ascii */ + goto slow_validate; + + return TRUE; + + slow_validate: + return slow_name_validate (context, name, error); +} + +static gboolean +text_validate (GMarkupParseContext *context, const char *p, int len, GError **error) +{ + if (!g_utf8_validate (p, len, NULL)) + { + set_error (context, error, G_MARKUP_ERROR_BAD_UTF8, + _("Invalid UTF-8 encoded text in name - not valid '%s'"), p); + return FALSE; + } + else + return TRUE; +} + +static gchar* +char_str (gunichar c, + gchar *buf) +{ + memset (buf, 0, 8); + g_unichar_to_utf8 (c, buf); + return buf; +} + +static gchar* +utf8_str (const gchar *utf8, + gchar *buf) +{ + char_str (g_utf8_get_char (utf8), buf); + return buf; +} + +static void +set_unescape_error (GMarkupParseContext *context, + GError **error, + const gchar *remaining_text, + GMarkupError code, + const gchar *format, + ...) +{ + GError *tmp_error; + gchar *s; + va_list args; + gint remaining_newlines; + const gchar *p; + + remaining_newlines = 0; + p = remaining_text; + while (*p != '\0') + { + if (*p == '\n') + ++remaining_newlines; + ++p; + } + + va_start (args, format); + s = g_strdup_vprintf (format, args); + va_end (args); + + tmp_error = g_error_new (G_MARKUP_ERROR, + code, + _("Error on line %d: %s"), + context->line_number - remaining_newlines, + s); + + g_free (s); + + mark_error (context, tmp_error); + + g_propagate_error (error, tmp_error); +} + +/* + * re-write the GString in-place, unescaping anything that escaped. + * most XML does not contain entities, or escaping. + */ +static gboolean +unescape_gstring_inplace (GMarkupParseContext *context, + GString *string, + gboolean *is_ascii, + GError **error) +{ + char mask, *to; + int line_num = 1; + const char *from; + gboolean normalize_attribute; + + *is_ascii = FALSE; + + /* are we unescaping an attribute or not ? */ + if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ || + context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ) + normalize_attribute = TRUE; + else + normalize_attribute = FALSE; + + /* + * Meeks' theorum: unescaping can only shrink text. + * for < etc. this is obvious, for ￿ more + * thought is required, but this is patently so. + */ + mask = 0; + for (from = to = string->str; *from != '\0'; from++, to++) + { + *to = *from; + + mask |= *to; + if (*to == '\n') + line_num++; + if (normalize_attribute && (*to == '\t' || *to == '\n')) + *to = ' '; + if (*to == '\r') + { + *to = normalize_attribute ? ' ' : '\n'; + if (from[1] == '\n') + from++; + } + if (*from == '&') + { + from++; + if (*from == '#') + { + gboolean is_hex = FALSE; + gulong l; + gchar *end = NULL; + + from++; + + if (*from == 'x') + { + is_hex = TRUE; + from++; + } + + /* digit is between start and p */ + errno = 0; + if (is_hex) + l = strtoul (from, &end, 16); + else + l = strtoul (from, &end, 10); + + if (end == from || errno != 0) + { + set_unescape_error (context, error, + from, G_MARKUP_ERROR_PARSE, + _("Failed to parse '%-.*s', which " + "should have been a digit " + "inside a character reference " + "(ê for example) - perhaps " + "the digit is too large"), + end - from, from); + return FALSE; + } + else if (*end != ';') + { + set_unescape_error (context, error, + from, G_MARKUP_ERROR_PARSE, + _("Character reference did not end with a " + "semicolon; " + "most likely you used an ampersand " + "character without intending to start " + "an entity - escape ampersand as &")); + return FALSE; + } + else + { + /* characters XML 1.1 permits */ + if ((0 < l && l <= 0xD7FF) || + (0xE000 <= l && l <= 0xFFFD) || + (0x10000 <= l && l <= 0x10FFFF)) + { + gchar buf[8]; + char_str (l, buf); + strcpy (to, buf); + to += strlen (buf) - 1; + from = end; + if (l >= 0x80) /* not ascii */ + mask |= 0x80; + } + else + { + set_unescape_error (context, error, + from, G_MARKUP_ERROR_PARSE, + _("Character reference '%-.*s' does not " + "encode a permitted character"), + end - from, from); + return FALSE; + } + } + } + + else if (strncmp (from, "lt;", 3) == 0) + { + *to = '<'; + from += 2; + } + else if (strncmp (from, "gt;", 3) == 0) + { + *to = '>'; + from += 2; + } + else if (strncmp (from, "amp;", 4) == 0) + { + *to = '&'; + from += 3; + } + else if (strncmp (from, "quot;", 5) == 0) + { + *to = '"'; + from += 4; + } + else if (strncmp (from, "apos;", 5) == 0) + { + *to = '\''; + from += 4; + } + else + { + if (*from == ';') + set_unescape_error (context, error, + from, G_MARKUP_ERROR_PARSE, + _("Empty entity '&;' seen; valid " + "entities are: & " < > '")); + else + { + const char *end = strchr (from, ';'); + if (end) + set_unescape_error (context, error, + from, G_MARKUP_ERROR_PARSE, + _("Entity name '%-.*s' is not known"), + end-from, from); + else + set_unescape_error (context, error, + from, G_MARKUP_ERROR_PARSE, + _("Entity did not end with a semicolon; " + "most likely you used an ampersand " + "character without intending to start " + "an entity - escape ampersand as &")); + } + return FALSE; + } + } + } + + g_assert (to - string->str <= string->len); + if (to - string->str != string->len) + g_string_truncate (string, to - string->str); + + *is_ascii = !(mask & 0x80); + + return TRUE; +} + +static inline gboolean +advance_char (GMarkupParseContext *context) +{ + context->iter++; + context->char_number++; + + if (G_UNLIKELY (context->iter == context->current_text_end)) + return FALSE; + + else if (G_UNLIKELY (*context->iter == '\n')) + { + context->line_number++; + context->char_number = 1; + } + + return TRUE; +} + +static inline gboolean +xml_isspace (char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} + +static void +skip_spaces (GMarkupParseContext *context) +{ + do + { + if (!xml_isspace (*context->iter)) + return; + } + while (advance_char (context)); +} + +static void +advance_to_name_end (GMarkupParseContext *context) +{ + do + { + if (IS_COMMON_NAME_END_CHAR (*(context->iter))) + return; + if (xml_isspace (*(context->iter))) + return; + } + while (advance_char (context)); +} + +static void +release_chunk (GMarkupParseContext *context, GString *str) +{ + GSList *node; + if (!str) + return; + if (str->allocated_len > 256) + { /* large strings are unusual and worth freeing */ + g_string_free (str, TRUE); + return; + } + string_blank (str); + node = get_list_node (context, str); + context->spare_chunks = g_slist_concat (node, context->spare_chunks); +} + +static void +add_to_partial (GMarkupParseContext *context, + const gchar *text_start, + const gchar *text_end) +{ + if (context->partial_chunk == NULL) + { /* allocate a new chunk to parse into */ + + if (context->spare_chunks != NULL) + { + GSList *node = context->spare_chunks; + context->spare_chunks = g_slist_remove_link (context->spare_chunks, node); + context->partial_chunk = node->data; + free_list_node (context, node); + } + else + context->partial_chunk = g_string_sized_new (MAX (28, text_end - text_start)); + } + + if (text_start != text_end) + g_string_insert_len (context->partial_chunk, -1, + text_start, text_end - text_start); +} + +static inline void +truncate_partial (GMarkupParseContext *context) +{ + if (context->partial_chunk != NULL) + string_blank (context->partial_chunk); +} + +static inline const gchar* +current_element (GMarkupParseContext *context) +{ + return context->tag_stack->data; +} + +static void +pop_subparser_stack (GMarkupParseContext *context) +{ + GMarkupRecursionTracker *tracker; + + g_assert (context->subparser_stack); + + tracker = context->subparser_stack->data; + + context->awaiting_pop = TRUE; + context->held_user_data = context->user_data; + + context->user_data = tracker->prev_user_data; + context->parser = tracker->prev_parser; + context->subparser_element = tracker->prev_element; + g_slice_free (GMarkupRecursionTracker, tracker); + + context->subparser_stack = g_slist_delete_link (context->subparser_stack, + context->subparser_stack); +} + +static void +push_partial_as_tag (GMarkupParseContext *context) +{ + GString *str = context->partial_chunk; + /* sadly, this is exported by gmarkup_get_element_stack as-is */ + context->tag_stack = g_slist_concat (get_list_node (context, str->str), context->tag_stack); + context->tag_stack_gstr = g_slist_concat (get_list_node (context, str), context->tag_stack_gstr); + context->partial_chunk = NULL; +} + +static void +pop_tag (GMarkupParseContext *context) +{ + GSList *nodea, *nodeb; + + nodea = context->tag_stack; + nodeb = context->tag_stack_gstr; + release_chunk (context, nodeb->data); + context->tag_stack = g_slist_remove_link (context->tag_stack, nodea); + context->tag_stack_gstr = g_slist_remove_link (context->tag_stack_gstr, nodeb); + free_list_node (context, nodea); + free_list_node (context, nodeb); +} + +static void +possibly_finish_subparser (GMarkupParseContext *context) +{ + if (current_element (context) == context->subparser_element) + pop_subparser_stack (context); +} + +static void +ensure_no_outstanding_subparser (GMarkupParseContext *context) +{ + if (context->awaiting_pop) + g_critical ("During the first end_element call after invoking a " + "subparser you must pop the subparser stack and handle " + "the freeing of the subparser user_data. This can be " + "done by calling the end function of the subparser. " + "Very probably, your program just leaked memory."); + + /* let valgrind watch the pointer disappear... */ + context->held_user_data = NULL; + context->awaiting_pop = FALSE; +} + +static const gchar* +current_attribute (GMarkupParseContext *context) +{ + g_assert (context->cur_attr >= 0); + return context->attr_names[context->cur_attr]->str; +} + +static void +add_attribute (GMarkupParseContext *context, GString *str) +{ + if (context->cur_attr + 2 >= context->alloc_attrs) + { + context->alloc_attrs += 5; /* silly magic number */ + context->attr_names = g_realloc (context->attr_names, sizeof(GString*)*context->alloc_attrs); + context->attr_values = g_realloc (context->attr_values, sizeof(GString*)*context->alloc_attrs); + } + context->cur_attr++; + context->attr_names[context->cur_attr] = str; + context->attr_values[context->cur_attr] = NULL; + context->attr_names[context->cur_attr+1] = NULL; + context->attr_values[context->cur_attr+1] = NULL; +} + +static void +clear_attributes (GMarkupParseContext *context) +{ + /* Go ahead and free the attributes. */ + for (; context->cur_attr >= 0; context->cur_attr--) + { + int pos = context->cur_attr; + release_chunk (context, context->attr_names[pos]); + release_chunk (context, context->attr_values[pos]); + context->attr_names[pos] = context->attr_values[pos] = NULL; + } + g_assert (context->cur_attr == -1); + g_assert (context->attr_names == NULL || + context->attr_names[0] == NULL); + g_assert (context->attr_values == NULL || + context->attr_values[0] == NULL); +} + +/* This has to be a separate function to ensure the alloca's + are unwound on exit - otherwise we grow & blow the stack + with large documents */ +static inline void +emit_start_element (GMarkupParseContext *context, GError **error) +{ + int i; + const gchar *start_name; + const gchar **attr_names; + const gchar **attr_values; + GError *tmp_error; + + attr_names = g_newa (const gchar *, context->cur_attr + 2); + attr_values = g_newa (const gchar *, context->cur_attr + 2); + for (i = 0; i < context->cur_attr + 1; i++) + { + attr_names[i] = context->attr_names[i]->str; + attr_values[i] = context->attr_values[i]->str; + } + attr_names[i] = NULL; + attr_values[i] = NULL; + + /* Call user callback for element start */ + tmp_error = NULL; + start_name = current_element (context); + + if (context->parser->start_element && + name_validate (context, start_name, error)) + (* context->parser->start_element) (context, + start_name, + (const gchar **)attr_names, + (const gchar **)attr_values, + context->user_data, + &tmp_error); + clear_attributes (context); + + if (tmp_error != NULL) + propagate_error (context, error, tmp_error); +} + +/** + * g_markup_parse_context_parse: + * @context: a #GMarkupParseContext + * @text: chunk of text to parse + * @text_len: length of @text in bytes + * @error: return location for a #GError + * + * Feed some data to the #GMarkupParseContext. The data need not + * be valid UTF-8; an error will be signaled if it's invalid. + * The data need not be an entire document; you can feed a document + * into the parser incrementally, via multiple calls to this function. + * Typically, as you receive data from a network connection or file, + * you feed each received chunk of data into this function, aborting + * the process if an error occurs. Once an error is reported, no further + * data may be fed to the #GMarkupParseContext; all errors are fatal. + * + * Return value: %FALSE if an error occurred, %TRUE on success + **/ +gboolean +g_markup_parse_context_parse (GMarkupParseContext *context, + const gchar *text, + gssize text_len, + GError **error) +{ + g_return_val_if_fail (context != NULL, FALSE); + g_return_val_if_fail (text != NULL, FALSE); + g_return_val_if_fail (context->state != STATE_ERROR, FALSE); + g_return_val_if_fail (!context->parsing, FALSE); + + if (text_len < 0) + text_len = strlen (text); + + if (text_len == 0) + return TRUE; + + context->parsing = TRUE; + + + context->current_text = text; + context->current_text_len = text_len; + context->current_text_end = context->current_text + text_len; + context->iter = context->current_text; + context->start = context->iter; + + if (context->current_text_len == 0) + goto finished; + + while (context->iter != context->current_text_end) + { + switch (context->state) + { + case STATE_START: + /* Possible next state: AFTER_OPEN_ANGLE */ + + g_assert (context->tag_stack == NULL); + + /* whitespace is ignored outside of any elements */ + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + if (*context->iter == '<') + { + /* Move after the open angle */ + advance_char (context); + + context->state = STATE_AFTER_OPEN_ANGLE; + + /* this could start a passthrough */ + context->start = context->iter; + + /* document is now non-empty */ + context->document_empty = FALSE; + } + else + { + set_error_literal (context, + error, + G_MARKUP_ERROR_PARSE, + _("Document must begin with an element (e.g. )")); + } + } + break; + + case STATE_AFTER_OPEN_ANGLE: + /* Possible next states: INSIDE_OPEN_TAG_NAME, + * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH + */ + if (*context->iter == '?' || + *context->iter == '!') + { + /* include < in the passthrough */ + const gchar *openangle = "<"; + add_to_partial (context, openangle, openangle + 1); + context->start = context->iter; + context->balance = 1; + context->state = STATE_INSIDE_PASSTHROUGH; + } + else if (*context->iter == '/') + { + /* move after it */ + advance_char (context); + + context->state = STATE_AFTER_CLOSE_TAG_SLASH; + } + else if (!IS_COMMON_NAME_END_CHAR (*(context->iter))) + { + context->state = STATE_INSIDE_OPEN_TAG_NAME; + + /* start of tag name */ + context->start = context->iter; + } + else + { + gchar buf[8]; + + set_error (context, + error, + G_MARKUP_ERROR_PARSE, + _("'%s' is not a valid character following " + "a '<' character; it may not begin an " + "element name"), + utf8_str (context->iter, buf)); + } + break; + + /* The AFTER_CLOSE_ANGLE state is actually sort of + * broken, because it doesn't correspond to a range + * of characters in the input stream as the others do, + * and thus makes things harder to conceptualize + */ + case STATE_AFTER_CLOSE_ANGLE: + /* Possible next states: INSIDE_TEXT, STATE_START */ + if (context->tag_stack == NULL) + { + context->start = NULL; + context->state = STATE_START; + } + else + { + context->start = context->iter; + context->state = STATE_INSIDE_TEXT; + } + break; + + case STATE_AFTER_ELISION_SLASH: + /* Possible next state: AFTER_CLOSE_ANGLE */ + + { + /* We need to pop the tag stack and call the end_element + * function, since this is the close tag + */ + GError *tmp_error = NULL; + + g_assert (context->tag_stack != NULL); + + possibly_finish_subparser (context); + + tmp_error = NULL; + if (context->parser->end_element) + (* context->parser->end_element) (context, + current_element (context), + context->user_data, + &tmp_error); + + ensure_no_outstanding_subparser (context); + + if (tmp_error) + { + mark_error (context, tmp_error); + g_propagate_error (error, tmp_error); + } + else + { + if (*context->iter == '>') + { + /* move after the close angle */ + advance_char (context); + context->state = STATE_AFTER_CLOSE_ANGLE; + } + else + { + gchar buf[8]; + + set_error (context, + error, + G_MARKUP_ERROR_PARSE, + _("Odd character '%s', expected a '>' character " + "to end the empty-element tag '%s'"), + utf8_str (context->iter, buf), + current_element (context)); + } + } + pop_tag (context); + } + break; + + case STATE_INSIDE_OPEN_TAG_NAME: + /* Possible next states: BETWEEN_ATTRIBUTES */ + + /* if there's a partial chunk then it's the first part of the + * tag name. If there's a context->start then it's the start + * of the tag name in current_text, the partial chunk goes + * before that start though. + */ + advance_to_name_end (context); + + if (context->iter == context->current_text_end) + { + /* The name hasn't necessarily ended. Merge with + * partial chunk, leave state unchanged. + */ + add_to_partial (context, context->start, context->iter); + } + else + { + /* The name has ended. Combine it with the partial chunk + * if any; push it on the stack; enter next state. + */ + add_to_partial (context, context->start, context->iter); + push_partial_as_tag (context); + + context->state = STATE_BETWEEN_ATTRIBUTES; + context->start = NULL; + } + break; + + case STATE_INSIDE_ATTRIBUTE_NAME: + /* Possible next states: AFTER_ATTRIBUTE_NAME */ + + advance_to_name_end (context); + add_to_partial (context, context->start, context->iter); + + /* read the full name, if we enter the equals sign state + * then add the attribute to the list (without the value), + * otherwise store a partial chunk to be prepended later. + */ + if (context->iter != context->current_text_end) + context->state = STATE_AFTER_ATTRIBUTE_NAME; + break; + + case STATE_AFTER_ATTRIBUTE_NAME: + /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */ + + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + /* The name has ended. Combine it with the partial chunk + * if any; push it on the stack; enter next state. + */ + if (!name_validate (context, context->partial_chunk->str, error)) + break; + + add_attribute (context, context->partial_chunk); + + context->partial_chunk = NULL; + context->start = NULL; + + if (*context->iter == '=') + { + advance_char (context); + context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN; + } + else + { + gchar buf[8]; + + set_error (context, + error, + G_MARKUP_ERROR_PARSE, + _("Odd character '%s', expected a '=' after " + "attribute name '%s' of element '%s'"), + utf8_str (context->iter, buf), + current_attribute (context), + current_element (context)); + + } + } + break; + + case STATE_BETWEEN_ATTRIBUTES: + /* Possible next states: AFTER_CLOSE_ANGLE, + * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME + */ + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + if (*context->iter == '/') + { + advance_char (context); + context->state = STATE_AFTER_ELISION_SLASH; + } + else if (*context->iter == '>') + { + advance_char (context); + context->state = STATE_AFTER_CLOSE_ANGLE; + } + else if (!IS_COMMON_NAME_END_CHAR (*(context->iter))) + { + context->state = STATE_INSIDE_ATTRIBUTE_NAME; + /* start of attribute name */ + context->start = context->iter; + } + else + { + gchar buf[8]; + + set_error (context, + error, + G_MARKUP_ERROR_PARSE, + _("Odd character '%s', expected a '>' or '/' " + "character to end the start tag of " + "element '%s', or optionally an attribute; " + "perhaps you used an invalid character in " + "an attribute name"), + utf8_str (context->iter, buf), + current_element (context)); + } + + /* If we're done with attributes, invoke + * the start_element callback + */ + if (context->state == STATE_AFTER_ELISION_SLASH || + context->state == STATE_AFTER_CLOSE_ANGLE) + emit_start_element (context, error); + } + break; + + case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: + /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */ + + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + if (*context->iter == '"') + { + advance_char (context); + context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ; + context->start = context->iter; + } + else if (*context->iter == '\'') + { + advance_char (context); + context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ; + context->start = context->iter; + } + else + { + gchar buf[8]; + + set_error (context, + error, + G_MARKUP_ERROR_PARSE, + _("Odd character '%s', expected an open quote mark " + "after the equals sign when giving value for " + "attribute '%s' of element '%s'"), + utf8_str (context->iter, buf), + current_attribute (context), + current_element (context)); + } + } + break; + + case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: + case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: + /* Possible next states: BETWEEN_ATTRIBUTES */ + { + gchar delim; + + if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ) + { + delim = '\''; + } + else + { + delim = '"'; + } + + do + { + if (*context->iter == delim) + break; + } + while (advance_char (context)); + } + if (context->iter == context->current_text_end) + { + /* The value hasn't necessarily ended. Merge with + * partial chunk, leave state unchanged. + */ + add_to_partial (context, context->start, context->iter); + } + else + { + gboolean is_ascii; + /* The value has ended at the quote mark. Combine it + * with the partial chunk if any; set it for the current + * attribute. + */ + add_to_partial (context, context->start, context->iter); + + g_assert (context->cur_attr >= 0); + + if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) && + (is_ascii || text_validate (context, context->partial_chunk->str, + context->partial_chunk->len, error))) + { + /* success, advance past quote and set state. */ + context->attr_values[context->cur_attr] = context->partial_chunk; + context->partial_chunk = NULL; + advance_char (context); + context->state = STATE_BETWEEN_ATTRIBUTES; + context->start = NULL; + } + + truncate_partial (context); + } + break; + + case STATE_INSIDE_TEXT: + /* Possible next states: AFTER_OPEN_ANGLE */ + do + { + if (*context->iter == '<') + break; + } + while (advance_char (context)); + + /* The text hasn't necessarily ended. Merge with + * partial chunk, leave state unchanged. + */ + + add_to_partial (context, context->start, context->iter); + + if (context->iter != context->current_text_end) + { + gboolean is_ascii; + + /* The text has ended at the open angle. Call the text + * callback. + */ + + if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) && + (is_ascii || text_validate (context, context->partial_chunk->str, + context->partial_chunk->len, error))) + { + GError *tmp_error = NULL; + + if (context->parser->text) + (*context->parser->text) (context, + context->partial_chunk->str, + context->partial_chunk->len, + context->user_data, + &tmp_error); + + if (tmp_error == NULL) + { + /* advance past open angle and set state. */ + advance_char (context); + context->state = STATE_AFTER_OPEN_ANGLE; + /* could begin a passthrough */ + context->start = context->iter; + } + else + propagate_error (context, error, tmp_error); + } + + truncate_partial (context); + } + break; + + case STATE_AFTER_CLOSE_TAG_SLASH: + /* Possible next state: INSIDE_CLOSE_TAG_NAME */ + if (!IS_COMMON_NAME_END_CHAR (*(context->iter))) + { + context->state = STATE_INSIDE_CLOSE_TAG_NAME; + + /* start of tag name */ + context->start = context->iter; + } + else + { + gchar buf[8]; + + set_error (context, + error, + G_MARKUP_ERROR_PARSE, + _("'%s' is not a valid character following " + "the characters 'iter, buf), + utf8_str (context->iter, buf)); + } + break; + + case STATE_INSIDE_CLOSE_TAG_NAME: + /* Possible next state: AFTER_CLOSE_TAG_NAME */ + advance_to_name_end (context); + add_to_partial (context, context->start, context->iter); + + if (context->iter != context->current_text_end) + context->state = STATE_AFTER_CLOSE_TAG_NAME; + break; + + case STATE_AFTER_CLOSE_TAG_NAME: + /* Possible next state: AFTER_CLOSE_TAG_SLASH */ + + skip_spaces (context); + + if (context->iter != context->current_text_end) + { + GString *close_name; + + close_name = context->partial_chunk; + context->partial_chunk = NULL; + + if (*context->iter != '>') + { + gchar buf[8]; + + set_error (context, + error, + G_MARKUP_ERROR_PARSE, + _("'%s' is not a valid character following " + "the close element name '%s'; the allowed " + "character is '>'"), + utf8_str (context->iter, buf), + close_name->str); + } + else if (context->tag_stack == NULL) + { + set_error (context, + error, + G_MARKUP_ERROR_PARSE, + _("Element '%s' was closed, no element " + "is currently open"), + close_name->str); + } + else if (strcmp (close_name->str, current_element (context)) != 0) + { + set_error (context, + error, + G_MARKUP_ERROR_PARSE, + _("Element '%s' was closed, but the currently " + "open element is '%s'"), + close_name->str, + current_element (context)); + } + else + { + GError *tmp_error; + advance_char (context); + context->state = STATE_AFTER_CLOSE_ANGLE; + context->start = NULL; + + possibly_finish_subparser (context); + + /* call the end_element callback */ + tmp_error = NULL; + if (context->parser->end_element) + (* context->parser->end_element) (context, + close_name->str, + context->user_data, + &tmp_error); + + ensure_no_outstanding_subparser (context); + pop_tag (context); + + if (tmp_error) + propagate_error (context, error, tmp_error); + } + context->partial_chunk = close_name; + truncate_partial (context); + } + break; + + case STATE_INSIDE_PASSTHROUGH: + /* Possible next state: AFTER_CLOSE_ANGLE */ + do + { + if (*context->iter == '<') + context->balance++; + if (*context->iter == '>') + { + gchar *str; + gsize len; + + context->balance--; + add_to_partial (context, context->start, context->iter); + context->start = context->iter; + + str = context->partial_chunk->str; + len = context->partial_chunk->len; + + if (str[1] == '?' && str[len - 1] == '?') + break; + if (strncmp (str, "