/* gmarkup.c - Simple XML-like parser
 *
 *  Copyright 2000, 2003 Red Hat, Inc.
 *  Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
 *
 * GLib is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * GLib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GLib; see the file COPYING.LIB.  If not,
 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 *   Boston, MA 02111-1307, USA.
 */

#include "config.h"

#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

#include "gmarkup.h"

#include "galloca.h"
#include "gstrfuncs.h"
#include "gstring.h"
#include "gtestutils.h"
#include "glibintl.h"

/* Returns the quark registered for the static string
 * "g-markup-error-quark".
 */
GQuark
g_markup_error_quark (void)
{
  GQuark quark = g_quark_from_static_string ("g-markup-error-quark");

  return quark;
}

/* Positions of the parser's state machine; stored in
 * GMarkupParseContext.state.
 */
typedef enum
{
  STATE_START,
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,
  STATE_ERROR
} GMarkupParseState;

/* Snapshot of the parser, user data and subparser element that were
 * active before a subparser was pushed; one entry lives on
 * GMarkupParseContext.subparser_stack per pushed subparser.
 */
typedef struct
{
  const char *prev_element;
  const GMarkupParser *prev_parser;
  gpointer prev_user_data;
} GMarkupRecursionTracker;

struct _GMarkupParseContext
{
  const GMarkupParser *parser;

  GMarkupParseFlags flags;

  /* position used when prefixing error messages */
  gint line_number;
  gint char_number;

  gpointer user_data;
  GDestroyNotify dnotify;

  /* A piece of character data or an element that
   * hasn't "ended" yet so we haven't yet called
   * the callback for it.
   */
  GString *partial_chunk;
  /* blanked GStrings kept around for reuse (see release_chunk) */
  GSList *spare_chunks;

  GMarkupParseState state;
  /* tag_stack holds the element names (char *), tag_stack_gstr the
   * GStrings those names belong to; both are pushed/popped together */
  GSList *tag_stack;
  GSList *tag_stack_gstr;
  /* detached GSList nodes kept around for reuse (see get_list_node) */
  GSList *spare_list_nodes;

  /* NULL-terminated parallel arrays; cur_attr is the index of the
   * last attribute (-1 when there is none), alloc_attrs the number
   * of slots allocated */
  GString **attr_names;
  GString **attr_values;
  gint cur_attr;
  gint alloc_attrs;

  const gchar *current_text;
  gssize       current_text_len;
  const gchar *current_text_end;

  /* used to save the start of the last interesting thingy */
  const gchar *start;

  const gchar *iter;

  guint document_empty : 1;
  guint parsing : 1;
  guint awaiting_pop : 1;
  gint balance;

  /* subparser support */
  GSList *subparser_stack; /* (GMarkupRecursionTracker *) */
  const char *subparser_element;
  gpointer held_user_data;
};

/*
 * Helpers to reduce our allocation overhead, we have
 * a well defined allocation lifecycle.
 */

/* Hands out a single list node carrying @data: the head of the
 * context's spare-node list is detached and reused when one is
 * available, otherwise a fresh node is allocated.
 */
static GSList *
get_list_node (GMarkupParseContext *context, gpointer data)
{
  GSList *link = context->spare_list_nodes;

  if (link == NULL)
    link = g_slist_alloc ();
  else
    context->spare_list_nodes = g_slist_remove_link (context->spare_list_nodes, link);

  link->data = data;

  return link;
}

/* Clears @node's data pointer and puts the node at the front of the
 * context's spare-node list so get_list_node() can reuse it.
 */
static void
free_list_node (GMarkupParseContext *context, GSList *node)
{
  GSList *spares = context->spare_list_nodes;

  node->data = NULL;
  context->spare_list_nodes = g_slist_concat (node, spares);
}

/* Empties @string in place without touching its allocation. */
static inline void
string_blank (GString *string)
{
  string->len = 0;
  string->str[0] = '\0';
}

/**
 * g_markup_parse_context_new:
 * @parser: a #GMarkupParser
 * @flags: one or more #GMarkupParseFlags
 * @user_data: user data to pass to #GMarkupParser functions
 * @user_data_dnotify: user data destroy notifier called when the parse context is freed
 *
 * Creates a new parse context. A parse context is used to parse
 * marked-up documents.
You can feed any number of documents into * a context, as long as no errors occur; once an error occurs, * the parse context can't continue to parse text (you have to free it * and create a new parse context). * * Return value: a new #GMarkupParseContext **/ GMarkupParseContext * g_markup_parse_context_new (const GMarkupParser *parser, GMarkupParseFlags flags, gpointer user_data, GDestroyNotify user_data_dnotify) { GMarkupParseContext *context; g_return_val_if_fail (parser != NULL, NULL); context = g_new (GMarkupParseContext, 1); context->parser = parser; context->flags = flags; context->user_data = user_data; context->dnotify = user_data_dnotify; context->line_number = 1; context->char_number = 1; context->partial_chunk = NULL; context->spare_chunks = NULL; context->spare_list_nodes = NULL; context->state = STATE_START; context->tag_stack = NULL; context->tag_stack_gstr = NULL; context->attr_names = NULL; context->attr_values = NULL; context->cur_attr = -1; context->alloc_attrs = 0; context->current_text = NULL; context->current_text_len = -1; context->current_text_end = NULL; context->start = NULL; context->iter = NULL; context->document_empty = TRUE; context->parsing = FALSE; context->awaiting_pop = FALSE; context->subparser_stack = NULL; context->subparser_element = NULL; /* this is only looked at if awaiting_pop = TRUE. initialise anyway. */ context->held_user_data = NULL; context->balance = 0; return context; } static void string_full_free (gpointer ptr, gpointer user_data) { g_string_free (ptr, TRUE); } static void clear_attributes (GMarkupParseContext *context); /** * g_markup_parse_context_free: * @context: a #GMarkupParseContext * * Frees a #GMarkupParseContext. Can't be called from inside * one of the #GMarkupParser functions. Can't be called while * a subparser is pushed. 
**/ void g_markup_parse_context_free (GMarkupParseContext *context) { g_return_if_fail (context != NULL); g_return_if_fail (!context->parsing); g_return_if_fail (!context->subparser_stack); g_return_if_fail (!context->awaiting_pop); if (context->dnotify) (* context->dnotify) (context->user_data); clear_attributes (context); g_free (context->attr_names); g_free (context->attr_values); g_slist_foreach (context->tag_stack_gstr, string_full_free, NULL); g_slist_free (context->tag_stack_gstr); g_slist_free (context->tag_stack); g_slist_foreach (context->spare_chunks, string_full_free, NULL); g_slist_free (context->spare_chunks); g_slist_free (context->spare_list_nodes); if (context->partial_chunk) g_string_free (context->partial_chunk, TRUE); g_free (context); } static void pop_subparser_stack (GMarkupParseContext *context); static void mark_error (GMarkupParseContext *context, GError *error) { context->state = STATE_ERROR; if (context->parser->error) (*context->parser->error) (context, error, context->user_data); /* report the error all the way up to free all the user-data */ while (context->subparser_stack) { pop_subparser_stack (context); context->awaiting_pop = FALSE; /* already been freed */ if (context->parser->error) (*context->parser->error) (context, error, context->user_data); } } static void set_error (GMarkupParseContext *context, GError **error, GMarkupError code, const gchar *format, ...) G_GNUC_PRINTF (4, 5); static void set_error_literal (GMarkupParseContext *context, GError **error, GMarkupError code, const gchar *message) { GError *tmp_error; tmp_error = g_error_new_literal (G_MARKUP_ERROR, code, message); g_prefix_error (&tmp_error, _("Error on line %d char %d: "), context->line_number, context->char_number); mark_error (context, tmp_error); g_propagate_error (error, tmp_error); } static void set_error (GMarkupParseContext *context, GError **error, GMarkupError code, const gchar *format, ...) 
{ gchar *s; gchar *s_valid; va_list args; va_start (args, format); s = g_strdup_vprintf (format, args); va_end (args); /* Make sure that the GError message is valid UTF-8 even if it is * complaining about invalid UTF-8 in the markup: */ s_valid = _g_utf8_make_valid (s); set_error_literal (context, error, code, s); g_free (s); g_free (s_valid); } static void propagate_error (GMarkupParseContext *context, GError **dest, GError *src) { if (context->flags & G_MARKUP_PREFIX_ERROR_POSITION) g_prefix_error (&src, _("Error on line %d char %d: "), context->line_number, context->char_number); mark_error (context, src); g_propagate_error (dest, src); } #define IS_COMMON_NAME_END_CHAR(c) \ ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ') static gboolean slow_name_validate (GMarkupParseContext *context, const char *name, GError **error) { const char *p = name; if (!g_utf8_validate (name, strlen (name), NULL)) { set_error (context, error, G_MARKUP_ERROR_BAD_UTF8, _("Invalid UTF-8 encoded text in name - not valid '%s'"), name); return FALSE; } if (!(g_ascii_isalpha (*p) || (!IS_COMMON_NAME_END_CHAR (*p) && (*p == '_' || *p == ':' || g_unichar_isalpha (g_utf8_get_char (p)))))) { set_error (context, error, G_MARKUP_ERROR_PARSE, _("'%s' is not a valid name "), name); return FALSE; } for (p = g_utf8_next_char (name); *p != '\0'; p = g_utf8_next_char (p)) { /* is_name_char */ if (!(g_ascii_isalnum (*p) || (!IS_COMMON_NAME_END_CHAR (*p) && (*p == '.' || *p == '-' || *p == '_' || *p == ':' || g_unichar_isalpha (g_utf8_get_char (p)))))) { set_error (context, error, G_MARKUP_ERROR_PARSE, _("'%s' is not a valid name: '%c' "), name, *p); return FALSE; } } return TRUE; } /* * Use me for elements, attributes etc. 
*/ static gboolean name_validate (GMarkupParseContext *context, const char *name, GError **error) { char mask; const char *p; /* name start char */ p = name; if (G_UNLIKELY (IS_COMMON_NAME_END_CHAR (*p) || !(g_ascii_isalpha (*p) || *p == '_' || *p == ':'))) goto slow_validate; for (mask = *p++; *p != '\0'; p++) { mask |= *p; /* is_name_char */ if (G_UNLIKELY (!(g_ascii_isalnum (*p) || (!IS_COMMON_NAME_END_CHAR (*p) && (*p == '.' || *p == '-' || *p == '_' || *p == ':'))))) goto slow_validate; } if (mask & 0x80) /* un-common / non-ascii */ goto slow_validate; return TRUE; slow_validate: return slow_name_validate (context, name, error); } static gboolean text_validate (GMarkupParseContext *context, const char *p, int len, GError **error) { if (!g_utf8_validate (p, len, NULL)) { set_error (context, error, G_MARKUP_ERROR_BAD_UTF8, _("Invalid UTF-8 encoded text in name - not valid '%s'"), p); return FALSE; } else return TRUE; } static gchar* char_str (gunichar c, gchar *buf) { memset (buf, 0, 8); g_unichar_to_utf8 (c, buf); return buf; } static gchar* utf8_str (const gchar *utf8, gchar *buf) { char_str (g_utf8_get_char (utf8), buf); return buf; } static void set_unescape_error (GMarkupParseContext *context, GError **error, const gchar *remaining_text, GMarkupError code, const gchar *format, ...) { GError *tmp_error; gchar *s; va_list args; gint remaining_newlines; const gchar *p; remaining_newlines = 0; p = remaining_text; while (*p != '\0') { if (*p == '\n') ++remaining_newlines; ++p; } va_start (args, format); s = g_strdup_vprintf (format, args); va_end (args); tmp_error = g_error_new (G_MARKUP_ERROR, code, _("Error on line %d: %s"), context->line_number - remaining_newlines, s); g_free (s); mark_error (context, tmp_error); g_propagate_error (error, tmp_error); } /* * re-write the GString in-place, unescaping anything that escaped. * most XML does not contain entities, or escaping. 
*/ static gboolean unescape_gstring_inplace (GMarkupParseContext *context, GString *string, gboolean *is_ascii, GError **error) { char mask, *to; int line_num = 1; const char *from; gboolean normalize_attribute; *is_ascii = FALSE; /* are we unescaping an attribute or not ? */ if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ || context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ) normalize_attribute = TRUE; else normalize_attribute = FALSE; /* * Meeks' theorum: unescaping can only shrink text. * for < etc. this is obvious, for  more * thought is required, but this is patently so. */ mask = 0; for (from = to = string->str; *from != '\0'; from++, to++) { *to = *from; mask |= *to; if (*to == '\n') line_num++; if (normalize_attribute && (*to == '\t' || *to == '\n')) *to = ' '; if (*to == '\r') { *to = normalize_attribute ? ' ' : '\n'; if (from[1] == '\n') from++; } if (*from == '&') { from++; if (*from == '#') { gboolean is_hex = FALSE; gulong l; gchar *end = NULL; from++; if (*from == 'x') { is_hex = TRUE; from++; } /* digit is between start and p */ errno = 0; if (is_hex) l = strtoul (from, &end, 16); else l = strtoul (from, &end, 10); if (end == from || errno != 0) { set_unescape_error (context, error, from, G_MARKUP_ERROR_PARSE, _("Failed to parse '%-.*s', which " "should have been a digit " "inside a character reference " "(ê for example) - perhaps " "the digit is too large"), end - from, from); return FALSE; } else if (*end != ';') { set_unescape_error (context, error, from, G_MARKUP_ERROR_PARSE, _("Character reference did not end with a " "semicolon; " "most likely you used an ampersand " "character without intending to start " "an entity - escape ampersand as &")); return FALSE; } else { /* characters XML 1.1 permits */ if ((0 < l && l <= 0xD7FF) || (0xE000 <= l && l <= 0xFFFD) || (0x10000 <= l && l <= 0x10FFFF)) { gchar buf[8]; char_str (l, buf); strcpy (to, buf); to += strlen (buf) - 1; from = end; if (l >= 0x80) /* not ascii */ mask |= 0x80; } else { 
set_unescape_error (context, error, from, G_MARKUP_ERROR_PARSE, _("Character reference '%-.*s' does not " "encode a permitted character"), end - from, from); return FALSE; } } } else if (strncmp (from, "lt;", 3) == 0) { *to = '<'; from += 2; } else if (strncmp (from, "gt;", 3) == 0) { *to = '>'; from += 2; } else if (strncmp (from, "amp;", 4) == 0) { *to = '&'; from += 3; } else if (strncmp (from, "quot;", 5) == 0) { *to = '"'; from += 4; } else if (strncmp (from, "apos;", 5) == 0) { *to = '\''; from += 4; } else { if (*from == ';') set_unescape_error (context, error, from, G_MARKUP_ERROR_PARSE, _("Empty entity '&;' seen; valid " "entities are: & " < > '")); else { const char *end = strchr (from, ';'); if (end) set_unescape_error (context, error, from, G_MARKUP_ERROR_PARSE, _("Entity name '%-.*s' is not known"), end-from, from); else set_unescape_error (context, error, from, G_MARKUP_ERROR_PARSE, _("Entity did not end with a semicolon; " "most likely you used an ampersand " "character without intending to start " "an entity - escape ampersand as &")); } return FALSE; } } } g_assert (to - string->str <= string->len); if (to - string->str != string->len) g_string_truncate (string, to - string->str); *is_ascii = !(mask & 0x80); return TRUE; } static inline gboolean advance_char (GMarkupParseContext *context) { context->iter++; context->char_number++; if (G_UNLIKELY (context->iter == context->current_text_end)) return FALSE; else if (G_UNLIKELY (*context->iter == '\n')) { context->line_number++; context->char_number = 1; } return TRUE; } static inline gboolean xml_isspace (char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; } static void skip_spaces (GMarkupParseContext *context) { do { if (!xml_isspace (*context->iter)) return; } while (advance_char (context)); } static void advance_to_name_end (GMarkupParseContext *context) { do { if (IS_COMMON_NAME_END_CHAR (*(context->iter))) return; if (xml_isspace (*(context->iter))) return; } while (advance_char 
(context)); } static void release_chunk (GMarkupParseContext *context, GString *str) { GSList *node; if (!str) return; if (str->allocated_len > 256) { /* large strings are unusual and worth freeing */ g_string_free (str, TRUE); return; } string_blank (str); node = get_list_node (context, str); context->spare_chunks = g_slist_concat (node, context->spare_chunks); } static void add_to_partial (GMarkupParseContext *context, const gchar *text_start, const gchar *text_end) { if (context->partial_chunk == NULL) { /* allocate a new chunk to parse into */ if (context->spare_chunks != NULL) { GSList *node = context->spare_chunks; context->spare_chunks = g_slist_remove_link (context->spare_chunks, node); context->partial_chunk = node->data; free_list_node (context, node); } else context->partial_chunk = g_string_sized_new (MAX (28, text_end - text_start)); } if (text_start != text_end) g_string_insert_len (context->partial_chunk, -1, text_start, text_end - text_start); } static inline void truncate_partial (GMarkupParseContext *context) { if (context->partial_chunk != NULL) string_blank (context->partial_chunk); } static inline const gchar* current_element (GMarkupParseContext *context) { return context->tag_stack->data; } static void pop_subparser_stack (GMarkupParseContext *context) { GMarkupRecursionTracker *tracker; g_assert (context->subparser_stack); tracker = context->subparser_stack->data; context->awaiting_pop = TRUE; context->held_user_data = context->user_data; context->user_data = tracker->prev_user_data; context->parser = tracker->prev_parser; context->subparser_element = tracker->prev_element; g_slice_free (GMarkupRecursionTracker, tracker); context->subparser_stack = g_slist_delete_link (context->subparser_stack, context->subparser_stack); } static void push_partial_as_tag (GMarkupParseContext *context) { GString *str = context->partial_chunk; /* sadly, this is exported by gmarkup_get_element_stack as-is */ context->tag_stack = g_slist_concat (get_list_node 
(context, str->str), context->tag_stack); context->tag_stack_gstr = g_slist_concat (get_list_node (context, str), context->tag_stack_gstr); context->partial_chunk = NULL; } static void pop_tag (GMarkupParseContext *context) { GSList *nodea, *nodeb; nodea = context->tag_stack; nodeb = context->tag_stack_gstr; release_chunk (context, nodeb->data); context->tag_stack = g_slist_remove_link (context->tag_stack, nodea); context->tag_stack_gstr = g_slist_remove_link (context->tag_stack_gstr, nodeb); free_list_node (context, nodea); free_list_node (context, nodeb); } static void possibly_finish_subparser (GMarkupParseContext *context) { if (current_element (context) == context->subparser_element) pop_subparser_stack (context); } static void ensure_no_outstanding_subparser (GMarkupParseContext *context) { if (context->awaiting_pop) g_critical ("During the first end_element call after invoking a " "subparser you must pop the subparser stack and handle " "the freeing of the subparser user_data. This can be " "done by calling the end function of the subparser. " "Very probably, your program just leaked memory."); /* let valgrind watch the pointer disappear... 
*/ context->held_user_data = NULL; context->awaiting_pop = FALSE; } static const gchar* current_attribute (GMarkupParseContext *context) { g_assert (context->cur_attr >= 0); return context->attr_names[context->cur_attr]->str; } static void add_attribute (GMarkupParseContext *context, GString *str) { if (context->cur_attr + 2 >= context->alloc_attrs) { context->alloc_attrs += 5; /* silly magic number */ context->attr_names = g_realloc (context->attr_names, sizeof(GString*)*context->alloc_attrs); context->attr_values = g_realloc (context->attr_values, sizeof(GString*)*context->alloc_attrs); } context->cur_attr++; context->attr_names[context->cur_attr] = str; context->attr_values[context->cur_attr] = NULL; context->attr_names[context->cur_attr+1] = NULL; context->attr_values[context->cur_attr+1] = NULL; } static void clear_attributes (GMarkupParseContext *context) { /* Go ahead and free the attributes. */ for (; context->cur_attr >= 0; context->cur_attr--) { int pos = context->cur_attr; release_chunk (context, context->attr_names[pos]); release_chunk (context, context->attr_values[pos]); context->attr_names[pos] = context->attr_values[pos] = NULL; } g_assert (context->cur_attr == -1); g_assert (context->attr_names == NULL || context->attr_names[0] == NULL); g_assert (context->attr_values == NULL || context->attr_values[0] == NULL); } /* This has to be a separate function to ensure the alloca's are unwound on exit - otherwise we grow & blow the stack with large documents */ static inline void emit_start_element (GMarkupParseContext *context, GError **error) { int i; const gchar *start_name; const gchar **attr_names; const gchar **attr_values; GError *tmp_error; attr_names = g_newa (const gchar *, context->cur_attr + 2); attr_values = g_newa (const gchar *, context->cur_attr + 2); for (i = 0; i < context->cur_attr + 1; i++) { attr_names[i] = context->attr_names[i]->str; attr_values[i] = context->attr_values[i]->str; } attr_names[i] = NULL; attr_values[i] = NULL; /* Call 
user callback for element start */ tmp_error = NULL; start_name = current_element (context); if (context->parser->start_element && name_validate (context, start_name, error)) (* context->parser->start_element) (context, start_name, (const gchar **)attr_names, (const gchar **)attr_values, context->user_data, &tmp_error); clear_attributes (context); if (tmp_error != NULL) propagate_error (context, error, tmp_error); } /** * g_markup_parse_context_parse: * @context: a #GMarkupParseContext * @text: chunk of text to parse * @text_len: length of @text in bytes * @error: return location for a #GError * * Feed some data to the #GMarkupParseContext. The data need not * be valid UTF-8; an error will be signaled if it's invalid. * The data need not be an entire document; you can feed a document * into the parser incrementally, via multiple calls to this function. * Typically, as you receive data from a network connection or file, * you feed each received chunk of data into this function, aborting * the process if an error occurs. Once an error is reported, no further * data may be fed to the #GMarkupParseContext; all errors are fatal. 
* * Return value: %FALSE if an error occurred, %TRUE on success **/ gboolean g_markup_parse_context_parse (GMarkupParseContext *context, const gchar *text, gssize text_len, GError **error) { g_return_val_if_fail (context != NULL, FALSE); g_return_val_if_fail (text != NULL, FALSE); g_return_val_if_fail (context->state != STATE_ERROR, FALSE); g_return_val_if_fail (!context->parsing, FALSE); if (text_len < 0) text_len = strlen (text); if (text_len == 0) return TRUE; context->parsing = TRUE; context->current_text = text; context->current_text_len = text_len; context->current_text_end = context->current_text + text_len; context->iter = context->current_text; context->start = context->iter; if (context->current_text_len == 0) goto finished; while (context->iter != context->current_text_end) { switch (context->state) { case STATE_START: /* Possible next state: AFTER_OPEN_ANGLE */ g_assert (context->tag_stack == NULL); /* whitespace is ignored outside of any elements */ skip_spaces (context); if (context->iter != context->current_text_end) { if (*context->iter == '<') { /* Move after the open angle */ advance_char (context); context->state = STATE_AFTER_OPEN_ANGLE; /* this could start a passthrough */ context->start = context->iter; /* document is now non-empty */ context->document_empty = FALSE; } else { set_error_literal (context, error, G_MARKUP_ERROR_PARSE, _("Document must begin with an element (e.g. <book>)")); } } break; case STATE_AFTER_OPEN_ANGLE: /* Possible next states: INSIDE_OPEN_TAG_NAME, * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH */ if (*context->iter == '?' 
|| *context->iter == '!') { /* include < in the passthrough */ const gchar *openangle = "<"; add_to_partial (context, openangle, openangle + 1); context->start = context->iter; context->balance = 1; context->state = STATE_INSIDE_PASSTHROUGH; } else if (*context->iter == '/') { /* move after it */ advance_char (context); context->state = STATE_AFTER_CLOSE_TAG_SLASH; } else if (!IS_COMMON_NAME_END_CHAR (*(context->iter))) { context->state = STATE_INSIDE_OPEN_TAG_NAME; /* start of tag name */ context->start = context->iter; } else { gchar buf[8]; set_error (context, error, G_MARKUP_ERROR_PARSE, _("'%s' is not a valid character following " "a '<' character; it may not begin an " "element name"), utf8_str (context->iter, buf)); } break; /* The AFTER_CLOSE_ANGLE state is actually sort of * broken, because it doesn't correspond to a range * of characters in the input stream as the others do, * and thus makes things harder to conceptualize */ case STATE_AFTER_CLOSE_ANGLE: /* Possible next states: INSIDE_TEXT, STATE_START */ if (context->tag_stack == NULL) { context->start = NULL; context->state = STATE_START; } else { context->start = context->iter; context->state = STATE_INSIDE_TEXT; } break; case STATE_AFTER_ELISION_SLASH: /* Possible next state: AFTER_CLOSE_ANGLE */ { /* We need to pop the tag stack and call the end_element * function, since this is the close tag */ GError *tmp_error = NULL; g_assert (context->tag_stack != NULL); possibly_finish_subparser (context); tmp_error = NULL; if (context->parser->end_element) (* context->parser->end_element) (context, current_element (context), context->user_data, &tmp_error); ensure_no_outstanding_subparser (context); if (tmp_error) { mark_error (context, tmp_error); g_propagate_error (error, tmp_error); } else { if (*context->iter == '>') { /* move after the close angle */ advance_char (context); context->state = STATE_AFTER_CLOSE_ANGLE; } else { gchar buf[8]; set_error (context, error, G_MARKUP_ERROR_PARSE, _("Odd character 
'%s', expected a '>' character " "to end the empty-element tag '%s'"), utf8_str (context->iter, buf), current_element (context)); } } pop_tag (context); } break; case STATE_INSIDE_OPEN_TAG_NAME: /* Possible next states: BETWEEN_ATTRIBUTES */ /* if there's a partial chunk then it's the first part of the * tag name. If there's a context->start then it's the start * of the tag name in current_text, the partial chunk goes * before that start though. */ advance_to_name_end (context); if (context->iter == context->current_text_end) { /* The name hasn't necessarily ended. Merge with * partial chunk, leave state unchanged. */ add_to_partial (context, context->start, context->iter); } else { /* The name has ended. Combine it with the partial chunk * if any; push it on the stack; enter next state. */ add_to_partial (context, context->start, context->iter); push_partial_as_tag (context); context->state = STATE_BETWEEN_ATTRIBUTES; context->start = NULL; } break; case STATE_INSIDE_ATTRIBUTE_NAME: /* Possible next states: AFTER_ATTRIBUTE_NAME */ advance_to_name_end (context); add_to_partial (context, context->start, context->iter); /* read the full name, if we enter the equals sign state * then add the attribute to the list (without the value), * otherwise store a partial chunk to be prepended later. */ if (context->iter != context->current_text_end) context->state = STATE_AFTER_ATTRIBUTE_NAME; break; case STATE_AFTER_ATTRIBUTE_NAME: /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */ skip_spaces (context); if (context->iter != context->current_text_end) { /* The name has ended. Combine it with the partial chunk * if any; push it on the stack; enter next state. 
*/ if (!name_validate (context, context->partial_chunk->str, error)) break; add_attribute (context, context->partial_chunk); context->partial_chunk = NULL; context->start = NULL; if (*context->iter == '=') { advance_char (context); context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN; } else { gchar buf[8]; set_error (context, error, G_MARKUP_ERROR_PARSE, _("Odd character '%s', expected a '=' after " "attribute name '%s' of element '%s'"), utf8_str (context->iter, buf), current_attribute (context), current_element (context)); } } break; case STATE_BETWEEN_ATTRIBUTES: /* Possible next states: AFTER_CLOSE_ANGLE, * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME */ skip_spaces (context); if (context->iter != context->current_text_end) { if (*context->iter == '/') { advance_char (context); context->state = STATE_AFTER_ELISION_SLASH; } else if (*context->iter == '>') { advance_char (context); context->state = STATE_AFTER_CLOSE_ANGLE; } else if (!IS_COMMON_NAME_END_CHAR (*(context->iter))) { context->state = STATE_INSIDE_ATTRIBUTE_NAME; /* start of attribute name */ context->start = context->iter; } else { gchar buf[8]; set_error (context, error, G_MARKUP_ERROR_PARSE, _("Odd character '%s', expected a '>' or '/' " "character to end the start tag of " "element '%s', or optionally an attribute; " "perhaps you used an invalid character in " "an attribute name"), utf8_str (context->iter, buf), current_element (context)); } /* If we're done with attributes, invoke * the start_element callback */ if (context->state == STATE_AFTER_ELISION_SLASH || context->state == STATE_AFTER_CLOSE_ANGLE) emit_start_element (context, error); } break; case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */ skip_spaces (context); if (context->iter != context->current_text_end) { if (*context->iter == '"') { advance_char (context); context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ; context->start = context->iter; } else if (*context->iter == '\'') { advance_char 
(context); context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ; context->start = context->iter; } else { gchar buf[8]; set_error (context, error, G_MARKUP_ERROR_PARSE, _("Odd character '%s', expected an open quote mark " "after the equals sign when giving value for " "attribute '%s' of element '%s'"), utf8_str (context->iter, buf), current_attribute (context), current_element (context)); } } break; case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: /* Possible next states: BETWEEN_ATTRIBUTES */ { gchar delim; if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ) { delim = '\''; } else { delim = '"'; } do { if (*context->iter == delim) break; } while (advance_char (context)); } if (context->iter == context->current_text_end) { /* The value hasn't necessarily ended. Merge with * partial chunk, leave state unchanged. */ add_to_partial (context, context->start, context->iter); } else { gboolean is_ascii; /* The value has ended at the quote mark. Combine it * with the partial chunk if any; set it for the current * attribute. */ add_to_partial (context, context->start, context->iter); g_assert (context->cur_attr >= 0); if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) && (is_ascii || text_validate (context, context->partial_chunk->str, context->partial_chunk->len, error))) { /* success, advance past quote and set state. */ context->attr_values[context->cur_attr] = context->partial_chunk; context->partial_chunk = NULL; advance_char (context); context->state = STATE_BETWEEN_ATTRIBUTES; context->start = NULL; } truncate_partial (context); } break; case STATE_INSIDE_TEXT: /* Possible next states: AFTER_OPEN_ANGLE */ do { if (*context->iter == '<') break; } while (advance_char (context)); /* The text hasn't necessarily ended. Merge with * partial chunk, leave state unchanged. 
*/ add_to_partial (context, context->start, context->iter); if (context->iter != context->current_text_end) { gboolean is_ascii; /* The text has ended at the open angle. Call the text * callback. */ if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) && (is_ascii || text_validate (context, context->partial_chunk->str, context->partial_chunk->len, error))) { GError *tmp_error = NULL; if (context->parser->text) (*context->parser->text) (context, context->partial_chunk->str, context->partial_chunk->len, context->user_data, &tmp_error); if (tmp_error == NULL) { /* advance past open angle and set state. */ advance_char (context); context->state = STATE_AFTER_OPEN_ANGLE; /* could begin a passthrough */ context->start = context->iter; } else propagate_error (context, error, tmp_error); } truncate_partial (context); } break; case STATE_AFTER_CLOSE_TAG_SLASH: /* Possible next state: INSIDE_CLOSE_TAG_NAME */ if (!IS_COMMON_NAME_END_CHAR (*(context->iter))) { context->state = STATE_INSIDE_CLOSE_TAG_NAME; /* start of tag name */ context->start = context->iter; } else { gchar buf[8]; set_error (context, error, G_MARKUP_ERROR_PARSE, _("'%s' is not a valid character following " "the characters '</'; '%s' may not begin an " "element name"), utf8_str (context->iter, buf), utf8_str (context->iter, buf)); } break; case STATE_INSIDE_CLOSE_TAG_NAME: /* Possible next state: AFTER_CLOSE_TAG_NAME */ advance_to_name_end (context); add_to_partial (context, context->start, context->iter); if (context->iter != context->current_text_end) context->state = STATE_AFTER_CLOSE_TAG_NAME; break; case STATE_AFTER_CLOSE_TAG_NAME: /* Possible next state: AFTER_CLOSE_TAG_SLASH */ skip_spaces (context); if (context->iter != context->current_text_end) { GString *close_name; close_name = context->partial_chunk; context->partial_chunk = NULL; if (*context->iter != '>') { gchar buf[8]; set_error (context, error, G_MARKUP_ERROR_PARSE, _("'%s' is not a valid character following " 
"the close element name '%s'; the allowed " "character is '>'"), utf8_str (context->iter, buf), close_name->str); } else if (context->tag_stack == NULL) { set_error (context, error, G_MARKUP_ERROR_PARSE, _("Element '%s' was closed, no element " "is currently open"), close_name->str); } else if (strcmp (close_name->str, current_element (context)) != 0) { set_error (context, error, G_MARKUP_ERROR_PARSE, _("Element '%s' was closed, but the currently " "open element is '%s'"), close_name->str, current_element (context)); } else { GError *tmp_error; advance_char (context); context->state = STATE_AFTER_CLOSE_ANGLE; context->start = NULL; possibly_finish_subparser (context); /* call the end_element callback */ tmp_error = NULL; if (context->parser->end_element) (* context->parser->end_element) (context, close_name->str, context->user_data, &tmp_error); ensure_no_outstanding_subparser (context); pop_tag (context); if (tmp_error) propagate_error (context, error, tmp_error); } context->partial_chunk = close_name; truncate_partial (context); } break; case STATE_INSIDE_PASSTHROUGH: /* Possible next state: AFTER_CLOSE_ANGLE */ do { if (*context->iter == '<') context->balance++; if (*context->iter == '>') { gchar *str; gsize len; context->balance--; add_to_partial (context, context->start, context->iter); context->start = context->iter; str = context->partial_chunk->str; len = context->partial_chunk->len; if (str[1] == '?' && str[len - 1] == '?') break; if (strncmp (str, "<!--", 4) == 0 && strcmp (str + len - 2, "--") == 0) break; if (strncmp (str, "<![CDATA[", 9) == 0 && strcmp (str + len - 2, "]]") == 0) break; if (strncmp (str, "<!DOCTYPE", 9) == 0 && context->balance == 0) break; } } while (advance_char (context)); if (context->iter == context->current_text_end) { /* The passthrough hasn't necessarily ended. Merge with * partial chunk, leave state unchanged. */ add_to_partial (context, context->start, context->iter); } else { /* The passthrough has ended at the close angle. 
Combine * it with the partial chunk if any. Call the passthrough * callback. Note that the open/close angles are * included in the text of the passthrough. */ GError *tmp_error = NULL; advance_char (context); /* advance past close angle */ add_to_partial (context, context->start, context->iter); if (context->flags & G_MARKUP_TREAT_CDATA_AS_TEXT && strncmp (context->partial_chunk->str, "<![CDATA[", 9) == 0) { if (context->parser->text && text_validate (context, context->partial_chunk->str + 9, context->partial_chunk->len - 12, error)) (*context->parser->text) (context, context->partial_chunk->str + 9, context->partial_chunk->len - 12, context->user_data, &tmp_error); } else if (context->parser->passthrough && text_validate (context, context->partial_chunk->str, context->partial_chunk->len, error)) (*context->parser->passthrough) (context, context->partial_chunk->str, context->partial_chunk->len, context->user_data, &tmp_error); truncate_partial (context); if (tmp_error == NULL) { context->state = STATE_AFTER_CLOSE_ANGLE; context->start = context->iter; /* could begin text */ } else propagate_error (context, error, tmp_error); } break; case STATE_ERROR: goto finished; break; default: g_assert_not_reached (); break; } } finished: context->parsing = FALSE; return context->state != STATE_ERROR; } /** * g_markup_parse_context_end_parse: * @context: a #GMarkupParseContext * @error: return location for a #GError * * Signals to the #GMarkupParseContext that all data has been * fed into the parse context with g_markup_parse_context_parse(). * This function reports an error if the document isn't complete, * for example if elements are still open. 
* * Return value: %TRUE on success, %FALSE if an error was set **/ gboolean g_markup_parse_context_end_parse (GMarkupParseContext *context, GError **error) { g_return_val_if_fail (context != NULL, FALSE); g_return_val_if_fail (!context->parsing, FALSE); g_return_val_if_fail (context->state != STATE_ERROR, FALSE); if (context->partial_chunk != NULL) { g_string_free (context->partial_chunk, TRUE); context->partial_chunk = NULL; } if (context->document_empty) { set_error_literal (context, error, G_MARKUP_ERROR_EMPTY, _("Document was empty or contained only whitespace")); return FALSE; } context->parsing = TRUE; switch (context->state) { case STATE_START: /* Nothing to do */ break; case STATE_AFTER_OPEN_ANGLE: set_error_literal (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly just after an open angle bracket '<'")); break; case STATE_AFTER_CLOSE_ANGLE: if (context->tag_stack != NULL) { /* Error message the same as for INSIDE_TEXT */ set_error (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly with elements still open - " "'%s' was the last element opened"), current_element (context)); } break; case STATE_AFTER_ELISION_SLASH: set_error (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly, expected to see a close angle " "bracket ending the tag <%s/>"), current_element (context)); break; case STATE_INSIDE_OPEN_TAG_NAME: set_error_literal (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly inside an element name")); break; case STATE_INSIDE_ATTRIBUTE_NAME: case STATE_AFTER_ATTRIBUTE_NAME: set_error_literal (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly inside an attribute name")); break; case STATE_BETWEEN_ATTRIBUTES: set_error_literal (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly inside an element-opening " "tag.")); break; case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: set_error_literal (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly 
after the equals sign " "following an attribute name; no attribute value")); break; case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: set_error_literal (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly while inside an attribute " "value")); break; case STATE_INSIDE_TEXT: g_assert (context->tag_stack != NULL); set_error (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly with elements still open - " "'%s' was the last element opened"), current_element (context)); break; case STATE_AFTER_CLOSE_TAG_SLASH: case STATE_INSIDE_CLOSE_TAG_NAME: case STATE_AFTER_CLOSE_TAG_NAME: set_error (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly inside the close tag for " "element '%s'"), current_element (context)); break; case STATE_INSIDE_PASSTHROUGH: set_error_literal (context, error, G_MARKUP_ERROR_PARSE, _("Document ended unexpectedly inside a comment or " "processing instruction")); break; case STATE_ERROR: default: g_assert_not_reached (); break; } context->parsing = FALSE; return context->state != STATE_ERROR; } /** * g_markup_parse_context_get_element: * @context: a #GMarkupParseContext * @returns: the name of the currently open element, or %NULL * * Retrieves the name of the currently open element. * * If called from the start_element or end_element handlers this will * give the element_name as passed to those functions. For the parent * elements, see g_markup_parse_context_get_element_stack(). * * Since: 2.2 **/ G_CONST_RETURN gchar * g_markup_parse_context_get_element (GMarkupParseContext *context) { g_return_val_if_fail (context != NULL, NULL); if (context->tag_stack == NULL) return NULL; else return current_element (context); } /** * g_markup_parse_context_get_element_stack: * @context: a #GMarkupParseContext * * Retrieves the element stack from the internal state of the parser. 
* The returned #GSList is a list of strings where the first item is * the currently open tag (as would be returned by * g_markup_parse_context_get_element()) and the next item is its * immediate parent. * * This function is intended to be used in the start_element and * end_element handlers where g_markup_parse_context_get_element() * would merely return the name of the element that is being * processed. * * Returns: the element stack, which must not be modified * * Since: 2.16 **/ G_CONST_RETURN GSList * g_markup_parse_context_get_element_stack (GMarkupParseContext *context) { g_return_val_if_fail (context != NULL, NULL); return context->tag_stack; } /** * g_markup_parse_context_get_position: * @context: a #GMarkupParseContext * @line_number: return location for a line number, or %NULL * @char_number: return location for a char-on-line number, or %NULL * * Retrieves the current line number and the number of the character on * that line. Intended for use in error messages; there are no strict * semantics for what constitutes the "current" line number other than * "the best number we could come up with for error messages." * **/ void g_markup_parse_context_get_position (GMarkupParseContext *context, gint *line_number, gint *char_number) { g_return_if_fail (context != NULL); if (line_number) *line_number = context->line_number; if (char_number) *char_number = context->char_number; } /** * g_markup_parse_context_get_user_data: * @context: a #GMarkupParseContext * * Returns the user_data associated with @context. This will either * be the user_data that was provided to g_markup_parse_context_new() * or to the most recent call of g_markup_parse_context_push(). * * Returns: the provided user_data. The returned data belongs to * the markup context and will be freed when g_markup_context_free() * is called. 
*
 * Since: 2.18
 **/
gpointer
g_markup_parse_context_get_user_data (GMarkupParseContext *context)
{
  /* Guard against a NULL context like the other accessors in this file. */
  g_return_val_if_fail (context != NULL, NULL);

  return context->user_data;
}

/**
 * g_markup_parse_context_push:
 * @context: a #GMarkupParseContext
 * @parser: a #GMarkupParser
 * @user_data: user data to pass to #GMarkupParser functions
 *
 * Temporarily redirects markup data to a sub-parser.
 *
 * This function may only be called from the start_element handler of
 * a #GMarkupParser. It must be matched with a corresponding call to
 * g_markup_parse_context_pop() in the matching end_element handler
 * (except in the case that the parser aborts due to an error).
 *
 * All tags, text and other data between the matching tags is
 * redirected to the subparser given by @parser. @user_data is used
 * as the user_data for that parser. @user_data is also passed to the
 * error callback in the event that an error occurs. This includes
 * errors that occur in subparsers of the subparser.
 *
 * The end tag matching the start tag for which this call was made is
 * handled by the previous parser (which is given its own user_data)
 * which is why g_markup_parse_context_pop() is provided to allow "one
 * last access" to the @user_data provided to this function. In the
 * case of error, the @user_data provided here is passed directly to
 * the error callback of the subparser and g_markup_parse_context_pop()
 * should not be called. In either case, if @user_data was allocated
 * then it ought to be freed from both of these locations.
 *
 * This function is not intended to be directly called by users
 * interested in invoking subparsers. Instead, it is intended to be
 * used by the subparsers themselves to implement a higher-level
 * interface.
 *
 * As an example, see the following implementation of a simple
 * parser that counts the number of tags encountered.
* * |[ * typedef struct * { * gint tag_count; * } CounterData; * * static void * counter_start_element (GMarkupParseContext *context, * const gchar *element_name, * const gchar **attribute_names, * const gchar **attribute_values, * gpointer user_data, * GError **error) * { * CounterData *data = user_data; * * data->tag_count++; * } * * static void * counter_error (GMarkupParseContext *context, * GError *error, * gpointer user_data) * { * CounterData *data = user_data; * * g_slice_free (CounterData, data); * } * * static GMarkupParser counter_subparser = * { * counter_start_element, * NULL, * NULL, * NULL, * counter_error * }; * ]| * * In order to allow this parser to be easily used as a subparser, the * following interface is provided: * * |[ * void * start_counting (GMarkupParseContext *context) * { * CounterData *data = g_slice_new (CounterData); * * data->tag_count = 0; * g_markup_parse_context_push (context, &counter_subparser, data); * } * * gint * end_counting (GMarkupParseContext *context) * { * CounterData *data = g_markup_parse_context_pop (context); * int result; * * result = data->tag_count; * g_slice_free (CounterData, data); * * return result; * } * ]| * * The subparser would then be used as follows: * * |[ * static void start_element (context, element_name, ...) * { * if (strcmp (element_name, "count-these") == 0) * start_counting (context); * * /* else, handle other tags... */ * } * * static void end_element (context, element_name, ...) * { * if (strcmp (element_name, "count-these") == 0) * g_print ("Counted %d tags\n", end_counting (context)); * * /* else, handle other tags... 
*/ * } * ]| * * Since: 2.18 **/ void g_markup_parse_context_push (GMarkupParseContext *context, const GMarkupParser *parser, gpointer user_data) { GMarkupRecursionTracker *tracker; tracker = g_slice_new (GMarkupRecursionTracker); tracker->prev_element = context->subparser_element; tracker->prev_parser = context->parser; tracker->prev_user_data = context->user_data; context->subparser_element = current_element (context); context->parser = parser; context->user_data = user_data; context->subparser_stack = g_slist_prepend (context->subparser_stack, tracker); } /** * g_markup_parse_context_pop: * @context: a #GMarkupParseContext * * Completes the process of a temporary sub-parser redirection. * * This function exists to collect the user_data allocated by a * matching call to g_markup_parse_context_push(). It must be called * in the end_element handler corresponding to the start_element * handler during which g_markup_parse_context_push() was called. You * must not call this function from the error callback -- the * @user_data is provided directly to the callback in that case. * * This function is not intended to be directly called by users * interested in invoking subparsers. Instead, it is intended to be * used by the subparsers themselves to implement a higher-level * interface. * * Returns: the user_data passed to g_markup_parse_context_push(). 
* * Since: 2.18 **/ gpointer g_markup_parse_context_pop (GMarkupParseContext *context) { gpointer user_data; if (!context->awaiting_pop) possibly_finish_subparser (context); g_assert (context->awaiting_pop); context->awaiting_pop = FALSE; /* valgrind friendliness */ user_data = context->held_user_data; context->held_user_data = NULL; return user_data; } static void append_escaped_text (GString *str, const gchar *text, gssize length) { const gchar *p; const gchar *end; gunichar c; p = text; end = text + length; while (p != end) { const gchar *next; next = g_utf8_next_char (p); switch (*p) { case '&': g_string_append (str, "&"); break; case '<': g_string_append (str, "<"); break; case '>': g_string_append (str, ">"); break; case '\'': g_string_append (str, "'"); break; case '"': g_string_append (str, """); break; default: c = g_utf8_get_char (p); if ((0x1 <= c && c <= 0x8) || (0xb <= c && c <= 0xc) || (0xe <= c && c <= 0x1f) || (0x7f <= c && c <= 0x84) || (0x86 <= c && c <= 0x9f)) g_string_append_printf (str, "&#x%x;", c); else g_string_append_len (str, p, next - p); break; } p = next; } } /** * g_markup_escape_text: * @text: some valid UTF-8 text * @length: length of @text in bytes, or -1 if the text is nul-terminated * * Escapes text so that the markup parser will parse it verbatim. * Less than, greater than, ampersand, etc. are replaced with the * corresponding entities. This function would typically be used * when writing out a file to be parsed with the markup parser. * * Note that this function doesn't protect whitespace and line endings * from being processed according to the XML rules for normalization * of line endings and attribute values. * * Note also that this function will produce character references in * the range of &#x1; ... &#x1f; for all control sequences * except for tabstop, newline and carriage return. The character * references in this range are not valid XML 1.0, but they are * valid XML 1.1 and will be accepted by the GMarkup parser. 
* * Return value: a newly allocated string with the escaped text **/ gchar* g_markup_escape_text (const gchar *text, gssize length) { GString *str; g_return_val_if_fail (text != NULL, NULL); if (length < 0) length = strlen (text); /* prealloc at least as long as original text */ str = g_string_sized_new (length); append_escaped_text (str, text, length); return g_string_free (str, FALSE); } /** * find_conversion: * @format: a printf-style format string * @after: location to store a pointer to the character after * the returned conversion. On a %NULL return, returns the * pointer to the trailing NUL in the string * * Find the next conversion in a printf-style format string. * Partially based on code from printf-parser.c, * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc. * * Return value: pointer to the next conversion in @format, * or %NULL, if none. **/ static const char * find_conversion (const char *format, const char **after) { const char *start = format; const char *cp; while (*start != '\0' && *start != '%') start++; if (*start == '\0') { *after = start; return NULL; } cp = start + 1; if (*cp == '\0') { *after = cp; return NULL; } /* Test for positional argument. */ if (*cp >= '0' && *cp <= '9') { const char *np; for (np = cp; *np >= '0' && *np <= '9'; np++) ; if (*np == '$') cp = np + 1; } /* Skip the flags. */ for (;;) { if (*cp == '\'' || *cp == '-' || *cp == '+' || *cp == ' ' || *cp == '#' || *cp == '0') cp++; else break; } /* Skip the field width. */ if (*cp == '*') { cp++; /* Test for positional argument. */ if (*cp >= '0' && *cp <= '9') { const char *np; for (np = cp; *np >= '0' && *np <= '9'; np++) ; if (*np == '$') cp = np + 1; } } else { for (; *cp >= '0' && *cp <= '9'; cp++) ; } /* Skip the precision. */ if (*cp == '.') { cp++; if (*cp == '*') { /* Test for positional argument. 
*/ if (*cp >= '0' && *cp <= '9') { const char *np; for (np = cp; *np >= '0' && *np <= '9'; np++) ; if (*np == '$') cp = np + 1; } } else { for (; *cp >= '0' && *cp <= '9'; cp++) ; } } /* Skip argument type/size specifiers. */ while (*cp == 'h' || *cp == 'L' || *cp == 'l' || *cp == 'j' || *cp == 'z' || *cp == 'Z' || *cp == 't') cp++; /* Skip the conversion character. */ cp++; *after = cp; return start; } /** * g_markup_vprintf_escaped: * @format: printf() style format string * @args: variable argument list, similar to vprintf() * * Formats the data in @args according to @format, escaping * all string and character arguments in the fashion * of g_markup_escape_text(). See g_markup_printf_escaped(). * * Return value: newly allocated result from formatting * operation. Free with g_free(). * * Since: 2.4 **/ char * g_markup_vprintf_escaped (const char *format, va_list args) { GString *format1; GString *format2; GString *result = NULL; gchar *output1 = NULL; gchar *output2 = NULL; const char *p, *op1, *op2; va_list args2; /* The technique here, is that we make two format strings that * have the identical conversions in the identical order to the * original strings, but differ in the text in-between. We * then use the normal g_strdup_vprintf() to format the arguments * with the two new format strings. By comparing the results, * we can figure out what segments of the output come from * the the original format string, and what from the arguments, * and thus know what portions of the string to escape. * * For instance, for: * * g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5); * * We form the two format strings "%sX%dX" and %sY%sY". The results * of formatting with those two strings are * * "%sX%dX" => "Susan & FredX5X" * "%sY%dY" => "Susan & FredY5Y" * * To find the span of the first argument, we find the first position * where the two arguments differ, which tells us that the first * argument formatted to "Susan & Fred". 
We then escape that * to "Susan & Fred" and join up with the intermediate portions * of the format string and the second argument to get * "Susan & Fred ate 5 apples". */ /* Create the two modified format strings */ format1 = g_string_new (NULL); format2 = g_string_new (NULL); p = format; while (TRUE) { const char *after; const char *conv = find_conversion (p, &after); if (!conv) break; g_string_append_len (format1, conv, after - conv); g_string_append_c (format1, 'X'); g_string_append_len (format2, conv, after - conv); g_string_append_c (format2, 'Y'); p = after; } /* Use them to format the arguments */ G_VA_COPY (args2, args); output1 = g_strdup_vprintf (format1->str, args); if (!output1) { va_end (args2); goto cleanup; } output2 = g_strdup_vprintf (format2->str, args2); va_end (args2); if (!output2) goto cleanup; result = g_string_new (NULL); /* Iterate through the original format string again, * copying the non-conversion portions and the escaped * converted arguments to the output string. */ op1 = output1; op2 = output2; p = format; while (TRUE) { const char *after; const char *output_start; const char *conv = find_conversion (p, &after); char *escaped; if (!conv) /* The end, after points to the trailing \0 */ { g_string_append_len (result, p, after - p); break; } g_string_append_len (result, p, conv - p); output_start = op1; while (*op1 == *op2) { op1++; op2++; } escaped = g_markup_escape_text (output_start, op1 - output_start); g_string_append (result, escaped); g_free (escaped); p = after; op1++; op2++; } cleanup: g_string_free (format1, TRUE); g_string_free (format2, TRUE); g_free (output1); g_free (output2); if (result) return g_string_free (result, FALSE); else return NULL; } /** * g_markup_printf_escaped: * @format: printf() style format string * @Varargs: the arguments to insert in the format string * * Formats arguments according to @format, escaping * all string and character arguments in the fashion * of g_markup_escape_text(). 
This is useful when you * want to insert literal strings into XML-style markup * output, without having to worry that the strings * might themselves contain markup. * * |[ * const char *store = "Fortnum & Mason"; * const char *item = "Tea"; * char *output; * * output = g_markup_printf_escaped ("<purchase>" * "<store>%s</store>" * "<item>%s</item>" * "</purchase>", * store, item); * ]| * * Return value: newly allocated result from formatting * operation. Free with g_free(). * * Since: 2.4 **/ char * g_markup_printf_escaped (const char *format, ...) { char *result; va_list args; va_start (args, format); result = g_markup_vprintf_escaped (format, args); va_end (args); return result; } static gboolean g_markup_parse_boolean (const char *string, gboolean *value) { char const * const falses[] = { "false", "f", "no", "n", "0" }; char const * const trues[] = { "true", "t", "yes", "y", "1" }; int i; for (i = 0; i < G_N_ELEMENTS (falses); i++) { if (g_ascii_strcasecmp (string, falses[i]) == 0) { if (value != NULL) *value = FALSE; return TRUE; } } for (i = 0; i < G_N_ELEMENTS (trues); i++) { if (g_ascii_strcasecmp (string, trues[i]) == 0) { if (value != NULL) *value = TRUE; return TRUE; } } return FALSE; } /** * GMarkupCollectType: * @G_MARKUP_COLLECT_INVALID: used to terminate the list of attributes * to collect. * @G_MARKUP_COLLECT_STRING: collect the string pointer directly from * the attribute_values[] array. Expects a * parameter of type (const char **). If * %G_MARKUP_COLLECT_OPTIONAL is specified * and the attribute isn't present then the * pointer will be set to %NULL. * @G_MARKUP_COLLECT_STRDUP: as with %G_MARKUP_COLLECT_STRING, but * expects a parameter of type (char **) and * g_strdup()s the returned pointer. The * pointer must be freed with g_free(). * @G_MARKUP_COLLECT_BOOLEAN: expects a parameter of type (gboolean *) * and parses the attribute value as a * boolean. Sets %FALSE if the attribute * isn't present. 
Valid boolean values * consist of (case insensitive) "false", * "f", "no", "n", "0" and "true", "t", * "yes", "y", "1". * @G_MARKUP_COLLECT_TRISTATE: as with %G_MARKUP_COLLECT_BOOLEAN, but * in the case of a missing attribute a * value is set that compares equal to * neither %FALSE nor %TRUE. * G_MARKUP_COLLECT_OPTIONAL is implied. * @G_MARKUP_COLLECT_OPTIONAL: can be bitwise ORed with the other * fields. If present, allows the * attribute not to appear. A default * value is set depending on what value * type is used. * * A mixed enumerated type and flags field. You must specify one type * (string, strdup, boolean, tristate). Additionally, you may * optionally bitwise OR the type with the flag * %G_MARKUP_COLLECT_OPTIONAL. * * It is likely that this enum will be extended in the future to * support other types. **/ /** * g_markup_collect_attributes: * @element_name: the current tag name * @attribute_names: the attribute names * @attribute_values: the attribute values * @error: a pointer to a #GError or %NULL * @first_type: the #GMarkupCollectType of the * first attribute * @first_attr: the name of the first attribute * @...: a pointer to the storage location of the * first attribute (or %NULL), followed by * more types names and pointers, ending * with %G_MARKUP_COLLECT_INVALID. * * Collects the attributes of the element from the * data passed to the #GMarkupParser start_element * function, dealing with common error conditions * and supporting boolean values. * * This utility function is not required to write * a parser but can save a lot of typing. * * The @element_name, @attribute_names, * @attribute_values and @error parameters passed * to the start_element callback should be passed * unmodified to this function. * * Following these arguments is a list of * "supported" attributes to collect. It is an * error to specify multiple attributes with the * same name. 
If any attribute not in the list * appears in the @attribute_names array then an * unknown attribute error will result. * * The #GMarkupCollectType field allows specifying * the type of collection to perform and if a * given attribute must appear or is optional. * * The attribute name is simply the name of the * attribute to collect. * * The pointer should be of the appropriate type * (see the descriptions under * #GMarkupCollectType) and may be %NULL in case a * particular attribute is to be allowed but * ignored. * * This function deals with issuing errors for missing attributes * (of type %G_MARKUP_ERROR_MISSING_ATTRIBUTE), unknown attributes * (of type %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE) and duplicate * attributes (of type %G_MARKUP_ERROR_INVALID_CONTENT) as well * as parse errors for boolean-valued attributes (again of type * %G_MARKUP_ERROR_INVALID_CONTENT). In all of these cases %FALSE * will be returned and @error will be set as appropriate. * * Return value: %TRUE if successful * * Since: 2.16 **/ gboolean g_markup_collect_attributes (const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, GError **error, GMarkupCollectType first_type, const gchar *first_attr, ...) { GMarkupCollectType type; const gchar *attr; guint64 collected; int written; va_list ap; int i; type = first_type; attr = first_attr; collected = 0; written = 0; va_start (ap, first_attr); while (type != G_MARKUP_COLLECT_INVALID) { gboolean mandatory; const gchar *value; mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL); type &= (G_MARKUP_COLLECT_OPTIONAL - 1); /* tristate records a value != TRUE and != FALSE * for the case where the attribute is missing */ if (type == G_MARKUP_COLLECT_TRISTATE) mandatory = FALSE; for (i = 0; attribute_names[i]; i++) if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i))) if (!strcmp (attribute_names[i], attr)) break; /* ISO C99 only promises that the user can pass up to 127 arguments. 
* Subtracting the first 4 arguments plus the final NULL and dividing * by 3 arguments per collected attribute, we are left with a maximum * number of supported attributes of (127 - 5) / 3 = 40. * * In reality, nobody is ever going to call us with anywhere close to * 40 attributes to collect, so it is safe to assume that if i > 40 * then the user has given some invalid or repeated arguments. These * problems will be caught and reported at the end of the function. * * We know at this point that we have an error, but we don't know * what error it is, so just continue... */ if (i < 40) collected |= (G_GUINT64_CONSTANT(1) << i); value = attribute_values[i]; if (value == NULL && mandatory) { g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_MISSING_ATTRIBUTE, "element '%s' requires attribute '%s'", element_name, attr); va_end (ap); goto failure; } switch (type) { case G_MARKUP_COLLECT_STRING: { const char **str_ptr; str_ptr = va_arg (ap, const char **); if (str_ptr != NULL) *str_ptr = value; } break; case G_MARKUP_COLLECT_STRDUP: { char **str_ptr; str_ptr = va_arg (ap, char **); if (str_ptr != NULL) *str_ptr = g_strdup (value); } break; case G_MARKUP_COLLECT_BOOLEAN: case G_MARKUP_COLLECT_TRISTATE: if (value == NULL) { gboolean *bool_ptr; bool_ptr = va_arg (ap, gboolean *); if (bool_ptr != NULL) { if (type == G_MARKUP_COLLECT_TRISTATE) /* constructivists rejoice! * neither false nor true... 
*/ *bool_ptr = -1; else /* G_MARKUP_COLLECT_BOOLEAN */ *bool_ptr = FALSE; } } else { if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *))) { g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT, "element '%s', attribute '%s', value '%s' " "cannot be parsed as a boolean value", element_name, attr, value); va_end (ap); goto failure; } } break; default: g_assert_not_reached (); } type = va_arg (ap, GMarkupCollectType); attr = va_arg (ap, const char *); written++; } va_end (ap); /* ensure we collected all the arguments */ for (i = 0; attribute_names[i]; i++) if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0) { /* attribute not collected: could be caused by two things. * * 1) it doesn't exist in our list of attributes * 2) it existed but was matched by a duplicate attribute earlier * * find out. */ int j; for (j = 0; j < i; j++) if (strcmp (attribute_names[i], attribute_names[j]) == 0) /* duplicate! */ break; /* j is now the first occurrence of attribute_names[i] */ if (i == j) g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE, "attribute '%s' invalid for element '%s'", attribute_names[i], element_name); else g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT, "attribute '%s' given multiple times for element '%s'", attribute_names[i], element_name); goto failure; } return TRUE; failure: /* replay the above to free allocations */ type = first_type; attr = first_attr; va_start (ap, first_attr); while (type != G_MARKUP_COLLECT_INVALID) { gpointer ptr; ptr = va_arg (ap, gpointer); if (ptr == NULL) continue; switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1)) { case G_MARKUP_COLLECT_STRDUP: if (written) g_free (*(char **) ptr); case G_MARKUP_COLLECT_STRING: *(char **) ptr = NULL; break; case G_MARKUP_COLLECT_BOOLEAN: *(gboolean *) ptr = FALSE; break; case G_MARKUP_COLLECT_TRISTATE: *(gboolean *) ptr = -1; break; } type = va_arg (ap, GMarkupCollectType); attr = va_arg (ap, const char *); if (written) written--; } va_end 
(ap); return FALSE; }