/*
 * Copyright (c) 2002-2004, Jon Travis
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef EKHTML_PRIVATE_DOT_H
#define EKHTML_PRIVATE_DOT_H

#ifndef EKHTML_USE_PRIVATE
#error This file should only be used by the EKHTML library
#endif

#include <stdio.h>

#include "ekhtml_config.h"
#include "hash.h"

/* 
 *  Container structures -- since a void * is not guaranteed to store 
 *  the function pointer correctly, we make a small container for them 
 */

typedef struct {
    ekhtml_starttag_cb_t startfunc;
    ekhtml_endtag_cb_t   endfunc;
} ekhtml_tag_container;

/*
 * Processing state information
 */

#define EKHTML_STATE_NONE      0  /* No state - figure it out      */
#define EKHTML_STATE_INDATA    1  /* In regular data               */
#define EKHTML_STATE_BADDATA   2  /* In some kind of bad data .. I.e
				     an invalid tag.  The first char
				     in the data is a '<' we must skip */
#define EKHTML_STATE_STARTTAG  3  /* In a start tag                */
#define EKHTML_STATE_ENDTAG    4  /* In an end tag                 */
#define EKHTML_STATE_SPECIAL   5  /* Special tag: <!FOO BAR BAZ    */
#define EKHTML_STATE_COMMENT   6  /* <!--Comment tag, yo! -->      */

#define EKHTML_CHAR_TAGNAME      (1 << 0)  /* AFTER first letter of tag  */
#define EKHTML_CHAR_WHITESPACE   (1 << 1)  /* Whitespace, obviously      */
#define EKHTML_CHAR_BEGATTRNAME  (1 << 2)  /* Valid 1st letter of attr   */
#define EKHTML_CHAR_ATTRNAME     (1 << 3)  /* AFTER first letter of attr */
#define EKHTML_CHAR_ATTRVALUE    (1 << 4)  /* Valid attr value chars     */


#define EKHTML_CHARMAP_TYPE      unsigned int
#define EKHTML_CHARMAP_TYPE_S    "unsigned int"
#define EKHTML_CHARMAP_LEN       (sizeof(EKHTML_CHARMAP_TYPE))

typedef struct {
    int                tagend;  /* Offset to the end of the tag name         */
    int                mode;    /* One of EKHTML_STMODE_*                    */
    ekhtml_attr_t     *attrs;   /* Already processed attributes              */
    ekhtml_attr_t     *curattr; /* Attribute currently being processed       */
    char               quote;   /* If the value is quoted, this is the char  */
} ekhtml_starttag_state;

typedef struct {
    int dashes;                 /* # of dashes in a row found                */
    int lastdash;               /* Offset of last dash                       */
} ekhtml_comment_state;

typedef struct {
    int lastchar;     /* Offset of last non-witespace char (offset from '<') */
} ekhtml_endtag_state;

struct ekhtml_parser_t {
    ekhtml_data_cb_t      datacb;      /* Callback when data is read         */
    hash_t               *startendcb;  /* Hash of start & end tag callbacks  */
    void                 *cbdata;      /* Data to pass into all callbacks    */
    ekhtml_starttag_cb_t  startcb_unk; /* Unknown starttag callback          */
    ekhtml_endtag_cb_t    endcb_unk;   /* Unknown endtag callback            */
    ekhtml_data_cb_t      commentcb;   /* Comment callback                   */
    
    char                 *buf;         /* malloced buffer holding parse data */
    size_t                nalloced;    /* # of bytes alloced in 'buf'        */
    size_t                nbuf;        /* # of bytes used in 'buf'           */
    
    /* The next two vars are only used in the starttag portion               */
    ekhtml_starttag_state startstate;  /* State info in start tags           */
    ekhtml_attr_t         *freeattrs;  /* Attribute structures which callers 
                                          can allocate and release at will   */
    ekhtml_endtag_state   endstate;    /* State info in end tags             */
    ekhtml_comment_state  commentstate;/* Only used in comment state         */
    
    struct {
        int state;          /* One of EKHTML_STATE_*                         */
        void *state_data;   /* State dependent data pointer                  */
        int offset;         /* State dependent offset information            */
    } state;
};

extern char *ekhtml_parse_comment(ekhtml_parser_t *, void **, const char *, 
				  const char *, int *baddata);
extern char *ekhtml_parse_special(ekhtml_parser_t *, void **, const char *, 
				  const char *, int *);
extern char *ekhtml_parse_starttag(ekhtml_parser_t *, void **, char *,
				   char *, int *);
extern char *ekhtml_parse_endtag(ekhtml_parser_t *, void **, char *,
				 char *, int *);
extern char *ekhtml_parse_data(ekhtml_parser_t *, const char *, const char *,
			       int);

extern void ekhtml_parser_starttag_cleanup(ekhtml_parser_t *);
extern char *ekhtml_make_upperstr(char *, int);

/*
 * find_notcharsmap:  Find a character in a buffer which contains a false
 *                    value in the character map 'map'
 *
 * Arguments:         buf     = Buffer to search through
 *                    len     = Length of `buf`
 *                    charmap = Character map (i.e. one from ekhtml_tables.h)
 *                    mask    = Mask to compare against each value of the map.
 *                              If the char masked against 'mask' is zero,
 *                              then the comparison is 'false'.
 *
 * Return values:     Returns buf + len if a character was not found, else
 *                    a pointer to the invalid character.
 */

static inline
char *ekhtml_find_notcharsmap(const char *buf, int len, 
			      const EKHTML_CHARMAP_TYPE *charmap,
			      EKHTML_CHARMAP_TYPE mask)
{
    const char *endp = buf + len;
    
    for(;buf<endp;buf++)
        if(!(charmap[(unsigned char )*buf] & mask))
            break;
    
    return (char *)((buf == endp) ? endp : buf);
}


#endif