summaryrefslogtreecommitdiff
path: root/ekhtml/include/ekhtml.h
blob: ba4b196649fe1a3053fd67a58694a89c82f4bae4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
/*
 * Copyright (c) 2002-2004, Jon Travis
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef EKHTML_DOT_H
#define EKHTML_DOT_H

#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

/*! 
 * \file ekhtml.h
 * \brief Main El-Kabong header file.
 *
 * This header defines everything that a program should need to use
 * the El-Kabong library.
 */

/**
 * A string object, which is not NUL terminated.
 * For speed reasons, El-Kabong does not deal with zero-terminated
 * strings.  Code consuming an ekhtml_string_t must bound every read of
 * `str` by `len` instead of looking for a terminator (for example, use
 * a "%.*s" style format rather than a plain "%s").
 */

typedef struct ekhtml_string_t {
    const char *str;    /**< Actual string data (no trailing NUL) */
    size_t      len;    /**< Length of the data in `str` */
} ekhtml_string_t;

/**
 * Attribute object, passed into callbacks.
 * When ekhtml parses tags containing key/value attributes, it will pass
 * this structure representing those values into the callbacks.  Note, for
 * speed reasons, the 'name' and 'val' fields are not terminated with
 * '\0'; their lengths are in name.len and val.len.  The attributes of a
 * tag are chained together through the 'next' pointer.
 */

typedef struct ekhtml_attr_t {
    ekhtml_string_t       name;       /**< Name of the attribute             */
    ekhtml_string_t       val;        /**< Value of the attribute            */
    unsigned int          isBoolean;  /**< True if the attribute is boolean  */
    char                  quoteChar;  /**< The character used to quote 'val'. 
                                           If no quoting was used, '\0'      */
    struct ekhtml_attr_t *next;  /**< Pointer to next attribute in the list  */
} ekhtml_attr_t;

/*
 * Typedefs for function callback types
 */

/**
 * The parser object.
 * The parser object holds state information, such as which callbacks
 * to invoke when reading tags, how much data is being processed, etc.
 *
 * Only a forward declaration is given here, so the type is opaque to
 * users of this header: parsers are handled exclusively through
 * pointers passed to the ekhtml_parser_* routines.
 */

typedef struct ekhtml_parser_t ekhtml_parser_t; 

/**
 * Callback for simple data.
 * Callback functions of this form are used to process data which is
 * not part of a start or end tag.  This callback may also be used
 * to process the body of comment tags.
 *
 * I.e. <FOO>data_to_process</FOO>
 * The data passed into the callback function will be "data_to_process"
 *
 * @param cbdata Callback data, as previously set by ekhtml_parser_cbdata_set
 * @param data   A pointer to the data in-between tags.  Like every
 *               ekhtml_string_t it is not NUL terminated; its length
 *               is data->len.
 *
 * @see ekhtml_parser_cbdata_set()
 * @see ekhtml_parser_datacb_set()
 * @see ekhtml_parser_commentcb_set()
 */

typedef void (*ekhtml_data_cb_t)(void *cbdata, ekhtml_string_t *data);

/**
 * Callback for start tags.
 * Callback functions of this form are used to process start tags.
 *
 * I.e. <FOO>data_to_process</FOO>
 * The tag passed into the callback will be "FOO" with a length of 3.
 *
 * @param cbdata Callback data, as previously set by ekhtml_parser_cbdata_set
 * @param tag    A pointer to the tag name.  This is an ekhtml_string_t
 *               and therefore NOT NUL terminated; the length of the
 *               name is tag->len.
 * @param attrs  Attributes of the tag, linked through ekhtml_attr_t::next.
 *               NOTE(review): presumably NULL when the tag carries no
 *               attributes -- confirm against the implementation.
 *
 * @see ekhtml_parser_cbdata_set()
 * @see ekhtml_parser_startcb_add()
 */

typedef void (*ekhtml_starttag_cb_t)(void *cbdata, ekhtml_string_t *tag,
				     ekhtml_attr_t *attrs);

/**
 * Callback for end tags.
 * Callback functions of this form are used to process end tags.
 *
 * I.e. <FOO>data_to_process</FOO>
 * The tag passed into the callback will be "FOO" with a length of 3.
 *
 * @param cbdata Callback data, as previously set by ekhtml_parser_cbdata_set
 * @param tag    A pointer to the tag name.  This is an ekhtml_string_t
 *               and therefore NOT NUL terminated; the length of the
 *               name is tag->len.
 *
 * @see ekhtml_parser_cbdata_set()
 * @see ekhtml_parser_endcb_add()
 */

typedef void (*ekhtml_endtag_cb_t)(void *cbdata, ekhtml_string_t *tag);

/**
 * Create a new parser object.
 * This routine creates a new parser object, with no set callback
 * functions or state.
 *
 * @param cbdata  Callback data to use when invoking callbacks
 *
 * @returns A new ekhtml_parser_t object.  Release it with
 *          ekhtml_parser_destroy() when it is no longer needed.
 *          NOTE(review): the result on allocation failure is not
 *          stated in this header -- confirm against the implementation.
 *
 * @see ekhtml_parser_cbdata_set()
 * @see ekhtml_parser_destroy()
 */

extern ekhtml_parser_t *ekhtml_parser_new(void *cbdata);

/**
 * Destroys a parser object and all memory associated with it.
 * After calling this routine, the parser should no longer be
 * used, as any results would be undefined.
 *
 * @param parser  The parser to destroy, as returned by
 *                ekhtml_parser_new()
 *
 * @see ekhtml_parser_new()
 */

extern void ekhtml_parser_destroy(ekhtml_parser_t *parser);

/**
 * Set the callback data for the parser.
 * This routine sets the callback data which the parser hands to its
 * callbacks as their `cbdata` argument (see ekhtml_data_cb_t,
 * ekhtml_starttag_cb_t and ekhtml_endtag_cb_t).
 *
 * @param parser  Parser to set the callback data for
 * @param cbdata  Callback data the parser should use to pass to callbacks
 *
 * @see ekhtml_parser_new()
 */

extern void ekhtml_parser_cbdata_set(ekhtml_parser_t *parser, void *cbdata);

/**
 * Set the parser's data callback.
 * This routine sets the callback which should be invoked for
 * non-tagged data.
 *
 * @param parser  Parser to set the callback for
 * @param cb      Callback to invoke when processing non-tagged data
 *
 * @see ekhtml_data_cb_t
 */

extern void ekhtml_parser_datacb_set(ekhtml_parser_t *parser, 
                                     ekhtml_data_cb_t cb);

/**
 * Set the parser's comment callback.
 * This routine sets the callback which should be invoked when
 * the parser processes a comment.  The callback has the same type as
 * the data callback (ekhtml_data_cb_t).
 *
 * @param parser  Parser to set the callback for
 * @param cb      Callback to invoke when processing a comment
 *
 * @see ekhtml_data_cb_t
 */

extern void ekhtml_parser_commentcb_set(ekhtml_parser_t *parser, 
                                        ekhtml_data_cb_t cb);

/**
 * Feed data for the parser to process.
 * Feed data into the HTML parser.  This routine will fill up the
 * internal buffer until it is full, then flush the data and refill.
 * If more data is required than the internal buffer can hold, the
 * buffer will be resized.
 *
 * @param parser  Parser to feed data to
 * @param data    Data to feed to the parser, given as a pointer/length
 *                pair (data->str, data->len); it does not need to be
 *                NUL terminated
 *
 * @see ekhtml_parser_flush()
 */

extern void ekhtml_parser_feed(ekhtml_parser_t *parser, 
                               ekhtml_string_t *data);

/**
 * Flush the parser innards.
 * When this function is invoked, the parser will flush all data that is
 * currently held, and any remaining state is saved.  All data which is
 * processed is removed from the parser, and the internal buffer is
 * reshuffled.
 *
 * @param parser   Parser to flush
 * @param flushall If true (non-zero), will flush all data, even if tags
 *                 are not complete (i.e. "<FO")
 * @returns 1 if action was taken (i.e. bytes were processed and the
 *          internal buffer was reshuffled) else 0
 *
 * @see ekhtml_parser_feed()
 */

extern int ekhtml_parser_flush(ekhtml_parser_t *parser, int flushall);

/**
 * Add a callback for a start tag.
 * This routine sets the callback which should be invoked when
 * the parser processes a start tag.  Both specific tags, and
 * unknown tags can be used with this method.
 *
 * @param parser  Parser to set the callback for
 * @param tag     Name of the tag to call `cb` for.  If `tag` is NULL, then
 *                any tags which are unknown to the parser will be sent
 *                to the callback specified by `cb`.
 *                NOTE(review): no length accompanies `tag`, so it is
 *                presumably a NUL terminated C string -- confirm, along
 *                with whether matching is case sensitive.
 * @param cb      Callback to invoke
 *
 * @see ekhtml_starttag_cb_t
 */

extern void ekhtml_parser_startcb_add(ekhtml_parser_t *parser, const char *tag,
				      ekhtml_starttag_cb_t cb);

/**
 * Add a callback for an end tag.
 * This routine sets the callback which should be invoked when
 * the parser processes an end tag.  Both specific tags, and
 * unknown tags can be used with this method.
 *
 * @param parser  Parser to set the callback for
 * @param tag     Name of the tag to call `cb` for.  If `tag` is NULL, then
 *                any tags which are unknown to the parser will be sent
 *                to the callback specified by `cb`.
 *                NOTE(review): no length accompanies `tag`, so it is
 *                presumably a NUL terminated C string -- confirm, along
 *                with whether matching is case sensitive.
 * @param cb      Callback to invoke
 *
 * @see ekhtml_endtag_cb_t
 */

extern void ekhtml_parser_endcb_add(ekhtml_parser_t *parser, const char *tag,
				    ekhtml_endtag_cb_t cb);

/**
 * EKHTML_BLOCKSIZE = # of blocks to allocate per chunk.
 * Expands to 4096.
 * NOTE(review): the unit ("blocks" vs. bytes) cannot be confirmed from
 * this header -- check how the implementation uses it.
 */
#define EKHTML_BLOCKSIZE (1024 * 4)

#ifdef __cplusplus
}
#endif

#endif