added MirOTR

git-svn-id: http://svn.miranda-ng.org/main/trunk@83 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
author: Kirill Volinsky <mataes2007@gmail.com> 2012-05-19 18:01:32 +0000
committer: Kirill Volinsky <mataes2007@gmail.com> 2012-05-19 18:01:32 +0000
commit: b1509f22892dc98057c750e7fae39ded5cea3b09 (patch)
tree: 6bdcc9379ae86339a67022b758575729d1304074 /plugins/MirOTR/ekhtml/src/ekhtml.c
parent: e7a776a6f5ab323cd9dd824e815846ef268fa7f1 (diff)
1 files changed, 394 insertions, 0 deletions
diff --git a/plugins/MirOTR/ekhtml/src/ekhtml.c b/plugins/MirOTR/ekhtml/src/ekhtml.c
new file mode 100644
index 0000000000..f3697469f6
--- /dev/null
+++ b/plugins/MirOTR/ekhtml/src/ekhtml.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * ekhtml: The El-Kabong HTML parser
+ *         by Jon Travis (jtravis@p00p.org)
+ *
+ * El-Kabong: A speedy, yet forgiving, SAX-stylee HTML parser.  
+ *
+ * The idea behind this parser is for it to use very little memory, and still 
+ * be very speedy, while forgiving poorly written HTML.
+ *
+ * The internals of the parser consist of a small memory buffer which is able
+ * to grow when not enough information is known to correctly parse a tag.
+ * Given the typical layout of HTML, 4k should be plenty.  
+ *
+ * The main state engine loops through this internal buffer, determining what 
+ * the next state should be.  Once this is known, it passes off a segment to 
+ * the state handlers (starttag, endtag, etc.) to process.  The segment
+ * handlers and the main state engine communicate via a few variables.  These
+ * variables indicate whether or not the main engine should switch state,
+ * or successfully remove some data, etc.  The segment handlers are 
+ * guaranteed the same starting data (though not the same pointer) on each
+ * invocation until the state is changed.  Thus, the segment handlers cannot
+ * use pointers into the main buffer -- they must use offsets.
+ *
+ * Some of the speed is gained from using character map data found in
+ * ekhtml_tables.h.  I don't have any empirical data for this yet --
+ * it only sounds like it would be faster.. ;-)  
+ *
+ * I'm always looking for ways to clean && speed up this code.  Feel free
+ * to give feedback -- JMT
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include "ekhtml_config.h"
+#include "ekhtml.h"
+#define EKHTML_USE_TABLES
+#include "ekhtml_tables.h"
+#define EKHTML_USE_PRIVATE
+#include "ekhtml_private.h"
+
+#ifndef MIN
+#define MIN(a,b) (((a)<(b)) ? (a) : (b))
+#endif
+
+
+/*
+ * ekhtml_buffer_grow:  Grow the parser's internal buffer by one
+ *                      EKHTML_BLOCKSIZE.
+ *                      NOTE:  Calling the function has the potential to
+ *                             change the data buffer location.  Do
+ *                             not rely on its location!
+ *
+ *                      If the reallocation fails, a diagnostic is written
+ *                      to stderr and the parser is left untouched:  the
+ *                      old buffer stays valid and 'nalloced' keeps its
+ *                      previous value.  Since the function returns
+ *                      nothing, an unchanged 'nalloced' is the only way
+ *                      for a caller to notice the failure.
+ *
+ * Arguments:           Parser = Parser to grow
+ */
+
+static void ekhtml_buffer_grow(ekhtml_parser_t *parser){
+    size_t newsize;
+    char *newbuf;
+    
+    newsize = parser->nalloced + EKHTML_BLOCKSIZE;
+    
+    /* Keep the result in a temporary:  on failure realloc() leaves the
+     * original block allocated, so overwriting parser->buf would leak it
+     * and leave the parser pointing at NULL. */
+    newbuf = realloc(parser->buf, newsize);
+    if(newbuf == NULL){
+        /* size_t is not an int; cast so the argument matches the format */
+        fprintf(stderr, "BAD! Can't allocate %lu bytes in ekhtml_buffer_grow\n",
+                (unsigned long)newsize);
+        fflush(stderr); /* Just in case someone changes the buffering scheme */
+        return;
+    }
+
+    parser->buf      = newbuf;
+    parser->nalloced = newsize;
+}
+
+/*
+ * parser_state_determine:  Determine the next state that the main parser 
+ *                          should have, by investigating up to the first
+ *                          4 characters in the buffer.
+ *
+ * Arguments:      startp = Starting data pointer (must differ from endp)
+ *                 endp   = Pointer to first byte of 'out of range' data
+ *
+ * Return values:  Returns one of EKHTML_STATE_* indicating the state that
+ *                 was found.  EKHTML_STATE_NONE is returned when the
+ *                 buffer starts with '<' but does not yet hold enough
+ *                 bytes to decide.
+ *
+ */
+
+static inline
+int parser_state_determine(const char *startp, const char *endp){
+    const char *firstchar;
+    int newstate;
+    
+    assert(startp != endp);
+    
+    /* Anything not opening with '<' is plain character data */
+    if(*startp != '<')
+        return EKHTML_STATE_INDATA;
+    
+    firstchar = startp + 1;
+    if(firstchar == endp)
+        return EKHTML_STATE_NONE;
+
+    /* The character map classifies the byte following '<' */
+    newstate = EKCMap_EKState[(unsigned char)*firstchar];
+    if(newstate != EKHTML_STATE_NONE)
+        return newstate;
+
+    /* The map could not decide; two more bytes are needed to tell a
+     * comment from a special tag.  Compare lengths instead of computing
+     * firstchar + 2, which could point beyond one-past-the-end of the
+     * buffer. */
+    if(endp - firstchar <= 2) /* Not enough data to evaluate */
+        return EKHTML_STATE_NONE;
+
+    if(firstchar[1] == '-' && firstchar[2] == '-')
+        return EKHTML_STATE_COMMENT;
+
+    return EKHTML_STATE_SPECIAL;
+}
+
+
+/*
+ * ekhtml_parser_flush:  Run the main state engine over the bytes currently
+ *                       held in the parser's buffer, handing them to the
+ *                       per-state handlers, then move any unconsumed bytes
+ *                       to the front of the buffer.
+ *
+ * Arguments:      parser   = Parser whose buffer should be processed
+ *                 flushall = If non-zero, whatever the handlers did not
+ *                            consume is passed to the data callback (when
+ *                            one is set), the buffer is emptied and the
+ *                            state is reset to EKHTML_STATE_NONE
+ *
+ * Return values:  Non-zero if any data was removed from the buffer (always
+ *                 the case when flushall is set), 0 otherwise.
+ */
+int ekhtml_parser_flush(ekhtml_parser_t *parser, int flushall){
+    void **state_data = &parser->state.state_data;
+    char *buf = parser->buf, *curp = buf, *endp = buf + parser->nbuf;
+    int badp = -1, tmpstate = parser->state.state, didsomething = 0;
+    
+    while(curp != endp){
+        char *workp = curp;
+        
+        /* No state carried over:  look at the data to pick one */
+        if(tmpstate == EKHTML_STATE_NONE){
+            tmpstate = parser_state_determine(workp, endp);
+            if(tmpstate == EKHTML_STATE_NONE)  /* Not enough data yet */
+                break;
+        }
+        
+        if(tmpstate == EKHTML_STATE_INDATA || tmpstate == EKHTML_STATE_BADDATA)
+            curp = ekhtml_parse_data(parser, workp, endp, tmpstate);
+        else if(endp - workp > 2){  /* All tags fall under this category */
+            switch(tmpstate){
+            case EKHTML_STATE_ENDTAG:
+                curp = ekhtml_parse_endtag(parser, state_data, 
+                                           workp, endp, &badp);
+                break;
+            case EKHTML_STATE_STARTTAG:
+                curp = ekhtml_parse_starttag(parser, state_data, 
+                                             workp, endp, &badp);
+                break;
+            case EKHTML_STATE_COMMENT:
+                curp = ekhtml_parse_comment(parser, state_data, 
+                                            workp, endp, &badp);
+                break;
+            case EKHTML_STATE_SPECIAL:
+                curp = ekhtml_parse_special(parser, state_data, 
+                                            workp, endp, &badp);
+                break;
+            default:
+                assert(!"Unimplemented state");
+            }
+        } else {
+            curp = NULL; /* Not enough data, keep going */
+        }
+        
+        /* If one of the parsers said the data was bad, reset the state */
+        if(badp != -1){
+            tmpstate = badp;
+            badp = -1;
+        }
+
+        if(curp == NULL){ /* State needed more data, so break out */
+            curp = workp;
+            break;
+        }
+
+        if(workp != curp){  /* state backend cleared up some data */
+            didsomething = 1;
+            tmpstate = EKHTML_STATE_NONE;
+            assert(*state_data == NULL);
+        }
+    }
+
+    if(flushall){
+        /* Flush whatever we didn't use */
+        if(parser->datacb){
+            ekhtml_string_t str;
+
+            str.str = curp;
+            str.len = endp - curp;
+            parser->datacb(parser->cbdata, &str);
+        }
+        curp = endp;
+        didsomething = 1;
+        tmpstate = EKHTML_STATE_NONE;   /* Clean up to an unknown state */
+        *state_data = NULL;
+    }
+
+    /* Remember the state so the next call resumes where this one stopped */
+    parser->state.state = tmpstate;
+
+    if(didsomething){
+        /* Shuffle the data back, based on where we ended up */
+        parser->nbuf -= curp - buf;
+        if(endp - curp){  /* If there's still any data to move */
+            memmove(buf, curp, endp - curp);
+        }
+    }
+    return didsomething;
+}
+
+/*
+ * ekhtml_parser_feed:  Append data to the parser's internal buffer.  Each
+ *                      time the buffer fills up it is processed with
+ *                      ekhtml_parser_flush(); if that frees nothing the
+ *                      buffer is grown instead.
+ *
+ *                      If the buffer cannot be grown (out of memory), the
+ *                      part of 'str' that has not been copied yet is
+ *                      dropped and the function returns.
+ *
+ * Arguments:           parser = Parser to feed
+ *                      str    = Data to feed; str->len bytes are read from
+ *                               str->str
+ */
+void ekhtml_parser_feed(ekhtml_parser_t *parser, ekhtml_string_t *str){
+    size_t nfed = 0;
+    
+    while(nfed != str->len){
+        size_t tocopy;
+        
+        /* First see how much we can fill up our internal buffer */
+        tocopy = MIN(parser->nalloced - parser->nbuf, str->len - nfed);
+        if(tocopy != 0){
+            memcpy(parser->buf + parser->nbuf, str->str + nfed, tocopy);
+            nfed         += tocopy;
+            parser->nbuf += tocopy;
+        }
+        if(parser->nalloced == parser->nbuf){
+            /* Process the buffer */
+            if(!ekhtml_parser_flush(parser, 0)){
+                /* If we didn't actually process anything, grow our buffer */
+                ekhtml_buffer_grow(parser);
+
+                /* Without a buffer, or without any room gained, another
+                 * pass could never make progress:  give up instead of
+                 * spinning or copying into a NULL buffer. */
+                if(parser->buf == NULL || parser->nalloced == parser->nbuf)
+                    return;
+            }
+        }
+    }
+}
+
+/*
+ * ekhtml_parser_datacb_set:  Store 'cb' as the parser's data callback.
+ *                            ekhtml_parser_flush() passes leftover data to
+ *                            it when asked to flush everything.
+ */
+void ekhtml_parser_datacb_set(ekhtml_parser_t *parser, ekhtml_data_cb_t cb){
+    parser->datacb = cb;
+}
+
+/*
+ * ekhtml_parser_commentcb_set:  Store 'cb' as the parser's comment
+ *                               callback (presumably invoked by the comment
+ *                               handler, which is not in this file).
+ */
+void ekhtml_parser_commentcb_set(ekhtml_parser_t *parser, ekhtml_data_cb_t cb){
+    parser->commentcb = cb;
+}
+
+/*
+ * ekhtml_parser_cbdata_set:  Replace the opaque pointer stored in the
+ *                            parser.  It is handed to the data callback as
+ *                            its first argument.
+ */
+void ekhtml_parser_cbdata_set(ekhtml_parser_t *parser, void *cbdata){
+    parser->cbdata = cbdata;
+}
+
+/*
+ * ekhtml_parser_startendcb_add:  Register a start-tag or end-tag callback.
+ *
+ * Arguments:      parser  = Parser to register the callback with
+ *                 tag     = Tag name, matched case-insensitively (it is
+ *                           stored upper-cased).  NULL stores the callback
+ *                           in the parser's startcb_unk / endcb_unk slot
+ *                           instead of the per-tag table.
+ *                 startcb = Start-tag callback, used when isStart != 0
+ *                 endcb   = End-tag callback, used when isStart == 0
+ *                 isStart = Selects which of the two callbacks is stored
+ *
+ * If memory for a new registration cannot be allocated, the call leaves
+ * the parser unchanged.
+ */
+static void 
+ekhtml_parser_startendcb_add(ekhtml_parser_t *parser, const char *tag,
+                             ekhtml_starttag_cb_t startcb, 
+                             ekhtml_endtag_cb_t endcb,
+                             int isStart)
+{
+    ekhtml_tag_container *cont;
+    ekhtml_string_t lookup_str;
+    char *newtag, *cp;
+    unsigned int taglen;
+    hnode_t *hn;
+
+    if(!tag){
+        if(isStart)
+            parser->startcb_unk = startcb;
+        else
+            parser->endcb_unk = endcb;
+        return;
+    }
+
+    newtag = _strdup(tag);
+    if(newtag == NULL)
+        return;   /* Out of memory:  nothing registered */
+
+    /* toupper() is only defined for unsigned char values and EOF, so go
+     * through unsigned char to stay safe with bytes above 0x7f */
+    for(cp=newtag; *cp; cp++)
+        *cp = (char)toupper((unsigned char)*cp);
+    
+    taglen = (unsigned int)(cp - newtag);
+
+    /* First see if the container already exists */
+    lookup_str.str = newtag;
+    lookup_str.len = taglen;
+
+    if((hn = hash_lookup(parser->startendcb, &lookup_str))){
+        /* Existing entry:  the table already owns a copy of the name */
+        cont = hnode_get(hn);
+        free(newtag);
+        if(isStart)
+            cont->startfunc = startcb;
+        else
+            cont->endfunc = endcb;
+    } else {
+        ekhtml_string_t *set_str;
+
+        cont    = malloc(sizeof(*cont));
+        set_str = malloc(sizeof(*set_str));
+        if(cont == NULL || set_str == NULL){
+            /* Do not insert a half-built entry */
+            free(set_str);
+            free(cont);
+            free(newtag);
+            return;
+        }
+
+        if(isStart){
+            cont->startfunc = startcb;
+            cont->endfunc   = NULL;
+        } else {
+            cont->startfunc = NULL;
+            cont->endfunc   = endcb;
+        }
+        /* The key (and the newtag string it points to) now belongs to the
+         * table; ekhtml_parser_destroy() releases both */
+        *set_str = lookup_str;
+        hash_alloc_insert(parser->startendcb, set_str, cont);
+    }
+}
+
+/*
+ * ekhtml_parser_startcb_add:  Register 'cback' as the start-tag callback
+ *                             for 'tag'.  A NULL tag stores the callback in
+ *                             the parser's startcb_unk slot instead.
+ */
+void ekhtml_parser_startcb_add(ekhtml_parser_t *parser, const char *tag,
+			       ekhtml_starttag_cb_t cback)
+{
+    ekhtml_parser_startendcb_add(parser, tag, cback, NULL, 1);
+}
+
+/*
+ * ekhtml_parser_endcb_add:  Register 'cback' as the end-tag callback for
+ *                           'tag'.  A NULL tag stores the callback in the
+ *                           parser's endcb_unk slot instead.
+ */
+void ekhtml_parser_endcb_add(ekhtml_parser_t *parser, const char *tag,
+			     ekhtml_endtag_cb_t cback)
+{
+    ekhtml_parser_startendcb_add(parser, tag, NULL, cback, 0);
+}
+
+
+/*
+ * ekhtml_string_hash:  Hash callback for the tag table.  Computes a
+ *                      "times 33 plus character" hash, seeded with 5381,
+ *                      over the bytes of an ekhtml_string_t, walking from
+ *                      the last byte to the first.
+ *
+ * Arguments:           key = Pointer to the ekhtml_string_t to hash
+ *
+ * Return values:       The hash value.
+ */
+static hash_val_t ekhtml_string_hash(const void *key){
+    const ekhtml_string_t *name = key;
+    const char *bytes = name->str;
+    hash_val_t acc = 5381;
+    size_t idx;
+
+    for(idx = name->len; idx > 0; idx--){
+        int ch = bytes[idx - 1];
+
+        acc = acc * 33 + ch;
+    }
+    return acc;
+}
+
+/*
+ * ekhtml_string_comp:  Key comparison callback for the tag table.
+ *
+ * Arguments:           key1, key2 = Pointers to the ekhtml_string_t keys
+ *
+ * Return values:       1 when the lengths differ, otherwise the result of
+ *                      memcmp() over the bytes (0 when they are equal).
+ */
+static int ekhtml_string_comp(const void *key1, const void *key2){
+    const ekhtml_string_t *lhs = key1;
+    const ekhtml_string_t *rhs = key2;
+
+    /* Strings of different length can never match */
+    if(lhs->len != rhs->len)
+        return 1;
+
+    return memcmp(lhs->str, rhs->str, lhs->len);
+}
+
+/*
+ * ekhtml_parser_destroy:  Release a parser created with ekhtml_parser_new()
+ *                         together with every registered tag callback
+ *                         entry and the internal buffer.
+ *
+ * Arguments:              ekparser = Parser to destroy; NULL is ignored
+ */
+void ekhtml_parser_destroy(ekhtml_parser_t *ekparser){
+    hnode_t *hn;
+    hscan_t hs;
+
+    if(ekparser == NULL)
+        return;
+
+    hash_scan_begin(&hs, ekparser->startendcb);
+    while((hn = hash_scan_next(&hs))){
+        /* Fetch key and value first; the node is gone after the delete */
+        ekhtml_string_t *key = (ekhtml_string_t *)hnode_getkey(hn);
+        ekhtml_tag_container *cont = hnode_get(hn);
+
+        /* Entries were added with hash_alloc_insert(), so the node itself
+         * was allocated by the table and has to be freed along with the
+         * unlink -- hash_scan_delete() alone would leak it. */
+        hash_scan_delfree(ekparser->startendcb, hn);
+        free((char *)key->str);
+        free(key);
+        free(cont);
+    }
+    
+    hash_destroy(ekparser->startendcb);
+    ekhtml_parser_starttag_cleanup(ekparser);
+    free(ekparser->buf);
+    free(ekparser);
+}
+
+/*
+ * ekhtml_parser_new:  Allocate and initialise a parser with no callbacks
+ *                     registered and an internal buffer of one block.
+ *
+ * Arguments:          cbdata = Opaque pointer stored in the parser and
+ *                              handed to the data callback
+ *
+ * Return values:      The new parser, or NULL if any of the required
+ *                     allocations failed.  Release it with
+ *                     ekhtml_parser_destroy().
+ */
+ekhtml_parser_t *ekhtml_parser_new(void *cbdata){
+    ekhtml_parser_t *res;
+    
+    res = malloc(sizeof(*res));
+    if(res == NULL)
+        return NULL;
+
+    res->startendcb         = hash_create(HASHCOUNT_T_MAX, ekhtml_string_comp,
+                                          ekhtml_string_hash);
+    if(res->startendcb == NULL){  /* presumably signals allocation failure */
+        free(res);
+        return NULL;
+    }
+
+    res->datacb             = NULL;
+    res->cbdata             = cbdata;
+    res->startcb_unk        = NULL;
+    res->endcb_unk          = NULL;
+    res->commentcb          = NULL;
+    res->buf                = NULL;
+    res->nalloced           = 0;
+    res->nbuf               = 0;
+    res->freeattrs          = NULL;
+    res->state.state        = EKHTML_STATE_NONE;
+    res->state.state_data   = NULL;
+    
+    /* Start out with a buffer of 1 block size */
+    ekhtml_buffer_grow(res);
+    if(res->buf == NULL){
+        /* No buffer could be allocated; a parser without one is unusable */
+        hash_destroy(res->startendcb);
+        free(res);
+        return NULL;
+    }
+    return res;
+}
+
author	Kirill Volinsky <mataes2007@gmail.com>	2012-05-19 18:01:32 +0000
committer	Kirill Volinsky <mataes2007@gmail.com>	2012-05-19 18:01:32 +0000
commit	b1509f22892dc98057c750e7fae39ded5cea3b09 (patch)
tree	6bdcc9379ae86339a67022b758575729d1304074 /plugins/MirOTR/ekhtml/src/ekhtml.c
parent	e7a776a6f5ab323cd9dd824e815846ef268fa7f1 (diff)