diff options
Diffstat (limited to 'MirOTR/striphtml.cpp')
-rw-r--r-- | MirOTR/striphtml.cpp | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/MirOTR/striphtml.cpp b/MirOTR/striphtml.cpp new file mode 100644 index 0000000..f4eebaf --- /dev/null +++ b/MirOTR/striphtml.cpp @@ -0,0 +1,114 @@ +#include "stdafx.h"
+#include "striphtml.h"
+#include "ekhtml.h"
+#include "entities.h"
+
+void starttag_cb (void *cbdata, ekhtml_string_t *tag, ekhtml_attr_t *attrs) {
+ STRIPHTML_DATA *data = (STRIPHTML_DATA *)cbdata;
+ switch (tag->len) {
+ case 1:
+ switch (*(tag->str)) {
+ case 'a':
+ case 'A':
+ {
+ ekhtml_attr_t *attr = attrs;
+ while (attr) {
+ if (_strnicmp(attr->name.str, "href", attr->name.len)==0) {
+ data->stack.push(strncpy((char*)mir_calloc(attr->val.len+1), attr->val.str, attr->val.len));
+ break;
+ }
+ }
+ }break;
+ case 'i':
+ case 'I':
+ data->buffer.append(" *");
+ break;
+ case 'b':
+ case 'B':
+ data->buffer.append(" _");
+ break;
+ }
+ case 2:
+ if (toupper(tag->str[0]) == 'B' && toupper(tag->str[1]) == 'R')
+ data->buffer.append("\r\n");
+ break;
+ case 3:
+ if (_strnicmp(tag->str, "img", 3) == 0) {
+ ekhtml_attr_t *attr = attrs;
+ data->buffer.append("IMAGE:( ");
+ while (attr) {
+ if (_strnicmp(attr->name.str, "src", attr->name.len)==0) {
+ data->buffer.append(attr->val.str, attr->val.len);
+ break;
+ }
+ }
+ data->buffer.append(" )");
+ }
+ break;
+ }
+}
+
+void endtag_cb (void *cbdata, ekhtml_string_t *tag) {
+ STRIPHTML_DATA *data = (STRIPHTML_DATA *)cbdata;
+ switch (tag->len) {
+ case 1:
+ switch (*(tag->str)) {
+ case 'a':
+ case 'A':
+ data->buffer.append(":( ");
+ data->buffer.append(data->stack.top());
+ mir_free(data->stack.top());
+ data->stack.pop();
+ data->buffer.append(" ) ");
+ break;
+ case 'i':
+ case 'I':
+ data->buffer.append("* ");
+ break;
+ case 'b':
+ case 'B':
+ data->buffer.append("_ ");
+ break;
+ }
+ }
+
+}
+
+void data_cb (void *cbdata, ekhtml_string_t *text) {
+ STRIPHTML_DATA *data = (STRIPHTML_DATA *)cbdata;
+ char* s = (char*) mir_calloc(text->len+1);
+ decode_html_entities_utf8(s, text->str, text->len);
+ data->buffer.append(s);
+ mir_free(s);
+}
+
+char * striphtml(char *html) {
+ STRIPHTML_DATA data;
+ ekhtml_string_t ekstring;
+
+ ekstring.len = strlen(html);
+ ekstring.str = html;
+
+ data.buffer.clear();
+ data.buffer.reserve(ekstring.len);
+
+ ekhtml_parser_t *parser = ekhtml_parser_new(&data);
+ ekhtml_parser_datacb_set(parser, &data_cb);
+ ekhtml_parser_startcb_add(parser, NULL, &starttag_cb);
+ ekhtml_parser_endcb_add(parser, NULL, &endtag_cb);
+
+ ekhtml_parser_feed(parser, &ekstring);
+ ekhtml_parser_flush(parser, 1);
+
+ while (!data.stack.empty()) {
+ mir_free(data.stack.top());
+ data.stack.pop();
+ }
+
+ ekhtml_parser_destroy(parser);
+
+ char *s = mir_strdup(data.buffer.c_str());
+ data.buffer.erase();
+ return s;
+ //ekhtml_parser_create(
+}
|