/* * Copyright (c) 2002-2004, Jon Travis * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * ekhtml_mktables: A small utility for generating tables of valid characters * for different parts of the HTML parsing, such as tags, * whitespace, etc. * * Each table is described by a function which returns 1 * if the character should be in the table, else 0 */ #include #include #include #include "ekhtml_config.h" #include "ekhtml.h" #define EKHTML_USE_PRIVATE #include "ekhtml_private.h" /* valid_tagname: Character map for a tagname AFTER the first letter */ static EKHTML_CHARMAP_TYPE valid_tagname(char in){ if(in == '-' || in == '.' || isdigit(in) || isalpha(in)) return 1; return 0; } static EKHTML_CHARMAP_TYPE valid_whitespace(char in){ return isspace(in) ? 1 : 0; } /* attribute name AFTER the first character */ static EKHTML_CHARMAP_TYPE valid_attrname(char in){ if(in == '_' || valid_tagname(in)) return 1; return 0; } /* attribute value */ static EKHTML_CHARMAP_TYPE valid_attrvalue(char in){ if(valid_attrname(in) || in == '/' || in == ':' || in == '+' || in == '*' || in == '%' || in == '?' || in == '!' || in == '&' || in == '(' || in == ')' || in == '#' || in == '=' || in == '~' || in == ']' || in == '@' || in == '$' || in == '_' || in == ',' || in == ';' || in == '|') return 1; return 0; } static EKHTML_CHARMAP_TYPE valid_begattrname(char in){ return (isalpha(in) || in == '_') ? 1 : 0; } static EKHTML_CHARMAP_TYPE ekhtml_state(char in){ if(in == '/') return EKHTML_STATE_ENDTAG; if(isalpha(in)) return EKHTML_STATE_STARTTAG; if(in == '!') return EKHTML_STATE_NONE; /* Must be determined by caller */ return EKHTML_STATE_BADDATA; } static EKHTML_CHARMAP_TYPE charmap_values(char in){ EKHTML_CHARMAP_TYPE res = 0; if(valid_tagname(in)) res |= EKHTML_CHAR_TAGNAME; if(valid_whitespace(in)) res |= EKHTML_CHAR_WHITESPACE; if(valid_begattrname(in)) res |= EKHTML_CHAR_BEGATTRNAME; if(valid_attrname(in)) res |= EKHTML_CHAR_ATTRNAME; if(valid_attrvalue(in)) res |= EKHTML_CHAR_ATTRVALUE; return res; } #define EKHTML_STRINGIFY(x) #x static void print_charmap(char *name, EKHTML_CHARMAP_TYPE (*cmap_func)(char)){ int ch; char sbuf[256]; sprintf(sbuf, "0x%%0%dx ", (int)EKHTML_CHARMAP_LEN * 2); printf("#ifdef EKHTML_USE_TABLES\n"); printf("const %s %s[256] = {\n", EKHTML_CHARMAP_TYPE_S, name); for(ch=0; ch < 256; ch++){ printf(sbuf, cmap_func((char)ch)); if(isgraph(ch)) printf("/* '%c' */", ch); else printf("/* 0x%.2x */", ch); printf(", "); if(!((ch + 1) % 4)) printf("\n"); } printf("};\n\n"); printf("#else\n"); printf("extern const %s %s[256];\n", EKHTML_CHARMAP_TYPE_S, name); printf("#endif\n"); } int main(int argc, char *argv[]){ printf("#ifndef EKHTML_MKTABLES_DOT_H\n"); printf("#define EKHTML_MKTABLES_DOT_H\n"); print_charmap("EKCMap_CharMap", charmap_values); print_charmap("EKCMap_EKState", ekhtml_state); printf("\n#endif\n"); return 0; }