summaryrefslogtreecommitdiff
path: root/ekhtml/src
diff options
context:
space:
mode:
authoradmin@progandy.co.cc <admin@progandy.co.cc@eced67a3-f377-a0ae-92ae-d6de1850b05a>2010-09-13 18:11:13 +0000
committeradmin@progandy.co.cc <admin@progandy.co.cc@eced67a3-f377-a0ae-92ae-d6de1850b05a>2010-09-13 18:11:13 +0000
commit376594ac1d65cbb31165f5a74775d624c3fd2981 (patch)
treeb1f9bf2ee2f1a82c5d9378c03cf09f72535c001d /ekhtml/src
parent99ef7f1cfcaa4ead4b860faec88754abbe31ebfc (diff)
- added basic html conversion for receiving (maybe a bit too powerful)
- some bugfixes git-svn-id: http://mirotr.googlecode.com/svn/trunk@12 eced67a3-f377-a0ae-92ae-d6de1850b05a
Diffstat (limited to 'ekhtml/src')
-rw-r--r--ekhtml/src/.deps/ekhtml.Plo1
-rw-r--r--ekhtml/src/.deps/ekhtml_comment.Plo1
-rw-r--r--ekhtml/src/.deps/ekhtml_data.Plo1
-rw-r--r--ekhtml/src/.deps/ekhtml_endtag.Plo1
-rw-r--r--ekhtml/src/.deps/ekhtml_mktables.Po1
-rw-r--r--ekhtml/src/.deps/ekhtml_special.Plo1
-rw-r--r--ekhtml/src/.deps/ekhtml_starttag.Plo1
-rw-r--r--ekhtml/src/.deps/ekhtml_util.Plo1
-rw-r--r--ekhtml/src/.deps/hash.Plo1
-rw-r--r--ekhtml/src/Makefile402
-rw-r--r--ekhtml/src/Makefile.am22
-rw-r--r--ekhtml/src/Makefile.in402
-rw-r--r--ekhtml/src/ekhtml.c394
-rw-r--r--ekhtml/src/ekhtml_comment.c119
-rw-r--r--ekhtml/src/ekhtml_data.c61
-rw-r--r--ekhtml/src/ekhtml_endtag.c129
-rw-r--r--ekhtml/src/ekhtml_mktables.c141
-rw-r--r--ekhtml/src/ekhtml_special.c72
-rw-r--r--ekhtml/src/ekhtml_starttag.c390
-rw-r--r--ekhtml/src/ekhtml_util.c55
-rw-r--r--ekhtml/src/hash.c1035
21 files changed, 3231 insertions, 0 deletions
diff --git a/ekhtml/src/.deps/ekhtml.Plo b/ekhtml/src/.deps/ekhtml.Plo
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/src/.deps/ekhtml.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/src/.deps/ekhtml_comment.Plo b/ekhtml/src/.deps/ekhtml_comment.Plo
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/src/.deps/ekhtml_comment.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/src/.deps/ekhtml_data.Plo b/ekhtml/src/.deps/ekhtml_data.Plo
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/src/.deps/ekhtml_data.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/src/.deps/ekhtml_endtag.Plo b/ekhtml/src/.deps/ekhtml_endtag.Plo
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/src/.deps/ekhtml_endtag.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/src/.deps/ekhtml_mktables.Po b/ekhtml/src/.deps/ekhtml_mktables.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/src/.deps/ekhtml_mktables.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/src/.deps/ekhtml_special.Plo b/ekhtml/src/.deps/ekhtml_special.Plo
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/src/.deps/ekhtml_special.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/src/.deps/ekhtml_starttag.Plo b/ekhtml/src/.deps/ekhtml_starttag.Plo
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/src/.deps/ekhtml_starttag.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/src/.deps/ekhtml_util.Plo b/ekhtml/src/.deps/ekhtml_util.Plo
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/src/.deps/ekhtml_util.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/src/.deps/hash.Plo b/ekhtml/src/.deps/hash.Plo
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/src/.deps/hash.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/src/Makefile b/ekhtml/src/Makefile
new file mode 100644
index 0000000..3739820
--- /dev/null
+++ b/ekhtml/src/Makefile
@@ -0,0 +1,402 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# Generated automatically from Makefile.in by configure.
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+SHELL = /bin/sh
+
+srcdir = .
+top_srcdir = ..
+
+prefix = /usr/local
+exec_prefix = ${prefix}
+
+bindir = ${exec_prefix}/bin
+sbindir = ${exec_prefix}/sbin
+libexecdir = ${exec_prefix}/libexec
+datadir = ${prefix}/share
+sysconfdir = ${prefix}/etc
+sharedstatedir = ${prefix}/com
+localstatedir = ${prefix}/var
+libdir = ${exec_prefix}/lib
+infodir = ${prefix}/info
+mandir = ${prefix}/man
+includedir = ${prefix}/include
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/ekhtml
+pkglibdir = $(libdir)/ekhtml
+pkgincludedir = $(includedir)/ekhtml
+top_builddir = ..
+
+ACLOCAL = ${SHELL} /z/missing --run aclocal-1.6
+AUTOCONF = ${SHELL} /z/missing --run autoconf
+AUTOMAKE = ${SHELL} /z/missing --run automake-1.6
+AUTOHEADER = ${SHELL} /z/missing --run autoheader
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = /bin/install -c
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_DATA = ${INSTALL} -m 644
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = ${INSTALL}
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = s,x,x,
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias =
+host_triplet = i686-pc-mingw32
+
+EXEEXT = .exe
+OBJEXT = o
+PATH_SEPARATOR = :
+AMTAR = ${SHELL} /z/missing --run tar
+AS = @AS@
+AWK = gawk
+CC = gcc
+DEPDIR = .deps
+DLLTOOL = @DLLTOOL@
+ECHO = echo
+INSTALL_STRIP_PROGRAM = ${SHELL} $(install_sh) -c -s
+LIBTOOL = $(SHELL) $(top_builddir)/libtool
+LN_S = ln -s
+MAINT = #
+OBJDUMP = @OBJDUMP@
+PACKAGE = ekhtml
+RANLIB = ranlib
+STRIP = strip
+VERSION = 0.3.2
+am__include = include
+am__quote =
+install_sh = /z/install-sh
+libekhtml_la_SOURCES = \
+ ekhtml.c \
+ ekhtml_comment.c \
+ ekhtml_data.c \
+ ekhtml_endtag.c \
+ ekhtml_special.c \
+ ekhtml_starttag.c \
+ ekhtml_util.c \
+ hash.c
+
+
+libekhtml_la_DEPENDENCIES = ekhtml_tables.h
+lib_LTLIBRARIES = libekhtml.la
+noinst_PROGRAMS = ekhtml_mktables
+
+ekhtml_mktables_SOURCES = ekhtml_mktables.c
+
+CLEANFILES = ekhtml_tables.h
+subdir = src
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/include/ekhtml_config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(lib_LTLIBRARIES)
+
+libekhtml_la_LDFLAGS =
+libekhtml_la_LIBADD =
+am_libekhtml_la_OBJECTS = ekhtml.lo ekhtml_comment.lo ekhtml_data.lo \
+ ekhtml_endtag.lo ekhtml_special.lo ekhtml_starttag.lo \
+ ekhtml_util.lo hash.lo
+libekhtml_la_OBJECTS = $(am_libekhtml_la_OBJECTS)
+noinst_PROGRAMS = ekhtml_mktables$(EXEEXT)
+PROGRAMS = $(noinst_PROGRAMS)
+
+am_ekhtml_mktables_OBJECTS = ekhtml_mktables.$(OBJEXT)
+ekhtml_mktables_OBJECTS = $(am_ekhtml_mktables_OBJECTS)
+ekhtml_mktables_LDADD = $(LDADD)
+ekhtml_mktables_DEPENDENCIES =
+ekhtml_mktables_LDFLAGS =
+
+DEFS = -DHAVE_CONFIG_H
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include
+CPPFLAGS =
+LDFLAGS =
+LIBS =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+DEP_FILES = ./$(DEPDIR)/ekhtml.Plo \
+ ./$(DEPDIR)/ekhtml_comment.Plo \
+ ./$(DEPDIR)/ekhtml_data.Plo \
+ ./$(DEPDIR)/ekhtml_endtag.Plo \
+ ./$(DEPDIR)/ekhtml_mktables.Po \
+ ./$(DEPDIR)/ekhtml_special.Plo \
+ ./$(DEPDIR)/ekhtml_starttag.Plo \
+ ./$(DEPDIR)/ekhtml_util.Plo ./$(DEPDIR)/hash.Plo
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+ $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+CFLAGS = -g -O2
+DIST_SOURCES = $(libekhtml_la_SOURCES) $(ekhtml_mktables_SOURCES)
+DIST_COMMON = Makefile.am Makefile.in
+SOURCES = $(libekhtml_la_SOURCES) $(ekhtml_mktables_SOURCES)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: # Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu src/Makefile
+Makefile: # $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+libLTLIBRARIES_INSTALL = $(INSTALL)
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(libdir)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f"; \
+ $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f; \
+ else :; fi; \
+ done
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ p="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \
+ $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \
+ done
+
+# Remove the built libtool libraries and any "so_locations" file found in
+# each library's directory.
+# The directory is obtained by stripping the last path component from the
+# library name.  When the name has no directory part the sed expression
+# leaves it unchanged, so that case falls back to the current directory.
+# The shell variable has to be written $$dir: a bare $dir is expanded by
+# make as $(d) followed by "ir", which turned the old fallback into a no-op.
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+	  test "$$dir" != "$$p" || dir=.; \
+	  echo "rm -f \"$${dir}/so_locations\""; \
+	  rm -f "$${dir}/so_locations"; \
+	done
+libekhtml.la: $(libekhtml_la_OBJECTS) $(libekhtml_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libekhtml_la_LDFLAGS) $(libekhtml_la_OBJECTS) $(libekhtml_la_LIBADD) $(LIBS)
+
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; for p in $$list; do \
+ f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+ekhtml_mktables$(EXEEXT): $(ekhtml_mktables_OBJECTS) $(ekhtml_mktables_DEPENDENCIES)
+ @rm -f ekhtml_mktables$(EXEEXT)
+ $(LINK) $(ekhtml_mktables_LDFLAGS) $(ekhtml_mktables_OBJECTS) $(ekhtml_mktables_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+ -rm -f *.tab.c
+
+include ./$(DEPDIR)/ekhtml.Plo
+include ./$(DEPDIR)/ekhtml_comment.Plo
+include ./$(DEPDIR)/ekhtml_data.Plo
+include ./$(DEPDIR)/ekhtml_endtag.Plo
+include ./$(DEPDIR)/ekhtml_mktables.Po
+include ./$(DEPDIR)/ekhtml_special.Plo
+include ./$(DEPDIR)/ekhtml_starttag.Plo
+include ./$(DEPDIR)/ekhtml_util.Plo
+include ./$(DEPDIR)/hash.Plo
+
+distclean-depend:
+ -rm -rf ./$(DEPDIR)
+
+.c.o:
+ source='$<' object='$@' libtool=no \
+ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' \
+ $(CCDEPMODE) $(depcomp) \
+ $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<
+
+.c.obj:
+ source='$<' object='$@' libtool=no \
+ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' \
+ $(CCDEPMODE) $(depcomp) \
+ $(COMPILE) -c `cygpath -w $<`
+
+.c.lo:
+ source='$<' object='$@' libtool=yes \
+ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' \
+ $(CCDEPMODE) $(depcomp) \
+ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+CCDEPMODE = depmode=gcc3
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ @list='$(DISTFILES)'; for file in $$list; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS)
+
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(libdir)
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+ clean-noinstPROGRAMS mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-compile distclean-depend \
+ distclean-generic distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES
+
+.PHONY: GTAGS all all-am check check-am clean clean-generic \
+ clean-libLTLIBRARIES clean-libtool clean-noinstPROGRAMS \
+ distclean distclean-compile distclean-depend distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am info \
+ info-am install install-am install-data install-data-am \
+ install-exec install-exec-am install-info install-info-am \
+ install-libLTLIBRARIES install-man install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool tags uninstall \
+ uninstall-am uninstall-info-am uninstall-libLTLIBRARIES
+
+
+# ekhtml.c is declared to depend on the generated header, so the header is
+# produced first.
+ekhtml.c: ekhtml_tables.h
+
+# ekhtml_tables.h is the standard output of the ekhtml_mktables helper.
+# The prerequisite uses the program's real file name, including $(EXEEXT)
+# (".exe" in this configured Makefile), so it matches the link rule's target.
+# If the generator fails, the partial header is removed so that it is not
+# mistaken for an up-to-date file on the next run.
+ekhtml_tables.h: ekhtml_mktables$(EXEEXT)
+	./ekhtml_mktables$(EXEEXT) > $@ || { rm -f $@; exit 1; }
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/ekhtml/src/Makefile.am b/ekhtml/src/Makefile.am
new file mode 100644
index 0000000..6fcdaf8
--- /dev/null
+++ b/ekhtml/src/Makefile.am
@@ -0,0 +1,22 @@
+# Sources compiled into the libekhtml libtool library.
+libekhtml_la_SOURCES = \
+	ekhtml.c \
+	ekhtml_comment.c \
+	ekhtml_data.c \
+	ekhtml_endtag.c \
+	ekhtml_special.c \
+	ekhtml_starttag.c \
+	ekhtml_util.c \
+	hash.c
+
+# The library is not linked until the generated table header exists.
+libekhtml_la_DEPENDENCIES = ekhtml_tables.h
+lib_LTLIBRARIES = libekhtml.la
+
+# Build-time helper that prints the tables; the noinst_ prefix keeps it out
+# of "make install".
+noinst_PROGRAMS = ekhtml_mktables
+
+ekhtml_mktables_SOURCES = ekhtml_mktables.c
+
+# ekhtml.c is declared to depend on the generated header.
+ekhtml.c: ekhtml_tables.h
+
+# ekhtml_tables.h is the standard output of the helper program.
+# The prerequisite carries $(EXEEXT) so that it matches the program target
+# automake generates on hosts where executables have a suffix.  If the
+# generator fails, the partial header is removed so that it is not mistaken
+# for an up-to-date file on the next run.
+ekhtml_tables.h: ekhtml_mktables$(EXEEXT)
+	./ekhtml_mktables$(EXEEXT) > $@ || { rm -f $@; exit 1; }
+
+# The header is generated, so "make clean" removes it.
+CLEANFILES = ekhtml_tables.h
diff --git a/ekhtml/src/Makefile.in b/ekhtml/src/Makefile.in
new file mode 100644
index 0000000..c5147f0
--- /dev/null
+++ b/ekhtml/src/Makefile.in
@@ -0,0 +1,402 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+AMTAR = @AMTAR@
+AS = @AS@
+AWK = @AWK@
+CC = @CC@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+ECHO = @ECHO@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+MAINT = @MAINT@
+OBJDUMP = @OBJDUMP@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+STRIP = @STRIP@
+VERSION = @VERSION@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+libekhtml_la_SOURCES = \
+ ekhtml.c \
+ ekhtml_comment.c \
+ ekhtml_data.c \
+ ekhtml_endtag.c \
+ ekhtml_special.c \
+ ekhtml_starttag.c \
+ ekhtml_util.c \
+ hash.c
+
+
+libekhtml_la_DEPENDENCIES = ekhtml_tables.h
+lib_LTLIBRARIES = libekhtml.la
+noinst_PROGRAMS = ekhtml_mktables
+
+ekhtml_mktables_SOURCES = ekhtml_mktables.c
+
+CLEANFILES = ekhtml_tables.h
+subdir = src
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/include/ekhtml_config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(lib_LTLIBRARIES)
+
+libekhtml_la_LDFLAGS =
+libekhtml_la_LIBADD =
+am_libekhtml_la_OBJECTS = ekhtml.lo ekhtml_comment.lo ekhtml_data.lo \
+ ekhtml_endtag.lo ekhtml_special.lo ekhtml_starttag.lo \
+ ekhtml_util.lo hash.lo
+libekhtml_la_OBJECTS = $(am_libekhtml_la_OBJECTS)
+noinst_PROGRAMS = ekhtml_mktables$(EXEEXT)
+PROGRAMS = $(noinst_PROGRAMS)
+
+am_ekhtml_mktables_OBJECTS = ekhtml_mktables.$(OBJEXT)
+ekhtml_mktables_OBJECTS = $(am_ekhtml_mktables_OBJECTS)
+ekhtml_mktables_LDADD = $(LDADD)
+ekhtml_mktables_DEPENDENCIES =
+ekhtml_mktables_LDFLAGS =
+
+DEFS = @DEFS@
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/ekhtml.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_comment.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_data.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_endtag.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_mktables.Po \
+@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_special.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_starttag.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_util.Plo ./$(DEPDIR)/hash.Plo
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+ $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+CFLAGS = @CFLAGS@
+DIST_SOURCES = $(libekhtml_la_SOURCES) $(ekhtml_mktables_SOURCES)
+DIST_COMMON = Makefile.am Makefile.in
+SOURCES = $(libekhtml_la_SOURCES) $(ekhtml_mktables_SOURCES)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu src/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+libLTLIBRARIES_INSTALL = $(INSTALL)
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(libdir)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f"; \
+ $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f; \
+ else :; fi; \
+ done
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ p="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \
+ $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \
+ done
+
+# Remove the built libtool libraries and any "so_locations" file found in
+# each library's directory.
+# The directory is obtained by stripping the last path component from the
+# library name.  When the name has no directory part the sed expression
+# leaves it unchanged, so that case falls back to the current directory.
+# The shell variable has to be written $$dir: a bare $dir is expanded by
+# make as $(d) followed by "ir", which turned the old fallback into a no-op.
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+	  test "$$dir" != "$$p" || dir=.; \
+	  echo "rm -f \"$${dir}/so_locations\""; \
+	  rm -f "$${dir}/so_locations"; \
+	done
+libekhtml.la: $(libekhtml_la_OBJECTS) $(libekhtml_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libekhtml_la_LDFLAGS) $(libekhtml_la_OBJECTS) $(libekhtml_la_LIBADD) $(LIBS)
+
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; for p in $$list; do \
+ f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+ekhtml_mktables$(EXEEXT): $(ekhtml_mktables_OBJECTS) $(ekhtml_mktables_DEPENDENCIES)
+ @rm -f ekhtml_mktables$(EXEEXT)
+ $(LINK) $(ekhtml_mktables_LDFLAGS) $(ekhtml_mktables_OBJECTS) $(ekhtml_mktables_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_comment.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_data.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_endtag.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_mktables.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_special.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_starttag.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_util.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hash.Plo@am__quote@
+
+distclean-depend:
+ -rm -rf ./$(DEPDIR)
+
+.c.o:
+@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<
+
+.c.obj:
+@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(COMPILE) -c `cygpath -w $<`
+
+.c.lo:
+@AMDEP_TRUE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+CCDEPMODE = @CCDEPMODE@
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ @list='$(DISTFILES)'; for file in $$list; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS)
+
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(libdir)
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+ clean-noinstPROGRAMS mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-compile distclean-depend \
+ distclean-generic distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES
+
+.PHONY: GTAGS all all-am check check-am clean clean-generic \
+ clean-libLTLIBRARIES clean-libtool clean-noinstPROGRAMS \
+ distclean distclean-compile distclean-depend distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am info \
+ info-am install install-am install-data install-data-am \
+ install-exec install-exec-am install-info install-info-am \
+ install-libLTLIBRARIES install-man install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool tags uninstall \
+ uninstall-am uninstall-info-am uninstall-libLTLIBRARIES
+
+
+# ekhtml.c is declared to depend on the generated header, so the header is
+# produced first.
+ekhtml.c: ekhtml_tables.h
+
+# ekhtml_tables.h is the standard output of the ekhtml_mktables helper.
+# The prerequisite uses the program's real file name, including $(EXEEXT)
+# (filled in by configure), so it matches the link rule's target.
+# If the generator fails, the partial header is removed so that it is not
+# mistaken for an up-to-date file on the next run.
+ekhtml_tables.h: ekhtml_mktables$(EXEEXT)
+	./ekhtml_mktables$(EXEEXT) > $@ || { rm -f $@; exit 1; }
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/ekhtml/src/ekhtml.c b/ekhtml/src/ekhtml.c
new file mode 100644
index 0000000..f369746
--- /dev/null
+++ b/ekhtml/src/ekhtml.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * ekhtml: The El-Kabong HTML parser
+ * by Jon Travis (jtravis@p00p.org)
+ *
+ * El-Kabong: A speedy, yet forgiving, SAX-stylee HTML parser.
+ *
+ * The idea behind this parser is for it to use very little memory, and still
+ * be very speedy, while forgiving poorly written HTML.
+
+ * The internals of the parser consist of a small memory buffer which is able
+ * to grow when not enough information is known to correctly parse a tag.
+ * Given the typical layout of HTML, 4k should be plenty.
+ *
+ * The main state engine loops through this internal buffer, determining what
+ * the next state should be. Once this is known, it passes off a segment to
+ * the state handlers (starttag, endtag, etc.) to process. The segment
+ * handlers and the main state engine communicate via a few variables. These
+ * variables indicate whether or not the main engine should switch state,
+ * or successfully remove some data, etc. The segment handlers are
+ * guaranteed the same starting data (though not the same pointer) on each
+ * invocation until the state is changed. Thus, the segment handlers cannot
+ * use pointers into the main buffer -- they must use offsets.
+ *
+ * Some of the speed is gained from using character map data found in
+ * ekhtml_tables.h. I don't have any empirical data for this yet --
+ * it only sounds like it would be faster.. ;-)
+ *
+ * I'm always looking for ways to clean && speed up this code. Feel free
+ * to give feedback -- JMT
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include "ekhtml_config.h"
+#include "ekhtml.h"
+#define EKHTML_USE_TABLES
+#include "ekhtml_tables.h"
+#define EKHTML_USE_PRIVATE
+#include "ekhtml_private.h"
+
+#ifndef MIN
+#define MIN(a,b) (((a)<(b)) ? (a) : (b))
+#endif
+
+
+/*
+ * ekhtml_buffer_grow:  Grow the parser's internal buffer by one blocksize.
+ *                      NOTE:  Calling this function has the potential to
+ *                             change the data buffer location.  Do
+ *                             not rely on its location!
+ *
+ * Arguments:           Parser = Parser to grow
+ *
+ * This function cannot report failure (it returns void) and its callers
+ * write into the buffer right after calling it.  An allocation failure is
+ * therefore treated as fatal:  a message is printed and the process aborts,
+ * instead of continuing with a NULL buffer and an inflated size.
+ */
+
+static void ekhtml_buffer_grow(ekhtml_parser_t *parser){
+    size_t newsize;
+    char *newbuf;
+
+    newsize = parser->nalloced + EKHTML_BLOCKSIZE;
+
+    /* realloc into a temporary so the existing buffer is never clobbered */
+    if((newbuf = realloc(parser->buf, newsize)) == NULL){
+        /* size_t has no portable pre-C99 conversion; print via unsigned long */
+        fprintf(stderr, "BAD! Can't allocate %lu bytes in ekhtml_buffer_grow\n",
+                (unsigned long)newsize);
+        fflush(stderr); /* Just in case someone changes the buffering scheme */
+        abort();
+    }
+
+    parser->buf      = newbuf;
+    parser->nalloced = newsize;
+}
+
+/*
+ * parser_state_determine:  Work out which state the main parser should
+ *                          enter next by peeking at (at most) the first
+ *                          4 characters of the buffer.
+ *
+ * Arguments:               startp = First byte of the data to inspect
+ *                          endp   = Pointer to first byte of 'out of range'
+ *                                   data
+ *
+ * Return values:           One of EKHTML_STATE_*.  EKHTML_STATE_NONE is
+ *                          returned when there is not enough data yet to
+ *                          make a decision.
+ */
+
+static inline
+int parser_state_determine(const char *startp, const char *endp){
+    const char *nextp;
+    int state;
+
+    assert(startp != endp);
+
+    /* Anything that does not open with '<' is plain data */
+    if(*startp != '<')
+        return EKHTML_STATE_INDATA;
+
+    /* A lone '<' at the very end of the buffer cannot be classified yet */
+    nextp = startp + 1;
+    if(nextp == endp)
+        return EKHTML_STATE_NONE;
+
+    state = EKCMap_EKState[(unsigned char)*nextp];
+    if(state != EKHTML_STATE_NONE)
+        return state;
+
+    /* The table left the decision to us; two more bytes are required */
+    if(nextp + 2 >= endp)
+        return EKHTML_STATE_NONE;
+
+    if(nextp[1] == '-' && nextp[2] == '-')
+        return EKHTML_STATE_COMMENT;
+
+    return EKHTML_STATE_SPECIAL;
+}
+
+
+/*
+ * ekhtml_parser_flush: Run the state engine over the parser's internal
+ * buffer, handing segments to the data/tag handlers, then
+ * move any unconsumed bytes to the front of the buffer.
+ *
+ * Arguments: parser = Parser whose buffer should be processed
+ * flushall = If non-zero, everything left unprocessed is
+ * passed to the data callback (if one is set), the
+ * state is reset and the buffer is emptied
+ *
+ * Return values: Non-zero if any data was removed from the buffer, else 0.
+ */
+int ekhtml_parser_flush(ekhtml_parser_t *parser, int flushall){
+ void **state_data = &parser->state.state_data;
+ char *buf = parser->buf, *curp = buf, *endp = buf + parser->nbuf;
+ int badp = -1, tmpstate = parser->state.state, didsomething = 0;
+
+ while(curp != endp){
+ char *workp = curp;
+
+ /* No state carried over: classify the data at the current position */
+ if(tmpstate == EKHTML_STATE_NONE){
+ tmpstate = parser_state_determine(workp, endp);
+ if(tmpstate == EKHTML_STATE_NONE) /* Not enough data yet */
+ break;
+ }
+
+ if(tmpstate == EKHTML_STATE_INDATA || tmpstate == EKHTML_STATE_BADDATA)
+ curp = ekhtml_parse_data(parser, workp, endp, tmpstate);
+ else if(endp - workp > 2){ /* All tags fall under this catagory */
+ switch(tmpstate){
+ case EKHTML_STATE_ENDTAG:
+ curp = ekhtml_parse_endtag(parser, state_data,
+ workp, endp, &badp);
+ break;
+ case EKHTML_STATE_STARTTAG:
+ curp = ekhtml_parse_starttag(parser, state_data,
+ workp, endp, &badp);
+ break;
+ case EKHTML_STATE_COMMENT:
+ curp = ekhtml_parse_comment(parser, state_data,
+ workp, endp, &badp);
+ break;
+ case EKHTML_STATE_SPECIAL:
+ curp = ekhtml_parse_special(parser, state_data,
+ workp, endp, &badp);
+ break;
+ default:
+ assert(!"Unimplemented state");
+ }
+ } else {
+ curp = NULL; /* Not enough data, keep going */
+ }
+
+ /* If one of the parsers said the data was bad, reset the state */
+ if(badp != -1){
+ tmpstate = badp;
+ badp = -1;
+ }
+
+ if(curp == NULL){ /* State needed more data, so break out */
+ curp = workp;
+ break;
+ }
+
+ if(workp != curp){ /* state backend cleared up some data */
+ didsomething = 1;
+ tmpstate = EKHTML_STATE_NONE;
+ assert(*state_data == NULL);
+ }
+ }
+
+ if(flushall){
+ /* Flush whatever we didn't use */
+ if(parser->datacb){
+ ekhtml_string_t str;
+
+ str.str = curp;
+ str.len = endp - curp;
+ parser->datacb(parser->cbdata, &str);
+ }
+ curp = endp;
+ didsomething = 1;
+ tmpstate = EKHTML_STATE_NONE; /* Clean up to an unknown state */
+ *state_data = NULL;
+ }
+
+ /* Remember the state so the next call resumes where this one stopped */
+ parser->state.state = tmpstate;
+
+ if(didsomething){
+ /* Shuffle the data back, based on where we ended up */
+ parser->nbuf -= curp - buf;
+ if(endp - curp){ /* If there's still any data to move */
+ memmove(buf, curp, endp - curp);
+ }
+ }
+ return didsomething;
+}
+
+/*
+ * ekhtml_parser_feed:  Copy the caller's data into the parser's internal
+ *                      buffer.  Each time the buffer fills up it is
+ *                      processed; if processing frees no room the buffer
+ *                      is grown instead.
+ *
+ * Arguments:           parser = Parser to feed
+ *                      str    = Data to append
+ */
+void ekhtml_parser_feed(ekhtml_parser_t *parser, ekhtml_string_t *str){
+    size_t consumed;
+
+    for(consumed = 0; consumed != str->len; ){
+        /* Copy as much as fits into the remaining buffer space */
+        size_t chunk = MIN(parser->nalloced - parser->nbuf,
+                           str->len - consumed);
+
+        memcpy(parser->buf + parser->nbuf, str->str + consumed, chunk);
+        consumed     += chunk;
+        parser->nbuf += chunk;
+
+        /* Still room left:  nothing to process yet */
+        if(parser->nalloced != parser->nbuf)
+            continue;
+
+        /* Buffer is full:  process it, and grow it if nothing was removed */
+        if(!ekhtml_parser_flush(parser, 0))
+            ekhtml_buffer_grow(parser);
+    }
+}
+
+/* Install the callback that receives character data. */
+void ekhtml_parser_datacb_set(ekhtml_parser_t *parser, ekhtml_data_cb_t cb)
+{
+    parser->datacb = cb;
+}
+
+/* Install the callback that receives comment bodies. */
+void ekhtml_parser_commentcb_set(ekhtml_parser_t *parser, ekhtml_data_cb_t cb)
+{
+    parser->commentcb = cb;
+}
+
+/* Set the opaque pointer handed to every callback as its first argument. */
+void ekhtml_parser_cbdata_set(ekhtml_parser_t *parser, void *cbdata)
+{
+    parser->cbdata = cbdata;
+}
+
+/*
+ * ekhtml_parser_startendcb_add:  Register a start-tag or end-tag callback.
+ *
+ * Arguments:  parser  = Parser to register the callback with
+ *             tag     = Tag name (any case), or NULL to set the callback
+ *                       used for tags without a specific registration
+ *             startcb = Start-tag callback (used when isStart is non-zero)
+ *             endcb   = End-tag callback (used when isStart is zero)
+ *             isStart = Selects which of the two callbacks is being set
+ *
+ * The tag name is stored upper-cased.  If memory runs out the registration
+ * is dropped, a message is written to stderr and nothing is leaked.
+ */
+static void
+ekhtml_parser_startendcb_add(ekhtml_parser_t *parser, const char *tag,
+                             ekhtml_starttag_cb_t startcb,
+                             ekhtml_endtag_cb_t endcb,
+                             int isStart)
+{
+    ekhtml_tag_container *cont;
+    ekhtml_string_t lookup_str, *set_str;
+    char *newtag;
+    size_t taglen, i;
+    hnode_t *hn;
+
+    if(!tag){
+        if(isStart)
+            parser->startcb_unk = startcb;
+        else
+            parser->endcb_unk = endcb;
+        return;
+    }
+
+    /* Build an upper-cased private copy of the tag (portable strdup) */
+    taglen = strlen(tag);
+    if((newtag = malloc(taglen + 1)) == NULL){
+        fprintf(stderr, "BAD! Can't allocate tag name in "
+                "ekhtml_parser_startendcb_add\n");
+        return;
+    }
+    for(i = 0; i < taglen; i++){
+        /* <ctype.h> needs an unsigned char value, not a plain char */
+        newtag[i] = (char)toupper((unsigned char)tag[i]);
+    }
+    newtag[taglen] = '\0';
+
+    /* First see if the container already exists */
+    lookup_str.str = newtag;
+    lookup_str.len = taglen;
+
+    if((hn = hash_lookup(parser->startendcb, &lookup_str))){
+        cont = hnode_get(hn);
+        free(newtag); /* The existing entry keeps its own key */
+        if(isStart)
+            cont->startfunc = startcb;
+        else
+            cont->endfunc = endcb;
+        return;
+    }
+
+    /* New tag:  the hash takes over newtag, set_str and cont on success */
+    cont    = malloc(sizeof(*cont));
+    set_str = malloc(sizeof(*set_str));
+    if(cont != NULL && set_str != NULL){
+        cont->startfunc = isStart ? startcb : NULL;
+        cont->endfunc   = isStart ? NULL : endcb;
+        *set_str = lookup_str;
+        if(hash_alloc_insert(parser->startendcb, set_str, cont))
+            return;
+    }
+
+    fprintf(stderr, "BAD! Can't allocate callback entry in "
+            "ekhtml_parser_startendcb_add\n");
+    free(set_str);
+    free(cont);
+    free(newtag);
+}
+
+/* Register a start-tag callback for `tag' (NULL = tags with no callback). */
+void ekhtml_parser_startcb_add(ekhtml_parser_t *parser, const char *tag,
+                               ekhtml_starttag_cb_t cback)
+{
+    const int is_start = 1;
+
+    ekhtml_parser_startendcb_add(parser, tag, cback, NULL, is_start);
+}
+
+/* Register an end-tag callback for `tag' (NULL = tags with no callback). */
+void ekhtml_parser_endcb_add(ekhtml_parser_t *parser, const char *tag,
+                             ekhtml_endtag_cb_t cback)
+{
+    const int is_start = 0;
+
+    ekhtml_parser_startendcb_add(parser, tag, NULL, cback, is_start);
+}
+
+
+/*
+ * ekhtml_string_hash:  Hash function for ekhtml_string_t keys.  Starts from
+ *                      5381 and folds the bytes in from last to first
+ *                      using hash = hash * 33 + byte.
+ */
+static hash_val_t ekhtml_string_hash(const void *key){
+    const ekhtml_string_t *estr = key;
+    const char *bytes = estr->str;
+    hash_val_t hash = 5381;
+    size_t idx;
+
+    /* Walk backwards:  idx is one past the byte being mixed in */
+    for(idx = estr->len; idx > 0; idx--){
+        int ch = bytes[idx - 1];
+
+        hash = ((hash << 5) + hash) + ch; /* hash * 33 + ch */
+    }
+    return hash;
+}
+
+/*
+ * ekhtml_string_comp:  Key comparison for the tag hash.  Returns 0 only when
+ *                      both strings have the same length and bytes; strings
+ *                      of different length always compare as 1.
+ */
+static int ekhtml_string_comp(const void *key1, const void *key2){
+    const ekhtml_string_t *lhs = key1;
+    const ekhtml_string_t *rhs = key2;
+
+    if(lhs->len != rhs->len)
+        return 1;
+
+    return memcmp(lhs->str, rhs->str, lhs->len);
+}
+
+/*
+ * ekhtml_parser_destroy:  Release a parser and everything it owns:  the
+ *                         registered tag callbacks (key string, key
+ *                         structure, container and hash node), the hash
+ *                         itself, the start-tag state and the data buffer.
+ *
+ * Arguments:              ekparser = Parser to destroy (NULL is a no-op)
+ */
+void ekhtml_parser_destroy(ekhtml_parser_t *ekparser){
+    hnode_t *hn;
+    hscan_t hs;
+
+    if(ekparser == NULL)
+        return;
+
+    hash_scan_begin(&hs, ekparser->startendcb);
+    while((hn = hash_scan_next(&hs))){
+        /* Fetch key and data first:  the node is gone after the delete */
+        ekhtml_string_t *key = (ekhtml_string_t *)hnode_getkey(hn);
+        ekhtml_tag_container *cont = hnode_get(hn);
+
+        /* Nodes were created by hash_alloc_insert, so the hash must free
+           them too; a plain hash_scan_delete would only unlink the node */
+        hash_scan_delfree(ekparser->startendcb, hn);
+        free((char *)key->str);
+        free(key);
+        free(cont);
+    }
+
+    hash_destroy(ekparser->startendcb);
+    ekhtml_parser_starttag_cleanup(ekparser);
+    free(ekparser->buf);
+    free(ekparser);
+}
+
+/*
+ * ekhtml_parser_new:  Allocate and initialise a parser with no callbacks
+ *                     registered and an initial buffer of one blocksize.
+ *
+ * Arguments:          cbdata = Opaque pointer passed to every callback
+ *
+ * Return values:      The new parser, or NULL if memory could not be
+ *                     allocated.  Release it with ekhtml_parser_destroy.
+ */
+ekhtml_parser_t *ekhtml_parser_new(void *cbdata){
+    ekhtml_parser_t *res;
+
+    if((res = malloc(sizeof(*res))) == NULL)
+        return NULL;
+
+    res->startendcb = hash_create(HASHCOUNT_T_MAX, ekhtml_string_comp,
+                                  ekhtml_string_hash);
+    if(res->startendcb == NULL){
+        free(res);
+        return NULL;
+    }
+
+    res->datacb           = NULL;
+    res->cbdata           = cbdata;
+    res->startcb_unk      = NULL;
+    res->endcb_unk        = NULL;
+    res->commentcb        = NULL;
+    res->buf              = NULL;
+    res->nalloced         = 0;
+    res->nbuf             = 0;
+    res->freeattrs        = NULL;
+    res->state.state      = EKHTML_STATE_NONE;
+    res->state.state_data = NULL;
+
+    /* Start out with a buffer of 1 block size */
+    ekhtml_buffer_grow(res);
+    if(res->buf == NULL){
+        /* No buffer was obtained:  undo what has been set up so far */
+        hash_destroy(res->startendcb);
+        free(res);
+        return NULL;
+    }
+    return res;
+}
+
diff --git a/ekhtml/src/ekhtml_comment.c b/ekhtml/src/ekhtml_comment.c
new file mode 100644
index 0000000..e37cc48
--- /dev/null
+++ b/ekhtml/src/ekhtml_comment.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * ekhtml_comment.c: Comment tag processor for El-Kabong.
+ *
+ * The comment processor is a pretty simple piece of machinery. It
+ * relies that the first 4 characters are '<!--'. It then searches
+ * for two adjacent dashes '--' followed by optional whitespace,
+ * followed by a '>'.
+ */
+
+#include <string.h>
+#include <assert.h>
+
+#include "ekhtml_config.h"
+#include "ekhtml.h"
+#include "ekhtml_tables.h"
+#define EKHTML_USE_PRIVATE
+#include "ekhtml_private.h"
+
+/*
+ * ekhtml_parse_comment: Scan a comment that starts at curp with the four
+ * characters of a comment opener, looking for a double dash
+ * followed by optional whitespace and a '>'.
+ *
+ * Arguments: parser = Parser the data belongs to
+ * state_data = Slot holding this handler's state; NULL on the
+ * first call for a comment, reset to NULL when the
+ * comment has been fully consumed
+ * curp = Start of the comment
+ * endp = Pointer to first byte of 'out of range' data
+ * baddata = Not written by this function
+ *
+ * Return values: Pointer to the byte after the closing '>' when the comment
+ * is complete, or NULL if more data is needed. In the NULL
+ * case parser->state.offset holds the offset (from curp) at
+ * which the next call resumes scanning.
+ */
+char *ekhtml_parse_comment(ekhtml_parser_t *parser, void **state_data,
+ const char *curp, const char *endp,
+ int *baddata)
+{
+ ekhtml_comment_state *comstate = *state_data;
+ int *offset = &parser->state.offset;
+ const char *workp;
+
+ assert(*curp == '<' && *(curp + 1) == '!' && *(curp + 2) == '-');
+ assert(*(curp + 3) == '-' && endp - curp >= 4);
+
+ if(comstate == NULL){ /* Only called the first time the tag is started */
+ comstate = &parser->commentstate;
+ comstate->dashes = 0;
+ comstate->lastdash = 0;
+ *state_data = comstate;
+ *offset = sizeof("<!--") - 1;
+ }
+
+ /* Resume where the previous call stopped (offsets, not pointers) */
+ workp = curp + *offset;
+ while(workp != endp){
+ if(comstate->dashes == 0){ /* Still on the quest for the double dash*/
+ /* XXX -- Searching for '--' could be faster, doing
+ multibyte searching, or something similar */
+ /* The scan advances two bytes at a time, so a dash found here may
+ be either half of a pair; both neighbours are checked below */
+ for(; workp < endp - 1; workp += 2){
+ if(*workp == '-')
+ break;
+ }
+
+ if(!(workp < endp - 1)){
+ *offset = endp - 1 - curp;
+ return NULL;
+ }
+
+ if((*(workp - 1) == '-') &&
+ (workp - curp) > (sizeof("<!--") - 1))
+ {
+ comstate->lastdash = workp - 1 - curp;
+ comstate->dashes = 1;
+ } else if(*(workp + 1) == '-'){
+ comstate->lastdash = workp - curp;
+ comstate->dashes = 1;
+ } else {
+ /* Else, a lone dash -- how sad, lonely dash.. ;-) */
+ }
+ workp++;
+ continue;
+ }
+
+ /* At this point we have the double dash. Search through whitespace */
+ workp = ekhtml_find_notcharsmap(workp, endp - workp, EKCMap_CharMap,
+ EKHTML_CHAR_WHITESPACE);
+ if(workp == endp){/* Reached end of the data without finding the '>' */
+ *offset = workp - curp;
+ return NULL;
+ }
+
+ if(*workp == '>'){
+ if(parser->commentcb){
+ ekhtml_string_t str;
+
+ /* Report the text between the 4-byte opener and the double dash */
+ str.str = curp + 4;
+ str.len = comstate->lastdash - 4;
+ parser->commentcb(parser->cbdata, &str);
+ }
+ *state_data = NULL;
+ return (char *)workp + 1;
+ } else {
+ /* Not the end of the comment after all; look for the next pair */
+ comstate->dashes = 0;
+ }
+ }
+
+ *offset = workp - curp;
+ return NULL; /* Not done yet */
+}
diff --git a/ekhtml/src/ekhtml_data.c b/ekhtml/src/ekhtml_data.c
new file mode 100644
index 0000000..2b1d6c8
--- /dev/null
+++ b/ekhtml/src/ekhtml_data.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <assert.h>
+
+#include "ekhtml_config.h"
+#include "ekhtml.h"
+#include "ekhtml_tables.h"
+#define EKHTML_USE_PRIVATE
+#include "ekhtml_private.h"
+
+/*
+ * ekhtml_parse_data:  Consume character data up to (not including) the
+ *                     next '<' and hand it to the data callback, if set.
+ *
+ * Arguments:          parser   = Parser the data belongs to
+ *                     curp     = Start of the data
+ *                     endp     = Pointer to first byte of 'out of range'
+ *                                data
+ *                     curstate = EKHTML_STATE_BADDATA means curp points at
+ *                                a '<' that must be taken as raw data
+ *
+ * Return values:      Pointer to the next '<', or endp if there is none.
+ */
+char *ekhtml_parse_data(ekhtml_parser_t *parser, const char *curp,
+                        const char *endp, int curstate)
+{
+    const char *scanfrom = curp;
+    const char *stopp;
+
+    if(curstate == EKHTML_STATE_BADDATA){
+        /* The leading '<' was rejected as a tag:  step over it so that
+           it becomes part of the data instead of stopping the scan */
+        assert(*curp == '<');
+        scanfrom = curp + 1;
+    }
+
+    /* In the data state everything up to a '<' sign can be absorbed */
+    stopp = memchr(scanfrom, '<', endp - scanfrom);
+    if(stopp == NULL)
+        stopp = endp;
+
+    if(parser->datacb){
+        ekhtml_string_t chunk;
+
+        chunk.str = curp;
+        chunk.len = stopp - curp;
+        parser->datacb(parser->cbdata, &chunk);
+    }
+    return (char *)stopp;
+}
+
diff --git a/ekhtml/src/ekhtml_endtag.c b/ekhtml/src/ekhtml_endtag.c
new file mode 100644
index 0000000..f7095dc
--- /dev/null
+++ b/ekhtml/src/ekhtml_endtag.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * ekhtml_endtag.c: Processor for a closing tag '</tag>'
+ *
+ * The endtag processor is a very simple processor with a very small
+ * state machine. Like the other tags, it accommodates malformed
+ * HTML, exchanging a '<' for a '>'. Whitespace trailing the tagname is
+ * ignored, and it might be interesting to note that a tag of </FOO BAR>
+ * will make callbacks with a tag of 'FOO BAR'
+ */
+
+#include <assert.h>
+#include <ctype.h>
+
+#include "ekhtml_config.h"
+#include "ekhtml.h"
+#include "ekhtml_tables.h"
+#define EKHTML_USE_PRIVATE
+#include "ekhtml_private.h"
+
+/*
+ * handle_endtag:  Dispatch an end tag to the callback registered for it,
+ *                 falling back to the parser's endcb_unk callback when the
+ *                 tag has no end callback of its own.
+ */
+static void handle_endtag(ekhtml_parser_t *parser, ekhtml_string_t *str){
+    hnode_t *node = hash_lookup(parser->startendcb, str);
+    ekhtml_tag_container *entry = node ? hnode_get(node) : NULL;
+
+    if(entry && entry->endfunc){
+        entry->endfunc(parser->cbdata, str);
+        return;
+    }
+
+    if(parser->endcb_unk)
+        parser->endcb_unk(parser->cbdata, str);
+}
+
+/*
+ * ekhtml_parse_endtag:  Process a closing tag that starts at curp.
+ *
+ * Arguments:      parser     = Parser the data belongs to
+ *                 state_data = Slot holding this handler's state; NULL on
+ *                              the first call for a tag, reset to NULL
+ *                              once the tag has been handled
+ *                 curp       = Start of the tag (the '<')
+ *                 endp       = Pointer to first byte of 'out of range' data
+ *                 baddata    = Set to EKHTML_STATE_BADDATA when the tag is
+ *                              bogus and must be treated as raw data
+ *
+ * Return values:  Pointer to the next byte the main parser should process,
+ *                 or NULL if the end of the tag has not arrived yet (the
+ *                 resume offset is then kept in parser->state.offset).
+ *                 When baddata is set, curp itself is returned.
+ */
+char *ekhtml_parse_endtag(ekhtml_parser_t *parser, void **state_data,
+                          char *curp, char *endp, int *baddata)
+{
+    const char *workp, *arrowp, *upper_tag;
+    ekhtml_endtag_state *endstate = *state_data;
+    int taglen, *offset = &parser->state.offset;
+    ekhtml_string_t str;
+
+    /* Prerequisites for this function are that there are at least 3 bytes
+       of data to work with and that the first chars are '</' */
+    assert(endp - curp >= 3);
+    assert(*curp == '<' && *(curp + 1) == '/');
+
+    if(endstate == NULL){ /* Only called the first time a tag is started */
+        const char *secondchar = curp + 2;
+
+        /* Initial check to make sure this isn't some bad tag.  The cast is
+           required:  isalpha() is undefined for negative char values */
+        if(!isalpha((unsigned char)*secondchar)){
+            if(*secondchar != '>' && *secondchar != '<'){
+                /* Bogus tag */
+                *baddata = EKHTML_STATE_BADDATA;
+                return (char *)curp;
+            } else { /* Might as well handle this case while we are here */
+                str.str = "";
+                str.len = 0;
+                handle_endtag(parser, &str);
+                /* A '>' is consumed; a '<' is left for the next tag */
+                return (char *)(*secondchar == '>' ? secondchar + 1 :
+                                secondchar);
+            }
+        }
+
+        /* Store state, since this is the first time we are state-ifying. */
+        endstate = &parser->endstate;
+        endstate->lastchar = 2;
+        *state_data = endstate;
+        *offset = 2;
+    }
+
+    workp = curp + *offset;
+    /* Search for the close tag, or even malformed HTML.  lastchar tracks
+       the last non-whitespace byte so trailing whitespace is dropped */
+    for(arrowp=workp;
+        arrowp != endp && *arrowp != '<' && *arrowp != '>';
+        arrowp++)
+    {
+        if(!(EKCMap_CharMap[(unsigned char)*arrowp] & EKHTML_CHAR_WHITESPACE))
+            endstate->lastchar = arrowp - curp;
+    }
+
+    if(arrowp == endp){
+        /* Didn't find the end, so return NULL */
+        *offset = endp - curp;
+        return NULL;
+    }
+
+    /* Found the end.  Clear up our state and return the next char that
+     * the parser should process
+     */
+    taglen = endstate->lastchar + 1 - 2; /* Exclude the leading '</' */
+    upper_tag = ekhtml_make_upperstr(curp + 2, taglen);
+    str.str = upper_tag;
+    str.len = taglen;
+    handle_endtag(parser, &str);
+    *state_data = NULL;
+    assert(arrowp < endp);
+    if(*arrowp == '<'){ /* Malformed HTML */
+        return (char *)(arrowp);
+    } else {
+        return (char *)(arrowp + 1);
+    }
+}
+
diff --git a/ekhtml/src/ekhtml_mktables.c b/ekhtml/src/ekhtml_mktables.c
new file mode 100644
index 0000000..9a8dea2
--- /dev/null
+++ b/ekhtml/src/ekhtml_mktables.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * ekhtml_mktables: A small utility for generating tables of valid characters
+ * for different parts of the HTML parsing, such as tags,
+ * whitespace, etc.
+ *
+ * Each table is described by a function which returns 1
+ * if the character should be in the table, else 0
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+
+#include "ekhtml_config.h"
+#include "ekhtml.h"
+#define EKHTML_USE_PRIVATE
+#include "ekhtml_private.h"
+
+
+/* valid_tagname:  Character map for a tagname AFTER the first letter.
+ *                 Returns 1 for '-', '.', digits and letters, else 0. */
+static EKHTML_CHARMAP_TYPE valid_tagname(char in){
+    /* <ctype.h> functions are only defined for unsigned char values (or
+       EOF); the table generator passes every byte 0..255 through here */
+    unsigned char uc = (unsigned char)in;
+
+    if(in == '-' || in == '.' || isdigit(uc) || isalpha(uc))
+        return 1;
+    return 0;
+}
+
+/* valid_whitespace:  Returns 1 if isspace() accepts the character, else 0 */
+static EKHTML_CHARMAP_TYPE valid_whitespace(char in){
+    /* Cast first:  isspace() is undefined for negative char values */
+    return isspace((unsigned char)in) ? 1 : 0;
+}
+
+/* valid_attrname:  Character map for an attribute name AFTER the first
+ *                  character:  '_' plus everything valid in a tagname. */
+static EKHTML_CHARMAP_TYPE valid_attrname(char in){
+    return (in == '_' || valid_tagname(in)) ? 1 : 0;
+}
+
+/* valid_attrvalue:  Character map for an attribute value:  everything
+ *                   valid in an attribute name plus the punctuation
+ *                   listed below. */
+static EKHTML_CHARMAP_TYPE valid_attrvalue(char in){
+    if(valid_attrname(in))
+        return 1;
+
+    switch(in){
+    case '/': case ':': case '+': case '*':
+    case '%': case '?': case '!': case '&':
+    case '(': case ')': case '#': case '=':
+    case '~': case ']': case '@': case '$':
+    case '_':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/* valid_begattrname:  Character map for the FIRST character of an
+ *                     attribute name:  a letter or '_'. */
+static EKHTML_CHARMAP_TYPE valid_begattrname(char in){
+    /* Cast first:  isalpha() is undefined for negative char values */
+    return (isalpha((unsigned char)in) || in == '_') ? 1 : 0;
+}
+
+/* ekhtml_state:  Map the character following a '<' to the parser state it
+ *                introduces.  '!' yields EKHTML_STATE_NONE because the
+ *                caller has to look further ahead to decide. */
+static EKHTML_CHARMAP_TYPE ekhtml_state(char in){
+    if(in == '/')
+        return EKHTML_STATE_ENDTAG;
+    /* Cast first:  isalpha() is undefined for negative char values */
+    if(isalpha((unsigned char)in))
+        return EKHTML_STATE_STARTTAG;
+    if(in == '!')
+        return EKHTML_STATE_NONE; /* Must be determined by caller */
+    return EKHTML_STATE_BADDATA;
+}
+
+/* charmap_values:  Combine the individual character classes into a single
+ *                  bitmask of EKHTML_CHAR_* flags for one character. */
+static EKHTML_CHARMAP_TYPE charmap_values(char in){
+    EKHTML_CHARMAP_TYPE flags = 0;
+
+    flags |= valid_tagname(in)     ? EKHTML_CHAR_TAGNAME     : 0;
+    flags |= valid_whitespace(in)  ? EKHTML_CHAR_WHITESPACE  : 0;
+    flags |= valid_begattrname(in) ? EKHTML_CHAR_BEGATTRNAME : 0;
+    flags |= valid_attrname(in)    ? EKHTML_CHAR_ATTRNAME    : 0;
+    flags |= valid_attrvalue(in)   ? EKHTML_CHAR_ATTRVALUE   : 0;
+
+    return flags;
+}
+
+#define EKHTML_STRINGIFY(x) #x
+
+/*
+ * print_charmap:  Write one 256-entry table to stdout as C source.  The
+ *                 definition is emitted under EKHTML_USE_TABLES and an
+ *                 extern declaration otherwise.
+ *
+ * Arguments:      name      = Name of the generated array
+ *                 cmap_func = Function giving the table value for a char
+ */
+static void print_charmap(char *name, EKHTML_CHARMAP_TYPE (*cmap_func)(char)){
+    /* Two hex digits per byte of the table's element type */
+    const int width = (int)(EKHTML_CHARMAP_LEN * 2);
+    int ch;
+
+    printf("#ifdef EKHTML_USE_TABLES\n");
+    printf("const %s %s[256] = {\n", EKHTML_CHARMAP_TYPE_S, name);
+    for(ch=0; ch < 256; ch++){
+        /* A '*' width keeps the format a literal and avoids building it
+           at run time with the non-portable sprintf_s */
+        printf("0x%0*x ", width, (unsigned int)cmap_func((char)ch));
+        if(isgraph(ch))
+            printf("/* '%c' */", ch);
+        else
+            printf("/* 0x%.2x */", ch);
+        printf(", ");
+        if(!((ch + 1) % 4))
+            printf("\n");
+    }
+    printf("};\n\n");
+    printf("#else\n");
+    printf("extern const %s %s[256];\n", EKHTML_CHARMAP_TYPE_S, name);
+    printf("#endif\n");
+}
+
+/* Emit the generated header (include guard plus both tables) on stdout. */
+int main(int argc, char *argv[]){
+    (void)argc;
+    (void)argv;
+
+    fputs("#ifndef EKHTML_MKTABLES_DOT_H\n", stdout);
+    fputs("#define EKHTML_MKTABLES_DOT_H\n", stdout);
+
+    print_charmap("EKCMap_CharMap", charmap_values);
+    print_charmap("EKCMap_EKState", ekhtml_state);
+
+    fputs("\n#endif\n", stdout);
+    return 0;
+}
diff --git a/ekhtml/src/ekhtml_special.c b/ekhtml/src/ekhtml_special.c
new file mode 100644
index 0000000..f43a62d
--- /dev/null
+++ b/ekhtml/src/ekhtml_special.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+
+#include "ekhtml_config.h"
+#include "ekhtml.h"
+#include "ekhtml_tables.h"
+#define EKHTML_USE_PRIVATE
+#include "ekhtml_private.h"
+
+/*
+ * ekhtml_parse_special:  Process a '<!' tag that is not a comment.  The
+ *                        whole tag is passed to the data callback, if set.
+ *
+ * Arguments:      parser     = Parser the data belongs to
+ *                 state_data = Slot holding this handler's state; NULL on
+ *                              the first call for a tag, reset to NULL
+ *                              once the tag has been handled
+ *                 curp       = Start of the tag (the '<')
+ *                 endp       = Pointer to first byte of 'out of range' data
+ *                 baddata    = Not written by this function
+ *
+ * Return values:  Pointer to the next byte the main parser should process,
+ *                 or NULL if the end of the tag has not arrived yet (the
+ *                 resume offset is then kept in parser->state.offset).
+ */
+char *ekhtml_parse_special(ekhtml_parser_t *parser, void **state_data,
+                           const char *curp, const char *endp,
+                           int *baddata)
+{
+    const char *workp;
+    int *offset = &parser->state.offset;
+    int closed;
+
+    assert(*curp == '<' && *(curp + 1) == '!');
+
+    if(*state_data == NULL){/* Only called the first time the tag is started */
+        *offset = 2;
+        *state_data = (void *)1; /* Assign it any non-NULL value */
+    }
+
+    for(workp=curp + *offset;workp != endp; workp++)
+        if(*workp == '<' || *workp == '>')
+            break;
+
+    if(workp == endp){
+        /* No end of tag found yet, save state */
+        *offset = endp - curp;
+        return NULL;
+    }
+
+    /* A '>' closes this tag and belongs to it.  A '<' (malformed HTML)
+       starts the next item and is left in the buffer, so it must not be
+       reported here as well or it would be delivered twice */
+    closed = (*workp == '>');
+
+    if(parser->datacb){
+        ekhtml_string_t str;
+
+        str.str = curp;
+        str.len = workp - curp + (closed ? 1 : 0);
+        parser->datacb(parser->cbdata, &str);
+    }
+
+    *state_data = NULL;
+    return (char *)(closed ? workp + 1 : workp);
+}
diff --git a/ekhtml/src/ekhtml_starttag.c b/ekhtml/src/ekhtml_starttag.c
new file mode 100644
index 0000000..cfd3ea4
--- /dev/null
+++ b/ekhtml/src/ekhtml_starttag.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * ekhtml_starttag: Processor for HTML start-tags.
+ *
+ * This starttag processor is the most complicated of the tag processors.
+ * It has its own small internal state machine which keeps track of what
+ * the next thing it is searching for is.
+ *
+ * SOURCE NOTES: We do a bit of weird hackery with respect to storing
+ * attributes. Since we need to store the OFFSET of the
+ * attribute and values, and cannot store actual pointers
+ * (see ekhtml.c for an explanation as to why), we need
+ * to create a whole new structure to store this info.
+ * BUT, we are just going to allocate a very similar
+ * structure (ekhtml_attr_t) anyway, and fill it in.
+ * SO, we just use the ekhtml_attr_t, and assign 'integer'
+ * values to the pointer. This saves us allocations and
+ * some management issues at the cost of readability.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "ekhtml_config.h"
+#include "ekhtml.h"
+#include "ekhtml_tables.h"
+#define EKHTML_USE_PRIVATE
+#include "ekhtml_private.h"
+
+#define EKHTML_STMODE_TAG 0 /* Finding the tagname */
+#define EKHTML_STMODE_SUCK 1 /* Suck data until the last '<' or '>' */
+#define EKHTML_STMODE_BEGNAME 2 /* Find the beginning of an attribute name */
+#define EKHTML_STMODE_GETNAME 3 /* Get the rest of the attribute name */
+#define EKHTML_STMODE_GETEQUAL 4 /* Find the equals sign */
+#define EKHTML_STMODE_BEGVALUE 5 /* Get the beginning of a attribute value */
+#define EKHTML_STMODE_GETVALUE 6 /* Get the rest of an attribute value */
+
+/*
+ * ekhtml_parser_starttag_cleanup:  Free every attribute structure sitting
+ *                                  on the parser's free list.  Meant to be
+ *                                  called when the parser object is about
+ *                                  to be destroyed.
+ *
+ * Arguments:      parser = Parser whose free list is to be released
+ *
+ */
+
+void ekhtml_parser_starttag_cleanup(ekhtml_parser_t *parser){
+    ekhtml_attr_t *cur = parser->freeattrs;
+
+    while(cur != NULL){
+        /* Fetch the link before the node disappears */
+        ekhtml_attr_t *following = cur->next;
+
+        free(cur);
+        cur = following;
+    }
+}
+
+/*
+ * ekhtml_parser_attr_new:  Get a new unused attribute structure.
+ *
+ * Arguments:      parser = Parser to get a new attribute structure for
+ *
+ * Return values:  Returns an attribute structure, either recycled from the
+ *                 parser's free list or freshly malloc'd.  It should be
+ *                 passed to ekhtml_parser_attr_release when the caller
+ *                 is done using it.  None of its fields are initialized;
+ *                 that is the caller's job.  The malloc result is returned
+ *                 unchecked.
+ */
+
+static inline
+ekhtml_attr_t *ekhtml_parser_attr_new(ekhtml_parser_t *parser){
+    ekhtml_attr_t *recycled = parser->freeattrs;
+
+    if(recycled != NULL){
+        /* Pop the head of the free list */
+        parser->freeattrs = recycled->next;
+        return recycled;
+    }
+
+    /* Nothing to recycle, go to the allocator */
+    return malloc(sizeof(ekhtml_attr_t));
+}
+
+/*
+ * ekhtml_parser_attr_release:  Give back an attribute previously fetched
+ *                              via ekhtml_parser_attr_new.  The structure
+ *                              is not freed; it is pushed onto the front of
+ *                              the parser's free list for later reuse.
+ *
+ * Arguments:      parser = parser to give the attribute back to
+ *                 attr   = Attribute to relinquish use of
+ *
+ */
+
+static inline
+void ekhtml_parser_attr_release(ekhtml_parser_t *parser, ekhtml_attr_t *attr){
+    ekhtml_attr_t *old_head = parser->freeattrs;
+
+    parser->freeattrs = attr;
+    attr->next = old_head;
+}
+
+/*
+ * handle_starttag:  Deliver a completely parsed start-tag to the user.
+ *
+ * The tag name is upper-cased in place inside the buffer, then looked up
+ * in the parser's callback hash.  If no start callback is registered for
+ * the tag, the 'unknown tag' callback is used;  if neither exists nothing
+ * more happens.  Before the callback runs, the attribute 'offsets' (see
+ * SOURCE NOTES at the top of the file) are turned into real pointers.
+ *
+ * Arguments:      parser = Parser holding the callbacks
+ *                 curp   = Start of the tag (points at the '<')
+ *                 sstate = State holding the tag-name end offset and the
+ *                          list of collected attributes
+ */
+static void handle_starttag(ekhtml_parser_t *parser, char *curp,
+                            ekhtml_starttag_state *sstate)
+{
+    ekhtml_starttag_cb_t callback = NULL;
+    ekhtml_tag_container *cont;
+    ekhtml_string_t tagname;
+    ekhtml_attr_t *cur;
+    hnode_t *node;
+    int namelen = sstate->tagend - 1;   /* Everything after the '<' */
+
+    tagname.str = ekhtml_make_upperstr(curp + 1, namelen);
+    tagname.len = namelen;
+
+    /* A tag-specific callback wins ... */
+    node = hash_lookup(parser->startendcb, &tagname);
+    if(node){
+        cont = hnode_get(node);
+        if(cont && cont->startfunc)
+            callback = cont->startfunc;
+    }
+
+    /* ... otherwise fall back on the catch-all, if there is one */
+    if(!callback && parser->startcb_unk)
+        callback = parser->startcb_unk;
+
+    if(!callback)
+        return;
+
+    /* Formulate real attribute callback data from the 'offset'
+       pointer values.  Boolean attributes carry no value to fix up. */
+    for(cur = sstate->attrs; cur != NULL; cur = cur->next){
+        cur->name.str = curp + (int)cur->name.str;
+        if(cur->isBoolean)
+            continue;
+        cur->val.str = curp + (int)cur->val.str;
+    }
+
+    callback(parser->cbdata, &tagname, sstate->attrs);
+}
+
+/*
+ * release_attributes:  Hand every attribute held by the start-tag state
+ *                      (the one in progress, if any, plus the finished
+ *                      list) back to the parser's free list.  The state's
+ *                      own pointers are left untouched.
+ */
+static void release_attributes(ekhtml_parser_t *parser,
+                               ekhtml_starttag_state *sstate)
+{
+    ekhtml_attr_t *cur, *following;
+
+    if(sstate->curattr != NULL)
+        ekhtml_parser_attr_release(parser, sstate->curattr);
+
+    for(cur = sstate->attrs; cur != NULL; cur = following){
+        /* Releasing rewrites cur->next, so save it first */
+        following = cur->next;
+        ekhtml_parser_attr_release(parser, cur);
+    }
+}
+
+/*
+ * scroll_attribute:  The attribute in progress is done.  Push it onto the
+ *                    front of the finished-attribute list and clear the
+ *                    'in progress' slot.
+ */
+static inline void scroll_attribute(ekhtml_starttag_state *sstate){
+    ekhtml_attr_t *finished = sstate->curattr;
+
+    sstate->curattr = NULL;
+    finished->next = sstate->attrs;
+    sstate->attrs = finished;
+}
+
+
+/*
+ * ekhtml_parse_starttag: Parse a start-tag, possibly over several calls.
+ *
+ * Arguments: parser = Parser doing the work. parser->startstate holds
+ * the state machine, parser->state.offset how far
+ * into the tag we have scanned so far
+ * state_data = NULL the first time a tag is seen; set to the
+ * start-tag state while the tag is in progress and
+ * reset to NULL once the tag has been handled
+ * curp = Start of the tag (points at the '<')
+ * endp = One past the last byte of available data
+ * baddata = Not used by this processor
+ *
+ * Return values: NULL if the end of the tag was not found in the available
+ * data (progress is saved in the parser). Otherwise the
+ * callback has been invoked, the attributes released, and
+ * the result points to where parsing should continue: just
+ * past the closing '>', or AT a '<' which cut the tag short.
+ */
+char *ekhtml_parse_starttag(ekhtml_parser_t *parser, void **state_data,
+ char *curp, char *endp, int *baddata)
+{
+ ekhtml_starttag_state *startstate = *state_data;
+ int *offset = &parser->state.offset;
+ char *workp;
+
+ assert(*curp == '<' && isalpha(*(curp + 1)));
+ assert(endp - curp >= 3);
+
+ if(startstate == NULL){ /* First time the tag is called */
+ startstate = &parser->startstate;
+ startstate->tagend = sizeof("<F") - 1;
+ startstate->mode = EKHTML_STMODE_TAG;
+ startstate->attrs = NULL;
+ startstate->curattr = NULL;
+ startstate->quote = '\0';
+ *state_data = startstate;
+ *offset = startstate->tagend;
+ }
+
+ /* Resume scanning where the previous call (if any) stopped */
+ workp = curp + *offset;
+
+ if(startstate->mode == EKHTML_STMODE_TAG){
+ /* Find that tag! */
+ workp = ekhtml_find_notcharsmap(workp, endp - workp, EKCMap_CharMap,
+ EKHTML_CHAR_TAGNAME);
+ *offset = workp - curp;
+ if(workp == endp)
+ return NULL;
+
+ startstate->tagend = *offset;
+ startstate->mode = EKHTML_STMODE_BEGNAME;
+ }
+
+ /* Each mode below falls through to the next 'if' when it changes the
+ mode, so several states can be processed in one loop iteration */
+ while(workp != endp){ /* Main state processing loop */
+ if(startstate->mode == EKHTML_STMODE_BEGNAME){
+ ekhtml_attr_t *attr;
+
+ workp = ekhtml_find_notcharsmap(workp, endp - workp,
+ EKCMap_CharMap,
+ EKHTML_CHAR_WHITESPACE);
+ if(workp == endp)
+ break;
+
+ if(!(EKCMap_CharMap[(unsigned char)*workp] &
+ EKHTML_CHAR_BEGATTRNAME))
+ {
+ /* Bad attrname character */
+ startstate->mode = EKHTML_STMODE_SUCK;
+ } else {
+ assert(startstate->curattr == NULL);
+ /* Valid attribute name, allocate space for it */
+ attr = ekhtml_parser_attr_new(parser);
+ /* The 'pointer' stored here is really the offset from curp */
+ attr->name.str = (char *)NULL + (workp - curp);
+ attr->name.len = 0; /* Will get assigned later */
+ attr->val.str = NULL;
+ attr->val.len = 0;
+ attr->isBoolean = 1;
+ attr->next = NULL;
+ startstate->mode = EKHTML_STMODE_GETNAME;
+ startstate->curattr = attr;
+ }
+ }
+
+ if(startstate->mode == EKHTML_STMODE_GETNAME){
+ workp = ekhtml_find_notcharsmap(workp, endp - workp,
+ EKCMap_CharMap,
+ EKHTML_CHAR_ATTRNAME);
+ if(workp == endp)
+ break;
+
+ /* There be dragons here -- watch out -- see comment @ top
+ of file */
+ startstate->curattr->name.len =
+ workp - (curp + (int)startstate->curattr->name.str);
+ if(*workp == '='){
+ startstate->mode = EKHTML_STMODE_BEGVALUE;
+ workp++; /* Skip the equals sign */
+ } else {
+ if(!(EKCMap_CharMap[(unsigned char)*workp] &
+ EKHTML_CHAR_WHITESPACE))
+ {
+ /* Found something we weren't expecting. Use the current
+ attribute as a boolean value and suck the rest */
+ scroll_attribute(startstate);
+ startstate->mode = EKHTML_STMODE_SUCK;
+ } else
+ startstate->mode = EKHTML_STMODE_GETEQUAL;
+ }
+ }
+
+ if(startstate->mode == EKHTML_STMODE_GETEQUAL){
+ workp = ekhtml_find_notcharsmap(workp, endp - workp,
+ EKCMap_CharMap,
+ EKHTML_CHAR_WHITESPACE);
+ if(workp == endp)
+ break;
+
+ if(*workp != '='){
+ /* Unexpected value. Could either be time to suck, or this was
+ really only a boolean value */
+ scroll_attribute(startstate);
+
+ if(EKCMap_CharMap[(unsigned char)*workp] &
+ EKHTML_CHAR_BEGATTRNAME)
+ {
+ startstate->mode = EKHTML_STMODE_BEGNAME;
+ continue;
+ } else {
+ startstate->mode = EKHTML_STMODE_SUCK;
+ }
+ } else {
+ startstate->mode = EKHTML_STMODE_BEGVALUE;
+ workp++; /* Skip the equals sign */
+ }
+ }
+
+ if(startstate->mode == EKHTML_STMODE_BEGVALUE){
+ workp = ekhtml_find_notcharsmap(workp, endp - workp,
+ EKCMap_CharMap,
+ EKHTML_CHAR_WHITESPACE);
+ if(workp == endp)
+ break;
+
+ startstate->curattr->isBoolean = 0;
+ /* As with the name, the value 'pointer' is an offset from curp */
+ startstate->curattr->val.str = (char *)NULL + (workp - curp);
+ startstate->quote = '\0';
+ if(*workp == '"' || *workp == '\''){
+ startstate->curattr->val.str++; /* Skip the quote */
+ startstate->mode = EKHTML_STMODE_GETVALUE;
+ startstate->quote = *workp;
+ workp++;
+ } else if(!(EKCMap_CharMap[(unsigned char)*workp] &
+ EKHTML_CHAR_ATTRVALUE))
+ {
+ /* Bad value .. */
+ startstate->curattr->val.len = 0;
+ scroll_attribute(startstate);
+ startstate->mode = EKHTML_STMODE_SUCK;
+ } else {
+ /* Valid value */
+ startstate->mode = EKHTML_STMODE_GETVALUE;
+ }
+ }
+
+ if(startstate->mode == EKHTML_STMODE_GETVALUE){
+ if(startstate->quote){
+ /* A quoted value ends at the matching quote, but a '<' or
+ '>' also stops the scan (handled below as end of tag) */
+ for(;workp != endp && *workp != '>' && *workp != '<'; workp++){
+ if(*workp == startstate->quote){
+ startstate->curattr->val.len =
+ workp - (curp + (int)startstate->curattr->val.str);
+ scroll_attribute(startstate);
+ startstate->mode = EKHTML_STMODE_BEGNAME;
+ workp++; /* Skip the quote */
+ break;
+ }
+ }
+ /* In case we broke out in the above loop, we may
+ need to continue in the main loop -- CONFUSING */
+ if(startstate->mode == EKHTML_STMODE_BEGNAME)
+ continue;
+ } else
+ workp = ekhtml_find_notcharsmap(workp, endp - workp,
+ EKCMap_CharMap,
+ EKHTML_CHAR_ATTRVALUE);
+ if(workp == endp)
+ break;
+
+ startstate->curattr->val.len =
+ workp - (curp + (int)startstate->curattr->val.str);
+ scroll_attribute(startstate);
+
+ if(*workp == '>' || *workp == '<') {
+ /* End of tag: report it, recycle the attributes, reset state */
+ *offset = workp - curp;
+ handle_starttag(parser, curp, startstate);
+ release_attributes(parser, startstate);
+ *state_data = NULL;
+ if(*workp == '<')
+ return workp;
+ else
+ return workp + 1;
+ } else {
+ startstate->mode = EKHTML_STMODE_BEGNAME;
+ continue;
+ }
+ }
+
+ if(startstate->mode == EKHTML_STMODE_SUCK){
+ /* The sucking mode is here in case someone puts a bad character
+ in an attribute name. We suck until what looks like end of tag*/
+ for(;workp != endp && *workp != '<' && *workp != '>'; workp++)
+ ;
+ if(workp == endp)
+ break;
+
+ *offset = workp - curp;
+ handle_starttag(parser, curp, startstate);
+ release_attributes(parser, startstate);
+ *state_data = NULL;
+ if(*workp == '<')
+ return workp;
+ else
+ return workp + 1;
+ }
+ }
+
+ /* Ran out of data in mid-tag: remember how far we got and ask for more */
+ *offset = workp - curp;
+ return NULL;
+}
diff --git a/ekhtml/src/ekhtml_util.c b/ekhtml/src/ekhtml_util.c
new file mode 100644
index 0000000..0bc5c1d
--- /dev/null
+++ b/ekhtml/src/ekhtml_util.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <ctype.h>
+
+#include "ekhtml_config.h"
+#include "ekhtml.h"
+#include "ekhtml_tables.h"
+#define EKHTML_USE_PRIVATE
+#include "ekhtml_private.h"
+
+
+/*
+ * ekhtml_make_upperstr:  Convert a bytestring to uppercase, in place.
+ *
+ * Arguments:      buf = Buffer containing bytes to 'upper'
+ *                 len = Length of bytes in buf.  Nothing is touched when
+ *                       len is zero or negative
+ *
+ * Return values:  Capitalizes the string pointed at by 'buf', and returns
+ *                 'buf'
+ */
+
+char *ekhtml_make_upperstr(char *buf, int len){
+    char *endp = buf + len, *cp;
+
+    /* toupper() is only defined for EOF and values representable as an
+       unsigned char.  Where plain char is signed, bytes >= 0x80 would be
+       passed as negative ints, so go through unsigned char first. */
+    for(cp = buf; cp < endp; cp++)
+        *cp = (char)toupper((unsigned char)*cp);
+    return buf;
+}
+
+
diff --git a/ekhtml/src/hash.c b/ekhtml/src/hash.c
new file mode 100644
index 0000000..95651d4
--- /dev/null
+++ b/ekhtml/src/hash.c
@@ -0,0 +1,1035 @@
+/*
+ * Hash Table Data Type
+ * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net>
+ *
+ * Free Software License:
+ *
+ * All rights are reserved by the author, with the following exceptions:
+ * Permission is granted to freely reproduce and distribute this software,
+ * possibly in exchange for a fee, provided that this copyright notice appears
+ * intact. Permission is also granted to adapt this software to produce
+ * derivative works, as long as the modified versions carry this copyright
+ * notice and additional notices stating that the work has been modified.
+ * This source code may be translated into executable form and incorporated
+ * into proprietary software; there is no requirement for such software to
+ * contain a copyright notice related to this source.
+ *
+ * $Id: hash.c,v 1.1 2002/09/17 02:49:36 jick Exp $
+ * $Name: EKHTML_RELEASE_0_3_2 $
+ */
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <assert.h>
+#include <string.h>
+#define HASH_IMPLEMENTATION
+#include "hash.h"
+
+#ifdef KAZLIB_RCSID
+static const char rcsid[] = "$Id: hash.c,v 1.1 2002/09/17 02:49:36 jick Exp $";
+#endif
+
+#define INIT_BITS 6
+#define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */
+#define INIT_MASK ((INIT_SIZE) - 1)
+
+/*
+ * From here on the preprocessor rewrites the short identifiers on the left
+ * into the hash_-prefixed spellings on the right, so the code below can be
+ * written with the short names.
+ * NOTE(review): presumably the prefixed names are the real member names
+ * declared in hash.h -- confirm against the header.
+ */
+#define next hash_next
+#define key hash_key
+#define data hash_data
+#define hkey hash_hkey
+
+#define table hash_table
+#define nchains hash_nchains
+#define nodecount hash_nodecount
+#define maxcount hash_maxcount
+#define highmark hash_highmark
+#define lowmark hash_lowmark
+#define compare hash_compare
+#define function hash_function
+#define allocnode hash_allocnode
+#define freenode hash_freenode
+#define context hash_context
+#define mask hash_mask
+#define dynamic hash_dynamic
+
+/* 'table' is defined a second time here with the same replacement text */
+#define table hash_table
+#define chain hash_chain
+
+/* Default node allocator/deallocator and default hashing/comparison
+ functions; hash_create() installs them when the caller supplies none */
+static hnode_t *hnode_alloc(void *context);
+static void hnode_free(hnode_t *node, void *context);
+static hash_val_t hash_fun_default(const void *key);
+static int hash_comp_default(const void *key1, const void *key2);
+
+/* Number of bits in hash_val_t. Zero until compute_bits() has run, which
+ the code below also uses as a 'module has been initialized' check */
+int hash_val_t_bit;
+
+/*
+ * Compute the number of bits in the hash_val_t type and store the result
+ * in hash_val_t_bit.  We know that hash_val_t is an unsigned integral type,
+ * so the highest value it can hold, HASH_VAL_T_MAX, is a Mersenne number
+ * (a power of two, less one):  its binary representation is all one bits.
+ * Starting from that value we shift right one position at a time, which
+ * feeds zeros in at the top, and count how many shifts it takes until
+ * nothing is left.
+ */
+
+static void compute_bits(void)
+{
+    hash_val_t remaining;
+    int width = 0;
+
+    for (remaining = HASH_VAL_T_MAX; remaining != 0; remaining >>= 1)
+        width++;
+
+    hash_val_t_bit = width;
+}
+
+/*
+ * Verify whether the given argument is a power of two.  Returns 1 if it
+ * is, 0 otherwise (zero is not a power of two).
+ *
+ * A power of two has exactly one bit set.  Subtracting one from such a
+ * value clears that bit and sets all the bits beneath it, so the value and
+ * its predecessor share no bits.  Any other non-zero value keeps its top
+ * bit when decremented.
+ */
+
+static int is_power_of_two(hash_val_t arg)
+{
+    return arg != 0 && (arg & (arg - 1)) == 0;
+}
+
+/*
+ * Compute the chain-index mask for a given table size.  The size must be
+ * a power of two and at least 2;  the mask is then simply all the bits
+ * below the size bit.
+ */
+
+static hash_val_t compute_mask(hashcount_t size)
+{
+    hash_val_t low_bits;
+
+    assert (is_power_of_two(size));
+    assert (size >= 2);
+
+    low_bits = size - 1;
+    return low_bits;
+}
+
+/*
+ * Reset every chain head in the table to a null pointer.
+ */
+
+static void clear_table(hash_t *hash)
+{
+    hash_val_t remaining = hash->nchains;
+
+    /* Walk from the last slot down to slot zero */
+    while (remaining > 0) {
+        remaining--;
+        hash->table[remaining] = NULL;
+    }
+}
+
+/*
+ * Double the size of a dynamic table. This works as follows. Each chain splits
+ * into two adjacent chains. The shift amount increases by one, exposing an
+ * additional bit of each hashed key. For each node in the original chain, the
+ * value of this newly exposed bit will decide which of the two new chains will
+ * receive the node: if the bit is 1, the chain with the higher index will have
+ * the node, otherwise the lower chain will receive the node. In this manner,
+ * the hash table will continue to function exactly as before without having to
+ * rehash any of the keys.
+ * Notes:
+ * 1. Overflow check.
+ * 2. The new number of chains is twice the old number of chains.
+ * 3. The new mask is one bit wider than the previous, revealing a
+ * new bit in all hashed keys.
+ * 4. Allocate a new table of chain pointers that is twice as large as the
+ * previous one.
+ * 5. If the reallocation was successful, we perform the rest of the growth
+ * algorithm, otherwise we do nothing.
+ * 6. The exposed_bit variable holds a mask with which each hashed key can be
+ * AND-ed to test the value of its newly exposed bit.
+ * 7. Now loop over each chain in the table and sort its nodes into two
+ * chains based on the value of each node's newly exposed hash bit.
+ * 8. The low chain replaces the current chain. The high chain goes
+ * into the corresponding sister chain in the upper half of the table.
+ * 9. We have finished dealing with the chains and nodes. We now update
+ * the various bookeeping fields of the hash structure.
+ */
+
+static void grow_table(hash_t *hash)
+{
+ hnode_t **newtable;
+
+ assert (2 * hash->nchains > hash->nchains); /* 1 */
+
+ newtable = realloc(hash->table,
+ sizeof *newtable * hash->nchains * 2); /* 4 */
+
+ if (newtable) { /* 5 */
+ hash_val_t mask = (hash->mask << 1) | 1; /* 3 */
+ hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */
+ hash_val_t chain;
+
+ assert (mask != hash->mask);
+
+ /* hash->nchains is still the OLD size here; only the lower half of
+ newtable holds valid chain heads until the loop has filled in
+ the matching upper-half slot for each of them */
+ for (chain = 0; chain < hash->nchains; chain++) { /* 7 */
+ hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next;
+
+ for (hptr = newtable[chain]; hptr != 0; hptr = next) {
+ /* Save the link first: the node is relinked just below */
+ next = hptr->next;
+
+ if (hptr->hkey & exposed_bit) {
+ hptr->next = high_chain;
+ high_chain = hptr;
+ } else {
+ hptr->next = low_chain;
+ low_chain = hptr;
+ }
+ }
+
+ newtable[chain] = low_chain; /* 8 */
+ newtable[chain + hash->nchains] = high_chain;
+ }
+
+ hash->table = newtable; /* 9 */
+ hash->mask = mask;
+ hash->nchains *= 2;
+ hash->lowmark *= 2;
+ hash->highmark *= 2;
+ }
+ assert (hash_verify(hash));
+}
+
+/*
+ * Cut a table size in half. This is done by folding together adjacent chains
+ * and populating the lower half of the table with these chains. The chains are
+ * simply spliced together. Once this is done, the whole table is reallocated
+ * to a smaller object.
+ * Notes:
+ * 1. It is illegal to have a hash table with one slot. This would mean that
+ * hash->shift is equal to hash_val_t_bit, an illegal shift value.
+ * Also, other things could go wrong, such as hash->lowmark becoming zero.
+ * 2. Looping over each pair of sister chains, the low_chain is set to
+ * point to the head node of the chain in the lower half of the table,
+ * and high_chain points to the head node of the sister in the upper half.
+ * 3. The intent here is to compute a pointer to the last node of the
+ * lower chain into the low_tail variable. If this chain is empty,
+ * low_tail ends up with a null value.
+ * 4. If the lower chain is not empty, we simply tack the upper chain onto it.
+ * If the upper chain is a null pointer, nothing happens.
+ * 5. Otherwise if the lower chain is empty but the upper one is not, then
+ * the high chain is simply transferred to the lower half of the table.
+ * 6. Otherwise if both chains are empty, there is nothing to do.
+ * 7. All the chain pointers are in the lower half of the table now, so
+ * we reallocate it to a smaller object. This, of course, invalidates
+ * all pointer-to-pointers which reference into the table from the
+ * first node of each chain.
+ * 8. Though it's unlikely, the reallocation may fail. In this case we
+ * pretend that the table _was_ reallocated to a smaller object.
+ * 9. Finally, update the various table parameters to reflect the new size.
+ */
+
+static void shrink_table(hash_t *hash)
+{
+ hash_val_t chain, nchains;
+ hnode_t **newtable, *low_tail, *low_chain, *high_chain;
+
+ assert (hash->nchains >= 2); /* 1 */
+ /* From here on the local nchains is the NEW (halved) chain count */
+ nchains = hash->nchains / 2;
+
+ for (chain = 0; chain < nchains; chain++) {
+ low_chain = hash->table[chain]; /* 2 */
+ high_chain = hash->table[chain + nchains];
+ for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next)
+ ; /* 3 */
+ if (low_chain != 0) /* 4 */
+ low_tail->next = high_chain;
+ else if (high_chain != 0) /* 5 */
+ hash->table[chain] = high_chain;
+ else
+ assert (hash->table[chain] == NULL); /* 6 */
+ }
+ newtable = realloc(hash->table,
+ sizeof *newtable * nchains); /* 7 */
+ /* On failure hash->table keeps pointing at the old, larger block;
+ only its lower half is used from now on */
+ if (newtable) /* 8 */
+ hash->table = newtable;
+ hash->mask >>= 1; /* 9 */
+ hash->nchains = nchains;
+ hash->lowmark /= 2;
+ hash->highmark /= 2;
+ assert (hash_verify(hash));
+}
+
+
+/*
+ * Create a dynamic hash table.  Both the hash table control structure and
+ * the table of chains are dynamically allocated, and the table is marked
+ * as dynamic so that it is resized on the fly as its load changes.
+ *
+ * Because this is likely to be the first function the user calls, the
+ * number of bits in hash_val_t is computed here if that has not been done
+ * yet.
+ *
+ * The table starts out with INIT_SIZE chains (INIT_SIZE should be a power
+ * of two).  The high and low marks are set to twice and half the table
+ * size respectively:  when the number of nodes grows beyond the high mark
+ * (load factor 2) the table doubles, cutting the load factor down to about
+ * 1;  when it drops to or beneath the low mark (load factor 0.5) the table
+ * shrinks, bringing the load back up to about 1.  The table never shrinks
+ * beneath INIT_SIZE, even if it is emptied.
+ *
+ * A null compfun or hashfun selects the default comparison or hashing
+ * function.  The default node allocator is installed.
+ *
+ * Returns the new table, or a null pointer if either allocation fails (in
+ * which case nothing is leaked).
+ */
+
+hash_t *hash_create(hashcount_t maxcount, hash_comp_t compfun,
+        hash_fun_t hashfun)
+{
+    hash_t *hash;
+    hnode_t **chains;
+
+    if (hash_val_t_bit == 0)
+        compute_bits();
+
+    hash = malloc(sizeof *hash);
+    if (hash == NULL)
+        return NULL;
+
+    chains = malloc(sizeof *chains * INIT_SIZE);
+    if (chains == NULL) {
+        /* Backtrack:  the control structure is of no use without a table */
+        free(hash);
+        return NULL;
+    }
+
+    hash->table = chains;
+    hash->nchains = INIT_SIZE;
+    hash->mask = INIT_MASK;
+    hash->highmark = INIT_SIZE * 2;
+    hash->lowmark = INIT_SIZE / 2;
+    hash->nodecount = 0;
+    hash->maxcount = maxcount;
+
+    if (compfun)
+        hash->compare = compfun;
+    else
+        hash->compare = hash_comp_default;
+
+    if (hashfun)
+        hash->function = hashfun;
+    else
+        hash->function = hash_fun_default;
+
+    hash->allocnode = hnode_alloc;
+    hash->freenode = hnode_free;
+    hash->context = NULL;
+    hash->dynamic = 1;          /* Resizable on the fly */
+
+    clear_table(hash);          /* malloc'd memory is not null pointers */
+    assert (hash_verify(hash));
+    return hash;
+}
+
+/*
+ * Select a different set of node allocator routines, together with the
+ * context pointer that is handed to them.  The table must be empty.  The
+ * two routines must either both be given or both be null;  null selects
+ * the default allocator.
+ */
+
+void hash_set_allocator(hash_t *hash, hnode_alloc_t al,
+        hnode_free_t fr, void *context)
+{
+    assert (hash_count(hash) == 0);
+    assert ((al == 0 && fr == 0) || (al != 0 && fr != 0));
+
+    if (al)
+        hash->allocnode = al;
+    else
+        hash->allocnode = hnode_alloc;
+
+    if (fr)
+        hash->freenode = fr;
+    else
+        hash->freenode = hnode_free;
+
+    hash->context = context;
+}
+
+/*
+ * Remove every node from the hash, handing each one to the
+ * hash->freenode() function pointer, and leave the hash empty.
+ */
+
+void hash_free_nodes(hash_t *hash)
+{
+    hscan_t scanner;
+    hnode_t *victim;
+
+    for (hash_scan_begin(&scanner, hash);
+         (victim = hash_scan_next(&scanner)) != NULL; ) {
+        hash_scan_delete(hash, victim);
+        hash->freenode(victim, hash->context);
+    }
+
+    hash->nodecount = 0;
+    clear_table(hash);
+}
+
+/*
+ * Obsolescent function for removing all nodes from a table,
+ * freeing them and then freeing the table all in one step.
+ * It simply calls hash_free_nodes() followed by hash_destroy().
+ */
+
+void hash_free(hash_t *hash)
+{
+ /* When KAZLIB_OBSOLESCENT_DEBUG is defined, any call trips this assert */
+#ifdef KAZLIB_OBSOLESCENT_DEBUG
+ assert ("call to obsolescent function hash_free()" && 0);
+#endif
+ hash_free_nodes(hash);
+ hash_destroy(hash);
+}
+
+/*
+ * Free a dynamic hash table structure:  first its table of chains, then
+ * the control structure itself.  The table must already be empty;  the
+ * nodes are not touched here.
+ */
+
+void hash_destroy(hash_t *hash)
+{
+    hnode_t **chains;
+
+    assert (hash_val_t_bit != 0);
+    assert (hash_isempty(hash));
+
+    chains = hash->table;
+    free(chains);
+    free(hash);
+}
+
+/*
+ * Initialize a user supplied hash structure. The user also supplies a table of
+ * chains which is assigned to the hash structure. The table is static---it
+ * will not grow or shrink.  A null compfun or hashfun selects the default
+ * comparison or hashing function.  Returns the hash pointer it was given.
+ * Notes:
+ * 1. See note 1. in hash_create().
+ * 2. The user supplied array of pointers hopefully contains nchains nodes.
+ * 3. See note 7. in hash_create().
+ * 4. We must dynamically compute the mask from the given power of two table
+ *    size.
+ * 5. The user supplied table can't be assumed to contain null pointers,
+ *    so we reset it here.
+ * 6. Install the default node allocator, exactly as hash_create() does.
+ *    Without this, hash_alloc_insert(), hash_delete_free() and
+ *    hash_free_nodes() would call through whatever happened to be in the
+ *    user's structure.  hash_set_allocator() may still be called afterwards
+ *    to select different routines.
+ */
+
+hash_t *hash_init(hash_t *hash, hashcount_t maxcount,
+        hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table,
+        hashcount_t nchains)
+{
+    if (hash_val_t_bit == 0)    /* 1 */
+        compute_bits();
+
+    assert (is_power_of_two(nchains));
+
+    hash->table = table;        /* 2 */
+    hash->nchains = nchains;
+    hash->nodecount = 0;
+    hash->maxcount = maxcount;
+    hash->compare = compfun ? compfun : hash_comp_default;
+    hash->function = hashfun ? hashfun : hash_fun_default;
+    hash->allocnode = hnode_alloc;      /* 6 */
+    hash->freenode = hnode_free;
+    hash->context = NULL;
+    hash->dynamic = 0;          /* 3 */
+    hash->mask = compute_mask(nchains); /* 4 */
+    clear_table(hash);          /* 5 */
+
+    assert (hash_verify(hash));
+
+    return hash;
+}
+
+/*
+ * Reset the hash scanner so that the next element retrieved by
+ * hash_scan_next() shall be the first element on the first non-empty chain.
+ *
+ * The table is searched from chain zero upwards for a chain with something
+ * on it.  If one is found, the scanner remembers which chain it is and
+ * points its next pointer at that chain's first element.  If every chain
+ * is empty, the next pointer is set to NULL so that hash_scan_next() shall
+ * indicate failure;  the remembered chain is not written in that case.
+ */
+
+void hash_scan_begin(hscan_t *scan, hash_t *hash)
+{
+    hash_val_t total = hash->nchains;
+    hash_val_t idx = 0;
+
+    scan->table = hash;
+
+    while (idx < total && hash->table[idx] == 0)
+        idx++;
+
+    if (idx >= total) {
+        /* Nothing in the table at all */
+        scan->next = NULL;
+        return;
+    }
+
+    scan->chain = idx;
+    scan->next = hash->table[idx];
+}
+
+/*
+ * Retrieve the next node from the hash table, and update the pointer
+ * for the next invocation of hash_scan_next().  Returns a null pointer
+ * once the scan is exhausted.
+ * Notes:
+ * 1. Remember the next pointer in a temporary value so that it can be
+ *    returned.
+ * 2. This assertion essentially checks whether the module has been properly
+ *    initialized. The first point of interaction with the module should be
+ *    either hash_create() or hash_init(), both of which set hash_val_t_bit to
+ *    a non zero value.
+ * 3. If the next pointer we are returning is not NULL, then the user is
+ *    allowed to call hash_scan_next() again. We prepare the new next pointer
+ *    for that call right now. That way the user is allowed to delete the node
+ *    we are about to return, since we will no longer be needing it to locate
+ *    the next node.  If it IS NULL there is nothing to prepare;  in
+ *    particular scan->chain is not looked at, because hash_scan_begin()
+ *    does not set it when the table is empty.
+ * 4. If there is a next node in the chain (next->next), then that becomes the
+ *    new next node, otherwise ...
+ * 5. We have exhausted the current chain, and must locate the next subsequent
+ *    non-empty chain in the table.
+ * 6. If a non-empty chain is found, the first element of that chain becomes
+ *    the new next node. Otherwise there is no new next node and we set the
+ *    pointer to NULL so that the next time hash_scan_next() is called, a null
+ *    pointer shall be immediately returned.
+ */
+
+
+hnode_t *hash_scan_next(hscan_t *scan)
+{
+    hnode_t *next = scan->next;         /* 1 */
+    hash_t *hash = scan->table;
+
+    assert (hash_val_t_bit != 0);       /* 2 */
+
+    if (next == NULL)                   /* 3 */
+        return NULL;
+
+    if (next->next) {                   /* 4 */
+        scan->next = next->next;
+    } else {
+        hash_val_t nchains = hash->nchains;
+        hash_val_t chain = scan->chain + 1;
+
+        while (chain < nchains && hash->table[chain] == 0)      /* 5 */
+            chain++;
+        if (chain < nchains) {          /* 6 */
+            scan->chain = chain;
+            scan->next = hash->table[chain];
+        } else {
+            scan->next = NULL;
+        }
+    }
+    return next;
+}
+
+/*
+ * Insert a node into the hash table under the given key.
+ *
+ * The node must not currently be linked anywhere (its next pointer must be
+ * null).  It is illegal to insert more than the maximum number of nodes:
+ * the client should verify that the hash table is not full before
+ * attempting an insertion.  The same key may not be inserted into a table
+ * twice.
+ *
+ * If the table is dynamic and the node count has already reached the high
+ * mark, the table is grown first.  The chain is then picked by taking the
+ * bottom N bits of the hash value, where N is the base 2 logarithm of the
+ * size of the hash table, and the node is put at the head of that chain.
+ */
+
+void hash_insert(hash_t *hash, hnode_t *node, const void *key)
+{
+    hash_val_t hashed, slot;
+
+    assert (hash_val_t_bit != 0);
+    assert (node->next == NULL);
+    assert (hash->nodecount < hash->maxcount);
+    assert (hash_lookup(hash, key) == NULL);
+
+    if (hash->dynamic && hash->nodecount >= hash->highmark)
+        grow_table(hash);
+
+    /* Must come after any growth, which widens the mask */
+    hashed = hash->function(key);
+    slot = hashed & hash->mask;
+
+    node->hkey = hashed;
+    node->key = key;
+
+    /* Link in at the head of the chain */
+    node->next = hash->table[slot];
+    hash->table[slot] = node;
+    hash->nodecount++;
+
+    assert (hash_verify(hash));
+}
+
+/*
+ * Find a node in the hash table and return a pointer to it, or a null
+ * pointer if no node with the given key exists.
+ *
+ * The key is hashed and the entire hash value is kept.  The chain is
+ * located from among the 2^N chains by and-ing the hash value with the
+ * current mask, which leaves its lower N bits.  While descending the
+ * chain, the hash value stored inside each node is compared first, and
+ * only if that matches is the full key comparison performed.
+ */
+
+hnode_t *hash_lookup(hash_t *hash, const void *key)
+{
+    hash_val_t hashed = hash->function(key);
+    hnode_t *cand = hash->table[hashed & hash->mask];
+
+    while (cand != NULL) {
+        if (cand->hkey == hashed && hash->compare(cand->key, key) == 0)
+            break;
+        cand = cand->next;
+    }
+
+    return cand;
+}
+
+/*
+ * Unlink the given node from the hash table and return it. The chains are
+ * singly linked, so the node's chain is walked from its head to find the
+ * predecessor. The node itself is not freed.
+ * Notes:
+ * 1. The node must belong to this hash table, and its key must not have
+ *    been tampered with (checked by assertion only).
+ * 2. If the table is dynamic, the node count is at or below the low mark
+ *    and still above INIT_SIZE, the table is shrunk before unlinking.
+ * 3. Determine which chain the node belongs to, and fetch the pointer
+ *    to the first node in this chain.
+ * 4. If the node being deleted is the first node in the chain, then
+ *    simply update the chain head pointer.
+ * 5. Otherwise advance to the node's predecessor, and splice the node out
+ *    by updating the predecessor's next pointer. Each pointer is asserted
+ *    to be non-null before it is dereferenced, so that a node missing from
+ *    its chain trips an assertion instead of a null dereference.
+ * 6. Indicate that the node is no longer in a hash table.
+ */
+
+hnode_t *hash_delete(hash_t *hash, hnode_t *node)
+{
+    hash_val_t chain;
+    hnode_t *hptr;
+
+    assert (hash_lookup(hash, node->key) == node);	/* 1 */
+    assert (hash_val_t_bit != 0);
+
+    if (hash->dynamic && hash->nodecount <= hash->lowmark
+            && hash->nodecount > INIT_SIZE)
+        shrink_table(hash);				/* 2 */
+
+    chain = node->hkey & hash->mask;			/* 3 */
+    hptr = hash->table[chain];
+
+    if (hptr == node) {					/* 4 */
+        hash->table[chain] = node->next;
+    } else {
+        assert (hptr != NULL);				/* 5 */
+        while (hptr->next != node) {
+            hptr = hptr->next;
+            assert (hptr != NULL);
+        }
+        hptr->next = node->next;
+    }
+
+    hash->nodecount--;
+    assert (hash_verify(hash));
+
+    node->next = NULL;					/* 6 */
+    return node;
+}
+
+/*
+ * Obtain a node from the table's allocator callback, attach the data
+ * pointer to it and insert it under the given key. Returns 1 on success and
+ * 0 when the allocator returns NULL, in which case the table is untouched.
+ */
+
+int hash_alloc_insert(hash_t *hash, const void *key, void *data)
+{
+    hnode_t *fresh = hash->allocnode(hash->context);
+
+    if (fresh == NULL)
+        return 0;
+
+    hnode_init(fresh, data);
+    hash_insert(hash, fresh, key);
+    return 1;
+}
+
+/*
+ * Unlink a node with hash_delete and hand it to the table's freenode
+ * callback together with the table's allocator context.
+ */
+
+void hash_delete_free(hash_t *hash, hnode_t *node)
+{
+    /* hash_delete returns the node it was given once it is unlinked. */
+    hnode_t *removed = hash_delete(hash, node);
+
+    hash->freenode(removed, hash->context);
+}
+
+/*
+ * Unlink a node without ever shrinking the table; otherwise this does the
+ * same job as hash_delete. Intended for use from within a hash table scan
+ * operation. The node must currently be in the table (asserted), and it is
+ * returned with its next pointer cleared.
+ */
+
+hnode_t *hash_scan_delete(hash_t *hash, hnode_t *node)
+{
+    hnode_t **link;
+
+    assert (hash_lookup(hash, node->key) == node);
+    assert (hash_val_t_bit != 0);
+
+    /*
+     * Walk the pointers that lead to each node rather than the nodes
+     * themselves, so the chain head and interior links are handled alike.
+     */
+    link = &hash->table[node->hkey & hash->mask];
+    while (*link != node)
+        link = &(*link)->next;
+    *link = node->next;
+
+    hash->nodecount--;
+    assert (hash_verify(hash));
+    node->next = NULL;
+
+    return node;
+}
+
+/*
+ * Counterpart of hash_delete_free built on hash_scan_delete: unlink the
+ * node without shrinking the table, then pass it to the freenode callback.
+ */
+
+void hash_scan_delfree(hash_t *hash, hnode_t *node)
+{
+    /* hash_scan_delete returns the node it was given once it is unlinked. */
+    hnode_t *removed = hash_scan_delete(hash, node);
+
+    hash->freenode(removed, hash->context);
+}
+
+/*
+ * Check the internal consistency of a hash table. Returns 1 when every
+ * check passes and 0 as soon as one fails.
+ * Notes:
+ * 1. For a dynamic table, the low mark must be strictly below the high
+ *    mark, and both marks must be powers of two.
+ * 2. Every node must sit in the chain selected by its stored hash value,
+ *    and the number of nodes found must equal the recorded node count.
+ */
+
+int hash_verify(hash_t *hash)
+{
+    hashcount_t seen = 0;
+    hash_val_t idx;
+    hnode_t *cur;
+
+    if (hash->dynamic) {					/* 1 */
+        if (hash->lowmark >= hash->highmark
+                || !is_power_of_two(hash->highmark)
+                || !is_power_of_two(hash->lowmark))
+            return 0;
+    }
+
+    for (idx = 0; idx < hash->nchains; idx++) {			/* 2 */
+        cur = hash->table[idx];
+        while (cur != NULL) {
+            if ((cur->hkey & hash->mask) != idx)
+                return 0;
+            seen++;
+            cur = cur->next;
+        }
+    }
+
+    return seen == hash->nodecount;
+}
+
+/*
+ * Report whether the table holds as many nodes as it is allowed to:
+ * returns 1 when the node count equals the maximum count, 0 otherwise.
+ */
+
+#undef hash_isfull
+int hash_isfull(hash_t *hash)
+{
+    return (hash->maxcount == hash->nodecount) ? 1 : 0;
+}
+
+/*
+ * Report whether the table holds no nodes at all: returns 1 when the node
+ * count is zero, 0 otherwise.
+ */
+
+#undef hash_isempty
+int hash_isempty(hash_t *hash)
+{
+    return !hash->nodecount;
+}
+
+/*
+ * malloc-backed node allocator. The context argument is not used.
+ * Returns NULL when malloc fails.
+ */
+static hnode_t *hnode_alloc(void *context)
+{
+    hnode_t *fresh = malloc(sizeof *fresh);
+
+    (void) context;
+    return fresh;
+}
+
+/*
+ * free-backed node deallocator. The context argument is not used.
+ */
+static void hnode_free(hnode_t *node, void *context)
+{
+    (void) context;
+    free(node);
+}
+
+
+/*
+ * Allocate a node with malloc, store the data pointer in it and mark it as
+ * not linked into any chain. Returns NULL when the allocation fails.
+ */
+
+hnode_t *hnode_create(void *data)
+{
+    hnode_t *fresh = malloc(sizeof *fresh);
+
+    if (fresh == NULL)
+        return NULL;
+
+    fresh->next = NULL;
+    fresh->data = data;
+    return fresh;
+}
+
+/*
+ * Prepare a node whose storage is supplied by the caller: clear its chain
+ * link and store the data pointer. Returns the same node.
+ */
+
+hnode_t *hnode_init(hnode_t *hnode, void *data)
+{
+    hnode->next = NULL;
+    hnode->data = data;
+
+    return hnode;
+}
+
+/*
+ * Release a node's storage with free(). The node is not unlinked from any
+ * table first, and the data it points to is not touched.
+ */
+
+void hnode_destroy(hnode_t *hnode)
+{
+    /* free(NULL) is a no-op, so no guard is needed here. */
+    free(hnode);
+}
+
+/*
+ * Replace the data pointer stored in a node. The previous pointer is
+ * simply overwritten.
+ */
+#undef hnode_put
+void hnode_put(hnode_t *node, void *data)
+{
+    void *payload = data;
+
+    node->data = payload;
+}
+
+/*
+ * Return the data pointer stored in a node.
+ */
+#undef hnode_get
+void *hnode_get(hnode_t *node)
+{
+    void *payload = node->data;
+
+    return payload;
+}
+
+/*
+ * Return the key pointer recorded in a node.
+ */
+#undef hnode_getkey
+const void *hnode_getkey(hnode_t *node)
+{
+    const void *stored = node->key;
+
+    return stored;
+}
+
+/*
+ * Return the number of nodes currently recorded in the table.
+ */
+#undef hash_count
+hashcount_t hash_count(hash_t *hash)
+{
+    hashcount_t entries = hash->nodecount;
+
+    return entries;
+}
+
+/*
+ * Return the number of chains in the table.
+ */
+#undef hash_size
+hashcount_t hash_size(hash_t *hash)
+{
+    hashcount_t chains = hash->nchains;
+
+    return chains;
+}
+
+/*
+ * Default hash function: treats the key as a NUL-terminated byte string.
+ * For every byte, the accumulator is mixed with a table entry selected by
+ * the low bits of (byte + accumulator) and rotated left by one within 32
+ * bits, then mixed with an entry selected through the byte's upper nibble
+ * and rotated left by two. The result is kept within 32 bits by the masks.
+ * The table is read-only and therefore const.
+ */
+static hash_val_t hash_fun_default(const void *key)
+{
+    static const unsigned long randbox[] = {
+        0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
+        0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
+        0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
+        0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
+    };
+
+    const unsigned char *str = key;
+    hash_val_t acc = 0;
+
+    while (*str) {
+        acc ^= randbox[(*str + acc) & 0xf];
+        acc = (acc << 1) | (acc >> 31);
+        acc &= 0xffffffffU;	/* hash_val_t may be wider than 32 bits */
+        acc ^= randbox[((*str++ >> 4) + acc) & 0xf];
+        acc = (acc << 2) | (acc >> 30);
+        acc &= 0xffffffffU;
+    }
+    return acc;
+}
+
+/*
+ * Default key comparison: both keys are treated as NUL-terminated strings
+ * and compared with strcmp, so 0 means the keys are equal.
+ */
+static int hash_comp_default(const void *key1, const void *key2)
+{
+    const char *lhs = key1;
+    const char *rhs = key2;
+
+    return strcmp(lhs, rhs);
+}
+
+#ifdef KAZLIB_TEST_MAIN
+
+#include <stdio.h>
+#include <ctype.h>
+#include <stdarg.h>
+
+typedef char input_t[256];
+
+/*
+ * Split a string into whitespace-separated tokens, in place. The variable
+ * arguments are char ** slots terminated by a null pointer; each slot
+ * receives the start of one token, and the whitespace character that ends
+ * a token is overwritten with NUL. Stops when the slots or the string run
+ * out. Returns the number of tokens stored.
+ */
+static int tokenize(char *string, ...)
+{
+    va_list ap;
+    char **slot;
+    char *cursor = string;
+    int found = 0;
+
+    va_start(ap, string);
+
+    for (slot = va_arg(ap, char **); slot != NULL; ) {
+        /* Skip the whitespace in front of the next token. */
+        while (*cursor != '\0' && isspace((unsigned char) *cursor))
+            cursor++;
+        if (*cursor == '\0')
+            break;
+
+        *slot = cursor;
+        found++;
+
+        /* Run to the end of the token. */
+        while (*cursor != '\0' && !isspace((unsigned char) *cursor))
+            cursor++;
+
+        slot = va_arg(ap, char **);
+        if (*cursor == '\0')
+            break;
+        *cursor++ = '\0';
+    }
+
+    va_end(ap);
+    return found;
+}
+
+/*
+ * Return a malloc'ed copy of a NUL-terminated string, or NULL when the
+ * allocation fails. The caller releases the copy with free(). The length
+ * is held in a size_t so the strlen result is not narrowed.
+ */
+static char *dupstring(const char *str)
+{
+    size_t sz = strlen(str) + 1;	/* include the terminator */
+    char *copy = malloc(sz);
+
+    if (copy)
+        memcpy(copy, str, sz);
+    return copy;
+}
+
+/*
+ * Deliberately limited allocator for the test driver: hands out nodes from
+ * a static pool of five and returns NULL once they are used up. The
+ * argument is ignored.
+ */
+static hnode_t *new_node(void *c)
+{
+    static hnode_t few[5];
+    static int used;
+    hnode_t *slot = NULL;
+
+    (void) c;
+    if (used < 5) {
+        slot = &few[used];
+        used++;
+    }
+    return slot;
+}
+
+/*
+ * Deallocator paired with new_node: does nothing, so slots of the static
+ * pool are never handed out again.
+ */
+static void del_node(hnode_t *n, void *c)
+{
+    (void) n;
+    (void) c;
+}
+
+/*
+ * Interactive test driver. Reads one command per line from standard input
+ * and applies it to a single hash table; the command letters are listed in
+ * the help text printed by '?'. Runs until end of input or the 'q'
+ * command. Returns EXIT_FAILURE if the table cannot be created.
+ */
+int main(void)
+{
+    input_t in;
+    hash_t *h = hash_create(HASHCOUNT_T_MAX, 0, 0);
+    hnode_t *hn;
+    hscan_t hs;
+    char *tok1, *tok2, *val;
+    const char *key;
+    int prompt = 0;
+
+    const char *help =
+        "a <key> <val> add value to hash table\n"
+        "d <key> delete value from hash table\n"
+        "l <key> lookup value in hash table\n"
+        "n show size of hash table\n"
+        "c show number of entries\n"
+        "t dump whole hash table\n"
+        "+ increase hash table (private func)\n"
+        "- decrease hash table (private func)\n"
+        "b print hash_t_bit value\n"
+        "p turn prompt on\n"
+        "s switch to non-functioning allocator\n"
+        "q quit";
+
+    /*
+     * Every command below dereferences the table, so stop here rather
+     * than entering the loop with a null pointer.
+     */
+    if (!h) {
+        puts("hash_create failed");
+        return EXIT_FAILURE;
+    }
+
+    for (;;) {
+        if (prompt)
+            putchar('>');
+        fflush(stdout);
+
+        if (!fgets(in, sizeof(input_t), stdin))
+            break;
+
+        switch(in[0]) {
+        case '?':
+            puts(help);
+            break;
+        case 'b':
+            printf("%d\n", hash_val_t_bit);
+            break;
+        case 'a':
+            if (tokenize(in+1, &tok1, &tok2, (char **) 0) != 2) {
+                puts("what?");
+                break;
+            }
+            /* The table stores pointers only, so keep private copies. */
+            key = dupstring(tok1);
+            val = dupstring(tok2);
+
+            if (!key || !val) {
+                puts("out of memory");
+                free((void *) key);
+                free(val);
+                /* Must not fall through to the insert with freed or
+                 * null pointers. */
+                break;
+            }
+
+            if (!hash_alloc_insert(h, key, val)) {
+                puts("hash_alloc_insert failed");
+                free((void *) key);
+                free(val);
+                break;
+            }
+            break;
+        case 'd':
+            if (tokenize(in+1, &tok1, (char **) 0) != 1) {
+                puts("what?");
+                break;
+            }
+            hn = hash_lookup(h, tok1);
+            if (!hn) {
+                puts("hash_lookup failed");
+                break;
+            }
+            /* Fetch the copies made by 'a' before the node goes away. */
+            val = hnode_get(hn);
+            key = hnode_getkey(hn);
+            hash_scan_delfree(h, hn);
+            free((void *) key);
+            free(val);
+            break;
+        case 'l':
+            if (tokenize(in+1, &tok1, (char **) 0) != 1) {
+                puts("what?");
+                break;
+            }
+            hn = hash_lookup(h, tok1);
+            if (!hn) {
+                puts("hash_lookup failed");
+                break;
+            }
+            val = hnode_get(hn);
+            puts(val);
+            break;
+        case 'n':
+            printf("%lu\n", (unsigned long) hash_size(h));
+            break;
+        case 'c':
+            printf("%lu\n", (unsigned long) hash_count(h));
+            break;
+        case 't':
+            hash_scan_begin(&hs, h);
+            while ((hn = hash_scan_next(&hs)))
+                printf("%s\t%s\n", (char*) hnode_getkey(hn),
+                        (char*) hnode_get(hn));
+            break;
+        case '+':
+            grow_table(h);		/* private function */
+            break;
+        case '-':
+            shrink_table(h);		/* private function */
+            break;
+        case 'q':
+            exit(0);
+            break;
+        case '\0':
+            break;
+        case 'p':
+            prompt = 1;
+            break;
+        case 's':
+            hash_set_allocator(h, new_node, del_node, NULL);
+            break;
+        default:
+            putchar('?');
+            putchar('\n');
+            break;
+        }
+    }
+
+    return 0;
+}
+
+#endif