diff options
author | admin@progandy.co.cc <admin@progandy.co.cc@eced67a3-f377-a0ae-92ae-d6de1850b05a> | 2010-09-13 18:11:13 +0000 |
---|---|---|
committer | admin@progandy.co.cc <admin@progandy.co.cc@eced67a3-f377-a0ae-92ae-d6de1850b05a> | 2010-09-13 18:11:13 +0000 |
commit | 376594ac1d65cbb31165f5a74775d624c3fd2981 (patch) | |
tree | b1f9bf2ee2f1a82c5d9378c03cf09f72535c001d /ekhtml/src | |
parent | 99ef7f1cfcaa4ead4b860faec88754abbe31ebfc (diff) |
- added basic html conversion for receiving (maybe a bit too powerful)
- some bugfixes
git-svn-id: http://mirotr.googlecode.com/svn/trunk@12 eced67a3-f377-a0ae-92ae-d6de1850b05a
Diffstat (limited to 'ekhtml/src')
-rw-r--r-- | ekhtml/src/.deps/ekhtml.Plo | 1 | ||||
-rw-r--r-- | ekhtml/src/.deps/ekhtml_comment.Plo | 1 | ||||
-rw-r--r-- | ekhtml/src/.deps/ekhtml_data.Plo | 1 | ||||
-rw-r--r-- | ekhtml/src/.deps/ekhtml_endtag.Plo | 1 | ||||
-rw-r--r-- | ekhtml/src/.deps/ekhtml_mktables.Po | 1 | ||||
-rw-r--r-- | ekhtml/src/.deps/ekhtml_special.Plo | 1 | ||||
-rw-r--r-- | ekhtml/src/.deps/ekhtml_starttag.Plo | 1 | ||||
-rw-r--r-- | ekhtml/src/.deps/ekhtml_util.Plo | 1 | ||||
-rw-r--r-- | ekhtml/src/.deps/hash.Plo | 1 | ||||
-rw-r--r-- | ekhtml/src/Makefile | 402 | ||||
-rw-r--r-- | ekhtml/src/Makefile.am | 22 | ||||
-rw-r--r-- | ekhtml/src/Makefile.in | 402 | ||||
-rw-r--r-- | ekhtml/src/ekhtml.c | 394 | ||||
-rw-r--r-- | ekhtml/src/ekhtml_comment.c | 119 | ||||
-rw-r--r-- | ekhtml/src/ekhtml_data.c | 61 | ||||
-rw-r--r-- | ekhtml/src/ekhtml_endtag.c | 129 | ||||
-rw-r--r-- | ekhtml/src/ekhtml_mktables.c | 141 | ||||
-rw-r--r-- | ekhtml/src/ekhtml_special.c | 72 | ||||
-rw-r--r-- | ekhtml/src/ekhtml_starttag.c | 390 | ||||
-rw-r--r-- | ekhtml/src/ekhtml_util.c | 55 | ||||
-rw-r--r-- | ekhtml/src/hash.c | 1035 |
21 files changed, 3231 insertions, 0 deletions
diff --git a/ekhtml/src/.deps/ekhtml.Plo b/ekhtml/src/.deps/ekhtml.Plo new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/src/.deps/ekhtml.Plo @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/src/.deps/ekhtml_comment.Plo b/ekhtml/src/.deps/ekhtml_comment.Plo new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/src/.deps/ekhtml_comment.Plo @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/src/.deps/ekhtml_data.Plo b/ekhtml/src/.deps/ekhtml_data.Plo new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/src/.deps/ekhtml_data.Plo @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/src/.deps/ekhtml_endtag.Plo b/ekhtml/src/.deps/ekhtml_endtag.Plo new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/src/.deps/ekhtml_endtag.Plo @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/src/.deps/ekhtml_mktables.Po b/ekhtml/src/.deps/ekhtml_mktables.Po new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/src/.deps/ekhtml_mktables.Po @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/src/.deps/ekhtml_special.Plo b/ekhtml/src/.deps/ekhtml_special.Plo new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/src/.deps/ekhtml_special.Plo @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/src/.deps/ekhtml_starttag.Plo b/ekhtml/src/.deps/ekhtml_starttag.Plo new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/src/.deps/ekhtml_starttag.Plo @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/src/.deps/ekhtml_util.Plo b/ekhtml/src/.deps/ekhtml_util.Plo new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/src/.deps/ekhtml_util.Plo @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/src/.deps/hash.Plo b/ekhtml/src/.deps/hash.Plo new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/src/.deps/hash.Plo @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/src/Makefile b/ekhtml/src/Makefile new file mode 100644 index 0000000..3739820 --- /dev/null +++ b/ekhtml/src/Makefile @@ -0,0 +1,402 @@ +# Makefile.in generated 
by automake 1.6.3 from Makefile.am. +# Generated automatically from Makefile.in by configure. + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + +SHELL = /bin/sh + +srcdir = . +top_srcdir = .. + +prefix = /usr/local +exec_prefix = ${prefix} + +bindir = ${exec_prefix}/bin +sbindir = ${exec_prefix}/sbin +libexecdir = ${exec_prefix}/libexec +datadir = ${prefix}/share +sysconfdir = ${prefix}/etc +sharedstatedir = ${prefix}/com +localstatedir = ${prefix}/var +libdir = ${exec_prefix}/lib +infodir = ${prefix}/info +mandir = ${prefix}/man +includedir = ${prefix}/include +oldincludedir = /usr/include +pkgdatadir = $(datadir)/ekhtml +pkglibdir = $(libdir)/ekhtml +pkgincludedir = $(includedir)/ekhtml +top_builddir = .. 
+ +ACLOCAL = ${SHELL} /z/missing --run aclocal-1.6 +AUTOCONF = ${SHELL} /z/missing --run autoconf +AUTOMAKE = ${SHELL} /z/missing --run automake-1.6 +AUTOHEADER = ${SHELL} /z/missing --run autoheader + +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = /bin/install -c +INSTALL_PROGRAM = ${INSTALL} +INSTALL_DATA = ${INSTALL} -m 644 +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_SCRIPT = ${INSTALL} +INSTALL_HEADER = $(INSTALL_DATA) +transform = s,x,x, +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = +host_triplet = i686-pc-mingw32 + +EXEEXT = .exe +OBJEXT = o +PATH_SEPARATOR = : +AMTAR = ${SHELL} /z/missing --run tar +AS = @AS@ +AWK = gawk +CC = gcc +DEPDIR = .deps +DLLTOOL = @DLLTOOL@ +ECHO = echo +INSTALL_STRIP_PROGRAM = ${SHELL} $(install_sh) -c -s +LIBTOOL = $(SHELL) $(top_builddir)/libtool +LN_S = ln -s +MAINT = # +OBJDUMP = @OBJDUMP@ +PACKAGE = ekhtml +RANLIB = ranlib +STRIP = strip +VERSION = 0.3.2 +am__include = include +am__quote = +install_sh = /z/install-sh +libekhtml_la_SOURCES = \ + ekhtml.c \ + ekhtml_comment.c \ + ekhtml_data.c \ + ekhtml_endtag.c \ + ekhtml_special.c \ + ekhtml_starttag.c \ + ekhtml_util.c \ + hash.c + + +libekhtml_la_DEPENDENCIES = ekhtml_tables.h +lib_LTLIBRARIES = libekhtml.la +noinst_PROGRAMS = ekhtml_mktables + +ekhtml_mktables_SOURCES = ekhtml_mktables.c + +CLEANFILES = ekhtml_tables.h +subdir = src +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/include/ekhtml_config.h +CONFIG_CLEAN_FILES = +LTLIBRARIES = $(lib_LTLIBRARIES) + +libekhtml_la_LDFLAGS = +libekhtml_la_LIBADD = +am_libekhtml_la_OBJECTS = ekhtml.lo ekhtml_comment.lo ekhtml_data.lo \ + ekhtml_endtag.lo ekhtml_special.lo ekhtml_starttag.lo \ + ekhtml_util.lo hash.lo +libekhtml_la_OBJECTS = $(am_libekhtml_la_OBJECTS) +noinst_PROGRAMS = 
ekhtml_mktables$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) + +am_ekhtml_mktables_OBJECTS = ekhtml_mktables.$(OBJEXT) +ekhtml_mktables_OBJECTS = $(am_ekhtml_mktables_OBJECTS) +ekhtml_mktables_LDADD = $(LDADD) +ekhtml_mktables_DEPENDENCIES = +ekhtml_mktables_LDFLAGS = + +DEFS = -DHAVE_CONFIG_H +DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include +CPPFLAGS = +LDFLAGS = +LIBS = +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +DEP_FILES = ./$(DEPDIR)/ekhtml.Plo \ + ./$(DEPDIR)/ekhtml_comment.Plo \ + ./$(DEPDIR)/ekhtml_data.Plo \ + ./$(DEPDIR)/ekhtml_endtag.Plo \ + ./$(DEPDIR)/ekhtml_mktables.Po \ + ./$(DEPDIR)/ekhtml_special.Plo \ + ./$(DEPDIR)/ekhtml_starttag.Plo \ + ./$(DEPDIR)/ekhtml_util.Plo ./$(DEPDIR)/hash.Plo +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ + $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +CFLAGS = -g -O2 +DIST_SOURCES = $(libekhtml_la_SOURCES) $(ekhtml_mktables_SOURCES) +DIST_COMMON = Makefile.am Makefile.in +SOURCES = $(libekhtml_la_SOURCES) $(ekhtml_mktables_SOURCES) + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: # Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/Makefile +Makefile: # $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) +libLTLIBRARIES_INSTALL = $(INSTALL) +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(libdir) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p 
$(DESTDIR)$(libdir)/$$f"; \ + $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f; \ + else :; fi; \ + done + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + p="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \ + $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test -z "$dir" && dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +libekhtml.la: $(libekhtml_la_OBJECTS) $(libekhtml_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libekhtml_la_LDFLAGS) $(libekhtml_la_OBJECTS) $(libekhtml_la_LIBADD) $(LIBS) + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +ekhtml_mktables$(EXEEXT): $(ekhtml_mktables_OBJECTS) $(ekhtml_mktables_DEPENDENCIES) + @rm -f ekhtml_mktables$(EXEEXT) + $(LINK) $(ekhtml_mktables_LDFLAGS) $(ekhtml_mktables_OBJECTS) $(ekhtml_mktables_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) core *.core + +distclean-compile: + -rm -f *.tab.c + +include ./$(DEPDIR)/ekhtml.Plo +include ./$(DEPDIR)/ekhtml_comment.Plo +include ./$(DEPDIR)/ekhtml_data.Plo +include ./$(DEPDIR)/ekhtml_endtag.Plo +include ./$(DEPDIR)/ekhtml_mktables.Po +include ./$(DEPDIR)/ekhtml_special.Plo +include ./$(DEPDIR)/ekhtml_starttag.Plo +include ./$(DEPDIR)/ekhtml_util.Plo +include ./$(DEPDIR)/hash.Plo + +distclean-depend: + -rm -rf ./$(DEPDIR) + +.c.o: + source='$<' object='$@' libtool=no \ + depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' \ + $(CCDEPMODE) $(depcomp) \ + $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$< + +.c.obj: + source='$<' 
object='$@' libtool=no \ + depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' \ + $(CCDEPMODE) $(depcomp) \ + $(COMPILE) -c `cygpath -w $<` + +.c.lo: + source='$<' object='$@' libtool=yes \ + depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' \ + $(CCDEPMODE) $(depcomp) \ + $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< +CCDEPMODE = depmode=gcc3 + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: + +ETAGS = etags +ETAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. 
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + @list='$(DISTFILES)'; for file in $$list; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) + +installdirs: + $(mkinstalldirs) $(DESTDIR)$(libdir) + +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + +distclean-am: clean-am distclean-compile distclean-depend \ + distclean-generic distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: install-libLTLIBRARIES + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES + +.PHONY: GTAGS all all-am check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool clean-noinstPROGRAMS \ + distclean distclean-compile distclean-depend distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am info \ + info-am install install-am install-data install-data-am \ + install-exec install-exec-am install-info install-info-am \ + install-libLTLIBRARIES install-man install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool tags uninstall \ + uninstall-am uninstall-info-am uninstall-libLTLIBRARIES + + +ekhtml.c: ekhtml_tables.h + +ekhtml_tables.h: ekhtml_mktables + ./ekhtml_mktables > $@ +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/ekhtml/src/Makefile.am b/ekhtml/src/Makefile.am new file mode 100644 index 0000000..6fcdaf8 --- /dev/null +++ b/ekhtml/src/Makefile.am @@ -0,0 +1,22 @@ +libekhtml_la_SOURCES = \ + ekhtml.c \ + ekhtml_comment.c \ + ekhtml_data.c \ + ekhtml_endtag.c \ + ekhtml_special.c \ + ekhtml_starttag.c \ + ekhtml_util.c \ + hash.c + +libekhtml_la_DEPENDENCIES = ekhtml_tables.h +lib_LTLIBRARIES = libekhtml.la +noinst_PROGRAMS = ekhtml_mktables + +ekhtml_mktables_SOURCES = ekhtml_mktables.c + +ekhtml.c: ekhtml_tables.h + +ekhtml_tables.h: ekhtml_mktables + ./ekhtml_mktables > $@ + +CLEANFILES = ekhtml_tables.h diff --git a/ekhtml/src/Makefile.in b/ekhtml/src/Makefile.in new file mode 100644 index 0000000..c5147f0 --- /dev/null +++ b/ekhtml/src/Makefile.in @@ -0,0 +1,402 @@ +# Makefile.in generated by automake 1.6.3 from Makefile.am. +# @configure_input@ + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_HEADER = $(INSTALL_DATA) +transform = @program_transform_name@ +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ + +EXEEXT = @EXEEXT@ +OBJEXT = @OBJEXT@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +AMTAR = @AMTAR@ +AS = @AS@ +AWK = @AWK@ +CC = @CC@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +ECHO = @ECHO@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +MAINT = @MAINT@ +OBJDUMP = @OBJDUMP@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +STRIP = @STRIP@ +VERSION = @VERSION@ +am__include = @am__include@ +am__quote = @am__quote@ +install_sh = @install_sh@ +libekhtml_la_SOURCES = \ + ekhtml.c \ + ekhtml_comment.c \ + ekhtml_data.c \ + ekhtml_endtag.c \ + ekhtml_special.c \ + ekhtml_starttag.c \ + ekhtml_util.c \ + hash.c + + +libekhtml_la_DEPENDENCIES = ekhtml_tables.h +lib_LTLIBRARIES = libekhtml.la +noinst_PROGRAMS = ekhtml_mktables + +ekhtml_mktables_SOURCES = ekhtml_mktables.c + +CLEANFILES = ekhtml_tables.h +subdir = src +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/include/ekhtml_config.h +CONFIG_CLEAN_FILES = +LTLIBRARIES = $(lib_LTLIBRARIES) + +libekhtml_la_LDFLAGS = +libekhtml_la_LIBADD = +am_libekhtml_la_OBJECTS = ekhtml.lo ekhtml_comment.lo ekhtml_data.lo \ + ekhtml_endtag.lo ekhtml_special.lo ekhtml_starttag.lo \ + ekhtml_util.lo hash.lo +libekhtml_la_OBJECTS = $(am_libekhtml_la_OBJECTS) +noinst_PROGRAMS = ekhtml_mktables$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) + 
+am_ekhtml_mktables_OBJECTS = ekhtml_mktables.$(OBJEXT) +ekhtml_mktables_OBJECTS = $(am_ekhtml_mktables_OBJECTS) +ekhtml_mktables_LDADD = $(LDADD) +ekhtml_mktables_DEPENDENCIES = +ekhtml_mktables_LDFLAGS = + +DEFS = @DEFS@ +DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include +CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/ekhtml.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_comment.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_data.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_endtag.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_mktables.Po \ +@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_special.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_starttag.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/ekhtml_util.Plo ./$(DEPDIR)/hash.Plo +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ + $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +CFLAGS = @CFLAGS@ +DIST_SOURCES = $(libekhtml_la_SOURCES) $(ekhtml_mktables_SOURCES) +DIST_COMMON = Makefile.am Makefile.in +SOURCES = $(libekhtml_la_SOURCES) $(ekhtml_mktables_SOURCES) + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/Makefile +Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) +libLTLIBRARIES_INSTALL = $(INSTALL) +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(libdir) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + 
echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f"; \ + $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f; \ + else :; fi; \ + done + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + p="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \ + $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test -z "$dir" && dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +libekhtml.la: $(libekhtml_la_OBJECTS) $(libekhtml_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libekhtml_la_LDFLAGS) $(libekhtml_la_OBJECTS) $(libekhtml_la_LIBADD) $(LIBS) + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +ekhtml_mktables$(EXEEXT): $(ekhtml_mktables_OBJECTS) $(ekhtml_mktables_DEPENDENCIES) + @rm -f ekhtml_mktables$(EXEEXT) + $(LINK) $(ekhtml_mktables_LDFLAGS) $(ekhtml_mktables_OBJECTS) $(ekhtml_mktables_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) core *.core + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_comment.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_data.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_endtag.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_mktables.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_special.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/ekhtml_starttag.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ekhtml_util.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hash.Plo@am__quote@ + +distclean-depend: + -rm -rf ./$(DEPDIR) + +.c.o: +@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$< + +.c.obj: +@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(COMPILE) -c `cygpath -w $<` + +.c.lo: +@AMDEP_TRUE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< +CCDEPMODE = @CCDEPMODE@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: + +ETAGS = etags +ETAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) 
$(ETAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. +distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + @list='$(DISTFILES)'; for file in $$list; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) + +installdirs: + $(mkinstalldirs) $(DESTDIR)$(libdir) + +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + +distclean-am: clean-am distclean-compile distclean-depend \ + distclean-generic distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: install-libLTLIBRARIES + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES + +.PHONY: GTAGS all all-am check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool clean-noinstPROGRAMS \ + distclean distclean-compile distclean-depend distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am info \ + info-am install install-am install-data install-data-am \ + install-exec install-exec-am install-info install-info-am \ + install-libLTLIBRARIES install-man install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool tags uninstall \ + uninstall-am uninstall-info-am uninstall-libLTLIBRARIES + + +ekhtml.c: ekhtml_tables.h + +ekhtml_tables.h: ekhtml_mktables + ./ekhtml_mktables > $@ +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/ekhtml/src/ekhtml.c b/ekhtml/src/ekhtml.c new file mode 100644 index 0000000..f369746 --- /dev/null +++ b/ekhtml/src/ekhtml.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2002, Jon Travis + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * ekhtml: The El-Kabong HTML parser + * by Jon Travis (jtravis@p00p.org) + * + * El-Kabong: A speedy, yet forgiving, SAX-stylee HTML parser. + * + * The idea behind this parser is for it to use very little memory, and still + * be very speedy, while forgiving poorly written HTML. + + * The internals of the parser consist of a small memory buffer which is able + * to grow when not enough information is known to correctly parse a tag. + * Given the typical layout of HTML, 4k should be plenty. + * + * The main state engine loops through this internal buffer, determining what + * the next state should be. Once this is known, it passes off a segment to + * the state handlers (starttag, endtag, etc.) to process. 
The segment + * handlers and the main state engine communicate via a few variables. These + * variables indicate whether or not the main engine should switch state, + * or successfully remove some data, etc. The segment handlers are + * guaranteed the same starting data (though not the same pointer) on each + * invocation until the state is changed. Thus, the segment handlers cannot + * use pointers into the main buffer -- they must use offsets. + * + * Some of the speed is gained from using character map data found in + * ekhtml_tables.h. I don't have any empirical data for this yet -- + * it only sounds like it would be faster.. ;-) + * + * I'm always looking for ways to clean && speed up this code. Feel free + * to give feedback -- JMT + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <assert.h> + +#include "ekhtml_config.h" +#include "ekhtml.h" +#define EKHTML_USE_TABLES +#include "ekhtml_tables.h" +#define EKHTML_USE_PRIVATE +#include "ekhtml_private.h" + +#ifndef MIN +#define MIN(a,b) (((a)<(b)) ? (a) : (b)) +#endif + + +/* + * ekhtml_buffer_grow: Grow the parser's internal buffer by a blocksize. + * NOTE: Calling the function has the potential to + * change the data buffer location. Do + * not rely on it's location! + * + * Arguments: Parser = Parser to grow + */ + +static void ekhtml_buffer_grow(ekhtml_parser_t *parser){ + size_t newsize; + char *newbuf; + + newsize = parser->nalloced + EKHTML_BLOCKSIZE; + + if((newbuf = realloc(parser->buf, newsize)) == NULL){ + fprintf(stderr, "BAD! Can't allocate %d bytes in ekhtml_buffer_grow\n", + newsize); + fflush(stderr); /* Just in case someone changes the buffering scheme */ + } + + parser->buf = newbuf; + parser->nalloced = newsize; +} + +/* + * parser_state_determine: Determine the next state that the main parser + * should have, by investigating up to the first + * 4 characters in the buffer. 
 *
 * Arguments:      startp = Starting data pointer
 *                 endp   = Pointer to first byte of 'out of range' data
 *
 * Return values:  Returns one of EKHTML_STATE_* indicating the state that
 *                 was found.  EKHTML_STATE_NONE is returned when there are
 *                 not yet enough bytes between startp and endp to decide.
 *
 */

static inline
int parser_state_determine(const char *startp, const char *endp){
    const char *firstchar;
    int newstate;

    assert(startp != endp);

    /* Anything not starting with '<' is plain data */
    if(*startp != '<')
        return EKHTML_STATE_INDATA;

    firstchar = startp + 1;
    if(firstchar == endp)
        return EKHTML_STATE_NONE;

    /* The character after '<' selects the state via the generated table */
    newstate = EKCMap_EKState[(unsigned char)*firstchar];
    if(newstate == EKHTML_STATE_NONE){
        /* Table could not decide: need two more bytes to tell a
           comment ("<!--") from any other "<!" construct */
        if(firstchar + 2 >= endp) /* Not enough data to evaluate */
            return EKHTML_STATE_NONE;
        if(*(firstchar + 1) == '-' && *(firstchar + 2) == '-')
            return EKHTML_STATE_COMMENT;
        else
            return EKHTML_STATE_SPECIAL;
    } else
        return newstate;
}


/*
 * ekhtml_parser_flush:  Run the state engine over the bytes currently held
 *                       in the parser's buffer, handing segments to the
 *                       data / tag handlers, then move whatever was not
 *                       consumed back to the start of the buffer.
 *
 * Arguments:            parser   = Parser to process
 *                       flushall = If non-zero, everything the handlers did
 *                                  not consume is passed to the data
 *                                  callback (if one is set) and the state
 *                                  is reset to EKHTML_STATE_NONE
 *
 * Return values:        Non-zero if any data was removed from the buffer
 *                       (always the case when flushall is set), else 0.
 */
int ekhtml_parser_flush(ekhtml_parser_t *parser, int flushall){
    void **state_data = &parser->state.state_data;
    char *buf = parser->buf, *curp = buf, *endp = buf + parser->nbuf;
    int badp = -1, tmpstate = parser->state.state, didsomething = 0;

    while(curp != endp){
        char *workp = curp;

        if(tmpstate == EKHTML_STATE_NONE){
            tmpstate = parser_state_determine(workp, endp);
            if(tmpstate == EKHTML_STATE_NONE)  /* Not enough data yet */
                break;
        }

        if(tmpstate == EKHTML_STATE_INDATA || tmpstate == EKHTML_STATE_BADDATA)
            curp = ekhtml_parse_data(parser, workp, endp, tmpstate);
        else if(endp - workp > 2){ /* All tags fall under this category */
            switch(tmpstate){
            case EKHTML_STATE_ENDTAG:
                curp = ekhtml_parse_endtag(parser, state_data,
                                           workp, endp, &badp);
                break;
            case EKHTML_STATE_STARTTAG:
                curp = ekhtml_parse_starttag(parser, state_data,
                                             workp, endp, &badp);
                break;
            case EKHTML_STATE_COMMENT:
                curp = ekhtml_parse_comment(parser, state_data,
                                            workp, endp, &badp);
                break;
            case EKHTML_STATE_SPECIAL:
                curp = ekhtml_parse_special(parser, state_data,
                                            workp, endp, &badp);
                break;
            default:
                assert(!"Unimplemented state");
            }
        } else {
            curp = NULL;  /* Not enough data, keep going */
        }

        /* If one of the parsers said the data was bad, reset the state */
        if(badp != -1){
            tmpstate = badp;
            badp = -1;
        }

        if(curp == NULL){ /* State needed more data, so break out */
            curp = workp;
            break;
        }

        if(workp != curp){ /* state backend cleared up some data */
            didsomething = 1;
            tmpstate = EKHTML_STATE_NONE;
            assert(*state_data == NULL);
        }
    }

    if(flushall){
        /* Flush whatever we didn't use */
        if(parser->datacb){
            ekhtml_string_t str;

            str.str = curp;
            str.len = endp - curp;
            parser->datacb(parser->cbdata, &str);
        }
        curp = endp;
        didsomething = 1;
        tmpstate = EKHTML_STATE_NONE; /* Clean up to an unknown state */
        *state_data = NULL;
    }

    /* Remember where we stopped so the next flush resumes in this state */
    parser->state.state = tmpstate;

    if(didsomething){
        /* Shuffle the data back, based on where we ended up */
        parser->nbuf -= curp - buf;
        if(endp - curp){  /* If there's still any data to move */
            memmove(buf, curp, endp - curp);
        }
    }
    return didsomething;
}

/*
 * ekhtml_parser_feed:  Append the bytes of 'str' to the parser's internal
 *                      buffer.  Whenever the buffer becomes full it is
 *                      processed with ekhtml_parser_flush(parser, 0); if
 *                      that frees nothing, the buffer is grown by a block.
 *                      Data that does not fill the buffer stays buffered
 *                      until a later feed or flush.
 *
 * Arguments:           parser = Parser to feed
 *                      str    = Data to append (str->len bytes at str->str)
 */
void ekhtml_parser_feed(ekhtml_parser_t *parser, ekhtml_string_t *str){
    size_t nfed = 0;

    while(nfed != str->len){
        size_t tocopy;

        /* First see how much we can fill up our internal buffer */
        tocopy = MIN(parser->nalloced - parser->nbuf, str->len - nfed);
        memcpy(parser->buf + parser->nbuf, str->str + nfed, tocopy);
        nfed         += tocopy;
        parser->nbuf += tocopy;
        if(parser->nalloced == parser->nbuf){
            /* Process the buffer */
            if(!ekhtml_parser_flush(parser, 0)){
                /* If we didn't actually process anything, grow our buffer */
                ekhtml_buffer_grow(parser);
            }
        }
    }
}

/* Set the callback invoked for runs of plain data */
void ekhtml_parser_datacb_set(ekhtml_parser_t *parser, ekhtml_data_cb_t cb){
    parser->datacb = cb;
}

/* Set the callback invoked with the body of each comment */
void ekhtml_parser_commentcb_set(ekhtml_parser_t *parser, ekhtml_data_cb_t cb){
    parser->commentcb = cb;
}

/* Set the opaque pointer passed as first argument to every callback */
void ekhtml_parser_cbdata_set(ekhtml_parser_t *parser, void *cbdata){
    parser->cbdata = cbdata;
}

static void
+ekhtml_parser_startendcb_add(ekhtml_parser_t *parser, const char *tag, + ekhtml_starttag_cb_t startcb, + ekhtml_endtag_cb_t endcb, + int isStart) +{ + ekhtml_tag_container *cont; + ekhtml_string_t lookup_str; + char *newtag, *cp; + unsigned int taglen; + hnode_t *hn; + + if(!tag){ + if(isStart) + parser->startcb_unk = startcb; + else + parser->endcb_unk = endcb; + return; + } + + + newtag = _strdup(tag); + for(cp=newtag; *cp; cp++) + *cp = toupper(*cp); + + taglen = cp - newtag; + + /* First see if the container already exists */ + lookup_str.str = newtag; + lookup_str.len = taglen; + + if((hn = hash_lookup(parser->startendcb, &lookup_str))){ + cont = hnode_get(hn); + free(newtag); + if(isStart) + cont->startfunc = startcb; + else + cont->endfunc = endcb; + } else { + ekhtml_string_t *set_str; + + cont = malloc(sizeof(*cont)); + if(isStart){ + cont->startfunc = startcb; + cont->endfunc = NULL; + } else { + cont->startfunc = NULL; + cont->endfunc = endcb; + } + set_str = malloc(sizeof(*set_str)); + *set_str = lookup_str; + hash_alloc_insert(parser->startendcb, set_str, cont); + } +} + +void ekhtml_parser_startcb_add(ekhtml_parser_t *parser, const char *tag, + ekhtml_starttag_cb_t cback) +{ + ekhtml_parser_startendcb_add(parser, tag, cback, NULL, 1); +} + +void ekhtml_parser_endcb_add(ekhtml_parser_t *parser, const char *tag, + ekhtml_endtag_cb_t cback) +{ + ekhtml_parser_startendcb_add(parser, tag, NULL, cback, 0); +} + + +static hash_val_t ekhtml_string_hash(const void *key){ + const ekhtml_string_t *s = key; + hash_val_t res = 5381; + const char *str = s->str; + size_t len = s->len; + int c; + + while(len--){ + c = str[len]; + res = ((res << 5) + res) + c; /* res * 33 + c */ + } + return res; +} + +static int ekhtml_string_comp(const void *key1, const void *key2){ + const ekhtml_string_t *s1 = key1, *s2 = key2; + + if(s1->len == s2->len) + return memcmp(s1->str, s2->str, s1->len); + return 1; +} + +void ekhtml_parser_destroy(ekhtml_parser_t *ekparser){ + hnode_t 
*hn; + hscan_t hs; + + hash_scan_begin(&hs, ekparser->startendcb); + while((hn = hash_scan_next(&hs))){ + ekhtml_string_t *key = (ekhtml_string_t *)hnode_getkey(hn); + ekhtml_tag_container *cont = hnode_get(hn); + + hash_scan_delete(ekparser->startendcb, hn); + free((char *)key->str); + free(key); + free(cont); + } + + hash_destroy(ekparser->startendcb); + ekhtml_parser_starttag_cleanup(ekparser); + free(ekparser->buf); + free(ekparser); +} + +ekhtml_parser_t *ekhtml_parser_new(void *cbdata){ + ekhtml_parser_t *res; + + res = malloc(sizeof(*res)); + res->datacb = NULL; + res->startendcb = hash_create(HASHCOUNT_T_MAX, ekhtml_string_comp, + ekhtml_string_hash); + res->cbdata = cbdata; + res->startcb_unk = NULL; + res->endcb_unk = NULL; + res->commentcb = NULL; + res->buf = NULL; + res->nalloced = 0; + res->nbuf = 0; + res->freeattrs = NULL; + res->state.state = EKHTML_STATE_NONE; + res->state.state_data = NULL; + + /* Start out with a buffer of 1 block size */ + ekhtml_buffer_grow(res); + return res; +} + diff --git a/ekhtml/src/ekhtml_comment.c b/ekhtml/src/ekhtml_comment.c new file mode 100644 index 0000000..e37cc48 --- /dev/null +++ b/ekhtml/src/ekhtml_comment.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2002, Jon Travis + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * ekhtml_comment.c:  Comment tag processor for El-Kabong.
 *
 * The comment processor is a pretty simple piece of machinery.  It
 * relies on the first 4 characters being '<!--'.  It then searches
 * for two adjacent dashes '--' followed by optional whitespace,
 * followed by a '>'.
 */

#include <string.h>
#include <assert.h>

#include "ekhtml_config.h"
#include "ekhtml.h"
#include "ekhtml_tables.h"
#define EKHTML_USE_PRIVATE
#include "ekhtml_private.h"

/*
 * ekhtml_parse_comment:  Scan a '<!--' comment for its terminating
 *                        '--' [whitespace] '>'.
 *
 * Arguments:      parser     = Parser owning the comment state and callback
 *                 state_data = NULL on the first call for a comment; set to
 *                              the parser's comment state while the comment
 *                              is incomplete and back to NULL when it ends
 *                 curp       = Start of the comment (points at '<')
 *                 endp       = First byte past the available data
 *                 baddata    = Not written by this function
 *
 * Return values:  Pointer to the byte after the closing '>' once the
 *                 comment is complete (the comment callback, if set, has
 *                 then been called with the text between '<!--' and the
 *                 closing '--'), or NULL if more data is needed.  In the
 *                 NULL case the scan position is saved as an offset from
 *                 curp so the next call can resume there.
 */
char *ekhtml_parse_comment(ekhtml_parser_t *parser, void **state_data,
                           const char *curp, const char *endp,
                           int *baddata)
{
    ekhtml_comment_state *comstate = *state_data;
    int *offset = &parser->state.offset;
    const char *workp;

    assert(*curp == '<' && *(curp + 1) == '!' && *(curp + 2) == '-');
    assert(*(curp + 3) == '-' && endp - curp >= 4);

    if(comstate == NULL){ /* Only called the first time the tag is started */
        comstate = &parser->commentstate;
        comstate->dashes   = 0;
        comstate->lastdash = 0;
        *state_data = comstate;
        *offset = sizeof("<!--") - 1;
    }

    workp = curp + *offset;
    while(workp != endp){
        if(comstate->dashes == 0){ /* Still on the quest for the double dash*/
            /* XXX -- Searching for '--' could be faster, doing
               multibyte searching, or something similar */
            /* Step two bytes at a time: any "--" pair has a dash on one of
               the visited positions; the neighbours are checked below */
            for(; workp < endp - 1; workp += 2){
                if(*workp == '-')
                    break;
            }

            if(!(workp < endp - 1)){
                /* Ran out of data; resume from the last byte next time */
                *offset = endp - 1 - curp;
                return NULL;
            }

            /* The dash found may be the second of a pair (look behind, but
               never back into the opening "<!--") or the first (look ahead) */
            if((*(workp - 1) == '-') &&
               (workp - curp) > (sizeof("<!--") - 1))
            {
                comstate->lastdash = workp - 1 - curp;
                comstate->dashes = 1;
            } else if(*(workp + 1) == '-'){
                comstate->lastdash = workp - curp;
                comstate->dashes = 1;
            } else {
                /* Else, a lone dash -- how sad, lonely dash.. ;-) */
            }
            workp++;
            continue;
        }

        /* At this point we have the double dash.  Search through whitespace */
        workp = ekhtml_find_notcharsmap(workp, endp - workp, EKCMap_CharMap,
                                        EKHTML_CHAR_WHITESPACE);
        if(workp == endp){/* Reached end of the data without finding the '>' */
            *offset = workp - curp;
            return NULL;
        }

        if(*workp == '>'){
            if(parser->commentcb){
                ekhtml_string_t str;

                /* Comment text: everything between "<!--" and the "--" */
                str.str = curp + 4;
                str.len = comstate->lastdash - 4;
                parser->commentcb(parser->cbdata, &str);
            }
            *state_data = NULL;
            return (char *)workp + 1;
        } else {
            /* Something other than '>' followed the dashes: keep looking */
            comstate->dashes = 0;
        }
    }

    *offset = workp - curp;
    return NULL; /* Not done yet */
}
diff --git a/ekhtml/src/ekhtml_data.c b/ekhtml/src/ekhtml_data.c
new file mode 100644
index 0000000..2b1d6c8
--- /dev/null
+++ b/ekhtml/src/ekhtml_data.c
@@ -0,0 +1,61 @@
/*
 * Copyright (c) 2002, Jon Travis
 * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include <assert.h> + +#include "ekhtml_config.h" +#include "ekhtml.h" +#include "ekhtml_tables.h" +#define EKHTML_USE_PRIVATE +#include "ekhtml_private.h" + +char *ekhtml_parse_data(ekhtml_parser_t *parser, const char *curp, + const char *endp, int curstate) +{ + const char *resp, *startp = curp; + + /* If we are in the data state, we can absorb everything up to a + '<' sign */ + + if(curstate == EKHTML_STATE_BADDATA){ + /* This state signifies that there was some bad-data involved, + skip over the first '<' sign, and take it as raw data */ + assert(*curp == '<'); + startp++; + } + resp = memchr(startp, '<', endp - startp); + resp = resp ? 
resp : endp; + if(parser->datacb){ + ekhtml_string_t str; + + str.str = curp; + str.len = resp - curp; + parser->datacb(parser->cbdata, &str); + } + return (char *)resp; +} + diff --git a/ekhtml/src/ekhtml_endtag.c b/ekhtml/src/ekhtml_endtag.c new file mode 100644 index 0000000..f7095dc --- /dev/null +++ b/ekhtml/src/ekhtml_endtag.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2002, Jon Travis + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * ekhtml_endtag.c: Processor for a closing tag '</tag> + * + * The endtag processor is a very simple processor with a very small + * state machine. Like the other tags, it accomodates for malformated + * HTML, exchanging a '<' for a '>'. 
Whitespace trailing the tagname is + * ignored, and it might be interesting to note that a tag of </FOO BAR> + * will make callbacks with a tag of 'FOO BAR' + */ + +#include <assert.h> +#include <ctype.h> + +#include "ekhtml_config.h" +#include "ekhtml.h" +#include "ekhtml_tables.h" +#define EKHTML_USE_PRIVATE +#include "ekhtml_private.h" + +static void handle_endtag(ekhtml_parser_t *parser, ekhtml_string_t *str){ + ekhtml_tag_container *container; + hnode_t *hn; + + if((hn = hash_lookup(parser->startendcb, str)) && + (container = hnode_get(hn)) && + container->endfunc) + { + container->endfunc(parser->cbdata, str); + } else if(parser->endcb_unk) + parser->endcb_unk(parser->cbdata, str); +} + +char *ekhtml_parse_endtag(ekhtml_parser_t *parser, void **state_data, + char *curp, char *endp, int *baddata) +{ + const char *workp, *arrowp, *upper_tag; + ekhtml_endtag_state *endstate = *state_data; + int taglen, *offset = &parser->state.offset; + ekhtml_string_t str; + + /* Prerequisites for this function are that the first chars are </' + and that there are at least 3 bytes of data to work with */ + assert(*curp == '<' && *(curp + 1) == '/'); + assert(endp - curp >= 3); + + if(endstate == NULL){ /* Only called the first time a tag is started */ + const char *secondchar = curp + 2; + + /* Initial check to make sure this isn't some bad tag */ + if(!isalpha(*secondchar)){ + if(*secondchar != '>' && *secondchar != '<'){ + /* Bogus tag */ + *baddata = EKHTML_STATE_BADDATA; + return (char *)curp; + } else { /* Might as well handle this case while we are here */ + str.str = ""; + str.len = 0; + handle_endtag(parser, &str); + return (char *)(*secondchar == '>' ? secondchar + 1 : + secondchar); + } + } + + /* Store state, since this is the first time we are state-ifying. 
*/ + endstate = &parser->endstate; + endstate->lastchar = 2; + *state_data = endstate; + *offset = 2; + } + + workp = curp + *offset; + /* Search for the close tag, or even malformed HTML */ + for(arrowp=workp; + arrowp != endp && *arrowp != '<' && *arrowp != '>'; + arrowp++) + { + if(!(EKCMap_CharMap[(unsigned char)*arrowp] & EKHTML_CHAR_WHITESPACE)) + endstate->lastchar = arrowp - curp; + } + + if(arrowp == endp){ + /* Didn't find the end, so return NULL */ + *offset = endp - curp; + return NULL; + } + + /* Found the end. Clear up our state and return the next char that + * the parser should process + */ + taglen = endstate->lastchar + 1 - 2; + upper_tag = ekhtml_make_upperstr(curp + 2, taglen); + str.str = upper_tag; + str.len = taglen; + handle_endtag(parser, &str); + *state_data = NULL; + assert(arrowp < endp); + if(*arrowp == '<'){ /* Malformed HTML */ + return (char *)(arrowp); + } else { + return (char *)(arrowp + 1); + } +} + diff --git a/ekhtml/src/ekhtml_mktables.c b/ekhtml/src/ekhtml_mktables.c new file mode 100644 index 0000000..9a8dea2 --- /dev/null +++ b/ekhtml/src/ekhtml_mktables.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2002, Jon Travis + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * ekhtml_mktables: A small utility for generating tables of valid characters + * for different parts of the HTML parsing, such as tags, + * whitespace, etc. + * + * Each table is described by a function which returns 1 + * if the character should be in the table, else 0 + */ + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> + + +#include "ekhtml_config.h" +#include "ekhtml.h" +#define EKHTML_USE_PRIVATE +#include "ekhtml_private.h" + + +/* valid_tagname: Character map for a tagname AFTER the first letter */ +static EKHTML_CHARMAP_TYPE valid_tagname(char in){ + if(in == '-' || in == '.' || isdigit(in) || isalpha(in)) + return 1; + return 0; +} + +static EKHTML_CHARMAP_TYPE valid_whitespace(char in){ + return isspace(in) ? 1 : 0; +} + +/* attribute name AFTER the first character */ +static EKHTML_CHARMAP_TYPE valid_attrname(char in){ + if(in == '_' || valid_tagname(in)) + return 1; + return 0; +} + +/* attribute value */ +static EKHTML_CHARMAP_TYPE valid_attrvalue(char in){ + if(valid_attrname(in) || + in == '/' || in == ':' || in == '+' || in == '*' || + in == '%' || in == '?' || in == '!' || in == '&' || + in == '(' || in == ')' || in == '#' || in == '=' || + in == '~' || in == ']' || in == '*' || in == '@' || + in == '$' || in == '_') + return 1; + return 0; +} + +static EKHTML_CHARMAP_TYPE valid_begattrname(char in){ + return (isalpha(in) || in == '_') ? 
1 : 0; +} + +static EKHTML_CHARMAP_TYPE ekhtml_state(char in){ + if(in == '/') + return EKHTML_STATE_ENDTAG; + if(isalpha(in)) + return EKHTML_STATE_STARTTAG; + if(in == '!') + return EKHTML_STATE_NONE; /* Must be determined by caller */ + return EKHTML_STATE_BADDATA; +} + +static EKHTML_CHARMAP_TYPE charmap_values(char in){ + EKHTML_CHARMAP_TYPE res = 0; + + if(valid_tagname(in)) + res |= EKHTML_CHAR_TAGNAME; + if(valid_whitespace(in)) + res |= EKHTML_CHAR_WHITESPACE; + if(valid_begattrname(in)) + res |= EKHTML_CHAR_BEGATTRNAME; + if(valid_attrname(in)) + res |= EKHTML_CHAR_ATTRNAME; + if(valid_attrvalue(in)) + res |= EKHTML_CHAR_ATTRVALUE; + return res; +} + +#define EKHTML_STRINGIFY(x) #x + +static void print_charmap(char *name, EKHTML_CHARMAP_TYPE (*cmap_func)(char)){ + int ch; + char sbuf[256]; + + sprintf_s(sbuf, 256, "0x%%0%dx ", EKHTML_CHARMAP_LEN * 2); + printf("#ifdef EKHTML_USE_TABLES\n"); + printf("const %s %s[256] = {\n", EKHTML_CHARMAP_TYPE_S, name); + for(ch=0; ch < 256; ch++){ + printf(sbuf, cmap_func((char)ch)); + if(isgraph(ch)) + printf("/* '%c' */", ch); + else + printf("/* 0x%.2x */", ch); + printf(", "); + if(!((ch + 1) % 4)) + printf("\n"); + } + printf("};\n\n"); + printf("#else\n"); + printf("extern const %s %s[256];\n", EKHTML_CHARMAP_TYPE_S, name); + printf("#endif\n"); +} + +int main(int argc, char *argv[]){ + printf("#ifndef EKHTML_MKTABLES_DOT_H\n"); + printf("#define EKHTML_MKTABLES_DOT_H\n"); + + print_charmap("EKCMap_CharMap", charmap_values); + print_charmap("EKCMap_EKState", ekhtml_state); + + printf("\n#endif\n"); + return 0; +} diff --git a/ekhtml/src/ekhtml_special.c b/ekhtml/src/ekhtml_special.c new file mode 100644 index 0000000..f43a62d --- /dev/null +++ b/ekhtml/src/ekhtml_special.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2002, Jon Travis + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <assert.h> + +#include "ekhtml_config.h" +#include "ekhtml.h" +#include "ekhtml_tables.h" +#define EKHTML_USE_PRIVATE +#include "ekhtml_private.h" + +char *ekhtml_parse_special(ekhtml_parser_t *parser, void **state_data, + const char *curp, const char *endp, + int *baddata) +{ + const char *workp; + int *offset = &parser->state.offset; + + assert(*curp == '<' && *(curp + 1) == '!'); + + if(*state_data == NULL){/* Only called the first time the tag is started */ + *offset = 2; + *state_data = (void *)1; /* Assign it any non-NULL value */ + } + + for(workp=curp + *offset;workp != endp; workp++) + if(*workp == '<' || *workp == '>') + break; + + if(workp == endp){ + /* No end of tag found yet, save state */ + *offset = endp - curp; + return NULL; + } + + if(parser->datacb){ + ekhtml_string_t str; + + str.str = curp; + str.len = workp - curp + 1; + parser->datacb(parser->cbdata, &str); + } + + *state_data = NULL; + if(*workp == '<') /* Malformed HTML */ + return (char *)workp; + else + return (char *)workp + 1; +} diff --git a/ekhtml/src/ekhtml_starttag.c b/ekhtml/src/ekhtml_starttag.c new file mode 100644 index 0000000..cfd3ea4 --- /dev/null +++ b/ekhtml/src/ekhtml_starttag.c @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2002, Jon Travis + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * ekhtml_starttag: Processor for HTML start-tags. + * + * This starttag processor is the most complicated of the tag processors. + * It has its own small internal state machine which keeps track of what + * the next thing it is searching for is. + * + * SOURCE NOTES: We do a bit of weird hackery with respect to storing + * attributes. Since we need to store the OFFSET of the + * attribute and values, and cannot store actual pointers + * (see ekhtml.c for an explanation as to why), we need + * to create a whole new structure to store this info. + * BUT, we are just going to allocate a very similar + * structure (ekhtml_attr_t) anyway, and fill it in. + * SO, we just use the ekhtml_attr_t, and assign 'integer' + * values to the pointer. This saves us allocations and + * some management issues at the cost of readability.
+ */ + +#include <assert.h> +#include <stdlib.h> +#include <ctype.h> + +#include "ekhtml_config.h" +#include "ekhtml.h" +#include "ekhtml_tables.h" +#define EKHTML_USE_PRIVATE +#include "ekhtml_private.h" + +#define EKHTML_STMODE_TAG 0 /* Finding the tagname */ +#define EKHTML_STMODE_SUCK 1 /* Suck data until the last '<' or '>' */ +#define EKHTML_STMODE_BEGNAME 2 /* Find the beginning of an attribute name */ +#define EKHTML_STMODE_GETNAME 3 /* Get the rest of the attribute name */ +#define EKHTML_STMODE_GETEQUAL 4 /* Find the equals sign */ +#define EKHTML_STMODE_BEGVALUE 5 /* Get the beginning of a attribute value */ +#define EKHTML_STMODE_GETVALUE 6 /* Get the rest of an attribute value */ + +/* + * ekhtml_parser_starttag_cleanup: Cleanup allocated memory, as the + * parser object is about to be destroyed + * + * Arguments: parser = Parser to cleanup + * + */ + +void ekhtml_parser_starttag_cleanup(ekhtml_parser_t *parser){ + ekhtml_attr_t *attr, *next; + + for(attr=parser->freeattrs; attr; attr=next){ + next = attr->next; + free(attr); + } +} + +/* + * ekhtml_parser_newattr: Get a new unused attribute structure. + * + * Arguments: parser = Parser to get a new attribute structure for + * + * Return values: Returns a new attribute structure, which should be + * passed to ekhtml_parser_attr_release when the caller + * is done using it. The values of the returned attribute + * must be initialized by the caller + */ + +static inline +ekhtml_attr_t *ekhtml_parser_attr_new(ekhtml_parser_t *parser){ + ekhtml_attr_t *res; + + if(parser->freeattrs == NULL){ + res = malloc(sizeof(*res)); + } else { + res = parser->freeattrs; + /* Remove it from the list */ + parser->freeattrs = parser->freeattrs->next; + } + return res; +} + +/* + * ekhtml_parser_attr_release: Release use of an attribute previously fetched + * via the ekhtml_parser_newattr. 
+ * + * Arguments: parser = parser to give the attribute back to + * attr = Attribute to relinquish use of + * + */ + +static inline +void ekhtml_parser_attr_release(ekhtml_parser_t *parser, ekhtml_attr_t *attr){ + attr->next = parser->freeattrs; + parser->freeattrs = attr; +} + +static void handle_starttag(ekhtml_parser_t *parser, char *curp, + ekhtml_starttag_state *sstate) +{ + ekhtml_tag_container *container; + ekhtml_starttag_cb_t cback = NULL; + int taglen = sstate->tagend - 1; + ekhtml_string_t str; + ekhtml_attr_t *attr; + char *upper_str; + hnode_t *hn; + + upper_str = ekhtml_make_upperstr(curp + 1, taglen); + str.str = upper_str; + str.len = taglen; + + if((hn = hash_lookup(parser->startendcb, &str)) && + (container = hnode_get(hn)) && + container->startfunc) + { + cback = container->startfunc; + } else if(parser->startcb_unk) + cback = parser->startcb_unk; + + if(!cback) + return; + + /* Formulate real attribute callback data from the 'offset' + pointer values */ + for(attr=sstate->attrs;attr;attr=attr->next){ + attr->name.str = curp + (int)attr->name.str; + if(!attr->isBoolean) + attr->val.str = curp + (int)attr->val.str; + } + + cback(parser->cbdata, &str, sstate->attrs); +} + +static void release_attributes(ekhtml_parser_t *parser, + ekhtml_starttag_state *sstate) +{ + ekhtml_attr_t *attr, *next; + + if(sstate->curattr) + ekhtml_parser_attr_release(parser, sstate->curattr); + + attr = sstate->attrs; + while(attr){ + next = attr->next; + ekhtml_parser_attr_release(parser, attr); + attr = next; + } +} + +static inline void scroll_attribute(ekhtml_starttag_state *sstate){ + sstate->curattr->next = sstate->attrs; + sstate->attrs = sstate->curattr; + sstate->curattr = NULL; +} + + +char *ekhtml_parse_starttag(ekhtml_parser_t *parser, void **state_data, + char *curp, char *endp, int *baddata) +{ + ekhtml_starttag_state *startstate = *state_data; + int *offset = &parser->state.offset; + char *workp; + + assert(*curp == '<' && isalpha(*(curp + 1))); + 
assert(endp - curp >= 3); + + if(startstate == NULL){ /* First time the tag is called */ + startstate = &parser->startstate; + startstate->tagend = sizeof("<F") - 1; + startstate->mode = EKHTML_STMODE_TAG; + startstate->attrs = NULL; + startstate->curattr = NULL; + startstate->quote = '\0'; + *state_data = startstate; + *offset = startstate->tagend; + } + + workp = curp + *offset; + + if(startstate->mode == EKHTML_STMODE_TAG){ + /* Find that tag! */ + workp = ekhtml_find_notcharsmap(workp, endp - workp, EKCMap_CharMap, + EKHTML_CHAR_TAGNAME); + *offset = workp - curp; + if(workp == endp) + return NULL; + + startstate->tagend = *offset; + startstate->mode = EKHTML_STMODE_BEGNAME; + } + + while(workp != endp){ /* Main state processing loop */ + if(startstate->mode == EKHTML_STMODE_BEGNAME){ + ekhtml_attr_t *attr; + + workp = ekhtml_find_notcharsmap(workp, endp - workp, + EKCMap_CharMap, + EKHTML_CHAR_WHITESPACE); + if(workp == endp) + break; + + if(!(EKCMap_CharMap[(unsigned char)*workp] & + EKHTML_CHAR_BEGATTRNAME)) + { + /* Bad attrname character */ + startstate->mode = EKHTML_STMODE_SUCK; + } else { + assert(startstate->curattr == NULL); + /* Valid attribute name, allocate space for it */ + attr = ekhtml_parser_attr_new(parser); + attr->name.str = (char *)NULL + (workp - curp); + attr->name.len = 0; /* Will get assigned later */ + attr->val.str = NULL; + attr->val.len = 0; + attr->isBoolean = 1; + attr->next = NULL; + startstate->mode = EKHTML_STMODE_GETNAME; + startstate->curattr = attr; + } + } + + if(startstate->mode == EKHTML_STMODE_GETNAME){ + workp = ekhtml_find_notcharsmap(workp, endp - workp, + EKCMap_CharMap, + EKHTML_CHAR_ATTRNAME); + if(workp == endp) + break; + + /* There be dragons here -- watch out -- see comment @ top + of file */ + startstate->curattr->name.len = + workp - (curp + (int)startstate->curattr->name.str); + if(*workp == '='){ + startstate->mode = EKHTML_STMODE_BEGVALUE; + workp++; /* Skip the equals sign */ + } else { + 
if(!(EKCMap_CharMap[(unsigned char)*workp] & + EKHTML_CHAR_WHITESPACE)) + { + /* Found something we weren't expecting. Use the current + attribute as a boolean value and suck the rest */ + scroll_attribute(startstate); + startstate->mode = EKHTML_STMODE_SUCK; + } else + startstate->mode = EKHTML_STMODE_GETEQUAL; + } + } + + if(startstate->mode == EKHTML_STMODE_GETEQUAL){ + workp = ekhtml_find_notcharsmap(workp, endp - workp, + EKCMap_CharMap, + EKHTML_CHAR_WHITESPACE); + if(workp == endp) + break; + + if(*workp != '='){ + /* Unexpected value. Could either be time to suck, or this was + really only a boolean value */ + scroll_attribute(startstate); + + if(EKCMap_CharMap[(unsigned char)*workp] & + EKHTML_CHAR_BEGATTRNAME) + { + startstate->mode = EKHTML_STMODE_BEGNAME; + continue; + } else { + startstate->mode = EKHTML_STMODE_SUCK; + } + } else { + startstate->mode = EKHTML_STMODE_BEGVALUE; + workp++; /* Skip the equals sign */ + } + } + + if(startstate->mode == EKHTML_STMODE_BEGVALUE){ + workp = ekhtml_find_notcharsmap(workp, endp - workp, + EKCMap_CharMap, + EKHTML_CHAR_WHITESPACE); + if(workp == endp) + break; + + startstate->curattr->isBoolean = 0; + startstate->curattr->val.str = (char *)NULL + (workp - curp); + startstate->quote = '\0'; + if(*workp == '"' || *workp == '\''){ + startstate->curattr->val.str++; /* Skip the quote */ + startstate->mode = EKHTML_STMODE_GETVALUE; + startstate->quote = *workp; + workp++; + } else if(!(EKCMap_CharMap[(unsigned char)*workp] & + EKHTML_CHAR_ATTRVALUE)) + { + /* Bad value .. 
*/ + startstate->curattr->val.len = 0; + scroll_attribute(startstate); + startstate->mode = EKHTML_STMODE_SUCK; + } else { + /* Valid value */ + startstate->mode = EKHTML_STMODE_GETVALUE; + } + } + + if(startstate->mode == EKHTML_STMODE_GETVALUE){ + if(startstate->quote){ + for(;workp != endp && *workp != '>' && *workp != '<'; workp++){ + if(*workp == startstate->quote){ + startstate->curattr->val.len = + workp - (curp + (int)startstate->curattr->val.str); + scroll_attribute(startstate); + startstate->mode = EKHTML_STMODE_BEGNAME; + workp++; /* Skip the quote */ + break; + } + } + /* In case we broke out in the above loop, we may + need to continue in the main loop -- CONFUSING */ + if(startstate->mode == EKHTML_STMODE_BEGNAME) + continue; + } else + workp = ekhtml_find_notcharsmap(workp, endp - workp, + EKCMap_CharMap, + EKHTML_CHAR_ATTRVALUE); + if(workp == endp) + break; + + startstate->curattr->val.len = + workp - (curp + (int)startstate->curattr->val.str); + scroll_attribute(startstate); + + if(*workp == '>' || *workp == '<') { + *offset = workp - curp; + handle_starttag(parser, curp, startstate); + release_attributes(parser, startstate); + *state_data = NULL; + if(*workp == '<') + return workp; + else + return workp + 1; + } else { + startstate->mode = EKHTML_STMODE_BEGNAME; + continue; + } + } + + if(startstate->mode == EKHTML_STMODE_SUCK){ + /* The sucking mode is here in case someone puts a bad character + in an attribute name. 
We suck until what looks like end of tag*/ + for(;workp != endp && *workp != '<' && *workp != '>'; workp++) + ; + if(workp == endp) + break; + + *offset = workp - curp; + handle_starttag(parser, curp, startstate); + release_attributes(parser, startstate); + *state_data = NULL; + if(*workp == '<') + return workp; + else + return workp + 1; + } + } + + *offset = workp - curp; + return NULL; +} diff --git a/ekhtml/src/ekhtml_util.c b/ekhtml/src/ekhtml_util.c new file mode 100644 index 0000000..0bc5c1d --- /dev/null +++ b/ekhtml/src/ekhtml_util.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2002, Jon Travis + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * ekhtml_make_upperstr:  Upper-case a byte string in place.
 *
 * Arguments:      buf = Buffer containing bytes to 'upper'
 *                 len = Length of bytes in buf; nothing is touched when
 *                       len is zero or negative
 *
 * Return values:  Capitalizes the first 'len' bytes pointed at by 'buf'
 *                 using toupper(), and returns 'buf'.  No new buffer is
 *                 allocated and no terminator is required or written.
 */

char *ekhtml_make_upperstr(char *buf, int len){
    char *endp = buf + len, *cp;

    /* toupper() is only defined for values representable as unsigned char
       (or EOF); a plain char may be negative for bytes >= 0x80, so convert
       before the call */
    for(cp = buf; cp < endp; cp++)
        *cp = (char)toupper((unsigned char)*cp);
    return buf;
}
+ * + * $Id: hash.c,v 1.1 2002/09/17 02:49:36 jick Exp $ + * $Name: EKHTML_RELEASE_0_3_2 $ + */ + +#include <stdlib.h> +#include <stddef.h> +#include <assert.h> +#include <string.h> +#define HASH_IMPLEMENTATION +#include "hash.h" + +#ifdef KAZLIB_RCSID +static const char rcsid[] = "$Id: hash.c,v 1.1 2002/09/17 02:49:36 jick Exp $"; +#endif + +#define INIT_BITS 6 +#define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */ +#define INIT_MASK ((INIT_SIZE) - 1) + +#define next hash_next +#define key hash_key +#define data hash_data +#define hkey hash_hkey + +#define table hash_table +#define nchains hash_nchains +#define nodecount hash_nodecount +#define maxcount hash_maxcount +#define highmark hash_highmark +#define lowmark hash_lowmark +#define compare hash_compare +#define function hash_function +#define allocnode hash_allocnode +#define freenode hash_freenode +#define context hash_context +#define mask hash_mask +#define dynamic hash_dynamic + +#define table hash_table +#define chain hash_chain + +static hnode_t *hnode_alloc(void *context); +static void hnode_free(hnode_t *node, void *context); +static hash_val_t hash_fun_default(const void *key); +static int hash_comp_default(const void *key1, const void *key2); + +int hash_val_t_bit; + +/* + * Compute the number of bits in the hash_val_t type. We know that hash_val_t + * is an unsigned integral type. Thus the highest value it can hold is a + * Mersenne number (power of two, less one). We initialize a hash_val_t + * object with this value and then shift bits out one by one while counting. + * Notes: + * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power + * of two. This means that its binary representation consists of all one + * bits, and hence ``val'' is initialized to all one bits. + * 2. While bits remain in val, we increment the bit count and shift it to the + * right, replacing the topmost bit by zero. 
+ */ + +static void compute_bits(void) +{ + hash_val_t val = HASH_VAL_T_MAX; /* 1 */ + int bits = 0; + + while (val) { /* 2 */ + bits++; + val >>= 1; + } + + hash_val_t_bit = bits; +} + +/* + * Verify whether the given argument is a power of two. + */ + +static int is_power_of_two(hash_val_t arg) +{ + if (arg == 0) + return 0; + while ((arg & 1) == 0) + arg >>= 1; + return (arg == 1); +} + +/* + * Compute a shift amount from a given table size + */ + +static hash_val_t compute_mask(hashcount_t size) +{ + assert (is_power_of_two(size)); + assert (size >= 2); + + return size - 1; +} + +/* + * Initialize the table of pointers to null. + */ + +static void clear_table(hash_t *hash) +{ + hash_val_t i; + + for (i = 0; i < hash->nchains; i++) + hash->table[i] = NULL; +} + +/* + * Double the size of a dynamic table. This works as follows. Each chain splits + * into two adjacent chains. The shift amount increases by one, exposing an + * additional bit of each hashed key. For each node in the original chain, the + * value of this newly exposed bit will decide which of the two new chains will + * receive the node: if the bit is 1, the chain with the higher index will have + * the node, otherwise the lower chain will receive the node. In this manner, + * the hash table will continue to function exactly as before without having to + * rehash any of the keys. + * Notes: + * 1. Overflow check. + * 2. The new number of chains is twice the old number of chains. + * 3. The new mask is one bit wider than the previous, revealing a + * new bit in all hashed keys. + * 4. Allocate a new table of chain pointers that is twice as large as the + * previous one. + * 5. If the reallocation was successful, we perform the rest of the growth + * algorithm, otherwise we do nothing. + * 6. The exposed_bit variable holds a mask with which each hashed key can be + * AND-ed to test the value of its newly exposed bit. + * 7. 
Now loop over each chain in the table and sort its nodes into two + * chains based on the value of each node's newly exposed hash bit. + * 8. The low chain replaces the current chain. The high chain goes + * into the corresponding sister chain in the upper half of the table. + * 9. We have finished dealing with the chains and nodes. We now update + * the various bookeeping fields of the hash structure. + */ + +static void grow_table(hash_t *hash) +{ + hnode_t **newtable; + + assert (2 * hash->nchains > hash->nchains); /* 1 */ + + newtable = realloc(hash->table, + sizeof *newtable * hash->nchains * 2); /* 4 */ + + if (newtable) { /* 5 */ + hash_val_t mask = (hash->mask << 1) | 1; /* 3 */ + hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */ + hash_val_t chain; + + assert (mask != hash->mask); + + for (chain = 0; chain < hash->nchains; chain++) { /* 7 */ + hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next; + + for (hptr = newtable[chain]; hptr != 0; hptr = next) { + next = hptr->next; + + if (hptr->hkey & exposed_bit) { + hptr->next = high_chain; + high_chain = hptr; + } else { + hptr->next = low_chain; + low_chain = hptr; + } + } + + newtable[chain] = low_chain; /* 8 */ + newtable[chain + hash->nchains] = high_chain; + } + + hash->table = newtable; /* 9 */ + hash->mask = mask; + hash->nchains *= 2; + hash->lowmark *= 2; + hash->highmark *= 2; + } + assert (hash_verify(hash)); +} + +/* + * Cut a table size in half. This is done by folding together adjacent chains + * and populating the lower half of the table with these chains. The chains are + * simply spliced together. Once this is done, the whole table is reallocated + * to a smaller object. + * Notes: + * 1. It is illegal to have a hash table with one slot. This would mean that + * hash->shift is equal to hash_val_t_bit, an illegal shift value. + * Also, other things could go wrong, such as hash->lowmark becoming zero. + * 2. 
Looping over each pair of sister chains, the low_chain is set to + * point to the head node of the chain in the lower half of the table, + * and high_chain points to the head node of the sister in the upper half. + * 3. The intent here is to compute a pointer to the last node of the + * lower chain into the low_tail variable. If this chain is empty, + * low_tail ends up with a null value. + * 4. If the lower chain is not empty, we simply tack the upper chain onto it. + * If the upper chain is a null pointer, nothing happens. + * 5. Otherwise, if the lower chain is empty but the upper one is not, then the + * high chain is simply transferred to the lower half of the table. + * 6. Otherwise if both chains are empty, there is nothing to do. + * 7. All the chain pointers are in the lower half of the table now, so + * we reallocate it to a smaller object. This, of course, invalidates + * all pointer-to-pointers which reference into the table from the + * first node of each chain. + * 8. Though it's unlikely, the reallocation may fail. In this case we + * pretend that the table _was_ reallocated to a smaller object. + * 9. Finally, update the various table parameters to reflect the new size.
+ */ + +static void shrink_table(hash_t *hash) +{ + hash_val_t chain, nchains; + hnode_t **newtable, *low_tail, *low_chain, *high_chain; + + assert (hash->nchains >= 2); /* 1 */ + nchains = hash->nchains / 2; + + for (chain = 0; chain < nchains; chain++) { + low_chain = hash->table[chain]; /* 2 */ + high_chain = hash->table[chain + nchains]; + for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next) + ; /* 3 */ + if (low_chain != 0) /* 4 */ + low_tail->next = high_chain; + else if (high_chain != 0) /* 5 */ + hash->table[chain] = high_chain; + else + assert (hash->table[chain] == NULL); /* 6 */ + } + newtable = realloc(hash->table, + sizeof *newtable * nchains); /* 7 */ + if (newtable) /* 8 */ + hash->table = newtable; + hash->mask >>= 1; /* 9 */ + hash->nchains = nchains; + hash->lowmark /= 2; + hash->highmark /= 2; + assert (hash_verify(hash)); +} + + +/* + * Create a dynamic hash table. Both the hash table structure and the table + * itself are dynamically allocated. Furthermore, the table is extendible in + * that it will automatically grow as its load factor increases beyond a + * certain threshold. + * Notes: + * 1. If the number of bits in the hash_val_t type has not been computed yet, + * we do so here, because this is likely to be the first function that the + * user calls. + * 2. Allocate a hash table control structure. + * 3. If a hash table control structure is successfully allocated, we + * proceed to initialize it. Otherwise we return a null pointer. + * 4. We try to allocate the table of hash chains. + * 5. If we were able to allocate the hash chain table, we can finish + * initializing the hash structure and the table. Otherwise, we must + * backtrack by freeing the hash structure. + * 6. INIT_SIZE should be a power of two. The high and low marks are always set + * to be twice the table size and half the table size respectively. 
When the + * number of nodes in the table grows beyond the high size (beyond load + * factor 2), it will double in size to cut the load factor down to about + * about 1. If the table shrinks down to or beneath load factor 0.5, + * it will shrink, bringing the load up to about 1. However, the table + * will never shrink beneath INIT_SIZE even if it's emptied. + * 7. This indicates that the table is dynamically allocated and dynamically + * resized on the fly. A table that has this value set to zero is + * assumed to be statically allocated and will not be resized. + * 8. The table of chains must be properly reset to all null pointers. + */ + +hash_t *hash_create(hashcount_t maxcount, hash_comp_t compfun, + hash_fun_t hashfun) +{ + hash_t *hash; + + if (hash_val_t_bit == 0) /* 1 */ + compute_bits(); + + hash = malloc(sizeof *hash); /* 2 */ + + if (hash) { /* 3 */ + hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */ + if (hash->table) { /* 5 */ + hash->nchains = INIT_SIZE; /* 6 */ + hash->highmark = INIT_SIZE * 2; + hash->lowmark = INIT_SIZE / 2; + hash->nodecount = 0; + hash->maxcount = maxcount; + hash->compare = compfun ? compfun : hash_comp_default; + hash->function = hashfun ? hashfun : hash_fun_default; + hash->allocnode = hnode_alloc; + hash->freenode = hnode_free; + hash->context = NULL; + hash->mask = INIT_MASK; + hash->dynamic = 1; /* 7 */ + clear_table(hash); /* 8 */ + assert (hash_verify(hash)); + return hash; + } + free(hash); + } + + return NULL; +} + +/* + * Select a different set of node allocator routines. + */ + +void hash_set_allocator(hash_t *hash, hnode_alloc_t al, + hnode_free_t fr, void *context) +{ + assert (hash_count(hash) == 0); + assert ((al == 0 && fr == 0) || (al != 0 && fr != 0)); + + hash->allocnode = al ? al : hnode_alloc; + hash->freenode = fr ? fr : hnode_free; + hash->context = context; +} + +/* + * Free every node in the hash using the hash->freenode() function pointer, and + * cause the hash to become empty. 
+ */ + +void hash_free_nodes(hash_t *hash) +{ + hscan_t hs; + hnode_t *node; + hash_scan_begin(&hs, hash); + while ((node = hash_scan_next(&hs))) { + hash_scan_delete(hash, node); + hash->freenode(node, hash->context); + } + hash->nodecount = 0; + clear_table(hash); +} + +/* + * Obsolescent function for removing all nodes from a table, + * freeing them and then freeing the table all in one step. + */ + +void hash_free(hash_t *hash) +{ +#ifdef KAZLIB_OBSOLESCENT_DEBUG + assert ("call to obsolescent function hash_free()" && 0); +#endif + hash_free_nodes(hash); + hash_destroy(hash); +} + +/* + * Free a dynamic hash table structure. + */ + +void hash_destroy(hash_t *hash) +{ + assert (hash_val_t_bit != 0); + assert (hash_isempty(hash)); + free(hash->table); + free(hash); +} + +/* + * Initialize a user supplied hash structure. The user also supplies a table of + * chains which is assigned to the hash structure. The table is static---it + * will not grow or shrink. + * 1. See note 1. in hash_create(). + * 2. The user supplied array of pointers hopefully contains nchains nodes. + * 3. See note 7. in hash_create(). + * 4. We must dynamically compute the mask from the given power of two table + * size. + * 5. The user supplied table can't be assumed to contain null pointers, + * so we reset it here. + */ + +hash_t *hash_init(hash_t *hash, hashcount_t maxcount, + hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table, + hashcount_t nchains) +{ + if (hash_val_t_bit == 0) /* 1 */ + compute_bits(); + + assert (is_power_of_two(nchains)); + + hash->table = table; /* 2 */ + hash->nchains = nchains; + hash->nodecount = 0; + hash->maxcount = maxcount; + hash->compare = compfun ? compfun : hash_comp_default; + hash->function = hashfun ? 
hashfun : hash_fun_default; + hash->dynamic = 0; /* 3 */ + hash->mask = compute_mask(nchains); /* 4 */ + clear_table(hash); /* 5 */ + + assert (hash_verify(hash)); + + return hash; +} + +/* + * Reset the hash scanner so that the next element retrieved by + * hash_scan_next() shall be the first element on the first non-empty chain. + * Notes: + * 1. Locate the first non empty chain. + * 2. If an empty chain is found, remember which one it is and set the next + * pointer to refer to its first element. + * 3. Otherwise if a chain is not found, set the next pointer to NULL + * so that hash_scan_next() shall indicate failure. + */ + +void hash_scan_begin(hscan_t *scan, hash_t *hash) +{ + hash_val_t nchains = hash->nchains; + hash_val_t chain; + + scan->table = hash; + + /* 1 */ + + for (chain = 0; chain < nchains && hash->table[chain] == 0; chain++) + ; + + if (chain < nchains) { /* 2 */ + scan->chain = chain; + scan->next = hash->table[chain]; + } else { /* 3 */ + scan->next = NULL; + } +} + +/* + * Retrieve the next node from the hash table, and update the pointer + * for the next invocation of hash_scan_next(). + * Notes: + * 1. Remember the next pointer in a temporary value so that it can be + * returned. + * 2. This assertion essentially checks whether the module has been properly + * initialized. The first point of interaction with the module should be + * either hash_create() or hash_init(), both of which set hash_val_t_bit to + * a non zero value. + * 3. If the next pointer we are returning is not NULL, then the user is + * allowed to call hash_scan_next() again. We prepare the new next pointer + * for that call right now. That way the user is allowed to delete the node + * we are about to return, since we will no longer be needing it to locate + * the next node. + * 4. If there is a next node in the chain (next->next), then that becomes the + * new next node, otherwise ... + * 5. 
We have exhausted the current chain, and must locate the next subsequent + * non-empty chain in the table. + * 6. If a non-empty chain is found, the first element of that chain becomes + * the new next node. Otherwise there is no new next node and we set the + * pointer to NULL so that the next time hash_scan_next() is called, a null + * pointer shall be immediately returned. + */ + + +hnode_t *hash_scan_next(hscan_t *scan) +{ + hnode_t *next = scan->next; /* 1 */ + hash_t *hash = scan->table; + hash_val_t chain = scan->chain + 1; + hash_val_t nchains = hash->nchains; + + assert (hash_val_t_bit != 0); /* 2 */ + + if (next) { /* 3 */ + if (next->next) { /* 4 */ + scan->next = next->next; + } else { + while (chain < nchains && hash->table[chain] == 0) /* 5 */ + chain++; + if (chain < nchains) { /* 6 */ + scan->chain = chain; + scan->next = hash->table[chain]; + } else { + scan->next = NULL; + } + } + } + return next; +} + +/* + * Insert a node into the hash table. + * Notes: + * 1. It's illegal to insert more than the maximum number of nodes. The client + * should verify that the hash table is not full before attempting an + * insertion. + * 2. The same key may not be inserted into a table twice. + * 3. If the table is dynamic and the load factor is already at >= 2, + * grow the table. + * 4. We take the bottom N bits of the hash value to derive the chain index, + * where N is the base 2 logarithm of the size of the hash table. 
+ */ + +void hash_insert(hash_t *hash, hnode_t *node, const void *key) +{ + hash_val_t hkey, chain; + + assert (hash_val_t_bit != 0); + assert (node->next == NULL); + assert (hash->nodecount < hash->maxcount); /* 1 */ + assert (hash_lookup(hash, key) == NULL); /* 2 */ + + if (hash->dynamic && hash->nodecount >= hash->highmark) /* 3 */ + grow_table(hash); + + hkey = hash->function(key); + chain = hkey & hash->mask; /* 4 */ + + node->key = key; + node->hkey = hkey; + node->next = hash->table[chain]; + hash->table[chain] = node; + hash->nodecount++; + + assert (hash_verify(hash)); +} + +/* + * Find a node in the hash table and return a pointer to it. + * Notes: + * 1. We hash the key and keep the entire hash value. As an optimization, when + * we descend down the chain, we can compare hash values first and only if + * hash values match do we perform a full key comparison. + * 2. To locate the chain from among 2^N chains, we look at the lower N bits of + * the hash value by anding them with the current mask. + * 3. Looping through the chain, we compare the stored hash value inside each + * node against our computed hash. If they match, then we do a full + * comparison between the unhashed keys. If these match, we have located the + * entry. + */ + +hnode_t *hash_lookup(hash_t *hash, const void *key) +{ + hash_val_t hkey, chain; + hnode_t *nptr; + + hkey = hash->function(key); /* 1 */ + chain = hkey & hash->mask; /* 2 */ + + for (nptr = hash->table[chain]; nptr; nptr = nptr->next) { /* 3 */ + if (nptr->hkey == hkey && hash->compare(nptr->key, key) == 0) + return nptr; + } + + return NULL; +} + +/* + * Delete the given node from the hash table. Since the chains + * are singly linked, we must locate the start of the node's chain + * and traverse. + * Notes: + * 1. The node must belong to this hash table, and its key must not have + * been tampered with. + * 2. If this deletion will take the node count below the low mark, we + * shrink the table now. + * 3. 
Determine which chain the node belongs to, and fetch the pointer + * to the first node in this chain. + * 4. If the node being deleted is the first node in the chain, then + * simply update the chain head pointer. + * 5. Otherwise advance to the node's predecessor, and splice out + * by updating the predecessor's next pointer. + * 6. Indicate that the node is no longer in a hash table. + */ + +hnode_t *hash_delete(hash_t *hash, hnode_t *node) +{ + hash_val_t chain; + hnode_t *hptr; + + assert (hash_lookup(hash, node->key) == node); /* 1 */ + assert (hash_val_t_bit != 0); + + if (hash->dynamic && hash->nodecount <= hash->lowmark + && hash->nodecount > INIT_SIZE) + shrink_table(hash); /* 2 */ + + chain = node->hkey & hash->mask; /* 3 */ + hptr = hash->table[chain]; + + if (hptr == node) { /* 4 */ + hash->table[chain] = node->next; + } else { + while (hptr->next != node) { /* 5 */ + assert (hptr != 0); + hptr = hptr->next; + } + assert (hptr->next == node); + hptr->next = node->next; + } + + hash->nodecount--; + assert (hash_verify(hash)); + + node->next = NULL; /* 6 */ + return node; +} + +int hash_alloc_insert(hash_t *hash, const void *key, void *data) +{ + hnode_t *node = hash->allocnode(hash->context); + + if (node) { + hnode_init(node, data); + hash_insert(hash, node, key); + return 1; + } + return 0; +} + +void hash_delete_free(hash_t *hash, hnode_t *node) +{ + hash_delete(hash, node); + hash->freenode(node, hash->context); +} + +/* + * Exactly like hash_delete, except does not trigger table shrinkage. This is to be + * used from within a hash table scan operation. See notes for hash_delete. 
+ */ + +hnode_t *hash_scan_delete(hash_t *hash, hnode_t *node) +{ + hash_val_t chain; + hnode_t *hptr; + + assert (hash_lookup(hash, node->key) == node); + assert (hash_val_t_bit != 0); + + chain = node->hkey & hash->mask; + hptr = hash->table[chain]; + + if (hptr == node) { + hash->table[chain] = node->next; + } else { + while (hptr->next != node) + hptr = hptr->next; + hptr->next = node->next; + } + + hash->nodecount--; + assert (hash_verify(hash)); + node->next = NULL; + + return node; +} + +/* + * Like hash_delete_free but based on hash_scan_delete. + */ + +void hash_scan_delfree(hash_t *hash, hnode_t *node) +{ + hash_scan_delete(hash, node); + hash->freenode(node, hash->context); +} + +/* + * Verify whether the given object is a valid hash table. This means + * Notes: + * 1. If the hash table is dynamic, verify whether the high and + * low expansion/shrinkage thresholds are powers of two. + * 2. Count all nodes in the table, and test each hash value + * to see whether it is correct for the node's chain. + */ + +int hash_verify(hash_t *hash) +{ + hashcount_t count = 0; + hash_val_t chain; + hnode_t *hptr; + + if (hash->dynamic) { /* 1 */ + if (hash->lowmark >= hash->highmark) + return 0; + if (!is_power_of_two(hash->highmark)) + return 0; + if (!is_power_of_two(hash->lowmark)) + return 0; + } + + for (chain = 0; chain < hash->nchains; chain++) { /* 2 */ + for (hptr = hash->table[chain]; hptr != 0; hptr = hptr->next) { + if ((hptr->hkey & hash->mask) != chain) + return 0; + count++; + } + } + + if (count != hash->nodecount) + return 0; + + return 1; +} + +/* + * Test whether the hash table is full and return 1 if this is true, + * 0 if it is false. + */ + +#undef hash_isfull +int hash_isfull(hash_t *hash) +{ + return hash->nodecount == hash->maxcount; +} + +/* + * Test whether the hash table is empty and return 1 if this is true, + * 0 if it is false. 
+ */ + +#undef hash_isempty +int hash_isempty(hash_t *hash) +{ + return hash->nodecount == 0; +} + +static hnode_t *hnode_alloc(void *context) +{ + return malloc(sizeof *hnode_alloc(NULL)); +} + +static void hnode_free(hnode_t *node, void *context) +{ + free(node); +} + + +/* + * Create a hash table node dynamically and assign it the given data. + */ + +hnode_t *hnode_create(void *data) +{ + hnode_t *node = malloc(sizeof *node); + if (node) { + node->data = data; + node->next = NULL; + } + return node; +} + +/* + * Initialize a client-supplied node + */ + +hnode_t *hnode_init(hnode_t *hnode, void *data) +{ + hnode->data = data; + hnode->next = NULL; + return hnode; +} + +/* + * Destroy a dynamically allocated node. + */ + +void hnode_destroy(hnode_t *hnode) +{ + free(hnode); +} + +#undef hnode_put +void hnode_put(hnode_t *node, void *data) +{ + node->data = data; +} + +#undef hnode_get +void *hnode_get(hnode_t *node) +{ + return node->data; +} + +#undef hnode_getkey +const void *hnode_getkey(hnode_t *node) +{ + return node->key; +} + +#undef hash_count +hashcount_t hash_count(hash_t *hash) +{ + return hash->nodecount; +} + +#undef hash_size +hashcount_t hash_size(hash_t *hash) +{ + return hash->nchains; +} + +static hash_val_t hash_fun_default(const void *key) +{ + static unsigned long randbox[] = { + 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U, + 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU, + 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU, + 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU, + }; + + const unsigned char *str = key; + hash_val_t acc = 0; + + while (*str) { + acc ^= randbox[(*str + acc) & 0xf]; + acc = (acc << 1) | (acc >> 31); + acc &= 0xffffffffU; + acc ^= randbox[((*str++ >> 4) + acc) & 0xf]; + acc = (acc << 2) | (acc >> 30); + acc &= 0xffffffffU; + } + return acc; +} + +static int hash_comp_default(const void *key1, const void *key2) +{ + return strcmp(key1, key2); +} + +#ifdef KAZLIB_TEST_MAIN + +#include <stdio.h> +#include 
<ctype.h> +#include <stdarg.h> + +typedef char input_t[256]; + +static int tokenize(char *string, ...) +{ + char **tokptr; + va_list arglist; + int tokcount = 0; + + va_start(arglist, string); + tokptr = va_arg(arglist, char **); + while (tokptr) { + while (*string && isspace((unsigned char) *string)) + string++; + if (!*string) + break; + *tokptr = string; + while (*string && !isspace((unsigned char) *string)) + string++; + tokptr = va_arg(arglist, char **); + tokcount++; + if (!*string) + break; + *string++ = 0; + } + va_end(arglist); + + return tokcount; +} + +static char *dupstring(char *str) +{ + int sz = strlen(str) + 1; + char *new = malloc(sz); + if (new) + memcpy(new, str, sz); + return new; +} + +static hnode_t *new_node(void *c) +{ + static hnode_t few[5]; + static int count; + + if (count < 5) + return few + count++; + + return NULL; +} + +static void del_node(hnode_t *n, void *c) +{ +} + +int main(void) +{ + input_t in; + hash_t *h = hash_create(HASHCOUNT_T_MAX, 0, 0); + hnode_t *hn; + hscan_t hs; + char *tok1, *tok2, *val; + const char *key; + int prompt = 0; + + char *help = + "a <key> <val> add value to hash table\n" + "d <key> delete value from hash table\n" + "l <key> lookup value in hash table\n" + "n show size of hash table\n" + "c show number of entries\n" + "t dump whole hash table\n" + "+ increase hash table (private func)\n" + "- decrease hash table (private func)\n" + "b print hash_t_bit value\n" + "p turn prompt on\n" + "s switch to non-functioning allocator\n" + "q quit"; + + if (!h) + puts("hash_create failed"); + + for (;;) { + if (prompt) + putchar('>'); + fflush(stdout); + + if (!fgets(in, sizeof(input_t), stdin)) + break; + + switch(in[0]) { + case '?': + puts(help); + break; + case 'b': + printf("%d\n", hash_val_t_bit); + break; + case 'a': + if (tokenize(in+1, &tok1, &tok2, (char **) 0) != 2) { + puts("what?"); + break; + } + key = dupstring(tok1); + val = dupstring(tok2); + + if (!key || !val) { + puts("out of memory"); + 
free((void *) key); + free(val); + } + + if (!hash_alloc_insert(h, key, val)) { + puts("hash_alloc_insert failed"); + free((void *) key); + free(val); + break; + } + break; + case 'd': + if (tokenize(in+1, &tok1, (char **) 0) != 1) { + puts("what?"); + break; + } + hn = hash_lookup(h, tok1); + if (!hn) { + puts("hash_lookup failed"); + break; + } + val = hnode_get(hn); + key = hnode_getkey(hn); + hash_scan_delfree(h, hn); + free((void *) key); + free(val); + break; + case 'l': + if (tokenize(in+1, &tok1, (char **) 0) != 1) { + puts("what?"); + break; + } + hn = hash_lookup(h, tok1); + if (!hn) { + puts("hash_lookup failed"); + break; + } + val = hnode_get(hn); + puts(val); + break; + case 'n': + printf("%lu\n", (unsigned long) hash_size(h)); + break; + case 'c': + printf("%lu\n", (unsigned long) hash_count(h)); + break; + case 't': + hash_scan_begin(&hs, h); + while ((hn = hash_scan_next(&hs))) + printf("%s\t%s\n", (char*) hnode_getkey(hn), + (char*) hnode_get(hn)); + break; + case '+': + grow_table(h); /* private function */ + break; + case '-': + shrink_table(h); /* private function */ + break; + case 'q': + exit(0); + break; + case '\0': + break; + case 'p': + prompt = 1; + break; + case 's': + hash_set_allocator(h, new_node, del_node, NULL); + break; + default: + putchar('?'); + putchar('\n'); + break; + } + } + + return 0; +} + +#endif |