summaryrefslogtreecommitdiff
path: root/ekhtml/testsuite
diff options
context:
space:
mode:
authoradmin@progandy.co.cc <admin@progandy.co.cc@eced67a3-f377-a0ae-92ae-d6de1850b05a>2010-09-13 18:11:13 +0000
committeradmin@progandy.co.cc <admin@progandy.co.cc@eced67a3-f377-a0ae-92ae-d6de1850b05a>2010-09-13 18:11:13 +0000
commit376594ac1d65cbb31165f5a74775d624c3fd2981 (patch)
treeb1f9bf2ee2f1a82c5d9378c03cf09f72535c001d /ekhtml/testsuite
parent99ef7f1cfcaa4ead4b860faec88754abbe31ebfc (diff)
- added basic html conversion for receiving (maybe a bit too powerful)
- some bugfixes git-svn-id: http://mirotr.googlecode.com/svn/trunk@12 eced67a3-f377-a0ae-92ae-d6de1850b05a
Diffstat (limited to 'ekhtml/testsuite')
-rw-r--r--ekhtml/testsuite/.deps/tester.Po1
-rw-r--r--ekhtml/testsuite/Makefile406
-rw-r--r--ekhtml/testsuite/Makefile.am26
-rw-r--r--ekhtml/testsuite/Makefile.in406
-rw-r--r--ekhtml/testsuite/ek_sgmllib.py489
-rw-r--r--ekhtml/testsuite/gen_html.py154
-rw-r--r--ekhtml/testsuite/plparser.pl38
-rw-r--r--ekhtml/testsuite/pyparser.py44
-rw-r--r--ekhtml/testsuite/test_basic.sh10
-rw-r--r--ekhtml/testsuite/test_bogus_feedsize.sh27
-rw-r--r--ekhtml/testsuite/test_crazypage.sh16
-rw-r--r--ekhtml/testsuite/tester.c158
-rw-r--r--ekhtml/testsuite/tstpages/01_stock_cases/comment.suite18
-rw-r--r--ekhtml/testsuite/tstpages/01_stock_cases/endtag.suite13
-rw-r--r--ekhtml/testsuite/tstpages/01_stock_cases/special.suite11
-rw-r--r--ekhtml/testsuite/tstpages/01_stock_cases/starttag.suite21
-rw-r--r--ekhtml/testsuite/tstpages/01_stock_cases/way.suite4
17 files changed, 1842 insertions, 0 deletions
diff --git a/ekhtml/testsuite/.deps/tester.Po b/ekhtml/testsuite/.deps/tester.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/ekhtml/testsuite/.deps/tester.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/ekhtml/testsuite/Makefile b/ekhtml/testsuite/Makefile
new file mode 100644
index 0000000..5c6b92b
--- /dev/null
+++ b/ekhtml/testsuite/Makefile
@@ -0,0 +1,406 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# Generated automatically from Makefile.in by configure.
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+SHELL = /bin/sh
+
+srcdir = .
+top_srcdir = ..
+
+prefix = /usr/local
+exec_prefix = ${prefix}
+
+bindir = ${exec_prefix}/bin
+sbindir = ${exec_prefix}/sbin
+libexecdir = ${exec_prefix}/libexec
+datadir = ${prefix}/share
+sysconfdir = ${prefix}/etc
+sharedstatedir = ${prefix}/com
+localstatedir = ${prefix}/var
+libdir = ${exec_prefix}/lib
+infodir = ${prefix}/info
+mandir = ${prefix}/man
+includedir = ${prefix}/include
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/ekhtml
+pkglibdir = $(libdir)/ekhtml
+pkgincludedir = $(includedir)/ekhtml
+top_builddir = ..
+
+ACLOCAL = ${SHELL} /z/missing --run aclocal-1.6
+AUTOCONF = ${SHELL} /z/missing --run autoconf
+AUTOMAKE = ${SHELL} /z/missing --run automake-1.6
+AUTOHEADER = ${SHELL} /z/missing --run autoheader
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = /bin/install -c
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_DATA = ${INSTALL} -m 644
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = ${INSTALL}
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = s,x,x,
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias =
+host_triplet = i686-pc-mingw32
+
+EXEEXT = .exe
+OBJEXT = o
+PATH_SEPARATOR = :
+AMTAR = ${SHELL} /z/missing --run tar
+AS = @AS@
+AWK = gawk
+CC = gcc
+DEPDIR = .deps
+DLLTOOL = @DLLTOOL@
+ECHO = echo
+INSTALL_STRIP_PROGRAM = ${SHELL} $(install_sh) -c -s
+LIBTOOL = $(SHELL) $(top_builddir)/libtool
+LN_S = ln -s
+MAINT = #
+OBJDUMP = @OBJDUMP@
+PACKAGE = ekhtml
+RANLIB = ranlib
+STRIP = strip
+VERSION = 0.3.2
+am__include = include
+am__quote =
+install_sh = /z/install-sh
+EKHTML_LIB = $(top_srcdir)/src/libekhtml.la
+
+EXTRA_DIST = \
+ ek_sgmllib.py \
+ gen_html.py \
+ plparser.pl \
+ pyparser.py \
+ test_basic.sh \
+ test_bogus_feedsize.sh \
+ test_crazypage.sh \
+ tstpages
+
+
+noinst_PROGRAMS = tester
+
+tester_SOURCES = tester.c
+tester_LDADD = $(EKHTML_LIB)
+tester_LDFLAGS = -static
+tester_DEPENDENCIES = $(EKHTML_LIB)
+
+TESTS = test_basic.sh test_crazypage.sh test_bogus_feedsize.sh
+subdir = testsuite
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/include/ekhtml_config.h
+CONFIG_CLEAN_FILES =
+noinst_PROGRAMS = tester$(EXEEXT)
+PROGRAMS = $(noinst_PROGRAMS)
+
+am_tester_OBJECTS = tester.$(OBJEXT)
+tester_OBJECTS = $(am_tester_OBJECTS)
+
+DEFS = -DHAVE_CONFIG_H
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include
+CPPFLAGS =
+LDFLAGS =
+LIBS =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+DEP_FILES = ./$(DEPDIR)/tester.Po
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+ $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+CFLAGS = -g -O2
+DIST_SOURCES = $(tester_SOURCES)
+DIST_COMMON = Makefile.am Makefile.in
+SOURCES = $(tester_SOURCES)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: # Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu testsuite/Makefile
+Makefile: # $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; for p in $$list; do \
+ f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+tester$(EXEEXT): $(tester_OBJECTS) $(tester_DEPENDENCIES)
+ @rm -f tester$(EXEEXT)
+ $(LINK) $(tester_LDFLAGS) $(tester_OBJECTS) $(tester_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+ -rm -f *.tab.c
+
+include ./$(DEPDIR)/tester.Po
+
+distclean-depend:
+ -rm -rf ./$(DEPDIR)
+
+.c.o:
+ source='$<' object='$@' libtool=no \
+ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' \
+ $(CCDEPMODE) $(depcomp) \
+ $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<
+
+.c.obj:
+ source='$<' object='$@' libtool=no \
+ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' \
+ $(CCDEPMODE) $(depcomp) \
+ $(COMPILE) -c `cygpath -w $<`
+
+.c.lo:
+ source='$<' object='$@' libtool=yes \
+ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' \
+ $(CCDEPMODE) $(depcomp) \
+ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+CCDEPMODE = depmode=gcc3
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+
+check-TESTS: $(TESTS)
+ @failed=0; all=0; xfail=0; xpass=0; \
+ srcdir=$(srcdir); export srcdir; \
+ list='$(TESTS)'; \
+ if test -n "$$list"; then \
+ for tst in $$list; do \
+ if test -f ./$$tst; then dir=./; \
+ elif test -f $$tst; then dir=; \
+ else dir="$(srcdir)/"; fi; \
+ if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *" $$tst "*) \
+ xpass=`expr $$xpass + 1`; \
+ failed=`expr $$failed + 1`; \
+ echo "XPASS: $$tst"; \
+ ;; \
+ *) \
+ echo "PASS: $$tst"; \
+ ;; \
+ esac; \
+ elif test $$? -ne 77; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *" $$tst "*) \
+ xfail=`expr $$xfail + 1`; \
+ echo "XFAIL: $$tst"; \
+ ;; \
+ *) \
+ failed=`expr $$failed + 1`; \
+ echo "FAIL: $$tst"; \
+ ;; \
+ esac; \
+ fi; \
+ done; \
+ if test "$$failed" -eq 0; then \
+ if test "$$xfail" -eq 0; then \
+ banner="All $$all tests passed"; \
+ else \
+ banner="All $$all tests behaved as expected ($$xfail expected failures)"; \
+ fi; \
+ else \
+ if test "$$xpass" -eq 0; then \
+ banner="$$failed of $$all tests failed"; \
+ else \
+ banner="$$failed of $$all tests did not behave as expected ($$xpass unexpected passes)"; \
+ fi; \
+ fi; \
+ dashes=`echo "$$banner" | sed s/./=/g`; \
+ echo "$$dashes"; \
+ echo "$$banner"; \
+ echo "$$dashes"; \
+ test "$$failed" -eq 0; \
+ else :; fi
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ @list='$(DISTFILES)'; for file in $$list; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="${top_distdir}" distdir="$(distdir)" \
+ dist-hook
+check-am: all-am
+ $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile $(PROGRAMS)
+
+installdirs:
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \
+ mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-compile distclean-depend \
+ distclean-generic distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-exec-am:
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+uninstall-am: uninstall-info-am
+
+.PHONY: GTAGS all all-am check check-TESTS check-am clean clean-generic \
+ clean-libtool clean-noinstPROGRAMS distclean distclean-compile \
+ distclean-depend distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am info info-am install \
+ install-am install-data install-data-am install-exec \
+ install-exec-am install-info install-info-am install-man \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ tags uninstall uninstall-am uninstall-info-am
+
+
+$(EKHTML_LIB):
+ cd $(top_srcdir)/src && $(MAKE)
+
+dist-hook:
+ rm -rf `find $(distdir)/tstpages -name CVS`
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/ekhtml/testsuite/Makefile.am b/ekhtml/testsuite/Makefile.am
new file mode 100644
index 0000000..adbda98
--- /dev/null
+++ b/ekhtml/testsuite/Makefile.am
@@ -0,0 +1,26 @@
+EKHTML_LIB = $(top_srcdir)/src/libekhtml.la
+
+EXTRA_DIST = \
+ ek_sgmllib.py \
+ gen_html.py \
+ plparser.pl \
+ pyparser.py \
+ test_basic.sh \
+ test_bogus_feedsize.sh \
+ test_crazypage.sh \
+ tstpages
+
+noinst_PROGRAMS = tester
+
+tester_SOURCES = tester.c
+tester_LDADD = $(EKHTML_LIB)
+tester_LDFLAGS = -static
+tester_DEPENDENCIES = $(EKHTML_LIB)
+
+$(EKHTML_LIB):
+ cd $(top_srcdir)/src && $(MAKE)
+
+TESTS = test_basic.sh test_crazypage.sh test_bogus_feedsize.sh
+
+dist-hook:
+ rm -rf `find $(distdir)/tstpages -name CVS`
diff --git a/ekhtml/testsuite/Makefile.in b/ekhtml/testsuite/Makefile.in
new file mode 100644
index 0000000..0cb60d3
--- /dev/null
+++ b/ekhtml/testsuite/Makefile.in
@@ -0,0 +1,406 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+AMTAR = @AMTAR@
+AS = @AS@
+AWK = @AWK@
+CC = @CC@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+ECHO = @ECHO@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+MAINT = @MAINT@
+OBJDUMP = @OBJDUMP@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+STRIP = @STRIP@
+VERSION = @VERSION@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+EKHTML_LIB = $(top_srcdir)/src/libekhtml.la
+
+EXTRA_DIST = \
+ ek_sgmllib.py \
+ gen_html.py \
+ plparser.pl \
+ pyparser.py \
+ test_basic.sh \
+ test_bogus_feedsize.sh \
+ test_crazypage.sh \
+ tstpages
+
+
+noinst_PROGRAMS = tester
+
+tester_SOURCES = tester.c
+tester_LDADD = $(EKHTML_LIB)
+tester_LDFLAGS = -static
+tester_DEPENDENCIES = $(EKHTML_LIB)
+
+TESTS = test_basic.sh test_crazypage.sh test_bogus_feedsize.sh
+subdir = testsuite
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/include/ekhtml_config.h
+CONFIG_CLEAN_FILES =
+noinst_PROGRAMS = tester$(EXEEXT)
+PROGRAMS = $(noinst_PROGRAMS)
+
+am_tester_OBJECTS = tester.$(OBJEXT)
+tester_OBJECTS = $(am_tester_OBJECTS)
+
+DEFS = @DEFS@
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/tester.Po
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+ $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+CFLAGS = @CFLAGS@
+DIST_SOURCES = $(tester_SOURCES)
+DIST_COMMON = Makefile.am Makefile.in
+SOURCES = $(tester_SOURCES)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu testsuite/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; for p in $$list; do \
+ f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+tester$(EXEEXT): $(tester_OBJECTS) $(tester_DEPENDENCIES)
+ @rm -f tester$(EXEEXT)
+ $(LINK) $(tester_LDFLAGS) $(tester_OBJECTS) $(tester_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tester.Po@am__quote@
+
+distclean-depend:
+ -rm -rf ./$(DEPDIR)
+
+.c.o:
+@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<
+
+.c.obj:
+@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(COMPILE) -c `cygpath -w $<`
+
+.c.lo:
+@AMDEP_TRUE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+CCDEPMODE = @CCDEPMODE@
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+
+check-TESTS: $(TESTS)
+ @failed=0; all=0; xfail=0; xpass=0; \
+ srcdir=$(srcdir); export srcdir; \
+ list='$(TESTS)'; \
+ if test -n "$$list"; then \
+ for tst in $$list; do \
+ if test -f ./$$tst; then dir=./; \
+ elif test -f $$tst; then dir=; \
+ else dir="$(srcdir)/"; fi; \
+ if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *" $$tst "*) \
+ xpass=`expr $$xpass + 1`; \
+ failed=`expr $$failed + 1`; \
+ echo "XPASS: $$tst"; \
+ ;; \
+ *) \
+ echo "PASS: $$tst"; \
+ ;; \
+ esac; \
+ elif test $$? -ne 77; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *" $$tst "*) \
+ xfail=`expr $$xfail + 1`; \
+ echo "XFAIL: $$tst"; \
+ ;; \
+ *) \
+ failed=`expr $$failed + 1`; \
+ echo "FAIL: $$tst"; \
+ ;; \
+ esac; \
+ fi; \
+ done; \
+ if test "$$failed" -eq 0; then \
+ if test "$$xfail" -eq 0; then \
+ banner="All $$all tests passed"; \
+ else \
+ banner="All $$all tests behaved as expected ($$xfail expected failures)"; \
+ fi; \
+ else \
+ if test "$$xpass" -eq 0; then \
+ banner="$$failed of $$all tests failed"; \
+ else \
+ banner="$$failed of $$all tests did not behave as expected ($$xpass unexpected passes)"; \
+ fi; \
+ fi; \
+ dashes=`echo "$$banner" | sed s/./=/g`; \
+ echo "$$dashes"; \
+ echo "$$banner"; \
+ echo "$$dashes"; \
+ test "$$failed" -eq 0; \
+ else :; fi
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ @list='$(DISTFILES)'; for file in $$list; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="${top_distdir}" distdir="$(distdir)" \
+ dist-hook
+check-am: all-am
+ $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile $(PROGRAMS)
+
+installdirs:
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \
+ mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-compile distclean-depend \
+ distclean-generic distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-exec-am:
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+uninstall-am: uninstall-info-am
+
+.PHONY: GTAGS all all-am check check-TESTS check-am clean clean-generic \
+ clean-libtool clean-noinstPROGRAMS distclean distclean-compile \
+ distclean-depend distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am info info-am install \
+ install-am install-data install-data-am install-exec \
+ install-exec-am install-info install-info-am install-man \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ tags uninstall uninstall-am uninstall-info-am
+
+
+$(EKHTML_LIB):
+ cd $(top_srcdir)/src && $(MAKE)
+
+dist-hook:
+ rm -rf `find $(distdir)/tstpages -name CVS`
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/ekhtml/testsuite/ek_sgmllib.py b/ekhtml/testsuite/ek_sgmllib.py
new file mode 100644
index 0000000..f71496d
--- /dev/null
+++ b/ekhtml/testsuite/ek_sgmllib.py
@@ -0,0 +1,489 @@
+"""A parser for SGML, using the derived class as a static DTD."""
+
+# Stolen from the Python 2.0 distribution and tweaked by JMT
+
+# XXX This only supports those SGML features used by HTML.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+
+import re
+import string
+
+
+# Regular expressions used for parsing
+
+interesting = re.compile('[&<]')
+incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
+ '<([a-zA-Z][^<>]*|'
+ '/([a-zA-Z][^<>]*)?|'
+ '![^<>]*)?')
+
+entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*);')
+charref = re.compile('&#([0-9]+)[^0-9];')
+
+starttagopen = re.compile('<[>a-zA-Z]')
+shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
+shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
+piopen = re.compile('<\?')
+piclose = re.compile('>')
+endtagopen = re.compile('</[<>a-zA-Z]')
+endbracket = re.compile('[<>]')
+special = re.compile('<![^<>]*>')
+commentopen = re.compile('<!--')
+commentclose = re.compile('--[%s]*>' % string.whitespace)
+tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9]*')
+attrfind = re.compile(
+ '[%s]*([a-zA-Z_][-.a-zA-Z_0-9]*)' % string.whitespace
+ + ('([%s]*=[%s]*' % (string.whitespace, string.whitespace))
+ + r'(\'[^\'<>]*[\'<>]|"[^"<>]*["<>]|[-a-zA-Z0-9@./:+*%?!&$\(\)_#=~]*))?')
+
+
+# SGML parser base class -- find tags and call handler functions.
+# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
+# The dtd is defined by deriving a class which defines methods
+# with special names to handle tags: start_foo and end_foo to handle
+# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
+# (Tags are converted to lower case for this purpose.) The data
+# between tags is passed to the parser by calling self.handle_data()
+# with some data as argument (the data may be split up in arbitrary
+# chunks). Entity references are passed by calling
+# self.handle_entityref() with the entity reference as argument.
+
+class SGMLParser:
+
+ # Interface -- initialize and reset this instance
+ def __init__(self, verbose=0):
+ self.verbose = verbose
+ self.reset()
+
+ # Interface -- reset this instance. Loses all unprocessed data
+ def reset(self):
+ self.rawdata = ''
+ self.stack = []
+ self.lasttag = '???'
+ self.nomoretags = 0
+ self.literal = 0
+
+ # For derived classes only -- enter literal mode (CDATA) till EOF
+ def setnomoretags(self):
+ self.nomoretags = self.literal = 1
+
+ # For derived classes only -- enter literal mode (CDATA)
+ def setliteral(self, *args):
+ self.literal = 1
+
+ # Interface -- feed some data to the parser. Call this as
+ # often as you want, with as little or as much text as you
+ # want (may include '\n'). (This just saves the text, all the
+ # processing is done by goahead().)
+ def feed(self, data):
+ self.rawdata = self.rawdata + data
+ self.goahead(0)
+
+ # Interface -- handle the remaining data
+ def close(self):
+ self.goahead(1)
+
+ # Internal -- handle data as far as reasonable. May leave state
+ # and data to be processed by a subsequent call. If 'end' is
+ # true, force handling all data as if followed by EOF marker.
+ def goahead(self, end):
+ rawdata = self.rawdata
+ i = 0
+ n = len(rawdata)
+ while i < n:
+ if self.nomoretags:
+ self.handle_data(rawdata[i:n])
+ i = n
+ break
+ match = interesting.search(rawdata, i)
+ if match: j = match.start(0)
+ else: j = n
+ if i < j: self.handle_data(rawdata[i:j])
+ i = j
+ if i == n: break
+ if rawdata[i] == '<':
+ if starttagopen.match(rawdata, i):
+ if self.literal:
+ self.handle_data(rawdata[i])
+ i = i+1
+ continue
+ k = self.parse_starttag(i)
+ if k < 0: break
+ i = k
+ continue
+ if endtagopen.match(rawdata, i):
+ k = self.parse_endtag(i)
+ if k < 0: break
+ i = k
+ self.literal = 0
+ continue
+ if commentopen.match(rawdata, i):
+ if self.literal:
+ self.handle_data(rawdata[i])
+ i = i+1
+ continue
+ k = self.parse_comment(i)
+ if k < 0: break
+ i = i+k
+ continue
+ if piopen.match(rawdata, i):
+ if self.literal:
+ self.handle_data(rawdata[i])
+ i = i+1
+ continue
+ k = self.parse_pi(i)
+ if k < 0: break
+ i = i+k
+ continue
+ match = special.match(rawdata, i)
+ if match:
+ if self.literal:
+ self.handle_data(rawdata[i])
+ i = i+1
+ continue
+ start = i
+ i = match.end(0)
+ self.handle_data(rawdata[start:i])
+ continue
+ elif rawdata[i] == '&':
+ match = charref.match(rawdata, i)
+ if match:
+ name = match.group(1)
+ self.handle_charref(name)
+ i = match.end(0)
+ if rawdata[i-1] != ';': i = i-1
+ continue
+ match = entityref.match(rawdata, i)
+ if match:
+ name = match.group(1)
+ self.handle_entityref(name)
+ i = match.end(0)
+ if rawdata[i-1] != ';': i = i-1
+ continue
+ else:
+ raise RuntimeError, 'neither < nor & ??'
+ # We get here only if incomplete matches but
+ # nothing else
+ match = incomplete.match(rawdata, i)
+ if not match:
+ self.handle_data(rawdata[i])
+ i = i+1
+ continue
+ j = match.end(0)
+ if j == n:
+ break # Really incomplete
+ self.handle_data(rawdata[i:j])
+ i = j
+ # end while
+ if end and i < n:
+ self.handle_data(rawdata[i:n])
+ i = n
+ self.rawdata = rawdata[i:]
+ # XXX if end: check for empty stack
+
+    # Internal -- handle the comment that opens at offset i.  The text between
+    # '<!--' and the terminator is passed to handle_comment().  Returns the
+    # total length of the comment, or -1 if its terminator is not buffered yet.
+    def parse_comment(self, i):
+        rawdata = self.rawdata
+        if rawdata[i:i+4] != '<!--':
+            raise RuntimeError('unexpected call to handle_comment')
+        closer = commentclose.search(rawdata, i+4)
+        if closer is None:
+            return -1
+        self.handle_comment(rawdata[i+4:closer.start(0)])
+        return closer.end(0) - i
+
+    # Internal -- handle the processing instruction that opens at offset i.
+    # The text between '<?' and the closing '>' is passed to handle_pi().
+    # Returns the instruction's total length, or -1 if it is not terminated.
+    def parse_pi(self, i):
+        rawdata = self.rawdata
+        if rawdata[i:i+2] != '<?':
+            raise RuntimeError('unexpected call to handle_pi')
+        closer = piclose.search(rawdata, i+2)
+        if closer is None:
+            return -1
+        self.handle_pi(rawdata[i+2:closer.start(0)])
+        return closer.end(0) - i
+
+ __starttag_text = None
+ def get_starttag_text(self):
+ return self.__starttag_text
+
+ # Internal -- handle starttag, return length or -1 if not terminated
+ def parse_starttag(self, i):
+ self.__starttag_text = None
+ start_pos = i
+ rawdata = self.rawdata
+ if shorttagopen.match(rawdata, i):
+ # SGML shorthand: <tag/data/ == <tag>data</tag>
+ # XXX Can data contain &... (entity or char refs)?
+ # XXX Can data contain < or > (tag characters)?
+ # XXX Can there be whitespace before the first /?
+ match = shorttag.match(rawdata, i)
+ if not match:
+ return -1
+ tag, data = match.group(1, 2)
+ self.__starttag_text = '<%s/' % tag
+ tag = string.lower(tag)
+ k = match.end(0)
+ self.finish_shorttag(tag, data)
+ self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
+ return k
+ # XXX The following should skip matching quotes (' or ")
+ match = endbracket.search(rawdata, i+1)
+ if not match:
+ return -1
+ j = match.start(0)
+ # Now parse the data between i+1 and j into a tag and attrs
+ attrs = []
+ if rawdata[i:i+2] == '<>':
+ # SGML shorthand: <> == <last open tag seen>
+ k = j
+ self.handle_data('<>')
+ return k + 1
+ else:
+ match = tagfind.match(rawdata, i+1)
+ if not match:
+ raise RuntimeError, 'unexpected call to parse_starttag'
+ k = match.end(0)
+ tag = string.lower(rawdata[i+1:k])
+ self.lasttag = tag
+ while k < j:
+ match = attrfind.match(rawdata, k)
+ if not match: break
+ attrname, rest, attrvalue = match.group(1, 2, 3)
+ if not rest:
+ attrvalue = attrname
+ elif attrvalue[:1] == '\'' or attrvalue[:1] == '"':
+ attrvalue = attrvalue[1:-1]
+ attrs.append((string.lower(attrname), attrvalue))
+ k = match.end(0)
+ if rawdata[j] == '>':
+ j = j+1
+ self.__starttag_text = rawdata[start_pos:j]
+ self.finish_starttag(tag, attrs)
+ return j
+
+    # Internal -- parse the end tag that opens at offset i.  Returns the
+    # offset at which scanning should resume, or -1 if no '<' or '>' follows
+    # in the buffered data.
+    def parse_endtag(self, i):
+        rawdata = self.rawdata
+        bracket = endbracket.search(rawdata, i+1)
+        if bracket is None:
+            return -1
+        resume = bracket.start(0)
+        name = rawdata[i+2:resume].strip().lower()
+        # Only a real '>' is consumed; a '<' is left for the next scan.
+        if rawdata[resume] == '>':
+            resume = resume + 1
+        self.finish_endtag(name)
+        return resume
+
+ # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
+ def finish_shorttag(self, tag, data):
+ self.finish_starttag(tag, [])
+ self.handle_data(data)
+ self.finish_endtag(tag)
+
+    # Internal -- dispatch a parsed start tag to its handler.
+    # A start_<tag> method wins and pushes the tag on the stack (returns 1);
+    # otherwise a do_<tag> method is used without touching the stack
+    # (returns 0); with neither, unknown_starttag() is called (returns -1).
+    def finish_starttag(self, tag, attrs):
+        try:
+            handler = getattr(self, 'start_' + tag)
+        except AttributeError:
+            pass
+        else:
+            self.stack.append(tag)
+            self.handle_starttag(tag, handler, attrs)
+            return 1
+        try:
+            handler = getattr(self, 'do_' + tag)
+        except AttributeError:
+            self.unknown_starttag(tag, attrs)
+            return -1
+        self.handle_starttag(tag, handler, attrs)
+        return 0
+
+ # Internal -- finish processing of end tag
+ def finish_endtag(self, tag):
+ if not tag:
+ found = len(self.stack) - 1
+ if found < 0:
+ self.unknown_endtag(tag)
+ return
+ else:
+ if tag not in self.stack:
+ try:
+ method = getattr(self, 'end_' + tag)
+ except AttributeError:
+ self.unknown_endtag(tag)
+ else:
+ self.report_unbalanced(tag)
+ return
+ found = len(self.stack)
+ for i in range(found):
+ if self.stack[i] == tag: found = i
+ while len(self.stack) > found:
+ tag = self.stack[-1]
+ try:
+ method = getattr(self, 'end_' + tag)
+ except AttributeError:
+ method = None
+ if method:
+ self.handle_endtag(tag, method)
+ else:
+ self.unknown_endtag(tag)
+ del self.stack[-1]
+
+ # Overridable -- handle start tag
+ def handle_starttag(self, tag, method, attrs):
+ method(attrs)
+
+ # Overridable -- handle end tag
+ def handle_endtag(self, tag, method):
+ method()
+
+ # Example -- report an unbalanced </...> tag.
+ def report_unbalanced(self, tag):
+ if self.verbose:
+ print '*** Unbalanced </' + tag + '>'
+ print '*** Stack:', self.stack
+
+    # Example -- handle character reference, no need to override.
+    # References that are not integers in 0..255 go to unknown_charref();
+    # the rest are delivered as a single character through handle_data().
+    def handle_charref(self, name):
+        try:
+            code = string.atoi(name)
+        except string.atoi_error:
+            code = -1    # forces the "unknown" path below
+        if code < 0 or code > 255:
+            self.unknown_charref(name)
+        else:
+            self.handle_data(chr(code))
+
+ # Definition of entities -- derived classes may override
+ entitydefs = \
+ {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
+
+    # Example -- handle entity reference, no need to override.
+    # Names present in self.entitydefs are replaced by their text; any other
+    # name is reported through unknown_entityref().
+    def handle_entityref(self, name):
+        table = self.entitydefs
+        if name not in table:
+            self.unknown_entityref(name)
+            return
+        self.handle_data(table[name])
+
+ # Example -- handle data, should be overridden
+ def handle_data(self, data):
+ pass
+
+ # Example -- handle comment, could be overridden
+ def handle_comment(self, data):
+ pass
+
+ # Example -- handle processing instruction, could be overridden
+ def handle_pi(self, data):
+ pass
+
+ # To be overridden -- handlers for unknown objects
+ def unknown_starttag(self, tag, attrs): pass
+ def unknown_endtag(self, tag): pass
+ def unknown_charref(self, ref): pass
+ def unknown_entityref(self, ref): pass
+
+
+class TestSGMLParser(SGMLParser):
+    """Parser subclass that prints every event it receives."""
+
+    def __init__(self, verbose=0):
+        # Character data is buffered here until flush() prints it.
+        self.testdata = ""
+        SGMLParser.__init__(self, verbose)
+
+    def handle_data(self, data):
+        self.testdata = self.testdata + data
+        # Print once the repr of the buffered data reaches 70 characters.
+        if len(repr(self.testdata)) >= 70:
+            self.flush()
+
+    def flush(self):
+        pending = self.testdata
+        if not pending:
+            return
+        self.testdata = ""
+        print 'data:', repr(pending)
+
+    def handle_comment(self, data):
+        self.flush()
+        shown = repr(data)
+        if len(shown) > 68:
+            # Long comments keep only their first and last 32 characters.
+            shown = shown[:32] + '...' + shown[-32:]
+        print 'comment:', shown
+
+    def unknown_starttag(self, tag, attrs):
+        self.flush()
+        if attrs:
+            print 'start tag: <' + tag,
+            for name, value in attrs:
+                print name + '="' + value + '"',
+            print '>'
+        else:
+            print 'start tag: <' + tag + '>'
+
+    def unknown_endtag(self, tag):
+        self.flush()
+        print 'end tag: </' + tag + '>'
+
+    def unknown_entityref(self, ref):
+        self.flush()
+        print '*** unknown entity ref: &' + ref + ';'
+
+    def unknown_charref(self, ref):
+        self.flush()
+        print '*** unknown char ref: &#' + ref + ';'
+
+    def close(self):
+        # Finish parsing first, then print whatever data is still buffered.
+        SGMLParser.close(self)
+        self.flush()
+
+
+def test(args = None):
+    """Parse a file, feeding it to the parser one character at a time.
+
+    args defaults to sys.argv[1:].  A leading '-s' selects the plain
+    SGMLParser instead of TestSGMLParser.  The next argument names the
+    input file: '-' means stdin, and 'test.html' is used when it is omitted.
+    Exits with status 1 if the file cannot be opened.
+    """
+    import sys
+
+    args = args or sys.argv[1:]
+
+    klass = TestSGMLParser
+    if args and args[0] == '-s':
+        klass = SGMLParser
+        args = args[1:]
+
+    path = 'test.html'
+    if args:
+        path = args[0]
+
+    if path == '-':
+        f = sys.stdin
+    else:
+        try:
+            f = open(path, 'r')
+        except IOError, msg:
+            print path, ":", msg
+            sys.exit(1)
+
+    data = f.read()
+    if f is not sys.stdin:
+        f.close()
+
+    parser = klass()
+    for ch in data:
+        parser.feed(ch)
+    parser.close()
+
+
+if __name__ == '__main__':
+ test()
diff --git a/ekhtml/testsuite/gen_html.py b/ekhtml/testsuite/gen_html.py
new file mode 100644
index 0000000..dc61ffc
--- /dev/null
+++ b/ekhtml/testsuite/gen_html.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+
+#
+# gen_html: An HTML generation utility
+#
+# This utility is used to generate random HTML for the parser to
+# deal with. Flags for each of the components allow variations
+# on HTML, so as to spew either valid or invalid HTML.
+#
+
+from string import *
+from ek_sgmllib import tagfind
+from random import randint
+import re, sys
+
+attrname = re.compile('[a-zA-Z_][-.a-zA-Z_0-9]*')
+attrvalue = re.compile('[-a-zA-Z0-9@./:+*%?!&$\(\)_#=~]')
+quotedattrvalueset = digits + '#$%&()*+,-./:;=?@[\]^_`{|}~'
+attrvalueset = '-@./:+*%?!&$()_#=~*' + letters + digits
+
+DataString = ['In A.D. 2101',
+ 'War was beginning.',
+ 'What happen?',
+ 'Somebody set up us the bomb',
+ 'We get signal',
+ 'What!',
+ 'Main screen turn on',
+ "It's You!!",
+ 'How are you gentlemen!!',
+ 'All your base are belong to us',
+ 'You are on the way to destruction',
+ 'What you say!!',
+ 'You have no chance to survive make your time',
+ 'HA HA HA HA ....',
+ "Take off every 'zig'",
+ 'You know what you doing',
+ "Move 'zig'",
+ 'For great justice']
+
+DataWords = split(join(DataString, ' '))
+
+def get_word(matcher, bogus=0):
+    """Return a random entry of DataWords.
+
+    Unless bogus is true, words are redrawn until one is matched in full
+    by the compiled pattern `matcher`.
+    """
+    last = len(DataWords) - 1
+    while 1:
+        word = DataWords[randint(0, last)]
+        if bogus:
+            return word
+        hit = matcher.match(word)
+        if hit and hit.end(0) == len(word):
+            return word
+
+def get_tagname(bogus=0):
+ return upper(get_word(tagfind, bogus))
+
+def get_whitespace():
+    """Return a string of zero to three randomly chosen whitespace characters."""
+    count = randint(0, 3)
+    picked = []
+    for _ in range(count):
+        picked.append(whitespace[randint(0, len(whitespace) - 1)])
+    return join(picked, '')
+
+def get_attrname(bogus=0):
+ return get_word(attrname, bogus)
+
+
+def get_quoted_attrvalue(bogus=0):
+ quoteidx = randint(0, 1)
+ quote = ["'", '"'][quoteidx]
+ otherquote = ["'", '"'][not quoteidx]
+
+ res = quote
+ for i in range(randint(0, 20)):
+ if not randint(0, 5):
+ res = res +quotedattrvalueset[randint(0,len(quotedattrvalueset)-1)]
+ elif not randint(0, 5):
+ res = res + otherquote
+ elif not randint(0, 5):
+ res = res + get_whitespace()
+ else:
+ res = res + get_attrname(0)
+
+ if not bogus:
+ res = res + quote
+ else:
+ res = res + [otherquote, '<', '>'][randint(0, 2)]
+ return res
+
+def get_unquoted_attrvalue(bogus=0):
+ res = ''
+ for i in range(randint(not bogus, 20)):
+ if not randint(0, 5):
+ res = res + get_tagname(bogus)
+ elif bogus and not randint(0, 5):
+ res = res + get_whitespace()
+ else:
+ res = res + attrvalueset[randint(0, len(attrvalueset)-1)]
+
+ return res
+
+def get_starttag(bogus=0):
+ res = '<'
+ if bogus and not randint(0, 10):
+ res = res + whitespace[randint(0, len(whitespace)-1)]
+
+ res = res + get_tagname(bogus)
+ if not randint(0, 5):
+ res = res + get_whitespace()
+
+ for nattrs in range(randint(0, 4)):
+ res = res + whitespace[randint(0, len(whitespace)-1)]
+ res = res + get_whitespace()
+ res = res + get_attrname(bogus)
+ t = randint(0, 1)
+ if t:
+ res = res + get_whitespace() + '=' + get_whitespace() + \
+ get_quoted_attrvalue(randint(0, bogus))
+ else:
+ res = res + get_whitespace() + '=' + get_whitespace() + \
+ get_unquoted_attrvalue(randint(0, bogus))
+
+ if bogus and not randint(0, 5):
+ res = res + get_whitespace() + '>'
+ else:
+ res = res + '>'
+
+ return res
+
+def get_comment():
+ res = '<!--'
+
+ for i in range(randint(0, 100)):
+ res = res + get_whitespace()
+ res = res + get_attrname(0)
+ if not randint(0, 5):
+ res = res + '--'
+ elif not randint(0, 10):
+ res = res + '->'
+ res = res + get_whitespace() + '--' + get_whitespace() + '>'
+ return res
+
+def get_endtag():
+ return '</' + get_attrname() + get_whitespace() + '>'
+
+
+# Command line: gen_html.py <# items> <bogus>
+# Prints that many random items, each a start tag, a comment or an end tag.
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        print 'Syntax: %s <# items> <bogus>' % sys.argv[0]
+        sys.exit(1)
+
+    isbogus = int(sys.argv[2])
+    count = int(sys.argv[1])
+    for i in xrange(count):
+        kind = randint(0, 2)
+        if kind == 0:
+            item = get_starttag(isbogus)
+        elif kind == 1:
+            item = get_comment()
+        else:
+            item = get_endtag()
+        print item
diff --git a/ekhtml/testsuite/plparser.pl b/ekhtml/testsuite/plparser.pl
new file mode 100644
index 0000000..f7ee4a1
--- /dev/null
+++ b/ekhtml/testsuite/plparser.pl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+use HTML::Parser();
+use strict;
+# Print the name of a start tag.  Arguments are (tagname, self), matching
+# the "tagname,self" argspec registered below.
+sub start_handler
+{
+    my ($tag, $self) = @_;
+    print "START: \"".$tag."\"\n";
+}
+
+# Print the name of an end tag.  Arguments are (tagname, self).
+sub end_handler
+{
+    my ($tag, $self) = @_;
+    print "END: \"".$tag."\"\n";
+}
+
+# Echo text content unchanged.  Arguments are (dtext, self).
+sub text_handler
+{
+    my ($text, $self) = @_;
+    print $text;
+}
+
+# Print a comment.  The first argument is an array reference (from the
+# "tokens" argspec); its elements are printed back to back inside one pair
+# of quotes.
+sub comment_handler
+{
+    my ($text, $self) = @_;
+    print "COMMENT: \"";
+    foreach my $cur_comment (@$text) {
+        print $cur_comment;
+    }
+    print "\"\n";
+}
+
+# Register the handlers and parse the file named by the first command-line
+# argument; die if no argument is given or parse_file returns false.
+my $p = HTML::Parser->new(api_version => 3);
+$p->handler( start => \&start_handler, "tagname,self");
+$p->handler( end => \&end_handler, "tagname,self");
+$p->handler( text => \&text_handler, "dtext, self");
+$p->handler( comment => \&comment_handler, "tokens, self");
+$p->parse_file(shift || die) || die $!;
+
diff --git a/ekhtml/testsuite/pyparser.py b/ekhtml/testsuite/pyparser.py
new file mode 100644
index 0000000..a96d999
--- /dev/null
+++ b/ekhtml/testsuite/pyparser.py
@@ -0,0 +1,44 @@
+#! /usr/bin/env python
+
+import ek_sgmllib, sys, string
+
+class myparser(ek_sgmllib.SGMLParser):
+    """Reference parser: prints START/ATTRIBUTE/END/COMMENT lines and copies
+    text, entity references and character references to stdout as-is."""
+
+    def __init__(self):
+        ek_sgmllib.SGMLParser.__init__(self)
+
+    def handle_data(self, data):
+        sys.stdout.write(data)
+
+    def start_way(self, attrs):
+        # <way> gets a dedicated handler and skips the generic START output.
+        print 'GOT WAY START!'
+
+    def unknown_starttag(self, tag, attrs):
+        print 'START: "%s"' % tag
+        # Attributes are reported last-to-first.
+        pending = list(attrs)
+        pending.reverse()
+        for key, val in pending:
+            print 'ATTRIBUTE: "%s" = "%s"' % (key, val)
+
+    def unknown_endtag(self, tag):
+        print 'END: "%s"' % tag
+
+    def handle_comment(self, comment):
+        print 'COMMENT: "%s"' % comment
+
+    def handle_entityref(self, name):
+        # Re-emit the reference text instead of expanding it.
+        self.handle_data('&' + name + ';')
+
+    def handle_charref(self, name):
+        # Re-emit the reference text instead of expanding it.
+        self.handle_data('&#' + name + ';')
+
+def testit(data):
+ x = myparser()
+ x.feed(data)
+ x.close()
+
+# Parse the file named on the command line, or stdin when no argument is given.
+if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        data = sys.stdin.read()
+    else:
+        f = open(sys.argv[1])
+        try:
+            data = f.read()
+        finally:
+            # Close the input file even if reading fails.
+            f.close()
+
+    testit(data)
diff --git a/ekhtml/testsuite/test_basic.sh b/ekhtml/testsuite/test_basic.sh
new file mode 100644
index 0000000..87d2e64
--- /dev/null
+++ b/ekhtml/testsuite/test_basic.sh
@@ -0,0 +1,10 @@
+#! /bin/bash
+
+# Run every file under tstpages through both parsers and stop at the first
+# difference.  Paths are read one per line and quoted, so file names
+# containing whitespace are handled correctly.
+while IFS= read -r i
+do
+    echo "Testing page: $i"
+    ./tester 1 < "$i" > ek.test
+    ./pyparser.py "$i" > py.test
+    diff -a -i -u ek.test py.test || exit 1
+done < <(find tstpages -type f | sort)
+
diff --git a/ekhtml/testsuite/test_bogus_feedsize.sh b/ekhtml/testsuite/test_bogus_feedsize.sh
new file mode 100644
index 0000000..12cf9ab
--- /dev/null
+++ b/ekhtml/testsuite/test_bogus_feedsize.sh
@@ -0,0 +1,27 @@
+#! /bin/bash
+#
+# Test the e-k parser to make sure that altering the feedsize doesn't change
+# which tags get recognized. This is really a boundary condition.
+#
+
+NUMTAGS=1000
+NUMFEED=1024
+
+echo Generating crazy.page with $NUMTAGS tags
+python gen_html.py $NUMTAGS 1 > crazy.page
+
+feedsize=1
+
+# Parse the same page once per feed size, from 1 up to and including NUMFEED.
+echo "Parsing with $NUMFEED different feed sizes"
+while [ $feedsize -le $NUMFEED ]; do
+    ./tester $feedsize < crazy.page > crazy.page.out.$feedsize
+    feedsize=$(($feedsize + 1))
+done
+
+# Every output must be byte-identical to the first one.  Comparing contents
+# (not just file sizes) also catches outputs of equal length that differ.
+for out in crazy.page.out.*; do
+    if ! cmp -s crazy.page.out.1 "$out"; then
+        echo 'FAILURE -- Leaving crazy.page.out.* for examination'
+        exit 1
+    fi
+done
+
+rm -f crazy.page.out.*
diff --git a/ekhtml/testsuite/test_crazypage.sh b/ekhtml/testsuite/test_crazypage.sh
new file mode 100644
index 0000000..adcd98c
--- /dev/null
+++ b/ekhtml/testsuite/test_crazypage.sh
@@ -0,0 +1,16 @@
+#! /bin/bash
+
+# For each item count and bogus flag, generate a random page and check that
+# ./tester and ./pyparser.py produce the same output (compared ignoring
+# case) at several feed sizes.
+for ntags in 1 10 100 1000
+do
+    for bogus in 0 1
+    do
+        python gen_html.py $ntags $bogus > crazy.page
+        for bytesize in 1 13 162 983 8192
+        do
+            echo numElements=$ntags allowBogusTags=$bogus feedSize=$bytesize
+            ./tester $bytesize < crazy.page > ek.test
+            # pyparser.py takes no feed size; its output is regenerated on
+            # every pass from the same page.
+            ./pyparser.py < crazy.page > py.test
+            # Stop at the first mismatch; ek.test and py.test are left behind.
+            diff -i -u ek.test py.test || exit 1
+        done
+    done
+done
diff --git a/ekhtml/testsuite/tester.c b/ekhtml/testsuite/tester.c
new file mode 100644
index 0000000..972a156
--- /dev/null
+++ b/ekhtml/testsuite/tester.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2002, Jon Travis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "ekhtml.h"
+
+#define MAGIC_DOODIE 0xf9d33bc1
+
+/* State handed to the parser callbacks through their cbdata pointer. */
+typedef struct {
+    unsigned int n_starttags;   /* incremented by handle_starttag */
+    unsigned int n_endtags;     /* incremented by handle_endtag */
+    unsigned int n_comments;    /* incremented by handle_comment */
+    unsigned int n_data;        /* incremented by handle_data */
+    unsigned int magic_doodie;  /* set to MAGIC_DOODIE; asserted by callbacks */
+    unsigned int only_parse;    /* non-zero: callbacks count but print nothing */
+} tester_cbdata;
+
+/* Start-tag callback registered for the "WAY" tag; prints a fixed marker. */
+static void handle_starttag_way(void *cbdata, ekhtml_string_t *tag,
+                                ekhtml_attr_t *attrs)
+{
+    fputs("GOT WAY START!\n", stdout);
+}
+
+/*
+ * Generic start-tag callback: counts the tag and, unless only_parse is set,
+ * prints the tag name followed by one ATTRIBUTE line per attribute.
+ * A boolean attribute is printed with its own name as the value.
+ */
+static void handle_starttag(void *cbdata, ekhtml_string_t *tag,
+                            ekhtml_attr_t *attrs)
+{
+    ekhtml_attr_t *attr;
+    tester_cbdata *tdata = cbdata;
+
+    assert(tdata->magic_doodie == MAGIC_DOODIE);
+    tdata->n_starttags++;
+    if(tdata->only_parse)
+        return;
+
+    /* "%.*s" reads its precision as an int, so each length is cast. */
+    printf("START: \"%.*s\"\n", (int)tag->len, tag->str);
+    for(attr=attrs; attr; attr=attr->next) {
+        printf("ATTRIBUTE: \"%.*s\" = ", (int)attr->name.len, attr->name.str);
+        if(!attr->isBoolean)
+            printf("\"%.*s\"\n", (int)attr->val.len, attr->val.str);
+        else
+            printf("\"%.*s\"\n", (int)attr->name.len, attr->name.str);
+    }
+}
+
+/* End-tag callback: counts the tag and prints its name unless only_parse is set. */
+static void handle_endtag(void *cbdata, ekhtml_string_t *str){
+    tester_cbdata *tdata = cbdata;
+
+    assert(tdata->magic_doodie == MAGIC_DOODIE);
+    tdata->n_endtags++;
+    if(tdata->only_parse)
+        return;
+
+    /* "%.*s" reads its precision as an int, so the length is cast. */
+    printf("END: \"%.*s\"\n", (int)str->len, str->str);
+}
+
+/* Comment callback: counts the comment and prints its text unless only_parse is set. */
+static void handle_comment(void *cbdata, ekhtml_string_t *str){
+    tester_cbdata *tdata = cbdata;
+
+    assert(tdata->magic_doodie == MAGIC_DOODIE);
+    tdata->n_comments++;
+    if(tdata->only_parse)
+        return;
+
+    /* "%.*s" reads its precision as an int, so the length is cast. */
+    printf("COMMENT: \"%.*s\"\n", (int)str->len, str->str);
+}
+
+/* Data callback: counts the chunk and copies it to stdout unless only_parse is set. */
+static void handle_data(void *cbdata, ekhtml_string_t *str){
+    tester_cbdata *tdata = cbdata;
+
+    assert(tdata->magic_doodie == MAGIC_DOODIE);
+    tdata->n_data++;
+    if(!tdata->only_parse)
+        fwrite(str->str, 1, str->len, stdout);
+}
+
+/*
+ * Feed stdin to the parser in chunks of <feedsize> bytes.
+ *
+ * argv[1] is the chunk size and must be a positive integer.  When exactly
+ * one further argument is present, the callbacks print nothing and the
+ * callback counters are written to stderr at the end instead.
+ * Returns 0 on success, -1 on a usage or allocation error.
+ */
+int main(int argc, char *argv[]){
+    tester_cbdata cbdata;
+    ekhtml_parser_t *ekparser;
+    char *buf;
+    size_t nbuf;
+    int feedsize;
+
+    if(argc < 2){
+        fprintf(stderr, "Syntax: %s <feedsize> [1|0 (to print debug)]\n",
+                argv[0]);
+        return -1;
+    }
+
+    /* A zero or negative size would read nothing or overrun the buffer. */
+    feedsize = atoi(argv[1]);
+    if(feedsize <= 0){
+        fprintf(stderr, "%s: feedsize must be a positive integer\n", argv[0]);
+        return -1;
+    }
+
+    buf = malloc(feedsize);
+    if(buf == NULL){
+        fprintf(stderr, "%s: cannot allocate %d bytes\n", argv[0], feedsize);
+        return -1;
+    }
+
+    ekparser = ekhtml_parser_new(NULL);
+
+    cbdata.n_starttags = 0;
+    cbdata.n_endtags = 0;
+    cbdata.n_comments = 0;
+    cbdata.n_data = 0;
+    cbdata.magic_doodie = MAGIC_DOODIE;
+    cbdata.only_parse = argc == 3;
+
+    ekhtml_parser_datacb_set(ekparser, handle_data);
+    ekhtml_parser_commentcb_set(ekparser, handle_comment);
+    ekhtml_parser_startcb_add(ekparser, "WAY", handle_starttag_way);
+    ekhtml_parser_startcb_add(ekparser, NULL, handle_starttag);
+    ekhtml_parser_endcb_add(ekparser, NULL, handle_endtag);
+    ekhtml_parser_cbdata_set(ekparser, &cbdata);
+
+    /* Feed and flush after every chunk so the feed size shapes the parse. */
+    while((nbuf = fread(buf, 1, feedsize, stdin))){
+        ekhtml_string_t str;
+
+        str.str = buf;
+        str.len = nbuf;
+        ekhtml_parser_feed(ekparser, &str);
+        ekhtml_parser_flush(ekparser, 0);
+    }
+    ekhtml_parser_flush(ekparser, 1);
+    ekhtml_parser_destroy(ekparser);
+    free(buf);
+
+    if(argc == 3){
+        fprintf(stderr,
+                "# starttags: %u\n"
+                "# endtags: %u\n"
+                "# comments: %u\n"
+                "# data: %u\n", cbdata.n_starttags,
+                cbdata.n_endtags, cbdata.n_comments, cbdata.n_data);
+    }
+
+    return 0;
+}
diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/comment.suite b/ekhtml/testsuite/tstpages/01_stock_cases/comment.suite
new file mode 100644
index 0000000..88ce043
--- /dev/null
+++ b/ekhtml/testsuite/tstpages/01_stock_cases/comment.suite
@@ -0,0 +1,18 @@
+<!---->
+<!-- -->
+<!-- -- >
+<!--
+
+--
+
+--
+
+>
+<!-- A comment string -->
+<!-- More comments
+--
+>
+<!-- More comments
+-->
+<!-- And even -- more -- comments - from the -- peanut -- gallery -->
+
diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/endtag.suite b/ekhtml/testsuite/tstpages/01_stock_cases/endtag.suite
new file mode 100644
index 0000000..869db71
--- /dev/null
+++ b/ekhtml/testsuite/tstpages/01_stock_cases/endtag.suite
@@ -0,0 +1,13 @@
+First start out with some properly formed end tags
+</F></FO></FOO>
+
+Now something a little more interesting
+</></FOO BAR></FOO BAR >
+
+Some weird stuff
+</ ></ FOO BAR></ FOO BAR >
+</FOO
+
+></
+
+>
diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/special.suite b/ekhtml/testsuite/tstpages/01_stock_cases/special.suite
new file mode 100644
index 0000000..ffd6517
--- /dev/null
+++ b/ekhtml/testsuite/tstpages/01_stock_cases/special.suite
@@ -0,0 +1,11 @@
+<!>
+<!->
+<!- >
+<!FOOBAR BAZ QUUX!>
+<!FOO
+BAR
+BAZ
+>
+<!FOO
+BAR
+BAZ>
diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/starttag.suite b/ekhtml/testsuite/tstpages/01_stock_cases/starttag.suite
new file mode 100644
index 0000000..c1a03af
--- /dev/null
+++ b/ekhtml/testsuite/tstpages/01_stock_cases/starttag.suite
@@ -0,0 +1,21 @@
+First, valid tags
+<TAG>
+<TAG BOOLATTR>
+<TAG ATTR1=1>
+<TAG ATTR1="1">
+<TAG ATTR1='1'>
+<TAG ATTR1='"1'>
+<TAG ATTR1='""1'>
+<TAG ATTR1='""1"'>
+<TAG ATTR1='""1""'>
+<TAG ATTR1='"1""'>
+<TAG ATTR1='1""'>
+<TAG ATTR1='1"'>
+
+<TAG ATTR1
+ATTR2>
+<TAG ATTR1
+ATTR2
+>
+<TAG ATTR1="foo"
+ATTR2>
diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/way.suite b/ekhtml/testsuite/tstpages/01_stock_cases/way.suite
new file mode 100644
index 0000000..c33756c
--- /dev/null
+++ b/ekhtml/testsuite/tstpages/01_stock_cases/way.suite
@@ -0,0 +1,4 @@
+<WAY>
+
+<WAY COOL>
+<WAY COOL=bar> \ No newline at end of file