diff options
author | admin@progandy.co.cc <admin@progandy.co.cc@eced67a3-f377-a0ae-92ae-d6de1850b05a> | 2010-09-13 18:11:13 +0000 |
---|---|---|
committer | admin@progandy.co.cc <admin@progandy.co.cc@eced67a3-f377-a0ae-92ae-d6de1850b05a> | 2010-09-13 18:11:13 +0000 |
commit | 376594ac1d65cbb31165f5a74775d624c3fd2981 (patch) | |
tree | b1f9bf2ee2f1a82c5d9378c03cf09f72535c001d /ekhtml/testsuite | |
parent | 99ef7f1cfcaa4ead4b860faec88754abbe31ebfc (diff) |
- added basic html conversion for receiving (maybe a bit too powerful)
- some bugfixes
git-svn-id: http://mirotr.googlecode.com/svn/trunk@12 eced67a3-f377-a0ae-92ae-d6de1850b05a
Diffstat (limited to 'ekhtml/testsuite')
-rw-r--r-- | ekhtml/testsuite/.deps/tester.Po | 1 | ||||
-rw-r--r-- | ekhtml/testsuite/Makefile | 406 | ||||
-rw-r--r-- | ekhtml/testsuite/Makefile.am | 26 | ||||
-rw-r--r-- | ekhtml/testsuite/Makefile.in | 406 | ||||
-rw-r--r-- | ekhtml/testsuite/ek_sgmllib.py | 489 | ||||
-rw-r--r-- | ekhtml/testsuite/gen_html.py | 154 | ||||
-rw-r--r-- | ekhtml/testsuite/plparser.pl | 38 | ||||
-rw-r--r-- | ekhtml/testsuite/pyparser.py | 44 | ||||
-rw-r--r-- | ekhtml/testsuite/test_basic.sh | 10 | ||||
-rw-r--r-- | ekhtml/testsuite/test_bogus_feedsize.sh | 27 | ||||
-rw-r--r-- | ekhtml/testsuite/test_crazypage.sh | 16 | ||||
-rw-r--r-- | ekhtml/testsuite/tester.c | 158 | ||||
-rw-r--r-- | ekhtml/testsuite/tstpages/01_stock_cases/comment.suite | 18 | ||||
-rw-r--r-- | ekhtml/testsuite/tstpages/01_stock_cases/endtag.suite | 13 | ||||
-rw-r--r-- | ekhtml/testsuite/tstpages/01_stock_cases/special.suite | 11 | ||||
-rw-r--r-- | ekhtml/testsuite/tstpages/01_stock_cases/starttag.suite | 21 | ||||
-rw-r--r-- | ekhtml/testsuite/tstpages/01_stock_cases/way.suite | 4 |
17 files changed, 1842 insertions, 0 deletions
diff --git a/ekhtml/testsuite/.deps/tester.Po b/ekhtml/testsuite/.deps/tester.Po new file mode 100644 index 0000000..9ce06a8 --- /dev/null +++ b/ekhtml/testsuite/.deps/tester.Po @@ -0,0 +1 @@ +# dummy diff --git a/ekhtml/testsuite/Makefile b/ekhtml/testsuite/Makefile new file mode 100644 index 0000000..5c6b92b --- /dev/null +++ b/ekhtml/testsuite/Makefile @@ -0,0 +1,406 @@ +# Makefile.in generated by automake 1.6.3 from Makefile.am. +# Generated automatically from Makefile.in by configure. + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + +SHELL = /bin/sh + +srcdir = . +top_srcdir = .. + +prefix = /usr/local +exec_prefix = ${prefix} + +bindir = ${exec_prefix}/bin +sbindir = ${exec_prefix}/sbin +libexecdir = ${exec_prefix}/libexec +datadir = ${prefix}/share +sysconfdir = ${prefix}/etc +sharedstatedir = ${prefix}/com +localstatedir = ${prefix}/var +libdir = ${exec_prefix}/lib +infodir = ${prefix}/info +mandir = ${prefix}/man +includedir = ${prefix}/include +oldincludedir = /usr/include +pkgdatadir = $(datadir)/ekhtml +pkglibdir = $(libdir)/ekhtml +pkgincludedir = $(includedir)/ekhtml +top_builddir = .. + +ACLOCAL = ${SHELL} /z/missing --run aclocal-1.6 +AUTOCONF = ${SHELL} /z/missing --run autoconf +AUTOMAKE = ${SHELL} /z/missing --run automake-1.6 +AUTOHEADER = ${SHELL} /z/missing --run autoheader + +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = /bin/install -c +INSTALL_PROGRAM = ${INSTALL} +INSTALL_DATA = ${INSTALL} -m 644 +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_SCRIPT = ${INSTALL} +INSTALL_HEADER = $(INSTALL_DATA) +transform = s,x,x, +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = +host_triplet = i686-pc-mingw32 + +EXEEXT = .exe +OBJEXT = o +PATH_SEPARATOR = : +AMTAR = ${SHELL} /z/missing --run tar +AS = @AS@ +AWK = gawk +CC = gcc +DEPDIR = .deps +DLLTOOL = @DLLTOOL@ +ECHO = echo +INSTALL_STRIP_PROGRAM = ${SHELL} $(install_sh) -c -s +LIBTOOL = $(SHELL) $(top_builddir)/libtool +LN_S = ln -s +MAINT = # +OBJDUMP = @OBJDUMP@ +PACKAGE = ekhtml +RANLIB = ranlib +STRIP = strip +VERSION = 0.3.2 +am__include = include +am__quote = +install_sh = /z/install-sh +EKHTML_LIB = $(top_srcdir)/src/libekhtml.la + +EXTRA_DIST = \ + ek_sgmllib.py \ + gen_html.py \ + plparser.pl \ + pyparser.py \ + test_basic.sh \ + test_bogus_feedsize.sh \ + test_crazypage.sh \ + tstpages + + +noinst_PROGRAMS = tester + +tester_SOURCES = tester.c +tester_LDADD = $(EKHTML_LIB) +tester_LDFLAGS = -static +tester_DEPENDENCIES = $(EKHTML_LIB) + +TESTS = test_basic.sh test_crazypage.sh test_bogus_feedsize.sh +subdir = testsuite +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/include/ekhtml_config.h +CONFIG_CLEAN_FILES = +noinst_PROGRAMS = tester$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) + +am_tester_OBJECTS = tester.$(OBJEXT) +tester_OBJECTS = $(am_tester_OBJECTS) + +DEFS = -DHAVE_CONFIG_H +DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include +CPPFLAGS = +LDFLAGS = +LIBS = +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +DEP_FILES = ./$(DEPDIR)/tester.Po +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ + $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +CFLAGS = -g -O2 +DIST_SOURCES = $(tester_SOURCES) +DIST_COMMON = Makefile.am Makefile.in +SOURCES = $(tester_SOURCES) + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: # Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu testsuite/Makefile +Makefile: # $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +tester$(EXEEXT): $(tester_OBJECTS) $(tester_DEPENDENCIES) + @rm -f tester$(EXEEXT) + $(LINK) $(tester_LDFLAGS) $(tester_OBJECTS) $(tester_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) core *.core + +distclean-compile: + -rm -f *.tab.c + +include ./$(DEPDIR)/tester.Po + +distclean-depend: + -rm -rf ./$(DEPDIR) + +.c.o: + source='$<' object='$@' libtool=no \ + depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' \ + $(CCDEPMODE) $(depcomp) \ + $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$< + +.c.obj: + source='$<' object='$@' libtool=no \ + depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' \ + $(CCDEPMODE) $(depcomp) \ + $(COMPILE) -c `cygpath -w $<` + +.c.lo: + source='$<' object='$@' libtool=yes \ + depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' \ + $(CCDEPMODE) $(depcomp) \ + $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< +CCDEPMODE = depmode=gcc3 + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: + +ETAGS = etags +ETAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH + +check-TESTS: $(TESTS) + @failed=0; all=0; xfail=0; xpass=0; \ + srcdir=$(srcdir); export srcdir; \ + list='$(TESTS)'; \ + if test -n "$$list"; then \ + for tst in $$list; do \ + if test -f ./$$tst; then dir=./; \ + elif test -f $$tst; then dir=; \ + else dir="$(srcdir)/"; fi; \ + if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *" $$tst "*) \ + xpass=`expr $$xpass + 1`; \ + failed=`expr $$failed + 1`; \ + echo "XPASS: $$tst"; \ + ;; \ + *) \ + echo "PASS: $$tst"; \ + ;; \ + esac; \ + elif test $$? -ne 77; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *" $$tst "*) \ + xfail=`expr $$xfail + 1`; \ + echo "XFAIL: $$tst"; \ + ;; \ + *) \ + failed=`expr $$failed + 1`; \ + echo "FAIL: $$tst"; \ + ;; \ + esac; \ + fi; \ + done; \ + if test "$$failed" -eq 0; then \ + if test "$$xfail" -eq 0; then \ + banner="All $$all tests passed"; \ + else \ + banner="All $$all tests behaved as expected ($$xfail expected failures)"; \ + fi; \ + else \ + if test "$$xpass" -eq 0; then \ + banner="$$failed of $$all tests failed"; \ + else \ + banner="$$failed of $$all tests did not behave as expected ($$xpass unexpected passes)"; \ + fi; \ + fi; \ + dashes=`echo "$$banner" | sed s/./=/g`; \ + echo "$$dashes"; \ + echo "$$banner"; \ + echo "$$dashes"; \ + test "$$failed" -eq 0; \ + else :; fi +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. +distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + @list='$(DISTFILES)'; for file in $$list; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="${top_distdir}" distdir="$(distdir)" \ + dist-hook +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile $(PROGRAMS) + +installdirs: + +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + +distclean-am: clean-am distclean-compile distclean-depend \ + distclean-generic distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +uninstall-am: uninstall-info-am + +.PHONY: GTAGS all all-am check check-TESTS check-am clean clean-generic \ + clean-libtool clean-noinstPROGRAMS distclean distclean-compile \ + distclean-depend distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am info info-am install \ + install-am install-data install-data-am install-exec \ + install-exec-am install-info install-info-am install-man \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + tags uninstall uninstall-am uninstall-info-am + + +$(EKHTML_LIB): + cd $(top_srcdir)/src && $(MAKE) + +dist-hook: + rm -rf `find $(distdir)/tstpages -name CVS` +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/ekhtml/testsuite/Makefile.am b/ekhtml/testsuite/Makefile.am new file mode 100644 index 0000000..adbda98 --- /dev/null +++ b/ekhtml/testsuite/Makefile.am @@ -0,0 +1,26 @@ +EKHTML_LIB = $(top_srcdir)/src/libekhtml.la + +EXTRA_DIST = \ + ek_sgmllib.py \ + gen_html.py \ + plparser.pl \ + pyparser.py \ + test_basic.sh \ + test_bogus_feedsize.sh \ + test_crazypage.sh \ + tstpages + +noinst_PROGRAMS = tester + +tester_SOURCES = tester.c +tester_LDADD = $(EKHTML_LIB) +tester_LDFLAGS = -static +tester_DEPENDENCIES = $(EKHTML_LIB) + +$(EKHTML_LIB): + cd $(top_srcdir)/src && $(MAKE) + +TESTS = test_basic.sh test_crazypage.sh test_bogus_feedsize.sh + +dist-hook: + rm -rf `find $(distdir)/tstpages -name CVS` diff --git a/ekhtml/testsuite/Makefile.in b/ekhtml/testsuite/Makefile.in new file mode 100644 index 0000000..0cb60d3 --- /dev/null +++ b/ekhtml/testsuite/Makefile.in @@ -0,0 +1,406 @@ +# Makefile.in generated by automake 1.6.3 from Makefile.am. +# @configure_input@ + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. + +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_HEADER = $(INSTALL_DATA) +transform = @program_transform_name@ +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ + +EXEEXT = @EXEEXT@ +OBJEXT = @OBJEXT@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +AMTAR = @AMTAR@ +AS = @AS@ +AWK = @AWK@ +CC = @CC@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +ECHO = @ECHO@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +MAINT = @MAINT@ +OBJDUMP = @OBJDUMP@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +STRIP = @STRIP@ +VERSION = @VERSION@ +am__include = @am__include@ +am__quote = @am__quote@ +install_sh = @install_sh@ +EKHTML_LIB = $(top_srcdir)/src/libekhtml.la + +EXTRA_DIST = \ + ek_sgmllib.py \ + gen_html.py \ + plparser.pl \ + pyparser.py \ + test_basic.sh \ + test_bogus_feedsize.sh \ + test_crazypage.sh \ + tstpages + + +noinst_PROGRAMS = tester + +tester_SOURCES = tester.c +tester_LDADD = $(EKHTML_LIB) +tester_LDFLAGS = -static +tester_DEPENDENCIES = $(EKHTML_LIB) + +TESTS = test_basic.sh test_crazypage.sh test_bogus_feedsize.sh +subdir = testsuite +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/include/ekhtml_config.h +CONFIG_CLEAN_FILES = +noinst_PROGRAMS = tester$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) + +am_tester_OBJECTS = tester.$(OBJEXT) +tester_OBJECTS = $(am_tester_OBJECTS) + +DEFS = @DEFS@ +DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include +CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/tester.Po +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ + $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +CFLAGS = @CFLAGS@ +DIST_SOURCES = $(tester_SOURCES) +DIST_COMMON = Makefile.am Makefile.in +SOURCES = $(tester_SOURCES) + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu testsuite/Makefile +Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +tester$(EXEEXT): $(tester_OBJECTS) $(tester_DEPENDENCIES) + @rm -f tester$(EXEEXT) + $(LINK) $(tester_LDFLAGS) $(tester_OBJECTS) $(tester_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) core *.core + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tester.Po@am__quote@ + +distclean-depend: + -rm -rf ./$(DEPDIR) + +.c.o: +@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$< + +.c.obj: +@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(COMPILE) -c `cygpath -w $<` + +.c.lo: +@AMDEP_TRUE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< +CCDEPMODE = @CCDEPMODE@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: + +ETAGS = etags +ETAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH + +check-TESTS: $(TESTS) + @failed=0; all=0; xfail=0; xpass=0; \ + srcdir=$(srcdir); export srcdir; \ + list='$(TESTS)'; \ + if test -n "$$list"; then \ + for tst in $$list; do \ + if test -f ./$$tst; then dir=./; \ + elif test -f $$tst; then dir=; \ + else dir="$(srcdir)/"; fi; \ + if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *" $$tst "*) \ + xpass=`expr $$xpass + 1`; \ + failed=`expr $$failed + 1`; \ + echo "XPASS: $$tst"; \ + ;; \ + *) \ + echo "PASS: $$tst"; \ + ;; \ + esac; \ + elif test $$? -ne 77; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *" $$tst "*) \ + xfail=`expr $$xfail + 1`; \ + echo "XFAIL: $$tst"; \ + ;; \ + *) \ + failed=`expr $$failed + 1`; \ + echo "FAIL: $$tst"; \ + ;; \ + esac; \ + fi; \ + done; \ + if test "$$failed" -eq 0; then \ + if test "$$xfail" -eq 0; then \ + banner="All $$all tests passed"; \ + else \ + banner="All $$all tests behaved as expected ($$xfail expected failures)"; \ + fi; \ + else \ + if test "$$xpass" -eq 0; then \ + banner="$$failed of $$all tests failed"; \ + else \ + banner="$$failed of $$all tests did not behave as expected ($$xpass unexpected passes)"; \ + fi; \ + fi; \ + dashes=`echo "$$banner" | sed s/./=/g`; \ + echo "$$dashes"; \ + echo "$$banner"; \ + echo "$$dashes"; \ + test "$$failed" -eq 0; \ + else :; fi +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. +distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + @list='$(DISTFILES)'; for file in $$list; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="${top_distdir}" distdir="$(distdir)" \ + dist-hook +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile $(PROGRAMS) + +installdirs: + +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + +distclean-am: clean-am distclean-compile distclean-depend \ + distclean-generic distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +uninstall-am: uninstall-info-am + +.PHONY: GTAGS all all-am check check-TESTS check-am clean clean-generic \ + clean-libtool clean-noinstPROGRAMS distclean distclean-compile \ + distclean-depend distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am info info-am install \ + install-am install-data install-data-am install-exec \ + install-exec-am install-info install-info-am install-man \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + tags uninstall uninstall-am uninstall-info-am + + +$(EKHTML_LIB): + cd $(top_srcdir)/src && $(MAKE) + +dist-hook: + rm -rf `find $(distdir)/tstpages -name CVS` +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/ekhtml/testsuite/ek_sgmllib.py b/ekhtml/testsuite/ek_sgmllib.py new file mode 100644 index 0000000..f71496d --- /dev/null +++ b/ekhtml/testsuite/ek_sgmllib.py @@ -0,0 +1,489 @@ +"""A parser for SGML, using the derived class as a static DTD.""" + +# Stolen from the Python 2.0 distribution and tweaked by JMT + +# XXX This only supports those SGML features used by HTML. + +# XXX There should be a way to distinguish between PCDATA (parsed +# character data -- the normal case), RCDATA (replaceable character +# data -- only char and entity references and end tags are special) +# and CDATA (character data -- only end tags are special). + + +import re +import string + + +# Regular expressions used for parsing + +interesting = re.compile('[&<]') +incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' + '<([a-zA-Z][^<>]*|' + '/([a-zA-Z][^<>]*)?|' + '![^<>]*)?') + +entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*);') +charref = re.compile('&#([0-9]+)[^0-9];') + +starttagopen = re.compile('<[>a-zA-Z]') +shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/') +shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/') +piopen = re.compile('<\?') +piclose = re.compile('>') +endtagopen = re.compile('</[<>a-zA-Z]') +endbracket = re.compile('[<>]') +special = re.compile('<![^<>]*>') +commentopen = re.compile('<!--') +commentclose = re.compile('--[%s]*>' % string.whitespace) +tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9]*') +attrfind = re.compile( + '[%s]*([a-zA-Z_][-.a-zA-Z_0-9]*)' % string.whitespace + + ('([%s]*=[%s]*' % (string.whitespace, string.whitespace)) + + r'(\'[^\'<>]*[\'<>]|"[^"<>]*["<>]|[-a-zA-Z0-9@./:+*%?!&$\(\)_#=~]*))?') + + +# SGML parser base class -- find tags and call handler functions. +# Usage: p = SGMLParser(); p.feed(data); ...; p.close(). +# The dtd is defined by deriving a class which defines methods +# with special names to handle tags: start_foo and end_foo to handle +# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself. +# (Tags are converted to lower case for this purpose.) The data +# between tags is passed to the parser by calling self.handle_data() +# with some data as argument (the data may be split up in arbitrary +# chunks). Entity references are passed by calling +# self.handle_entityref() with the entity reference as argument. + +class SGMLParser: + + # Interface -- initialize and reset this instance + def __init__(self, verbose=0): + self.verbose = verbose + self.reset() + + # Interface -- reset this instance. Loses all unprocessed data + def reset(self): + self.rawdata = '' + self.stack = [] + self.lasttag = '???' + self.nomoretags = 0 + self.literal = 0 + + # For derived classes only -- enter literal mode (CDATA) till EOF + def setnomoretags(self): + self.nomoretags = self.literal = 1 + + # For derived classes only -- enter literal mode (CDATA) + def setliteral(self, *args): + self.literal = 1 + + # Interface -- feed some data to the parser. Call this as + # often as you want, with as little or as much text as you + # want (may include '\n'). (This just saves the text, all the + # processing is done by goahead().) + def feed(self, data): + self.rawdata = self.rawdata + data + self.goahead(0) + + # Interface -- handle the remaining data + def close(self): + self.goahead(1) + + # Internal -- handle data as far as reasonable. May leave state + # and data to be processed by a subsequent call. If 'end' is + # true, force handling all data as if followed by EOF marker. + def goahead(self, end): + rawdata = self.rawdata + i = 0 + n = len(rawdata) + while i < n: + if self.nomoretags: + self.handle_data(rawdata[i:n]) + i = n + break + match = interesting.search(rawdata, i) + if match: j = match.start(0) + else: j = n + if i < j: self.handle_data(rawdata[i:j]) + i = j + if i == n: break + if rawdata[i] == '<': + if starttagopen.match(rawdata, i): + if self.literal: + self.handle_data(rawdata[i]) + i = i+1 + continue + k = self.parse_starttag(i) + if k < 0: break + i = k + continue + if endtagopen.match(rawdata, i): + k = self.parse_endtag(i) + if k < 0: break + i = k + self.literal = 0 + continue + if commentopen.match(rawdata, i): + if self.literal: + self.handle_data(rawdata[i]) + i = i+1 + continue + k = self.parse_comment(i) + if k < 0: break + i = i+k + continue + if piopen.match(rawdata, i): + if self.literal: + self.handle_data(rawdata[i]) + i = i+1 + continue + k = self.parse_pi(i) + if k < 0: break + i = i+k + continue + match = special.match(rawdata, i) + if match: + if self.literal: + self.handle_data(rawdata[i]) + i = i+1 + continue + start = i + i = match.end(0) + self.handle_data(rawdata[start:i]) + continue + elif rawdata[i] == '&': + match = charref.match(rawdata, i) + if match: + name = match.group(1) + self.handle_charref(name) + i = match.end(0) + if rawdata[i-1] != ';': i = i-1 + continue + match = entityref.match(rawdata, i) + if match: + name = match.group(1) + self.handle_entityref(name) + i = match.end(0) + if rawdata[i-1] != ';': i = i-1 + continue + else: + raise RuntimeError, 'neither < nor & ??' + # We get here only if incomplete matches but + # nothing else + match = incomplete.match(rawdata, i) + if not match: + self.handle_data(rawdata[i]) + i = i+1 + continue + j = match.end(0) + if j == n: + break # Really incomplete + self.handle_data(rawdata[i:j]) + i = j + # end while + if end and i < n: + self.handle_data(rawdata[i:n]) + i = n + self.rawdata = rawdata[i:] + # XXX if end: check for empty stack + + # Internal -- parse comment, return length or -1 if not terminated + def parse_comment(self, i): + rawdata = self.rawdata + if rawdata[i:i+4] <> '<!--': + raise RuntimeError, 'unexpected call to handle_comment' + match = commentclose.search(rawdata, i+4) + if not match: + return -1 + j = match.start(0) + self.handle_comment(rawdata[i+4: j]) + j = match.end(0) + return j-i + + # Internal -- parse processing instr, return length or -1 if not terminated + def parse_pi(self, i): + rawdata = self.rawdata + if rawdata[i:i+2] <> '<?': + raise RuntimeError, 'unexpected call to handle_pi' + match = piclose.search(rawdata, i+2) + if not match: + return -1 + j = match.start(0) + self.handle_pi(rawdata[i+2: j]) + j = match.end(0) + return j-i + + __starttag_text = None + def get_starttag_text(self): + return self.__starttag_text + + # Internal -- handle starttag, return length or -1 if not terminated + def parse_starttag(self, i): + self.__starttag_text = None + start_pos = i + rawdata = self.rawdata + if shorttagopen.match(rawdata, i): + # SGML shorthand: <tag/data/ == <tag>data</tag> + # XXX Can data contain &... (entity or char refs)? + # XXX Can data contain < or > (tag characters)? + # XXX Can there be whitespace before the first /? + match = shorttag.match(rawdata, i) + if not match: + return -1 + tag, data = match.group(1, 2) + self.__starttag_text = '<%s/' % tag + tag = string.lower(tag) + k = match.end(0) + self.finish_shorttag(tag, data) + self.__starttag_text = rawdata[start_pos:match.end(1) + 1] + return k + # XXX The following should skip matching quotes (' or ") + match = endbracket.search(rawdata, i+1) + if not match: + return -1 + j = match.start(0) + # Now parse the data between i+1 and j into a tag and attrs + attrs = [] + if rawdata[i:i+2] == '<>': + # SGML shorthand: <> == <last open tag seen> + k = j + self.handle_data('<>') + return k + 1 + else: + match = tagfind.match(rawdata, i+1) + if not match: + raise RuntimeError, 'unexpected call to parse_starttag' + k = match.end(0) + tag = string.lower(rawdata[i+1:k]) + self.lasttag = tag + while k < j: + match = attrfind.match(rawdata, k) + if not match: break + attrname, rest, attrvalue = match.group(1, 2, 3) + if not rest: + attrvalue = attrname + elif attrvalue[:1] == '\'' or attrvalue[:1] == '"': + attrvalue = attrvalue[1:-1] + attrs.append((string.lower(attrname), attrvalue)) + k = match.end(0) + if rawdata[j] == '>': + j = j+1 + self.__starttag_text = rawdata[start_pos:j] + self.finish_starttag(tag, attrs) + return j + + # Internal -- parse endtag + def parse_endtag(self, i): + rawdata = self.rawdata + match = endbracket.search(rawdata, i+1) + if not match: + return -1 + j = match.start(0) + tag = string.lower(string.strip(rawdata[i+2:j])) + if rawdata[j] == '>': + j = j+1 + self.finish_endtag(tag) + return j + + # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>) + def finish_shorttag(self, tag, data): + self.finish_starttag(tag, []) + self.handle_data(data) + self.finish_endtag(tag) + + # Internal -- finish processing of start tag + # Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag + def finish_starttag(self, tag, attrs): + try: + method = getattr(self, 'start_' + tag) + except AttributeError: + try: + method = getattr(self, 'do_' + tag) + except AttributeError: + self.unknown_starttag(tag, attrs) + return -1 + else: + self.handle_starttag(tag, method, attrs) + return 0 + else: + self.stack.append(tag) + self.handle_starttag(tag, method, attrs) + return 1 + + # Internal -- finish processing of end tag + def finish_endtag(self, tag): + if not tag: + found = len(self.stack) - 1 + if found < 0: + self.unknown_endtag(tag) + return + else: + if tag not in self.stack: + try: + method = getattr(self, 'end_' + tag) + except AttributeError: + self.unknown_endtag(tag) + else: + self.report_unbalanced(tag) + return + found = len(self.stack) + for i in range(found): + if self.stack[i] == tag: found = i + while len(self.stack) > found: + tag = self.stack[-1] + try: + method = getattr(self, 'end_' + tag) + except AttributeError: + method = None + if method: + self.handle_endtag(tag, method) + else: + self.unknown_endtag(tag) + del self.stack[-1] + + # Overridable -- handle start tag + def handle_starttag(self, tag, method, attrs): + method(attrs) + + # Overridable -- handle end tag + def handle_endtag(self, tag, method): + method() + + # Example -- report an unbalanced </...> tag. + def report_unbalanced(self, tag): + if self.verbose: + print '*** Unbalanced </' + tag + '>' + print '*** Stack:', self.stack + + # Example -- handle character reference, no need to override + def handle_charref(self, name): + try: + n = string.atoi(name) + except string.atoi_error: + self.unknown_charref(name) + return + if not 0 <= n <= 255: + self.unknown_charref(name) + return + self.handle_data(chr(n)) + + # Definition of entities -- derived classes may override + entitydefs = \ + {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''} + + # Example -- handle entity reference, no need to override + def handle_entityref(self, name): + table = self.entitydefs + if table.has_key(name): + self.handle_data(table[name]) + else: + self.unknown_entityref(name) + return + + # Example -- handle data, should be overridden + def handle_data(self, data): + pass + + # Example -- handle comment, could be overridden + def handle_comment(self, data): + pass + + # Example -- handle processing instruction, could be overridden + def handle_pi(self, data): + pass + + # To be overridden -- handlers for unknown objects + def unknown_starttag(self, tag, attrs): pass + def unknown_endtag(self, tag): pass + def unknown_charref(self, ref): pass + def unknown_entityref(self, ref): pass + + +class TestSGMLParser(SGMLParser): + + def __init__(self, verbose=0): + self.testdata = "" + SGMLParser.__init__(self, verbose) + + def handle_data(self, data): + self.testdata = self.testdata + data + if len(`self.testdata`) >= 70: + self.flush() + + def flush(self): + data = self.testdata + if data: + self.testdata = "" + print 'data:', `data` + + def handle_comment(self, data): + self.flush() + r = `data` + if len(r) > 68: + r = r[:32] + '...' + r[-32:] + print 'comment:', r + + def unknown_starttag(self, tag, attrs): + self.flush() + if not attrs: + print 'start tag: <' + tag + '>' + else: + print 'start tag: <' + tag, + for name, value in attrs: + print name + '=' + '"' + value + '"', + print '>' + + def unknown_endtag(self, tag): + self.flush() + print 'end tag: </' + tag + '>' + + def unknown_entityref(self, ref): + self.flush() + print '*** unknown entity ref: &' + ref + ';' + + def unknown_charref(self, ref): + self.flush() + print '*** unknown char ref: &#' + ref + ';' + + def close(self): + SGMLParser.close(self) + self.flush() + + +def test(args = None): + import sys + + if not args: + args = sys.argv[1:] + + if args and args[0] == '-s': + args = args[1:] + klass = SGMLParser + else: + klass = TestSGMLParser + + if args: + file = args[0] + else: + file = 'test.html' + + if file == '-': + f = sys.stdin + else: + try: + f = open(file, 'r') + except IOError, msg: + print file, ":", msg + sys.exit(1) + + data = f.read() + if f is not sys.stdin: + f.close() + + x = klass() + for c in data: + x.feed(c) + x.close() + + +if __name__ == '__main__': + test() diff --git a/ekhtml/testsuite/gen_html.py b/ekhtml/testsuite/gen_html.py new file mode 100644 index 0000000..dc61ffc --- /dev/null +++ b/ekhtml/testsuite/gen_html.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python + +# +# gen_html: An HTML generation utility +# +# This utility is used to generate random HTML for the parser to +# deal with. Flags for each of the components allow variations +# on HTML, so as to spew either valid or invalid HTML. +# + +from string import * +from ek_sgmllib import tagfind +from random import randint +import re, sys + +attrname = re.compile('[a-zA-Z_][-.a-zA-Z_0-9]*') +attrvalue = re.compile('[-a-zA-Z0-9@./:+*%?!&$\(\)_#=~]') +quotedattrvalueset = digits + '#$%&()*+,-./:;=?@[\]^_`{|}~' +attrvalueset = '-@./:+*%?!&$()_#=~*' + letters + digits + +DataString = ['In A.D. 2101', + 'War was beginning.', + 'What happen?', + 'Somebody set up us the bomb', + 'We get signal', + 'What!', + 'Main screen turn on', + "It's You!!", + 'How are you gentlemen!!', + 'All your base are belong to us', + 'You are on the way to destruction', + 'What you say!!', + 'You have no chance to survive make your time', + 'HA HA HA HA ....', + "Take off every 'zig'", + 'You know what you doing', + "Move 'zig'", + 'For great justice'] + +DataWords = split(join(DataString, ' ')) + +def get_word(matcher, bogus=0): + while 1: + res = DataWords[randint(0, len(DataWords)-1)] + if not bogus: + z = matcher.match(res) + if not z or z.end(0) != len(res): + continue + return res + +def get_tagname(bogus=0): + return upper(get_word(tagfind, bogus)) + +def get_whitespace(): + return join(map(lambda x: whitespace[randint(0,len(whitespace) - 1)], + range(randint(0, 3))), '') + +def get_attrname(bogus=0): + return get_word(attrname, bogus) + + +def get_quoted_attrvalue(bogus=0): + quoteidx = randint(0, 1) + quote = ["'", '"'][quoteidx] + otherquote = ["'", '"'][not quoteidx] + + res = quote + for i in range(randint(0, 20)): + if not randint(0, 5): + res = res +quotedattrvalueset[randint(0,len(quotedattrvalueset)-1)] + elif not randint(0, 5): + res = res + otherquote + elif not randint(0, 5): + res = res + get_whitespace() + else: + res = res + get_attrname(0) + + if not bogus: + res = res + quote + else: + res = res + [otherquote, '<', '>'][randint(0, 2)] + return res + +def get_unquoted_attrvalue(bogus=0): + res = '' + for i in range(randint(not bogus, 20)): + if not randint(0, 5): + res = res + get_tagname(bogus) + elif bogus and not randint(0, 5): + res = res + get_whitespace() + else: + res = res + attrvalueset[randint(0, len(attrvalueset)-1)] + + return res + +def get_starttag(bogus=0): + res = '<' + if bogus and not randint(0, 10): + res = res + whitespace[randint(0, len(whitespace)-1)] + + res = res + get_tagname(bogus) + if not randint(0, 5): + res = res + get_whitespace() + + for nattrs in range(randint(0, 4)): + res = res + whitespace[randint(0, len(whitespace)-1)] + res = res + get_whitespace() + res = res + get_attrname(bogus) + t = randint(0, 1) + if t: + res = res + get_whitespace() + '=' + get_whitespace() + \ + get_quoted_attrvalue(randint(0, bogus)) + else: + res = res + get_whitespace() + '=' + get_whitespace() + \ + get_unquoted_attrvalue(randint(0, bogus)) + + if bogus and not randint(0, 5): + res = res + get_whitespace() + '>' + else: + res = res + '>' + + return res + +def get_comment(): + res = '<!--' + + for i in range(randint(0, 100)): + res = res + get_whitespace() + res = res + get_attrname(0) + if not randint(0, 5): + res = res + '--' + elif not randint(0, 10): + res = res + '->' + res = res + get_whitespace() + '--' + get_whitespace() + '>' + return res + +def get_endtag(): + return '</' + get_attrname() + get_whitespace() + '>' + + +if __name__ == '__main__': + if len(sys.argv) < 3: + print 'Syntax: %s <# items> <bogus>' % sys.argv[0] + sys.exit(1) + + isbogus = int(sys.argv[2]) + for i in xrange(int(sys.argv[1])): + x = randint(0, 2) + if x == 0: + print get_starttag(isbogus) + elif x == 1: + print get_comment() + elif x == 2: + print get_endtag() diff --git a/ekhtml/testsuite/plparser.pl b/ekhtml/testsuite/plparser.pl new file mode 100644 index 0000000..f7ee4a1 --- /dev/null +++ b/ekhtml/testsuite/plparser.pl @@ -0,0 +1,38 @@ +#!/usr/bin/perl +use HTML::Parser(); +use strict; +sub start_handler +{ + my ($tag, $self) = @_; + print "START: \"".$tag."\"\n"; +} + +sub end_handler +{ + my ($tag, $self) = @_; + print "END: \"".$tag."\"\n"; +} + +sub text_handler +{ + my ($text, $self) = @_; + print $text; +} + +sub comment_handler +{ + my ($text, $self) = @_; + print "COMMENT: \""; + foreach my $cur_comment (@$text) { + print $cur_comment; + } + print "\"\n"; +} + +my $p = HTML::Parser->new(api_version => 3); +$p->handler( start => \&start_handler, "tagname,self"); +$p->handler( end => \&end_handler, "tagname,self"); +$p->handler( text => \&text_handler, "dtext, self"); +$p->handler( comment => \&comment_handler, "tokens, self"); +$p->parse_file(shift || die) || die $!; + diff --git a/ekhtml/testsuite/pyparser.py b/ekhtml/testsuite/pyparser.py new file mode 100644 index 0000000..a96d999 --- /dev/null +++ b/ekhtml/testsuite/pyparser.py @@ -0,0 +1,44 @@ +#! /usr/bin/env python + +import ek_sgmllib, sys, string + +class myparser(ek_sgmllib.SGMLParser): + def __init__(self): + ek_sgmllib.SGMLParser.__init__(self) + + def handle_data(self, data): + sys.stdout.write(data) + + def start_way(self, attrs): + print 'GOT WAY START!' + + def unknown_starttag(self, tag, attrs): + print 'START: "%s"' % tag + for i in range(len(attrs) - 1, -1, -1): + key, val = attrs[i] + print 'ATTRIBUTE: "%s" = "%s"' % (key, val) + + def unknown_endtag(self, tag): + print 'END: "%s"' % tag + + def handle_comment(self, comment): + print 'COMMENT: "%s"' % comment + + def handle_entityref(self, name): + self.handle_data('&' + name + ';') + + def handle_charref(self, name): + self.handle_data('&#' + name + ';') + +def testit(data): + x = myparser() + x.feed(data) + x.close() + +if __name__ == '__main__': + if len(sys.argv) == 1: + f = sys.stdin + else: + f = open(sys.argv[1]) + + testit(f.read()) diff --git a/ekhtml/testsuite/test_basic.sh b/ekhtml/testsuite/test_basic.sh new file mode 100644 index 0000000..87d2e64 --- /dev/null +++ b/ekhtml/testsuite/test_basic.sh @@ -0,0 +1,10 @@ +#! /bin/bash + +for i in `find tstpages -type f | sort` +do + echo Testing page: $i + ./tester 1 < $i > ek.test + ./pyparser.py $i > py.test + diff -a -i -u ek.test py.test || exit 1 +done + diff --git a/ekhtml/testsuite/test_bogus_feedsize.sh b/ekhtml/testsuite/test_bogus_feedsize.sh new file mode 100644 index 0000000..12cf9ab --- /dev/null +++ b/ekhtml/testsuite/test_bogus_feedsize.sh @@ -0,0 +1,27 @@ +#! /bin/bash +# +# Test the e-k parser to make sure that altering the feedsize doesn't change +# which tags get recognized. This is really a boundary condition. +# + +NUMTAGS=1000 +NUMFEED=1024 + +echo Generating crazy.page with $NUMTAGS tags +python gen_html.py $NUMTAGS 1 > crazy.page + +feedsize=1 + +echo "Parsing with $NUMFEED different feed sizes" +while [ $(($feedsize < $NUMFEED)) -eq 1 ]; do + ./tester $feedsize < crazy.page > crazy.page.out.$feedsize + feedsize=$(($feedsize + 1)) +done + +NUMSIZES=`ls -la crazy.page.out.* | awk '{print $5}' | sort | uniq | wc -l` +if [ $NUMSIZES -ne 1 ] ; then + echo 'FAILURE -- Leaving crazy.page.out.* for examination' + exit 1 +fi + +rm -f crazy.page.out.* diff --git a/ekhtml/testsuite/test_crazypage.sh b/ekhtml/testsuite/test_crazypage.sh new file mode 100644 index 0000000..adcd98c --- /dev/null +++ b/ekhtml/testsuite/test_crazypage.sh @@ -0,0 +1,16 @@ +#! /bin/bash + +for ntags in 1 10 100 1000 +do + for bogus in 0 1 + do + python gen_html.py $ntags $bogus > crazy.page + for bytesize in 1 13 162 983 8192 + do + echo numElements=$ntags allowBogusTags=$bogus feedSize=$bytesize + ./tester $bytesize < crazy.page > ek.test + ./pyparser.py < crazy.page > py.test + diff -i -u ek.test py.test || exit 1 + done + done +done diff --git a/ekhtml/testsuite/tester.c b/ekhtml/testsuite/tester.c new file mode 100644 index 0000000..972a156 --- /dev/null +++ b/ekhtml/testsuite/tester.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2002, Jon Travis + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#include "ekhtml.h" + +#define MAGIC_DOODIE 0xf9d33bc1 + +typedef struct { + unsigned int n_starttags; + unsigned int n_endtags; + unsigned int n_comments; + unsigned int n_data; + unsigned int magic_doodie; + unsigned int only_parse; +} tester_cbdata; + +static void handle_starttag_way(void *cbdata, ekhtml_string_t *tag, + ekhtml_attr_t *attrs) +{ + printf("GOT WAY START!\n"); +} + +static void handle_starttag(void *cbdata, ekhtml_string_t *tag, + ekhtml_attr_t *attrs) +{ + ekhtml_attr_t *attr; + tester_cbdata *tdata = cbdata; + + assert(tdata->magic_doodie == MAGIC_DOODIE); + tdata->n_starttags++; + if(tdata->only_parse) + return; + + printf("START: \"%.*s\"\n", tag->len, tag->str); + for(attr=attrs; attr; attr=attr->next) { + printf("ATTRIBUTE: \"%.*s\" = ", attr->name.len, attr->name.str); + if(!attr->isBoolean) + printf("\"%.*s\"\n", attr->val.len, attr->val.str); + else + printf("\"%.*s\"\n", attr->name.len, attr->name.str); + } +} + +static void handle_endtag(void *cbdata, ekhtml_string_t *str){ + tester_cbdata *tdata = cbdata; + + assert(tdata->magic_doodie == MAGIC_DOODIE); + tdata->n_endtags++; + if(tdata->only_parse) + return; + + printf("END: \"%.*s\"\n", str->len, str->str); +} + +static void handle_comment(void *cbdata, ekhtml_string_t *str){ + tester_cbdata *tdata = cbdata; + + assert(tdata->magic_doodie == MAGIC_DOODIE); + tdata->n_comments++; + if(tdata->only_parse) + return; + + printf("COMMENT: \"%.*s\"\n", str->len, str->str); +} + +static void handle_data(void *cbdata, ekhtml_string_t *str){ + tester_cbdata *tdata = cbdata; + + assert(tdata->magic_doodie == MAGIC_DOODIE); + tdata->n_data++; + if(tdata->only_parse) + return; + + fwrite(str->str, str->len, 1, stdout); +} + +int main(int argc, char *argv[]){ + tester_cbdata cbdata; + ekhtml_parser_t *ekparser; + char *buf; + size_t nbuf; + int feedsize; + + if(argc < 2){ + fprintf(stderr, "Syntax: %s <feedsize> [1|0 (to print debug)]\n", + argv[0]); + return -1; + } + + feedsize = atoi(argv[1]); + + ekparser = ekhtml_parser_new(NULL); + + cbdata.n_starttags = 0; + cbdata.n_endtags = 0; + cbdata.n_comments = 0; + cbdata.n_data = 0; + cbdata.magic_doodie = MAGIC_DOODIE; + cbdata.only_parse = argc == 3; + + ekhtml_parser_datacb_set(ekparser, handle_data); + ekhtml_parser_commentcb_set(ekparser, handle_comment); + ekhtml_parser_startcb_add(ekparser, "WAY", handle_starttag_way); + ekhtml_parser_startcb_add(ekparser, NULL, handle_starttag); + ekhtml_parser_endcb_add(ekparser, NULL, handle_endtag); + ekhtml_parser_cbdata_set(ekparser, &cbdata); + buf = malloc(feedsize); + + while((nbuf = fread(buf, 1, feedsize, stdin))){ + ekhtml_string_t str; + + str.str = buf; + str.len = nbuf; + ekhtml_parser_feed(ekparser, &str); + ekhtml_parser_flush(ekparser, 0); + } + ekhtml_parser_flush(ekparser, 1); + ekhtml_parser_destroy(ekparser); + free(buf); + + if(argc == 3){ + fprintf(stderr, + "# starttags: %u\n" + "# endtags: %u\n" + "# comments: %u\n" + "# data: %u\n", cbdata.n_starttags, + cbdata.n_endtags, cbdata.n_comments, cbdata.n_data); + } + + return 0; +} diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/comment.suite b/ekhtml/testsuite/tstpages/01_stock_cases/comment.suite new file mode 100644 index 0000000..88ce043 --- /dev/null +++ b/ekhtml/testsuite/tstpages/01_stock_cases/comment.suite @@ -0,0 +1,18 @@ +<!----> +<!-- --> +<!-- -- > +<!-- + +-- + +-- + +> +<!-- A comment string --> +<!-- More comments +-- +> +<!-- More comments +--> +<!-- And even -- more -- comments - from the -- peanut -- gallery --> + diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/endtag.suite b/ekhtml/testsuite/tstpages/01_stock_cases/endtag.suite new file mode 100644 index 0000000..869db71 --- /dev/null +++ b/ekhtml/testsuite/tstpages/01_stock_cases/endtag.suite @@ -0,0 +1,13 @@ +First start out with some properly formed end tags +</F></FO></FOO> + +Now something a little more interesting +</></FOO BAR></FOO BAR > + +Some weird stuff +</ ></ FOO BAR></ FOO BAR > +</FOO + +></ + +> diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/special.suite b/ekhtml/testsuite/tstpages/01_stock_cases/special.suite new file mode 100644 index 0000000..ffd6517 --- /dev/null +++ b/ekhtml/testsuite/tstpages/01_stock_cases/special.suite @@ -0,0 +1,11 @@ +<!> +<!-> +<!- > +<!FOOBAR BAZ QUUX!> +<!FOO +BAR +BAZ +> +<!FOO +BAR +BAZ> diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/starttag.suite b/ekhtml/testsuite/tstpages/01_stock_cases/starttag.suite new file mode 100644 index 0000000..c1a03af --- /dev/null +++ b/ekhtml/testsuite/tstpages/01_stock_cases/starttag.suite @@ -0,0 +1,21 @@ +First, valid tags +<TAG> +<TAG BOOLATTR> +<TAG ATTR1=1> +<TAG ATTR1="1"> +<TAG ATTR1='1'> +<TAG ATTR1='"1'> +<TAG ATTR1='""1'> +<TAG ATTR1='""1"'> +<TAG ATTR1='""1""'> +<TAG ATTR1='"1""'> +<TAG ATTR1='1""'> +<TAG ATTR1='1"'> + +<TAG ATTR1 +ATTR2> +<TAG ATTR1 +ATTR2 +> +<TAG ATTR1="foo" +ATTR2> diff --git a/ekhtml/testsuite/tstpages/01_stock_cases/way.suite b/ekhtml/testsuite/tstpages/01_stock_cases/way.suite new file mode 100644 index 0000000..c33756c --- /dev/null +++ b/ekhtml/testsuite/tstpages/01_stock_cases/way.suite @@ -0,0 +1,4 @@ +<WAY> + +<WAY COOL> +<WAY COOL=bar>
\ No newline at end of file |