# Provenance: this module was recovered from a patch page that had been
# collapsed onto a single line.  The patch header is kept here for reference:
#
#   From b1509f22892dc98057c750e7fae39ded5cea3b09 Mon Sep 17 00:00:00 2001
#   From: Kirill Volinsky
#   Date: Sat, 19 May 2012 18:01:32 +0000
#   Subject: added MirOTR
#   git-svn-id: http://svn.miranda-ng.org/main/trunk@83 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
#   ---
#   plugins/MirOTR/ekhtml/testsuite/ek_sgmllib.py | 489 ++++++++++++++++++++++++++
#   1 file changed, 489 insertions(+)
#   create mode 100644 plugins/MirOTR/ekhtml/testsuite/ek_sgmllib.py
#   (limited to 'plugins/MirOTR/ekhtml/testsuite/ek_sgmllib.py')
#   diff --git a/plugins/MirOTR/ekhtml/testsuite/ek_sgmllib.py b/plugins/MirOTR/ekhtml/testsuite/ek_sgmllib.py
#   new file mode 100644
#   index 0000000000..f71496d3ce
#   --- /dev/null
#   +++ b/plugins/MirOTR/ekhtml/testsuite/ek_sgmllib.py
#   @@ -0,0 +1,489 @@
"""A parser for SGML, using the derived class as a static DTD."""

# Stolen from the Python 2.0 distribution and tweaked by JMT

# XXX This only supports those SGML features used by HTML.

# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).


import re
import string


# Regular expressions used for parsing

# Any character that may start markup or a reference.
interesting = re.compile(r'[&<]')
# A reference or tag that has been opened but not (yet) terminated.
incomplete = re.compile(r'&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
                        r'<([a-zA-Z][^<>]*|'
                        r'/([a-zA-Z][^<>]*)?|'
                        r'![^<>]*)?')

# Named entity reference terminated by ';'; group 1 is the entity name.
entityref = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*);')
# Numeric character reference; group 1 is the decimal number.
# NOTE(review): as written this requires one non-digit character between the
# digits and the ';', so a plain '&#65;' does NOT match.  Kept unchanged
# because the code that consumes this pattern is not visible here -- confirm
# whether this is an intentional tweak.
charref = re.compile(r'&#([0-9]+)[^0-9];')

starttagopen = re.compile(r'<[>a-zA-Z]')
shorttagopen = re.compile(r'<[a-zA-Z][-.a-zA-Z0-9]*/')
# SGML short tag; group 1 is the tag name, group 2 the enclosed data.
shorttag = re.compile(r'<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
# Raw string: '\?' is not a valid escape sequence in a normal string literal.
piopen = re.compile(r'<\?')
piclose = re.compile(r'>')
# NOTE(review): the recovered text read re.compile('a-zA-Z]'); the leading
# part of the pattern had been eaten by tag stripping.  Restored to the form
# used by the Python 2.0 sgmllib this file derives from -- confirm against
# the upstream file.
endtagopen = re.compile(r'</[<>a-zA-Z]')
endbracket = re.compile(r'[<>]')
# NOTE(review): the recovered text read re.compile(']*>'); restored on the
# same basis as endtagopen above -- confirm against the upstream file.
special = re.compile(r'<![^<>]*>')

# NOTE(review): the recovered text ends in the middle of the next statement,
#     commentopen = re.compile('
# and everything after it (the remainder of the 489-line module) is missing.
# Presumably the pattern was the SGML comment-open delimiter; it is left
# undefined here rather than guessed.  TODO: restore from the upstream file.