summaryrefslogtreecommitdiff
path: root/spamfilter/Utilities/PCRE
diff options
context:
space:
mode:
Diffstat (limited to 'spamfilter/Utilities/PCRE')
-rw-r--r--spamfilter/Utilities/PCRE/bin/pcre.dllbin0 -> 183313 bytes
-rw-r--r--spamfilter/Utilities/PCRE/bin/pcreposix.dllbin0 -> 175142 bytes
-rw-r--r--spamfilter/Utilities/PCRE/include/pcre.h239
-rw-r--r--spamfilter/Utilities/PCRE/include/pcreposix.h117
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcre-bcc.libbin0 -> 2560 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcre.def26
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcre.dll.abin0 -> 15942 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcre.libbin0 -> 6514 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcreposix-bcc.libbin0 -> 2048 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcreposix.def16
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcreposix.dll.abin0 -> 9690 bytes
-rw-r--r--spamfilter/Utilities/PCRE/lib/libpcreposix.libbin0 -> 4386 bytes
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre.3.html174
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_compile.3.html67
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_config.3.html56
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_copy_named_substring.3.html48
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_copy_substring.3.html47
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_exec.3.html71
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_free_substring.3.html37
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_free_substring_list.3.html37
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_fullinfo.3.html67
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_get_named_substring.3.html48
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_get_stringnumber.3.html42
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_get_substring.3.html47
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_get_substring_list.3.html47
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_info.3.html35
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_maketables.3.html39
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_study.3.html49
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcre_version.3.html36
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcreapi.3.html1069
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcrebuild.3.html167
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcrecallout.3.html148
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcrecompat.3.html115
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcregrep.1.html147
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcrepattern.3.html1268
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcreperform.3.html86
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcreposix.3.html187
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcresample.3.html72
-rw-r--r--spamfilter/Utilities/PCRE/man/html/pcretest.1.html433
-rw-r--r--spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.mft58
-rw-r--r--spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.ver2
-rw-r--r--spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.mft12
-rw-r--r--spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.ver2
-rw-r--r--spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.373
-rw-r--r--spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.c191
-rw-r--r--spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.h35
46 files changed, 5410 insertions, 0 deletions
diff --git a/spamfilter/Utilities/PCRE/bin/pcre.dll b/spamfilter/Utilities/PCRE/bin/pcre.dll
new file mode 100644
index 0000000..54f5ae7
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/bin/pcre.dll
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/bin/pcreposix.dll b/spamfilter/Utilities/PCRE/bin/pcreposix.dll
new file mode 100644
index 0000000..06881cf
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/bin/pcreposix.dll
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/include/pcre.h b/spamfilter/Utilities/PCRE/include/pcre.h
new file mode 100644
index 0000000..aa37389
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/include/pcre.h
@@ -0,0 +1,239 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* In its original form, this is the .in file that is transformed by
+"configure" into pcre.h.
+
+ Copyright (c) 1997-2004 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef _PCRE_H
+#define _PCRE_H
+
+/* The file pcre.h is built by "configure". Do not edit it; instead
+make changes to pcre.in. */
+
+#define PCRE_MAJOR 5
+#define PCRE_MINOR 0
+#define PCRE_DATE 13-Sep-2004
+
+/* Win32 uses DLL by default */
+
+#ifdef _WIN32
+# ifdef PCRE_DEFINITION
+# ifdef DLL_EXPORT
+# define PCRE_DATA_SCOPE __declspec(dllexport)
+# endif
+# else
+# ifndef PCRE_STATIC
+# define PCRE_DATA_SCOPE extern __declspec(dllimport)
+# endif
+# endif
+#endif
+#ifndef PCRE_DATA_SCOPE
+# define PCRE_DATA_SCOPE extern
+#endif
+
+/* Have to include stdlib.h in order to ensure that size_t is defined;
+it is needed here for malloc. */
+
+#include <stdlib.h>
+
+/* Allow for C++ users */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Options */
+
+#define PCRE_CASELESS 0x0001
+#define PCRE_MULTILINE 0x0002
+#define PCRE_DOTALL 0x0004
+#define PCRE_EXTENDED 0x0008
+#define PCRE_ANCHORED 0x0010
+#define PCRE_DOLLAR_ENDONLY 0x0020
+#define PCRE_EXTRA 0x0040
+#define PCRE_NOTBOL 0x0080
+#define PCRE_NOTEOL 0x0100
+#define PCRE_UNGREEDY 0x0200
+#define PCRE_NOTEMPTY 0x0400
+#define PCRE_UTF8 0x0800
+#define PCRE_NO_AUTO_CAPTURE 0x1000
+#define PCRE_NO_UTF8_CHECK 0x2000
+#define PCRE_AUTO_CALLOUT 0x4000
+#define PCRE_PARTIAL 0x8000
+
+/* Exec-time and get/set-time error codes */
+
+#define PCRE_ERROR_NOMATCH (-1)
+#define PCRE_ERROR_NULL (-2)
+#define PCRE_ERROR_BADOPTION (-3)
+#define PCRE_ERROR_BADMAGIC (-4)
+#define PCRE_ERROR_UNKNOWN_NODE (-5)
+#define PCRE_ERROR_NOMEMORY (-6)
+#define PCRE_ERROR_NOSUBSTRING (-7)
+#define PCRE_ERROR_MATCHLIMIT (-8)
+#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
+#define PCRE_ERROR_BADUTF8 (-10)
+#define PCRE_ERROR_BADUTF8_OFFSET (-11)
+#define PCRE_ERROR_PARTIAL (-12)
+#define PCRE_ERROR_BADPARTIAL (-13)
+#define PCRE_ERROR_INTERNAL (-14)
+#define PCRE_ERROR_BADCOUNT (-15)
+
+/* Request types for pcre_fullinfo() */
+
+#define PCRE_INFO_OPTIONS 0
+#define PCRE_INFO_SIZE 1
+#define PCRE_INFO_CAPTURECOUNT 2
+#define PCRE_INFO_BACKREFMAX 3
+#define PCRE_INFO_FIRSTBYTE 4
+#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
+#define PCRE_INFO_FIRSTTABLE 5
+#define PCRE_INFO_LASTLITERAL 6
+#define PCRE_INFO_NAMEENTRYSIZE 7
+#define PCRE_INFO_NAMECOUNT 8
+#define PCRE_INFO_NAMETABLE 9
+#define PCRE_INFO_STUDYSIZE 10
+#define PCRE_INFO_DEFAULT_TABLES 11
+
+/* Request types for pcre_config() */
+
+#define PCRE_CONFIG_UTF8 0
+#define PCRE_CONFIG_NEWLINE 1
+#define PCRE_CONFIG_LINK_SIZE 2
+#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
+#define PCRE_CONFIG_MATCH_LIMIT 4
+#define PCRE_CONFIG_STACKRECURSE 5
+#define PCRE_CONFIG_UNICODE_PROPERTIES 6
+
+/* Bit flags for the pcre_extra structure */
+
+#define PCRE_EXTRA_STUDY_DATA 0x0001
+#define PCRE_EXTRA_MATCH_LIMIT 0x0002
+#define PCRE_EXTRA_CALLOUT_DATA 0x0004
+#define PCRE_EXTRA_TABLES 0x0008
+
+/* Types */
+
+struct real_pcre; /* declaration; the definition is private */
+typedef struct real_pcre pcre;
+
+/* The structure for passing additional data to pcre_exec(). This is defined in
+such a way as to be extensible. Always add new fields at the end, in order to
+remain compatible. */
+
+typedef struct pcre_extra {
+ unsigned long int flags; /* Bits for which fields are set */
+ void *study_data; /* Opaque data from pcre_study() */
+ unsigned long int match_limit; /* Maximum number of calls to match() */
+ void *callout_data; /* Data passed back in callouts */
+ const unsigned char *tables; /* Pointer to character tables */
+} pcre_extra;
+
+/* The structure for passing out data via the pcre_callout_function. We use a
+structure so that new fields can be added on the end in future versions,
+without changing the API of the function, thereby allowing old clients to work
+without modification. */
+
+typedef struct pcre_callout_block {
+ int version; /* Identifies version of block */
+ /* ------------------------ Version 0 ------------------------------- */
+ int callout_number; /* Number compiled into pattern */
+ int *offset_vector; /* The offset vector */
+ const char *subject; /* The subject being matched */
+ int subject_length; /* The length of the subject */
+ int start_match; /* Offset to start of this match attempt */
+ int current_position; /* Where we currently are in the subject */
+ int capture_top; /* Max current capture */
+ int capture_last; /* Most recently closed capture */
+ void *callout_data; /* Data passed in with the call */
+ /* ------------------- Added for Version 1 -------------------------- */
+ int pattern_position; /* Offset to next item in the pattern */
+ int next_item_length; /* Length of next item in the pattern */
+ /* ------------------------------------------------------------------ */
+} pcre_callout_block;
+
+/* Indirection for store get and free functions. These can be set to
+alternative malloc/free functions if required. Special ones are used in the
+non-recursive case for "frames". There is also an optional callout function
+that is triggered by the (?C) regex item. Some magic is required for Win32 DLL;
+it is null on other OS. For Virtual Pascal, these have to be different again.
+*/
+
+#ifndef VPCOMPAT
+PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
+PCRE_DATA_SCOPE void (*pcre_free)(void *);
+PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
+PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
+PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
+#else /* VPCOMPAT */
+extern void *pcre_malloc(size_t);
+extern void pcre_free(void *);
+extern void *pcre_stack_malloc(size_t);
+extern void pcre_stack_free(void *);
+extern int pcre_callout(pcre_callout_block *);
+#endif /* VPCOMPAT */
+
+/* Exported PCRE functions */
+
+extern pcre *pcre_compile(const char *, int, const char **,
+ int *, const unsigned char *);
+extern int pcre_config(int, void *);
+extern int pcre_copy_named_substring(const pcre *, const char *,
+ int *, int, const char *, char *, int);
+extern int pcre_copy_substring(const char *, int *, int, int,
+ char *, int);
+extern int pcre_exec(const pcre *, const pcre_extra *,
+ const char *, int, int, int, int *, int);
+extern void pcre_free_substring(const char *);
+extern void pcre_free_substring_list(const char **);
+extern int pcre_fullinfo(const pcre *, const pcre_extra *, int,
+ void *);
+extern int pcre_get_named_substring(const pcre *, const char *,
+ int *, int, const char *, const char **);
+extern int pcre_get_stringnumber(const pcre *, const char *);
+extern int pcre_get_substring(const char *, int *, int, int,
+ const char **);
+extern int pcre_get_substring_list(const char *, int *, int,
+ const char ***);
+extern int pcre_info(const pcre *, int *, int *);
+extern const unsigned char *pcre_maketables(void);
+extern pcre_extra *pcre_study(const pcre *, int, const char **);
+extern const char *pcre_version(void);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* End of pcre.h */
diff --git a/spamfilter/Utilities/PCRE/include/pcreposix.h b/spamfilter/Utilities/PCRE/include/pcreposix.h
new file mode 100644
index 0000000..a8056bd
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/include/pcreposix.h
@@ -0,0 +1,117 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+#ifndef _PCREPOSIX_H
+#define _PCREPOSIX_H
+
+/* This is the header for the POSIX wrapper interface to the PCRE Perl-
+Compatible Regular Expression library. It defines the things POSIX says should
+be there. I hope.
+
+ Copyright (c) 1997-2004 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Have to include stdlib.h in order to ensure that size_t is defined. */
+
+#include <stdlib.h>
+
+/* Allow for C++ users */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Options defined by POSIX. */
+
+#define REG_ICASE 0x01
+#define REG_NEWLINE 0x02
+#define REG_NOTBOL 0x04
+#define REG_NOTEOL 0x08
+
+/* These are not used by PCRE, but by defining them we make it easier
+to slot PCRE into existing programs that make POSIX calls. */
+
+#define REG_EXTENDED 0
+#define REG_NOSUB 0
+
+/* Error values. Not all these are relevant or used by the wrapper. */
+
+enum {
+ REG_ASSERT = 1, /* internal error ? */
+ REG_BADBR, /* invalid repeat counts in {} */
+ REG_BADPAT, /* pattern error */
+ REG_BADRPT, /* ? * + invalid */
+ REG_EBRACE, /* unbalanced {} */
+ REG_EBRACK, /* unbalanced [] */
+ REG_ECOLLATE, /* collation error - not relevant */
+ REG_ECTYPE, /* bad class */
+ REG_EESCAPE, /* bad escape sequence */
+ REG_EMPTY, /* empty expression */
+ REG_EPAREN, /* unbalanced () */
+ REG_ERANGE, /* bad range inside [] */
+ REG_ESIZE, /* expression too big */
+ REG_ESPACE, /* failed to get memory */
+ REG_ESUBREG, /* bad back reference */
+ REG_INVARG, /* bad argument */
+ REG_NOMATCH /* match failed */
+};
+
+
+/* The structure representing a compiled regular expression. */
+
+typedef struct {
+ void *re_pcre;
+ size_t re_nsub;
+ size_t re_erroffset;
+} regex_t;
+
+/* The structure in which a captured offset is returned. */
+
+typedef int regoff_t;
+
+typedef struct {
+ regoff_t rm_so;
+ regoff_t rm_eo;
+} regmatch_t;
+
+/* The functions */
+
+extern int regcomp(regex_t *, const char *, int);
+extern int regexec(const regex_t *, const char *, size_t, regmatch_t *, int);
+extern size_t regerror(int, const regex_t *, char *, size_t);
+extern void regfree(regex_t *);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* End of pcreposix.h */
diff --git a/spamfilter/Utilities/PCRE/lib/libpcre-bcc.lib b/spamfilter/Utilities/PCRE/lib/libpcre-bcc.lib
new file mode 100644
index 0000000..706c7af
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcre-bcc.lib
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcre.def b/spamfilter/Utilities/PCRE/lib/libpcre.def
new file mode 100644
index 0000000..a7bdcbf
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcre.def
@@ -0,0 +1,26 @@
+EXPORTS
+ pcre_callout @1 DATA
+ pcre_compile @2
+ pcre_config @3
+ pcre_copy_named_substring @4
+ pcre_copy_substring @5
+ pcre_exec @6
+ pcre_free @7 DATA
+ pcre_free_substring @8
+ pcre_free_substring_list @9
+ pcre_fullinfo @10
+ pcre_get_named_substring @11
+ pcre_get_stringnumber @12
+ pcre_get_substring @13
+ pcre_get_substring_list @14
+ pcre_info @15
+ pcre_maketables @16
+ pcre_malloc @17 DATA
+ pcre_stack_free @18 DATA
+ pcre_stack_malloc @19 DATA
+ pcre_study @20
+ pcre_version @21
+ regcomp @22
+ regerror @23
+ regexec @24
+ regfree @25
diff --git a/spamfilter/Utilities/PCRE/lib/libpcre.dll.a b/spamfilter/Utilities/PCRE/lib/libpcre.dll.a
new file mode 100644
index 0000000..2191488
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcre.dll.a
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcre.lib b/spamfilter/Utilities/PCRE/lib/libpcre.lib
new file mode 100644
index 0000000..71c1835
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcre.lib
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcreposix-bcc.lib b/spamfilter/Utilities/PCRE/lib/libpcreposix-bcc.lib
new file mode 100644
index 0000000..b532b5b
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcreposix-bcc.lib
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcreposix.def b/spamfilter/Utilities/PCRE/lib/libpcreposix.def
new file mode 100644
index 0000000..8dca4c8
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcreposix.def
@@ -0,0 +1,16 @@
+EXPORTS
+ pcre_callout @1 DATA
+ pcre_compile @2
+ pcre_config @3
+ pcre_exec @4
+ pcre_free @5 DATA
+ pcre_fullinfo @6
+ pcre_info @7
+ pcre_malloc @8 DATA
+ pcre_stack_free @9 DATA
+ pcre_stack_malloc @10 DATA
+ pcre_version @11
+ regcomp @12
+ regerror @13
+ regexec @14
+ regfree @15
diff --git a/spamfilter/Utilities/PCRE/lib/libpcreposix.dll.a b/spamfilter/Utilities/PCRE/lib/libpcreposix.dll.a
new file mode 100644
index 0000000..1208b41
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcreposix.dll.a
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/lib/libpcreposix.lib b/spamfilter/Utilities/PCRE/lib/libpcreposix.lib
new file mode 100644
index 0000000..a4bfe43
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/lib/libpcreposix.lib
Binary files differ
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre.3.html b/spamfilter/Utilities/PCRE/man/html/pcre.3.html
new file mode 100644
index 0000000..93f32fa
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre.3.html
@@ -0,0 +1,174 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Introduction</a></h2>
+ <p>
+The PCRE library
+is a set of functions that implement regular expression pattern matching
+using the same syntax and semantics as Perl, with just a few differences.
+The current implementation of PCRE (release 5.x) corresponds approximately
+with Perl 5.8, including support for UTF-8 encoded strings and Unicode general
+category properties. However, this support has to be explicitly enabled;
+it is not the default. <p>
+PCRE is written in C and released as a C library.
+A number of people have written wrappers and interfaces of various kinds.
+A C++ class is included in these contributions, which can be found in the
+<i>Contrib</i> directory at the primary FTP site, which is: <p>
+ ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
+<p>
+Details of exactly which Perl regular expression features are and are not
+supported by PCRE are given in separate documents. See the <b>pcrepattern</b>
+ and <b>pcrecompat</b> pages. <p>
+Some features of PCRE can be included, excluded,
+or changed when the library is built. The <b>pcre_config()</b> function makes
+it possible for a client to discover which features are available. The features
+themselves are described in the <b>pcrebuild</b> page. Documentation about building
+PCRE for various operating systems can be found in the <b>README</b> file in the
+source distribution.
+<h2><a name='sect2' href='#toc2'>User Documentation</a></h2>
+ <p>
+The user documentation for PCRE
+comprises a number of different sections. In the "man" format, each of these
+is a separate "man page". In the HTML format, each is a separate page, linked
+from the index page. In the plain text format, all the sections are concatenated,
+for ease of searching. The sections are as follows: <p>
+ pcre
+this document<br>
+ pcreapi details of PCRE&rsquo;s native API<br>
+ pcrebuild options for building PCRE<br>
+ pcrecallout details of the callout feature<br>
+ pcrecompat discussion of Perl compatibility<br>
+ pcregrep description of the <b>pcregrep</b> command<br>
+ pcrepartial details of the partial matching facility<br>
+ pcrepattern syntax and semantics of supported<br>
+ regular expressions<br>
+ pcreperform discussion of performance issues<br>
+ pcreposix the POSIX-compatible API<br>
+ pcreprecompile details of saving and re-using precompiled patterns<br>
+ pcresample discussion of the sample program<br>
+ pcretest description of the <b>pcretest</b> testing command<br>
+ <p>
+In addition, in the "man" and HTML formats, there is a short page for
+each library function, listing its arguments and results.
+<h2><a name='sect3' href='#toc3'>Limitations</a></h2>
+
+<p>
+There are some size limitations in PCRE but it is hoped that they will
+never in practice be relevant. <p>
+The maximum length of a compiled pattern
+is 65539 (sic) bytes if PCRE is compiled with the default internal linkage
+size of 2. If you want to process regular expressions that are truly enormous,
+you can compile PCRE with an internal linkage size of 3 or 4 (see the <b>README</b>
+file in the source distribution and the <b>pcrebuild</b> documentation for details).
+In these cases the limit is substantially larger. However, the speed of
+execution will be slower. <p>
+All values in repeating quantifiers must be less
+than 65536. The maximum number of capturing subpatterns is 65535. <p>
+There is
+no limit to the number of non-capturing subpatterns, but the maximum depth
+of nesting of all kinds of parenthesized subpattern, including capturing
+subpatterns, assertions, and other types of subpattern, is 200. <p>
+The maximum
+length of a subject string is the largest positive number that an integer
+variable can hold. However, PCRE uses recursion to handle subpatterns and
+indefinite repetition. This means that the available stack space may limit
+the size of a subject string that can be processed by certain patterns.
+<p>
+
+<h2><a name='sect4' href='#toc4'>UTF-8 and Unicode Property Support</a></h2>
+ <p>
+From release 3.3, PCRE has had some
+support for character strings encoded in the UTF-8 format. For release 4.0
+this was greatly extended to cover most common requirements, and in release
+5.0 additional support for Unicode general category properties was added.
+<p>
+In order to process UTF-8 strings, you must build PCRE to include UTF-8 support
+in the code, and, in addition, you must call <b>pcre_compile()</b> with the
+PCRE_UTF8 option flag. When you do this, both the pattern and any subject
+strings that are matched against it are treated as UTF-8 strings instead
+of just strings of bytes. <p>
+If you compile PCRE with UTF-8 support, but do
+not use it at run time, the library will be a bit bigger, but the additional
+run time overhead is limited to testing the PCRE_UTF8 flag in several places,
+so should not be very large. <p>
+If PCRE is built with Unicode character property
+support (which implies UTF-8 support), the escape sequences \p{..}, \P{..}, and
+\X are supported. The available properties that can be tested are limited
+to the general category properties such as Lu for an upper case letter
+or Nd for a decimal number. A full list is given in the <b>pcrepattern</b> documentation.
+The PCRE library is increased in size by about 90K when Unicode property
+support is included. <p>
+The following comments apply when PCRE is running in
+UTF-8 mode: <p>
+1. When you set the PCRE_UTF8 flag, the strings passed as patterns
+and subjects are checked for validity on entry to the relevant functions.
+If an invalid UTF-8 string is passed, an error return is given. In some situations,
+you may already know that your strings are valid, and therefore want to
+skip these checks in order to improve performance. If you set the PCRE_NO_UTF8_CHECK
+flag at compile time or at run time, PCRE assumes that the pattern or subject
+it is given (respectively) contains only valid UTF-8 codes. In this case,
+it does not diagnose an invalid UTF-8 string. If you pass an invalid UTF-8
+string to PCRE when PCRE_NO_UTF8_CHECK is set, the results are undefined.
+Your program may crash. <p>
+2. In a pattern, the escape sequence \x{...}, where the
+contents of the braces is a string of hexadecimal digits, is interpreted
+as a UTF-8 character whose code number is the given hexadecimal number,
+for example: \x{1234}. If a non-hexadecimal digit appears between the braces,
+the item is not recognized. This escape sequence can be used either as a
+literal, or within a character class. <p>
+3. The original hexadecimal escape
+sequence, \xhh, matches a two-byte UTF-8 character if the value is greater
+than 127. <p>
+4. Repeat quantifiers apply to complete UTF-8 characters, not to
+individual bytes, for example: \x{100}{3}. <p>
+5. The dot metacharacter matches
+one UTF-8 character instead of a single byte. <p>
+6. The escape sequence \C can
+be used to match a single byte in UTF-8 mode, but its use can lead to some
+strange effects. <p>
+7. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
+test characters of any code value, but the characters that PCRE recognizes
+as digits, spaces, or word characters remain the same set as before, all
+with values less than 256. This remains true even when PCRE includes Unicode
+property support, because to do otherwise would slow down PCRE in many
+common cases. If you really want to test for a wider sense of, say, "digit",
+you must use Unicode property tests such as \p{Nd}. <p>
+8. Similarly, characters
+that match the POSIX named character classes are all low-valued characters.
+<p>
+9. Case-insensitive matching applies only to characters whose values are
+less than 128, unless PCRE is built with Unicode property support. Even
+when Unicode property support is available, PCRE still uses its own character
+tables when checking the case of low-valued characters, so as not to degrade
+performance. The Unicode property information is used only for characters
+with higher values.
+<h2><a name='sect5' href='#toc5'>Author</a></h2>
+ <p>
+Philip Hazel &lt;ph10@cam.ac.uk&gt; <br>
+University Computing Service, <br>
+Cambridge CB2 3QG, England. <br>
+Phone: +44 1223 334714 <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Introduction</a></li>
+<li><a name='toc2' href='#sect2'>User Documentation</a></li>
+<li><a name='toc3' href='#sect3'>Limitations</a></li>
+<li><a name='toc4' href='#sect4'>UTF-8 and Unicode Property Support</a></li>
+<li><a name='toc5' href='#sect5'>Author</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_compile.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_compile.3.html
new file mode 100644
index 0000000..bd9272c
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_compile.3.html
@@ -0,0 +1,67 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>,
+int *<i>erroffset</i>,</b> <b>const unsigned char *<i>tableptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function
+compiles a regular expression into an internal form. Its arguments are:
+<p>
+ <i>pattern</i> A zero-terminated string containing the<br>
+ regular expression to be compiled<br>
+ <i>options</i> Zero or more option bits<br>
+ <i>errptr</i> Where to put an error message<br>
+ <i>erroffset</i> Offset in pattern where error was found<br>
+ <i>tableptr</i> Pointer to character tables, or NULL to<br>
+ use the built-in default<br>
+ <p>
+The option bits are: <p>
+ PCRE_ANCHORED Force pattern anchoring<br>
+ PCRE_AUTO_CALLOUT Compile automatic callouts<br>
+ PCRE_CASELESS Do caseless matching<br>
+ PCRE_DOLLAR_ENDONLY $ not to match newline at end<br>
+ PCRE_DOTALL . matches anything including NL<br>
+ PCRE_EXTENDED Ignore whitespace and # comments<br>
+ PCRE_EXTRA PCRE extra features<br>
+ (not much use currently)<br>
+ PCRE_MULTILINE ^ and $ match newlines within data<br>
+ PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-<br>
+ theses (named ones available)<br>
+ PCRE_UNGREEDY Invert greediness of quantifiers<br>
+ PCRE_UTF8 Run in UTF-8 mode<br>
+ PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8<br>
+ validity (only relevant if<br>
+ PCRE_UTF8 is set)<br>
+ <p>
+PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and PCRE_NO_UTF8_CHECK.
+<p>
+The yield of the function is a pointer to a private data structure that
+contains the compiled pattern, or NULL if an error was detected. <p>
+There is
+a complete description of the PCRE native API in the <b>pcreapi</b> page and
+a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_config.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_config.3.html
new file mode 100644
index 0000000..edf2450
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_config.3.html
@@ -0,0 +1,56 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function makes
+it possible for a client program to find out which optional features are
+available in the version of the PCRE library it is using. Its arguments
+are as follows: <p>
+ <i>what</i> A code specifying what information is required<br>
+ <i>where</i> Points to where to put the data<br>
+ <p>
+The available codes are: <p>
+ PCRE_CONFIG_LINK_SIZE Internal link size:
+2, 3, or 4<br>
+ PCRE_CONFIG_MATCH_LIMIT Internal resource limit<br>
+ PCRE_CONFIG_NEWLINE Value of the newline character<br>
+ PCRE_CONFIG_POSIX_MALLOC_THRESHOLD<br>
+ Threshold of return slots, above<br>
+ which <b>malloc()</b> is used by<br>
+ the POSIX API<br>
+ PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)<br>
+ PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no)<br>
+ PCRE_CONFIG_UNICODE_PROPERTIES<br>
+ Availability of Unicode property support<br>
+ (1=yes 0=no)<br>
+ <p>
+The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. <p>
+There
+is a complete description of the PCRE native API in the <b>pcreapi</b> page
+and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_copy_named_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_copy_named_substring.3.html
new file mode 100644
index 0000000..09c341f
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_copy_named_substring.3.html
@@ -0,0 +1,48 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>char *<i>buffer</i>, int
+<i>buffersize</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a convenience function for extracting
+a captured substring, identified by name, into a given buffer. The arguments
+are: <p>
+ <i>code</i> Pattern that was successfully matched<br>
+ <i>subject</i> Subject that has been successfully matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>stringname</i> Name of the required substring<br>
+ <i>buffer</i> Buffer to receive the string<br>
+ <i>buffersize</i> Size of buffer<br>
+ <p>
+The yield is the length of the substring, PCRE_ERROR_NOMEMORY if the buffer
+was too small, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
+<p>
+There is a complete description of the PCRE native API in the <b>pcreapi</b>
+ page and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_copy_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_copy_substring.3.html
new file mode 100644
index 0000000..c706691
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_copy_substring.3.html
@@ -0,0 +1,47 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>, char *<i>buffer</i>,</b> <b>int <i>buffersize</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is
+a convenience function for extracting a captured substring into a given
+buffer. The arguments are: <p>
+ <i>subject</i> Subject that has been successfully
+matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>stringnumber</i> Number of the required substring<br>
+ <i>buffer</i> Buffer to receive the string<br>
+ <i>buffersize</i> Size of buffer<br>
+ <p>
+The yield is the length of the string, PCRE_ERROR_NOMEMORY if the buffer
+was too small, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
+<p>
+There is a complete description of the PCRE native API in the <b>pcreapi</b>
+ page and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_exec.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_exec.3.html
new file mode 100644
index 0000000..c985429
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_exec.3.html
@@ -0,0 +1,71 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>const char
+*<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b> <b>int <i>options</i>, int *<i>ovector</i>, int
+<i>ovecsize</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function matches a compiled regular expression
+against a given subject string, and returns offsets to capturing subexpressions.
+Its arguments are: <p>
+ <i>code</i> Points to the compiled pattern<br>
+ <i>extra</i> Points to an associated <b>pcre_extra</b> structure,<br>
+ or is NULL<br>
+ <i>subject</i> Points to the subject string<br>
+ <i>length</i> Length of the subject string, in bytes<br>
+ <i>startoffset</i> Offset in bytes in the subject at which to<br>
+ start matching<br>
+ <i>options</i> Option bits<br>
+ <i>ovector</i> Points to a vector of ints for result offsets<br>
+ <i>ovecsize</i> Number of elements in the vector (a multiple of 3)<br>
+ <p>
+The options are: <p>
+ PCRE_ANCHORED Match only at the first position<br>
+ PCRE_NOTBOL Subject is not the beginning of a line<br>
+ PCRE_NOTEOL Subject is not the end of a line<br>
+ PCRE_NOTEMPTY An empty string is not a valid match<br>
+ PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8<br>
+ validity (only relevant if PCRE_UTF8<br>
+ was set at compile time)<br>
+ PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match<br>
+ <p>
+There are restrictions on what may appear in a pattern when partial matching
+is requested. <p>
+A <b>pcre_extra</b> structure contains the following fields: <p>
+ <i>flags</i>
+ Bits indicating which fields are set<br>
+ <i>study_data</i> Opaque data from <b>pcre_study()</b><br>
+ <i>match_limit</i> Limit on internal recursion<br>
+ <i>callout_data</i> Opaque data passed back to callouts<br>
+ <i>tables</i> Points to character tables or is NULL<br>
+ <p>
+The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT, PCRE_EXTRA_CALLOUT_DATA,
+and PCRE_EXTRA_TABLES. <p>
+There is a complete description of the PCRE native
+API in the <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b>
+ page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_free_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_free_substring.3.html
new file mode 100644
index 0000000..60a97b6
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_free_substring.3.html
@@ -0,0 +1,37 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a
+convenience function for freeing the store obtained by a previous call
+to <b>pcre_get_substring()</b> or <b>pcre_get_named_substring()</b>. Its only argument
+is a pointer to the string. <p>
+There is a complete description of the PCRE
+native API in the <b>pcreapi</b> page and a description of the POSIX API in
+the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_free_substring_list.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_free_substring_list.3.html
new file mode 100644
index 0000000..a66f3cc
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_free_substring_list.3.html
@@ -0,0 +1,37 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This
+is a convenience function for freeing the store obtained by a previous
+call to <b>pcre_get_substring_list()</b>. Its only argument is a pointer to the
+list of string pointers. <p>
+There is a complete description of the PCRE native
+API in the <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b>
+ page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_fullinfo.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_fullinfo.3.html
new file mode 100644
index 0000000..30392fc
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_fullinfo.3.html
@@ -0,0 +1,67 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>int <i>what</i>,
+void *<i>where</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function returns information about a compiled
+pattern. Its arguments are: <p>
+ <i>code</i> Compiled regular
+expression<br>
+ <i>extra</i> Result of <b>pcre_study()</b> or NULL<br>
+ <i>what</i> What information is required<br>
+ <i>where</i> Where to put the information<br>
+ <p>
+The following information is available: <p>
+ PCRE_INFO_BACKREFMAX Number
+of highest back reference<br>
+ PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns<br>
+ PCRE_INFO_DEFAULT_TABLES Pointer to default tables<br>
+ PCRE_INFO_FIRSTBYTE Fixed first byte for a match, or<br>
+ -1 for start of string<br>
+ or after newline, or<br>
+ -2 otherwise<br>
+ PCRE_INFO_FIRSTTABLE Table of first bytes<br>
+ (after studying)<br>
+ PCRE_INFO_LASTLITERAL Literal last byte required<br>
+ PCRE_INFO_NAMECOUNT Number of named subpatterns<br>
+ PCRE_INFO_NAMEENTRYSIZE Size of name table entry<br>
+ PCRE_INFO_NAMETABLE Pointer to name table<br>
+ PCRE_INFO_OPTIONS Options used for compilation<br>
+ PCRE_INFO_SIZE Size of compiled pattern<br>
+ PCRE_INFO_STUDYSIZE Size of study data<br>
+ <p>
+The yield of the function is zero on success or: <p>
+ PCRE_ERROR_NULL
+ the argument <i>code</i> was NULL<br>
+ the argument <i>where</i> was NULL<br>
+ PCRE_ERROR_BADMAGIC the "magic number" was not found<br>
+ PCRE_ERROR_BADOPTION the value of <i>what</i> was invalid<br>
+ <p>
+There is a complete description of the PCRE native API in the <b>pcreapi</b>
+ page and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_get_named_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_get_named_substring.3.html
new file mode 100644
index 0000000..14f1c49
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_get_named_substring.3.html
@@ -0,0 +1,48 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>const char **<i>stringptr</i>);</b>
+
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a convenience function for extracting a captured substring
+by name. The arguments are: <p>
+ <i>code</i> Compiled pattern<br>
+ <i>subject</i> Subject that has been successfully matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>stringname</i> Name of the required substring<br>
+ <i>stringptr</i> Where to put the string pointer<br>
+ <p>
+The memory in which the substring is placed is obtained by calling <b>pcre_malloc()</b>.
+The yield of the function is the length of the extracted substring, PCRE_ERROR_NOMEMORY
+if sufficient memory could not be obtained, or PCRE_ERROR_NOSUBSTRING if
+the string name is invalid. <p>
+There is a complete description of the PCRE
+native API in the <b>pcreapi</b> page and a description of the POSIX API in
+the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_get_stringnumber.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_get_stringnumber.3.html
new file mode 100644
index 0000000..1ca280c
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_get_stringnumber.3.html
@@ -0,0 +1,42 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b> <b>const char *<i>name</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+
+<p>
+This convenience function finds the number of a named substring capturing
+parenthesis in a compiled pattern. Its arguments are: <p>
+ <i>code</i> Compiled
+regular expression<br>
+ <i>name</i> Name whose number is required<br>
+ <p>
+The yield of the function is the number of the parenthesis if the name
+is found, or PCRE_ERROR_NOSUBSTRING otherwise. <p>
+There is a complete description
+of the PCRE native API in the <b>pcreapi</b> page and a description of the POSIX
+API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_get_substring.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_get_substring.3.html
new file mode 100644
index 0000000..566a393
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_get_substring.3.html
@@ -0,0 +1,47 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>,</b> <b>const char **<i>stringptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a convenience
+function for extracting a captured substring. The arguments are: <p>
+ <i>subject</i>
+ Subject that has been successfully matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>stringnumber</i> Number of the required substring<br>
+ <i>stringptr</i> Where to put the string pointer<br>
+ <p>
+The memory in which the substring is placed is obtained by calling <b>pcre_malloc()</b>.
+The yield of the function is the length of the substring, PCRE_ERROR_NOMEMORY
+if sufficient memory could not be obtained, or PCRE_ERROR_NOSUBSTRING if
+the string number is invalid. <p>
+There is a complete description of the PCRE
+native API in the <b>pcreapi</b> page and a description of the POSIX API in
+the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_get_substring_list.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_get_substring_list.3.html
new file mode 100644
index 0000000..f4e5931
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_get_substring_list.3.html
@@ -0,0 +1,47 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_get_substring_list(const char *<i>subject</i>,</b> <b>int *<i>ovector</i>, int <i>stringcount</i>,
+const char ***<i>listptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This is a convenience function for
+extracting a list of all the captured substrings. The arguments are: <p>
+ <i>subject</i>
+ Subject that has been successfully matched<br>
+ <i>ovector</i> Offset vector that <b>pcre_exec()</b> used<br>
+ <i>stringcount</i> Value returned by <b>pcre_exec()</b><br>
+ <i>listptr</i> Where to put a pointer to the list<br>
+ <p>
+The memory in which the substrings and the list are placed is obtained
+by calling <b>pcre_malloc()</b>. A pointer to a list of pointers is put in the
+variable whose address is in <i>listptr</i>. The list is terminated by a NULL pointer.
+The yield of the function is zero on success or PCRE_ERROR_NOMEMORY if
+sufficient memory could not be obtained. <p>
+There is a complete description
+of the PCRE native API in the <b>pcreapi</b> page and a description of the POSIX
+API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_info.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_info.3.html
new file mode 100644
index 0000000..caf66db
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_info.3.html
@@ -0,0 +1,35 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b> <b>*<i>firstcharptr</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+
+<p>
+This function is obsolete. You should be using <b>pcre_fullinfo()</b> instead. <p>
+There
+is a complete description of the PCRE native API in the <b>pcreapi</b> page
+and a description of the POSIX API in the <b>pcreposix</b> page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_maketables.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_maketables.3.html
new file mode 100644
index 0000000..7a73848
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_maketables.3.html
@@ -0,0 +1,39 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>const unsigned char *pcre_maketables(void);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function
+builds a set of character tables for character values less than 256. These
+can be passed to <b>pcre_compile()</b> to override PCRE&rsquo;s internal, built-in tables
+(which were made by <b>pcre_maketables()</b> when PCRE was compiled). You might
+want to do this if you are using a non-standard locale. The function yields
+a pointer to the tables. <p>
+There is a complete description of the PCRE native
+API in the <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b>
+ page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_study.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_study.3.html
new file mode 100644
index 0000000..20a7a67
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_study.3.html
@@ -0,0 +1,49 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>);</b>
+
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function studies a compiled pattern, to see if additional
+information can be extracted that might speed up matching. Its arguments
+are: <p>
+ <i>code</i> A compiled regular expression<br>
+ <i>options</i> Options for <b>pcre_study()</b><br>
+ <i>errptr</i> Where to put an error message<br>
+ <p>
+If the function succeeds, it returns a value that can be passed to <b>pcre_exec()</b>
+via its <i>extra</i> argument. <p>
+If the function returns NULL, either it could not
+find any additional information, or there was an error. You can tell the
+difference by looking at the error value. It is NULL in the first case. <p>
+There
+are currently no options defined; the value of the second argument should
+always be zero. <p>
+There is a complete description of the PCRE native API in
+the <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b>
+ page. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcre_version.3.html b/spamfilter/Utilities/PCRE/man/html/pcre_version.3.html
new file mode 100644
index 0000000..fe50a95
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcre_version.3.html
@@ -0,0 +1,36 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>char *pcre_version(void);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This function returns a character
+string that gives the version number of the PCRE library and the date of
+its release. <p>
+There is a complete description of the PCRE native API in the
+ <b>pcreapi</b> page and a description of the POSIX API in the <b>pcreposix</b> page.
+<p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcreapi.3.html b/spamfilter/Utilities/PCRE/man/html/pcreapi.3.html
new file mode 100644
index 0000000..a083204
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcreapi.3.html
@@ -0,0 +1,1069 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Native API</a></h2>
+ <p>
+<b>#include &lt;pcre.h&gt;</b>
+<p>
+<font size='-1'></font>
+ <br>
+<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>,
+int *<i>erroffset</i>,</b> <b>const unsigned char *<i>tableptr</i>);</b> <p>
+<br>
+<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>);</b>
+<p>
+<br>
+<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>const char
+*<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b> <b>int <i>options</i>, int *<i>ovector</i>, int
+<i>ovecsize</i>);</b> <p>
+<br>
+<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>char *<i>buffer</i>, int
+<i>buffersize</i>);</b> <p>
+<br>
+<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>, char *<i>buffer</i>,</b> <b>int <i>buffersize</i>);</b> <p>
+<br>
+<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>const char **<i>stringptr</i>);</b>
+<p>
+<br>
+<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b> <b>const char *<i>name</i>);</b> <p>
+<br>
+<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>,</b> <b>const char **<i>stringptr</i>);</b> <p>
+<br>
+<b>int pcre_get_substring_list(const char *<i>subject</i>,</b> <b>int *<i>ovector</i>, int <i>stringcount</i>,
+const char ***<i>listptr</i>);</b> <p>
+<br>
+<b>void pcre_free_substring(const char *<i>stringptr</i>);</b> <p>
+<br>
+<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b> <p>
+<br>
+<b>const unsigned char *pcre_maketables(void);</b> <p>
+<br>
+<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>int <i>what</i>,
+void *<i>where</i>);</b> <p>
+<br>
+<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b> <b>*<i>firstcharptr</i>);</b> <p>
+<br>
+<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b> <p>
+<br>
+<b>char *pcre_version(void);</b> <p>
+<br>
+<b>void *(*pcre_malloc)(size_t);</b> <p>
+<br>
+<b>void (*pcre_free)(void *);</b> <p>
+<br>
+<b>void *(*pcre_stack_malloc)(size_t);</b> <p>
+<br>
+<b>void (*pcre_stack_free)(void *);</b> <p>
+<br>
+<b>int (*pcre_callout)(pcre_callout_block *);</b>
+<h2><a name='sect2' href='#toc2'>Pcre API Overview</a></h2>
+ <p>
+PCRE has
+its own native API, which is described in this document. There is also a
+set of wrapper functions that correspond to the POSIX regular expression
+API. These are described in the <b>pcreposix</b> documentation. <p>
+The native API
+function prototypes are defined in the header file <b>pcre.h</b>, and on Unix systems
+the library itself is called <b>libpcre</b>. It can normally be accessed by adding
+<b>-lpcre</b> to the command for linking an application that uses PCRE. The header
+file defines the macros PCRE_MAJOR and PCRE_MINOR to contain the major
+and minor release numbers for the library. Applications can use these to
+include support for different releases of PCRE. <p>
+The functions <b>pcre_compile()</b>,
+<b>pcre_study()</b>, and <b>pcre_exec()</b> are used for compiling and matching regular
+expressions. A sample program that demonstrates the simplest way of using
+them is provided in the file called <i>pcredemo.c</i> in the source distribution.
+The <b>pcresample</b> documentation describes how to run it. <p>
+In addition to the
+main compiling and matching functions, there are convenience functions
+for extracting captured substrings from a matched subject string. They are:
+<p>
+ <b>pcre_copy_substring()</b><br>
+ <b>pcre_copy_named_substring()</b><br>
+ <b>pcre_get_substring()</b><br>
+ <b>pcre_get_named_substring()</b><br>
+ <b>pcre_get_substring_list()</b><br>
+ <b>pcre_get_stringnumber()</b><br>
+ <p>
+<b>pcre_free_substring()</b> and <b>pcre_free_substring_list()</b> are also provided,
+to free the memory used for extracted strings. <p>
+The function <b>pcre_maketables()</b>
+is used to build a set of character tables in the current locale for passing
+to <b>pcre_compile()</b> or <b>pcre_exec()</b>. This is an optional facility that is provided
+for specialist use. Most commonly, no special tables are passed, in which
+case internal tables that are generated when PCRE is built are used. <p>
+The
+function <b>pcre_fullinfo()</b> is used to find out information about a compiled
+pattern; <b>pcre_info()</b> is an obsolete version that returns only some of the
+available information, but is retained for backwards compatibility. The
+function <b>pcre_version()</b> returns a pointer to a string containing the version
+of PCRE and its date of release. <p>
+The global variables <b>pcre_malloc</b> and <b>pcre_free</b>
+initially contain the entry points of the standard <b>malloc()</b> and <b>free()</b>
+functions, respectively. PCRE calls the memory management functions via
+these variables, so a calling program can replace them if it wishes to
+intercept the calls. This should be done before calling any PCRE functions.
+<p>
+The global variables <b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> are also indirections
+to memory management functions. These special functions are used only when
+PCRE is compiled to use the heap for remembering data, instead of recursive
+function calls. This is a non-standard way of building PCRE, for use in environments
+that have limited stacks. Because of the greater use of memory management,
+it runs more slowly. Separate functions are provided so that special-purpose
+external code can be used for this case. When used, these functions are
+always called in a stack-like manner (last obtained, first freed), and always
+for memory blocks of the same size. <p>
+The global variable <b>pcre_callout</b> initially
+contains NULL. It can be set by the caller to a "callout" function, which
+PCRE will then call at specified points during a matching operation. Details
+are given in the <b>pcrecallout</b> documentation.
+<h2><a name='sect3' href='#toc3'>Multithreading</a></h2>
+ <p>
+The PCRE
+functions can be used in multi-threading applications, with the proviso
+that the memory management functions pointed to by <b>pcre_malloc</b>, <b>pcre_free</b>,
+<b>pcre_stack_malloc</b>, and <b>pcre_stack_free</b>, and the callout function pointed
+to by <b>pcre_callout</b>, are shared by all threads. <p>
+The compiled form of a regular
+expression is not altered during matching, so the same compiled pattern
+can safely be used by several threads at once.
+<h2><a name='sect4' href='#toc4'>Saving Precompiled Patterns
+for Later Use</a></h2>
+ <p>
+The compiled form of a regular expression can be saved and
+re-used at a later time, possibly by a different program, and even on a
+host other than the one on which it was compiled. Details are given in the
+ <b>pcreprecompile</b> documentation.
+<h2><a name='sect5' href='#toc5'>Checking Build-time Options</a></h2>
+ <p>
+<b>int pcre_config(int
+<i>what</i>, void *<i>where</i>);</b> <p>
+The function <b>pcre_config()</b> makes it possible for a
+PCRE client to discover which optional features have been compiled into
+the PCRE library. The <b>pcrebuild</b> documentation has more details about these
+optional features. <p>
+The first argument for <b>pcre_config()</b> is an integer, specifying
+which information is required; the second argument is a pointer to a variable
+into which the information is placed. The following information is available:
+<p>
+ PCRE_CONFIG_UTF8<br>
+ <p>
+The output is an integer that is set to one if UTF-8 support is available;
+otherwise it is set to zero. <p>
+ PCRE_CONFIG_UNICODE_PROPERTIES<br>
+ <p>
+The output is an integer that is set to one if support for Unicode character
+properties is available; otherwise it is set to zero. <p>
+ PCRE_CONFIG_NEWLINE<br>
+ <p>
+The output is an integer that is set to the value of the code that is
+used for the newline character. It is either linefeed (10) or carriage return
+(13), and should normally be the standard character for your operating
+system. <p>
+ PCRE_CONFIG_LINK_SIZE<br>
+ <p>
+The output is an integer that contains the number of bytes used for internal
+linkage in compiled regular expressions. The value is 2, 3, or 4. Larger
+values allow larger regular expressions to be compiled, at the expense
+of slower matching. The default value of 2 is sufficient for all but the
+most massive patterns, since it allows the compiled pattern to be up to
+64K in size. <p>
+ PCRE_CONFIG_POSIX_MALLOC_THRESHOLD<br>
+ <p>
+The output is an integer that contains the threshold above which the POSIX
+interface uses <b>malloc()</b> for output vectors. Further details are given in
+the <b>pcreposix</b> documentation. <p>
+ PCRE_CONFIG_MATCH_LIMIT<br>
+ <p>
+The output is an integer that gives the default limit for the number of
+internal matching function calls in a <b>pcre_exec()</b> execution. Further details
+are given with <b>pcre_exec()</b> below. <p>
+ PCRE_CONFIG_STACKRECURSE<br>
+ <p>
+The output is an integer that is set to one if internal recursion is implemented
+by recursive function calls that use the stack to remember their state.
+This is the usual way that PCRE is compiled. The output is zero if PCRE
+was compiled to use blocks of data on the heap instead of recursive function
+calls. In this case, <b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> are called to
+manage memory blocks on the heap, thus avoiding the use of the stack.
+
+<h2><a name='sect6' href='#toc6'>Compiling a Pattern</a></h2>
+ <p>
+<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
+ <b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b> <b>const unsigned char *<i>tableptr</i>);</b>
+<p>
+The function <b>pcre_compile()</b> is called to compile a pattern into an internal
+form. The pattern is a C string terminated by a binary zero, and is passed
+in the <i>pattern</i> argument. A pointer to a single block of memory that is obtained
+via <b>pcre_malloc</b> is returned. This contains the compiled code and related
+data. The <b>pcre</b> type is defined for the returned block; this is a typedef
+for a structure whose contents are not externally defined. It is up to the
+caller to free the memory when it is no longer required. <p>
+Although the compiled
+code of a PCRE regex is relocatable, that is, it does not depend on memory
+location, the complete <b>pcre</b> data block is not fully relocatable, because
+it may contain a copy of the <i>tableptr</i> argument, which is an address (see
+below). <p>
+The <i>options</i> argument contains independent bits that affect the compilation.
+It should be zero if no options are required. The available options are
+described below. Some of them, in particular, those that are compatible
+with Perl, can also be set and unset from within the pattern (see the detailed
+description in the <b>pcrepattern</b> documentation). For these options, the
+contents of the <i>options</i> argument specify their initial settings at the
+start of compilation and execution. The PCRE_ANCHORED option can be set
+at the time of matching as well as at compile time. <p>
+If <i>errptr</i> is NULL, <b>pcre_compile()</b>
+returns NULL immediately. Otherwise, if compilation of a pattern fails,
+<b>pcre_compile()</b> returns NULL, and sets the variable pointed to by <i>errptr</i>
+to point to a textual error message. The offset from the start of the pattern
+to the character where the error was discovered is placed in the variable
+pointed to by <i>erroffset</i>, which must not be NULL. If it is, an immediate
+error is given. <p>
+If the final argument, <i>tableptr</i>, is NULL, PCRE uses a default
+set of character tables that are built when PCRE is compiled, using the
+default C locale. Otherwise, <i>tableptr</i> must be an address that is the result
+of a call to <b>pcre_maketables()</b>. This value is stored with the compiled pattern,
+and used again by <b>pcre_exec()</b>, unless another table pointer is passed to
+it. For more discussion, see the section on locale support below. <p>
+This code
+fragment shows a typical straightforward call to <b>pcre_compile()</b>: <p>
+ pcre
+*re;<br>
+ const char *error;<br>
+ int erroffset;<br>
+ re = pcre_compile(<br>
+ "^A.*Z", /* the pattern */<br>
+ 0, /* default options */<br>
+ &amp;error, /* for error message */<br>
+ &amp;erroffset, /* for error offset */<br>
+ NULL); /* use default character tables */<br>
+ <p>
+The following names for option bits are defined in the <b>pcre.h</b> header file:
+<p>
+ PCRE_ANCHORED<br>
+ <p>
+If this bit is set, the pattern is forced to be "anchored", that is, it
+is constrained to match only at the first matching point in the string
+that is being searched (the "subject string"). This effect can also be achieved
+by appropriate constructs in the pattern itself, which is the only way
+to do it in Perl. <p>
+ PCRE_AUTO_CALLOUT<br>
+ <p>
+If this bit is set, <b>pcre_compile()</b> automatically inserts callout items,
+all with number 255, before each pattern item. For discussion of the callout
+facility, see the <b>pcrecallout</b> documentation. <p>
+ PCRE_CASELESS<br>
+ <p>
+If this bit is set, letters in the pattern match both upper and lower
+case letters. It is equivalent to Perl&rsquo;s /i option, and it can be changed
+within a pattern by a (?i) option setting. When running in UTF-8 mode, case
+support for high-valued characters is available only when PCRE is built
+with Unicode character property support. <p>
+ PCRE_DOLLAR_ENDONLY<br>
+ <p>
+If this bit is set, a dollar metacharacter in the pattern matches only
+at the end of the subject string. Without this option, a dollar also matches
+immediately before the final character if it is a newline (but not before
+any other newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE
+is set. There is no equivalent to this option in Perl, and no way to set
+it within a pattern. <p>
+ PCRE_DOTALL<br>
+ <p>
+If this bit is set, a dot metacharacter in the pattern matches all characters,
+including newlines. Without it, newlines are excluded. This option is equivalent
+to Perl&rsquo;s /s option, and it can be changed within a pattern by a (?s) option
+setting. A negative class such as [^a] always matches a newline character,
+independent of the setting of this option. <p>
+ PCRE_EXTENDED<br>
+ <p>
+If this bit is set, whitespace data characters in the pattern are totally
+ignored except when escaped or inside a character class. Whitespace does
+not include the VT character (code 11). In addition, characters between
+an unescaped # outside a character class and the next newline character,
+inclusive, are also ignored. This is equivalent to Perl&rsquo;s /x option, and
+it can be changed within a pattern by a (?x) option setting. <p>
+This option
+makes it possible to include comments inside complicated patterns. Note,
+however, that this applies only to data characters. Whitespace characters
+may never appear within special character sequences in a pattern, for example
+within the sequence (?( which introduces a conditional subpattern. <p>
+ PCRE_EXTRA<br>
+ <p>
+This option was invented in order to turn on additional functionality
+of PCRE that is incompatible with Perl, but it is currently of very little
+use. When set, any backslash in a pattern that is followed by a letter that
+has no special meaning causes an error, thus reserving these combinations
+for future expansion. By default, as in Perl, a backslash followed by a
+letter with no special meaning is treated as a literal. There are at present
+no other features controlled by this option. It can also be set by a (?X)
+option setting within a pattern. <p>
+ PCRE_MULTILINE<br>
+ <p>
+By default, PCRE treats the subject string as consisting of a single line
+of characters (even if it actually contains newlines). The "start of line"
+metacharacter (^) matches only at the start of the string, while the "end
+of line" metacharacter ($) matches only at the end of the string, or before
+a terminating newline (unless PCRE_DOLLAR_ENDONLY is set). This is the same
+as Perl. <p>
+When PCRE_MULTILINE is set, the "start of line" and "end of
+line" constructs match immediately following or immediately before any
+newline in the subject string, respectively, as well as at the very start
+and end. This is equivalent to Perl&rsquo;s /m option, and it can be changed within
+a pattern by a (?m) option setting. If there are no "\n" characters in a
+subject string, or no occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE
+has no effect. <p>
+ PCRE_NO_AUTO_CAPTURE<br>
+ <p>
+If this option is set, it disables the use of numbered capturing parentheses
+in the pattern. Any opening parenthesis that is not followed by ? behaves
+as if it were followed by ?: but named parentheses can still be used for
+capturing (and they acquire numbers in the usual way). There is no equivalent
+of this option in Perl. <p>
+ PCRE_UNGREEDY<br>
+ <p>
+This option inverts the "greediness" of the quantifiers so that they are
+not greedy by default, but become greedy if followed by "?". It is not compatible
+with Perl. It can also be set by a (?U) option setting within the pattern.
+<p>
+ PCRE_UTF8<br>
+ <p>
+This option causes PCRE to regard both the pattern and the subject as
+strings of UTF-8 characters instead of single-byte character strings. However,
+it is available only when PCRE is built to include UTF-8 support. If not,
+the use of this option provokes an error. Details of how this option changes
+the behaviour of PCRE are given in the section on UTF-8 support in the
+main <b>pcre</b> page. <p>
+ PCRE_NO_UTF8_CHECK<br>
+ <p>
+When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is
+automatically checked. If an invalid UTF-8 sequence of bytes is found, <b>pcre_compile()</b>
+returns an error. If you already know that your pattern is valid, and you
+want to skip this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK
+option. When it is set, the effect of passing an invalid UTF-8 string as
+a pattern is undefined. It may cause your program to crash. Note that this
+option can also be passed to <b>pcre_exec()</b>, to suppress the UTF-8 validity
+checking of subject strings.
+<h2><a name='sect7' href='#toc7'>Studying a Pattern</a></h2>
+ <p>
+<b>pcre_extra *pcre_study(const
+pcre *<i>code</i>, int <i>options</i>,</b> <b>const char **<i>errptr</i>);</b> <p>
+If a compiled pattern is
+going to be used several times, it is worth spending more time analyzing
+it in order to speed up the time taken for matching. The function <b>pcre_study()</b>
+takes a pointer to a compiled pattern as its first argument. If studying
+the pattern produces additional information that will help speed up matching,
+<b>pcre_study()</b> returns a pointer to a <b>pcre_extra</b> block, in which the <i>study_data</i>
+field points to the results of the study. <p>
+The returned value from <b>pcre_study()</b>
+can be passed directly to <b>pcre_exec()</b>. However, a <b>pcre_extra</b> block also
+contains other fields that can be set by the caller before the block is
+passed; these are described below in the section on matching a pattern.
+<p>
+If studying the pattern does not produce any additional information, <b>pcre_study()</b>
+returns NULL. In that circumstance, if the calling program wants to pass
+any of the other fields to <b>pcre_exec()</b>, it must set up its own <b>pcre_extra</b>
+block. <p>
+The second argument of <b>pcre_study()</b> contains option bits. At present,
+no options are defined, and this argument should always be zero. <p>
+The third
+argument for <b>pcre_study()</b> is a pointer for an error message. If studying
+succeeds (even if no data is returned), the variable it points to is set
+to NULL. Otherwise it points to a textual error message. You should therefore
+test the error pointer for NULL after calling <b>pcre_study()</b>, to be sure
+that it has run successfully. <p>
+This is a typical call to <b>pcre_study()</b>: <p>
+
+pcre_extra *pe;<br>
+ pe = pcre_study(<br>
+ re, /* result of pcre_compile() */<br>
+ 0, /* no options exist */<br>
+ &amp;error); /* set to NULL or points to a message */<br>
+ <p>
+At present, studying a pattern is useful only for non-anchored patterns
+that do not have a single fixed starting character. A bitmap of possible
+starting bytes is created.
+<h2><a name='sect8' href='#toc8'>Locale Support</a></h2>
+ <p>
+PCRE handles caseless matching,
+and determines whether characters are letters, digits, or whatever, by
+reference to a set of tables, indexed by character value. (When running
+in UTF-8 mode, this applies only to characters with codes less than 128.
+Higher-valued codes never match escapes such as \w or \d, but can be tested
+with \p if PCRE is built with Unicode character property support.) <p>
+An internal
+set of tables is created in the default C locale when PCRE is built. This
+is used when the final argument of <b>pcre_compile()</b> is NULL, and is sufficient
+for many applications. An alternative set of tables can, however, be supplied.
+These may be created in a different locale from the default. As more and
+more applications change to using Unicode, the need for this locale support
+is expected to die away. <p>
+External tables are built by calling the <b>pcre_maketables()</b>
+function, which has no arguments, in the relevant locale. The result can
+then be passed to <b>pcre_compile()</b> or <b>pcre_exec()</b> as often as necessary. For
+example, to build and use tables that are appropriate for the French locale
+(where accented characters with values greater than 128 are treated as
+letters), the following code could be used: <p>
+ setlocale(LC_CTYPE, "fr_FR");<br>
+ tables = pcre_maketables();<br>
+ re = pcre_compile(..., tables);<br>
+ <p>
+When <b>pcre_maketables()</b> runs, the tables are built in memory that is obtained
+via <b>pcre_malloc</b>. It is the caller&rsquo;s responsibility to ensure that the memory
+containing the tables remains available for as long as it is needed. <p>
+The
+pointer that is passed to <b>pcre_compile()</b> is saved with the compiled pattern,
+and the same tables are used via this pointer by <b>pcre_study()</b> and normally
+also by <b>pcre_exec()</b>. Thus, by default, for any single pattern, compilation,
+studying and matching all happen in the same locale, but different patterns
+can be compiled in different locales. <p>
+It is possible to pass a table pointer
+or NULL (indicating the use of the internal tables) to <b>pcre_exec()</b>. Although
+not intended for this purpose, this facility could be used to match a pattern
+in a different locale from the one in which it was compiled. Passing table
+pointers at run time is discussed below in the section on matching a pattern.
+
+<h2><a name='sect9' href='#toc9'>Information About a Pattern</a></h2>
+ <p>
+<b>int pcre_fullinfo(const pcre *<i>code</i>, const
+pcre_extra *<i>extra</i>,</b> <b>int <i>what</i>, void *<i>where</i>);</b> <p>
+The <b>pcre_fullinfo()</b> function
+returns information about a compiled pattern. It replaces the obsolete <b>pcre_info()</b>
+function, which is nevertheless retained for backwards compatibility (and
+is documented below). <p>
+The first argument for <b>pcre_fullinfo()</b> is a pointer
+to the compiled pattern. The second argument is the result of <b>pcre_study()</b>,
+or NULL if the pattern was not studied. The third argument specifies which
+piece of information is required, and the fourth argument is a pointer
+to a variable to receive the data. The yield of the function is zero for
+success, or one of the following negative numbers: <p>
+ PCRE_ERROR_NULL
+ the argument <i>code</i> was NULL<br>
+ the argument <i>where</i> was NULL<br>
+ PCRE_ERROR_BADMAGIC the "magic number" was not found<br>
+ PCRE_ERROR_BADOPTION the value of <i>what</i> was invalid<br>
+ <p>
+The "magic number" is placed at the start of each compiled pattern as
+a simple check against passing an arbitrary memory pointer. Here is a typical
+call of <b>pcre_fullinfo()</b>, to obtain the length of the compiled pattern:
+<p>
+ int rc;<br>
+ size_t length;<br>
+ rc = pcre_fullinfo(<br>
+ re, /* result of pcre_compile() */<br>
+ pe, /* result of pcre_study(), or NULL */<br>
+ PCRE_INFO_SIZE, /* what is required */<br>
+ &amp;length); /* where to put the data */<br>
+ <p>
+The possible values for the third argument are defined in <b>pcre.h</b>, and are
+as follows: <p>
+ PCRE_INFO_BACKREFMAX<br>
+ <p>
+Return the number of the highest back reference in the pattern. The fourth
+argument should point to an <b>int</b> variable. Zero is returned if there are
+no back references. <p>
+ PCRE_INFO_CAPTURECOUNT<br>
+ <p>
+Return the number of capturing subpatterns in the pattern. The fourth argument
+should point to an <b>int</b> variable. <p>
+ PCRE_INFO_DEFAULTTABLES<br>
+ <p>
+Return a pointer to the internal default character tables within PCRE.
+The fourth argument should point to an <b>unsigned char *</b> variable. This information
+call is provided for internal use by the <b>pcre_study()</b> function. External
+callers can cause PCRE to use its internal tables by passing a NULL table
+pointer. <p>
+ PCRE_INFO_FIRSTBYTE<br>
+ <p>
+Return information about the first byte of any matched string, for a non-anchored
+pattern. (This option used to be called PCRE_INFO_FIRSTCHAR; the old name
+is still recognized for backwards compatibility.) <p>
+If there is a fixed first
+byte, for example, from a pattern such as (cat|cow|coyote), it is returned
+in the integer pointed to by <i>where</i>. Otherwise, if either <p>
+(a) the pattern
+was compiled with the PCRE_MULTILINE option, and every branch starts with
+"^", or <p>
+(b) every branch of the pattern starts with ".*" and PCRE_DOTALL
+is not set (if it were set, the pattern would be anchored), <p>
+-1 is returned,
+indicating that the pattern matches only at the start of a subject string
+or after any newline within the string. Otherwise -2 is returned. For anchored
+patterns, -2 is returned. <p>
+ PCRE_INFO_FIRSTTABLE<br>
+ <p>
+If the pattern was studied, and this resulted in the construction of a
+256-bit table indicating a fixed set of bytes for the first byte in any
+matching string, a pointer to the table is returned. Otherwise NULL is returned.
+The fourth argument should point to an <b>unsigned char *</b> variable. <p>
+ PCRE_INFO_LASTLITERAL<br>
+ <p>
+Return the value of the rightmost literal byte that must exist in any
+matched string, other than at its start, if such a byte has been recorded.
+The fourth argument should point to an <b>int</b> variable. If there is no such
+byte, -1 is returned. For anchored patterns, a last literal byte is recorded
+only if it follows something of variable length. For example, for the pattern
+/^a\d+z\d+/ the returned value is "z", but for /^a\dz\d/ the returned value is
+-1. <p>
+ PCRE_INFO_NAMECOUNT<br>
+ PCRE_INFO_NAMEENTRYSIZE<br>
+ PCRE_INFO_NAMETABLE<br>
+ <p>
+PCRE supports the use of named as well as numbered capturing parentheses.
+The names are just an additional way of identifying the parentheses, which
+still acquire numbers. A convenience function called <b>pcre_get_named_substring()</b>
+is provided for extracting an individual captured substring by name. It
+is also possible to extract the data directly, by first converting the
+name to a number in order to access the correct pointers in the output
+vector (described with <b>pcre_exec()</b> below). To do the conversion, you need
+to use the name-to-number map, which is described by these three values. <p>
+The
+map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT gives
+the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each
+entry; both of these return an <b>int</b> value. The entry size depends on the
+length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the
+first entry of the table (a pointer to <b>char</b>). The first two bytes of each
+entry are the number of the capturing parenthesis, most significant byte
+first. The rest of the entry is the corresponding name, zero terminated.
+The names are in alphabetical order. For example, consider the following
+pattern (assume PCRE_EXTENDED is set, so white space - including newlines
+- is ignored): <p>
+ (?P&lt;date&gt; (?P&lt;year&gt;(\d\d)?\d\d) -<br>
+ (?P&lt;month&gt;\d\d) - (?P&lt;day&gt;\d\d) )<br>
+ <p>
+There are four named subpatterns, so the table has four entries, and each
+entry in the table is eight bytes long. The table is as follows, with non-printing
+bytes shown in hexadecimal, and undefined bytes shown as ??: <p>
+ 00 01 d
+ a t e 00 ??<br>
+ 00 05 d a y 00 ?? ??<br>
+ 00 04 m o n t h 00<br>
+ 00 02 y e a r 00 ??<br>
+ <p>
+When writing code to extract data from named subpatterns using the name-to-number
+map, remember that the length of each entry is likely to be different for
+each compiled pattern. <p>
+ PCRE_INFO_OPTIONS<br>
+ <p>
+Return a copy of the options with which the pattern was compiled. The fourth
+argument should point to an <b>unsigned long int</b> variable. These option bits
+are those specified in the call to <b>pcre_compile()</b>, modified by any top-level
+option settings within the pattern itself. <p>
+A pattern is automatically anchored
+by PCRE if all of its top-level alternatives begin with one of the following:
+<p>
+ ^ unless PCRE_MULTILINE is set<br>
+ \A always<br>
+ \G always<br>
+ .* if PCRE_DOTALL is set and there are no back<br>
+ references to the subpattern in which .* appears<br>
+ <p>
+For such patterns, the PCRE_ANCHORED bit is set in the options returned
+by <b>pcre_fullinfo()</b>. <p>
+ PCRE_INFO_SIZE<br>
+ <p>
+Return the size of the compiled pattern, that is, the value that was passed
+as the argument to <b>pcre_malloc()</b> when PCRE was getting memory in which
+to place the compiled data. The fourth argument should point to a <b>size_t</b>
+variable. <p>
+ PCRE_INFO_STUDYSIZE<br>
+ <p>
+Return the size of the data block pointed to by the <i>study_data</i> field in
+a <b>pcre_extra</b> block. That is, it is the value that was passed to <b>pcre_malloc()</b>
+when PCRE was getting memory into which to place the data created by <b>pcre_study()</b>.
+The fourth argument should point to a <b>size_t</b> variable.
+<h2><a name='sect10' href='#toc10'>Obsolete Info Function</a></h2>
+
+<p>
+<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b> <b>*<i>firstcharptr</i>);</b> <p>
+The <b>pcre_info()</b>
+function is now obsolete because its interface is too restrictive to return
+all the available data about a compiled pattern. New programs should use
+<b>pcre_fullinfo()</b> instead. The yield of <b>pcre_info()</b> is the number of capturing
+subpatterns, or one of the following negative numbers: <p>
+ PCRE_ERROR_NULL
+ the argument <i>code</i> was NULL<br>
+ PCRE_ERROR_BADMAGIC the "magic number" was not found<br>
+ <p>
+If the <i>optptr</i> argument is not NULL, a copy of the options with which the
+pattern was compiled is placed in the integer it points to (see PCRE_INFO_OPTIONS
+above). <p>
+If the pattern is not anchored and the <i>firstcharptr</i> argument is
+not NULL, it is used to pass back information about the first character
+of any matched string (see PCRE_INFO_FIRSTBYTE above).
+<h2><a name='sect11' href='#toc11'>Matching a Pattern</a></h2>
+
+<p>
+<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>const char
+*<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b> <b>int <i>options</i>, int *<i>ovector</i>, int
+<i>ovecsize</i>);</b> <p>
+The function <b>pcre_exec()</b> is called to match a subject string
+against a compiled pattern, which is passed in the <i>code</i> argument. If the
+pattern has been studied, the result of the study should be passed in the
+<i>extra</i> argument. <p>
+In most applications, the pattern will have been compiled
+(and optionally studied) in the same process that calls <b>pcre_exec()</b>. However,
+it is possible to save compiled patterns and study data, and then use them
+later in different processes, possibly even on different hosts. For a discussion
+about this, see the <b>pcreprecompile</b> documentation. <p>
+Here is an example of
+a simple call to <b>pcre_exec()</b>: <p>
+ int rc;<br>
+ int ovector[30];<br>
+ rc = pcre_exec(<br>
+ re, /* result of pcre_compile() */<br>
+ NULL, /* we didn&rsquo;t study the pattern */<br>
+ "some string", /* the subject string */<br>
+ 11, /* the length of the subject string */<br>
+ 0, /* start at offset 0 in the subject */<br>
+ 0, /* default options */<br>
+ ovector, /* vector of integers for substring information */<br>
+ 30); /* number of elements in the vector (NOT size in bytes)
+*/<br>
+
+<h3><a name='sect12' href='#toc12'>Extra data for <b>pcre_exec()</b></a></h3>
+ <p>
+If the <i>extra</i> argument is not NULL, it must
+point to a <b>pcre_extra</b> data block. The <b>pcre_study()</b> function returns such
+a block (when it doesn&rsquo;t return NULL), but you can also create one for yourself,
+and pass additional information in it. The fields in a <b>pcre_extra</b> block
+are as follows: <p>
+ unsigned long int <i>flags</i>;<br>
+ void *<i>study_data</i>;<br>
+ unsigned long int <i>match_limit</i>;<br>
+ void *<i>callout_data</i>;<br>
+ const unsigned char *<i>tables</i>;<br>
+ <p>
+The <i>flags</i> field is a bitmap that specifies which of the other fields are
+set. The flag bits are: <p>
+ PCRE_EXTRA_STUDY_DATA<br>
+ PCRE_EXTRA_MATCH_LIMIT<br>
+ PCRE_EXTRA_CALLOUT_DATA<br>
+ PCRE_EXTRA_TABLES<br>
+ <p>
+Other flag bits should be set to zero. The <i>study_data</i> field is set in the
+<b>pcre_extra</b> block that is returned by <b>pcre_study()</b>, together with the appropriate
+flag bit. You should not set this yourself, but you may add to the block
+by setting the other fields and their corresponding flag bits. <p>
+The <i>match_limit</i>
+field provides a means of preventing PCRE from using up a vast amount of
+resources when running patterns that are not going to match, but which
+have a very large number of possibilities in their search trees. The classic
+example is the use of nested unlimited repeats. <p>
+Internally, PCRE uses a
+function called <b>match()</b> which it calls repeatedly (sometimes recursively).
+The limit is imposed on the number of times this function is called during
+a match, which has the effect of limiting the amount of recursion and backtracking
+that can take place. For patterns that are not anchored, the count starts
+from zero for each position in the subject string. <p>
+The default limit for
+the library can be set when PCRE is built; the default default is 10 million,
+which handles all but the most extreme cases. You can reduce the default
+by supplying <b>pcre_exec()</b> with a <b>pcre_extra</b> block in which <i>match_limit</i> is
+set to a smaller value, and PCRE_EXTRA_MATCH_LIMIT is set in the <i>flags</i>
+field. If the limit is exceeded, <b>pcre_exec()</b> returns PCRE_ERROR_MATCHLIMIT.
+<p>
+The <i>callout_data</i> field is used in conjunction with the "callout" feature,
+which is described in the <b>pcrecallout</b> documentation. <p>
+The <i>tables</i> field
+is used to pass a character tables pointer to <b>pcre_exec()</b>; this overrides
+the value that is stored with the compiled pattern. A non-NULL value is stored
+with the compiled pattern only if custom tables were supplied to <b>pcre_compile()</b>
+via its <i>tableptr</i> argument. If NULL is passed to <b>pcre_exec()</b> using this mechanism,
+it forces PCRE&rsquo;s internal tables to be used. This facility is helpful when
+re-using patterns that have been saved after compiling with an external
+set of tables, because the external tables might be at a different address
+when <b>pcre_exec()</b> is called. See the <b>pcreprecompile</b> documentation for a
+discussion of saving compiled patterns for later use.
+<h3><a name='sect13' href='#toc13'>Option bits for <b>pcre_exec()</b></a></h3>
+
+<p>
+The unused bits of the <i>options</i> argument for <b>pcre_exec()</b> must be zero. The
+only bits that may be set are PCRE_ANCHORED, PCRE_NOTBOL, PCRE_NOTEOL,
+PCRE_NOTEMPTY, PCRE_NO_UTF8_CHECK and PCRE_PARTIAL. <p>
+ PCRE_ANCHORED<br>
+ <p>
+The PCRE_ANCHORED option limits <b>pcre_exec()</b> to matching at the first matching
+position. If a pattern was compiled with PCRE_ANCHORED, or turned out to
+be anchored by virtue of its contents, it cannot be made unanchored at matching
+time. <p>
+ PCRE_NOTBOL<br>
+ <p>
+This option specifies that the first character of the subject string is not
+the beginning of a line, so the circumflex metacharacter should not match
+before it. Setting this without PCRE_MULTILINE (at compile time) causes
+circumflex never to match. This option affects only the behaviour of the
+circumflex metacharacter. It does not affect \A. <p>
+ PCRE_NOTEOL<br>
+ <p>
+This option specifies that the end of the subject string is not the end
+of a line, so the dollar metacharacter should not match it nor (except
+in multiline mode) a newline immediately before it. Setting this without
+PCRE_MULTILINE (at compile time) causes dollar never to match. This option
+affects only the behaviour of the dollar metacharacter. It does not affect
+\Z or \z. <p>
+ PCRE_NOTEMPTY<br>
+ <p>
+An empty string is not considered to be a valid match if this option is
+set. If there are alternatives in the pattern, they are tried. If all the
+alternatives match the empty string, the entire match fails. For example,
+if the pattern <p>
+ a?b?<br>
+ <p>
+is applied to a string not beginning with "a" or "b", it matches the empty
+string at the start of the subject. With PCRE_NOTEMPTY set, this match is
+not valid, so PCRE searches further into the string for occurrences of
+"a" or "b". <p>
+Perl has no direct equivalent of PCRE_NOTEMPTY, but it does
+make a special case of a pattern match of the empty string within its <b>split()</b>
+function, and when using the /g modifier. It is possible to emulate Perl&rsquo;s
+behaviour after matching a null string by first trying the match again
+at the same offset with PCRE_NOTEMPTY and PCRE_ANCHORED, and then if that
+fails by advancing the starting offset (see below) and trying an ordinary
+match again. There is some code that demonstrates how to do this in the
+<i>pcredemo.c</i> sample program. <p>
+ PCRE_NO_UTF8_CHECK<br>
+ <p>
+When PCRE_UTF8 is set at compile time, the validity of the subject as
+a UTF-8 string is automatically checked when <b>pcre_exec()</b> is subsequently
+called. The value of <i>startoffset</i> is also checked to ensure that it points
+to the start of a UTF-8 character. If an invalid UTF-8 sequence of bytes is
+found, <b>pcre_exec()</b> returns the error PCRE_ERROR_BADUTF8. If <i>startoffset</i>
+contains an invalid value, PCRE_ERROR_BADUTF8_OFFSET is returned. <p>
+If you
+already know that your subject is valid, and you want to skip these checks
+for performance reasons, you can set the PCRE_NO_UTF8_CHECK option when
+calling <b>pcre_exec()</b>. You might want to do this for the second and subsequent
+calls to <b>pcre_exec()</b> if you are making repeated calls to find all the matches
+in a single subject string. However, you should be sure that the value of
+<i>startoffset</i> points to the start of a UTF-8 character. When PCRE_NO_UTF8_CHECK
+is set, the effect of passing an invalid UTF-8 string as a subject, or a
+value of <i>startoffset</i> that does not point to the start of a UTF-8 character,
+is undefined. Your program may crash. <p>
+ PCRE_PARTIAL<br>
+ <p>
+This option turns on the partial matching feature. If the subject string
+fails to match the pattern, but at some point during the matching process
+the end of the subject was reached (that is, the subject partially matches
+the pattern and the failure to match occurred only because there were not
+enough subject characters), <b>pcre_exec()</b> returns PCRE_ERROR_PARTIAL instead
+of PCRE_ERROR_NOMATCH. When PCRE_PARTIAL is used, there are restrictions
+on what may appear in the pattern. These are discussed in the <b>pcrepartial</b>
+ documentation.
+<h3><a name='sect14' href='#toc14'>The string to be matched by <b>pcre_exec()</b></a></h3>
+ <p>
+The subject string
+is passed to <b>pcre_exec()</b> as a pointer in <i>subject</i>, a length in <i>length</i>, and
+a starting byte offset in <i>startoffset</i>. In UTF-8 mode, the byte offset must
+point to the start of a UTF-8 character. Unlike the pattern string, the subject
+may contain binary zero bytes. When the starting offset is zero, the search
+for a match starts at the beginning of the subject, and this is by far
+the most common case. <p>
+A non-zero starting offset is useful when searching
+for another match in the same subject by calling <b>pcre_exec()</b> again after
+a previous success. Setting <i>startoffset</i> differs from just passing over a
+shortened string and setting PCRE_NOTBOL in the case of a pattern that
+begins with any kind of lookbehind. For example, consider the pattern <p>
+
+\Biss\B<br>
+ <p>
+which finds occurrences of "iss" in the middle of words. (\B matches only
+if the current position in the subject is not a word boundary.) When applied
+to the string "Mississippi", the first call to <b>pcre_exec()</b> finds the first
+occurrence. If <b>pcre_exec()</b> is called again with just the remainder of the
+subject, namely "issippi", it does not match, because \B is always false
+at the start of the subject, which is deemed to be a word boundary. However,
+if <b>pcre_exec()</b> is passed the entire string again, but with <i>startoffset</i>
+set to 4, it finds the second occurrence of "iss" because it is able to
+look behind the starting point to discover that it is preceded by a letter.
+<p>
+If a non-zero starting offset is passed when the pattern is anchored, one
+attempt to match at the given offset is made. This can only succeed if the
+pattern does not require the match to be at the start of the subject.
+<h3><a name='sect15' href='#toc15'>How
+<b>pcre_exec()</b> returns captured substrings</a></h3>
+ <p>
+In general, a pattern matches a
+certain portion of the subject, and in addition, further substrings from
+the subject may be picked out by parts of the pattern. Following the usage
+in Jeffrey Friedl&rsquo;s book, this is called "capturing" in what follows, and
+the phrase "capturing subpattern" is used for a fragment of a pattern that
+picks out a substring. PCRE supports several other kinds of parenthesized
+subpattern that do not cause substrings to be captured. <p>
+Captured substrings
+are returned to the caller via a vector of integer offsets whose address
+is passed in <i>ovector</i>. The number of elements in the vector is passed in
+<i>ovecsize</i>, which must be a non-negative number. <b>Note</b>: this argument is NOT
+the size of <i>ovector</i> in bytes. <p>
+The first two-thirds of the vector is used
+to pass back captured substrings, each substring using a pair of integers.
+The remaining third of the vector is used as workspace by <b>pcre_exec()</b> while
+matching capturing subpatterns, and is not available for passing back information.
+The length passed in <i>ovecsize</i> should always be a multiple of three. If it
+is not, it is rounded down. <p>
+When a match is successful, information about
+captured substrings is returned in pairs of integers, starting at the beginning
+of <i>ovector</i>, and continuing up to two-thirds of its length at the most. The
+first element of a pair is set to the offset of the first character in
+a substring, and the second is set to the offset of the first character
+after the end of a substring. The first pair, <i>ovector[0]</i> and <i>ovector[1]</i>,
+identify the portion of the subject string matched by the entire pattern.
+The next pair is used for the first capturing subpattern, and so on. The
+value returned by <b>pcre_exec()</b> is the number of pairs that have been set.
+If there are no capturing subpatterns, the return value from a successful
+match is 1, indicating that just the first pair of offsets has been set.
+<p>
+Some convenience functions are provided for extracting the captured substrings
+as separate strings. These are described in the following section. <p>
+It is
+possible for capturing subpattern number <i>n+1</i> to match some part of the
+subject when subpattern <i>n</i> has not been used at all. For example, if the
+string "abc" is matched against the pattern (a|(z))(bc) subpatterns 1 and
+3 are matched, but 2 is not. When this happens, both offset values corresponding
+to the unused subpattern are set to -1. <p>
+If a capturing subpattern is matched
+repeatedly, it is the last portion of the string that it matched that is
+returned. <p>
+If the vector is too small to hold all the captured substring
+offsets, it is used as far as possible (up to two-thirds of its length),
+and the function returns a value of zero. In particular, if the substring
+offsets are not of interest, <b>pcre_exec()</b> may be called with <i>ovector</i> passed
+as NULL and <i>ovecsize</i> as zero. However, if the pattern contains back references
+and the <i>ovector</i> is not big enough to remember the related substrings, PCRE
+has to get additional memory for use during matching. Thus it is usually
+advisable to supply an <i>ovector</i>. <p>
+Note that <b>pcre_info()</b> can be used to find
+out how many capturing subpatterns there are in a compiled pattern. The
+smallest size for <i>ovector</i> that will allow for <i>n</i> captured substrings, in
+addition to the offsets of the substring matched by the whole pattern,
+is (<i>n</i>+1)*3.
+<h3><a name='sect16' href='#toc16'>Return values from <b>pcre_exec()</b></a></h3>
+ <p>
+If <b>pcre_exec()</b> fails, it returns
+a negative number. The following are defined in the header file: <p>
+ PCRE_ERROR_NOMATCH
+ (-1)<br>
+ <p>
+The subject string did not match the pattern. <p>
+ PCRE_ERROR_NULL
+ (-2)<br>
+ <p>
+Either <i>code</i> or <i>subject</i> was passed as NULL, or <i>ovector</i> was NULL and <i>ovecsize</i>
+was not zero. <p>
+ PCRE_ERROR_BADOPTION (-3)<br>
+ <p>
+An unrecognized bit was set in the <i>options</i> argument. <p>
+ PCRE_ERROR_BADMAGIC
+ (-4)<br>
+ <p>
+PCRE stores a 4-byte "magic number" at the start of the compiled code,
+to catch the case when it is passed a junk pointer and to detect when a
+pattern that was compiled in an environment of one endianness is run in
+an environment with the other endianness. This is the error that PCRE gives
+when the magic number is not present. <p>
+ PCRE_ERROR_UNKNOWN_NODE (-5)<br>
+ <p>
+While running the pattern match, an unknown item was encountered in the
+compiled pattern. This error could be caused by a bug in PCRE or by overwriting
+of the compiled pattern. <p>
+ PCRE_ERROR_NOMEMORY (-6)<br>
+ <p>
+If a pattern contains back references, but the <i>ovector</i> that is passed
+to <b>pcre_exec()</b> is not big enough to remember the referenced substrings,
+PCRE gets a block of memory at the start of matching to use for this purpose.
+If the call via <b>pcre_malloc()</b> fails, this error is given. The memory is
+automatically freed at the end of matching. <p>
+ PCRE_ERROR_NOSUBSTRING
+(-7)<br>
+ <p>
+This error is used by the <b>pcre_copy_substring()</b>, <b>pcre_get_substring()</b>,
+and <b>pcre_get_substring_list()</b> functions (see below). It is never returned
+by <b>pcre_exec()</b>. <p>
+ PCRE_ERROR_MATCHLIMIT (-8)<br>
+ <p>
+The recursion and backtracking limit, as specified by the <i>match_limit</i>
+field in a <b>pcre_extra</b> structure (or defaulted) was reached. See the description
+above. <p>
+ PCRE_ERROR_CALLOUT (-9)<br>
+ <p>
+This error is never generated by <b>pcre_exec()</b> itself. It is provided for
+use by callout functions that want to yield a distinctive error code. See
+the <b>pcrecallout</b> documentation for details. <p>
+ PCRE_ERROR_BADUTF8
+ (-10)<br>
+ <p>
+A string that contains an invalid UTF-8 byte sequence was passed as a subject.
+<p>
+ PCRE_ERROR_BADUTF8_OFFSET (-11)<br>
+ <p>
+The UTF-8 byte sequence that was passed as a subject was valid, but the
+value of <i>startoffset</i> did not point to the beginning of a UTF-8 character.
+<p>
+ PCRE_ERROR_PARTIAL (-12)<br>
+ <p>
+The subject string did not match, but it did match partially. See the
+<b>pcrepartial</b> documentation for details of partial matching. <p>
+ PCRE_ERROR_BAD_PARTIAL
+(-13)<br>
+ <p>
+The PCRE_PARTIAL option was used with a compiled pattern containing items
+that are not supported for partial matching. See the <b>pcrepartial</b> documentation
+for details of partial matching. <p>
+ PCRE_ERROR_INTERNAL (-14)<br>
+ <p>
+An unexpected internal error has occurred. This error could be caused by
+a bug in PCRE or by overwriting of the compiled pattern. <p>
+ PCRE_ERROR_BADCOUNT
+(-15)<br>
+ <p>
+This error is given if the value of the <i>ovecsize</i> argument is negative.
+
+<h2><a name='sect17' href='#toc17'>Extracting Captured Substrings by Number</a></h2>
+ <p>
+<b>int pcre_copy_substring(const
+char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
+ <b>int <i>buffersize</i>);</b> <p>
+<br>
+<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b> <b>int <i>stringcount</i>,
+int <i>stringnumber</i>,</b> <b>const char **<i>stringptr</i>);</b> <p>
+<br>
+<b>int pcre_get_substring_list(const char *<i>subject</i>,</b> <b>int *<i>ovector</i>, int <i>stringcount</i>,
+const char ***<i>listptr</i>);</b> <p>
+Captured substrings can be accessed directly
+by using the offsets returned by <b>pcre_exec()</b> in <i>ovector</i>. For convenience,
+the functions <b>pcre_copy_substring()</b>, <b>pcre_get_substring()</b>, and <b>pcre_get_substring_list()</b>
+are provided for extracting captured substrings as new, separate, zero-terminated
+strings. These functions identify substrings by number. The next section
+describes functions for extracting named substrings. A substring that contains
+a binary zero is correctly extracted and has a further zero added on the
+end, but the result is not, of course, a C string. <p>
+The first three arguments
+are the same for all three of these functions: <i>subject</i> is the subject string
+that has just been successfully matched, <i>ovector</i> is a pointer to the vector
+of integer offsets that was passed to <b>pcre_exec()</b>, and <i>stringcount</i> is the
+number of substrings that were captured by the match, including the substring
+that matched the entire regular expression. This is the value returned by
+<b>pcre_exec()</b> if it is greater than zero. If <b>pcre_exec()</b> returned zero, indicating
+that it ran out of space in <i>ovector</i>, the value passed as <i>stringcount</i> should
+be the number of elements in the vector divided by three. <p>
+The functions
+<b>pcre_copy_substring()</b> and <b>pcre_get_substring()</b> extract a single substring,
+whose number is given as <i>stringnumber</i>. A value of zero extracts the substring
+that matched the entire pattern, whereas higher values extract the captured
+substrings. For <b>pcre_copy_substring()</b>, the string is placed in <i>buffer</i>, whose
+length is given by <i>buffersize</i>, while for <b>pcre_get_substring()</b> a new block
+of memory is obtained via <b>pcre_malloc</b>, and its address is returned via
+<i>stringptr</i>. The yield of the function is the length of the string, not including
+the terminating zero, or one of <p>
+ PCRE_ERROR_NOMEMORY (-6)<br>
+ <p>
+The buffer was too small for <b>pcre_copy_substring()</b>, or the attempt to
+get memory failed for <b>pcre_get_substring()</b>. <p>
+ PCRE_ERROR_NOSUBSTRING
+(-7)<br>
+ <p>
+There is no substring whose number is <i>stringnumber</i>. <p>
+The <b>pcre_get_substring_list()</b>
+function extracts all available substrings and builds a list of pointers
+to them. All this is done in a single block of memory that is obtained via
+<b>pcre_malloc</b>. The address of the memory block is returned via <i>listptr</i>, which
+is also the start of the list of string pointers. The end of the list is
+marked by a NULL pointer. The yield of the function is zero if all went
+well, or <p>
+ PCRE_ERROR_NOMEMORY (-6)<br>
+ <p>
+if the attempt to get the memory block failed. <p>
+When any of these functions
+encounters a substring that is unset, which can happen when capturing subpattern
+number <i>n+1</i> matches some part of the subject, but subpattern <i>n</i> has not been
+used at all, they return an empty string. This can be distinguished from
+a genuine zero-length substring by inspecting the appropriate offset in
+<i>ovector</i>, which is negative for unset substrings. <p>
+The two convenience functions
+<b>pcre_free_substring()</b> and <b>pcre_free_substring_list()</b> can be used to free
+the memory returned by a previous call of <b>pcre_get_substring()</b> or <b>pcre_get_substring_list()</b>,
+respectively. They do nothing more than call the function pointed to by
+<b>pcre_free</b>, which of course could be called directly from a C program. However,
+PCRE is used in some situations where it is linked via a special interface
+to another programming language which cannot use <b>pcre_free</b> directly; it
+is for these cases that the functions are provided.
+<h2><a name='sect18' href='#toc18'>Extracting Captured
+Substrings by Name</a></h2>
+ <p>
+<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b> <b>const char
+*<i>name</i>);</b> <p>
+<br>
+<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>char *<i>buffer</i>, int
+<i>buffersize</i>);</b> <p>
+<br>
+<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b> <b>const char *<i>subject</i>, int
+*<i>ovector</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>const char **<i>stringptr</i>);</b>
+<p>
+To extract a substring by name, you first have to find the associated number.
+For example, for this pattern <p>
+ (a+)b(?&lt;xxx&gt;\d+)...<br>
+ <p>
+the number of the subpattern called "xxx" is 2. You can find the number
+from the name by calling <b>pcre_get_stringnumber()</b>. The first argument is
+the compiled pattern, and the second is the name. The yield of the function
+is the subpattern number, or PCRE_ERROR_NOSUBSTRING (-7) if there is no
+subpattern of that name. <p>
+Given the number, you can extract the substring
+directly, or use one of the functions described in the previous section.
+For convenience, there are also two functions that do the whole job. <p>
+Most
+of the arguments of <i>pcre_copy_named_substring()</i> and <i>pcre_get_named_substring()</i>
+are the same as those for the similarly named functions that extract by
+number. As these are described in the previous section, they are not re-described
+here. There are just two differences: <p>
+First, instead of a substring number,
+a substring name is given. Second, there is an extra argument, given at
+the start, which is a pointer to the compiled pattern. This is needed in
+order to gain access to the name-to-number translation table. <p>
+These functions
+call <b>pcre_get_stringnumber()</b>, and if it succeeds, they then call <i>pcre_copy_substring()</i>
+or <i>pcre_get_substring()</i>, as appropriate. <p>
+ Last updated: 09 September 2004
+<br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>PCRE Native API</a></li>
+<li><a name='toc2' href='#sect2'>PCRE API Overview</a></li>
+<li><a name='toc3' href='#sect3'>Multithreading</a></li>
+<li><a name='toc4' href='#sect4'>Saving Precompiled Patterns for Later Use</a></li>
+<li><a name='toc5' href='#sect5'>Checking Build-time Options</a></li>
+<li><a name='toc6' href='#sect6'>Compiling a Pattern</a></li>
+<li><a name='toc7' href='#sect7'>Studying a Pattern</a></li>
+<li><a name='toc8' href='#sect8'>Locale Support</a></li>
+<li><a name='toc9' href='#sect9'>Information About a Pattern</a></li>
+<li><a name='toc10' href='#sect10'>Obsolete Info Function</a></li>
+<li><a name='toc11' href='#sect11'>Matching a Pattern</a></li>
+<ul>
+<li><a name='toc12' href='#sect12'>Extra data for pcre_exec()</a></li>
+<li><a name='toc13' href='#sect13'>Option bits for pcre_exec()</a></li>
+<li><a name='toc14' href='#sect14'>The string to be matched by pcre_exec()</a></li>
+<li><a name='toc15' href='#sect15'>How pcre_exec() returns captured substrings</a></li>
+<li><a name='toc16' href='#sect16'>Return values from pcre_exec()</a></li>
+</ul>
+<li><a name='toc17' href='#sect17'>Extracting Captured Substrings by Number</a></li>
+<li><a name='toc18' href='#sect18'>Extracting Captured Substrings by Name</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcrebuild.3.html b/spamfilter/Utilities/PCRE/man/html/pcrebuild.3.html
new file mode 100644
index 0000000..950a1f0
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcrebuild.3.html
@@ -0,0 +1,167 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>PCRE Build-time Options</a></h2>
+ <p>
+This
+document describes the optional features of PCRE that can be selected when
+the library is compiled. They are all selected, or deselected, by providing
+options to the <b>configure</b> script that is run before the <b>make</b> command. The
+complete list of options for <b>configure</b> (which includes the standard ones
+such as the selection of the installation directory) can be obtained by
+running <p>
+ ./configure --help<br>
+ <p>
+The following sections describe certain options whose names begin with
+--enable or --disable. These settings specify changes to the defaults for the
+<b>configure</b> command. Because of the way that <b>configure</b> works, --enable and --disable
+always come in pairs, so the complementary option always exists as well,
+but as it specifies the default, it is not described.
+<h2><a name='sect2' href='#toc2'>UTF-8 Support</a></h2>
+ <p>
+To build
+PCRE with support for UTF-8 character strings, add <p>
+ --enable-utf8<br>
+ <p>
+to the <b>configure</b> command. Of itself, this does not make PCRE treat strings
+as UTF-8. As well as compiling PCRE with this option, you also have
+to set the PCRE_UTF8 option when you call the <b>pcre_compile()</b> function.
+
+<h2><a name='sect3' href='#toc3'>Unicode Character Property Support</a></h2>
+ <p>
+UTF-8 support allows PCRE to process
+character values greater than 255 in the strings that it handles. On its
+own, however, it does not provide any facilities for accessing the properties
+of such characters. If you want to be able to use the pattern escapes \P,
+\p, and \X, which refer to Unicode character properties, you must add <p>
+ --enable-unicode-properties<br>
+ <p>
+to the <b>configure</b> command. This implies UTF-8 support, even if you have not
+explicitly requested it. <p>
+Including Unicode property support adds around
+90K of tables to the PCRE library, approximately doubling its size. Only
+the general category properties such as <i>Lu</i> and <i>Nd</i> are supported. Details
+are given in the <b>pcrepattern</b> documentation.
+<h2><a name='sect4' href='#toc4'>Code Value of Newline</a></h2>
+ <p>
+By
+default, PCRE treats character 10 (linefeed) as the newline character. This
+is the normal newline character on Unix-like systems. You can compile PCRE
+to use character 13 (carriage return) instead by adding <p>
+ --enable-newline-is-cr<br>
+ <p>
+to the <b>configure</b> command. For completeness there is also a --enable-newline-is-lf
+option, which explicitly specifies linefeed as the newline character.
+<h2><a name='sect5' href='#toc5'>Building
+Shared and Static Libraries</a></h2>
+ <p>
+The PCRE building process uses <b>libtool</b> to build
+both shared and static Unix libraries by default. You can suppress one of
+these by adding one of <p>
+ --disable-shared<br>
+ --disable-static<br>
+ <p>
+to the <b>configure</b> command, as required.
+<h2><a name='sect6' href='#toc6'>POSIX Malloc Usage</a></h2>
+ <p>
+When PCRE is
+called through the POSIX interface (see the <b>pcreposix</b> documentation),
+additional working storage is required for holding the pointers to capturing
+substrings, because PCRE requires three integers per substring, whereas
+the POSIX interface provides only two. If the number of expected substrings
+is small, the wrapper function uses space on the stack, because this is
+faster than using <b>malloc()</b> for each call. The default threshold above which
+the stack is no longer used is 10; it can be changed by adding a setting
+such as <p>
+ --with-posix-malloc-threshold=20<br>
+ <p>
+to the <b>configure</b> command.
+<h2><a name='sect7' href='#toc7'>Limiting PCRE Resource Usage</a></h2>
+ <p>
+Internally, PCRE
+has a function called <b>match()</b>, which it calls repeatedly (possibly recursively)
+when matching a pattern. By controlling the maximum number of times this
+function may be called during a single matching operation, a limit can
+be placed on the resources used by a single call to <b>pcre_exec()</b>. The limit
+can be changed at run time, as described in the <b>pcreapi</b> documentation.
+The default is 10 million, but this can be changed by adding a setting
+such as <p>
+ --with-match-limit=500000<br>
+ <p>
+to the <b>configure</b> command.
+<h2><a name='sect8' href='#toc8'>Handling Very Large Patterns</a></h2>
+ <p>
+Within a compiled
+pattern, offset values are used to point from one part to another (for
+example, from an opening parenthesis to an alternation metacharacter). By
+default, two-byte values are used for these offsets, leading to a maximum
+size for a compiled pattern of around 64K. This is sufficient to handle
+all but the most gigantic patterns. Nevertheless, some people do want to
+process enormous patterns, so it is possible to compile PCRE to use three-byte
+or four-byte offsets by adding a setting such as <p>
+ --with-link-size=3<br>
+ <p>
+to the <b>configure</b> command. The value given must be 2, 3, or 4. Using longer
+offsets slows down the operation of PCRE because it has to load additional
+bytes when handling them. <p>
+If you build PCRE with an increased link size,
+test 2 (and test 5 if you are using UTF-8) will fail. Part of the output
+of these tests is a representation of the compiled pattern, and this changes
+with the link size.
+<h2><a name='sect9' href='#toc9'>Avoiding Excessive Stack Usage</a></h2>
+ <p>
+PCRE implements backtracking
+while matching by making recursive calls to an internal function called
+<b>match()</b>. In environments where the size of the stack is limited, this can
+severely limit PCRE&rsquo;s operation. (The Unix environment does not usually suffer
+from this problem.) An alternative approach that uses memory from the heap
+to remember data, instead of using recursive function calls, has been implemented
+to work round this problem. If you want to build a version of PCRE that
+works this way, add <p>
+ --disable-stack-for-recursion<br>
+ <p>
+to the <b>configure</b> command. With this configuration, PCRE will use the <b>pcre_stack_malloc</b>
+and <b>pcre_stack_free</b> variables to call memory management functions. Separate
+functions are provided because the usage is very predictable: the block
+sizes requested are always the same, and the blocks are always freed in
+reverse order. A calling program might be able to implement optimized functions
+that perform better than the standard <b>malloc()</b> and <b>free()</b> functions. PCRE
+runs noticeably more slowly when built in this way.
+<h2><a name='sect10' href='#toc10'>Using EBCDIC Code</a></h2>
+ <p>
+PCRE
+assumes by default that it will run in an environment where the character
+code is ASCII (or Unicode, which is a superset of ASCII). PCRE can, however,
+be compiled to run in an EBCDIC environment by adding <p>
+ --enable-ebcdic<br>
+ <p>
+to the <b>configure</b> command. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>PCRE Build-time Options</a></li>
+<li><a name='toc2' href='#sect2'>UTF-8 Support</a></li>
+<li><a name='toc3' href='#sect3'>Unicode Character Property Support</a></li>
+<li><a name='toc4' href='#sect4'>Code Value of Newline</a></li>
+<li><a name='toc5' href='#sect5'>Building Shared and Static Libraries</a></li>
+<li><a name='toc6' href='#sect6'>POSIX Malloc Usage</a></li>
+<li><a name='toc7' href='#sect7'>Limiting PCRE Resource Usage</a></li>
+<li><a name='toc8' href='#sect8'>Handling Very Large Patterns</a></li>
+<li><a name='toc9' href='#sect9'>Avoiding Excessive Stack Usage</a></li>
+<li><a name='toc10' href='#sect10'>Using EBCDIC Code</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcrecallout.3.html b/spamfilter/Utilities/PCRE/man/html/pcrecallout.3.html
new file mode 100644
index 0000000..83e61b2
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcrecallout.3.html
@@ -0,0 +1,148 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Callouts</a></h2>
+ <p>
+<b>int (*pcre_callout)(pcre_callout_block
+*);</b> <p>
+PCRE provides a feature called "callout", which is a means of temporarily
+passing control to the caller of PCRE in the middle of pattern matching.
+The caller of PCRE provides an external function by putting its entry point
+in the global variable <i>pcre_callout</i>. By default, this variable contains
+NULL, which disables all calling out. <p>
+Within a regular expression, (?C)
+indicates the points at which the external function is to be called. Different
+callout points can be identified by putting a number less than 256 after
+the letter C. The default value is zero. For example, this pattern has two
+callout points: <p>
+ (?C1)deabc(?C2)def<br>
+ <p>
+If the PCRE_AUTO_CALLOUT option bit is set when <b>pcre_compile()</b> is called,
+PCRE automatically inserts callouts, all with number 255, before each item
+in the pattern. For example, if PCRE_AUTO_CALLOUT is used with the pattern
+<p>
+ A(\d{2}|--)<br>
+ <p>
+it is processed as if it were <p>
+(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
+<p>
+Notice that there is a callout before and after each parenthesis and alternation
+bar. Automatic callouts can be used for tracking the progress of pattern
+matching. The <b>pcretest</b> command has an option that sets automatic callouts;
+when it is used, the output indicates how the pattern is matched. This is
+useful information when you are trying to optimize the performance of a
+particular pattern.
+<h2><a name='sect2' href='#toc2'>Missing Callouts</a></h2>
+ <p>
+You should be aware that, because
+of optimizations in the way PCRE matches patterns, callouts sometimes do
+not happen. For example, if the pattern is <p>
+ ab(?C4)cd<br>
+ <p>
+PCRE knows that any matching string must contain the letter "d". If the
+subject string is "abyz", the lack of "d" means that matching doesn&rsquo;t ever
+start, and the callout is never reached. However, with "abyd", though the
+result is still no match, the callout is obeyed.
+<h2><a name='sect3' href='#toc3'>The Callout Interface</a></h2>
+
+<p>
+During matching, when PCRE reaches a callout point, the external function
+defined by <i>pcre_callout</i> is called (if it is set). The only argument is a
+pointer to a <b>pcre_callout</b> block. This structure contains the following fields:
+<p>
+ int <i>version</i>;<br>
+ int <i>callout_number</i>;<br>
+ int *<i>offset_vector</i>;<br>
+ const char *<i>subject</i>;<br>
+ int <i>subject_length</i>;<br>
+ int <i>start_match</i>;<br>
+ int <i>current_position</i>;<br>
+ int <i>capture_top</i>;<br>
+ int <i>capture_last</i>;<br>
+ void *<i>callout_data</i>;<br>
+ int <i>pattern_position</i>;<br>
+ int <i>next_item_length</i>;<br>
+ <p>
+The <i>version</i> field is an integer containing the version number of the block
+format. The initial version was 0; the current version is 1. The version
+number will change again in future if additional fields are added, but
+the intention is never to remove any of the existing fields. <p>
+The <i>callout_number</i>
+field contains the number of the callout, as compiled into the pattern
+(that is, the number after ?C for manual callouts, and 255 for automatically
+generated callouts). <p>
+The <i>offset_vector</i> field is a pointer to the vector
+of offsets that was passed by the caller to <b>pcre_exec()</b>. The contents can
+be inspected in order to extract substrings that have been matched so far,
+in the same way as for extracting substrings after a match has completed.
+<p>
+The <i>subject</i> and <i>subject_length</i> fields contain copies of the values that
+were passed to <b>pcre_exec()</b>. <p>
+The <i>start_match</i> field contains the offset within
+the subject at which the current match attempt started. If the pattern is
+not anchored, the callout function may be called several times from the
+same point in the pattern for different starting points in the subject.
+<p>
+The <i>current_position</i> field contains the offset within the subject of the
+current match pointer. <p>
+The <i>capture_top</i> field contains one more than the
+number of the highest numbered captured substring so far. If no substrings
+have been captured, the value of <i>capture_top</i> is one. <p>
+The <i>capture_last</i> field
+contains the number of the most recently captured substring. If no substrings
+have been captured, its value is -1. <p>
+The <i>callout_data</i> field contains a value
+that is passed to <b>pcre_exec()</b> by the caller specifically so that it can
+be passed back in callouts. It is passed in the <i>callout_data</i> field of the
+<b>pcre_extra</b> data structure. If no such data was passed, the value of <i>callout_data</i>
+in a <b>pcre_callout</b> block is NULL. There is a description of the <b>pcre_extra</b>
+structure in the <b>pcreapi</b> documentation. <p>
+The <i>pattern_position</i> field is
+present from version 1 of the <i>pcre_callout</i> structure. It contains the offset
+to the next item to be matched in the pattern string. <p>
+The <i>next_item_length</i>
+field is present from version 1 of the <i>pcre_callout</i> structure. It contains
+the length of the next item to be matched in the pattern string. When the
+callout immediately precedes an alternation bar, a closing parenthesis,
+or the end of the pattern, the length is zero. When the callout precedes
+an opening parenthesis, the length is that of the entire subpattern. <p>
+The
+<i>pattern_position</i> and <i>next_item_length</i> fields are intended to help in distinguishing
+between different automatic callouts, which all have the same callout number.
+However, they are set for all callouts.
+<h2><a name='sect4' href='#toc4'>Return Values</a></h2>
+ <p>
+The external callout
+function returns an integer to PCRE. If the value is zero, matching proceeds
+as normal. If the value is greater than zero, matching fails at the current
+point, but backtracking to test other matching possibilities goes ahead,
+just as if a lookahead assertion had failed. If the value is less than zero,
+the match is abandoned, and <b>pcre_exec()</b> returns the negative value. <p>
+Negative
+values should normally be chosen from the set of PCRE_ERROR_xxx values.
+In particular, PCRE_ERROR_NOMATCH forces a standard "no match" failure.
+The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions;
+it will never be used by PCRE itself. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Callouts</a></li>
+<li><a name='toc2' href='#sect2'>Missing Callouts</a></li>
+<li><a name='toc3' href='#sect3'>The Callout Interface</a></li>
+<li><a name='toc4' href='#sect4'>Return Values</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcrecompat.3.html b/spamfilter/Utilities/PCRE/man/html/pcrecompat.3.html
new file mode 100644
index 0000000..af67000
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcrecompat.3.html
@@ -0,0 +1,115 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Differences Between Pcre and
+Perl</a></h2>
+ <p>
+This document describes the differences in the ways that PCRE and
+Perl handle regular expressions. The differences described here are with
+respect to Perl 5.8. <p>
+1. PCRE does not have full UTF-8 support. Details of what
+it does have are given in the section on UTF-8 support in the main <b>pcre</b>
+ page. <p>
+2. PCRE does not allow repeat quantifiers on lookahead assertions.
+Perl permits them, but they do not mean what you might think. For example,
+(?!a){3} does not assert that the next three characters are not "a". It
+just asserts that the next character is not "a" three times. <p>
+3. Capturing
+subpatterns that occur inside negative lookahead assertions are counted,
+but their entries in the offsets vector are never set. Perl sets its numerical
+variables from any such patterns that are matched before the assertion
+fails to match something (thereby succeeding), but only if the negative
+lookahead assertion contains just one branch. <p>
+4. Though binary zero characters
+are supported in the subject string, they are not allowed in a pattern
+string because it is passed as a normal C string, terminated by zero. The
+escape sequence \0 can be used in the pattern to represent a binary zero.
+<p>
+5. The following Perl escape sequences are not supported: \l, \u, \L, \U, and
+\N. In fact these are implemented by Perl&rsquo;s general string-handling and are
+not part of its pattern matching engine. If any of these are encountered
+by PCRE, an error is generated. <p>
+6. The Perl escape sequences \p, \P, and \X
+are supported only if PCRE is built with Unicode character property support.
+The properties that can be tested with \p and \P are limited to the general
+category properties such as Lu and Nd. <p>
+7. PCRE does support the \Q...\E escape
+for quoting substrings. Characters in between are treated as literals. This
+is slightly different from Perl in that $ and @ are also handled as literals
+inside the quotes. In Perl, they cause variable interpolation (but of course
+PCRE does not have variables). Note the following examples: <p>
+ Pattern
+ PCRE matches Perl matches<br>
+ <p>
+ \Qabc$xyz\E abc$xyz abc followed by the<br>
+ contents of $xyz<br>
+ \Qabc\$xyz\E abc\$xyz abc\$xyz<br>
+ \Qabc\E\$\Qxyz\E abc$xyz abc$xyz<br>
+ <p>
+The \Q...\E sequence is recognized both inside and outside character classes.
+<p>
+8. Fairly obviously, PCRE does not support the (?{code}) and (?p{code})
+constructions. However, there is support for recursive patterns using the
+non-Perl items (?R), (?number), and (?P&gt;name). Also, the PCRE "callout" feature
+allows an external function to be called during pattern matching. See the
+ <b>pcrecallout</b> documentation for details. <p>
+9. There are some differences that
+are concerned with the settings of captured strings when part of a pattern
+is repeated. For example, matching "aba" against the pattern /^(a(b)?)+$/
+in Perl leaves $2 unset, but in PCRE it is set to "b". <p>
+10. PCRE provides
+some extensions to the Perl regular expression facilities: <p>
+(a) Although
+lookbehind assertions must match fixed length strings, each alternative
+branch of a lookbehind assertion can match a different length of string.
+Perl requires them all to have the same length. <p>
+(b) If PCRE_DOLLAR_ENDONLY
+is set and PCRE_MULTILINE is not set, the $ meta-character matches only
+at the very end of the string. <p>
+(c) If PCRE_EXTRA is set, a backslash followed
+by a letter with no special meaning is faulted. <p>
+(d) If PCRE_UNGREEDY is
+set, the greediness of the repetition quantifiers is inverted, that is,
+by default they are not greedy, but if followed by a question mark they
+are. <p>
+(e) PCRE_ANCHORED can be used at matching time to force a pattern to
+be tried only at the first matching position in the subject string. <p>
+(f)
+The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, and PCRE_NO_AUTO_CAPTURE options
+for <b>pcre_exec()</b> have no Perl equivalents. <p>
+(g) The (?R), (?number), and (?P&gt;name)
+constructs allow for recursive pattern matching (Perl can do this using
+the (?p{code}) construct, which PCRE cannot support). <p>
+(h) PCRE supports
+named capturing substrings, using the Python syntax. <p>
+(i) PCRE supports the
+possessive quantifier "++" syntax, taken from Sun&rsquo;s Java package. <p>
+(j) The
+(R) condition, for testing recursion, is a PCRE extension. <p>
+(k) The callout
+facility is PCRE-specific. <p>
+(l) The partial matching facility is PCRE-specific.
+<p>
+(m) Patterns compiled by PCRE can be saved and re-used at a later time,
+even on different hosts that have the other endianness. <p>
+ Last updated: 09
+September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Differences Between Pcre and Perl</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcregrep.1.html b/spamfilter/Utilities/PCRE/man/html/pcregrep.1.html
new file mode 100644
index 0000000..726e8b7
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcregrep.1.html
@@ -0,0 +1,147 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCREGREP(1) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+pcregrep - a grep with Perl-compatible regular expressions.
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+<b>pcregrep
+[-Vcfhilnrsuvx] [long options] [pattern] [file1 file2 ...]</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+<b>pcregrep</b>
+searches files for character patterns, in the same way as other grep commands
+do, but it uses the PCRE regular expression library to support patterns
+that are compatible with the regular expressions of Perl 5. See <b>pcrepattern</b>
+ for a full description of syntax and semantics of the regular expressions
+that PCRE supports. <p>
+A pattern must be specified on the command line unless
+the <b>-f</b> option is used (see below). <p>
+If no files are specified, <b>pcregrep</b> reads
+the standard input. By default, each line that matches the pattern is copied
+to the standard output, and if there is more than one file, the file name
+is printed before each line of output. However, there are options that can
+change how <b>pcregrep</b> behaves. <p>
+Lines are limited to BUFSIZ characters. BUFSIZ
+is defined in <b>&lt;stdio.h&gt;</b>. The newline character is removed from the end of each
+line before it is matched against the pattern.
+<h2><a name='sect3' href='#toc3'>Options</a></h2>
+ <p>
+
+<dl>
+
+<dt><b>-V</b> </dt>
+<dd>Write the version
+number of the PCRE library being used to the standard error stream. </dd>
+
+<dt><b>-c</b> </dt>
+<dd>Do
+not print individual lines; instead just print a count of the number of
+lines that would otherwise have been printed. If several files are given,
+a count is printed for each of them. </dd>
+
+<dt><b>-f</b><i>filename</i> </dt>
+<dd>Read a number of patterns
+from the file, one per line, and match all of them against each line of
+input. A line is output if any of the patterns match it. When <b>-f</b> is used,
+no pattern is taken from the command line; all arguments are treated as
+file names. There is a maximum of 100 patterns. Trailing white space is removed,
+and blank lines are ignored. An empty file contains no patterns and therefore
+matches nothing. </dd>
+
+<dt><b>-h</b> </dt>
+<dd>Suppress printing of filenames when searching multiple
+files. </dd>
+
+<dt><b>-i</b> </dt>
+<dd>Ignore upper/lower case distinctions during comparisons. </dd>
+
+<dt><b>-l</b> </dt>
+<dd>Instead
+of printing lines from the files, just print the names of the files containing
+lines that would have been printed. Each file name is printed once, on a
+separate line. </dd>
+
+<dt><b>-n</b> </dt>
+<dd>Precede each line by its line number in the file. </dd>
+
+<dt><b>-r</b> </dt>
+<dd>If any
+file is a directory, recursively scan the files it contains. Without <b>-r</b> a
+directory is scanned as a normal file. </dd>
+
+<dt><b>-s</b> </dt>
+<dd>Work silently, that is, display
+nothing except error messages. The exit status indicates whether any matches
+were found. </dd>
+
+<dt><b>-u</b> </dt>
+<dd>Operate in UTF-8 mode. This option is available only if PCRE
+has been compiled with UTF-8 support. Both the pattern and each subject line
+must be valid strings of UTF-8 characters. </dd>
+
+<dt><b>-v</b> </dt>
+<dd>Invert the sense of the match,
+so that lines which do <i>not</i> match the pattern are now the ones that are
+found. </dd>
+
+<dt><b>-x</b> </dt>
+<dd>Force the pattern to be anchored (it must start matching at the
+beginning of the line) and in addition, require it to match the entire
+line. This is equivalent to having ^ and $ characters at the start and end
+of each alternative branch in the regular expression. </dd>
+</dl>
+
+<h2><a name='sect4' href='#toc4'>Long Options</a></h2>
+ <p>
+Long
+forms of all the options are available, as in GNU grep. They are shown in
+the following table: <p>
+ -c --count<br>
+ -h --no-filename<br>
+ -i --ignore-case<br>
+ -l --files-with-matches<br>
+ -n --line-number<br>
+ -r --recursive<br>
+ -s --no-messages<br>
+ -u --utf-8<br>
+ -V --version<br>
+ -v --invert-match<br>
+ -x --line-regex<br>
+ -x --line-regexp<br>
+ <p>
+In addition, --file=<i>filename</i> is equivalent to -f<i>filename</i>, and --help shows
+the list of options and then exits.
+<h2><a name='sect5' href='#toc5'>Diagnostics</a></h2>
+ <p>
+Exit status is 0 if any
+matches were found, 1 if no matches were found, and 2 for syntax errors
+or inaccessible files (even if matches were found).
+<h2><a name='sect6' href='#toc6'>Author</a></h2>
+ <p>
+Philip Hazel
+&lt;ph10@cam.ac.uk&gt; <br>
+University Computing Service <br>
+Cambridge CB2 3QG, England. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+<li><a name='toc3' href='#sect3'>Options</a></li>
+<li><a name='toc4' href='#sect4'>Long Options</a></li>
+<li><a name='toc5' href='#sect5'>Diagnostics</a></li>
+<li><a name='toc6' href='#sect6'>Author</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcrepattern.3.html b/spamfilter/Utilities/PCRE/man/html/pcrepattern.3.html
new file mode 100644
index 0000000..11bb198
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcrepattern.3.html
@@ -0,0 +1,1268 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Regular Expression Details</a></h2>
+
+<p>
+The syntax and semantics of the regular expressions supported by PCRE are
+described below. Regular expressions are also described in the Perl documentation
+and in a number of books, some of which have copious examples. Jeffrey Friedl&rsquo;s
+"Mastering Regular Expressions", published by O&rsquo;Reilly, covers regular expressions
+in great detail. This description of PCRE&rsquo;s regular expressions is intended
+as reference material. <p>
+The original operation of PCRE was on strings of
+one-byte characters. However, there is now also support for UTF-8 character
+strings. To use this, you must build PCRE to include UTF-8 support, and then
+call <b>pcre_compile()</b> with the PCRE_UTF8 option. How this affects pattern
+matching is mentioned in several places below. There is also a summary of
+UTF-8 features in the section on UTF-8 support in the main <b>pcre</b> page.
+<p>
+A regular expression is a pattern that is matched against a subject string
+from left to right. Most characters stand for themselves in a pattern, and
+match the corresponding characters in the subject. As a trivial example,
+the pattern <p>
+ The quick brown fox<br>
+ <p>
+matches a portion of a subject string that is identical to itself. The
+power of regular expressions comes from the ability to include alternatives
+and repetitions in the pattern. These are encoded in the pattern by the
+use of <i>metacharacters</i>, which do not stand for themselves but instead are
+interpreted in some special way. <p>
+There are two different sets of metacharacters:
+those that are recognized anywhere in the pattern except within square
+brackets, and those that are recognized in square brackets. Outside square
+brackets, the metacharacters are as follows: <p>
+ \ general escape character
+with several uses<br>
+ ^ assert start of string (or line, in multiline mode)<br>
+ $ assert end of string (or line, in multiline mode)<br>
+ . match any character except newline (by default)<br>
+ [ start character class definition<br>
+ | start of alternative branch<br>
+ ( start subpattern<br>
+ ) end subpattern<br>
+ ? extends the meaning of (<br>
+ also 0 or 1 quantifier<br>
+ also quantifier minimizer<br>
+ * 0 or more quantifier<br>
+ + 1 or more quantifier<br>
+ also "possessive quantifier"<br>
+ { start min/max quantifier<br>
+ <p>
+Part of a pattern that is in square brackets is called a "character class".
+In a character class the only metacharacters are: <p>
+ \ general escape
+character<br>
+ ^ negate the class, but only if the first character<br>
+ - indicates character range<br>
+ [ POSIX character class (only if followed by POSIX<br>
+ syntax)<br>
+ ] terminates the character class<br>
+ <p>
+The following sections describe the use of each of the metacharacters.
+
+<h2><a name='sect2' href='#toc2'>Backslash</a></h2>
+ <p>
+The backslash character has several uses. Firstly, if it is followed
+by a non-alphanumeric character, it takes away any special meaning that
+character may have. This use of backslash as an escape character applies
+both inside and outside character classes. <p>
+For example, if you want to match
+a * character, you write \* in the pattern. This escaping action applies
+whether or not the following character would otherwise be interpreted as
+a metacharacter, so it is always safe to precede a non-alphanumeric with
+backslash to specify that it stands for itself. In particular, if you want
+to match a backslash, you write \\. <p>
+If a pattern is compiled with the PCRE_EXTENDED
+option, whitespace in the pattern (other than in a character class) and
+characters between a # outside a character class and the next newline character
+are ignored. An escaping backslash can be used to include a whitespace or
+# character as part of the pattern. <p>
+If you want to remove the special meaning
+from a sequence of characters, you can do so by putting them between \Q
+and \E. This is different from Perl in that $ and @ are handled as literals
+in \Q...\E sequences in PCRE, whereas in Perl, $ and @ cause variable interpolation.
+Note the following examples: <p>
+ Pattern PCRE matches Perl matches<br>
+ <p>
+ \Qabc$xyz\E abc$xyz abc followed by the<br>
+ contents of $xyz<br>
+ \Qabc\$xyz\E abc\$xyz abc\$xyz<br>
+ \Qabc\E\$\Qxyz\E abc$xyz abc$xyz<br>
+ <p>
+The \Q...\E sequence is recognized both inside and outside character classes.
+
+<h3><a name='sect3' href='#toc3'>Non-printing characters</a></h3>
+ <p>
+A second use of backslash provides a way of encoding
+non-printing characters in patterns in a visible manner. There is no restriction
+on the appearance of non-printing characters, apart from the binary zero
+that terminates a pattern, but when a pattern is being prepared by text
+editing, it is usually easier to use one of the following escape sequences
+than the binary character it represents: <p>
+ \a alarm, that is, the
+BEL character (hex 07)<br>
+ \cx "control-x", where x is any character<br>
+ \e escape (hex 1B)<br>
+ \f formfeed (hex 0C)<br>
+ \n newline (hex 0A)<br>
+ \r carriage return (hex 0D)<br>
+ \t tab (hex 09)<br>
+ \ddd character with octal code ddd, or backreference<br>
+ \xhh character with hex code hh<br>
+ \x{hhh..} character with hex code hhh... (UTF-8 mode only)<br>
+ <p>
+The precise effect of \cx is as follows: if x is a lower case letter, it
+is converted to upper case. Then bit 6 of the character (hex 40) is inverted.
+Thus \cz becomes hex 1A, but \c{ becomes hex 3B, while \c; becomes hex 7B.
+<p>
+After \x, from zero to two hexadecimal digits are read (letters can be in
+upper or lower case). In UTF-8 mode, any number of hexadecimal digits may
+appear between \x{ and }, but the value of the character code must be less
+than 2**31 (that is, the maximum hexadecimal value is 7FFFFFFF). If characters
+other than hexadecimal digits appear between \x{ and }, or if there is no
+terminating }, this form of escape is not recognized. Instead, the initial
+\x will be interpreted as a basic hexadecimal escape, with no following
+digits, giving a character whose value is zero. <p>
+Characters whose value is
+less than 256 can be defined by either of the two syntaxes for \x when PCRE
+is in UTF-8 mode. There is no difference in the way they are handled. For
+example, \xdc is exactly the same as \x{dc}. <p>
+After \0 up to two further octal
+digits are read. In both cases, if there are fewer than two digits, just
+those that are present are used. Thus the sequence \0\x\07 specifies two binary
+zeros followed by a BEL character (code value 7). Make sure you supply two
+digits after the initial zero if the pattern character that follows is
+itself an octal digit. <p>
+The handling of a backslash followed by a digit other
+than 0 is complicated. Outside a character class, PCRE reads it and any
+following digits as a decimal number. If the number is less than 10, or
+if there have been at least that many previous capturing left parentheses
+in the expression, the entire sequence is taken as a <i>back reference</i>. A description
+of how this works is given later, following the discussion of parenthesized
+subpatterns. <p>
+Inside a character class, or if the decimal number is greater
+than 9 and there have not been that many capturing subpatterns, PCRE re-reads
+up to three octal digits following the backslash, and generates a single
+byte from the least significant 8 bits of the value. Any subsequent digits
+stand for themselves. For example: <p>
+ \040 is another way of writing a space<br>
+ \40 is the same, provided there are fewer than 40<br>
+ previous capturing subpatterns<br>
+ \7 is always a back reference<br>
+ \11 might be a back reference, or another way of<br>
+ writing a tab<br>
+ \011 is always a tab<br>
+ \0113 is a tab followed by the character "3"<br>
+ \113 might be a back reference, otherwise the<br>
+ character with octal code 113<br>
+ \377 might be a back reference, otherwise<br>
+ the byte consisting entirely of 1 bits<br>
+ \81 is either a back reference, or a binary zero<br>
+ followed by the two characters "8" and "1"<br>
+ <p>
+Note that octal values of 100 or greater must not be introduced by a leading
+zero, because no more than three octal digits are ever read. <p>
+All the sequences
+that define a single byte value or a single UTF-8 character (in UTF-8 mode)
+can be used both inside and outside character classes. In addition, inside
+a character class, the sequence \b is interpreted as the backspace character
+(hex 08), and the sequence \X is interpreted as the character "X". Outside
+a character class, these sequences have different meanings (see below).
+
+<h3><a name='sect4' href='#toc4'>Generic character types</a></h3>
+ <p>
+The third use of backslash is for specifying
+generic character types. The following are always recognized: <p>
+ \d any
+decimal digit<br>
+ \D any character that is not a decimal digit<br>
+ \s any whitespace character<br>
+ \S any character that is not a whitespace character<br>
+ \w any "word" character<br>
+ \W any "non-word" character<br>
+ <p>
+Each pair of escape sequences partitions the complete set of characters
+into two disjoint sets. Any given character matches one, and only one, of
+each pair. <p>
+These character type sequences can appear both inside and outside
+character classes. They each match one character of the appropriate type.
+If the current matching point is at the end of the subject string, all
+of them fail, since there is no character to match. <p>
+For compatibility with
+Perl, \s does not match the VT character (code 11). This makes it different
+from the POSIX "space" class. The \s characters are HT (9),
+LF (10),
+FF (12), CR (13), and space (32). <p>
+A "word" character is an underscore or
+any character less than 256 that is a letter or digit. The definition of
+letters and digits is controlled by PCRE&rsquo;s low-valued character tables, and
+may vary if locale-specific matching is taking place (see "Locale support"
+ in the <b>pcreapi</b> page). For example, in the "fr_FR" (French) locale, some
+character codes greater than 128 are used for accented letters, and these
+are matched by \w. <p>
+In UTF-8 mode, characters with values greater than 128
+never match \d, \s, or \w, and always match \D, \S, and \W. This is true even
+when Unicode character property support is available.
+<h3><a name='sect5' href='#toc5'>Unicode character
+properties</a></h3>
+ <p>
+When PCRE is built with Unicode character property support,
+three additional escape sequences to match generic character types are
+available when UTF-8 mode is selected. They are: <p>
+ \p{<i>xx</i>} a character with
+the <i>xx</i> property<br>
+ \P{<i>xx</i>} a character without the <i>xx</i> property<br>
+ \X an extended Unicode sequence<br>
+ <p>
+The property names represented by <i>xx</i> above are limited to the Unicode
+general category properties. Each character has exactly one such property,
+specified by a two-letter abbreviation. For compatibility with Perl, negation
+can be specified by including a circumflex between the opening brace and
+the property name. For example, \p{^Lu} is the same as \P{Lu}. <p>
+If only one letter
+is specified with \p or \P, it includes all the properties that start with
+that letter. In this case, in the absence of negation, the curly brackets
+in the escape sequence are optional; these two examples have the same effect:
+<p>
+ \p{L}<br>
+ \pL<br>
+ <p>
+The following property codes are supported: <p>
+ C Other<br>
+ Cc Control<br>
+ Cf Format<br>
+ Cn Unassigned<br>
+ Co Private use<br>
+ Cs Surrogate<br>
+ <p>
+ L Letter<br>
+ Ll Lower case letter<br>
+ Lm Modifier letter<br>
+ Lo Other letter<br>
+ Lt Title case letter<br>
+ Lu Upper case letter<br>
+ <p>
+ M Mark<br>
+ Mc Spacing mark<br>
+ Me Enclosing mark<br>
+ Mn Non-spacing mark<br>
+ <p>
+ N Number<br>
+ Nd Decimal number<br>
+ Nl Letter number<br>
+ No Other number<br>
+ <p>
+ P Punctuation<br>
+ Pc Connector punctuation<br>
+ Pd Dash punctuation<br>
+ Pe Close punctuation<br>
+ Pf Final punctuation<br>
+ Pi Initial punctuation<br>
+ Po Other punctuation<br>
+ Ps Open punctuation<br>
+ <p>
+ S Symbol<br>
+ Sc Currency symbol<br>
+ Sk Modifier symbol<br>
+ Sm Mathematical symbol<br>
+ So Other symbol<br>
+ <p>
+ Z Separator<br>
+ Zl Line separator<br>
+ Zp Paragraph separator<br>
+ Zs Space separator<br>
+ <p>
+Extended properties such as "Greek" or "InMusicalSymbols" are not supported
+by PCRE. <p>
+Specifying caseless matching does not affect these escape sequences.
+For example, \p{Lu} always matches only upper case letters. <p>
+The \X escape
+matches any number of Unicode characters that form an extended Unicode
+sequence. \X is equivalent to <p>
+ (?&gt;\PM\pM*)<br>
+ <p>
+That is, it matches a character without the "mark" property, followed
+by zero or more characters with the "mark" property, and treats the sequence
+as an atomic group (see below). Characters with the "mark" property are
+typically accents that affect the preceding character. <p>
+Matching characters
+by Unicode property is not fast, because PCRE has to search a structure
+that contains data for over fifteen thousand characters. That is why the
+traditional escape sequences such as \d and \w do not use Unicode properties
+in PCRE.
+<h3><a name='sect6' href='#toc6'>Simple assertions</a></h3>
+ <p>
+The fourth use of backslash is for certain
+simple assertions. An assertion specifies a condition that has to be met
+at a particular point in a match, without consuming any characters from
+the subject string. The use of subpatterns for more complicated assertions
+is described below. The backslashed assertions are: <p>
+ \b matches at
+a word boundary<br>
+ \B matches when not at a word boundary<br>
+ \A matches at start of subject<br>
+ \Z matches at end of subject or before newline at end<br>
+ \z matches at end of subject<br>
+ \G matches at first matching position in subject<br>
+ <p>
+These assertions may not appear in character classes (but note that \b
+has a different meaning, namely the backspace character, inside a character
+class). <p>
+A word boundary is a position in the subject string where the current
+character and the previous character do not both match \w or \W (i.e. one matches
+\w and the other matches \W), or the start or end of the string if the first
+or last character matches \w, respectively. <p>
+The \A, \Z, and \z assertions differ
+from the traditional circumflex and dollar (described in the next section)
+in that they only ever match at the very start and end of the subject string,
+whatever options are set. Thus, they are independent of multiline mode. These
+three assertions are not affected by the PCRE_NOTBOL or PCRE_NOTEOL options,
+which affect only the behaviour of the circumflex and dollar metacharacters.
+However, if the <i>startoffset</i> argument of <b>pcre_exec()</b> is non-zero, indicating
+that matching is to start at a point other than the beginning of the subject,
+\A can never match. The difference between \Z and \z is that \Z matches before
+a newline that is the last character of the string as well as at the end
+of the string, whereas \z matches only at the end. <p>
+The \G assertion is true
+only when the current matching position is at the start point of the match,
+as specified by the <i>startoffset</i> argument of <b>pcre_exec()</b>. It differs from
+\A when the value of <i>startoffset</i> is non-zero. By calling <b>pcre_exec()</b> multiple
+times with appropriate arguments, you can mimic Perl&rsquo;s /g option, and it
+is in this kind of implementation where \G can be useful. <p>
+Note, however,
+that PCRE&rsquo;s interpretation of \G, as the start of the current match, is subtly
+different from Perl&rsquo;s, which defines it as the end of the previous match.
+In Perl, these can be different when the previously matched string was
+empty. Because PCRE does just one match at a time, it cannot reproduce this
+behaviour. <p>
+If all the alternatives of a pattern begin with \G, the expression
+is anchored to the starting match position, and the "anchored" flag is
+set in the compiled regular expression.
+<h2><a name='sect7' href='#toc7'>Circumflex and Dollar</a></h2>
+ <p>
+Outside
+a character class, in the default matching mode, the circumflex character
+is an assertion that is true only if the current matching point is at the
+start of the subject string. If the <i>startoffset</i> argument of <b>pcre_exec()</b>
+is non-zero, circumflex can never match if the PCRE_MULTILINE option is
+unset. Inside a character class, circumflex has an entirely different meaning
+ (see below). <p>
+Circumflex need not be the first character of the pattern
+if a number of alternatives are involved, but it should be the first thing
+in each alternative in which it appears if the pattern is ever to match
+that branch. If all possible alternatives start with a circumflex, that
+is, if the pattern is constrained to match only at the start of the subject,
+it is said to be an "anchored" pattern. (There are also other constructs
+that can cause a pattern to be anchored.) <p>
+A dollar character is an assertion
+that is true only if the current matching point is at the end of the subject
+string, or immediately before a newline character that is the last character
+in the string (by default). Dollar need not be the last character of the
+pattern if a number of alternatives are involved, but it should be the
+last item in any branch in which it appears. Dollar has no special meaning
+in a character class. <p>
+The meaning of dollar can be changed so that it matches
+only at the very end of the string, by setting the PCRE_DOLLAR_ENDONLY
+option at compile time. This does not affect the \Z assertion. <p>
+The meanings
+of the circumflex and dollar characters are changed if the PCRE_MULTILINE
+option is set. When this is the case, they match immediately after and immediately
+before an internal newline character, respectively, in addition to matching
+at the start and end of the subject string. For example, the pattern /^abc$/
+matches the subject string "def\nabc" (where \n represents a newline character)
+in multiline mode, but not otherwise. Consequently, patterns that are anchored
+in single line mode because all branches start with ^ are not anchored in
+multiline mode, and a match for circumflex is possible when the <i>startoffset</i>
+argument of <b>pcre_exec()</b> is non-zero. The PCRE_DOLLAR_ENDONLY option is ignored
+if PCRE_MULTILINE is set. <p>
+Note that the sequences \A, \Z, and \z can be used
+to match the start and end of the subject in both modes, and if all branches
+of a pattern start with \A it is always anchored, whether PCRE_MULTILINE
+is set or not.
+<h2><a name='sect8' href='#toc8'>Full Stop (period, Dot)</a></h2>
+ <p>
+Outside a character class, a dot
+in the pattern matches any one character in the subject, including a non-printing
+character, but not (by default) newline. In UTF-8 mode, a dot matches any
+UTF-8 character, which might be more than one byte long, except (by default)
+newline. If the PCRE_DOTALL option is set, dots match newlines as well. The
+handling of dot is entirely independent of the handling of circumflex and
+dollar, the only relationship being that they both involve newline characters.
+Dot has no special meaning in a character class.
+<h2><a name='sect9' href='#toc9'>Matching a Single Byte</a></h2>
+
+<p>
+Outside a character class, the escape sequence \C matches any one byte,
+both in and out of UTF-8 mode. Unlike a dot, it can match a newline. The feature
+is provided in Perl in order to match individual bytes in UTF-8 mode. Because
+it breaks up UTF-8 characters into individual bytes, what remains in the
+string may be a malformed UTF-8 string. For this reason, the \C escape sequence
+is best avoided. <p>
+PCRE does not allow \C to appear in lookbehind assertions
+ (described below), because in UTF-8 mode this would make it impossible
+to calculate the length of the lookbehind.
+<h2><a name='sect10' href='#toc10'>Square Brackets and Character
+Classes</a></h2>
+ <p>
+An opening square bracket introduces a character class, terminated
+by a closing square bracket. A closing square bracket on its own is not
+special. If a closing square bracket is required as a member of the class,
+it should be the first data character in the class (after an initial circumflex,
+if present) or escaped with a backslash. <p>
+A character class matches a single
+character in the subject. In UTF-8 mode, the character may occupy more than
+one byte. A matched character must be in the set of characters defined by
+the class, unless the first character in the class definition is a circumflex,
+in which case the subject character must not be in the set defined by the
+class. If a circumflex is actually required as a member of the class, ensure
+it is not the first character, or escape it with a backslash. <p>
+For example,
+the character class [aeiou] matches any lower case vowel, while [^aeiou]
+matches any character that is not a lower case vowel. Note that a circumflex
+is just a convenient notation for specifying the characters that are in
+the class by enumerating those that are not. A class that starts with a
+circumflex is not an assertion: it still consumes a character from the
+subject string, and therefore it fails if the current pointer is at the
+end of the string. <p>
+In UTF-8 mode, characters with values greater than 255
+can be included in a class as a literal string of bytes, or by using the
+\x{ escaping mechanism. <p>
+When caseless matching is set, any letters in a class
+represent both their upper case and lower case versions, so for example,
+a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does
+not match "A", whereas a caseful version would. When running in UTF-8 mode,
+PCRE supports the concept of case for characters with values greater than
+128 only when it is compiled with Unicode property support. <p>
+The newline
+character is never treated in any special way in character classes, whatever
+the setting of the PCRE_DOTALL or PCRE_MULTILINE options is. A class such
+as [^a] will always match a newline. <p>
+The minus (hyphen) character can be
+used to specify a range of characters in a character class. For example,
+[d-m] matches any letter between d and m, inclusive. If a minus character
+is required in a class, it must be escaped with a backslash or appear in
+a position where it cannot be interpreted as indicating a range, typically
+as the first or last character in the class. <p>
+It is not possible to have
+the literal character "]" as the end character of a range. A pattern such
+as [W-]46] is interpreted as a class of two characters ("W" and "-") followed
+by a literal string "46]", so it would match "W46]" or "-46]". However, if
+the "]" is escaped with a backslash it is interpreted as the end of range,
+so [W-\]46] is interpreted as a class containing a range followed by two
+other characters. The octal or hexadecimal representation of "]" can also
+be used to end a range. <p>
+Ranges operate in the collating sequence of character
+values. They can also be used for characters specified numerically, for
+example [\000-\037]. In UTF-8 mode, ranges can include characters whose values
+are greater than 255, for example [\x{100}-\x{2ff}]. <p>
+If a range that includes
+letters is used when caseless matching is set, it matches the letters in
+either case. For example, [W-c] is equivalent to [][\\^_`wxyzabc], matched caselessly,
+and in non-UTF-8 mode, if character tables for the "fr_FR" locale are in
+use, [\xc8-\xcb] matches accented E characters in both cases. In UTF-8 mode,
+PCRE supports the concept of case for characters with values greater than
+128 only when it is compiled with Unicode property support. <p>
+The character
+types \d, \D, \p, \P, \s, \S, \w, and \W may also appear in a character class,
+and add the characters that they match to the class. For example, [\dABCDEF]
+matches any hexadecimal digit. A circumflex can conveniently be used with
+the upper case character types to specify a more restricted set of characters
+than the matching lower case type. For example, the class [^\W_] matches any
+letter or digit, but not underscore. <p>
+The only metacharacters that are recognized
+in character classes are backslash, hyphen (only where it can be interpreted
+as specifying a range), circumflex (only at the start), opening square
+bracket (only when it can be interpreted as introducing a POSIX class name
+- see the next section), and the terminating closing square bracket. However,
+escaping other non-alphanumeric characters does no harm.
+<h2><a name='sect11' href='#toc11'>Posix Character
+Classes</a></h2>
+ <p>
+Perl supports the POSIX notation for character classes. This uses
+names enclosed by [: and :] within the enclosing square brackets. PCRE also
+supports this notation. For example, <p>
+ [01[:alpha:]%]<br>
+ <p>
+matches "0", "1", any alphabetic character, or "%". The supported class
+names are <p>
+ alnum letters and digits<br>
+ alpha letters<br>
+ ascii character codes 0 - 127<br>
+ blank space or tab only<br>
+ cntrl control characters<br>
+ digit decimal digits (same as \d)<br>
+ graph printing characters, excluding space<br>
+ lower lower case letters<br>
+ print printing characters, including space<br>
+ punct printing characters, excluding letters and digits<br>
+ space white space (not quite the same as \s)<br>
+ upper upper case letters<br>
+ word "word" characters (same as \w)<br>
+ xdigit hexadecimal digits<br>
+ <p>
+The "space" characters are HT (9),
+LF (10), VT (11), FF (12), CR (13),
+and space (32). Notice that this list includes the VT character (code 11).
+This makes "space" different to \s, which does not include VT (for Perl
+compatibility). <p>
+The name "word" is a Perl extension, and "blank" is a GNU
+extension from Perl 5.8. Another Perl extension is negation, which is indicated
+by a ^ character after the colon. For example, <p>
+ [12[:^digit:]]<br>
+ <p>
+matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX
+syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are
+not supported, and an error is given if they are encountered. <p>
+In UTF-8 mode,
+characters with values greater than 128 do not match any of the POSIX character
+classes.
+<h2><a name='sect12' href='#toc12'>Vertical Bar</a></h2>
+ <p>
+Vertical bar characters are used to separate alternative
+patterns. For example, the pattern <p>
+ gilbert|sullivan<br>
+ <p>
+matches either "gilbert" or "sullivan". Any number of alternatives may
+appear, and an empty alternative is permitted (matching the empty string).
+The matching process tries each alternative in turn, from left to right,
+and the first one that succeeds is used. If the alternatives are within
+a subpattern (defined below), "succeeds" means matching the rest of
+the main pattern as well as the alternative in the subpattern.
+<h2><a name='sect13' href='#toc13'>Internal
+Option Setting</a></h2>
+ <p>
+The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL,
+and PCRE_EXTENDED options can be changed from within the pattern by a sequence
+of Perl option letters enclosed between "(?" and ")". The option letters
+are <p>
+ i for PCRE_CASELESS<br>
+ m for PCRE_MULTILINE<br>
+ s for PCRE_DOTALL<br>
+ x for PCRE_EXTENDED<br>
+ <p>
+For example, (?im) sets caseless, multiline matching. It is also possible
+to unset these options by preceding the letter with a hyphen, and a combined
+setting and unsetting such as (?im-sx), which sets PCRE_CASELESS and PCRE_MULTILINE
+while unsetting PCRE_DOTALL and PCRE_EXTENDED, is also permitted. If a letter
+appears both before and after the hyphen, the option is unset. <p>
+When an option
+change occurs at top level (that is, not inside subpattern parentheses),
+the change applies to the remainder of the pattern that follows. If the
+change is placed right at the start of a pattern, PCRE extracts it into
+the global options (and it will therefore show up in data extracted by
+the <b>pcre_fullinfo()</b> function). <p>
+An option change within a subpattern affects
+only that part of the current pattern that follows it, so <p>
+ (a(?i)b)c<br>
+ <p>
+matches abc and aBc and no other strings (assuming PCRE_CASELESS is not
+used). By this means, options can be made to have different settings in
+different parts of the pattern. Any changes made in one alternative do carry
+on into subsequent branches within the same subpattern. For example, <p>
+ (a(?i)b|c)<br>
+ <p>
+matches "ab", "aB", "c", and "C", even though when matching "C" the first
+branch is abandoned before the option setting. This is because the effects
+of option settings happen at compile time. There would be some very weird
+behaviour otherwise. <p>
+The PCRE-specific options PCRE_UNGREEDY and PCRE_EXTRA
+can be changed in the same way as the Perl-compatible options by using the
+characters U and X respectively. The (?X) flag setting is special in that
+it must always occur earlier in the pattern than any of the additional
+features it turns on, even when it is at top level. It is best to put it
+at the start.
+<h2><a name='sect14' href='#toc14'>Subpatterns</a></h2>
+ <p>
+Subpatterns are delimited by parentheses (round
+brackets), which can be nested. Turning part of a pattern into a subpattern
+does two things: <p>
+1. It localizes a set of alternatives. For example, the
+pattern <p>
+ cat(aract|erpillar|)<br>
+ <p>
+matches one of the words "cat", "cataract", or "caterpillar". Without the
+parentheses, it would match "cataract", "erpillar" or the empty string.
+<p>
+2. It sets up the subpattern as a capturing subpattern. This means that,
+when the whole pattern matches, that portion of the subject string that
+matched the subpattern is passed back to the caller via the <i>ovector</i> argument
+of <b>pcre_exec()</b>. Opening parentheses are counted from left to right (starting
+from 1) to obtain numbers for the capturing subpatterns. <p>
+For example, if
+the string "the red king" is matched against the pattern <p>
+ the ((red|white)
+(king|queen))<br>
+ <p>
+the captured substrings are "red king", "red", and "king", and are numbered
+1, 2, and 3, respectively. <p>
+The fact that plain parentheses fulfil two functions
+is not always helpful. There are often times when a grouping subpattern
+is required without a capturing requirement. If an opening parenthesis is
+followed by a question mark and a colon, the subpattern does not do any
+capturing, and is not counted when computing the number of any subsequent
+capturing subpatterns. For example, if the string "the white queen" is matched
+against the pattern <p>
+ the ((?:red|white) (king|queen))<br>
+ <p>
+the captured substrings are "white queen" and "queen", and are numbered
+1 and 2. The maximum number of capturing subpatterns is 65535, and the maximum
+depth of nesting of all subpatterns, both capturing and non-capturing, is
+200. <p>
+As a convenient shorthand, if any option settings are required at the
+start of a non-capturing subpattern, the option letters may appear between
+the "?" and the ":". Thus the two patterns <p>
+ (?i:saturday|sunday)<br>
+ (?:(?i)saturday|sunday)<br>
+ <p>
+match exactly the same set of strings. Because alternative branches are
+tried from left to right, and options are not reset until the end of the
+subpattern is reached, an option setting in one branch does affect subsequent
+branches, so the above patterns match "SUNDAY" as well as "Saturday".
+
+<h2><a name='sect15' href='#toc15'>Named Subpatterns</a></h2>
+ <p>
+Identifying capturing parentheses by number is simple,
+but it can be very hard to keep track of the numbers in complicated regular
+expressions. Furthermore, if an expression is modified, the numbers may
+change. To help with this difficulty, PCRE supports the naming of subpatterns,
+something that Perl does not provide. The Python syntax (?P&lt;name&gt;...) is used.
+Names consist of alphanumeric characters and underscores, and must be unique
+within a pattern. <p>
+Named capturing parentheses are still allocated numbers
+as well as names. The PCRE API provides function calls for extracting the
+name-to-number translation table from a compiled pattern. There is also a
+convenience function for extracting a captured substring by name. For further
+details see the <b>pcreapi</b> documentation.
+<h2><a name='sect16' href='#toc16'>Repetition</a></h2>
+ <p>
+Repetition is specified
+by quantifiers, which can follow any of the following items: <p>
+ a literal
+data character<br>
+ the . metacharacter<br>
+ the \C escape sequence<br>
+ the \X escape sequence (in UTF-8 mode with Unicode properties)<br>
+ an escape such as \d that matches a single character<br>
+ a character class<br>
+ a back reference (see next section)<br>
+ a parenthesized subpattern (unless it is an assertion)<br>
+ <p>
+The general repetition quantifier specifies a minimum and maximum number
+of permitted matches, by giving the two numbers in curly brackets (braces),
+separated by a comma. The numbers must be less than 65536, and the first
+must be less than or equal to the second. For example: <p>
+ z{2,4}<br>
+ <p>
+matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special
+character. If the second number is omitted, but the comma is present, there
+is no upper limit; if the second number and the comma are both omitted,
+the quantifier specifies an exact number of required matches. Thus <p>
+ [aeiou]{3,}<br>
+ <p>
+matches at least 3 successive vowels, but may match many more, while <p>
+
+ \d{8}<br>
+ <p>
+matches exactly 8 digits. An opening curly bracket that appears in a position
+where a quantifier is not allowed, or one that does not match the syntax
+of a quantifier, is taken as a literal character. For example, {,6} is not
+a quantifier, but a literal string of four characters. <p>
+In UTF-8 mode, quantifiers
+apply to UTF-8 characters rather than to individual bytes. Thus, for example,
+\x{100}{2} matches two UTF-8 characters, each of which is represented by
+a two-byte sequence. Similarly, when Unicode property support is available,
+\X{3} matches three Unicode extended sequences, each of which may be several
+bytes long (and they may be of different lengths). <p>
+The quantifier {0} is
+permitted, causing the expression to behave as if the previous item and
+the quantifier were not present. <p>
+For convenience (and historical compatibility)
+the three most common quantifiers have single-character abbreviations: <p>
+
+ * is equivalent to {0,}<br>
+ + is equivalent to {1,}<br>
+ ? is equivalent to {0,1}<br>
+ <p>
+It is possible to construct infinite loops by following a subpattern that
+can match no characters with a quantifier that has no upper limit, for
+example: <p>
+ (a?)*<br>
+ <p>
+Earlier versions of Perl and PCRE used to give an error at compile time
+for such patterns. However, because there are cases where this can be useful,
+such patterns are now accepted, but if any repetition of the subpattern
+does in fact match no characters, the loop is forcibly broken. <p>
+By default,
+the quantifiers are "greedy", that is, they match as much as possible (up
+to the maximum number of permitted times), without causing the rest of
+the pattern to fail. The classic example of where this gives problems is
+in trying to match comments in C programs. These appear between /* and */
+and within the comment, individual * and / characters may appear. An attempt
+to match C comments by applying the pattern <p>
+ /\*.*\*/<br>
+ <p>
+to the string <p>
+ /* first comment */ not comment /* second comment */<br>
+ <p>
+fails, because it matches the entire string owing to the greediness of
+the .* item. <p>
+However, if a quantifier is followed by a question mark, it
+ceases to be greedy, and instead matches the minimum number of times possible,
+so the pattern <p>
+ /\*.*?\*/<br>
+ <p>
+does the right thing with the C comments. The meaning of the various quantifiers
+is not otherwise changed, just the preferred number of matches. Do not confuse
+this use of question mark with its use as a quantifier in its own right.
+Because it has two uses, it can sometimes appear doubled, as in <p>
+ \d??\d<br>
+ <p>
+which matches one digit by preference, but can match two if that is the
+only way the rest of the pattern matches. <p>
+If the PCRE_UNGREEDY option is
+set (an option which is not available in Perl), the quantifiers are not
+greedy by default, but individual ones can be made greedy by following
+them with a question mark. In other words, it inverts the default behaviour.
+<p>
+When a parenthesized subpattern is quantified with a minimum repeat count
+that is greater than 1 or with a limited maximum, more memory is required
+for the compiled pattern, in proportion to the size of the minimum or maximum.
+<p>
+If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equivalent
+to Perl&rsquo;s /s) is set, thus allowing the . to match newlines, the pattern
+is implicitly anchored, because whatever follows will be tried against
+every character position in the subject string, so there is no point in
+retrying the overall match at any position after the first. PCRE normally
+treats such a pattern as though it were preceded by \A. <p>
+In cases where it
+is known that the subject string contains no newlines, it is worth setting
+PCRE_DOTALL in order to obtain this optimization, or alternatively using
+^ to indicate anchoring explicitly. <p>
+However, there is one situation where
+the optimization cannot be used. When .* is inside capturing parentheses
+that are the subject of a backreference elsewhere in the pattern, a match
+at the start may fail, and a later one succeed. Consider, for example: <p>
+
+ (.*)abc\1<br>
+ <p>
+If the subject is "xyz123abc123" the match point is the fourth character.
+For this reason, such a pattern is not implicitly anchored. <p>
+When a capturing
+subpattern is repeated, the value captured is the substring that matched
+the final iteration. For example, after <p>
+ (tweedle[dume]{3}\s*)+<br>
+ <p>
+has matched "tweedledum tweedledee" the value of the captured substring
+is "tweedledee". However, if there are nested capturing subpatterns, the
+corresponding captured values may have been set in previous iterations.
+For example, after <p>
+ /(a|(b))+/<br>
+ <p>
+matches "aba" the value of the second captured substring is "b".
+<h2><a name='sect17' href='#toc17'>Atomic
+Grouping and Possessive Quantifiers</a></h2>
+ <p>
+With both maximizing and minimizing
+repetition, failure of what follows normally causes the repeated item to
+be re-evaluated to see if a different number of repeats allows the rest
+of the pattern to match. Sometimes it is useful to prevent this, either
+to change the nature of the match, or to cause it to fail earlier than it
+otherwise might, when the author of the pattern knows there is no point
+in carrying on. <p>
+Consider, for example, the pattern \d+foo when applied to
+the subject line <p>
+ 123456bar<br>
+ <p>
+After matching all 6 digits and then failing to match "foo", the normal
+action of the matcher is to try again with only 5 digits matching the \d+
+item, and then with 4, and so on, before ultimately failing. "Atomic grouping"
+(a term taken from Jeffrey Friedl&rsquo;s book) provides the means for specifying
+that once a subpattern has matched, it is not to be re-evaluated in this
+way. <p>
+If we use atomic grouping for the previous example, the matcher would
+give up immediately on failing to match "foo" the first time. The notation
+is a kind of special parenthesis, starting with (?&gt; as in this example:
+<p>
+ (?&gt;\d+)foo<br>
+ <p>
+This kind of parenthesis "locks up" the part of the pattern it contains
+once it has matched, and a failure further into the pattern is prevented
+from backtracking into it. Backtracking past it to previous items, however,
+works as normal. <p>
+An alternative description is that a subpattern of this
+type matches the string of characters that an identical standalone pattern
+would match, if anchored at the current point in the subject string. <p>
+Atomic
+grouping subpatterns are not capturing subpatterns. Simple cases such as
+the above example can be thought of as a maximizing repeat that must swallow
+everything it can. So, while both \d+ and \d+? are prepared to adjust the
+number of digits they match in order to make the rest of the pattern match,
+(?&gt;\d+) can only match an entire sequence of digits. <p>
+Atomic groups in general
+can of course contain arbitrarily complicated subpatterns, and can be nested.
+However, when the subpattern for an atomic group is just a single repeated
+item, as in the example above, a simpler notation, called a "possessive
+quantifier" can be used. This consists of an additional + character following
+a quantifier. Using this notation, the previous example can be rewritten
+as <p>
+ \d++foo<br>
+ <p>
+Possessive quantifiers are always greedy; the setting of the PCRE_UNGREEDY
+option is ignored. They are a convenient notation for the simpler forms
+of atomic group. However, there is no difference in the meaning or processing
+of a possessive quantifier and the equivalent atomic group. <p>
+The possessive
+quantifier syntax is an extension to the Perl syntax. It originates in Sun&rsquo;s
+Java package. <p>
+When a pattern contains an unlimited repeat inside a subpattern
+that can itself be repeated an unlimited number of times, the use of an
+atomic group is the only way to avoid some failing matches taking a very
+long time indeed. The pattern <p>
+ (\D+|&lt;\d+&gt;)*[!?]<br>
+ <p>
+matches an unlimited number of substrings that either consist of non-digits,
+or digits enclosed in &lt;&gt;, followed by either ! or ?. When it matches, it runs
+quickly. However, if it is applied to <p>
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa<br>
+ <p>
+it takes a long time before reporting failure. This is because the string
+can be divided between the internal \D+ repeat and the external * repeat
+in a large number of ways, and all have to be tried. (The example uses [!?]
+rather than a single character at the end, because both PCRE and Perl have
+an optimization that allows for fast failure when a single character is
+used. They remember the last single character that is required for a match,
+and fail early if it is not present in the string.) If the pattern is changed
+so that it uses an atomic group, like this: <p>
+ ((?&gt;\D+)|&lt;\d+&gt;)*[!?]<br>
+ <p>
+sequences of non-digits cannot be broken, and failure happens quickly.
+
+<h2><a name='sect18' href='#toc18'>Back References</a></h2>
+ <p>
+Outside a character class, a backslash followed by a
+digit greater than 0 (and possibly further digits) is a back reference
+to a capturing subpattern earlier (that is, to its left) in the pattern,
+provided there have been that many previous capturing left parentheses.
+<p>
+However, if the decimal number following the backslash is less than 10,
+it is always taken as a back reference, and causes an error only if there
+are not that many capturing left parentheses in the entire pattern. In other
+words, the parentheses that are referenced need not be to the left of the
+reference for numbers less than 10. See the subsection entitled "Non-printing
+characters" above for further details of the handling of digits following
+a backslash. <p>
+A back reference matches whatever actually matched the capturing
+subpattern in the current subject string, rather than anything matching
+the subpattern itself (see "Subpatterns as subroutines" below for a
+way of doing that). So the pattern <p>
+ (sens|respons)e and \1ibility<br>
+ <p>
+matches "sense and sensibility" and "response and responsibility", but
+not "sense and responsibility". If caseful matching is in force at the time
+of the back reference, the case of letters is relevant. For example, <p>
+ ((?i)rah)\s+\1<br>
+ <p>
+matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
+capturing subpattern is matched caselessly. <p>
+Back references to named subpatterns
+use the Python syntax (?P=name). We could rewrite the above example as follows:
+<p>
+ (?P&lt;p1&gt;(?i)rah)\s+(?P=p1)<br>
+ <p>
+There may be more than one back reference to the same subpattern. If a
+subpattern has not actually been used in a particular match, any back references
+to it always fail. For example, the pattern <p>
+ (a|(bc))\2<br>
+ <p>
+always fails if it starts to match "a" rather than "bc". Because there
+may be many capturing parentheses in a pattern, all digits following the
+backslash are taken as part of a potential back reference number. If the
+pattern continues with a digit character, some delimiter must be used to
+terminate the back reference. If the PCRE_EXTENDED option is set, this can
+be whitespace. Otherwise an empty comment (see "Comments" below) can
+be used. <p>
+A back reference that occurs inside the parentheses to which it
+refers fails when the subpattern is first used, so, for example, (a\1) never
+matches. However, such references can be useful inside repeated subpatterns.
+For example, the pattern <p>
+ (a|b\1)+<br>
+ <p>
+matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration
+of the subpattern, the back reference matches the character string corresponding
+to the previous iteration. In order for this to work, the pattern must be
+such that the first iteration does not need to match the back reference.
+This can be done using alternation, as in the example above, or by a quantifier
+with a minimum of zero.
+<h2><a name='sect19' href='#toc19'>Assertions</a></h2>
+ <p>
+An assertion is a test on the characters
+following or preceding the current matching point that does not actually
+consume any characters. The simple assertions coded as \b, \B, \A, \G, \Z, \z,
+^ and $ are described above. <p>
+More complicated assertions are coded as
+subpatterns. There are two kinds: those that look ahead of the current position
+in the subject string, and those that look behind it. An assertion subpattern
+is matched in the normal way, except that it does not cause the current
+matching position to be changed. <p>
+Assertion subpatterns are not capturing
+subpatterns, and may not be repeated, because it makes no sense to assert
+the same thing several times. If any kind of assertion contains capturing
+subpatterns within it, these are counted for the purposes of numbering
+the capturing subpatterns in the whole pattern. However, substring capturing
+is carried out only for positive assertions, because it does not make sense
+for negative assertions.
+<h3><a name='sect20' href='#toc20'>Lookahead assertions</a></h3>
+ <p>
+Lookahead assertions start
+with (?= for positive assertions and (?! for negative assertions. For example,
+<p>
+ \w+(?=;)<br>
+ <p>
+matches a word followed by a semicolon, but does not include the semicolon
+in the match, and <p>
+ foo(?!bar)<br>
+ <p>
+matches any occurrence of "foo" that is not followed by "bar". Note that
+the apparently similar pattern <p>
+ (?!foo)bar<br>
+ <p>
+does not find an occurrence of "bar" that is preceded by something other
+than "foo"; it finds any occurrence of "bar" whatsoever, because the assertion
+(?!foo) is always true when the next three characters are "bar". A lookbehind
+assertion is needed to achieve the other effect. <p>
+If you want to force a
+matching failure at some point in a pattern, the most convenient way to
+do it is with (?!) because an empty string always matches, so an assertion
+that requires there not to be an empty string must always fail.
+<h3><a name='sect21' href='#toc21'>Lookbehind
+assertions</a></h3>
+ <p>
+Lookbehind assertions start with (?&lt;= for positive assertions
+and (?&lt;! for negative assertions. For example, <p>
+ (?&lt;!foo)bar<br>
+ <p>
+does find an occurrence of "bar" that is not preceded by "foo". The contents
+of a lookbehind assertion are restricted such that all the strings it matches
+must have a fixed length. However, if there are several alternatives, they
+do not all have to have the same fixed length. Thus <p>
+ (?&lt;=bullock|donkey)<br>
+ <p>
+is permitted, but <p>
+ (?&lt;!dogs?|cats?)<br>
+ <p>
+causes an error at compile time. Branches that match different length strings
+are permitted only at the top level of a lookbehind assertion. This is an
+extension compared with Perl (at least for 5.8), which requires all branches
+to match the same length of string. An assertion such as <p>
+ (?&lt;=ab(c|de))<br>
+ <p>
+is not permitted, because its single top-level branch can match two different
+lengths, but it is acceptable if rewritten to use two top-level branches:
+<p>
+ (?&lt;=abc|abde)<br>
+ <p>
+The implementation of lookbehind assertions is, for each alternative,
+to temporarily move the current position back by the fixed width and then
+try to match. If there are insufficient characters before the current position,
+the match is deemed to fail. <p>
+PCRE does not allow the \C escape (which matches
+a single byte in UTF-8 mode) to appear in lookbehind assertions, because
+it makes it impossible to calculate the length of the lookbehind. The \X
+escape, which can match different numbers of bytes, is also not permitted.
+<p>
+Atomic groups can be used in conjunction with lookbehind assertions to
+specify efficient matching at the end of the subject string. Consider a
+simple pattern such as <p>
+ abcd$<br>
+ <p>
+when applied to a long string that does not match. Because matching proceeds
+from left to right, PCRE will look for each "a" in the subject and then
+see if what follows matches the rest of the pattern. If the pattern is specified
+as <p>
+ ^.*abcd$<br>
+ <p>
+the initial .* matches the entire string at first, but when this fails
+(because there is no following "a"), it backtracks to match all but the
+last character, then all but the last two characters, and so on. Once again
+the search for "a" covers the entire string, from right to left, so we
+are no better off. However, if the pattern is written as <p>
+ ^(?&gt;.*)(?&lt;=abcd)<br>
+ <p>
+or, equivalently, using the possessive quantifier syntax, <p>
+ ^.*+(?&lt;=abcd)<br>
+ <p>
+there can be no backtracking for the .* item; it can match only the entire
+string. The subsequent lookbehind assertion does a single test on the last
+four characters. If it fails, the match fails immediately. For long strings,
+this approach makes a significant difference to the processing time.
+<h3><a name='sect22' href='#toc22'>Using
+multiple assertions</a></h3>
+ <p>
+Several assertions (of any sort) may occur in succession.
+For example, <p>
+ (?&lt;=\d{3})(?&lt;!999)foo<br>
+ <p>
+matches "foo" preceded by three digits that are not "999". Notice that
+each of the assertions is applied independently at the same point in the
+subject string. First there is a check that the previous three characters
+are all digits, and then there is a check that the same three characters
+are not "999". This pattern does <i>not</i> match "foo" preceded by six characters,
+the first three of which are digits and the last three of which are not "999".
+For example, it doesn&rsquo;t match "123abcfoo". A pattern to do that is <p>
+ (?&lt;=\d{3}...)(?&lt;!999)foo<br>
+ <p>
+This time the first assertion looks at the preceding six characters, checking
+that the first three are digits, and then the second assertion checks that
+the preceding three characters are not "999". <p>
+Assertions can be nested in
+any combination. For example, <p>
+ (?&lt;=(?&lt;!foo)bar)baz<br>
+ <p>
+matches an occurrence of "baz" that is preceded by "bar" which in turn
+is not preceded by "foo", while <p>
+ (?&lt;=\d{3}(?!999)...)foo<br>
+ <p>
+is another pattern that matches "foo" preceded by three digits and any
+three characters that are not "999".
+<h2><a name='sect23' href='#toc23'>Conditional Subpatterns</a></h2>
+ <p>
+It is possible
+to cause the matching process to obey a subpattern conditionally or to
+choose between two alternative subpatterns, depending on the result of
+an assertion, or whether a previous capturing subpattern matched or not.
+The two possible forms of conditional subpattern are <p>
+ (?(condition)yes-pattern)<br>
+ (?(condition)yes-pattern|no-pattern)<br>
+ <p>
+If the condition is satisfied, the yes-pattern is used; otherwise the no-pattern
+(if present) is used. If there are more than two alternatives in the subpattern,
+a compile-time error occurs. <p>
+There are three kinds of condition. If the text
+between the parentheses consists of a sequence of digits, the condition
+is satisfied if the capturing subpattern of that number has previously
+matched. The number must be greater than zero. Consider the following pattern,
+which contains non-significant white space to make it more readable (assume
+the PCRE_EXTENDED option) and to divide it into three parts for ease of
+discussion: <p>
+ ( \( )? [^()]+ (?(1) \) )<br>
+ <p>
+The first part matches an optional opening parenthesis, and if that character
+is present, sets it as the first captured substring. The second part matches
+one or more characters that are not parentheses. The third part is a conditional
+subpattern that tests whether the first set of parentheses matched or not.
+If they did, that is, if the subject started with an opening parenthesis, the
+condition is true, and so the yes-pattern is executed and a closing parenthesis
+is required. Otherwise, since no-pattern is not present, the subpattern matches
+nothing. In other words, this pattern matches a sequence of non-parentheses,
+optionally enclosed in parentheses. <p>
+If the condition is the string (R),
+it is satisfied if a recursive call to the pattern or subpattern has been
+made. At "top level", the condition is false. This is a PCRE extension. Recursive
+patterns are described in the next section. <p>
+If the condition is not a sequence
+of digits or (R), it must be an assertion. This may be a positive or negative
+lookahead or lookbehind assertion. Consider this pattern, again containing
+non-significant white space, and with the two alternatives on the second
+line: <p>
+ (?(?=[^a-z]*[a-z])<br>
+ \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} )<br>
+ <p>
+The condition is a positive lookahead assertion that matches an optional
+sequence of non-letters followed by a letter. In other words, it tests for
+the presence of at least one letter in the subject. If a letter is found,
+the subject is matched against the first alternative; otherwise it is matched
+against the second. This pattern matches strings in one of the two forms
+dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
+<h2><a name='sect24' href='#toc24'>Comments</a></h2>
+
+<p>
+The sequence (?# marks the start of a comment that continues up to the
+next closing parenthesis. Nested parentheses are not permitted. The characters
+that make up a comment play no part in the pattern matching at all. <p>
+If the
+PCRE_EXTENDED option is set, an unescaped # character outside a character
+class introduces a comment that continues up to the next newline character
+in the pattern.
+<h2><a name='sect25' href='#toc25'>Recursive Patterns</a></h2>
+ <p>
+Consider the problem of matching a
+string in parentheses, allowing for unlimited nested parentheses. Without
+the use of recursion, the best that can be done is to use a pattern that
+matches up to some fixed depth of nesting. It is not possible to handle
+an arbitrary nesting depth. Perl provides a facility that allows regular
+expressions to recurse (amongst other things). It does this by interpolating
+Perl code in the expression at run time, and the code can refer to the
+expression itself. A Perl pattern to solve the parentheses problem can be
+created like this: <p>
+ $re = qr{\( (?: (?&gt;[^()]+) | (?p{$re}) )* \)}x;<br>
+ <p>
+The (?p{...}) item interpolates Perl code at run time, and in this case refers
+recursively to the pattern in which it appears. Obviously, PCRE cannot support
+the interpolation of Perl code. Instead, it supports some special syntax
+for recursion of the entire pattern, and also for individual subpattern
+recursion. <p>
+The special item that consists of (? followed by a number greater
+than zero and a closing parenthesis is a recursive call of the subpattern
+of the given number, provided that it occurs inside that subpattern. (If
+not, it is a "subroutine" call, which is described in the next section.)
+The special item (?R) is a recursive call of the entire regular expression.
+<p>
+For example, this PCRE pattern solves the nested parentheses problem (assume
+the PCRE_EXTENDED option is set so that white space is ignored): <p>
+ \( (
+(?&gt;[^()]+) | (?R) )* \)<br>
+ <p>
+First it matches an opening parenthesis. Then it matches any number of
+substrings which can either be a sequence of non-parentheses, or a recursive
+match of the pattern itself (that is a correctly parenthesized substring).
+Finally there is a closing parenthesis. <p>
+If this were part of a larger pattern,
+you would not want to recurse the entire pattern, so instead you could
+use this: <p>
+ ( \( ( (?&gt;[^()]+) | (?1) )* \) )<br>
+ <p>
+We have put the pattern into parentheses, and caused the recursion to
+refer to them instead of the whole pattern. In a larger pattern, keeping
+track of parenthesis numbers can be tricky. It may be more convenient to
+use named parentheses instead. For this, PCRE uses (?P&gt;name), which is an
+extension to the Python syntax that PCRE uses for named parentheses (Perl
+does not provide named parentheses). We could rewrite the above example
+as follows: <p>
+ (?P&lt;pn&gt; \( ( (?&gt;[^()]+) | (?P&gt;pn) )* \) )<br>
+ <p>
+This particular example pattern contains nested unlimited repeats, and
+so the use of atomic grouping for matching strings of non-parentheses is
+important when applying the pattern to strings that do not match. For example,
+when this pattern is applied to <p>
+ (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()<br>
+ <p>
+it yields "no match" quickly. However, if atomic grouping is not used,
+the match runs for a very long time indeed because there are so many different
+ways the + and * repeats can carve up the subject, and all have to be tested
+before failure can be reported. <p>
+At the end of a match, the values set for
+any capturing subpatterns are those from the outermost level of the recursion
+at which the subpattern value is set. If you want to obtain intermediate
+values, a callout function can be used (see the "Callouts" section below and the <b>pcrecallout</b>
+ documentation). If the pattern above is matched against <p>
+ (ab(cd)ef)<br>
+ <p>
+the value for the capturing parentheses is "ef", which is the last value
+taken on at the top level. If additional parentheses are added, giving <p>
+
+ \( ( ( (?&gt;[^()]+) | (?R) )* ) \)<br>
+ ^ ^<br>
+ ^ ^<br>
+ <p>
+the string they capture is "ab(cd)ef", the contents of the top level parentheses.
+If there are more than 15 capturing parentheses in a pattern, PCRE has
+to obtain extra memory to store data during a recursion, which it does
+by using <b>pcre_malloc</b>, freeing it via <b>pcre_free</b> afterwards. If no memory
+can be obtained, the match fails with the PCRE_ERROR_NOMEMORY error. <p>
+Do
+not confuse the (?R) item with the condition (R), which tests for recursion.
+Consider this pattern, which matches text in angle brackets, allowing for
+arbitrary nesting. Only digits are allowed in nested brackets (that is,
+when recursing), whereas any characters are permitted at the outer level.
+<p>
+ &lt; (?: (?(R) \d++ | [^&lt;&gt;]*+) | (?R)) * &gt;<br>
+ <p>
+In this pattern, (?(R) is the start of a conditional subpattern, with
+two different alternatives for the recursive and non-recursive cases. The
+(?R) item is the actual recursive call.
+<h2><a name='sect26' href='#toc26'>Subpatterns As Subroutines</a></h2>
+ <p>
+If
+the syntax for a recursive subpattern reference (either by number or by
+name) is used outside the parentheses to which it refers, it operates like
+a subroutine in a programming language. An earlier example pointed out that
+the pattern <p>
+ (sens|respons)e and \1ibility<br>
+ <p>
+matches "sense and sensibility" and "response and responsibility", but
+not "sense and responsibility". If instead the pattern <p>
+ (sens|respons)e
+and (?1)ibility<br>
+ <p>
+is used, it does match "sense and responsibility" as well as the other
+two strings. Such references must, however, follow the subpattern to which
+they refer.
+<h2><a name='sect27' href='#toc27'>Callouts</a></h2>
+ <p>
+Perl has a feature whereby using the sequence (?{...})
+causes arbitrary Perl code to be obeyed in the middle of matching a regular
+expression. This makes it possible, amongst other things, to extract different
+substrings that match the same pair of parentheses when there is a repetition.
+<p>
+PCRE provides a similar feature, but of course it cannot obey arbitrary
+Perl code. The feature is called "callout". The caller of PCRE provides an
+external function by putting its entry point in the global variable <i>pcre_callout</i>.
+By default, this variable contains NULL, which disables all calling out.
+<p>
+Within a regular expression, (?C) indicates the points at which the external
+function is to be called. If you want to identify different callout points,
+you can put a number less than 256 after the letter C. The default value
+is zero. For example, this pattern has two callout points: <p>
+ (?C1)\dabc(?C2)def<br>
+ <p>
+If the PCRE_AUTO_CALLOUT flag is passed to <b>pcre_compile()</b>, callouts are
+automatically installed before each item in the pattern. They are all numbered
+255. <p>
+During matching, when PCRE reaches a callout point (and <i>pcre_callout</i>
+is set), the external function is called. It is provided with the number
+of the callout, the position in the pattern, and, optionally, one item
+of data originally supplied by the caller of <b>pcre_exec()</b>. The callout function
+may cause matching to proceed, to backtrack, or to fail altogether. A complete
+description of the interface to the callout function is given in the <b>pcrecallout</b>
+ documentation. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Regular Expression Details</a></li>
+<li><a name='toc2' href='#sect2'>Backslash</a></li>
+<ul>
+<li><a name='toc3' href='#sect3'>Non-printing characters</a></li>
+<li><a name='toc4' href='#sect4'>Generic character types</a></li>
+<li><a name='toc5' href='#sect5'>Unicode character properties</a></li>
+<li><a name='toc6' href='#sect6'>Simple assertions</a></li>
+</ul>
+<li><a name='toc7' href='#sect7'>Circumflex and Dollar</a></li>
+<li><a name='toc8' href='#sect8'>Full Stop (period, Dot)</a></li>
+<li><a name='toc9' href='#sect9'>Matching a Single Byte</a></li>
+<li><a name='toc10' href='#sect10'>Square Brackets and Character Classes</a></li>
+<li><a name='toc11' href='#sect11'>Posix Character Classes</a></li>
+<li><a name='toc12' href='#sect12'>Vertical Bar</a></li>
+<li><a name='toc13' href='#sect13'>Internal Option Setting</a></li>
+<li><a name='toc14' href='#sect14'>Subpatterns</a></li>
+<li><a name='toc15' href='#sect15'>Named Subpatterns</a></li>
+<li><a name='toc16' href='#sect16'>Repetition</a></li>
+<li><a name='toc17' href='#sect17'>Atomic Grouping and Possessive Quantifiers</a></li>
+<li><a name='toc18' href='#sect18'>Back References</a></li>
+<li><a name='toc19' href='#sect19'>Assertions</a></li>
+<ul>
+<li><a name='toc20' href='#sect20'>Lookahead assertions</a></li>
+<li><a name='toc21' href='#sect21'>Lookbehind assertions</a></li>
+<li><a name='toc22' href='#sect22'>Using multiple assertions</a></li>
+</ul>
+<li><a name='toc23' href='#sect23'>Conditional Subpatterns</a></li>
+<li><a name='toc24' href='#sect24'>Comments</a></li>
+<li><a name='toc25' href='#sect25'>Recursive Patterns</a></li>
+<li><a name='toc26' href='#sect26'>Subpatterns As Subroutines</a></li>
+<li><a name='toc27' href='#sect27'>Callouts</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcreperform.3.html b/spamfilter/Utilities/PCRE/man/html/pcreperform.3.html
new file mode 100644
index 0000000..a4fea50
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcreperform.3.html
@@ -0,0 +1,86 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Performance</a></h2>
+ <p>
+Certain items
+that may appear in regular expression patterns are more efficient than
+others. It is more efficient to use a character class like [aeiou] than
+a set of alternatives such as (a|e|i|o|u). In general, the simplest construction
+that provides the required behaviour is usually the most efficient. Jeffrey
+Friedl&rsquo;s book contains a lot of useful general discussion about optimizing
+regular expressions for efficient performance. This document contains a
+few observations about PCRE. <p>
+Using Unicode character properties (the \p,
+\P, and \X escapes) is slow, because PCRE has to scan a structure that contains
+data for over fifteen thousand characters whenever it needs a character&rsquo;s
+property. If you can find an alternative pattern that does not use character
+properties, it will probably be faster. <p>
+When a pattern begins with .* not
+in parentheses, or in parentheses that are not the subject of a backreference,
+and the PCRE_DOTALL option is set, the pattern is implicitly anchored by
+PCRE, since it can match only at the start of a subject string. However,
+if PCRE_DOTALL is not set, PCRE cannot make this optimization, because
+the . metacharacter does not then match a newline, and if the subject string
+contains newlines, the pattern may match from the character immediately
+following one of them instead of from the very start. For example, the pattern
+<p>
+ .*second<br>
+ <p>
+matches the subject "first\nand second" (where \n stands for a newline character),
+with the match starting at the seventh character. In order to do this, PCRE
+has to retry the match starting after every newline in the subject. <p>
+If you
+are using such a pattern with subject strings that do not contain newlines,
+the best performance is obtained by setting PCRE_DOTALL, or starting the
+pattern with ^.* to indicate explicit anchoring. That saves PCRE from having
+to scan along the subject looking for a newline to restart at. <p>
+Beware of
+patterns that contain nested indefinite repeats. These can take a long time
+to run when applied to a string that does not match. Consider the pattern
+fragment <p>
+ (a+)*<br>
+ <p>
+This can match "aaaa" in 33 different ways, and this number increases
+very rapidly as the string gets longer. (The * repeat can match 0, 1, 2,
+3, or 4 times, and for each of those cases other than 0, the + repeats
+can match different numbers of times.) When the remainder of the pattern
+is such that the entire match is going to fail, PCRE has in principle to
+try every possible variation, and this can take an extremely long time.
+<p>
+An optimization catches some of the more simple cases such as <p>
+ (a+)*b<br>
+ <p>
+where a literal character follows. Before embarking on the standard matching
+procedure, PCRE checks that there is a "b" later in the subject string,
+and if there is not, it fails the match immediately. However, when there
+is no following literal this optimization cannot be used. You can see the
+difference by comparing the behaviour of <p>
+ (a+)*\d<br>
+ <p>
+with the pattern above. The earlier pattern (a+)*b gives a failure almost instantly
+when applied to a whole line of "a" characters, whereas (a+)*\d takes an
+appreciable time with strings longer than about 20 characters. <p>
+In many cases,
+the solution to this kind of performance issue is to use an atomic group
+or a possessive quantifier. <p>
+ Last updated: 09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Performance</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcreposix.3.html b/spamfilter/Utilities/PCRE/man/html/pcreposix.3.html
new file mode 100644
index 0000000..0e7cafd
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcreposix.3.html
@@ -0,0 +1,187 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions.
+<h2><a name='sect1' href='#toc1'>Synopsis of Posix API</a></h2>
+ <p>
+<b>#include
+&lt;pcreposix.h&gt;</b> <p>
+<font size='-1'></font>
+ <br>
+<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b> <b>int <i>cflags</i>);</b> <p>
+<br>
+<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b> <b>size_t <i>nmatch</i>, regmatch_t
+<i>pmatch</i>[], int <i>eflags</i>);</b> <p>
+<br>
+<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b> <b>char *<i>errbuf</i>, size_t
+<i>errbuf_size</i>);</b> <p>
+<br>
+<b>void regfree(regex_t *<i>preg</i>);</b>
+<h2><a name='sect2' href='#toc2'>Description</a></h2>
+ <p>
+This set of functions provides
+a POSIX-style API to the PCRE regular expression package. See the <b>pcreapi</b>
+ documentation for a description of PCRE&rsquo;s native API, which contains additional
+functionality. <p>
+The functions described here are just wrapper functions that
+ultimately call the PCRE native API. Their prototypes are defined in the
+<b>pcreposix.h</b> header file, and on Unix systems the library itself is called
+<b>pcreposix.a</b>, so can be accessed by adding <b>-lpcreposix</b> to the command for
+linking an application that uses them. Because the POSIX functions call
+the native ones, it is also necessary to add <b>-lpcre</b>. <p>
+I have implemented only
+those option bits that can be reasonably mapped to PCRE native options.
+In addition, the options REG_EXTENDED and REG_NOSUB are defined with the
+value zero. They have no effect, but since programs that are written to
+the POSIX interface often use them, this makes it easier to slot in PCRE
+as a replacement library. Other POSIX options are not even defined. <p>
+When
+PCRE is called via these functions, it is only the API that is POSIX-like
+in style. The syntax and semantics of the regular expressions themselves
+are still those of Perl, subject to the setting of various PCRE options,
+as described below. "POSIX-like in style" means that the API approximates
+to the POSIX definition; it is not fully POSIX-compatible, and in multi-byte
+encoding domains it is probably even less compatible. <p>
+The header for these
+functions is supplied as <b>pcreposix.h</b> to avoid any potential clash with other
+POSIX libraries. It can, of course, be renamed or aliased as <b>regex.h</b>, which
+is the "correct" name. It provides two structure types, <i>regex_t</i> for compiled
+internal forms, and <i>regmatch_t</i> for returning captured substrings. It also
+defines some constants whose names start with "REG_"; these are used for
+setting options and identifying error codes. <p>
+
+<h2><a name='sect3' href='#toc3'>Compiling a Pattern</a></h2>
+ <p>
+The function
+<b>regcomp()</b> is called to compile a pattern into an internal form. The pattern
+is a C string terminated by a binary zero, and is passed in the argument
+<i>pattern</i>. The <i>preg</i> argument is a pointer to a <b>regex_t</b> structure that is used
+as a base for storing information about the compiled expression. <p>
+The argument
+<i>cflags</i> is either zero, or contains one or more of the bits defined by the
+following macros: <p>
+ REG_ICASE<br>
+ <p>
+The PCRE_CASELESS option is set when the expression is passed for compilation
+to the native function. <p>
+ REG_NEWLINE<br>
+ <p>
+The PCRE_MULTILINE option is set when the expression is passed for compilation
+to the native function. Note that this does <i>not</i> mimic the defined POSIX
+behaviour for REG_NEWLINE (see the following section). <p>
+In the absence of
+these flags, no options are passed to the native function. This means that
+the regex is compiled with PCRE default semantics. In particular, the way
+it handles newline characters in the subject string is the Perl way, not
+the POSIX way. Note that setting PCRE_MULTILINE has only <i>some</i> of the effects
+specified for REG_NEWLINE. It does not affect the way newlines are matched
+by . (they aren&rsquo;t) or by a negative class such as [^a] (they are). <p>
+The yield
+of <b>regcomp()</b> is zero on success, and non-zero otherwise. The <i>preg</i> structure
+is filled in on success, and one member of the structure is public: <i>re_nsub</i>
+contains the number of capturing subpatterns in the regular expression.
+Various error codes are defined in the header file.
+<h2><a name='sect4' href='#toc4'>Matching Newline Characters</a></h2>
+
+<p>
+This area is not simple, because POSIX and Perl take different views of
+things. It is not possible to get PCRE to obey POSIX semantics, but then
+PCRE was never intended to be a POSIX engine. The following table lists
+the different possibilities for matching newline characters in PCRE: <p>
+
+ Default Change with<br>
+ <p>
+ . matches newline no PCRE_DOTALL<br>
+ newline matches [^a] yes not changeable<br>
+ $ matches \n at end yes PCRE_DOLLAR_ENDONLY<br>
+ $ matches \n in middle no PCRE_MULTILINE<br>
+ ^ matches \n in middle no PCRE_MULTILINE<br>
+ <p>
+This is the equivalent table for POSIX: <p>
+ Default
+ Change with<br>
+ <p>
+ . matches newline yes REG_NEWLINE<br>
+ newline matches [^a] yes REG_NEWLINE<br>
+ $ matches \n at end no REG_NEWLINE<br>
+ $ matches \n in middle no REG_NEWLINE<br>
+ ^ matches \n in middle no REG_NEWLINE<br>
+ <p>
+PCRE&rsquo;s behaviour is the same as Perl&rsquo;s, except that there is no equivalent
+for PCRE_DOLLAR_ENDONLY in Perl. In both PCRE and Perl, there is no way
+to stop newline from matching [^a]. <p>
+The default POSIX newline handling can
+be obtained by setting PCRE_DOTALL and PCRE_DOLLAR_ENDONLY, but there is
+no way to make PCRE behave exactly as for the REG_NEWLINE action.
+<h2><a name='sect5' href='#toc5'>Matching
+a Pattern</a></h2>
+ <p>
+The function <b>regexec()</b> is called to match a compiled pattern
+<i>preg</i> against a given <i>string</i>, which is terminated by a zero byte, subject
+to the options in <i>eflags</i>. These can be: <p>
+ REG_NOTBOL<br>
+ <p>
+The PCRE_NOTBOL option is set when calling the underlying PCRE matching
+function. <p>
+ REG_NOTEOL<br>
+ <p>
+The PCRE_NOTEOL option is set when calling the underlying PCRE matching
+function. <p>
+The portion of the string that was matched, and also any captured
+substrings, are returned via the <i>pmatch</i> argument, which points to an array
+of <i>nmatch</i> structures of type <i>regmatch_t</i>, containing the members <i>rm_so</i> and
+<i>rm_eo</i>. These contain the offset to the first character of each substring
+and the offset to the first character after the end of each substring,
+respectively. The 0th element of the vector relates to the entire portion
+of <i>string</i> that was matched; subsequent elements relate to the capturing
+subpatterns of the regular expression. Unused entries in the array have
+both structure members set to -1. <p>
+A successful match yields a zero return;
+various error codes are defined in the header file, of which REG_NOMATCH
+is the "expected" failure code.
+<h2><a name='sect6' href='#toc6'>Error Messages</a></h2>
+ <p>
+The <b>regerror()</b> function
+maps a non-zero error code from either <b>regcomp()</b> or <b>regexec()</b> to a printable
+message. If <i>preg</i> is not NULL, the error should have arisen from the use
+of that structure. A message terminated by a binary zero is placed in <i>errbuf</i>.
+The length of the message, including the zero, is limited to <i>errbuf_size</i>.
+The yield of the function is the size of buffer needed to hold the whole
+message.
+<h2><a name='sect7' href='#toc7'>Memory Usage</a></h2>
+ <p>
+Compiling a regular expression causes memory to
+be allocated and associated with the <i>preg</i> structure. The function <b>regfree()</b>
+frees all such memory, after which <i>preg</i> may no longer be used as a compiled
+expression.
+<h2><a name='sect8' href='#toc8'>Author</a></h2>
+ <p>
+Philip Hazel &lt;ph10@cam.ac.uk&gt; <br>
+University Computing Service, <br>
+Cambridge CB2 3QG, England. <p>
+ Last updated: 07 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis of Posix API</a></li>
+<li><a name='toc2' href='#sect2'>Description</a></li>
+<li><a name='toc3' href='#sect3'>Compiling a Pattern</a></li>
+<li><a name='toc4' href='#sect4'>Matching Newline Characters</a></li>
+<li><a name='toc5' href='#sect5'>Matching a Pattern</a></li>
+<li><a name='toc6' href='#sect6'>Error Messages</a></li>
+<li><a name='toc7' href='#sect7'>Memory Usage</a></li>
+<li><a name='toc8' href='#sect8'>Author</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcresample.3.html b/spamfilter/Utilities/PCRE/man/html/pcresample.3.html
new file mode 100644
index 0000000..6fad4a9
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcresample.3.html
@@ -0,0 +1,72 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRE(3) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+PCRE - Perl-compatible regular expressions
+<h2><a name='sect1' href='#toc1'>Pcre Sample Program</a></h2>
+ <p>
+A simple,
+complete demonstration program, to get you started with using PCRE, is
+supplied in the file <i>pcredemo.c</i> in the PCRE distribution. <p>
+The program compiles
+the regular expression that is its first argument, and matches it against
+the subject string in its second argument. No PCRE options are set, and
+default character tables are used. If matching succeeds, the program outputs
+the portion of the subject that matched, together with the contents of
+any captured substrings. <p>
+If the -g option is given on the command line, the
+program then goes on to check for further matches of the same regular expression
+in the same subject string. The logic is a little bit tricky because of
+the possibility of matching an empty string. Comments in the code explain
+what is going on. <p>
+If PCRE is installed in the standard include and library
+directories for your system, you should be able to compile the demonstration
+program using this command: <p>
+ gcc -o pcredemo pcredemo.c -lpcre<br>
+ <p>
+If PCRE is installed elsewhere, you may need to add additional options
+to the command line. For example, on a Unix-like system that has PCRE installed
+in <i>/usr/local</i>, you can compile the demonstration program using a command
+like this: <p>
+ gcc -o pcredemo -I/usr/local/include pcredemo.c \<br>
+ -L/usr/local/lib -lpcre<br>
+ <p>
+Once you have compiled the demonstration program, you can run simple tests
+like this: <p>
+ ./pcredemo 'cat|dog' 'the cat sat on the mat'<br>
+ ./pcredemo -g 'cat|dog' 'the dog sat on the cat'<br>
+ <p>
+Note that there is a much more comprehensive test program, called <b>pcretest</b>,
+ which supports many more facilities for testing regular expressions and
+the PCRE library. The <b>pcredemo</b> program is provided as a simple coding example.
+<p>
+On some operating systems (e.g. Solaris), when PCRE is not installed in the
+standard library directory, you may get an error like this when you try
+to run <b>pcredemo</b>: <p>
+ ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such
+file or directory<br>
+ <p>
+This is caused by the way shared library support works on those systems.
+You need to add <p>
+ -R/usr/local/lib<br>
+ <p>
+(for example) to the compile command to get round this problem. <p>
+ Last updated:
+09 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Pcre Sample Program</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/man/html/pcretest.1.html b/spamfilter/Utilities/PCRE/man/html/pcretest.1.html
new file mode 100644
index 0000000..e96293e
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/man/html/pcretest.1.html
@@ -0,0 +1,433 @@
+<!-- manual page source format generated by PolyglotMan v3.2, -->
+<!-- available at http://polyglotman.sourceforge.net/ -->
+
+<html>
+<head>
+<title>PCRETEST(1) manual page</title>
+</head>
+<body bgcolor='white'>
+<a href='#toc'>Table of Contents</a><p>
+
+<h2><a name='sect0' href='#toc0'>Name</a></h2>
+pcretest - a program for testing Perl-compatible regular expressions.
+
+<h2><a name='sect1' href='#toc1'>Synopsis</a></h2>
+ <p>
+<b>pcretest [-C] [-d] [-i] [-m] [-o osize] [-p] [-t] [source] [destination]</b>
+<p>
+<b>pcretest</b> was written as a test program for the PCRE regular expression
+library itself, but it can also be used for experimenting with regular
+expressions. This document describes the features of the test program; for
+details of the regular expressions themselves, see the <b>pcrepattern</b> documentation.
+For details of the PCRE library function calls and their options, see the
+ <b>pcreapi</b> documentation.
+<h2><a name='sect2' href='#toc2'>Options</a></h2>
+
+<dl>
+
+<dt><b>-C</b> </dt>
+<dd>Output the version number of the PCRE
+library, and all available information about the optional features that
+are included, and then exit. </dd>
+
+<dt><b>-d</b> </dt>
+<dd>Behave as if each regex had the <b>/D</b> (debug)
+modifier; the internal form is output after compilation. </dd>
+
+<dt><b>-i</b> </dt>
+<dd>Behave as if
+each regex had the <b>/I</b> modifier; information about the compiled pattern
+is given after compilation. </dd>
+
+<dt><b>-m</b> </dt>
+<dd>Output the size of each compiled pattern after
+it has been compiled. This is equivalent to adding <b>/M</b> to each regular expression.
+For compatibility with earlier versions of pcretest, <b>-s</b> is a synonym for
+<b>-m</b>. </dd>
+
+<dt><b>-o</b> <i>osize</i> </dt>
+<dd>Set the number of elements in the output vector that is used
+when calling <b>pcre_exec()</b> to be <i>osize</i>. The default value is 45, which is
+enough for 14 capturing subexpressions. The vector size can be changed for
+individual matching calls by including \O in the data line (see below). </dd>
+
+<dt><b>-p</b>
+</dt>
+<dd>Behave as if each regex had the <b>/P</b> modifier; the POSIX wrapper API is used
+to call PCRE. None of the other options has any effect when <b>-p</b> is set. </dd>
+
+<dt><b>-t</b> </dt>
+<dd>Run
+each compile, study, and match many times with a timer, and output resulting
+time per compile or match (in milliseconds). Do not set <b>-m</b> with <b>-t</b>, because
+you will then get the size output a zillion times, and the timing will
+be distorted. </dd>
+</dl>
+
+<h2><a name='sect3' href='#toc3'>Description</a></h2>
+ <p>
+If <b>pcretest</b> is given two filename arguments,
+it reads from the first and writes to the second. If it is given only one
+filename argument, it reads from that file and writes to stdout. Otherwise,
+it reads from stdin and writes to stdout, and prompts for each line of
+input, using "re&gt;" to prompt for regular expressions, and "data&gt;" to prompt
+for data lines. <p>
+The program handles any number of sets of input on a single
+input file. Each set starts with a regular expression, and continues with
+any number of data lines to be matched against the pattern. <p>
+Each data line
+is matched separately and independently. If you want to do multiple-line
+matches, you have to use the \n escape sequence in a single line of input
+to encode the newline characters. The maximum length of a data line is 30,000
+characters. <p>
+An empty line signals the end of the data lines, at which point
+a new regular expression is read. The regular expressions are given enclosed
+in any non-alphanumeric delimiters other than backslash, for example <p>
+ /(a|bc)x+yz/<br>
+ <p>
+White space before the initial delimiter is ignored. A regular expression
+may be continued over several input lines, in which case the newline characters
+are included within it. It is possible to include the delimiter within the
+pattern by escaping it, for example <p>
+ /abc\/def/<br>
+ <p>
+If you do so, the escape and the delimiter form part of the pattern, but
+since delimiters are always non-alphanumeric, this does not affect its interpretation.
+If the terminating delimiter is immediately followed by a backslash, for
+example, <p>
+ /abc/\<br>
+ <p>
+then a backslash is added to the end of the pattern. This is done to provide
+a way of testing the error condition that arises if a pattern finishes
+with a backslash, because <p>
+ /abc\/<br>
+ <p>
+is interpreted as the first line of a pattern that starts with "abc/",
+causing pcretest to read the next line as a continuation of the regular
+expression.
+<h2><a name='sect4' href='#toc4'>Pattern Modifiers</a></h2>
+ <p>
+A pattern may be followed by any number
+of modifiers, which are mostly single characters. Following Perl usage,
+these are referred to below as, for example, "the <b>/i</b> modifier", even though
+the delimiter of the pattern need not always be a slash, and no slash is
+used when writing modifiers. Whitespace may appear between the final pattern
+delimiter and the first modifier, and between the modifiers themselves.
+<p>
+The <b>/i</b>, <b>/m</b>, <b>/s</b>, and <b>/x</b> modifiers set the PCRE_CASELESS, PCRE_MULTILINE,
+PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when <b>pcre_compile()</b>
+is called. These four modifier letters have the same effect as they do in
+Perl. For example: <p>
+ /caseless/i<br>
+ <p>
+The following table shows additional modifiers for setting PCRE options
+that do not correspond to anything in Perl: <p>
+ <b>/A</b> PCRE_ANCHORED<br>
+ <b>/C</b> PCRE_AUTO_CALLOUT<br>
+ <b>/E</b> PCRE_DOLLAR_ENDONLY<br>
+ <b>/N</b> PCRE_NO_AUTO_CAPTURE<br>
+ <b>/U</b> PCRE_UNGREEDY<br>
+ <b>/X</b> PCRE_EXTRA<br>
+ <p>
+Searching for all possible matches within each subject string can be requested
+by the <b>/g</b> or <b>/G</b> modifier. After finding a match, PCRE is called again to
+search the remainder of the subject string. The difference between <b>/g</b> and
+<b>/G</b> is that the former uses the <i>startoffset</i> argument to <b>pcre_exec()</b> to start
+searching at a new point within the entire string (which is in effect what
+Perl does), whereas the latter passes over a shortened substring. This makes
+a difference to the matching process if the pattern begins with a lookbehind
+assertion (including \b or \B). <p>
+If any call to <b>pcre_exec()</b> in a <b>/g</b> or <b>/G</b> sequence
+matches an empty string, the next call is done with the PCRE_NOTEMPTY and
+PCRE_ANCHORED flags set in order to search for another, non-empty, match
+at the same point. If this second match fails, the start offset is advanced
+by one, and the normal match is retried. This imitates the way Perl handles
+such cases when using the <b>/g</b> modifier or the <b>split()</b> function. <p>
+There are
+yet more modifiers for controlling the way <b>pcretest</b> operates. <p>
+The <b>/+</b> modifier
+requests that as well as outputting the substring that matched the entire
+pattern, pcretest should in addition output the remainder of the subject
+string. This is useful for tests where the subject contains multiple copies
+of the same substring. <p>
+The <b>/L</b> modifier must be followed directly by the
+name of a locale, for example, <p>
+ /pattern/Lfr_FR<br>
+ <p>
+For this reason, it must be the last modifier. The given locale is set,
+<b>pcre_maketables()</b> is called to build a set of character tables for the
+locale, and this is then passed to <b>pcre_compile()</b> when compiling the regular
+expression. Without an <b>/L</b> modifier, NULL is passed as the tables pointer;
+that is, <b>/L</b> applies only to the expression on which it appears. <p>
+The <b>/I</b> modifier
+requests that <b>pcretest</b> output information about the compiled pattern (whether
+it is anchored, has a fixed first character, and so on). It does this by
+calling <b>pcre_fullinfo()</b> after compiling a pattern. If the pattern is studied,
+the results of that are also output. <p>
+The <b>/D</b> modifier is a PCRE debugging
+feature, which also assumes <b>/I</b>. It causes the internal form of compiled
+regular expressions to be output after compilation. If the pattern was studied,
+the information returned is also output. <p>
+The <b>/F</b> modifier causes <b>pcretest</b>
+to flip the byte order of the fields in the compiled pattern that contain
+2-byte and 4-byte numbers. This facility is for testing the feature in PCRE
+that allows it to execute patterns that were compiled on a host with a
+different endianness. This feature is not available when the POSIX interface
+to PCRE is being used, that is, when the <b>/P</b> pattern modifier is specified.
+See also the section about saving and reloading compiled patterns below.
+<p>
+The <b>/S</b> modifier causes <b>pcre_study()</b> to be called after the expression has
+been compiled, and the results used when the expression is matched. <p>
+The
+<b>/M</b> modifier causes the size of memory block used to hold the compiled pattern
+to be output. <p>
+The <b>/P</b> modifier causes <b>pcretest</b> to call PCRE via the POSIX
+wrapper API rather than its native API. When this is done, all other modifiers
+except <b>/i</b>, <b>/m</b>, and <b>/+</b> are ignored. REG_ICASE is set if <b>/i</b> is present, and
+REG_NEWLINE is set if <b>/m</b> is present. The wrapper functions force PCRE_DOLLAR_ENDONLY
+always, and PCRE_DOTALL unless REG_NEWLINE is set. <p>
+The <b>/8</b> modifier causes
+<b>pcretest</b> to call PCRE with the PCRE_UTF8 option set. This turns on support
+for UTF-8 character handling in PCRE, provided that it was compiled with
+this support enabled. This modifier also causes any non-printing characters
+in output strings to be printed using the \x{hh...} notation if they are valid
+UTF-8 sequences. <p>
+If the <b>/?</b> modifier is used with <b>/8</b>, it causes <b>pcretest</b> to
+call <b>pcre_compile()</b> with the PCRE_NO_UTF8_CHECK option, to suppress the
+checking of the string for UTF-8 validity.
+<h2><a name='sect5' href='#toc5'>Data Lines</a></h2>
+ <p>
+Before each data
+line is passed to <b>pcre_exec()</b>, leading and trailing whitespace is removed,
+and it is then scanned for \ escapes. Some of these are pretty esoteric features,
+intended for checking out some of the more complicated features of PCRE.
+If you are just testing "ordinary" regular expressions, you probably don&rsquo;t
+need any of these. The following escapes are recognized: <p>
+ \a alarm
+(= BEL)<br>
+ \b backspace<br>
+ \e escape<br>
+ \f formfeed<br>
+ \n newline<br>
+ \r carriage return<br>
+ \t tab<br>
+ \v vertical tab<br>
+ \nnn octal character (up to 3 octal digits)<br>
+ \xhh hexadecimal character (up to 2 hex digits)<br>
+ \x{hh...} hexadecimal character, any number of digits<br>
+ in UTF-8 mode<br>
+ \A pass the PCRE_ANCHORED option to <b>pcre_exec()</b><br>
+ \B pass the PCRE_NOTBOL option to <b>pcre_exec()</b><br>
+ \Cdd call pcre_copy_substring() for substring dd<br>
+ after a successful match (number less than 32)<br>
+ \Cname call pcre_copy_named_substring() for substring<br>
+ "name" after a successful match (name termin-<br>
+ ated by next non-alphanumeric character)<br>
+ \C+ show the current captured substrings at callout<br>
+ time<br>
+ \C- do not supply a callout function<br>
+ \C!n return 1 instead of 0 when callout number n is<br>
+ reached<br>
+ \C!n!m return 1 instead of 0 when callout number n is<br>
+ reached for the mth time<br>
+ \C*n pass the number n (may be negative) as callout<br>
+ data; this is used as the callout return value<br>
+ \Gdd call pcre_get_substring() for substring dd<br>
+ after a successful match (number less than 32)<br>
+ \Gname call pcre_get_named_substring() for substring<br>
+ "name" after a successful match (name termin-<br>
+ ated by next non-alphanumeric character)<br>
+ \L call pcre_get_substring_list() after a<br>
+ successful match<br>
+ \M discover the minimum MATCH_LIMIT setting<br>
+ \N pass the PCRE_NOTEMPTY option to <b>pcre_exec()</b><br>
+ \Odd set the size of the output vector passed to<br>
+ <b>pcre_exec()</b> to dd (any number of digits)<br>
+ \P pass the PCRE_PARTIAL option to <b>pcre_exec()</b><br>
+ \S output details of memory get/free calls during matching<br>
+ \Z pass the PCRE_NOTEOL option to <b>pcre_exec()</b><br>
+ \? pass the PCRE_NO_UTF8_CHECK option to<br>
+ <b>pcre_exec()</b><br>
+ \&gt;dd start the match at offset dd (any number of digits);<br>
+ this sets the <i>startoffset</i> argument for <b>pcre_exec()</b><br>
+ <p>
+A backslash followed by anything else just escapes the anything else. If
+the very last character is a backslash, it is ignored. This gives a way
+of passing an empty line as data, since a real empty line terminates the
+data input. <p>
+If \M is present, <b>pcretest</b> calls <b>pcre_exec()</b> several times, with
+different values in the <i>match_limit</i> field of the <b>pcre_extra</b> data structure,
+until it finds the minimum number that is needed for <b>pcre_exec()</b> to complete.
+This number is a measure of the amount of recursion and backtracking that
+takes place, and checking it out can be instructive. For most simple matches,
+the number is quite small, but for patterns with very large numbers of
+matching possibilities, it can become large very quickly with increasing
+length of subject string. <p>
+When \O is used, the value specified may be higher
+or lower than the size set by the <b>-o</b> command line option (or defaulted to
+45); \O applies only to the call of <b>pcre_exec()</b> for the line in which it
+appears. <p>
+If the <b>/P</b> modifier was present on the pattern, causing the POSIX
+wrapper API to be used, only \B and \Z have any effect, causing REG_NOTBOL
+and REG_NOTEOL to be passed to <b>regexec()</b> respectively. <p>
+The use of \x{hh...}
+to represent UTF-8 characters is not dependent on the use of the <b>/8</b> modifier
+on the pattern. It is recognized always. There may be any number of hexadecimal
+digits inside the braces. The result is from one to six bytes, encoded according
+to the UTF-8 rules.
+<h2><a name='sect6' href='#toc6'>Output from Pcretest</a></h2>
+ <p>
+When a match succeeds, pcretest
+outputs the list of captured substrings that <b>pcre_exec()</b> returns, starting
+with number 0 for the string that matched the whole pattern. Otherwise,
+it outputs "No match" or "Partial match" when <b>pcre_exec()</b> returns PCRE_ERROR_NOMATCH
+or PCRE_ERROR_PARTIAL, respectively, and otherwise the PCRE negative error
+number. Here is an example of an interactive pcretest run. <p>
+ $ pcretest<br>
+ PCRE version 5.00 07-Sep-2004<br>
+ <p>
+ re&gt; /^abc(\d+)/<br>
+ data&gt; abc123<br>
+ 0: abc123<br>
+ 1: 123<br>
+ data&gt; xyz<br>
+ No match<br>
+ <p>
+If the strings contain any non-printing characters, they are output as
+\0x escapes, or as \x{...} escapes if the <b>/8</b> modifier was present on the pattern.
+If the pattern has the <b>/+</b> modifier, the output for substring 0 is followed
+by the rest of the subject string, identified by "0+" like this: <p>
+
+ re&gt; /cat/+<br>
+ data&gt; cataract<br>
+ 0: cat<br>
+ 0+ aract<br>
+ <p>
+If the pattern has the <b>/g</b> or <b>/G</b> modifier, the results of successive matching
+attempts are output in sequence, like this: <p>
+ re&gt; /\Bi(\w\w)/g<br>
+ data&gt; Mississippi<br>
+ 0: iss<br>
+ 1: ss<br>
+ 0: iss<br>
+ 1: ss<br>
+ 0: ipp<br>
+ 1: pp<br>
+ <p>
+"No match" is output only if the first match attempt fails. <p>
+If any of the
+sequences <b>\C</b>, <b>\G</b>, or <b>\L</b> are present in a data line that is successfully matched,
+the substrings extracted by the convenience functions are output with C,
+G, or L after the string number instead of a colon. This is in addition
+to the normal full list. The string length (that is, the return from the
+extraction function) is given in parentheses after each string for <b>\C</b> and
+<b>\G</b>. <p>
+Note that while patterns can be continued over several lines (a plain
+"&gt;" prompt is used for continuations), data lines may not. However newlines
+can be included in data by means of the \n escape.
+<h2><a name='sect7' href='#toc7'>Callouts</a></h2>
+ <p>
+If the pattern
+contains any callout requests, <b>pcretest</b>&rsquo;s callout function is called during
+matching. By default, it displays the callout number, the start and current
+positions in the text at the callout time, and the next pattern item to
+be tested. For example, the output <p>
+ ---&gt;pqrabcdef<br>
+ 0 ^ ^ \d<br>
+ <p>
+indicates that callout number 0 occurred for a match attempt starting
+at the fourth character of the subject string, when the pointer was at
+the seventh character of the data, and when the next pattern item was \d.
+Just one circumflex is output if the start and current positions are the
+same. <p>
+Callouts numbered 255 are assumed to be automatic callouts, inserted
+as a result of the <b>/C</b> pattern modifier. In this case, instead of showing
+the callout number, the offset in the pattern, preceded by a plus, is output.
+For example: <p>
+ re&gt; /\d?[A-E]\*/C<br>
+ data&gt; E*<br>
+ ---&gt;E*<br>
+ +0 ^ \d?<br>
+ +3 ^ [A-E]<br>
+ +8 ^^ \*<br>
+ +10 ^ ^<br>
+ 0: E*<br>
+ <p>
+The callout function in <b>pcretest</b> returns zero (carry on matching) by default,
+but you can use an \C item in a data line (as described above) to change
+this. <p>
+Inserting callouts can be helpful when using <b>pcretest</b> to check complicated
+regular expressions. For further information about callouts, see the <b>pcrecallout</b>
+ documentation.
+<h2><a name='sect8' href='#toc8'>Saving and Reloading Compiled Patterns</a></h2>
+ <p>
+The facilities
+described in this section are not available when the POSIX interface to
+PCRE is being used, that is, when the <b>/P</b> pattern modifier is specified.
+<p>
+When the POSIX interface is not in use, you can cause <b>pcretest</b> to write
+a compiled pattern to a file, by following the modifiers with &gt; and a file
+name. For example: <p>
+ /pattern/im &gt;/some/file<br>
+ <p>
+See the <b>pcreprecompile</b> documentation for a discussion about saving and
+re-using compiled patterns. <p>
+The data that is written is binary. The first
+eight bytes are the length of the compiled pattern data followed by the
+length of the optional study data, each written as four bytes in big-endian
+order (most significant byte first). If there is no study data (either the
+pattern was not studied, or studying did not return any data), the second
+length is zero. The lengths are followed by an exact copy of the compiled
+pattern. If there is additional study data, this follows immediately after
+the compiled pattern. After writing the file, <b>pcretest</b> expects to read a
+new pattern. <p>
+A saved pattern can be reloaded into <b>pcretest</b> by specifying
+&lt; and a file name instead of a pattern. The name of the file must not contain
+a &lt; character, as otherwise <b>pcretest</b> will interpret the line as a pattern
+delimited by &lt; characters. For example: <p>
+ re&gt; &lt;/some/file<br>
+ Compiled regex loaded from /some/file<br>
+ No study data<br>
+ <p>
+When the pattern has been loaded, <b>pcretest</b> proceeds to read data lines
+in the usual way. <p>
+You can copy a file written by <b>pcretest</b> to a different
+host and reload it there, even if the new host has opposite endianness
+to the one on which the pattern was compiled. For example, you can compile
+on an i86 machine and run on a SPARC machine. <p>
+File names for saving and
+reloading can be absolute or relative, but note that the shell facility
+of expanding a file name that starts with a tilde (~) is not available.
+<p>
+The ability to save and reload files in <b>pcretest</b> is intended for testing
+and experimentation. It is not intended for production use because only
+a single pattern can be written to a file. Furthermore, there is no facility
+for supplying custom character tables for use with a reloaded pattern. If
+the original pattern was compiled with custom tables, an attempt to match
+a subject string using a reloaded pattern is likely to cause <b>pcretest</b> to
+crash. Finally, if you attempt to load a file that is not in the correct
+format, the result is undefined.
+<h2><a name='sect9' href='#toc9'>Author</a></h2>
+ <p>
+Philip Hazel &lt;ph10@cam.ac.uk&gt; <br>
+University Computing Service, <br>
+Cambridge CB2 3QG, England. <p>
+ Last updated: 10 September 2004 <br>
+Copyright (c) 1997-2004 University of Cambridge. <p>
+
+<hr><p>
+<a name='toc'><b>Table of Contents</b></a><p>
+<ul>
+<li><a name='toc0' href='#sect0'>Name</a></li>
+<li><a name='toc1' href='#sect1'>Synopsis</a></li>
+<li><a name='toc2' href='#sect2'>Options</a></li>
+<li><a name='toc3' href='#sect3'>Description</a></li>
+<li><a name='toc4' href='#sect4'>Pattern Modifiers</a></li>
+<li><a name='toc5' href='#sect5'>Data Lines</a></li>
+<li><a name='toc6' href='#sect6'>Output from Pcretest</a></li>
+<li><a name='toc7' href='#sect7'>Callouts</a></li>
+<li><a name='toc8' href='#sect8'>Saving and Reloading Compiled Patterns</a></li>
+<li><a name='toc9' href='#sect9'>Author</a></li>
+</ul>
+</body>
+</html>
diff --git a/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.mft b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.mft
new file mode 100644
index 0000000..a2ac5c9
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.mft
@@ -0,0 +1,58 @@
+man/html/pcre.3.html
+man/html/pcre_compile.3.html
+man/html/pcre_config.3.html
+man/html/pcre_copy_named_substring.3.html
+man/html/pcre_copy_substring.3.html
+man/html/pcre_exec.3.html
+man/html/pcre_free_substring.3.html
+man/html/pcre_free_substring_list.3.html
+man/html/pcre_fullinfo.3.html
+man/html/pcre_get_named_substring.3.html
+man/html/pcre_get_stringnumber.3.html
+man/html/pcre_get_substring.3.html
+man/html/pcre_get_substring_list.3.html
+man/html/pcre_info.3.html
+man/html/pcre_maketables.3.html
+man/html/pcre_study.3.html
+man/html/pcre_version.3.html
+man/html/pcreapi.3.html
+man/html/pcrebuild.3.html
+man/html/pcrecallout.3.html
+man/html/pcrecompat.3.html
+man/html/pcregrep.1.html
+man/html/pcrepattern.3.html
+man/html/pcreperform.3.html
+man/html/pcreposix.3.html
+man/html/pcresample.3.html
+man/html/pcretest.1.html
+man/man1/pcregrep.1
+man/man1/pcretest.1
+man/man3/pcre.3
+man/man3/pcre_compile.3
+man/man3/pcre_config.3
+man/man3/pcre_copy_named_substring.3
+man/man3/pcre_copy_substring.3
+man/man3/pcre_exec.3
+man/man3/pcre_free_substring.3
+man/man3/pcre_free_substring_list.3
+man/man3/pcre_fullinfo.3
+man/man3/pcre_get_named_substring.3
+man/man3/pcre_get_stringnumber.3
+man/man3/pcre_get_substring.3
+man/man3/pcre_get_substring_list.3
+man/man3/pcre_info.3
+man/man3/pcre_maketables.3
+man/man3/pcre_study.3
+man/man3/pcre_version.3
+man/man3/pcreapi.3
+man/man3/pcrebuild.3
+man/man3/pcrecallout.3
+man/man3/pcrecompat.3
+man/man3/pcrepattern.3
+man/man3/pcreperform.3
+man/man3/pcreposix.3
+man/man3/pcresample.3
+man/pdf/pcre-man.pdf
+man/ps/pcre-man.ps.gz
+manifest/pcre-5.0-doc.mft
+manifest/pcre-5.0-doc.ver
diff --git a/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.ver b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.ver
new file mode 100644
index 0000000..6a4adaa
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-doc.ver
@@ -0,0 +1,2 @@
+Pcre 5.0: Documentation
+Pcre: Perl-compatible regular-expression library
diff --git a/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.mft b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.mft
new file mode 100644
index 0000000..aabb128
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.mft
@@ -0,0 +1,12 @@
+include/pcre.h
+include/pcreposix.h
+lib/libpcre-bcc.lib
+lib/libpcre.def
+lib/libpcre.dll.a
+lib/libpcre.lib
+lib/libpcreposix-bcc.lib
+lib/libpcreposix.def
+lib/libpcreposix.dll.a
+lib/libpcreposix.lib
+manifest/pcre-5.0-lib.mft
+manifest/pcre-5.0-lib.ver
diff --git a/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.ver b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.ver
new file mode 100644
index 0000000..288da85
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/manifest/pcre-5.0-lib.ver
@@ -0,0 +1,2 @@
+Pcre 5.0: Developer files
+Pcre: Perl-compatible regular-expression library
diff --git a/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.3 b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.3
new file mode 100644
index 0000000..b3c269b
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.3
@@ -0,0 +1,73 @@
+.TH PCRE 3
+.SH NAME
+pcre_subst - Perl-compatible regular expression substitution.
+.SH SYNOPSIS
+.B #include <pcre.h>
+.br
+.B #include <pcre_subst.h>
+.PP
+.SM
+.br
+char *pcre_subst(const pcre *\fIcode\fR, const pcre_extra *\fIextra\fR,
+.ti +5n
+const char *\fIsubject\fR, int \fIlength\fR, int \fIstartoffset\fR,
+.ti +5n
+int \fIoptions\fR, char *\fIreplacement\fR);
+
+
+
+.SH DESCRIPTION
+\fBpcre_subst\fR is a convenience routine that calls \fIpcre_exec\fR,
+and returns a freshly allocated string based on the \fIsubject\fR with
+the \fIreplacement\fR action applied. Unlike \fIsubject\fR, which is
+passed as a byte array with a length, \fIreplacement\fR is expected to
+be a zero terminated string (most users will just pass \fIstrlen(subject)\fR
+as the \fIlength\fR).
+
+.br
+If no match is found, pcre_subst returns NULL. The returned string is zero
+terminated (note that \fIsubject\fR doesn't have to be). For information
+on the \fIcode\fR, \fIextra\fR, \fIsubject\fR, \fIlength\fR,
+\fIstartoffset\fR and \fIoptions\fR parameters, please see \fBpcre(3)\fR.
+
+.SH REPLACEMENT STRING
+The replacement string supports a subset of the PERL replacement string.
+In particular, \\1 style escapes are not supported (actually, only the
+$1 style is handled).
+
+.SH EXAMPLE
+.Bd -literal -compact
+#include <stdio.h>
+#include <pcre.h>
+#include "pcre_subst.h"
+
+int
+main()
+{
+ char *pat = "quick\\\\s(\\\\w+)\\\\s(fox)";
+ char *rep = "$1ish $2";
+ char *str = "The quick brown foxy";
+ char *newstr;
+ const char *err;
+ int erroff;
+ pcre_extra *extra;
+ pcre *p = pcre_compile(pat, 0, &err, &erroff, NULL);
+ if (p == NULL) {
+ fprintf(stderr, "%s at %d\\n", err, erroff);
+ exit(1);
+ }
+ extra = pcre_study(p, 0, &err);
+ if (err != NULL)
+ fprintf(stderr, "Study %s: %s\\n", pat, err);
+ newstr = pcre_subst(p, extra, str, strlen(str),
+ 0, 0, rep);
+ if (newstr) {
+ printf("New string: %s\\n", newstr);
+ pcre_free(newstr);
+ };
+ return 0;
+}
+.Ed
+
+.SH SEE ALSO
+pcre(3)
diff --git a/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.c b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.c
new file mode 100644
index 0000000..3f34f97
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.c
@@ -0,0 +1,191 @@
+/*************************************************
+* PCRE string replacement *
+*************************************************/
+
+/*
+PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+pcre_subst is a wrapper around pcre_exec designed to make it easier to
+perform PERL style replacements with PCRE.
+
+Written by: Bert Driehuis <driehuis@playbeing.org>
+
+ Copyright (c) 2000 Bert Driehuis
+
+-----------------------------------------------------------------------------
+Permission is granted to anyone to use this software for any purpose on any
+computer system, and to redistribute it freely, subject to the following
+restrictions:
+
+1. This software is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+4. If PCRE is embedded in any software that is released under the GNU
+ General Purpose Licence (GPL), then the terms of that licence shall
+ supersede any condition above with which it is incompatible.
+*/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pcre.h>
+#include "pcre_subst.h"
+
+/*
+ * Upper bound on the number of matches (whole match plus capture groups)
+ * handled per call: the pcre_exec output vector is declared with
+ * MAXCAPTURE * 3 ints and the per-group tables with MAXCAPTURE entries.
+ */
+#define MAXCAPTURE 50
+
+#ifdef DEBUG_PCRE_SUBST
+/*
+ * Debug helper: print one line as long as the subject in which the bytes
+ * in [start, end) are shown verbatim and every other position as '-'.
+ *
+ * str    subject bytes (need not be zero terminated)
+ * len    number of bytes in str
+ * start  offset of the first byte to show
+ * end    offset one past the last byte to show
+ */
+static void
+dumpstr(const char *str, int len, int start, int end)
+{
+    int i;
+
+    /* walk the caller-supplied length; str may lack a terminating NUL */
+    for (i = 0; i < len; i++)
+        putchar((i >= start && i < end) ? str[i] : '-');
+    putchar('\n');
+}
+
+/*
+ * Debug helper: print the subject, the match count, the raw offset pairs
+ * and one dumpstr() line per match.
+ *
+ * str   subject bytes (need not be zero terminated)
+ * len   number of bytes in str
+ * rep   replacement template (currently not printed)
+ * nmat  match count; when it is zero or negative only the header lines
+ *       are printed
+ * ovec  offset vector holding nmat (start, end) pairs
+ */
+static void
+dumpmatch(const char *str, int len, const char *rep, int nmat, const int *ovec)
+{
+    int i;
+
+    (void)rep;
+    /* bound the print by len: the subject may lack a terminating NUL */
+    printf("%.*s Input\n", len, str);
+    printf("nmat=%d", nmat);
+    for (i = 0; i < nmat * 2; i++)
+        printf(" %d", ovec[i]);
+    printf("\n");
+    for (i = 0; i < nmat * 2; i += 2)
+        dumpstr(str, len, ovec[i], ovec[i + 1]);
+    printf("\n");
+}
+#endif
+
+/*
+ * Compute how many bytes the expanded replacement will occupy.
+ *
+ * rep     zero terminated replacement template; "$N" (N decimal) refers
+ *         to capture group N, every other character is taken literally
+ * nmat    number of capture groups described by replen
+ * replen  replen[N - 1] is the length of capture group N
+ *
+ * Returns the expanded length, not counting a terminating NUL.
+ * References outside 1..nmat are reported on stderr and contribute
+ * nothing, which is exactly what doreplace() emits for them.
+ */
+static int
+findreplen(const char *rep, int nmat, const int *replen)
+{
+    int len = 0;
+    const char *cp = rep;
+
+    while (*cp) {
+        /* isdigit() is only defined for unsigned char values and EOF */
+        if (*cp == '$' && isdigit((unsigned char)cp[1])) {
+            char *end;
+            unsigned long val = strtoul(cp + 1, &end, 10);
+
+            cp = end;
+            /* only entries 0..nmat-1 of replen are valid */
+            if (nmat > 0 && val >= 1 && val <= (unsigned long)nmat) {
+                if (replen[val - 1] > 0)
+                    len += replen[val - 1];
+            } else {
+                fprintf(stderr, "repl %lu out of range\n", val);
+            }
+        } else {
+            cp++;
+            len++;
+        }
+    }
+    return len;
+}
+
+/*
+ * Expand the replacement template into out.
+ *
+ * out     destination; must have room for findreplen(rep, nmat, replen)
+ *         bytes.  No terminating NUL is written.
+ * rep     zero terminated replacement template (see findreplen)
+ * nmat    number of capture groups described by replen/repstr
+ * replen  replen[N - 1] is the length of capture group N
+ * repstr  repstr[N - 1] points at the first byte of capture group N
+ *
+ * References outside 1..nmat are silently dropped.
+ */
+static void
+doreplace(char *out, const char *rep, int nmat, int *replen, const char **repstr)
+{
+    const char *cp = rep;
+
+    while (*cp) {
+        if (*cp == '$' && isdigit((unsigned char)cp[1])) {
+            char *end;
+            unsigned long val = strtoul(cp + 1, &end, 10);
+
+            cp = end;
+            if (nmat > 0 && val >= 1 && val <= (unsigned long)nmat &&
+                replen[val - 1] > 0) {
+                /* captured text is raw bytes: copy it all, NULs included */
+                memcpy(out, repstr[val - 1], (size_t)replen[val - 1]);
+                out += replen[val - 1];
+            }
+        } else {
+            *out++ = *cp++;
+        }
+    }
+}
+
+/*
+ * Build the edited string: the subject with the matched region replaced
+ * by the expanded replacement template.
+ *
+ * str   subject bytes (need not be zero terminated)
+ * len   number of bytes in str
+ * rep   zero terminated replacement template
+ * nmat  match count: the whole match plus the capture groups
+ * ovec  offset vector; pair 0 is the whole match, pair N is group N
+ *
+ * Returns a zero terminated string obtained from pcre_malloc, or NULL
+ * when the allocation fails.
+ */
+static char *
+edit(const char *str, int len, const char *rep, int nmat, const int *ovec)
+{
+    int i, slen, rlen;
+    const int *mvec = ovec;    /* offsets of the whole match */
+    char *res, *cp;
+    int replen[MAXCAPTURE];
+    const char *repstr[MAXCAPTURE];
+
+    /* skip pair 0 so that index i describes capture group i + 1 */
+    nmat--;
+    ovec += 2;
+    /* never write past the fixed-size tables */
+    if (nmat > MAXCAPTURE)
+        nmat = MAXCAPTURE;
+    for (i = 0; i < nmat; i++) {
+        int from = ovec[i * 2];
+        int to = ovec[i * 2 + 1];
+
+        if (from < 0 || to < from) {
+            /* group has no usable offsets: treat it as empty */
+            replen[i] = 0;
+            repstr[i] = str;
+        } else {
+            replen[i] = to - from;
+            repstr[i] = &str[from];
+        }
+#ifdef DEBUG_PCRE_SUBST
+        printf(">>>%d %d %.*s\n", i, replen[i], replen[i], repstr[i]);
+#endif
+    }
+    slen = len;
+    /* result length = subject - matched region + expanded replacement */
+    len -= mvec[1] - mvec[0];
+    rlen = findreplen(rep, nmat, replen);
+    len += rlen;
+#ifdef DEBUG_PCRE_SUBST
+    printf("resulting length %d (srclen=%d)\n", len, slen);
+#endif
+    res = pcre_malloc(len + 1);
+    if (res == NULL)
+        return NULL;
+    cp = res;
+    if (mvec[0] > 0) {
+        memcpy(cp, str, (size_t)mvec[0]);
+        cp += mvec[0];
+    }
+    doreplace(cp, rep, nmat, replen, repstr);
+    cp += rlen;
+    /* copy exactly the bytes after the match; str may lack a NUL */
+    if (mvec[1] < slen)
+        memcpy(cp, &str[mvec[1]], (size_t)(slen - mvec[1]));
+    res[len] = 0;
+    return res;
+}
+
+/*
+ * Match a compiled pattern against a subject and return a copy of the
+ * subject with the matched region replaced.
+ *
+ * ppat     compiled pattern, handed to pcre_exec
+ * extra    study data, handed to pcre_exec
+ * str      subject bytes
+ * len      number of bytes in str
+ * offset   start offset, handed to pcre_exec
+ * options  option bits, handed to pcre_exec
+ * rep      zero terminated replacement template using $N references
+ *
+ * Returns NULL when pcre_exec reports a result <= 0; otherwise the
+ * string built by edit(), which comes from pcre_malloc.
+ */
+char *
+pcre_subst(const pcre *ppat, const pcre_extra *extra, const char *str, int len,
+    int offset, int options, const char *rep)
+{
+    int nmat;
+    int ovec[MAXCAPTURE * 3];
+
+    /* pcre_exec takes the number of ints in ovec, not its size in bytes */
+    nmat = pcre_exec(ppat, extra, str, len, offset, options,
+        ovec, (int)(sizeof(ovec) / sizeof(ovec[0])));
+#ifdef DEBUG_PCRE_SUBST
+    dumpmatch(str, len, rep, nmat, ovec);
+#endif
+    if (nmat <= 0)
+        return NULL;
+    return edit(str, len, rep, nmat, ovec);
+}
+
+#ifdef DEBUG_BUILD
+/*
+ * Self-test driver (DEBUG_BUILD only): compile and study a fixed pattern,
+ * run one substitution on a fixed sentence and print the outcome.
+ */
+int
+main()
+{
+    const char *pattern = "quick\\s(\\w+)\\s(fox)";
+    const char *replacement = "$1ish $2";
+    const char *subject = "The quick brown foxy";
+    const char *errmsg;
+    int errpos;
+    pcre *compiled;
+    pcre_extra *studied;
+    char *result;
+
+    compiled = pcre_compile(pattern, 0, &errmsg, &errpos, NULL);
+    if (compiled == NULL) {
+        fprintf(stderr, "%s at %d\n", errmsg, errpos);
+        exit(1);
+    }
+
+    /* a study failure is reported but does not stop the run */
+    studied = pcre_study(compiled, 0, &errmsg);
+    if (errmsg != NULL)
+        fprintf(stderr, "Study %s failed: %s\n", pattern, errmsg);
+
+    result = pcre_subst(compiled, studied, subject, strlen(subject),
+        0, 0, replacement);
+    if (result == NULL) {
+        printf("No match\n");
+    } else {
+        printf("Newstr\t%s\n", result);
+        pcre_free(result);
+    }
+    return 0;
+}
+#endif
diff --git a/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.h b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.h
new file mode 100644
index 0000000..e4f4c44
--- /dev/null
+++ b/spamfilter/Utilities/PCRE/pcre_subst/pcre_subst.h
@@ -0,0 +1,35 @@
+/*************************************************
+* PCRE string replacement *
+*************************************************/
+
+/*
+PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+pcre_subst is a wrapper around pcre_exec designed to make it easier to
+perform PERL style replacements with PCRE.
+
+Written by: Bert Driehuis <driehuis@playbeing.org>
+
+ Copyright (c) 2000 Bert Driehuis
+
+-----------------------------------------------------------------------------
+Permission is granted to anyone to use this software for any purpose on any
+computer system, and to redistribute it freely, subject to the following
+restrictions:
+
+1. This software is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+4. If PCRE is embedded in any software that is released under the GNU
+ General Purpose Licence (GPL), then the terms of that licence shall
+ supersede any condition above with which it is incompatible.
+*/
+
+#ifndef PCRE_SUBST_H
+#define PCRE_SUBST_H
+
+#include <pcre.h>
+
+/*
+ * Run the compiled pattern ppat (with optional study data extra) against
+ * the len bytes at str, starting at offset and using the given pcre_exec
+ * options, and return a newly allocated, zero terminated copy of the
+ * subject in which the matched region is replaced by rep.  rep is a zero
+ * terminated template in which $N stands for capture group N.
+ *
+ * Returns NULL when there is no match.  The caller releases the result
+ * with pcre_free.
+ */
+char *pcre_subst(const pcre *ppat, const pcre_extra *extra, const char *str,
+    int len, int offset, int options, const char *rep);
+
+#endif /* PCRE_SUBST_H */